from datetime import datetime, timedelta
import os
import subprocess
import contextily as ctx
import folium
import geopandas as gpd
from haversine import haversine, Unit
import matplotlib.pyplot as plt
import pandas as pd
from pyprojroot import here
import requests
import r5py
from shapely.geometry import LineString
Analyse Supermarket Accessibility in Cardiff LAD
Load Supermarket Points
# Load the Geolytix supermarket cut and build a point GeoDataFrame in WGS84.
supermarkets = pd.read_csv(here("data/external/features/geolytix_cut.csv"))
super_gdf = gpd.GeoDataFrame(
    supermarkets,
    geometry=gpd.points_from_xy(
        supermarkets["long_wgs"], supermarkets["lat_wgs"]
    ),
    crs="EPSG:4326",
)
# unique ID column for routing (r5py expects an "id" column)
super_gdf["id"] = list(range(len(super_gdf)))
super_gdf.explore()
Ingest Cardiff Boundary
2023 boundary, full resolution clipped to coastline.
# ONS ArcGIS FeatureServer endpoint for LAD December 2023 boundaries (BFC =
# full resolution, clipped to coastline). W06000015 is Cardiff.
ENDPOINT = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Local_Authority_Districts_December_2023_Boundaries_UK_BFC/FeatureServer/0/query"
PARAMS = {
    "where": "LAD23CD = 'W06000015'",
    "f": "geoJSON",
    "outFields": "*",
    "outSR": 27700,  # British National Grid
}
def get_ons_geo_data(endpoint: str, params: dict) -> tuple:
    """Query an ONS ArcGIS endpoint and cast the geoJSON response to a gdf.

    Parameters
    ----------
    endpoint : str
        ArcGIS FeatureServer query URL.
    params : dict
        Query-string parameters for the request.

    Returns
    -------
    tuple
        (raw JSON payload as dict, features cast to a gpd.GeoDataFrame).
        NOTE: the original annotation claimed a bare GeoDataFrame return,
        but a 2-tuple has always been returned.

    Raises
    ------
    requests.RequestException
        If the HTTP response status is not OK.
    """
    resp = requests.get(endpoint, params=params)
    if not resp.ok:
        raise requests.RequestException(
            f"HTTP {resp.status_code} : {resp.reason}"
        )
    content = resp.json()
    gdf = gpd.GeoDataFrame.from_features(
        content["features"], crs=content["crs"]["properties"]["name"]
    )
    return content, gdf
# Fetch the Cardiff LAD boundary; keep the raw payload for its CRS metadata.
content, poly = get_ons_geo_data(ENDPOINT, PARAMS)
poly.head()
geometry | FID | LAD23CD | LAD23NM | LAD23NMW | BNG_E | BNG_N | LONG | LAT | Shape__Area | Shape__Length | GlobalID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | MULTIPOLYGON (((322081.699 165165.901, 322082.... | 353 | W06000015 | Cardiff | Caerdydd | 315272 | 178887 | -3.22209 | 51.50254 | 1.423040e+08 | 79590.02316 | 8493a146-4d0f-4048-9c96-4a15a3257573 |
# buffer by 5 km to avoid edge effects at the LAD boundary
# (the original comment said "a km" but the buffer is 5000 m)
buffered_poly = gpd.GeoDataFrame(
    {"geometry": poly.buffer(5000)},
    crs=content["crs"]["properties"]["name"],
)
# reproject both to WGS84 so they align with the supermarket points
buffered_poly.to_crs(4326, inplace=True)
poly.to_crs(4326, inplace=True)
# supermarkets falling inside the buffered boundary
proximal_supers = super_gdf.sjoin(buffered_poly)
# Static map: LAD boundary (red), 5 km buffer (blue), supermarkets (points).
fig, ax = plt.subplots()
poly.plot(ax=ax, facecolor="red", alpha=0.4)
buffered_poly.plot(ax=ax, facecolor="blue", alpha=0.2)
proximal_supers.plot(ax=ax, color="blue", markersize=3)
ctx.add_basemap(
    ax,
    crs=proximal_supers.crs.to_string(),
    source=ctx.providers.CartoDB.Voyager,
)
plt.title(
    f"{len(proximal_supers)} Supermarkets Within 5 km Buffered Cardiff LAD extent"
)
plt.tight_layout()
plt.savefig(
    "outputs/cardiff-supermarkets/cardiff-supermarkets-5km-buffer.png", dpi=400
)
By Output Area Population-Weighted Centroid
Output Areas (December 2021) PWC (V3)
# Switch endpoint to OA 2021 population-weighted centroids; reuse PARAMS but
# restrict to Welsh OAs and request WGS84 output.
ENDPOINT = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Output_Areas_2021_PWC_V3/FeatureServer/0/query"
PARAMS["where"] = "OA21CD like 'W%'"
PARAMS["resultOffset"] = 0
PARAMS["outSR"] = 4326
def get_ons_geo_paginated(endpoint: str, params: dict) -> gpd.GeoDataFrame:
    """Page through an ONS ArcGIS endpoint, concatenating all features.

    ArcGIS caps the number of records per response; while the payload
    reports ``exceededTransferLimit`` we keep requesting with an
    incremented ``resultOffset``. Note: mutates ``params`` in place.

    Parameters
    ----------
    endpoint : str
        ArcGIS FeatureServer query URL.
    params : dict
        Query parameters; must contain a numeric "resultOffset".

    Returns
    -------
    gpd.GeoDataFrame
        All pages concatenated, index reset.
    """
    # BUG FIX: the first request originally used the module-level
    # ENDPOINT/PARAMS globals, silently ignoring the arguments passed in.
    content, gdf = get_ons_geo_data(endpoint, params)
    # rather than exceededTransferLimit = False on the last page, the key
    # disappears entirely — so default to False everywhere.
    more_pages = content["properties"].get("exceededTransferLimit", False)
    offset = len(gdf)  # number of records to offset by
    all_gdfs = gdf  # accumulate pages here
    while more_pages:
        params["resultOffset"] += offset  # advance to the next page
        content, gdf = get_ons_geo_data(endpoint, params)
        all_gdfs = pd.concat([all_gdfs, gdf])
        more_pages = content["properties"].get("exceededTransferLimit", False)
    return all_gdfs.reset_index(drop=True)
# PARAMS["returnCountOnly"] = True
# 10275 Welsh centroids expected
centroids = get_ons_geo_paginated(ENDPOINT, PARAMS)
len(centroids)
10275
# Restrict centroids to those inside the (unbuffered) Cardiff boundary and
# plot them over the supermarket map for a visual sanity check.
cardiff_centroids = centroids.sjoin(poly)
fig, ax = plt.subplots()
poly.plot(ax=ax, facecolor="red", alpha=0.4)
buffered_poly.plot(ax=ax, facecolor="blue", alpha=0.2)
proximal_supers.plot(ax=ax, color="blue", markersize=3)
ctx.add_basemap(
    ax,
    crs=proximal_supers.crs.to_string(),
    source=ctx.providers.CartoDB.Voyager,
)
plt.title(f"{len(cardiff_centroids)} Centroids Within Cardiff LAD extent")
cardiff_centroids.plot(ax=ax, color="green", markersize=3, alpha=0.2)
plt.tight_layout()
plt.savefig(
    "outputs/cardiff-supermarkets/cardiff-centroids-supermarkets-5km-buffer.png",
    dpi=400,
)
# get osm data (Geofabrik Wales extract) if not already cached locally
osm_pth = here("data/external/osm/wales-latest.osm.pbf")
if not os.path.exists(osm_pth):
    subprocess.run(
        [
            "curl",
            "https://download.geofabrik.de/europe/united-kingdom/wales-latest.osm.pbf",
            "-o",
            osm_pth,
        ],
        check=True,  # fail loudly if the download does not succeed
    )
Here is where the Java dependency kicks in…
# Build the routable network (this is where the Java/R5 dependency kicks in).
transport_network = r5py.TransportNetwork(osm_pth)
# snap all centroids onto the transport network (walk edges, 100 m radius)
cardiff_centroids["snapped_geometry"] = transport_network.snap_to_network(
    cardiff_centroids["geometry"],
    radius=100,
    street_mode=r5py.TransportMode.WALK,
)
# snap distance summaries: great-circle distance between each original
# centroid and its snapped position. haversine expects (lat, lon), so the
# (x, y) = (lon, lat) coordinates are reversed.
cardiff_centroids["snap_dist_m"] = cardiff_centroids.apply(
    lambda row: haversine(
        (row["geometry"].y, row["geometry"].x),
        (row["snapped_geometry"].y, row["snapped_geometry"].x),
        unit=Unit.METERS,
    ),
    axis=1,
)

cardiff_centroids["snap_dist_m"].plot.hist(
    bins=50,
    title="Distribution of coordinate snap distance in point plane (m)",
)
plt.show()
# Inspect the n centroids whose coordinates were adjusted by the greatest
# distance, drawing original point, snapped point, and a connecting line.
largest_snaps = cardiff_centroids.copy(deep=True)
n = 20
largest_snaps = largest_snaps.sort_values(
    by="snap_dist_m", ascending=False
).head(n)
# create the LineString geometry joining original -> snapped position
largest_snaps["lines"] = largest_snaps.apply(
    lambda row: LineString([row["geometry"], row["snapped_geometry"]]),
    axis=1,
)
z_start = 11
# original geometry layer
imap = largest_snaps.explore(
    marker_type="marker",
    marker_kwds={
        "icon": folium.map.Icon(color="red", icon="ban", prefix="fa"),
    },
    map_kwds={
        "center": {"lat": 51.478, "lng": -3.165}
    },
    zoom_start=z_start,
)
# snapped geometry layer
imap = largest_snaps.set_geometry("snapped_geometry").explore(
    m=imap,
    marker_type="marker",
    marker_kwds={
        "icon": folium.map.Icon(color="green", icon="person-walking", prefix="fa"),
    },
)
# line geometry layer
imap = largest_snaps.set_geometry("lines").explore(
    m=imap,
    zoom_start=z_start,
)
imap
# drop the original centroids in favour of snapped geoms
# (redundant axis=1 removed: `columns=` already implies the column axis)
cardiff_centroids.drop(columns=["geometry", "snap_dist_m"], inplace=True)
# r5py expects origin IDs in an "id" column and geometry named "geometry"
cardiff_centroids.rename(
    columns={
        "snapped_geometry": "geometry",
        "OA21CD": "id",
    },
    inplace=True,
)
cardiff_centroids.set_geometry("geometry", inplace=True)
# depart today at 08:00 (naive local time, as r5py expects)
dept_time = datetime.now().replace(hour=8, minute=0, second=0, microsecond=0)

travel_time_matrix = r5py.TravelTimeMatrixComputer(
    transport_network,
    origins=cardiff_centroids,
    destinations=proximal_supers,
    transport_modes=[r5py.TransportMode.WALK, r5py.TransportMode.CAR],
    departure=dept_time,
    departure_time_window=timedelta(minutes=60),  # every minute until 9am
    snap_to_network=False,  # origins already snapped above
    max_time=timedelta(minutes=60),
    speed_walking=4.8,  # default is 3.6 km/h
).compute_travel_times()
travel_time_matrix.dropna().head()
from_id | to_id | travel_time | |
---|---|---|---|
0 | W00010120 | 86 | 9 |
1 | W00010120 | 273 | 30 |
2 | W00010120 | 455 | 16 |
3 | W00010120 | 456 | 12 |
4 | W00010120 | 643 | 30 |
Engineer Travel Time Matrix
Calculate median travel time across each supermarket. This gives us a median travel time from all centroids to all reachable supermarkets. Also need to merge to geometries to add spatial context. Going with 2021 boundaries generalised to 20m resolution as just for mapping a centroid value.
Other features of interest may be:
- number of reachable supermarkets
- an ‘effective population served within 1 hour by car and by foot’.
- an accessibility statistic such as the ratio of reachable / proximal supermarkets. eg the number of supermarkets you can reach through the transport network as a proportion of the number within a crow’s flight radius.
# Median travel time from each origin across all reachable supermarkets.
med_tts = travel_time_matrix.drop("to_id", axis=1).groupby("from_id").median()
# OA 2021 boundaries generalised to 20 m (BGC) — sufficient for mapping.
ENDPOINT = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Output_Areas_2021_EW_BGC_V2/FeatureServer/0/query"
PARAMS["where"] = "OA21CD like 'W%'"
PARAMS["resultOffset"] = 0

oa_polys = get_ons_geo_paginated(ENDPOINT, PARAMS)
len(oa_polys)
10275
# Attach OA polygons to the median travel times (index = from_id = OA21CD),
# then restore GeoDataFrame type/CRS lost by the join.
median_tt_oa = med_tts.join(oa_polys.set_index("OA21CD"))
median_tt_oa = gpd.GeoDataFrame(median_tt_oa, crs=4326)
median_tt_oa.head()
travel_time | geometry | FID | LSOA21CD | LSOA21NM | LSOA21NMW | BNG_E | BNG_N | LAT | LONG | Shape__Area | Shape__Length | GlobalID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
from_id | |||||||||||||
W00008779 | 12.5 | POLYGON ((-3.15371 51.48625, -3.15409 51.48610... | 187079 | W01001694 | Cardiff 036A | Caerdydd 036A | 319717 | 177053 | 51.48670 | -3.15765 | 37192.300659 | 1323.933139 | 22955f09-cd77-4133-b0c6-5d3d54a1564d |
W00008780 | 12.0 | POLYGON ((-3.15290 51.48853, -3.15203 51.48723... | 187080 | W01001697 | Cardiff 036D | Caerdydd 036D | 320018 | 177178 | 51.48787 | -3.15334 | 20110.197174 | 656.083765 | ede6239d-fa9e-4fa2-95e4-d2609951f8dd |
W00008781 | 12.0 | POLYGON ((-3.15856 51.48645, -3.15806 51.48628... | 187081 | W01001694 | Cardiff 036A | Caerdydd 036A | 319566 | 177006 | 51.48626 | -3.15981 | 42158.049187 | 1093.054113 | a45f08c9-683b-41fa-aebe-8c401cec73d8 |
W00008782 | 12.0 | POLYGON ((-3.15490 51.48587, -3.15718 51.48494... | 187082 | W01001694 | Cardiff 036A | Caerdydd 036A | 319789 | 176962 | 51.48589 | -3.15659 | 25546.097813 | 655.481197 | 0293396c-8ba5-4782-bf24-216461db1709 |
W00008784 | 13.0 | POLYGON ((-3.15371 51.48625, -3.15447 51.48498... | 187083 | W01001694 | Cardiff 036A | Caerdydd 036A | 319925 | 176862 | 51.48502 | -3.15461 | 34684.802540 | 1455.580623 | 888a5b61-4a7b-466d-ae54-fc9ff0685663 |
# Choropleth of median travel time per OA with supermarket markers overlaid.
imap = median_tt_oa.explore("travel_time", cmap="viridis_r", tiles="CartoDB positron")
imap = proximal_supers.explore(
    m=imap,
    marker_type="marker",
    marker_kwds={
        "icon": folium.map.Icon(color="red", icon="cart-shopping", prefix="fa"),
    },
)
imap.save("outputs/cardiff-supermarkets/median_tt_to_supermarkets_5km_buffer.html")
imap
This is one option for calculation of origins. Population-weighted centroids are ok, but I would advise taking a gridded approach, using Global Human Settlement Layer or equivalent. Finer spatial resolution, down to 10m grids if you really wanted that. There are some caveats with this data that I can point out if helpful.