Analyse Supermaket Accessibility in Cardiff LAD

Demonstrating a Method for Calculating Indiciative Travel Time Metrics
Author

Rich Leyshon

Published

February 20, 2024

from datetime import datetime, timedelta
import os
import subprocess

import contextily as ctx
import folium
import geopandas as gpd
from haversine import haversine, Unit
import matplotlib.pyplot as plt
import pandas as pd
from pyprojroot import here
import requests
import r5py
from shapely.geometry import LineString

Load Supermarket Points

supermarkets = pd.read_csv(here("data/external/features/geolytix_cut.csv"))
super_gdf = gpd.GeoDataFrame(
    supermarkets,
    geometry=gpd.points_from_xy(
        supermarkets["long_wgs"], supermarkets["lat_wgs"]
        ),
    crs="EPSG:4326")
super_gdf["id"] = list(range(0, len(super_gdf)))# unique ID column for routing
super_gdf.explore()
Make this Notebook Trusted to load map: File -> Trust Notebook

Ingest Cardiff Boundary

2023 boundary, full resolution clipped to coastline.

ENDPOINT = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Local_Authority_Districts_December_2023_Boundaries_UK_BFC/FeatureServer/0/query"
PARAMS = {
    "where": "LAD23CD = 'W06000015'",
    "f": "geoJSON", 
    "outFields": "*",
    "outSR": 27700,
}


def get_ons_geo_data(endpoint:str, params:dict) -> gpd.GeoDataFrame:
    "Cast json response to gdf"
    resp = requests.get(endpoint, params=params)
    if resp.ok:
        content = resp.json()
    else:
        raise requests.RequestException(
            f"HTTP {resp.status_code} : {resp.reason}")
            
    gdf = gpd.GeoDataFrame.from_features(
        content["features"], crs=content["crs"]["properties"]["name"])

    return content, gdf


content, poly = get_ons_geo_data(ENDPOINT, PARAMS)
poly.head()
geometry FID LAD23CD LAD23NM LAD23NMW BNG_E BNG_N LONG LAT Shape__Area Shape__Length GlobalID
0 MULTIPOLYGON (((322081.699 165165.901, 322082.... 353 W06000015 Cardiff Caerdydd 315272 178887 -3.22209 51.50254 1.423040e+08 79590.02316 8493a146-4d0f-4048-9c96-4a15a3257573
# buffer by a km to avoid edge effects
buffered_poly = gpd.GeoDataFrame(
    {"geometry": poly.buffer(5000)},
    crs=content["crs"]["properties"]["name"])
buffered_poly.to_crs(4326, inplace=True)
poly.to_crs(4326, inplace=True)
proximal_supers = super_gdf.sjoin(buffered_poly)
fig, ax = plt.subplots()
poly.plot(ax=ax, facecolor="red", alpha=0.4)
buffered_poly.plot(ax=ax, facecolor="blue", alpha=0.2)
proximal_supers.plot(ax=ax, color="blue", markersize=3)
ctx.add_basemap(
    ax,
    crs=proximal_supers.crs.to_string(), source=ctx.providers.CartoDB.Voyager
)
plt.title(f"{len(proximal_supers)} Supermarkets Within 5 km Buffered Cardiff LAD extent")
plt.tight_layout()
plt.savefig("outputs/cardiff-supermarkets/cardiff-supermarkets-5km-buffer.png", dpi=400)

By Output Area Population-Weighted Centroid

Output Areas (December 2021) PWC (V3)

ENDPOINT = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Output_Areas_2021_PWC_V3/FeatureServer/0/query"
PARAMS["where"] = "OA21CD like 'W%'"
PARAMS["resultOffset"] = 0
PARAMS["outSR"] = 4326


def get_ons_geo_paginated(endpoint:str, params:dict) -> gpd.GeoDataFrame:
    content, gdf = get_ons_geo_data(ENDPOINT, PARAMS)
    more_pages = content["properties"]["exceededTransferLimit"]
    offset = len(gdf) # number of records to offset by
    all_gdfs = gdf # append gdfs here
    while more_pages:
        params["resultOffset"] += offset # increment the records
        content, gdf = get_ons_geo_data(endpoint, params)
        all_gdfs = pd.concat([all_gdfs, gdf])
        try:
            more_pages = content["properties"]["exceededTransferLimit"]
        except KeyError:
            # rather than exceededTransferLimit = False, it disappears...
            more_pages = False
    all_gdfs = all_gdfs.reset_index(drop=True)
    return all_gdfs


# PARAMS["returnCountOnly"] = True
# 10275 Welsh centroids
centroids = get_ons_geo_paginated(ENDPOINT, PARAMS)
len(centroids)
10275
cardiff_centroids = centroids.sjoin(poly)
fig, ax = plt.subplots()
poly.plot(ax=ax, facecolor="red", alpha=0.4)
buffered_poly.plot(ax=ax, facecolor="blue", alpha=0.2)
proximal_supers.plot(ax=ax, color="blue", markersize=3)
ctx.add_basemap(
    ax,
    crs=proximal_supers.crs.to_string(), source=ctx.providers.CartoDB.Voyager
)
plt.title(f"{len(cardiff_centroids)} Centroids Within Cardiff LAD extent")
cardiff_centroids.plot(ax=ax, color="green", markersize=3, alpha=0.2)
plt.tight_layout()
plt.savefig("outputs/cardiff-supermarkets/cardiff-centroids-supermarkets-5km-buffer.png", dpi=400)

# get osm data
osm_pth = here("data/external/osm/wales-latest.osm.pbf")
if not os.path.exists(osm_pth):
  subprocess.run(
      [
        "curl",
        "https://download.geofabrik.de/europe/united-kingdom/wales-latest.osm.pbf",
        "-o",
        osm_pth,])

Here is where the Java dependency kicks in…

transport_network = r5py.TransportNetwork(osm_pth)
# adjust all centroids to transport network
cardiff_centroids["snapped_geometry"] = transport_network.snap_to_network(
    cardiff_centroids["geometry"],
    radius=100,
    street_mode=r5py.TransportMode.WALK,
)
# snap distance summaries
# reverse the lonlat to latlon
cardiff_centroids["snap_dist_m"] = cardiff_centroids.apply(
    lambda row:haversine(
        (row["geometry"].y, row["geometry"].x),
        (row["snapped_geometry"].y, row["snapped_geometry"].x),
        unit=Unit.METERS), axis=1)

cardiff_centroids["snap_dist_m"].plot.hist(
    bins=50,
    title="Distribution of coordinate snap distance in point plane (m)"
    )
plt.show()

largest_snaps = cardiff_centroids.copy(deep=True)
# retrieve the top n rows where coordinates adjusted by the greatest dist.
n = 20
largest_snaps = largest_snaps.sort_values(
    by="snap_dist_m", ascending=False).head(n)
# create the LineString geometry
largest_snaps["lines"] = largest_snaps.apply(
    lambda row: LineString([row["geometry"], row["snapped_geometry"]]),
    axis=1
)
z_start = 11
# original geometry layer
imap = largest_snaps.explore(
    marker_type="marker",
    marker_kwds={
        "icon": folium.map.Icon(color="red", icon="ban", prefix="fa"),
    },
    map_kwds={
        "center": {"lat": 51.478, "lng": -3.165}
    },
    zoom_start=z_start,
)
# snapped geometry layer
imap = largest_snaps.set_geometry("snapped_geometry").explore(
    m=imap,
    marker_type="marker",
    marker_kwds={
        "icon": folium.map.Icon(color="green", icon="person-walking", prefix="fa"),
    }
)
# line geometry layer
imap = largest_snaps.set_geometry("lines").explore(
    m=imap,
    zoom_start=z_start
)
imap
Make this Notebook Trusted to load map: File -> Trust Notebook
# drop the original centroids in favour of snapped geoms
cardiff_centroids.drop(
    columns=["geometry", "snap_dist_m"], axis=1, inplace=True)
cardiff_centroids.rename(columns={
    "snapped_geometry": "geometry",
    "OA21CD": "id",
    }, inplace=True)
cardiff_centroids.set_geometry("geometry", inplace=True)
dept_time = datetime.now().replace(hour=8, minute=0, second=0, microsecond=0)

travel_time_matrix = r5py.TravelTimeMatrixComputer(
    transport_network,
    origins=cardiff_centroids,
    destinations=proximal_supers,
    transport_modes=[r5py.TransportMode.WALK, r5py.TransportMode.CAR],
    departure=dept_time,
    departure_time_window=timedelta(minutes=60), # every minute until 9am
    snap_to_network=False,
    max_time=timedelta(minutes=60),
    speed_walking = 4.8, # default is 3.6km/h

).compute_travel_times()

travel_time_matrix.dropna().head()
from_id to_id travel_time
0 W00010120 86 9
1 W00010120 273 30
2 W00010120 455 16
3 W00010120 456 12
4 W00010120 643 30

Engineer Travel Time Matrix

Calculate median travel time across each supermarket. This gives us a median travel time from all centroids to all reachable supermarkets. Also need to merge to geometries to add spatial context. Going with 2021 boundaries generalised to 20m resolution as just for mapping a centroid value.

Other features of interest may be:

  • number of reachable supermarkets
  • an ‘effective population served within 1 hour by car and by foot’.
  • an accessibility statistic such as the ratio of reachable / proximal supermarkets. eg the number of supermarkets you can reach through the transport network as a proportion of the number within a crow’s flight radius.
med_tts = travel_time_matrix.drop("to_id", axis=1).groupby("from_id").median()
ENDPOINT = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Output_Areas_2021_EW_BGC_V2/FeatureServer/0/query"
PARAMS["where"] = "OA21CD like 'W%'"
PARAMS["resultOffset"] = 0

oa_polys = get_ons_geo_paginated(ENDPOINT, PARAMS)
len(oa_polys)
10275
median_tt_oa = med_tts.join(oa_polys.set_index("OA21CD"))
median_tt_oa = gpd.GeoDataFrame(median_tt_oa, crs=4326)
median_tt_oa.head()
travel_time geometry FID LSOA21CD LSOA21NM LSOA21NMW BNG_E BNG_N LAT LONG Shape__Area Shape__Length GlobalID
from_id
W00008779 12.5 POLYGON ((-3.15371 51.48625, -3.15409 51.48610... 187079 W01001694 Cardiff 036A Caerdydd 036A 319717 177053 51.48670 -3.15765 37192.300659 1323.933139 22955f09-cd77-4133-b0c6-5d3d54a1564d
W00008780 12.0 POLYGON ((-3.15290 51.48853, -3.15203 51.48723... 187080 W01001697 Cardiff 036D Caerdydd 036D 320018 177178 51.48787 -3.15334 20110.197174 656.083765 ede6239d-fa9e-4fa2-95e4-d2609951f8dd
W00008781 12.0 POLYGON ((-3.15856 51.48645, -3.15806 51.48628... 187081 W01001694 Cardiff 036A Caerdydd 036A 319566 177006 51.48626 -3.15981 42158.049187 1093.054113 a45f08c9-683b-41fa-aebe-8c401cec73d8
W00008782 12.0 POLYGON ((-3.15490 51.48587, -3.15718 51.48494... 187082 W01001694 Cardiff 036A Caerdydd 036A 319789 176962 51.48589 -3.15659 25546.097813 655.481197 0293396c-8ba5-4782-bf24-216461db1709
W00008784 13.0 POLYGON ((-3.15371 51.48625, -3.15447 51.48498... 187083 W01001694 Cardiff 036A Caerdydd 036A 319925 176862 51.48502 -3.15461 34684.802540 1455.580623 888a5b61-4a7b-466d-ae54-fc9ff0685663
imap = median_tt_oa.explore("travel_time", cmap="viridis_r", tiles="CartoDB positron")
imap = proximal_supers.explore(
    m=imap,
    marker_type="marker",
    marker_kwds={
        "icon": folium.map.Icon(color="red", icon="cart-shopping", prefix="fa"),
    }
)
imap.save("outputs/cardiff-supermarkets/median_tt_to_supermarkets_5km_buffer.html")
imap
Make this Notebook Trusted to load map: File -> Trust Notebook

This is one option for calculation of origins. Population-weighted centroids are ok, but I would advise taking a gridded approach, using Global Human Settlement Layer or equivalent. Finer spatial resolution, down to 10m grids if you really wanted that. There are some caveats with this data that I can point out if helpful.