Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Tests/test_geocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def test_force_setup(self):
geo.force_setup()
cache_dir = geo.cache_manager.cache_dir
assert cache_dir.is_dir()
assert len([c for c in cache_dir.glob("*.p") if "gmaps" not in c.name]) == 15
assert len([c for c in cache_dir.glob("*.p") if "gmaps" not in c.name]) == 16

def test_geocode_llsoa(self):
"""
Expand Down
3 changes: 2 additions & 1 deletion geocode/geocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,8 @@ def reverse_geocode(self, latlons, entity, **kwargs):
version = kwargs.pop("version", "20250109")
return self.neso.reverse_geocode_gsp(latlons, version, **kwargs)
elif entity == "llsoa":
return self.ons_nrs.reverse_geocode_llsoa(latlons=latlons, **kwargs)
version = kwargs.pop("version", "2011")
return self.ons_nrs.reverse_geocode_llsoa(latlons, version, **kwargs)
elif entity == "nuts":
return self.eurostat.reverse_geocode_nuts(latlons=latlons, **kwargs)
else:
Expand Down
File renamed without changes.
Binary file added geocode/ons/nrs_2021.zip
Binary file not shown.
97 changes: 59 additions & 38 deletions geocode/ons_nrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ def __init__(self, cache_manager, proxies=None, ssl_verify=True):
(NRS).
"""
self.cache_manager = cache_manager
data_dir = SCRIPT_DIR.joinpath("ons")
self.nrs_zipfile = data_dir.joinpath("nrs.zip")
self.constituency_lookup_file = data_dir.joinpath(
self.data_dir = SCRIPT_DIR.joinpath("ons")
self.nrs_zipfile = self.data_dir.joinpath("nrs_2011.zip")
self.constituency_lookup_file = self.data_dir.joinpath(
"constituency_centroids_Dec2020.psv"
)
self.lad_lookup_file = data_dir.joinpath("lad_centroids_May2021.psv")
self.pc_llsoa_zipfile = data_dir.joinpath(
self.lad_lookup_file = self.data_dir.joinpath("lad_centroids_May2021.psv")
self.pc_llsoa_zipfile = self.data_dir.joinpath(
"PCD_OA_LSOA_MSOA_LAD_MAY22_UK_LU.zip"
)
self.llsoa_lookup = None
Expand All @@ -68,11 +68,12 @@ def force_setup(self):
Function to setup all lookup files.
"""
self._load_llsoa_lookup()
self._load_llsoa_boundaries()
self._load_datazone_lookup()
self._load_constituency_lookup()
self._load_lad_lookup()
self._load_postcode_llsoa_lookup()
for version in ["2011", "2021"]:
self._load_llsoa_boundaries(version)

def _load_llsoa_lookup(self):
"""Load the lookup of LLSOA -> Population Weighted Centroid."""
Expand Down Expand Up @@ -133,22 +134,30 @@ def _load_llsoa_lookup(self):
)
return llsoa_lookup

def _load_llsoa_boundaries_engwales_regions(self):
def _load_llsoa_boundaries_engwales_regions(self, version: str):
"""
Load the LLSOA boundaries, either from local cache if available, else fetch from raw API
Load the LLSOA boundaries, either from local cache if available, else fetch from raw API.

Parameters
----------
`version` : str
The version of the LLSOA boundaries to load.
"""
logging.info(
"Extracting the LLSOA boundary data from ONS (this only needs to be "
"done once)"
)
ons_url = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Lower_Layer_Super_Output_Areas_Dec_2011_Boundaries_Full_Extent_BFE_EW_V3_2022/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"
ons_url = {
"2011": "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Lower_Layer_Super_Output_Areas_Dec_2011_Boundaries_Full_Extent_BFE_EW_V3_2022/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson",
"2021": "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Lower_layer_Super_Output_Areas_December_2021_Boundaries_EW_BFC_V10/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson",
}
pages = utils._fetch_from_ons_api(
ons_url, proxies=self.proxies, ssl_verify=self.ssl_verify
ons_url[version], proxies=self.proxies, ssl_verify=self.ssl_verify
)
return gpd.GeoDataFrame(
{
"llsoa11cd": [
feature["properties"]["LSOA11CD"]
feature["properties"][f"LSOA{version[-2:]}CD"]
for page in pages
for feature in page["features"]
],
Expand All @@ -161,37 +170,45 @@ def _load_llsoa_boundaries_engwales_regions(self):
crs="EPSG:4326",
)

def _load_llsoa_boundaries_scots_regions(self, version: str):
    """
    Load the LLSOA boundaries for Scotland from the packaged NRS zipfile.

    Parameters
    ----------
    `version` : str
        The version of the LLSOA boundaries to load ("2011" or "2021").

    Returns
    -------
    GeoDataFrame
        Two columns, `llsoa11cd` and `geometry`, with the CRS set to EPSG:4326.
        The code column is named `llsoa11cd` for every version.

    Raises
    ------
    ValueError
        If `version` has no packaged NRS shapefile.
    """
    llsoa_filename = {
        "2011": "OutputArea2011_EoR_WGS84.shp",
        "2021": "OutputArea2022_EoR.shp",
    }
    # Fail with an explicit message instead of a bare KeyError from the dict lookup.
    if version not in llsoa_filename:
        raise ValueError(f"LLSOA boundaries version {version} is not supported.")
    zip_path = self.data_dir.joinpath(f"nrs_{version}.zip")
    gdf = gpd.read_file(f"zip://{zip_path}!{llsoa_filename[version]}")
    if version == "2021":
        # The 2021 file is labelled as EPSG:27700 and then reprojected to EPSG:4326.
        # NOTE(review): presumably the shapefile carries no usable CRS metadata of
        # its own - confirm, since set_crs() rejects a conflicting existing CRS.
        gdf = gdf.set_crs("EPSG:27700").to_crs("EPSG:4326")
    else:
        gdf = gdf.set_crs("EPSG:4326")
    # NOTE(review): the earlier pyshp-based loader passed every geometry through
    # buffer(0); nothing equivalent happens here - confirm it is not needed.
    return gdf[["code", "geometry"]].rename(columns={"code": "llsoa11cd"})

def _load_llsoa_boundaries(self, version: str):
"""
Load the LLSOA boundaries, either from local cache if available, else fetch from raw API
(England and Wales) and packaged data (Scotland).
Load the LLSOA boundaries.

Parameters
----------
`version` : str
The version of the LLSOA boundaries to load.
"""
cache_label = "llsoa_boundaries"
if version not in ["2011", "2021"]:
raise ValueError(f"LLSOA boundaries version {version} is not supported.")
cache_label = f"llsoa_boundaries_{version}"
llsoa_boundaries_cache_contents = self.cache_manager.retrieve(cache_label)
if llsoa_boundaries_cache_contents is not None:
logging.debug("Loading LLSOA boundaries from cache ('%s')", cache_label)
return llsoa_boundaries_cache_contents
llsoa_regions_engwales = self._load_llsoa_boundaries_engwales_regions()
llsoa_regions_scots = self._load_llsoa_boundaries_scots_regions()
llsoa_regions_engwales = self._load_llsoa_boundaries_engwales_regions(version)
llsoa_regions_scots = self._load_llsoa_boundaries_scots_regions(version)
llsoa_regions = pd.concat(
[llsoa_regions_engwales, llsoa_regions_scots]
).reset_index()
Expand Down Expand Up @@ -283,7 +300,11 @@ def geocode_llsoa(
return results

def reverse_geocode_llsoa(
self, latlons: List[Tuple[float, float]], datazones: bool = False, **kwargs
self,
latlons: List[Tuple[float, float]],
version: str,
datazones: bool = False,
**kwargs,
) -> List[str]:
"""
Reverse-geocode latitudes and longitudes to LLSOA.
Expand All @@ -303,11 +324,11 @@ def reverse_geocode_llsoa(
do not fall inside an LLSOA boundary will return None.
"""
if self.llsoa_regions is None:
self.llsoa_regions = self._load_llsoa_boundaries()
self.llsoa_regions = self._load_llsoa_boundaries(version)
results = utils.reverse_geocode(
latlons,
self.llsoa_regions.rename({"llsoa11cd": "region_id"}, axis=1),
**kwargs
**kwargs,
)
if datazones:
if self.dz_lookup is None:
Expand Down