Skip to content

Commit

Permalink
Allow geocoding by mapit type (e.g. for more niche geocoding)
Browse files Browse the repository at this point in the history
  • Loading branch information
janbaykara committed Dec 18, 2024
1 parent 405d339 commit 5db4d3b
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 45 deletions.
35 changes: 2 additions & 33 deletions hub/management/commands/import_areas.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,43 +8,12 @@
from tqdm import tqdm

from hub.models import Area, AreaType
from utils import mapit
from utils import mapit, mapit_types


class Command(BaseCommand):
help = "Import basic area information from Mapit"

boundary_types = [
{
"mapit_type": ["WMC"],
"name": "2023 Parliamentary Constituency",
"code": "WMC23",
"area_type": "Westminster Constituency",
"description": "Westminster Parliamentary Constituency boundaries, as created in 2023",
},
{
"mapit_type": ["LBO", "UTA", "COI", "LGD", "CTY", "MTD"],
"name": "Single Tier Councils",
"code": "STC",
"area_type": "Single Tier Council",
"description": "Single Tier Council",
},
{
"mapit_type": ["DIS", "NMD"],
"name": "District Councils",
"code": "DIS",
"area_type": "District Council",
"description": "District Council",
},
{
"mapit_type": ["COI", "CPW", "DIW", "LBW", "LGW", "MTW", "UTE", "UTW"],
"name": "Wards",
"code": "WD23",
"area_type": "Electoral Ward",
"description": "Electoral wards",
},
]

def add_arguments(self, parser):
parser.add_argument(
"-q", "--quiet", action="store_true", help="Silence progress bars."
Expand All @@ -58,7 +27,7 @@ def add_arguments(self, parser):

def handle(self, quiet: bool = False, all_names: bool = False, *args, **options):
self.mapit_client = mapit.MapIt()
for b_type in self.boundary_types:
for b_type in mapit_types.boundary_types:
areas = self.mapit_client.areas_of_type(b_type["mapit_type"])
area_type, created = AreaType.objects.get_or_create(
name=b_type["name"],
Expand Down
48 changes: 36 additions & 12 deletions hub/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from enum import Enum
from typing import List, Optional, Self, Type, TypedDict, Union
from urllib.parse import urlencode, urljoin
from utils import mapit_types

from django.conf import settings
from django.contrib.auth import get_user_model
Expand Down Expand Up @@ -1681,14 +1682,16 @@ async def create_import_record(record):

for item in self.geocoding_config:
parent_area = area
literal_area_type = item.get("type", None)
literal_lih_area_type__code = item.get("lih_area_type__code", None)
literal_mapit_type = item.get("mapit_type", None)
area_types = literal_lih_area_type__code or literal_mapit_type
literal_area_field = item.get("field", None)
raw_area_value = str(
self.get_record_field(record, literal_area_field)
)

if (
literal_area_type is None
area_types is None
or literal_area_field is None
or raw_area_value is None
):
Expand All @@ -1712,18 +1715,39 @@ async def create_import_record(record):
or lower_name
)

parsed_area_types = ensure_list(literal_area_type)
is_local_authority = (
"STC" in parsed_area_types or "DIS" in parsed_area_types
parsed_area_types = [
str(s).upper()
for s in ensure_list(area_types)
]

maybe_council = (
# Check if using LIH area types
literal_lih_area_type__code is not None
and any(
[
t
in mapit_types.LIH_COUNCIL_TYPES
for t in parsed_area_types
]
)
) or (
# Check if using MapIt types
literal_mapit_type is not None
and any(
[
t
in mapit_types.MAPIT_COUNCIL_TYPES
for t in parsed_area_types
]
)
)

# logger.debug(
# f"Searching for {searchable_name} via {literal_area_field} of type {literal_area_type}. is_local_authority? {is_local_authority}"
# )
qs = Area.objects.select_related("area_type")

qs = Area.objects.select_related("area_type").filter(
area_type__code__in=parsed_area_types
)
if literal_lih_area_type__code is not None:
qs = qs.filter(area_type__code__in=parsed_area_types)
elif literal_mapit_type is not None:
qs = qs.filter(mapit_type__in=parsed_area_types)

search_values = [
raw_area_value,
Expand All @@ -1737,7 +1761,7 @@ async def create_import_record(record):
""
]

if is_local_authority:
if maybe_council:
# Mapit stores councils with their type in the name
# e.g. https://mapit.mysociety.org/area/2641.html
suffixes += [
Expand Down
52 changes: 52 additions & 0 deletions hub/tests/test_external_data_source_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from hub.models import Area, LocalJSONSource
from hub.validation import validate_and_format_phone_number
from utils import mapit_types


class TestDateFieldParer(TestCase):
Expand Down Expand Up @@ -417,3 +418,54 @@ def test_geocoding_matches(self):
print("--Geocode data:", d.id, json.dumps(d.geocode_data, indent=4))
print("--Postcode data:", d.id, json.dumps(d.postcode_data, indent=4))
raise

def test_by_mapit_types(self):
    """
    Geocoding should work identically on more granular mapit_types.

    Points the source's geocoding config at raw MapIt type codes (the
    ``mapit_type`` key) for the ``council`` and ``ward`` fields, re-imports
    the source data, and checks every imported record against the
    expectations carried in that record's own JSON
    (``expected_area_gss`` / ``expected_area_type_code``).
    """
    self.source.geocoding_config = [
        {
            "field": "council",
            "mapit_type": mapit_types.MAPIT_COUNCIL_TYPES,
        },
        {"field": "ward", "mapit_type": mapit_types.MAPIT_WARD_TYPES},
    ]
    self.source.save()

    # re-generate GenericData records using the new geocoding config
    async_to_sync(self.source.import_many)(self.source.data)

    # check that each imported record was geocoded to the area it expects
    self.data = self.source.get_import_data()

    success_count = 0
    for d in self.data:
        try:
            try:
                if d.json["ward"] is None:
                    self.assertIsNone(d.postcode_data, "None shouldn't geocode.")
                    continue
                if d.json["expected_area_gss"] is None:
                    self.assertIsNone(
                        d.postcode_data, "Expect MapIt to have failed."
                    )
                    continue
                # An expected GSS code is present: the geocoder must have
                # stored it under the expected area type code.
                self.assertEqual(
                    d.geocode_data["data"]["area_fields"][
                        d.json["expected_area_type_code"]
                    ],
                    d.json["expected_area_gss"],
                )
            except KeyError as err:
                # Keep the KeyError as the explicit cause so the failure
                # output shows which key was missing.
                raise AssertionError(
                    "Expected geocoding data was missing."
                ) from err
            self.assertIsNotNone(d.postcode_data)
            success_count += 1
            print("Geocoding success rate:", success_count / len(self.data))
        except AssertionError as e:
            # Dump the failing record before re-raising so the failure can
            # be diagnosed from the test output.
            print(e)
            print("Geocoding failed:", d.id, json.dumps(d.json, indent=4))
            print("--Geocode data:", d.id, json.dumps(d.geocode_data, indent=4))
            print("--Postcode data:", d.id, json.dumps(d.postcode_data, indent=4))
            raise
36 changes: 36 additions & 0 deletions utils/mapit_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# MapIt type codes that are imported under the "Single Tier Councils" area type.
MAPIT_STC_TYPES = ["LBO", "UTA", "COI", "LGD", "CTY", "MTD"]
# MapIt type codes that are imported under the "District Councils" area type.
MAPIT_DIS_TYPES = ["DIS", "NMD"]
# Every MapIt type code treated as a council (single tier first, then district).
MAPIT_COUNCIL_TYPES = [*MAPIT_STC_TYPES, *MAPIT_DIS_TYPES]
# The boundary type `code` values (see `boundary_types`) that denote councils.
LIH_COUNCIL_TYPES = ["DIS", "STC"]
# MapIt type codes that are imported under the "Wards" area type.
MAPIT_WARD_TYPES = ["COI", "CPW", "DIW", "LBW", "LGW", "MTW", "UTE", "UTW"]


def _boundary_type(mapit_type, name, code, area_type, description):
    """Build one `boundary_types` entry; `mapit_type` is stored as given, not copied."""
    return {
        "mapit_type": mapit_type,
        "name": name,
        "code": code,
        "area_type": area_type,
        "description": description,
    }


# One entry per area type: the MapIt type codes it covers plus its
# name, code, area_type label and description.
boundary_types = [
    _boundary_type(
        mapit_type=["WMC"],
        name="2023 Parliamentary Constituency",
        code="WMC23",
        area_type="Westminster Constituency",
        description="Westminster Parliamentary Constituency boundaries, as created in 2023",
    ),
    _boundary_type(
        mapit_type=MAPIT_STC_TYPES,
        name="Single Tier Councils",
        code="STC",
        area_type="Single Tier Council",
        description="Single Tier Council",
    ),
    _boundary_type(
        mapit_type=MAPIT_DIS_TYPES,
        name="District Councils",
        code="DIS",
        area_type="District Council",
        description="District Council",
    ),
    _boundary_type(
        mapit_type=MAPIT_WARD_TYPES,
        name="Wards",
        code="WD23",
        area_type="Electoral Ward",
        description="Electoral wards",
    ),
]

0 comments on commit 5db4d3b

Please sign in to comment.