Skip to content

Commit

Permalink
Re-geocode if the geocoder has changed (e.g. bugfix, new version, etc.)
Browse files Browse the repository at this point in the history
  • Loading branch information
janbaykara committed Jan 5, 2025
1 parent 45724cf commit d149c5c
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 33 deletions.
69 changes: 50 additions & 19 deletions hub/data_imports/geocoding_config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import re
from enum import Enum
from typing import TYPE_CHECKING

from django.conf import settings
Expand Down Expand Up @@ -30,6 +31,22 @@ def find_config_item(source: "ExternalDataSource", key: str, value, default=None
)


# Identifiers recorded against geocoded data to say which geocoder produced it.
class Geocoder(Enum):
    """Closed set of geocoder identifiers.

    Each member's ``.value`` is the string form of the identifier; the
    values are compared against stored strings, so they must not be
    renamed casually.

    Two naming schemes coexist:

    * lower-case values name a specific geocoding service;
    * ``*_GEOCODER_V<n>`` values name a version of the geocoding code for
      one geography type (area, address, coordinate). NOTE(review): the
      version suffix presumably needs bumping whenever that code path's
      behaviour changes, so that data produced by the older version can
      be told apart -- confirm against the callers.
    """

    # Named geocoding services.
    POSTCODES_IO = "postcodes_io"
    FINDTHATPOSTCODE = "findthatpostcode"
    MAPBOX = "mapbox"
    GOOGLE = "google"

    # Versioned geocoding code paths, one per geography type.
    AREA_GEOCODER_V2 = "AREA_GEOCODER_V2"
    ADDRESS_GEOCODER_V2 = "ADDRESS_GEOCODER_V2"
    COORDINATE_GEOCODER_V1 = "COORDINATE_GEOCODER_V1"


# Current geocoder version for each geography type. import_record picks one of
# these based on the source's geocoding_config "type" and compares its .value
# with the geocoder string stored on the existing record as one of the
# conditions for skipping geocoding; each import_*_data function writes the
# matching .value into update_data["geocoder"]. Point a constant at a new
# Geocoder member when that geocoder's logic changes.
LATEST_AREA_GEOCODER = Geocoder.AREA_GEOCODER_V2
LATEST_ADDRESS_GEOCODER = Geocoder.ADDRESS_GEOCODER_V2
LATEST_COORDINATE_GEOCODER = Geocoder.COORDINATE_GEOCODER_V1


def get_config_item_value(
source: "ExternalDataSource", config_item, record, default=None
):
Expand Down Expand Up @@ -64,10 +81,29 @@ async def import_record(
data_type: "DataType",
loaders: "Loaders",
):
from hub.models import ExternalDataSource, GenericData, Geocoder
from hub.models import ExternalDataSource, GenericData

update_data = get_update_data(source, record)
id = source.get_record_id(record)
update_data = get_update_data(source, record)
update_data["geocode_data"] = update_data.get("geocode_data", {})
update_data["geocode_data"]["config"] = source.geocoding_config

# Try to identify the appropriate geocoder
geocoder: Geocoder = None
geocoding_config_type = source.geocoding_config.get("type", None)
importer_fn = None
if geocoding_config_type == ExternalDataSource.GeographyTypes.AREA:
geocoder = LATEST_AREA_GEOCODER
importer_fn = import_area_data
elif geocoding_config_type == ExternalDataSource.GeographyTypes.ADDRESS:
geocoder = LATEST_ADDRESS_GEOCODER
importer_fn = import_address_data
elif geocoding_config_type == ExternalDataSource.GeographyTypes.COORDINATES:
geocoder = LATEST_COORDINATE_GEOCODER
importer_fn = import_coordinate_data
else:
logger.debug(source.geocoding_config)
raise ValueError("geocoding_config is not a valid type")

# check if geocoding_config and dependent fields are the same; if so, skip geocoding
try:
Expand All @@ -86,6 +122,8 @@ async def import_record(
and are_dicts_equal(
generic_data.geocode_data["config"], source.geocoding_config
)
# And the geocoding code version is the same
and generic_data.geocoder == geocoder.value
):
# Then, if so, check if the data has changed
geocoding_field_values = set()
Expand All @@ -112,21 +150,8 @@ async def import_record(
except GenericData.DoesNotExist:
# logger.debug("Generic Data doesn't exist, no equality check to be done", id)
pass
update_data["geocode_data"] = update_data.get("geocode_data", {})
update_data["geocode_data"]["config"] = source.geocoding_config
update_data["geocode_data"]["skipped"] = False
update_data["geocoder"] = Geocoder.GEOCODING_CONFIG.value

geocoding_config_type = source.geocoding_config.get("type", None)
if geocoding_config_type == ExternalDataSource.GeographyTypes.AREA:
importer_fn = import_area_data
elif geocoding_config_type == ExternalDataSource.GeographyTypes.ADDRESS:
importer_fn = import_address_data
elif geocoding_config_type == ExternalDataSource.GeographyTypes.COORDINATES:
importer_fn = import_coordinate_data
else:
logger.debug(source.geocoding_config)
raise ValueError("geocoding_config is not a valid type")
update_data["geocode_data"]["skipped"] = False

return await importer_fn(
record=record,
Expand All @@ -144,7 +169,9 @@ async def import_area_data(
loaders: "Loaders",
update_data: dict,
):
from hub.models import Area, GenericData, Geocoder
from hub.models import Area, GenericData

update_data["geocoder"] = LATEST_AREA_GEOCODER.value

# Filter down geographies by the config
parent_area = None
Expand Down Expand Up @@ -393,7 +420,9 @@ async def import_address_data(
Used to batch-import data.
"""
from hub.models import GenericData, Geocoder
from hub.models import GenericData

update_data["geocoder"] = LATEST_ADDRESS_GEOCODER.value

point = None
address_data = None
Expand Down Expand Up @@ -546,7 +575,9 @@ async def import_coordinate_data(
loaders: "Loaders",
update_data: dict,
):
from hub.models import GenericData, Geocoder
from hub.models import GenericData

update_data["geocoder"] = LATEST_COORDINATE_GEOCODER.value

steps = []

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Generated by Django 4.2.11 on 2024-06-10 20:00

from django.db import migrations, models
import hub.data_imports
import hub.data_imports.geocoding_config
import hub.models


Expand Down Expand Up @@ -31,7 +33,9 @@ class Migration(migrations.Migration):
name="geocoder",
field=models.CharField(
blank=True,
default=hub.models.Geocoder["POSTCODES_IO"].value,
default=hub.data_imports.geocoding_config.Geocoder[
"POSTCODES_IO"
].value,
max_length=1000,
null=True,
),
Expand Down
16 changes: 3 additions & 13 deletions hub/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import math
import uuid
from datetime import datetime, timedelta, timezone
from enum import Enum
from typing import List, Optional, Self, Type, TypedDict, Union
from urllib.parse import urlencode, urljoin

Expand Down Expand Up @@ -92,15 +91,6 @@
logger = get_simple_debug_logger(__name__)


# Identifiers recorded against geocoded data to say which geocoder produced it.
class Geocoder(Enum):
    """Closed set of geocoder identifiers.

    Each member's ``.value`` is the string form of the identifier.

    NOTE(review): ``GEOCODING_CONFIG`` appears to mark data produced by the
    config-driven import path rather than by one named service -- confirm
    against the callers.
    """

    # Named geocoding services.
    POSTCODES_IO = "postcodes_io"
    FINDTHATPOSTCODE = "findthatpostcode"
    MAPBOX = "mapbox"
    GOOGLE = "google"

    # Geocoding driven by a data source's geocoding configuration.
    GEOCODING_CONFIG = "geocoding_config"


class Organisation(models.Model):
created_at = models.DateTimeField(auto_now_add=True)
last_update = models.DateTimeField(auto_now=True)
Expand Down Expand Up @@ -783,9 +773,7 @@ class GenericData(CommonData):
public_url = models.URLField(max_length=2000, blank=True, null=True)
social_url = models.URLField(max_length=2000, blank=True, null=True)
geocode_data = JSONField(blank=True, null=True)
geocoder = models.CharField(
max_length=1000, blank=True, null=True, default=Geocoder.POSTCODES_IO.value
)
geocoder = models.CharField(max_length=1000, blank=True, null=True)
address = models.CharField(max_length=1000, blank=True, null=True)
title = models.CharField(max_length=1000, blank=True, null=True)
description = models.TextField(max_length=3000, blank=True, null=True)
Expand Down Expand Up @@ -1608,6 +1596,8 @@ async def import_many(self, members: list):
Copy data to this database for use in dashboarding features.
"""

from hub.data_imports.geocoding_config import Geocoder

if not members:
logger.error("import_many called with 0 records")
return
Expand Down

0 comments on commit d149c5c

Please sign in to comment.