From d149c5cf7a6cbd83a05cfc85e955ae8082fe3f10 Mon Sep 17 00:00:00 2001 From: Jan Baykara Date: Sun, 5 Jan 2025 19:28:35 +0000 Subject: [PATCH] Re-geocode if the geocoder has changed (e.g. bugfix, new version, etc.) --- hub/data_imports/geocoding_config.py | 69 ++++++++++++++----- ..._data_genericdata_geocode_data_and_more.py | 6 +- hub/models.py | 16 +---- 3 files changed, 58 insertions(+), 33 deletions(-) diff --git a/hub/data_imports/geocoding_config.py b/hub/data_imports/geocoding_config.py index e8a949810..7fc4e812c 100644 --- a/hub/data_imports/geocoding_config.py +++ b/hub/data_imports/geocoding_config.py @@ -1,5 +1,6 @@ import logging import re +from enum import Enum from typing import TYPE_CHECKING from django.conf import settings @@ -30,6 +31,22 @@ def find_config_item(source: "ExternalDataSource", key: str, value, default=None ) +# enum of geocoders: postcodes_io, findthatpostcode, mapbox, google, and the versioned area/address/coordinate geocoders +class Geocoder(Enum): + POSTCODES_IO = "postcodes_io" + FINDTHATPOSTCODE = "findthatpostcode" + MAPBOX = "mapbox" + GOOGLE = "google" + AREA_GEOCODER_V2 = "AREA_GEOCODER_V2" + ADDRESS_GEOCODER_V2 = "ADDRESS_GEOCODER_V2" + COORDINATE_GEOCODER_V1 = "COORDINATE_GEOCODER_V1" + + +LATEST_AREA_GEOCODER = Geocoder.AREA_GEOCODER_V2 +LATEST_ADDRESS_GEOCODER = Geocoder.ADDRESS_GEOCODER_V2 +LATEST_COORDINATE_GEOCODER = Geocoder.COORDINATE_GEOCODER_V1 + + def get_config_item_value( source: "ExternalDataSource", config_item, record, default=None ): @@ -64,10 +81,29 @@ async def import_record( data_type: "DataType", loaders: "Loaders", ): - from hub.models import ExternalDataSource, GenericData, Geocoder + from hub.models import ExternalDataSource, GenericData - update_data = get_update_data(source, record) id = source.get_record_id(record) + update_data = get_update_data(source, record) + update_data["geocode_data"] = update_data.get("geocode_data", {}) + update_data["geocode_data"]["config"] = source.geocoding_config + + # Try to identify the appropriate geocoder + geocoder: Geocoder = None 
+ geocoding_config_type = source.geocoding_config.get("type", None) + importer_fn = None + if geocoding_config_type == ExternalDataSource.GeographyTypes.AREA: + geocoder = LATEST_AREA_GEOCODER + importer_fn = import_area_data + elif geocoding_config_type == ExternalDataSource.GeographyTypes.ADDRESS: + geocoder = LATEST_ADDRESS_GEOCODER + importer_fn = import_address_data + elif geocoding_config_type == ExternalDataSource.GeographyTypes.COORDINATES: + geocoder = LATEST_COORDINATE_GEOCODER + importer_fn = import_coordinate_data + else: + logger.debug(source.geocoding_config) + raise ValueError("geocoding_config is not a valid type") # check if geocoding_config and dependent fields are the same; if so, skip geocoding try: @@ -86,6 +122,8 @@ async def import_record( and are_dicts_equal( generic_data.geocode_data["config"], source.geocoding_config ) + # And the geocoder code versions are the same + and generic_data.geocoder == geocoder.value ): # Then, if so, check if the data has changed geocoding_field_values = set() @@ -112,21 +150,8 @@ async def import_record( except GenericData.DoesNotExist: # logger.debug("Generic Data doesn't exist, no equality check to be done", id) pass - update_data["geocode_data"] = update_data.get("geocode_data", {}) - update_data["geocode_data"]["config"] = source.geocoding_config - update_data["geocode_data"]["skipped"] = False - update_data["geocoder"] = Geocoder.GEOCODING_CONFIG.value - geocoding_config_type = source.geocoding_config.get("type", None) - if geocoding_config_type == ExternalDataSource.GeographyTypes.AREA: - importer_fn = import_area_data - elif geocoding_config_type == ExternalDataSource.GeographyTypes.ADDRESS: - importer_fn = import_address_data - elif geocoding_config_type == ExternalDataSource.GeographyTypes.COORDINATES: - importer_fn = import_coordinate_data - else: - logger.debug(source.geocoding_config) - raise ValueError("geocoding_config is not a valid type") + update_data["geocode_data"]["skipped"] = False return 
await importer_fn( record=record, @@ -144,7 +169,9 @@ async def import_area_data( loaders: "Loaders", update_data: dict, ): - from hub.models import Area, GenericData, Geocoder + from hub.models import Area, GenericData + + update_data["geocoder"] = LATEST_AREA_GEOCODER.value # Filter down geographies by the config parent_area = None @@ -393,7 +420,9 @@ async def import_address_data( Used to batch-import data. """ - from hub.models import GenericData, Geocoder + from hub.models import GenericData + + update_data["geocoder"] = LATEST_ADDRESS_GEOCODER.value point = None address_data = None @@ -546,7 +575,9 @@ async def import_coordinate_data( loaders: "Loaders", update_data: dict, ): - from hub.models import GenericData, Geocoder + from hub.models import GenericData + + update_data["geocoder"] = LATEST_COORDINATE_GEOCODER.value steps = [] diff --git a/hub/migrations/0130_rename_osm_data_genericdata_geocode_data_and_more.py b/hub/migrations/0130_rename_osm_data_genericdata_geocode_data_and_more.py index 68745284e..527ad0187 100644 --- a/hub/migrations/0130_rename_osm_data_genericdata_geocode_data_and_more.py +++ b/hub/migrations/0130_rename_osm_data_genericdata_geocode_data_and_more.py @@ -1,6 +1,8 @@ # Generated by Django 4.2.11 on 2024-06-10 20:00 from django.db import migrations, models +import hub.data_imports +import hub.data_imports.geocoding_config import hub.models @@ -31,7 +33,9 @@ class Migration(migrations.Migration): name="geocoder", field=models.CharField( blank=True, - default=hub.models.Geocoder["POSTCODES_IO"].value, + default=hub.data_imports.geocoding_config.Geocoder[ + "POSTCODES_IO" + ].value, max_length=1000, null=True, ), diff --git a/hub/models.py b/hub/models.py index fb5d9878f..adfa7a31c 100644 --- a/hub/models.py +++ b/hub/models.py @@ -5,7 +5,6 @@ import math import uuid from datetime import datetime, timedelta, timezone -from enum import Enum from typing import List, Optional, Self, Type, TypedDict, Union from urllib.parse import urlencode, 
urljoin @@ -92,15 +91,6 @@ logger = get_simple_debug_logger(__name__) -# enum of geocoders: postcodes_io, mapbox, google -class Geocoder(Enum): - POSTCODES_IO = "postcodes_io" - FINDTHATPOSTCODE = "findthatpostcode" - MAPBOX = "mapbox" - GOOGLE = "google" - GEOCODING_CONFIG = "geocoding_config" - - class Organisation(models.Model): created_at = models.DateTimeField(auto_now_add=True) last_update = models.DateTimeField(auto_now=True) @@ -783,9 +773,7 @@ class GenericData(CommonData): public_url = models.URLField(max_length=2000, blank=True, null=True) social_url = models.URLField(max_length=2000, blank=True, null=True) geocode_data = JSONField(blank=True, null=True) - geocoder = models.CharField( - max_length=1000, blank=True, null=True, default=Geocoder.POSTCODES_IO.value - ) + geocoder = models.CharField(max_length=1000, blank=True, null=True) address = models.CharField(max_length=1000, blank=True, null=True) title = models.CharField(max_length=1000, blank=True, null=True) description = models.TextField(max_length=3000, blank=True, null=True) @@ -1608,6 +1596,8 @@ async def import_many(self, members: list): Copy data to this database for use in dashboarding features. """ + from hub.data_imports.geocoding_config import Geocoder + if not members: logger.error("import_many called with 0 records") return