Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate phone number when importing external data [MAP-631] [MAP-77] #159

Merged
merged 18 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions hub/migrations/0145_localjsonsource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Generated by Django 4.2.11 on 2024-12-12 16:48

from django.db import migrations, models
import django.db.models.deletion
import django_jsonform.models.fields


class Migration(migrations.Migration):
    # Creates the LocalJSONSource model as a child table of
    # hub.ExternalDataSource (see ``bases`` and the parent-link field below).

    # Must be applied after the hub migration that precedes it.
    dependencies = [
        ("hub", "0144_hubimage_description"),
    ]

    operations = [
        migrations.CreateModel(
            name="LocalJSONSource",
            fields=[
                (
                    # Parent link to the ExternalDataSource row; it doubles as
                    # this table's primary key, and deleting the parent row
                    # cascades to this one.
                    "externaldatasource_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="hub.externaldatasource",
                    ),
                ),
                (
                    # JSON payload held directly on the model; defaults to an
                    # empty list and may be left blank.
                    "data",
                    django_jsonform.models.fields.JSONField(blank=True, default=list),
                ),
                # Name of the key used as the record identifier; defaults to "id".
                ("id_field", models.CharField(default="id", max_length=250)),
            ],
            options={
                "verbose_name": "Test source",
            },
            bases=("hub.externaldatasource",),
        ),
    ]
104 changes: 103 additions & 1 deletion hub/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from django.contrib.gis.db.models import MultiPolygonField, PointField
from django.contrib.gis.geos import Point
from django.core.cache import cache
from django.core.exceptions import ValidationError
from django.db import models
from django.db.models import Avg, IntegerField, Max, Min, Q
from django.db.models.functions import Cast, Coalesce
Expand Down Expand Up @@ -73,6 +74,7 @@
refresh_pages,
refresh_webhooks,
)
from hub.validation import validate_and_format_phone_number
from hub.views.mapped import ExternalDataSourceWebhook
from utils import google_maps, google_sheets
from utils.log import get_simple_debug_logger
Expand Down Expand Up @@ -817,6 +819,21 @@ def get_postcode_data(self) -> Optional[PostcodesIOResult]:

return self.postcode_data

@cached_property
def external_data_source(self):
    """Return the external data source reached via data type -> data set."""
    data_set = self.data_type.data_set
    return data_set.external_data_source

def save(self, *args, **kwargs):
    """
    Normalise ``phone`` and then persist the record.

    A truthy ``phone`` is passed through ``validate_and_format_phone_number``
    together with the countries configured on the external data source, and
    the result is stored back on ``self.phone``. That helper returns ``None``
    for numbers it cannot validate, so an invalid number is saved as ``None``.

    Raises:
        ValidationError: keyed on ``"phone"`` if the validator raises a
            ``ValidationError``. The original error is chained as the cause.
    """
    if self.phone:
        try:
            self.phone = validate_and_format_phone_number(
                self.phone, self.external_data_source.countries
            )
        except ValidationError as e:
            # Chain the cause so the underlying validation failure stays
            # visible in tracebacks.
            raise ValidationError({"phone": f"Invalid phone number: {e}"}) from e

    super().save(*args, **kwargs)


class Area(models.Model):
mapit_id = models.CharField(max_length=30)
Expand Down Expand Up @@ -1130,7 +1147,8 @@ def get_deduplication_hash(self) -> str:
hash_values = ["name"]
else:
hash_values = [
getattr(self, field) for field in self.get_deduplication_field_names()
str(getattr(self, field))
for field in self.get_deduplication_field_names()
]
return hashlib.md5("".join(hash_values).encode()).hexdigest()

Expand Down Expand Up @@ -1543,6 +1561,8 @@ def get_update_data(record):
value: datetime = parse_datetime(value)
if field == "can_display_point_field":
value = bool(value) # cast None value to False
if field == "phone_field":
value = validate_and_format_phone_number(value, self.countries)
update_data[field.removesuffix("_field")] = value

return update_data
Expand Down Expand Up @@ -2559,6 +2579,88 @@ def filter(self, filter: dict) -> dict:
raise NotImplementedError("Lookup not implemented for this data source type.")


class LocalJSONSource(ExternalDataSource):
    """
    A test source whose records live in a JSON list on the model itself.

    ``data`` is a list of dicts and ``id_field`` names the key that identifies
    each record. Reads go through a cached pandas DataFrame (``df``), which is
    invalidated whenever ``data`` is changed through this class.
    """

    crm_type = "test"
    has_webhooks = False
    automated_webhooks = False
    introspect_fields = False
    default_data_type = None
    data = JSONField(default=list, blank=True)
    id_field = models.CharField(max_length=250, default="id")

    class Meta:
        verbose_name = "Test source"

    @classmethod
    def get_deduplication_field_names(cls) -> list[str]:
        return ["id"]

    def healthcheck(self):
        return True

    @cached_property
    def df(self):
        """DataFrame view of ``data``, indexed by ``id_field``."""
        if not self.data:
            # An empty list has no columns, so set_index() would raise
            # KeyError; return an empty frame with the expected index instead.
            return pd.DataFrame(columns=[self.id_field]).set_index(self.id_field)
        return pd.DataFrame(self.data).set_index(self.id_field)

    def _invalidate_df(self):
        """Drop the cached DataFrame so the next read reflects ``data``."""
        self.__dict__.pop("df", None)

    @staticmethod
    def _to_records(frame):
        """Convert a frame indexed by id into dicts that include the id key."""
        # set_index() moved the id column into the index; reset_index() puts
        # it back so that get_record_id() works on the returned records.
        return frame.reset_index().to_dict(orient="records")

    def field_definitions(self):
        # One definition per non-id column found in self.data.
        return [
            self.FieldDefinition(label=col, value=col)
            for col in self.df.columns.tolist()
        ]

    def get_record_id(self, record: dict):
        return record[self.id_field]

    async def fetch_one(self, member_id):
        """Return the record whose id equals ``member_id``.

        Raises:
            IndexError: if no record matches.
        """
        frame = self.df
        return self._to_records(frame[frame.index == member_id])[0]

    async def fetch_many(self, id_list: list[str]):
        """Return every record whose id is in ``id_list``."""
        frame = self.df
        return self._to_records(frame[frame.index.isin(id_list)])

    async def fetch_all(self):
        """Return all records, each including its id key."""
        return self._to_records(self.df)

    def get_record_field(self, record, field, field_type=None):
        return get(record, field)

    def get_record_dict(self, record):
        return record

    async def update_one(self, mapped_record, **kwargs):
        """Merge ``update_fields`` into the stored record matching ``member``."""
        record_id = self.get_record_id(mapped_record["member"])
        changes = mapped_record["update_fields"]
        self.data = [
            {**record, **changes} if record[self.id_field] == record_id else record
            for record in self.data
        ]
        self._invalidate_df()
        # Use the async save: the blocking save() is not allowed inside a
        # running event loop.
        await self.asave()

    async def update_many(self, mapped_records, **kwargs):
        for mapped_record in mapped_records:
            await self.update_one(mapped_record)

    def delete_one(self, record_id):
        """Remove the record whose id equals ``record_id`` and save."""
        self.data = [
            record for record in self.data if record[self.id_field] != record_id
        ]
        self._invalidate_df()
        self.save()

    def create_one(self, record):
        """Append ``record["data"]`` to the stored list and save."""
        self.data.append(record["data"])
        self._invalidate_df()
        self.save()
        return record

    def create_many(self, records):
        """Append each record's ``"data"`` to the stored list and save once."""
        self.data.extend([record["data"] for record in records])
        self._invalidate_df()
        self.save()
        return records


class AirtableSource(ExternalDataSource):
"""
An Airtable table.
Expand Down
98 changes: 87 additions & 11 deletions hub/tests/test_source_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,93 @@

from django.test import TestCase

from utils.py import parse_datetime
from hub.models import LocalJSONSource
from hub.validation import validate_and_format_phone_number


class TestSourceParser(TestCase):
    """Date parsing checks, both direct and through a LocalJSONSource import."""

    dates_that_should_work = [
        ["01/06/2024, 09:30", datetime(2024, 6, 1, 9, 30, tzinfo=timezone.utc)],
        ["15/06/2024, 09:30", datetime(2024, 6, 15, 9, 30, tzinfo=timezone.utc)],
        ["15/06/2024, 09:30:00", datetime(2024, 6, 15, 9, 30, 0, tzinfo=timezone.utc)],
        ["2023-12-20 06:00:00", datetime(2023, 12, 20, 6, 0, 0, tzinfo=timezone.utc)],
    ]

    def test_dateparse(self):
        # Each entry is a [raw string, expected datetime] pair.
        for entry in self.dates_that_should_work:
            raw, expected = entry[0], entry[1]
            self.assertEqual(parse_datetime(raw), expected)

    async def test_date_field(self):
        cases = [
            {
                "id": "1",
                "date": "01/06/2024, 09:30",
                "expected": datetime(2024, 6, 1, 9, 30, tzinfo=timezone.utc),
            },
            {
                "id": "2",
                "date": "15/06/2024, 09:30",
                "expected": datetime(2024, 6, 15, 9, 30, tzinfo=timezone.utc),
            },
            {
                "id": "3",
                "date": "15/06/2024, 09:30:00",
                "expected": datetime(2024, 6, 15, 9, 30, 0, tzinfo=timezone.utc),
            },
            {
                "id": "4",
                "date": "2023-12-20 06:00:00",
                "expected": datetime(2023, 12, 20, 6, 0, 0, tzinfo=timezone.utc),
            },
        ]

        # Only the raw input columns go into the source; "expected" stays local.
        rows = []
        for case in cases:
            rows.append({"id": case["id"], "date": case["date"]})

        source = await LocalJSONSource.objects.acreate(
            name="date_test",
            id_field="id",
            start_time_field="date",
            data=rows,
        )

        # Import the rows so that GenericData records are created.
        await source.import_many(source.data)

        # Every imported record should carry the parsed start time.
        imported = source.get_import_data()

        for case in cases:
            record = await imported.aget(data=case["id"])
            self.assertEqual(record.start_time, case["expected"])


class TestPhoneField(TestCase):
    """Phone validation checks, through an import and by direct call."""

    async def test_phone_field(self):
        cases = [
            {"id": "bad1", "phone": "123456789", "expected": None},
            {"id": "good1", "phone": "07123456789", "expected": "+447123456789"},
            {"id": "good2", "phone": "+447123456789", "expected": "+447123456789"},
        ]

        # Only the raw input columns go into the source; "expected" stays local.
        rows = []
        for case in cases:
            rows.append({"id": case["id"], "phone": case["phone"]})

        source = await LocalJSONSource.objects.acreate(
            name="phone_test",
            id_field="id",
            phone_field="phone",
            countries=["GB"],
            data=rows,
        )

        # Import the rows so that GenericData records are created.
        await source.import_many(source.data)

        # The phone column should be formatted (or None when invalid), while
        # the stored JSON keeps the value exactly as it was supplied.
        imported = source.get_import_data()

        for case in cases:
            record = await imported.aget(data=case["id"])
            self.assertEqual(record.phone, case["expected"])
            self.assertEqual(record.json["phone"], case["phone"])

    def test_valid_phone_number_for_usa(self):
        formatted = validate_and_format_phone_number("4155552671", ["US"])
        self.assertEqual(formatted, "+14155552671")
22 changes: 22 additions & 0 deletions hub/validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import phonenumbers
from phonenumbers.phonenumberutil import NumberParseException

from utils.py import ensure_list


def validate_and_format_phone_number(value, countries=None):
    """
    Validate a phone number and return it in E164 format.

    Args:
        value: The phone number to check. Non-string values (for example an
            integer read from JSON) are converted with ``str()`` first.
        countries: A region code or list of region codes. Only the first
            entry is used as the default region for parsing; when none is
            given, "GB" is used.
            NOTE(review): later entries are ignored - confirm whether
            numbers should also be tried against the remaining countries.

    Returns:
        The number formatted as E164, or ``None`` if ``value`` is ``None``,
        cannot be parsed, or is not a valid number.
    """
    if value is None:
        return None

    countries = ensure_list(countries or [])
    if len(countries) == 0:
        countries = ["GB"]

    try:
        # parse() measures the length of its input, so anything that is not
        # a string must be converted first.
        phone_number = phonenumbers.parse(str(value), countries[0])
    except NumberParseException:
        return None

    if not phonenumbers.is_valid_number(phone_number):
        return None

    return phonenumbers.format_number(
        phone_number, phonenumbers.PhoneNumberFormat.E164
    )
13 changes: 12 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ google-auth-oauthlib = "^1.2.0"
django-codemirror2 = "^0.2"
wagtail-color-panel = "^1.5.0"
dateparser = "^1.2.0"
phonenumbers = "8.13.51"

[tool.poetry.dev-dependencies]
django-debug-toolbar = "^4.3"
Expand Down
Loading