Skip to content

Commit

Permalink
Merge pull request #4 from bushwickayudamutua/add/google-maps-api
Browse files Browse the repository at this point in the history
Add address normalization automation
  • Loading branch information
abelsonlive authored Jan 9, 2024
2 parents a307e79 + a521714 commit 30462ef
Show file tree
Hide file tree
Showing 13 changed files with 278 additions and 34 deletions.
9 changes: 7 additions & 2 deletions app/bam_app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from bam_core.utils.phone import format_phone_number
from bam_core.utils.email import format_email
from bam_core.utils.geo import format_address


app = FastAPI()
Expand All @@ -28,6 +29,8 @@ def clean_record(
email: str = None,
dns_check: bool = False,
address: str = None,
city_state: str = "",
zip_code: str = "",
):
"""
:param phone_number: The phone number to validate
Expand Down Expand Up @@ -60,8 +63,10 @@ def clean_record(

# validate mailing address
if address:
# TOOO: add address cleaning
pass
address_response = format_address(
address=address, city_state=city_state, zipcode=zip_code
)
response.update(address_response)
return response


Expand Down
7 changes: 7 additions & 0 deletions core/bam_core/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,3 +581,10 @@ class View(TypedDict):
# timeout_flag_value="Groceries Request Timeout",
# )
# ]


# Geolocation constants

# Latitude / longitude of Mayday. Used as the default center point when
# looking up addresses (see GoogleMaps.get_place in bam_core.lib.google).
MAYDAY_LOCATION = {"lat": 40.7041015, "lng": -73.9163523}
# Default search radius around MAYDAY_LOCATION, in meters
# (10 miles * 1609.344 meters per mile).
MAYDAY_RADIUS = 16093.44 # 10 miles in meters
4 changes: 4 additions & 0 deletions core/bam_core/functions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from bam_core.lib.airtable import Airtable
from bam_core.lib.mailjet import Mailjet
from bam_core.lib.s3 import S3
from bam_core.lib.google import GoogleMaps
from bam_core.lib.nyc_planning_labs import NycPlanningLabs

log = logging.getLogger(__name__)

Expand All @@ -18,6 +20,8 @@ class Function(object):
mailjet = Mailjet()
airtable = Airtable()
s3 = S3()
gmaps = GoogleMaps()
nycpl = NycPlanningLabs()

def __init__(self, parser: Optional[ArgumentParser] = None):
self.parser = parser or ArgumentParser(
Expand Down
63 changes: 44 additions & 19 deletions core/bam_core/lib/airtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
from bam_core.utils.etc import to_list
from bam_core.constants import (
AIRTABLE_DATETIME_FORMAT,
EG_REQUESTS_FIELD,
EG_STATUS_FIELD,
PHONE_FIELD,
ASSISTANCE_REQUESTS_TABLE_NAME,
ESSENTIAL_GOODS_TABLE_NAME,
Expand Down Expand Up @@ -65,7 +63,9 @@ def get_view_count(
:param table_name: The name of the table to get
:return Table
"""
records = self.get_view(table_name=table, view_name=view, fields=fields)
records = self.get_view(
table_name=table, view_name=view, fields=fields
)
if unique and len(fields) and len(records):
uniques = set()
for r in records:
Expand Down Expand Up @@ -245,7 +245,9 @@ def analyze_requests(
)
continue

delivered_tags = to_list(request_tag_schema.get("delivered", []))
delivered_tags = to_list(
request_tag_schema.get("delivered", [])
)
timeout_tags = to_list(request_tag_schema.get("timeout", []))
invalid_tags = to_list(request_tag_schema.get("invalid", []))
missed_tag = request_tag_schema.get("missed", None)
Expand Down Expand Up @@ -293,9 +295,13 @@ def analyze_requests(
sub_invalid_tags = to_list(
sub_request_tag_schema.get("invalid", [])
)
sub_missed_tag = sub_request_tag_schema.get("missed", None)
sub_missed_tag = sub_request_tag_schema.get(
"missed", None
)

sub_sub_item_schema = sub_request_tag_schema.get("items", None)
sub_sub_item_schema = sub_request_tag_schema.get(
"items", None
)

if not sub_sub_item_schema:
analysis = cls._perform_request_analysis(
Expand All @@ -315,32 +321,50 @@ def analyze_requests(
#########################

# handle doubly nested requests
sub_sub_request_field = sub_sub_item_schema["request_field"]
sub_sub_status_field = sub_sub_item_schema["status_field"]
sub_sub_request_tags = record.get(sub_sub_request_field, [])
sub_sub_status_tags = record.get(sub_sub_status_field, [])
sub_sub_request_field = sub_sub_item_schema[
"request_field"
]
sub_sub_status_field = sub_sub_item_schema[
"status_field"
]
sub_sub_request_tags = record.get(
sub_sub_request_field, []
)
sub_sub_status_tags = record.get(
sub_sub_status_field, []
)

for sub_sub_request_tag in sub_sub_request_tags:
sub_sub_request_tag_schema = sub_sub_item_schema[
"items"
].get(sub_sub_request_tag, None)
sub_sub_request_tag_schema = (
sub_sub_item_schema["items"].get(
sub_sub_request_tag, None
)
)
if not sub_sub_request_tag_schema:
log.warning(
f"Unknown request tag '{sub_sub_request_tag}' for field '{sub_sub_request_field}'"
)
continue

sub_sub_delivered_tags = to_list(
sub_sub_request_tag_schema.get("delivered", [])
sub_sub_request_tag_schema.get(
"delivered", []
)
)
sub_sub_timeout_tags = to_list(
sub_sub_request_tag_schema.get("timeout", [])
sub_sub_request_tag_schema.get(
"timeout", []
)
)
sub_sub_invalid_tags = to_list(
sub_sub_request_tag_schema.get("invalid", [])
sub_sub_request_tag_schema.get(
"invalid", []
)
)
sub_sub_missed_tag = sub_sub_request_tag_schema.get(
"missed", None
sub_sub_missed_tag = (
sub_sub_request_tag_schema.get(
"missed", None
)
)

analysis = cls._perform_request_analysis(
Expand All @@ -349,7 +373,8 @@ def analyze_requests(
sub_sub_request_tag,
sub_sub_status_tags,
# respect delivered tags from one level up (only relevant for Beds)
sub_delivered_tags + sub_sub_delivered_tags,
sub_delivered_tags
+ sub_sub_delivered_tags,
sub_sub_timeout_tags
+ sub_timeout_tags
+ timeout_tags,
Expand Down
49 changes: 49 additions & 0 deletions core/bam_core/lib/google.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import googlemaps

from bam_core.settings import GOOGLE_MAPS_API_KEY
from bam_core.constants import MAYDAY_LOCATION, MAYDAY_RADIUS


class GoogleMaps(object):
    """
    Thin wrapper around the ``googlemaps`` client used for address lookups.

    The underlying ``googlemaps.Client`` is created lazily on first use (so
    that merely instantiating this class never requires a valid API key) and
    is then reused for subsequent requests instead of being rebuilt on every
    access.
    """

    # Lazily-created client and the API key it was created with.
    _client = None
    _client_key = None

    def __init__(self, api_key=GOOGLE_MAPS_API_KEY):
        self.api_key = api_key

    @property
    def client(self):
        """
        Return the ``googlemaps.Client`` for the current ``api_key``.

        The client is built on first access and cached; it is rebuilt only
        if ``api_key`` has been changed since the cached client was created.
        """
        if self._client is None or self._client_key != self.api_key:
            self._client = googlemaps.Client(key=self.api_key)
            self._client_key = self.api_key
        return self._client

    def get_place(
        self,
        address,
        location=MAYDAY_LOCATION,
        radius=MAYDAY_RADIUS,
        types=("premise", "subpremise", "geocode"),
        language="en-US",
        strict_bounds=True,
    ):
        """
        Get place predictions from the Google Maps Places Autocomplete API
        Args:
            address (str): The address to search for
            location (dict): The ``lat``/``lng`` point to search around
            radius (float): The radius (in meters) to search within
            types (list): The types of places to search for
            language (str): The language to search in
            strict_bounds (bool): Whether to use strict bounds
        Returns:
            The value returned by ``googlemaps.Client.places_autocomplete``
        """
        # The default is an immutable tuple (avoids a shared mutable default
        # argument); non-string sequences are handed to the client as a list,
        # exactly as before.
        # NOTE(review): the googlemaps docs describe `types` as a string --
        # confirm that a list is serialized the way the API expects.
        if types is not None and not isinstance(types, str):
            types = list(types)
        return self.client.places_autocomplete(
            address,
            location=location,
            radius=radius,
            types=types,
            language=language,
            strict_bounds=strict_bounds,
        )

    def get_normalized_address(self, address):
        """
        Normalize an address using the Google Maps Address Validation API
        Args:
            address (str): The address to normalize. A list of address
                lines is also accepted and passed through unchanged.
        Returns:
            The value returned by ``googlemaps.Client.addressvalidation``
        """
        # `addressvalidation` expects a list of address lines, so wrap a
        # single address string in a one-element list.
        address_lines = [address] if isinstance(address, str) else address
        return self.client.addressvalidation(address_lines)
27 changes: 27 additions & 0 deletions core/bam_core/lib/nyc_planning_labs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import Any, Dict
import requests


class NycPlanningLabs(object):
    """
    Minimal client for the NYC Planning Labs geosearch API.

    A single ``requests.Session`` with JSON headers is created per instance
    and reused for every request.
    """

    base_url = "https://geosearch.planninglabs.nyc/v2"

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update(
            {"Content-Type": "application/json", "Accept": "application/json"}
        )

    def search(self, text: str, size: int = 1) -> Dict[str, Any]:
        """
        Search for a location in NYC using the geosearch API
        Args:
            text (str): The text to search for
            size (int): The number of results to return
        Returns:
            Dict[str, Any]: The response from the geosearch API
        Raises:
            Whatever ``response.raise_for_status()`` raises when the API
            answers with an HTTP error status.
        """
        url = f"{self.base_url}/search"
        params = {"text": text, "size": size}
        response = self.session.get(url, params=params)
        # Must be *called*: a bare attribute reference is a no-op and would
        # let error responses fall through to `.json()`.
        response.raise_for_status()
        return response.json()
3 changes: 3 additions & 0 deletions core/bam_core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
MAILJET_API_KEY = os.getenv("BAM_MAILJET_API_KEY", None)
MAILJET_API_SECRET = os.getenv("BAM_MAILJET_API_SECRET", None)

# google settings
GOOGLE_MAPS_API_KEY = os.getenv("BAM_GOOGLE_MAPS_API_KEY", None)

# s3 settings
DO_TOKEN = os.getenv("BAM_DO_TOKEN", None)
S3_BASE_URL = os.getenv(
Expand Down
1 change: 0 additions & 1 deletion core/bam_core/utils/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,4 +220,3 @@ def format_email(email: str, dns_check: bool = False) -> Dict[str, str]:
return {"email": email_info.normalized, "error": ""}
except EmailNotValidError as e:
return {"email": email, "error": str(e)}

97 changes: 97 additions & 0 deletions core/bam_core/utils/geo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from typing import Dict, Optional
from bam_core.lib.google import GoogleMaps
from bam_core.lib.nyc_planning_labs import NycPlanningLabs

# Maps a known mistyped zipcode (key) to the zipcode it should be (value).
# Consulted by _fix_zip_code; values that are not listed are left untouched.
COMMON_ZIPCODE_MISTAKES = {
    "112007": "11207",
}


def _fix_zip_code(zip_code: Optional[str]) -> str:
    """
    Swap a known-bad zipcode for its correction.

    Values listed in COMMON_ZIPCODE_MISTAKES are replaced by their mapped
    correction; any other value is handed back unchanged.
    """
    if zip_code in COMMON_ZIPCODE_MISTAKES:
        return COMMON_ZIPCODE_MISTAKES[zip_code]
    return zip_code


def format_address(
    address: Optional[str] = None,
    city_state: Optional[str] = "",
    zipcode: Optional[str] = "",
) -> Dict[str, str]:
    """
    Format an address using the Google Maps API and the NYC Planning Labs API
    Args:
        address (str): The address to format
        city_state (str): The city and state appended to the address when
            building the lookup query ("New York" is used when blank)
        zipcode (str): The zipcode appended to the address when building the
            lookup query (common typos are corrected first)
    Returns:
        Dict[str, str]: A dict with the keys ``cleaned_address``, ``bin`` and
            ``cleaned_address_accuracy``. When the address is blank, or no
            building/apartment-level match is found, the first two are empty
            strings and the accuracy is "No result".
    """
    response = {
        "cleaned_address": "",
        "bin": "",
        "cleaned_address_accuracy": "No result",
    }
    # don't do anything for missing addresses
    if not address or not address.strip():
        return response

    # connect to APIs (only once we know there is something to look up)
    gmaps = GoogleMaps()
    nycpl = NycPlanningLabs()

    # format address for query; the optional parts may be None
    city_state = (city_state or "").strip() or "New York"
    zipcode = _fix_zip_code((zipcode or "").strip())
    address_query = f"{address.strip()} {city_state} {zipcode}".strip().upper()

    # lookup address using Google Maps Places API
    place_response = gmaps.get_place(address_query)
    if not place_response:
        return response

    # only the first prediction is considered
    top_place = place_response[0]
    place_address = top_place["description"]
    place_types = top_place["types"]
    if "subpremise" in place_types:
        response["cleaned_address_accuracy"] = "Apartment"
    elif "premise" in place_types:
        response["cleaned_address_accuracy"] = "Building"
    else:
        # ignore geocode results if not at the level of a building or apartment
        return response

    # lookup the cleaned address using the google maps address validation api
    norm_address_result = gmaps.get_normalized_address(place_address)
    norm_address = norm_address_result.get("result", {})
    # TODO: Figure out if we should report granularity from here
    # (verdict.validationGranularity: SUB_PREMISE / PREMISE) or from the
    # places API as is done above.

    # prefer the USPS-standardized address lines
    usps_data = norm_address.get("uspsData", {}).get("standardizedAddress", {})
    cleaned_address = (
        usps_data.get("firstAddressLine", "")
        + " "
        + usps_data.get("cityStateZipAddressLine", "")
    ).strip()
    if not cleaned_address:
        # if no USPS data, use the formatted address
        cleaned_address = (
            norm_address.get("address", {}).get("formattedAddress", "").upper()
        )
    if not cleaned_address:
        # if no formatted address, use the place address
        cleaned_address = place_address.upper()
    response["cleaned_address"] = cleaned_address

    # lookup the bin using the nyc planning labs api
    nycpl_response = nycpl.search(cleaned_address)
    # `features` can be present but empty when nothing matched; fall back to
    # an empty feature so the lookup yields "" instead of raising IndexError
    features = nycpl_response.get("features") or [{}]
    response["bin"] = (
        features[0]
        .get("properties", {})
        .get("addendum", {})
        .get("pad", {})
        .get("bin", "")
    )
    return response
1 change: 1 addition & 0 deletions core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies = [
"email-validator==2.0.0.post2",
"charset-normalizer==3.2.0",
"pytz==2023.3.post1",
"googlemaps==4.10.0",
]

[build-system]
Expand Down
Loading

0 comments on commit 30462ef

Please sign in to comment.