Merge pull request #229 from OCHA-DAP/hapi-population
Hapi population dataset
b-j-mills authored Feb 21, 2025
2 parents fdda193 + f26aa6f commit 8967bbc
Showing 12 changed files with 12,935 additions and 62,667 deletions.
4 changes: 2 additions & 2 deletions requirements.txt
@@ -8,7 +8,7 @@ attrs==25.1.0
     #   jsonlines
     #   jsonschema
     #   referencing
-cachetools==5.5.1
+cachetools==5.5.2
     # via google-auth
 certifi==2025.1.31
     # via requests
@@ -218,7 +218,7 @@ rfc3986==2.0.0
     # via frictionless
 rich==13.9.4
     # via typer
-rpds-py==0.22.3
+rpds-py==0.23.0
     # via
     #   jsonschema
     #   referencing
135 changes: 17 additions & 118 deletions src/hapi/pipelines/database/population.py
@@ -1,131 +1,30 @@
 """Functions specific to the population theme."""
 
-from logging import getLogger
+from typing import Dict
 
 from hapi_schema.db_population import DBPopulation
-from hdx.api.configuration import Configuration
-from hdx.api.utilities.hdx_error_handler import HDXErrorHandler
-from hdx.scraper.framework.utilities.reader import Read
-from hdx.utilities.dateparse import parse_date_range
-from sqlalchemy.orm import Session
 
-from ..utilities.batch_populate import batch_populate
-from ..utilities.provider_admin_names import get_provider_name
-from . import admins
-from .base_uploader import BaseUploader
-from .metadata import Metadata
-
-logger = getLogger(__name__)
+from .hapi_dataset_uploader import HapiDatasetUploader
 
 
-class Population(BaseUploader):
-    def __init__(
-        self,
-        session: Session,
-        metadata: Metadata,
-        admins: admins.Admins,
-        configuration: Configuration,
-        error_handler: HDXErrorHandler,
-    ):
-        super().__init__(session)
-        self._metadata = metadata
-        self._admins = admins
-        self._configuration = configuration
-        self._error_handler = error_handler
-
-    def get_admin2_ref(self, row, headers, dataset_name, admin_level):
-        countryiso3 = row[headers.index("#country+code")]
-        if admin_level == "national":
-            admin_code = countryiso3
-        if admin_level == "adminone":
-            admin_code = row[headers.index("#adm1+code")]
-        if admin_level == "admintwo":
-            admin_code = row[headers.index("#adm2+code")]
-        admin2_code = admins.get_admin2_code_based_on_level(
-            admin_code=admin_code, admin_level=admin_level
-        )
-        admin2_ref = self._admins.admin2_data.get(admin2_code)
-        if admin2_ref is None:
-            if admin_level == "adminone":
-                admin_code = admins.get_admin1_to_location_connector_code(
-                    countryiso3
-                )
-            elif admin_level == "admintwo":
-                admin_code = admins.get_admin2_to_location_connector_code(
-                    countryiso3
-                )
-            else:
-                return None
-            admin2_ref = self._admins.get_admin2_ref(
-                admin_level,
-                admin_code,
-                dataset_name,
-                "Population",
-                self._error_handler,
-            )
-        return admin2_ref
+class Population(HapiDatasetUploader):
+    def populate_row(self, output_row: Dict, row: Dict) -> None:
+        output_row["gender"] = row["gender"]
+        output_row["age_range"] = row["age_range"]
+        output_row["min_age"] = (
+            int(float(row["min_age"])) if row["min_age"] else None
+        )
+        output_row["max_age"] = (
+            int(float(row["max_age"])) if row["max_age"] else None
+        )
+        output_row["population"] = int(row["population"])
 
     def populate(self) -> None:
-        logger.info("Populating population table")
-        reader = Read.get_reader("hdx")
-        dataset = reader.read_dataset("cod-ps-global", self._configuration)
-        self._metadata.add_dataset(dataset)
-        dataset_id = dataset["id"]
-        dataset_name = dataset["name"]
-        for resource in dataset.get_resources():
-            resource_id = resource["id"]
-            resource_name = resource["name"]
-            admin_level = _get_admin_level(resource_name)
-            if not admin_level:
-                continue
-            self._metadata.add_resource(dataset_id, resource)
-            url = resource["url"]
-            headers, rows = reader.get_tabular_rows(url, headers=2)
-            population_rows = []
-            for row in rows:
-                admin2_ref = self.get_admin2_ref(
-                    row, headers, dataset_name, admin_level
-                )
-                gender = row[headers.index("#gender")]
-                age_range = row[headers.index("#age+range")]
-                min_age = row[headers.index("#age+min")]
-                max_age = row[headers.index("#age+max")]
-                population = row[headers.index("#population")]
-                reference_year = row[headers.index("#date+year")]
-                time_period_range = parse_date_range(reference_year, "%Y")
-                provider_admin1_name = get_provider_name(
-                    row,
-                    "#adm1+name",
-                    headers,
-                )
-                provider_admin2_name = get_provider_name(
-                    row,
-                    "#adm2+name",
-                    headers,
-                )
-                population_row = dict(
-                    resource_hdx_id=resource_id,
-                    admin2_ref=admin2_ref,
-                    provider_admin1_name=provider_admin1_name,
-                    provider_admin2_name=provider_admin2_name,
-                    gender=gender,
-                    age_range=age_range,
-                    min_age=min_age,
-                    max_age=max_age,
-                    population=int(population),
-                    reference_period_start=time_period_range[0],
-                    reference_period_end=time_period_range[1],
-                )
-                population_rows.append(population_row)
-            batch_populate(population_rows, self._session, DBPopulation)
-
-
-def _get_admin_level(resource_name: str) -> str or None:
-    admin_level = resource_name.split(".")[0][-1]
-    if admin_level == "0":
-        return "national"
-    if admin_level == "1":
-        return "adminone"
-    if admin_level == "2":
-        return "admintwo"
-    return None
+        self.hapi_populate(
+            "population",
+            DBPopulation,
+            end_resource=2,
+        )
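The HapiDatasetUploader base class that Population now extends is among the files whose diffs are not rendered on this page, so its interface is not visible here. A minimal sketch of the contract the new subclass appears to rely on — hapi_populate reading the pre-computed HAPI population resources and calling populate_row once per input row to fill the theme-specific columns — might look like the following. Only populate_row, hapi_populate, the "population" theme name, DBPopulation, and end_resource=2 come from the diff above; every other name, and the default value of end_resource, are assumptions made for illustration.

# Illustrative sketch only: the real HapiDatasetUploader is not rendered in this diff.
from abc import ABC, abstractmethod
from typing import Any, Dict, Iterable, List


class HapiDatasetUploaderSketch(ABC):
    """Hypothetical stand-in for .hapi_dataset_uploader.HapiDatasetUploader."""

    def __init__(self) -> None:
        self._output_rows: List[Dict] = []

    @abstractmethod
    def populate_row(self, output_row: Dict, row: Dict) -> None:
        """Copy the theme-specific columns from an input row into an output row."""

    def hapi_populate(self, theme: str, db_table: Any, end_resource: int = 1) -> None:
        # The real method presumably reads the HAPI <theme> dataset from HDX,
        # registers dataset/resource metadata, resolves admin references and
        # reference periods, and batch-inserts into db_table; only the per-row
        # delegation to populate_row() is sketched here.
        for row in self._read_rows(theme, end_resource):
            output_row: Dict = {}
            self.populate_row(output_row, row)
            self._output_rows.append(output_row)

    def _read_rows(self, theme: str, end_resource: int) -> Iterable[Dict]:
        # Placeholder so the sketch is self-contained; in the pipeline, rows
        # would come from the HDX reader used elsewhere in this repository.
        return []

Compared with the removed implementation, the per-theme class keeps only the column mapping in populate_row; the dataset reading, admin lookups, and batch insertion that previously lived in population.py presumably move into the shared base class.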


1 change: 0 additions & 1 deletion tests/fixtures/input/cod-ps-global.json

This file was deleted.

