Skip to content

Commit

Permalink
Read from hapi datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
b-j-mills committed Feb 25, 2025
1 parent 80a9afe commit c016836
Show file tree
Hide file tree
Showing 13 changed files with 58,138 additions and 153 deletions.
1 change: 0 additions & 1 deletion src/hapi/pipelines/app/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,6 @@ def main(
"core.yaml",
"food_security.yaml",
"national_risk.yaml",
"refugees_and_returnees.yaml",
"wfp.yaml",
]
project_config_dict = load_yamls(project_configs)
Expand Down
41 changes: 28 additions & 13 deletions src/hapi/pipelines/app/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
from hapi.pipelines.database.org_type import OrgType
from hapi.pipelines.database.population import Population
from hapi.pipelines.database.poverty_rate import PovertyRate
from hapi.pipelines.database.refugees_and_returnees import RefugeesAndReturnees
from hapi.pipelines.database.refugees import Refugees
from hapi.pipelines.database.returnees import Returnees
from hapi.pipelines.database.sector import Sector
from hapi.pipelines.database.wfp_commodity import WFPCommodity
from hapi.pipelines.database.wfp_market import WFPMarket
Expand Down Expand Up @@ -170,7 +171,6 @@ def _create_configurable_scrapers(
)

_create_configurable_scrapers("national_risk", "national")
_create_configurable_scrapers("refugees_and_returnees", "national")

def run(self):
self._runner.run()
Expand All @@ -180,6 +180,7 @@ def output_population(self):
population = Population(
session=self._session,
metadata=self._metadata,
locations=self._locations,
admins=self._admins,
configuration=self._configuration,
error_handler=self._error_handler,
Expand All @@ -200,6 +201,7 @@ def output_operational_presence(self):
operational_presence = OperationalPresence(
session=self._session,
metadata=self._metadata,
locations=self._locations,
admins=self._admins,
configuration=self._configuration,
error_handler=self._error_handler,
Expand Down Expand Up @@ -228,6 +230,7 @@ def output_humanitarian_needs(self):
humanitarian_needs = HumanitarianNeeds(
session=self._session,
metadata=self._metadata,
locations=self._locations,
admins=self._admins,
configuration=self._configuration,
error_handler=self._error_handler,
Expand All @@ -247,27 +250,36 @@ def output_national_risk(self):
)
national_risk.populate()

def output_refugees_and_returnees(self):
if (
not self._themes_to_run
or "refugees_and_returnees" in self._themes_to_run
):
results = self._runner.get_hapi_results(
self._configurable_scrapers["refugees_and_returnees"]
def output_refugees(self):
if not self._themes_to_run or "refugees" in self._themes_to_run:
refugees = Refugees(
session=self._session,
metadata=self._metadata,
locations=self._locations,
admins=self._admins,
configuration=self._configuration,
error_handler=self._error_handler,
)
refugees_and_returnees = RefugeesAndReturnees(
refugees.populate()

def output_returnees(self):
if not self._themes_to_run or "returnees" in self._themes_to_run:
returnees = Returnees(
session=self._session,
metadata=self._metadata,
locations=self._locations,
results=results,
admins=self._admins,
configuration=self._configuration,
error_handler=self._error_handler,
)
refugees_and_returnees.populate()
returnees.populate()

def output_idps(self):
if not self._themes_to_run or "idps" in self._themes_to_run:
idps = IDPs(
session=self._session,
metadata=self._metadata,
locations=self._locations,
admins=self._admins,
configuration=self._configuration,
error_handler=self._error_handler,
Expand All @@ -291,6 +303,7 @@ def output_poverty_rate(self):
poverty_rate = PovertyRate(
session=self._session,
metadata=self._metadata,
locations=self._locations,
admins=self._admins,
configuration=self._configuration,
error_handler=self._error_handler,
Expand All @@ -302,6 +315,7 @@ def output_conflict_event(self):
conflict_event = ConflictEvent(
session=self._session,
metadata=self._metadata,
locations=self._locations,
admins=self._admins,
configuration=self._configuration,
error_handler=self._error_handler,
Expand Down Expand Up @@ -350,7 +364,8 @@ def output(self):
self.output_food_security()
self.output_humanitarian_needs()
self.output_national_risk()
self.output_refugees_and_returnees()
self.output_refugees()
self.output_returnees()
self.output_idps()
self.output_funding()
self.output_poverty_rate()
Expand Down
19 changes: 15 additions & 4 deletions src/hapi/pipelines/database/hapi_dataset_uploader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from abc import ABC
from logging import getLogger
from typing import Dict, Optional, Type
from typing import Dict, List, Optional, Type

from hapi_schema.utils.base import Base
from hdx.api.configuration import Configuration
Expand All @@ -11,7 +11,7 @@
from sqlalchemy.orm import Session

from ..utilities.batch_populate import batch_populate
from . import admins
from . import admins, locations
from hapi.pipelines.database.base_uploader import BaseUploader
from hapi.pipelines.database.metadata import Metadata

Expand All @@ -23,12 +23,14 @@ def __init__(
self,
session: Session,
metadata: Metadata,
locations: locations.Locations,
admins: admins.Admins,
configuration: Configuration,
error_handler: HDXErrorHandler,
):
super().__init__(session)
self._metadata = metadata
self._locations = locations
self._admins = admins
self._configuration = configuration
self._error_handler = error_handler
Expand All @@ -42,6 +44,7 @@ def hapi_populate(
hapi_table: Type[Base],
end_resource: Optional[int] = 1,
max_admin_level: int = 2,
location_headers: Optional[List[str]] = None,
):
log_name = name_suffix.replace("-", " ")
pipeline = []
Expand Down Expand Up @@ -73,7 +76,9 @@ def hapi_populate(
else:
output_str = dataset_id

countryiso3 = row["location_code"]
if location_headers is None:
location_headers = ["location_code"]
countryiso3 = row.get(location_headers[0])
resource_name = self._metadata.get_resource_name(resource_id)
if not resource_name:
dataset = reader.read_dataset(
Expand Down Expand Up @@ -136,7 +141,13 @@ def hapi_populate(
row["provider_admin1_name"] or ""
)
else:
output_row["location_ref"] = countryiso3
for location_header in location_headers:
countryiso3 = row[location_header]
output_header = location_header.replace(
"_code", "_ref"
)
location_ref = self._locations.data[countryiso3]
output_row[output_header] = location_ref

self.populate_row(output_row, row)
output_rows.append(output_row)
Expand Down
2 changes: 1 addition & 1 deletion src/hapi/pipelines/database/idps.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Functions specific to the refugees theme."""
"""Functions specific to the idps theme."""

from logging import getLogger
from typing import Dict
Expand Down
29 changes: 29 additions & 0 deletions src/hapi/pipelines/database/refugees.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Functions specific to the refugees theme."""

from logging import getLogger
from typing import Dict

from hapi_schema.db_refugees import DBRefugees

from .hapi_dataset_uploader import HapiDatasetUploader

logger = getLogger(__name__)


class Refugees(HapiDatasetUploader):
    """Uploader for the refugees theme, built on ``HapiDatasetUploader``."""

    def populate_row(self, output_row: Dict, row: Dict) -> None:
        """Copy the refugees-specific columns from ``row`` into ``output_row``.

        Args:
            output_row: Mapping being assembled for output; mutated in place.
            row: Input record; must contain every column read below.
        """
        # Columns that are carried over verbatim.
        for column in ("population_group", "gender", "age_range"):
            output_row[column] = row[column]

        # Age bounds are converted to int when present; falsy values
        # (e.g. None or an empty string) are passed through unchanged.
        for column in ("min_age", "max_age"):
            raw_age = row[column]
            output_row[column] = int(raw_age) if raw_age else raw_age

        output_row["population"] = row["population"]

    def populate(self) -> None:
        """Run the shared ``hapi_populate`` routine for the refugees table."""
        # Two location code columns are supplied rather than a single one.
        # NOTE(review): presumably each is resolved to a matching "*_ref"
        # output column by hapi_populate - confirm against the base class.
        location_code_columns = [
            "origin_location_code",
            "asylum_location_code",
        ]
        self.hapi_populate(
            "refugees",
            DBRefugees,
            end_resource=None,
            max_admin_level=0,
            location_headers=location_code_columns,
        )
128 changes: 0 additions & 128 deletions src/hapi/pipelines/database/refugees_and_returnees.py

This file was deleted.

28 changes: 28 additions & 0 deletions src/hapi/pipelines/database/returnees.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Functions specific to the returnees theme."""

from logging import getLogger
from typing import Dict

from hapi_schema.db_returnees import DBReturnees

from .hapi_dataset_uploader import HapiDatasetUploader

logger = getLogger(__name__)


class Returnees(HapiDatasetUploader):
    """Uploader for the returnees theme, built on ``HapiDatasetUploader``."""

    def populate_row(self, output_row: Dict, row: Dict) -> None:
        """Copy the returnees-specific columns from ``row`` into ``output_row``.

        Args:
            output_row: Mapping being assembled for output; mutated in place.
            row: Input record; must contain every column read below.
        """
        # Columns that are carried over verbatim.
        for column in ("population_group", "gender", "age_range"):
            output_row[column] = row[column]

        # Age bounds are converted to int when present; falsy values
        # (e.g. None or an empty string) are passed through unchanged.
        for column in ("min_age", "max_age"):
            raw_age = row[column]
            output_row[column] = int(raw_age) if raw_age else raw_age

        output_row["population"] = row["population"]

    def populate(self) -> None:
        """Run the shared ``hapi_populate`` routine for the returnees table."""
        # Two location code columns are supplied rather than a single one.
        # NOTE(review): presumably each is resolved to a matching "*_ref"
        # output column by hapi_populate - confirm against the base class.
        location_code_columns = [
            "origin_location_code",
            "asylum_location_code",
        ]
        self.hapi_populate(
            "returnees",
            DBReturnees,
            max_admin_level=0,
            location_headers=location_code_columns,
        )

Large diffs are not rendered by default.

Loading

0 comments on commit c016836

Please sign in to comment.