Skip to content

Commit

Permalink
Merge pull request #89 from OCHA-DAP/HDXDSYS-688-refugees
Browse files: browse the repository at this point in the history
HDXDSYS-688 refugees
  • Loading branch information
b-j-mills authored May 15, 2024
2 parents 7717619 + a3ce19a commit 3f01931
Show file tree
Hide file tree
Showing 14 changed files with 42,639 additions and 84 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.9.2] - 2024-05-15

### Added

- Refugees data and tests
- HXL tag parsing functions to get age and gender

### Changed

- Extended locations to global coverage

## [0.9.1] - 2024-05-14

### Changed
Expand Down
1 change: 1 addition & 0 deletions src/hapi/pipelines/app/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ def main(
"national_risk.yaml",
"operational_presence.yaml",
"population.yaml",
"refugees.yaml",
]
project_config_dict = load_yamls(project_configs)
project_config_dict = add_defaults(project_config_dict)
Expand Down
15 changes: 15 additions & 0 deletions src/hapi/pipelines/app/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from hapi.pipelines.database.org import Org
from hapi.pipelines.database.org_type import OrgType
from hapi.pipelines.database.population import Population
from hapi.pipelines.database.refugees import Refugees
from hapi.pipelines.database.sector import Sector


Expand Down Expand Up @@ -153,6 +154,7 @@ def _create_configurable_scrapers(
"humanitarian_needs", "admintwo", adminlevel=self.admintwo
)
_create_configurable_scrapers("national_risk", "national")
_create_configurable_scrapers("refugees", "national")

def run(self):
self.runner.run()
Expand Down Expand Up @@ -239,3 +241,16 @@ def output(self):
results=results,
)
national_risk.populate()

if not self.themes_to_run or "refugees" in self.themes_to_run:
results = self.runner.get_hapi_results(
self.configurable_scrapers["refugees"]
)

refugees = Refugees(
session=self.session,
metadata=self.metadata,
locations=self.locations,
results=results,
)
refugees.populate()
91 changes: 91 additions & 0 deletions src/hapi/pipelines/configs/refugees.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Scraper configuration for the "refugees" theme, keyed as "refugees_national".
refugees_national:
refugees:
dataset: "unhcr-population-data-for-world"
resource: "Demographics and locations of forcibly displaced and stateless persons (Global)"
format: "csv"
headers: 1
use_hxl: True
filter_cols:
- "#indicator+population_type"
- "#country+code+origin"
- "#country+code+asylum"
# Only rows whose population type is REF or OOC satisfy this expression.
# NOTE(review): presumably evaluated by the scraper framework before rows are read - confirm.
prefilter: "#indicator+population_type in ['REF', 'OOC']"
# The admin column is the country of origin; the country of asylum is carried as a value column below.
admin:
- "#country+code+origin"
admin_exact: True
input:
- "#date+year"
- "#country+code+asylum"
- "#indicator+population_type"
- "#affected+f+infants+age_0_4"
- "#affected+f+children+age_5_11"
- "#affected+f+adolescents+age_12_17"
- "#affected+f+adults+age_18_59"
- "#affected+f+elderly+age_60"
- "#affected+f+unknown_age"
- "#affected+f+total"
- "#affected+m+infants+age_0_4"
- "#affected+m+children+age_5_11"
- "#affected+m+adolescents+age_12_17"
- "#affected+m+adults+age_18_59"
- "#affected+m+elderly+age_60"
- "#affected+m+unknown_age"
- "#affected+m+total"
- "#affected+all+total"
# Same columns, in the same order, as "input" above.
list:
- "#date+year"
- "#country+code+asylum"
- "#indicator+population_type"
- "#affected+f+infants+age_0_4"
- "#affected+f+children+age_5_11"
- "#affected+f+adolescents+age_12_17"
- "#affected+f+adults+age_18_59"
- "#affected+f+elderly+age_60"
- "#affected+f+unknown_age"
- "#affected+f+total"
- "#affected+m+infants+age_0_4"
- "#affected+m+children+age_5_11"
- "#affected+m+adolescents+age_12_17"
- "#affected+m+adults+age_18_59"
- "#affected+m+elderly+age_60"
- "#affected+m+unknown_age"
- "#affected+m+total"
- "#affected+all+total"
# Output names mirror the input columns position by position, except the two
# elderly columns: input "age_60" is emitted as "age_60plus".
output:
- "#date+year"
- "#country+code+asylum"
- "#indicator+population_type"
- "#affected+f+infants+age_0_4"
- "#affected+f+children+age_5_11"
- "#affected+f+adolescents+age_12_17"
- "#affected+f+adults+age_18_59"
- "#affected+f+elderly+age_60plus"
- "#affected+f+unknown_age"
- "#affected+f+total"
- "#affected+m+infants+age_0_4"
- "#affected+m+children+age_5_11"
- "#affected+m+adolescents+age_12_17"
- "#affected+m+adults+age_18_59"
- "#affected+m+elderly+age_60plus"
- "#affected+m+unknown_age"
- "#affected+m+total"
- "#affected+all+total"
# Identical to the "output" list above.
output_hxl:
- "#date+year"
- "#country+code+asylum"
- "#indicator+population_type"
- "#affected+f+infants+age_0_4"
- "#affected+f+children+age_5_11"
- "#affected+f+adolescents+age_12_17"
- "#affected+f+adults+age_18_59"
- "#affected+f+elderly+age_60plus"
- "#affected+f+unknown_age"
- "#affected+f+total"
- "#affected+m+infants+age_0_4"
- "#affected+m+children+age_5_11"
- "#affected+m+adolescents+age_12_17"
- "#affected+m+adults+age_18_59"
- "#affected+m+elderly+age_60plus"
- "#affected+m+unknown_age"
- "#affected+m+total"
- "#affected+all+total"
2 changes: 1 addition & 1 deletion src/hapi/pipelines/database/admins.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def _update_admin_table(
admin_filter = _AdminFilter(
source=self._libhxl_dataset,
desired_admin_level=desired_admin_level,
country_codes=list(self._locations.data.keys()),
country_codes=list(self._locations.hapi_countries),
)
for i, row in enumerate(admin_filter):
code = row.get("#adm+code")
Expand Down
47 changes: 6 additions & 41 deletions src/hapi/pipelines/database/humanitarian_needs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
from hxl.model import Column, TagPattern
from sqlalchemy.orm import Session

from ..utilities.parse_tags import (
get_gender_and_age_range,
get_min_and_max_age,
)
from . import admins
from .base_uploader import BaseUploader
from .metadata import Metadata
Expand Down Expand Up @@ -56,10 +60,9 @@ def populate(self):
if not sector_code:
sector_code = "*"
sector_code = sector_code.upper()
gender = _get_gender(column)
# "#*+age0_4" "#*+age80plus"
age_range = _get_age_range(hxl_tag)
min_age, max_age = _get_min_and_max_age(age_range)
gender, age_range = get_gender_and_age_range(hxl_tag)
min_age, max_age = get_min_and_max_age(age_range)
# "#*+disabled"
disabled_marker = _get_disabled_marker(column)
# TODO: Will there be columns for able bodied?
Expand Down Expand Up @@ -110,16 +113,6 @@ def _get_population_status(col: Column) -> str:
return population_status


def _get_gender(col: Column) -> str:
    """Return the gender code carried by an HXL column, or "*" if none.

    One ``#*+<code>`` tag pattern is built for each of the codes
    f, m, x, u, o and e, and the pattern-to-code mapping is handed to
    ``match_column``. A falsy result from ``match_column`` is replaced
    by the wildcard "*".

    Args:
        col: The HXL column to inspect.

    Returns:
        The value produced by ``match_column`` when it is truthy,
        otherwise "*".
    """
    # NOTE(review): presumably match_column returns the code mapped to the
    # matching pattern - confirm against its definition.
    code_by_pattern = {}
    for code in ("f", "m", "x", "u", "o", "e"):
        code_by_pattern[TagPattern.parse(f"#*+{code}")] = code
    matched_code = match_column(col, code_by_pattern)
    return matched_code if matched_code else "*"


def _get_population_group(col: Column) -> str:
population_group_patterns = {
TagPattern.parse("#*+refugees"): "REF",
Expand All @@ -132,37 +125,9 @@ def _get_population_group(col: Column) -> str:
return population_group


def _get_age_range(hxl_tag: str) -> str:
age_component = hxl_tag.split("+")[-1]
age_range = "*"
if not age_component.startswith("age"):
return age_range
age_component = age_component[3:]
if age_component.endswith("plus"):
age_range = age_component[:-4] + "+"
else:
age_range = age_component.replace("_", "-")
return age_range


def _get_disabled_marker(col: Column) -> str:
    """Return "y" if the column carries the ``+disabled`` attribute, else "*".

    The column is tested against the ``#*+disabled`` tag pattern. A single
    conditional return is used so every path visibly yields a ``str``; the
    previous pair of complementary ``if`` statements left an implicit
    ``None`` fall-through at the end of the function.

    Args:
        col: The HXL column to inspect.

    Returns:
        "y" when the pattern matches the column, otherwise "*".
    """
    is_disabled = TagPattern.parse("#*+disabled").match(col)
    return "y" if is_disabled else "*"


# TODO: this is duplicate code, either move to shared location or overwrite with new HNO pipeline
def _get_min_and_max_age(age_range: str) -> (int | None, int | None):
if age_range == "*":
return None, None
ages = age_range.split("-")
if len(ages) == 2:
# Format: 0-5
min_age, max_age = int(ages[0]), int(ages[1])
else:
# Format: 80+
min_age = int(age_range.replace("+", ""))
max_age = None
return min_age, max_age
4 changes: 1 addition & 3 deletions src/hapi/pipelines/database/locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,12 @@ def __init__(
country_name_overrides=configuration["country_name_overrides"],
country_name_mappings=configuration["country_name_mappings"],
)
self._hapi_countries = configuration["HAPI_countries"]
self.hapi_countries = configuration["HAPI_countries"]
self.data = {}

def populate(self):
for country in Country.countriesdata()["countries"].values():
code = country["#country+code+v_iso3"]
if code not in self._hapi_countries:
continue
location_row = DBLocation(
code=code,
name=country["#country+name+preferred"],
Expand Down
40 changes: 6 additions & 34 deletions src/hapi/pipelines/database/population.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
from hapi_schema.db_population import DBPopulation
from sqlalchemy.orm import Session

from ..utilities.parse_tags import (
get_gender_and_age_range,
get_min_and_max_age,
)
from . import admins
from .base_uploader import BaseUploader
from .metadata import Metadata
Expand Down Expand Up @@ -46,13 +50,10 @@ def populate(self):
raise ValueError(
f"HXL tag {hxl_tag} not in valid format"
)
gender, age_range = _get_gender_and_age_range_hxl_mapping(
gender, age_range = get_gender_and_age_range(
hxl_tag=hxl_tag
)
if age_range == "*":
min_age, max_age = None, None
else:
min_age, max_age = _get_min_and_max_age(age_range)
min_age, max_age = get_min_and_max_age(age_range)
for admin_code, value in values.items():
admin2_code = admins.get_admin2_code_based_on_level(
admin_code=admin_code, admin_level=admin_level
Expand Down Expand Up @@ -86,32 +87,3 @@ def _validate_gender_and_age_range_hxl_tag(hxl_tag: str) -> bool:
"""
# TODO: add tests for this (HAPI-159)
return bool(_HXL_PATTERN.match(hxl_tag))


def _get_gender_and_age_range_hxl_mapping(hxl_tag: str) -> (str, str):
components = hxl_tag.split("+")
gender = "*"
age_range = "*"
for component in components[1:]:
# components can only be age, gender, or the word "total"
if component.startswith("age_"):
age_component = component[4:]
if age_component.endswith("_plus"):
age_range = age_component[:-5] + "+"
else:
age_range = age_component.replace("_", "-")
elif component != "total":
gender = component
return gender, age_range


def _get_min_and_max_age(age_range: str) -> (int, int):
ages = age_range.split("-")
if len(ages) == 2:
# Format: 0-5
min_age, max_age = int(ages[0]), int(ages[1])
else:
# Format: 80+
min_age = int(age_range.replace("+", ""))
max_age = None
return min_age, max_age
Loading

0 comments on commit 3f01931

Please sign in to comment.