Skip to content

Commit

Permalink
Merge pull request #187 from OCHA-DAP/bugfix/global-filtering
Browse files Browse the repository at this point in the history
Bugfix/global filtering
  • Loading branch information
b-j-mills authored Oct 17, 2024
2 parents b066623 + d465161 commit b531537
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 5 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.10.13] - 2024-10-17

### Added

- Added global food security data with additional p-code mappings

## [0.10.12] - 2024-10-16

### Fixed
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ hdx-python-api==6.3.4
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-scraper
hdx-python-country==3.8.1
hdx-python-country==3.8.2
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-api
Expand Down Expand Up @@ -228,7 +228,7 @@ ruamel-yaml==0.18.6
# via hdx-python-utilities
ruamel-yaml-clib==0.2.8
# via ruamel-yaml
setuptools==75.1.0
setuptools==75.2.0
# via ckanapi
shellingham==1.5.4
# via typer
Expand Down
2 changes: 2 additions & 0 deletions src/hapi/pipelines/app/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ def output_food_security(self):
admins=self.admins,
adminone=self.adminone,
admintwo=self.admintwo,
countryiso3s=self.countries,
configuration=self.configuration,
)
food_security.populate()
Expand Down Expand Up @@ -311,6 +312,7 @@ def output_funding(self):
funding = Funding(
session=self.session,
metadata=self.metadata,
countryiso3s=self.countries,
locations=self.locations,
configuration=self.configuration,
)
Expand Down
12 changes: 12 additions & 0 deletions src/hapi/pipelines/configs/core.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ admin1:
"CMR|Far-North": "CM004"
"CMR|West": "CM008"
"CPV|Santo Antao": "CV06"
"DJI|Djibouti Ville": "DJ04"
"ETH|B. Gumuz": "ET06"
"HTI|Nord-Ouest": "HT09"
"HTI|Sud": "HT07"
Expand All @@ -53,6 +54,7 @@ admin1:
"KEN|Meru South": "KE013"
"KEN|Moyale": "KE010"
"MDA|Tighina": "MD009"
"NAM|Khomas Region": "NA06"
"NGA|Abuja": "NG015"
"NIC|Atlantico Sur": "NI93"
"PAK|FATA": "PK5"
Expand Down Expand Up @@ -90,9 +92,13 @@ admin2:
"AF08|Onaba": "AF0805"
"AF14|Khost": "AF1401"
"AF23|Chaghcharan": "AF2301"
"AO09|Município dos Gambos (ex-chiange)": "AO09073"
"AO16|Mocamedes": "AO16139"
"CD43|Ville de Gbadolite": "CD4301"
"CD61|Beni-ville": "CD6109"
"CD83|Territoire de LODJA": "CD8303"
"CO08|Distrito Especial, Industrial Y Portuario De Barranquilla": "CO08001"
"DOM|Santiago": "DO0303"
"ET01|C. TIGRAY": "ET0102"
"ET01|NW. TIGRAY": "ET0101"
"ET02|ZONE1": "ET0201"
Expand Down Expand Up @@ -132,6 +138,7 @@ admin2:
"NG027|Munya": "NG027018"
"PH08|Samar": "PH08060"
"PK2|Mekran": "PK211"
"PK5|Dera Ismail Khan": "PK509"
"PS01|Ramallah and Albireh": "PS0130"
"RUS|Sankt-peterburg": "RU004002"
"SD01|Sharq El Nile": "SD01004"
Expand All @@ -144,6 +151,7 @@ admin2:
"TD11|La kabbia": "TD1102"
"TD17|Iriba": "TD1703"
"TD22|Tibest-Ouest": "TD2202"
"TG05|Kpendial- Ouest": "TG0513"
"TZ12|Mbozi": "TZ2606" # TODO: this unit is not getting picked up because the admin1 in the key (TZ12) does not match the admin1 prefix of the p-code (TZ26)
"UA12|Apostolivskyi": "UA1206"
"UA12|Marhanetska": "UA1208"
Expand Down Expand Up @@ -271,6 +279,10 @@ admin2:
- "maguindanao"
- "meru south"
- "metropolitan manila"
- "GHA|ashanti"
- "GHA|bono"
- "GHA|bono east"
- "GHA|central"

orphan_admin2s:
SS0001: "SSD"
Expand Down
16 changes: 16 additions & 0 deletions src/hapi/pipelines/configs/food_security.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,30 @@ food_security:
# These are countries where every "Area" is a non-admin unit, so the only
# admin data available is admin 1 in "Level 1"
adm1_only:
- "DJI"
- "HTI"
- "MMR"
- "SOM"

# This is where "Level 1" is blank and there is only admin 2 data available
# in "Area" (usually blank "Level 1" means "Area" is admin 1 rather than 2)
adm2_only:
- "DOM"
- "GMB"
- "MWI"
- "PSE"
- "TZA"
- "UGA"

# These are countries where "Level 1" actually holds admin 2 data; "Area" is
# ignored whether it is blank or not
adm2_in_level1:
- "LBN"

# These are countries where "Level 1" contains non-admin units, so the only
# admin data available is admin 1 in "Area"
adm1_in_area:
- "KEN"

# The errors below were picked up from the logging which outputs all the
# fuzzy matches
Expand Down
61 changes: 58 additions & 3 deletions src/hapi/pipelines/database/food_security.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from dataclasses import dataclass
from logging import getLogger
from typing import Dict, Optional, Set
from typing import Dict, List, Optional, Set

from hapi_schema.db_food_security import DBFoodSecurity
from hdx.api.configuration import Configuration
Expand Down Expand Up @@ -38,13 +38,15 @@ def __init__(
admins: admins.Admins,
adminone: AdminLevel,
admintwo: AdminLevel,
countryiso3s: List[str],
configuration: Configuration,
):
super().__init__(session)
self._metadata = metadata
self._admins = admins
self._adminone = adminone
self._admintwo = admintwo
self._countryiso3s = countryiso3s
self._configuration = configuration
self._country_status = {}

Expand Down Expand Up @@ -212,7 +214,23 @@ def process_subnational(
admin_level == "admintwo"
and countryiso3 in food_sec_config["adm1_only"]
):
return None
self._country_status[countryiso3] = (
"Level 1: Admin 1, Area: ignored"
)
adminoneinfo = self.get_adminoneinfo(
food_sec_config["adm_ignore_patterns"],
warnings,
dataset_name,
countryiso3,
row["Level 1"],
)
return self.get_adminone_admin2_ref(
food_sec_config,
warnings,
errors,
dataset_name,
adminoneinfo,
)
# The YAML configuration "adm2_only" specifies locations where
# "Level 1" is not populated and "Area" is admin 2. (These are
# exceptions since "Level 1" would normally be populated if "Area" is
Expand All @@ -234,6 +252,43 @@ def process_subnational(
adminoneinfo,
)

if countryiso3 in food_sec_config["adm2_in_level1"]:
row["Area"] = row["Level 1"]
row["Level 1"] = None
adminoneinfo = AdminInfo(countryiso3, "NOT GIVEN", "", None, False)
self._country_status[countryiso3] = (
"Level 1: Admin 2, Area: ignored"
)
return self.get_admintwo_admin2_ref(
food_sec_config,
warnings,
errors,
dataset_name,
row,
adminoneinfo,
)

if countryiso3 in food_sec_config["adm1_in_area"]:
if admin_level == "adminone":
return None
self._country_status[countryiso3] = (
"Level 1: ignored, Area: Admin 1"
)
adminoneinfo = self.get_adminoneinfo(
food_sec_config["adm_ignore_patterns"],
warnings,
dataset_name,
countryiso3,
row["Area"],
)
return self.get_adminone_admin2_ref(
food_sec_config,
warnings,
errors,
dataset_name,
adminoneinfo,
)

adminone_name = row["Level 1"]

if not adminone_name:
Expand Down Expand Up @@ -334,7 +389,7 @@ def populate(self) -> None:
if "#" in row["Date of analysis"]: # ignore HXL row
continue
countryiso3 = row["Country"]
if countryiso3 not in self._configuration["HAPI_countries"]:
if countryiso3 not in self._countryiso3s:
continue
provider_admin1_name = get_provider_name(row, "Level 1")
provider_admin2_name = get_provider_name(row, "Area")
Expand Down
5 changes: 5 additions & 0 deletions src/hapi/pipelines/database/funding.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Functions specific to the funding theme."""

from logging import getLogger
from typing import List

from hapi_schema.db_funding import DBFunding
from hdx.api.configuration import Configuration
Expand All @@ -21,11 +22,13 @@ def __init__(
self,
session: Session,
metadata: Metadata,
countryiso3s: List[str],
locations: locations,
configuration: Configuration,
):
super().__init__(session)
self._metadata = metadata
self._countryiso3s = countryiso3s
self._locations = locations
self._configuration = configuration

Expand All @@ -45,6 +48,8 @@ def populate(self) -> None:
if dataset["archived"]:
continue
admin_code = dataset.get_location_iso3s()[0]
if admin_code not in self._countryiso3s:
continue
resource = [
r
for r in dataset.get_resources()
Expand Down

0 comments on commit b531537

Please sign in to comment.