diff --git a/src/hapi/pipelines/configs/humanitarian_needs.yaml b/src/hapi/pipelines/configs/humanitarian_needs.yaml index 244e8fcd..27dc3ae4 100755 --- a/src/hapi/pipelines/configs/humanitarian_needs.yaml +++ b/src/hapi/pipelines/configs/humanitarian_needs.yaml @@ -24,7 +24,7 @@ humanitarian_needs_admintwo: output_hxl: - "#inneed+total" - "#targeted+total" - humanitarian_needs_yem_hno: + humanitarian_needs_yem_inneed: dataset: "yemen-humanitarian-needs-overview" resource: "YEM_PIN_2023.xlsx" format: "xlsx" @@ -47,6 +47,49 @@ humanitarian_needs_admintwo: - "#inneed +wsh +men" - "#inneed +wsh +girls" - "#inneed +wsh +women" + - "#inneed +shl" + - "#inneed +shl +boys" + - "#inneed +shl +men" + - "#inneed +shl +girls" + - "#inneed +shl +women" + - "#inneed +nut" + - "#inneed +nut +boys" + - "#inneed +nut +men" + - "#inneed +nut +girls" + - "#inneed +nut +women" + - "#inneed +edu" + - "#inneed +edu +boys" + - "#inneed +edu +men" + - "#inneed +edu +girls" + - "#inneed +edu +women" + - "#inneed +fsac" + - "#inneed +fsac +boys" + - "#inneed +fsac +men" + - "#inneed +fsac +girls" + - "#inneed +fsac +women" + - "#inneed +cccm" + - "#inneed +cccm +boys" + - "#inneed +cccm +men" + - "#inneed +cccm +girls" + - "#inneed +cccm +women" + - "#inneed +hea" + - "#inneed +hea +boys" + - "#inneed +hea +men" + - "#inneed +hea +girls" + - "#inneed +hea +women" + - "#inneed +pro" + - "#inneed +pro +boys" + - "#inneed +pro +men" + - "#inneed +pro +girls" + - "#inneed +pro +women" + - "#inneed +GBV" + - "#inneed +GBV +boys" + - "#inneed +GBV +men" + - "#inneed +GBV +girls" + - "#inneed +GBV +women" + - "#inneed +CP" + - "#inneed +CP +boys" + - "#inneed +CP +girls" output: - "IDP population" - "Resident population" @@ -60,6 +103,49 @@ humanitarian_needs_admintwo: - "InNeed WASH men" - "InNeed WASH girls" - "InNeed WASH women" + - "InNeed Shelter" + - "InNeed Shelter boys" + - "InNeed Shelter men" + - "InNeed Shelter girls" + - "InNeed Shelter women" + - "InNeed Nutrition" + - "InNeed Nutrition boys" + - "InNeed Nutrition men" + - "InNeed Nutrition girls" + - "InNeed Nutrition women" + - "InNeed Education" + - "InNeed Education boys" + - "InNeed Education men" + - "InNeed Education girls" + - "InNeed Education women" + - "InNeed Food Security" + - "InNeed Food Security boys" + - "InNeed Food Security men" + - "InNeed Food Security girls" + - "InNeed Food Security women" + - "InNeed CCM" + - "InNeed CCM boys" + - "InNeed CCM men" + - "InNeed CCM girls" + - "InNeed CCM women" + - "InNeed Health" + - "InNeed Health boys" + - "InNeed Health men" + - "InNeed Health girls" + - "InNeed Health women" + - "InNeed Protection" + - "InNeed Protection boys" + - "InNeed Protection men" + - "InNeed Protection girls" + - "InNeed Protection women" + - "InNeed GBV" + - "InNeed GBV boys" + - "InNeed GBV men" + - "InNeed GBV girls" + - "InNeed GBV women" + - "InNeed Child Protection" + - "InNeed Child Protection boys" + - "InNeed Child Protection girls" output_hxl: - "#population+idps" - "#population+residents" @@ -73,23 +159,225 @@ humanitarian_needs_admintwo: - "#inneed+wsh+m+age18plus" - "#inneed+wsh+f+age0_17" - "#inneed+wsh+f+age18plus" -# humanitarian_needs_yem_tgt: -# resource: "YEM_TRG_2023.xlsx" -# format: "xlsx" -# use_hxl: False -# admin: -# - ~ -# - "admin2Pcode" -# admin_exact: True -# input: -# - "F_TL" -# - "M_TL" -# - "T_TL" -# output: -# - "F_TL" -# - "M_TL" -# - "T_TL" -# output_hxl: -# - "#population+f+total" -# - "#population+m+total" -# - "#population+total" + - "#inneed+shl+total" + - "#inneed+shl+m+age0_17" + - "#inneed+shl+m+age18plus" + - "#inneed+shl+f+age0_17" + - "#inneed+shl+f+age18plus" + - "#inneed+nut+total" + - "#inneed+nut+m+age0_17" + - "#inneed+nut+m+age18plus" + - "#inneed+nut+f+age0_17" + - "#inneed+nut+f+age18plus" + - "#inneed+edu+total" + - "#inneed+edu+m+age0_17" + - "#inneed+edu+m+age18plus" + - "#inneed+edu+f+age0_17" + - "#inneed+edu+f+age18plus" + - "#inneed+fsc+total" + - "#inneed+fsc+m+age0_17" + - "#inneed+fsc+m+age18plus" + - "#inneed+fsc+f+age0_17" + - "#inneed+fsc+f+age18plus" + - "#inneed+ccm+total" + - "#inneed+ccm+m+age0_17" + - "#inneed+ccm+m+age18plus" + - "#inneed+ccm+f+age0_17" + - "#inneed+ccm+f+age18plus" + - "#inneed+hea+total" + - "#inneed+hea+m+age0_17" + - "#inneed+hea+m+age18plus" + - "#inneed+hea+f+age0_17" + - "#inneed+hea+f+age18plus" + - "#inneed+pro+total" + - "#inneed+pro+m+age0_17" + - "#inneed+pro+m+age18plus" + - "#inneed+pro+f+age0_17" + - "#inneed+pro+f+age18plus" + - "#inneed+pro_gbv+total" + - "#inneed+pro_gbv+m+age0_17" + - "#inneed+pro_gbv+m+age18plus" + - "#inneed+pro_gbv+f+age0_17" + - "#inneed+pro_gbv+f+age18plus" + - "#inneed+pro_cpn+total" + - "#inneed+pro_cpn+m+age0_17" + - "#inneed+pro_cpn+f+age0_17" + humanitarian_needs_yem_targeted: + dataset: "yemen-humanitarian-needs-overview" + resource: "YEM_TRG_2023.xlsx" + format: "xlsx" + headers: + - 1 + - 2 + admin: + - ~ + - "Demographic Information Dis_PCODE" + admin_exact: True + input: + - "Total Targeted People max" + - "Target People Disaggregated by Gender Men" + - "Target People Disaggregated by Gender Women" + - "Target People Disaggregated by Gender Boys" + - "Target People Disaggregated by Gender Girls" + - "WASH Total Target" + - "WASH Men" + - "WASH Women" + - "WASH Boys" + - "WASH Girls" + - "Shelter Total Target" + - "Shelter Men" + - "Shelter Women" + - "Shelter Boys" + - "Shelter Girls" + - "Nutrition Total Target Nutrition" + - "Nutrition Men" + - "Nutrition Women" + - "Nutrition Boys" + - "Nutrition Girls" + - "Education Total Target" + - "Education Men" + - "Education Women" + - "Education Boys" + - "Education Girls" + - "FSAC Total Target FSAC" + - "FSAC Men" + - "FSAC Women" + - "FSAC Boys" + - "FSAC Girls" + - "CCCM Total Target" + - "CCCM Men" + - "CCCM Women" + - "CCCM Boys" + - "CCCM Girls" + - "Health Total Target Health" + - "Health Men" + - "Health Women" + - "Health Boys" + - "Health Girls" + - "Overall PC Target Total Target" + - "Men" + - "Women" + - "Boys" + - "Girls" + - "Child Protection AoR overall target Total Target" + - "Men3" + - "Women3" + - "Boys3" + - "Girls3" + - "GBV AoR overall target Total Target" + - "GBV AoR overall target Men" + - "GBV AoR overall target Women" + - "GBV AoR overall target Boys" + - "GBV AoR overall target Girls" + output: + - "Targeted" + - "Targeted Men" + - "Targeted Women" + - "Targeted Boys" + - "Targeted Girls" + - "Targeted WASH" + - "Targeted WASH men" + - "Targeted WASH women" + - "Targeted WASH boys" + - "Targeted WASH girls" + - "Targeted Shelter" + - "Targeted Shelter men" + - "Targeted Shelter women" + - "Targeted Shelter boys" + - "Targeted Shelter girls" + - "Targeted Nutrition" + - "Targeted Nutrition men" + - "Targeted Nutrition women" + - "Targeted Nutrition boys" + - "Targeted Nutrition girls" + - "Targeted Education" + - "Targeted Education men" + - "Targeted Education women" + - "Targeted Education boys" + - "Targeted Education girls" + - "Targeted Food Security" + - "Targeted Food Security men" + - "Targeted Food Security women" + - "Targeted Food Security boys" + - "Targeted Food Security girls" + - "Targeted CCM" + - "Targeted CCM men" + - "Targeted CCM women" + - "Targeted CCM boys" + - "Targeted CCM girls" + - "Targeted Health" + - "Targeted Health men" + - "Targeted Health women" + - "Targeted Health boys" + - "Targeted Health girls" + - "Targeted Protection" + - "Targeted Protection men" + - "Targeted Protection women" + - "Targeted Protection boys" + - "Targeted Protection girls" + - "Targeted Child Protection" + - "Targeted Child Protection men" + - "Targeted Child Protection women" + - "Targeted Child Protection boys" + - "Targeted Child Protection girls" + - "Targeted GBV" + - "Targeted GBV men" + - "Targeted GBV women" + - "Targeted GBV boys" + - "Targeted GBV girls" + output_hxl: + - "#targeted+total" + - "#targeted+m+age18plus" + - "#targeted+f+age18plus" + - "#targeted+m+age0_17" + - "#targeted+f+age0_17" + - "#targeted+wsh+total" + - "#targeted+wsh+m+age18plus" + - "#targeted+wsh+f+age18plus" + - "#targeted+wsh+m+age0_17" + - "#targeted+wsh+f+age0_17" + - "#targeted+shl+total" + - "#targeted+shl+m+age18plus" + - "#targeted+shl+f+age18plus" + - "#targeted+shl+m+age0_17" + - "#targeted+shl+f+age0_17" + - "#targeted+nut+total" + - "#targeted+nut+m+age18plus" + - "#targeted+nut+f+age18plus" + - "#targeted+nut+m+age0_17" + - "#targeted+nut+f+age0_17" + - "#targeted+edu+total" + - "#targeted+edu+m+age18plus" + - "#targeted+edu+f+age18plus" + - "#targeted+edu+m+age0_17" + - "#targeted+edu+f+age0_17" + - "#targeted+fsc+total" + - "#targeted+fsc+m+age18plus" + - "#targeted+fsc+f+age18plus" + - "#targeted+fsc+m+age0_17" + - "#targeted+fsc+f+age0_17" + - "#targeted+ccm+total" + - "#targeted+ccm+m+age18plus" + - "#targeted+ccm+f+age18plus" + - "#targeted+ccm+m+age0_17" + - "#targeted+ccm+f+age0_17" + - "#targeted+hea+total" + - "#targeted+hea+m+age18plus" + - "#targeted+hea+f+age18plus" + - "#targeted+hea+m+age0_17" + - "#targeted+hea+f+age0_17" + - "#targeted+pro+total" + - "#targeted+pro+m+age18plus" + - "#targeted+pro+f+age18plus" + - "#targeted+pro+m+age0_17" + - "#targeted+pro+f+age0_17" + - "#targeted+pro_cpn+total" + - "#targeted+pro_cpn+m+age18plus" + - "#targeted+pro_cpn+f+age18plus" + - "#targeted+pro_cpn+m+age0_17" + - "#targeted+pro_cpn+f+age0_17" + - "#targeted+pro_gbv+total" + - "#targeted+pro_gbv+m+age18plus" + - "#targeted+pro_gbv+f+age18plus" + - "#targeted+pro_gbv+m+age0_17" + - "#targeted+pro_gbv+f+age0_17" diff --git a/src/hapi/pipelines/database/humanitarian_needs.py b/src/hapi/pipelines/database/humanitarian_needs.py index a7773b50..7ca03cdd 100644 --- a/src/hapi/pipelines/database/humanitarian_needs.py +++ b/src/hapi/pipelines/database/humanitarian_needs.py @@ -94,6 +94,10 @@ def match_column(col, patterns): disabled_marker = None # no disabled attribute # TODO: Will there be columns for able bodied? for admin_code, value in values.items(): + try: + value = int(value) + except (ValueError, TypeError): + continue admin2_code = admins.get_admin2_code_based_on_level( admin_code=admin_code, admin_level=admin_level ) @@ -108,7 +112,7 @@ def match_column(col, patterns): gender_code=gender_code, age_range_code=age_range_code, disabled_marker=disabled_marker, - population=int(value), + population=value, reference_period_start=reference_period_start, reference_period_end=reference_period_end, # TODO: For v2+, add to scraper (HAPI-199) diff --git a/tests/fixtures/input/humanitarian_needs_yem_hno_yem_pin_2023.xlsx b/tests/fixtures/input/humanitarian_needs_yem_inneed_yem_pin_2023.xlsx similarity index 100% rename from tests/fixtures/input/humanitarian_needs_yem_hno_yem_pin_2023.xlsx rename to tests/fixtures/input/humanitarian_needs_yem_inneed_yem_pin_2023.xlsx diff --git a/tests/fixtures/input/humanitarian_needs_yem_targeted_yem_trg_2023.xlsx b/tests/fixtures/input/humanitarian_needs_yem_targeted_yem_trg_2023.xlsx new file mode 100644 index 00000000..f79b17b5 Binary files /dev/null and b/tests/fixtures/input/humanitarian_needs_yem_targeted_yem_trg_2023.xlsx differ diff --git a/tests/test_main.py b/tests/test_main.py index dcc56f69..ddc7c381 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -98,7 +98,7 @@ def test_pipelines(self, configuration, folder): pipelines.output() count = session.scalar(select(func.count(DBResource.id))) - assert count == 14 + assert count == 15 count = session.scalar(select(func.count(DBDataset.id))) assert count == 9 count = session.scalar(select(func.count(DBLocation.id))) @@ -142,7 +142,7 @@ def test_pipelines(self, configuration, folder): count = session.scalar( select(func.count(DBHumanitarianNeeds.id)) ) - assert count == 4136 + assert count == 18122 org_mapping = pipelines.org._org_lookup assert org_mapping["Action against Hunger"] == {