diff --git a/pyproject.toml b/pyproject.toml index 85f70910..346f12e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "hapi-schema>= 0.5.0", "hdx-python-country>= 3.6.3", "hdx-python-database[postgresql]>= 1.2.9", - "hdx-python-scraper>= 2.3.0", + "hdx-python-scraper>= 2.3.2", "libhxl", "sqlalchemy" ] diff --git a/requirements.txt b/requirements.txt index 9cab4aa3..dc006614 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ # annotated-types==0.6.0 # via pydantic -attrs==23.1.0 +attrs==23.2.0 # via # frictionless # jsonlines @@ -29,7 +29,7 @@ click==8.1.7 # via typer colorama==0.4.6 # via typer -coverage[toml]==7.3.4 +coverage[toml]==7.4.0 # via pytest-cov cryptography==41.0.7 # via pyopenssl @@ -53,7 +53,7 @@ filelock==3.13.1 # via virtualenv frictionless==5.16.0 # via hdx-python-utilities -google-auth==2.25.2 +google-auth==2.26.0 # via # google-auth-oauthlib # gspread @@ -61,7 +61,7 @@ google-auth-oauthlib==1.2.0 # via gspread greenlet==3.0.3 # via sqlalchemy -gspread==5.12.3 +gspread==5.12.4 # via hdx-python-scraper hapi-schema==0.5.0 # via hapi-pipelines (pyproject.toml) @@ -74,7 +74,7 @@ hdx-python-country==3.6.3 # hdx-python-scraper hdx-python-database[postgresql]==1.2.9 # via hapi-pipelines (pyproject.toml) -hdx-python-scraper==2.3.0 +hdx-python-scraper==2.3.2 # via hapi-pipelines (pyproject.toml) hdx-python-utilities==3.6.3 # via @@ -163,11 +163,11 @@ pyasn1-modules==0.3.0 # via google-auth pycparser==2.21 # via cffi -pydantic==2.5.2 +pydantic==2.5.3 # via # frictionless # inflect -pydantic-core==2.14.5 +pydantic-core==2.14.6 # via pydantic pygments==2.17.2 # via rich @@ -179,7 +179,7 @@ pyphonetics==0.5.3 # via hdx-python-country pyrsistent==0.20.0 # via jsonschema -pytest==7.4.3 +pytest==7.4.4 # via # hapi-pipelines (pyproject.toml) # pytest-cov @@ -205,7 +205,7 @@ quantulum3==0.9.0 # via hdx-python-api ratelimit==2.2.1 # via hdx-python-utilities -regex==2023.10.3 +regex==2023.12.25 # via hdx-python-scraper requests==2.31.0 # via @@ -243,14 +243,14 @@ six==1.16.0 # sphinxcontrib-napoleon sphinxcontrib-napoleon==0.7 # via defopt -sqlalchemy==2.0.23 +sqlalchemy==2.0.25 # via # hapi-pipelines (pyproject.toml) # hapi-schema # hdx-python-database stringcase==1.2.0 # via frictionless -structlog==23.2.0 +structlog==23.3.0 # via libhxl tableschema-to-template==0.0.13 # via hdx-python-utilities diff --git a/src/hapi/pipelines/app/pipelines.py b/src/hapi/pipelines/app/pipelines.py index fc905c41..dfccee47 100644 --- a/src/hapi/pipelines/app/pipelines.py +++ b/src/hapi/pipelines/app/pipelines.py @@ -149,6 +149,9 @@ def _create_configurable_scrapers( _create_configurable_scrapers( "food_security", "admintwo", adminlevel=self.admintwo ) + _create_configurable_scrapers( + "humanitarian_needs", "adminone", adminlevel=self.adminone + ) _create_configurable_scrapers( "humanitarian_needs", "admintwo", adminlevel=self.admintwo ) diff --git a/src/hapi/pipelines/configs/humanitarian_needs.yaml b/src/hapi/pipelines/configs/humanitarian_needs.yaml index 52b71bfa..c9ac76e4 100755 --- a/src/hapi/pipelines/configs/humanitarian_needs.yaml +++ b/src/hapi/pipelines/configs/humanitarian_needs.yaml @@ -5,10 +5,520 @@ # #inneed # #inneed+boys +humanitarian_needs_adminone: + humanitarian_needs_afg_total: + dataset: "afghanistan-humanitarian-needs-overview" + resource: "afg_hno_pin_2024.xlsx" + filename: "humanitarian_needs_afg.xlsx" + format: "xlsx" + sheet: "Total" + headers: 6 + use_hxl: True + filter_cols: + - "#adm2+code" + prefilter: "#adm2+code == '-'" + admin: + - ~ + - "#adm1+code" + admin_exact: True + input: + - "#inneed" + - "#inneed+m+children+idps" + - "#inneed+f+children+idps" + - "#inneed+m+adult+idps" + - "#inneed+f+adult+idps" + - "#inneed+idps" + - "#inneed+m+children+returnees" + - "#inneed+f+children+returnees" + - "#inneed+m+adult+returnees" + - "#inneed+f+adult+returnees" + - "#inneed+returnees" + - "#inneed+m+children+refugees" + - "#inneed+f+children+refugees" + - "#inneed+m+adult+refugees" + - "#inneed+f+adult+refugees" + - "#inneed+refugees" + - "#inneed+children" + - "#inneed+adults" + - "#inneed+elderly" + - "#inneed+disability" + output: + - "Inneed Total" + - "Inneed IDPs Boys" + - "Inneed IDPs Girls" + - "Inneed IDPs Men" + - "Inneed IDPs Women" + - "Inneed IDPs" + - "Inneed Returnees Boys" + - "Inneed Returnees Girls" + - "Inneed Returnees Men" + - "Inneed Returnees Women" + - "Inneed Returnees" + - "Inneed Refugees Boys" + - "Inneed Refugees Girls" + - "Inneed Refugees Men" + - "Inneed Refugees Women" + - "Inneed Refugees" + - "Inneed Children" + - "Inneed Adults" + - "Inneed Elderly" + - "Inneed Disabled" + output_hxl: + - "#inneed+total" + - "#inneed+idps+m+age0_17" + - "#inneed+idps+f+age0_17" + - "#inneed+idps+m+age18plus" + - "#inneed+idps+f+age18plus" + - "#inneed+idps" + - "#inneed+returnees+m+age0_17" + - "#inneed+returnees+f+age0_17" + - "#inneed+returnees+m+age18plus" + - "#inneed+returnees+f+age18plus" + - "#inneed+returnees" + - "#inneed+refugees+m+age0_17" + - "#inneed+refugees+f+age0_17" + - "#inneed+refugees+m+age18plus" + - "#inneed+refugees+f+age18plus" + - "#inneed+refugees" + - "#inneed+age0_17" + - "#inneed+age18_64" + - "#inneed+age65plus" + - "#inneed+disabled" + humanitarian_needs_afg_edu: + dataset: "afghanistan-humanitarian-needs-overview" + resource: "afg_hno_pin_2024.xlsx" + filename: "humanitarian_needs_afg.xlsx" + format: "xlsx" + sheet: "EDU" + headers: 6 + use_hxl: True + admin: + - ~ + - "#adm1+code" + admin_exact: True + input: + - "#inneed" + - "#inneed+idps" + - "#inneed+returnees" + - "#inneed+refugees" + - "#inneed+children" + - "#inneed+adults" + - "#inneed+elderly" + - "#inneed+disability" + output: + - "Inneed EDU" + - "Inneed EDU IDPs" + - "Inneed EDU Returnees" + - "Inneed EDU Refugees" + - "Inneed EDU Children" + - "Inneed EDU Adults" + - "Inneed EDU Elderly" + - "Inneed EDU Disabled" + output_hxl: + - "#inneed+edu" + - "#inneed+edu+idps" + - "#inneed+edu+returnees" + - "#inneed+edu+refugees" + - "#inneed+edu+age0_17" + - "#inneed+edu+age18_64" + - "#inneed+edu+age65plus" + - "#inneed+edu+disabled" + humanitarian_needs_afg_shl: + dataset: "afghanistan-humanitarian-needs-overview" + resource: "afg_hno_pin_2024.xlsx" + filename: "humanitarian_needs_afg.xlsx" + format: "xlsx" + sheet: "SHL" + headers: 6 + use_hxl: True + admin: + - ~ + - "#adm1+code" + admin_exact: True + input: + - "#inneed" + - "#inneed+idps" + - "#inneed+returnees" + - "#inneed+refugees" + - "#inneed+children" + - "#inneed+adults" + - "#inneed+elderly" + - "#inneed+disability" + output: + - "Inneed SHL" + - "Inneed SHL IDPs" + - "Inneed SHL Returnees" + - "Inneed SHL Refugees" + - "Inneed SHL Children" + - "Inneed SHL Adults" + - "Inneed SHL Elderly" + - "Inneed SHL Disabled" + output_hxl: + - "#inneed+shl" + - "#inneed+shl+idps" + - "#inneed+shl+returnees" + - "#inneed+shl+refugees" + - "#inneed+shl+age0_17" + - "#inneed+shl+age18_64" + - "#inneed+shl+age65plus" + - "#inneed+shl+disabled" + humanitarian_needs_afg_fsc: + dataset: "afghanistan-humanitarian-needs-overview" + resource: "afg_hno_pin_2024.xlsx" + filename: "humanitarian_needs_afg.xlsx" + format: "xlsx" + sheet: "FSC" + headers: 6 + use_hxl: True + admin: + - ~ + - "#adm1+code" + admin_exact: True + input: + - "#inneed" + - "#inneed+idps" + - "#inneed+returnees" + - "#inneed+refugees" + - "#inneed+children" + - "#inneed+adults" + - "#inneed+elderly" + - "#inneed+disability" + output: + - "Inneed FSC" + - "Inneed FSC IDPs" + - "Inneed FSC Returnees" + - "Inneed FSC Refugees" + - "Inneed FSC Children" + - "Inneed FSC Adults" + - "Inneed FSC Elderly" + - "Inneed FSC Disabled" + output_hxl: + - "#inneed+fsc" + - "#inneed+fsc+idps" + - "#inneed+fsc+returnees" + - "#inneed+fsc+refugees" + - "#inneed+fsc+age0_17" + - "#inneed+fsc+age18_64" + - "#inneed+fsc+age65plus" + - "#inneed+fsc+disabled" + humanitarian_needs_afg_hea: + dataset: "afghanistan-humanitarian-needs-overview" + resource: "afg_hno_pin_2024.xlsx" + filename: "humanitarian_needs_afg.xlsx" + format: "xlsx" + sheet: "HEA" + headers: 6 + use_hxl: True + admin: + - ~ + - "#adm1+code" + admin_exact: True + input: + - "#inneed" + - "#inneed+idps" + - "#inneed+returnees" + - "#inneed+refugees" + - "#inneed+children" + - "#inneed+adults" + - "#inneed+elderly" + - "#inneed+disability" + output: + - "Inneed HEA" + - "Inneed HEA IDPs" + - "Inneed HEA Returnees" + - "Inneed HEA Refugees" + - "Inneed HEA Children" + - "Inneed HEA Adults" + - "Inneed HEA Elderly" + - "Inneed HEA Disabled" + output_hxl: + - "#inneed+hea" + - "#inneed+hea+idps" + - "#inneed+hea+returnees" + - "#inneed+hea+refugees" + - "#inneed+hea+age0_17" + - "#inneed+hea+age18_64" + - "#inneed+hea+age65plus" + - "#inneed+hea+disabled" + humanitarian_needs_afg_nut: + dataset: "afghanistan-humanitarian-needs-overview" + resource: "afg_hno_pin_2024.xlsx" + filename: "humanitarian_needs_afg.xlsx" + format: "xlsx" + sheet: "NUT" + headers: 6 + use_hxl: True + admin: + - ~ + - "#adm1+code" + admin_exact: True + input: + - "#inneed" + - "#inneed+idps" + - "#inneed+returnees" + - "#inneed+refugees" + - "#inneed+children" + - "#inneed+adults" + - "#inneed+elderly" + - "#inneed+disability" + output: + - "Inneed NUT" + - "Inneed NUT IDPs" + - "Inneed NUT Returnees" + - "Inneed NUT Refugees" + - "Inneed NUT Children" + - "Inneed NUT Adults" + - "Inneed NUT Elderly" + - "Inneed NUT Disabled" + output_hxl: + - "#inneed+nut" + - "#inneed+nut+idps" + - "#inneed+nut+returnees" + - "#inneed+nut+refugees" + - "#inneed+nut+age0_17" + - "#inneed+nut+age18_64" + - "#inneed+nut+age65plus" + - "#inneed+nut+disabled" + humanitarian_needs_afg_pro: + dataset: "afghanistan-humanitarian-needs-overview" + resource: "afg_hno_pin_2024.xlsx" + filename: "humanitarian_needs_afg.xlsx" + format: "xlsx" + sheet: "PRO" + headers: 6 + use_hxl: True + admin: + - ~ + - "#adm1+code" + admin_exact: True + input: + - "#inneed" + - "#inneed+idps" + - "#inneed+returnees" + - "#inneed+refugees" + - "#inneed+children" + - "#inneed+adults" + - "#inneed+elderly" + - "#inneed+disability" + output: + - "Inneed PRO" + - "Inneed PRO IDPs" + - "Inneed PRO Returnees" + - "Inneed PRO Refugees" + - "Inneed PRO Children" + - "Inneed PRO Adults" + - "Inneed PRO Elderly" + - "Inneed PRO Disabled" + output_hxl: + - "#inneed+pro" + - "#inneed+pro+idps" + - "#inneed+pro+returnees" + - "#inneed+pro+refugees" + - "#inneed+pro+age0_17" + - "#inneed+pro+age18_64" + - "#inneed+pro+age65plus" + - "#inneed+pro+disabled" + humanitarian_needs_afg_pro_cpn: + dataset: "afghanistan-humanitarian-needs-overview" + resource: "afg_hno_pin_2024.xlsx" + filename: "humanitarian_needs_afg.xlsx" + format: "xlsx" + sheet: "PRO_CP" + headers: 6 + use_hxl: True + admin: + - ~ + - "#adm1+code" + admin_exact: True + input: + - "#inneed" + - "#inneed+idps" + - "#inneed+returnees" + - "#inneed+refugees" + - "#inneed+children" + - "#inneed+adults" + - "#inneed+elderly" + - "#inneed+disability" + output: + - "Inneed PRO_CPN" + - "Inneed PRO_CPN IDPs" + - "Inneed PRO_CPN Returnees" + - "Inneed PRO_CPN Refugees" + - "Inneed PRO_CPN Children" + - "Inneed PRO_CPN Adults" + - "Inneed PRO_CPN Elderly" + - "Inneed PRO_CPN Disabled" + output_hxl: + - "#inneed+pro_cpn" + - "#inneed+pro_cpn+idps" + - "#inneed+pro_cpn+returnees" + - "#inneed+pro_cpn+refugees" + - "#inneed+pro_cpn+age0_17" + - "#inneed+pro_cpn+age18_64" + - "#inneed+pro_cpn+age65plus" + - "#inneed+pro_cpn+disabled" + humanitarian_needs_afg_pro_hlp: + dataset: "afghanistan-humanitarian-needs-overview" + resource: "afg_hno_pin_2024.xlsx" + filename: "humanitarian_needs_afg.xlsx" + format: "xlsx" + sheet: "PRO_HLP" + headers: 6 + use_hxl: True + admin: + - ~ + - "#adm1+code" + admin_exact: True + input: + - "#inneed" + - "#inneed+idps" + - "#inneed+returnees" + - "#inneed+refugees" + - "#inneed+children" + - "#inneed+adults" + - "#inneed+elderly" + - "#inneed+disability" + output: + - "Inneed PRO_HLP" + - "Inneed PRO_HLP IDPs" + - "Inneed PRO_HLP Returnees" + - "Inneed PRO_HLP Refugees" + - "Inneed PRO_HLP Children" + - "Inneed PRO_HLP Adults" + - "Inneed PRO_HLP Elderly" + - "Inneed PRO_HLP Disabled" + output_hxl: + - "#inneed+pro_hlp" + - "#inneed+pro_hlp+idps" + - "#inneed+pro_hlp+returnees" + - "#inneed+pro_hlp+refugees" + - "#inneed+pro_hlp+age0_17" + - "#inneed+pro_hlp+age18_64" + - "#inneed+pro_hlp+age65plus" + - "#inneed+pro_hlp+disabled" + humanitarian_needs_afg_pro_gbv: + dataset: "afghanistan-humanitarian-needs-overview" + resource: "afg_hno_pin_2024.xlsx" + filename: "humanitarian_needs_afg.xlsx" + format: "xlsx" + sheet: "PRO_GBV" + headers: 6 + use_hxl: True + admin: + - ~ + - "#adm1+code" + admin_exact: True + input: + - "#inneed" + - "#inneed+idps" + - "#inneed+returnees" + - "#inneed+refugees" + - "#inneed+children" + - "#inneed+adults" + - "#inneed+elderly" + - "#inneed+disability" + output: + - "Inneed PRO_GBV" + - "Inneed PRO_GBV IDPs" + - "Inneed PRO_GBV Returnees" + - "Inneed PRO_GBV Refugees" + - "Inneed PRO_GBV Children" + - "Inneed PRO_GBV Adults" + - "Inneed PRO_GBV Elderly" + - "Inneed PRO_GBV Disabled" + output_hxl: + - "#inneed+pro_gbv" + - "#inneed+pro_gbv+idps" + - "#inneed+pro_gbv+returnees" + - "#inneed+pro_gbv+refugees" + - "#inneed+pro_gbv+age0_17" + - "#inneed+pro_gbv+age18_64" + - "#inneed+pro_gbv+age65plus" + - "#inneed+pro_gbv+disabled" + humanitarian_needs_afg_pro_min: + dataset: "afghanistan-humanitarian-needs-overview" + resource: "afg_hno_pin_2024.xlsx" + filename: "humanitarian_needs_afg.xlsx" + format: "xlsx" + sheet: "PRO_MA" + headers: 6 + use_hxl: True + admin: + - ~ + - "#adm1+code" + admin_exact: True + input: + - "#inneed" + - "#inneed+idps" + - "#inneed+returnees" + - "#inneed+refugees" + - "#inneed+children" + - "#inneed+adults" + - "#inneed+elderly" + - "#inneed+disability" + output: + - "Inneed PRO_MIN" + - "Inneed PRO_MIN IDPs" + - "Inneed PRO_MIN Returnees" + - "Inneed PRO_MIN Refugees" + - "Inneed PRO_MIN Children" + - "Inneed PRO_MIN Adults" + - "Inneed PRO_MIN Elderly" + - "Inneed PRO_MIN Disabled" + output_hxl: + - "#inneed+pro_min" + - "#inneed+pro_min+idps" + - "#inneed+pro_min+returnees" + - "#inneed+pro_min+refugees" + - "#inneed+pro_min+age0_17" + - "#inneed+pro_min+age18_64" + - "#inneed+pro_min+age65plus" + - "#inneed+pro_min+disabled" + humanitarian_needs_afg_wsh: + dataset: "afghanistan-humanitarian-needs-overview" + resource: "afg_hno_pin_2024.xlsx" + filename: "humanitarian_needs_afg.xlsx" + format: "xlsx" + sheet: "WAS" + headers: 6 + use_hxl: True + admin: + - ~ + - "#adm1+code" + admin_exact: True + input: + - "#inneed" + - "#inneed+idps" + - "#inneed+returnees" + - "#inneed+refugees" + - "#inneed+children" + - "#inneed+adults" + - "#inneed+elderly" + - "#inneed+disability" + output: + - "Inneed WSH" + - "Inneed WSH IDPs" + - "Inneed WSH Returnees" + - "Inneed WSH Refugees" + - "Inneed WSH Children" + - "Inneed WSH Adults" + - "Inneed WSH Elderly" + - "Inneed WSH Disabled" + output_hxl: + - "#inneed+wsh" + - "#inneed+wsh+idps" + - "#inneed+wsh+returnees" + - "#inneed+wsh+refugees" + - "#inneed+wsh+age0_17" + - "#inneed+wsh+age18_64" + - "#inneed+wsh+age65plus" + - "#inneed+wsh+disabled" + humanitarian_needs_admintwo: humanitarian_needs_afg: dataset: "afghanistan-humanitarian-needs-overview" resource: "afg_hno_pin_2024.xlsx" + filename: "humanitarian_needs_afg.xlsx" format: "xlsx" sheet: "Total" headers: 6 @@ -88,7 +598,7 @@ humanitarian_needs_admintwo: use_hxl: True admin: - ~ - - "#admé+code" + - "#adm2+code" admin_exact: True input: - "#inneed +ind" diff --git a/tests/fixtures/input/humanitarian_needs_afg.xlsx b/tests/fixtures/input/humanitarian_needs_afg.xlsx new file mode 100644 index 00000000..34806525 Binary files /dev/null and b/tests/fixtures/input/humanitarian_needs_afg.xlsx differ diff --git a/tests/fixtures/input/humanitarian_needs_afg_afg_hno_pin_2024.xlsx b/tests/fixtures/input/humanitarian_needs_afg_afg_hno_pin_2024.xlsx deleted file mode 100644 index c0078e23..00000000 Binary files a/tests/fixtures/input/humanitarian_needs_afg_afg_hno_pin_2024.xlsx and /dev/null differ diff --git a/tests/fixtures/input/humanitarian_needs_tcd_tcd_hpc2023_rev_pin_targets.xlsx b/tests/fixtures/input/humanitarian_needs_tcd_tcd_hpc2023_rev_pin_targets.xlsx index 2b504701..19cb2d03 100644 Binary files a/tests/fixtures/input/humanitarian_needs_tcd_tcd_hpc2023_rev_pin_targets.xlsx and b/tests/fixtures/input/humanitarian_needs_tcd_tcd_hpc2023_rev_pin_targets.xlsx differ diff --git a/tests/test_main.py b/tests/test_main.py index 041080dd..150e7373 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -142,7 +142,7 @@ def test_pipelines(self, configuration, folder): count = session.scalar( select(func.count(DBHumanitarianNeeds.id)) ) - assert count == 26142 + assert count == 47126 org_mapping = pipelines.org._org_lookup assert org_mapping["Action against Hunger"] == {