Skip to content

Commit

Permalink
HDXDSYS-1754 Update HAPI Pipelines operational presence to read from …
Browse files Browse the repository at this point in the history
…HAPI dataset (#224)

* Read from HAPI operational presence dataset

* Read from HAPI operational presence dataset

* Fix admin test

* Update CHANGELOG
  • Loading branch information
mcarans authored Feb 12, 2025
1 parent b2ec81f commit 27b8e92
Show file tree
Hide file tree
Showing 41 changed files with 50,139 additions and 51,613 deletions.
4 changes: 2 additions & 2 deletions .config/pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ repos:
- id: end-of-file-fixer
- id: check-ast
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.6
rev: v0.9.6
hooks:
# Run the linter.
- id: ruff
Expand All @@ -17,7 +17,7 @@ repos:
- id: ruff-format
args: [--config, .config/ruff.toml]
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.5.15
rev: 0.5.29
hooks:
# Run the pip compile
- id: pip-compile
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.10.38] - 2025-02-11

### Changed

- Operational presence reads from HAPI dataset

## [0.10.37] - 2025-02-10

### Changed
Expand Down
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ requires-python = ">=3.8"
dependencies = [
"hapi-schema>= 0.9.7",
"hdx-python-api>= 6.3.7",
"hdx-python-country>= 3.8.7",
"hdx-python-database[postgresql]>= 1.3.4",
"hdx-python-scraper>= 2.5.5",
"hdx-python-country>= 3.8.8",
"hdx-python-database[postgresql]>= 1.3.5",
"hdx-python-scraper>= 2.5.8",
"hdx-python-utilities>= 3.8.3",
"libhxl",
"sqlalchemy"
Expand Down Expand Up @@ -97,7 +97,7 @@ run = """
"""

[tool.hatch.envs.hatch-static-analysis]
dependencies = ["ruff==0.8.6"]
dependencies = ["ruff==0.9.6"]

[tool.hatch.envs.hatch-static-analysis.scripts]
format-check = ["ruff format --config .config/ruff.toml --check --diff {args:.}",]
Expand Down
61 changes: 18 additions & 43 deletions src/hapi/pipelines/database/operational_presence.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from sqlalchemy.orm import Session

from ..utilities.batch_populate import batch_populate
from ..utilities.provider_admin_names import get_provider_name
from . import admins
from .base_uploader import BaseUploader
from .metadata import Metadata
Expand All @@ -38,49 +37,39 @@ def populate(self) -> None:
logger.info("Populating operational presence table")
reader = Read.get_reader("hdx")
dataset = reader.read_dataset(
"global-operational-presence", self._configuration
"hdx-hapi-operational-presence", self._configuration
)
resource = dataset.get_resource()
url = resource["url"]
headers, rows = reader.get_tabular_rows(url, dict_form=True)
hxltag_to_header = invert_dictionary(next(rows))
max_admin_level = self._admins.get_max_admin_from_hxltags(
hxltag_to_header
)
resources_to_ignore = []
operational_presence_rows = []
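# Columns read directly from each row below: error, resource_hdx_id, dataset_hdx_id, location_code, provider_admin1_name, provider_admin2_name, org_acronym, org_name, sector_code, reference_period_start, reference_period_end (admin columns are looked up via hxltag_to_header)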

for row in rows:
resource_id = row["Resource Id"]
if row["error"]:
continue
resource_id = row["resource_hdx_id"]
if resource_id in resources_to_ignore:
continue
dataset_id = row["Dataset Id"]
dataset_id = row["dataset_hdx_id"]
dataset_name = self._metadata.get_dataset_name(dataset_id)
if not dataset_name:
dataset_name = dataset_id
admin_level = self._admins.get_admin_level_from_row(
hxltag_to_header, row, max_admin_level
hxltag_to_header, row, 2
)
actual_admin_level = admin_level
# Higher admin levels treat as admin 2
if admin_level > 2:
error_when_duplicate = False
admin_level = 2
else:
error_when_duplicate = True
admin2_ref = self._admins.get_admin2_ref_from_row(
hxltag_to_header,
row,
dataset_name,
"OperationalPresence",
admin_level,
)
if not admin2_ref:
continue

countryiso3 = row["Country ISO3"]
provider_admin1_name = get_provider_name(row, "Admin 1 Name")
provider_admin2_name = get_provider_name(row, "Admin 2 Name")
countryiso3 = row["location_code"]
provider_admin1_name = row["provider_admin1_name"] or ""
provider_admin2_name = row["provider_admin2_name"] or ""

resource_name = self._metadata.get_resource_name(resource_id)
if not resource_name:
Expand All @@ -101,36 +90,22 @@ def populate(self) -> None:
resources_to_ignore.append(resource_id)
continue

resource_id = row["Resource Id"]
operational_presence_row = {
"resource_hdx_id": resource_id,
"admin2_ref": admin2_ref,
"provider_admin1_name": provider_admin1_name,
"provider_admin2_name": provider_admin2_name,
"org_acronym": row["Org Acronym"],
"org_name": row["Org Name"],
"sector_code": row["Sector"],
"reference_period_start": parse_date(row["Start Date"]),
"org_acronym": row["org_acronym"],
"org_name": row["org_name"],
"sector_code": row["sector_code"],
"reference_period_start": parse_date(
row["reference_period_start"]
),
"reference_period_end": parse_date(
row["End Date"], max_time=True
row["reference_period_end"], max_time=True
),
}
if operational_presence_row in operational_presence_rows:
if error_when_duplicate:
self._error_handler.add_message(
"OperationalPresence",
dataset_name,
f"admin level {actual_admin_level} row {str(operational_presence_row)} is a duplicate in {countryiso3}",
)
else:
self._error_handler.add_message(
"OperationalPresence",
dataset_name,
f"admin level {actual_admin_level} duplicate rows in {countryiso3}",
message_type="warning",
)
else:
operational_presence_rows.append(operational_presence_row)
operational_presence_rows.append(operational_presence_row)
logger.info("Writing to operational presence table")
batch_populate(
operational_presence_rows, self._session, DBOperationalPresence
Expand Down
10 changes: 5 additions & 5 deletions src/hapi/pipelines/database/org.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,22 +28,22 @@ def populate(self) -> None:
logger.info("Populating org table")
reader = Read.get_reader("hdx")
dataset = reader.read_dataset(
"global-organisations", self._configuration
"hdx-hapi-organisations", self._configuration
)
self._metadata.add_dataset(dataset)
resource = dataset.get_resource()
url = resource["url"]
headers, rows = reader.get_tabular_rows(url, dict_form=True)
# Columns: acronym, name, org_type_code
for row in rows:
acronym = row["Acronym"]
acronym = row["acronym"]
# Ignore HXL row
if acronym == "#org+acronym":
continue
org_row = DBOrg(
acronym=row["Acronym"],
name=row["Name"],
org_type_code=row["Org Type Code"],
acronym=row["acronym"],
name=row["name"],
org_type_code=row["org_type_code"],
)
self._session.add(org_row)
self._session.commit()
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"archived": false, "batch": "15ebac04-de80-4ce6-8010-90b397081b46", "creator_user_id": "e780fabb-29f0-4c65-92a9-ed8896f7faf6", "data_update_frequency": "180", "dataset_date": "[2021-01-01T00:00:00 TO 2021-08-01T23:59:59]", "dataset_preview": "first_resource", "dataset_source": "Multiple Sources (Humanitarian Partners in Zimbabwe)", "due_date": "2022-07-16T22:44:17", "has_geodata": false, "has_quickcharts": true, "has_showcases": false, "id": "0fb9ee35-c701-4f26-af4c-0ee1222e8330", "is_requestdata_type": false, "isopen": true, "last_modified": "2022-01-17T22:44:17.728974", "license_id": "cc-by", "license_title": "Creative Commons Attribution International", "license_url": "http://www.opendefinition.org/licenses/cc-by", "maintainer": "c55b890e-7492-4a74-9933-fd9497ad0843", "maintainer_email": null, "metadata_created": "2021-10-21T14:21:16.549667", "metadata_modified": "2024-12-11T06:10:10.987713", "methodology": "Registry", "name": "zimbabwe-operational-presence", "notes": "Who is doing what and where in zimbabwe", "num_resources": 1, "num_tags": 3, "organization": {"id": "37a0c6e3-3bcd-4457-aa81-6ddeb99dd203", "name": "ocha-rosea", "title": "OCHA Regional Office for Southern and Eastern Africa (ROSEA)", "type": "organization", "description": "In 2016, OCHA merged its offices in Johannesburg and Nairobi into the OCHA Regional Office for Southern and Eastern Africa (ROSEA), covering 25 countries.\r\n\r\nComprising the Horn of Africa and the Great Lakes, eastern Africa is a region in which emergencies tend to be large scale, resulting in significant displacement and other needs. For this reason, OCHA maintains country presences in Burundi, DRC, Eritrea, Ethiopia, Kenya, Somalia, South Sudan and Sudan. The southern Africa region has fewer protracted humanitarian crises, but is prone to drought and floods. For this reason, OCHA does not have country offices in the region. 
In both regions the 2015-2016 El Nino weather phenomenon continues to have a significant humanitarian impact.\r\n\r\nFor those countries where OCHA does not have a presence, it is essential that OCHA can deploy from the regional hub swiftly and effectively in times of emergency, and to otherwise ensure preparedness for potential crises. OCHA works closely with local authorities and partners to bolster national disaster preparedness in these countries and supports response.\r\n\r\nWhether we\u2019re mobilizing relief money or raising awareness of forgotten crises, it\u2019s our mandate to keep world attention focused on humanitarian issues. For this reason, we produce and release timely regional reporting and analytical products to strengthen the humanitarian case and highlight the needs of the most vulnerable in the region. OCHA ROSEA also provides a platform for the analysis of cross-border issues of humanitarian concern, such as facilitating multi-country preparedness and planning consultations. 
OCHA ROSEA also works to strengthen collaboration on emergency preparedness and response with regional bodies, such as the Intergovernmental Authority on Development (IGAD) in eastern Africa and the Southern Africa Development Community (SADC) in southern Africa.", "image_url": "", "created": "2014-04-28T17:49:00.350372", "is_organization": true, "approval_status": "approved", "state": "active"}, "overdue_date": "2022-08-15T22:44:17", "owner_org": "37a0c6e3-3bcd-4457-aa81-6ddeb99dd203", "package_creator": "marindi", "pageviews_last_14_days": 0, "private": false, "qa_completed": false, "review_date": "2021-10-21T14:23:08.552656", "solr_additions": "{\"countries\": [\"Zimbabwe\"]}", "state": "active", "subnational": "1", "title": "Zimbabwe: Operational Presence", "total_res_downloads": 119, "type": "dataset", "url": null, "version": null, "extras": [{"key": "qa_checklist", "value": "{\"modified_date\": \"2022-01-14T09:58:56.387392\", \"version\": 1, \"dataProtection\": {}, \"metadata\": {}}"}, {"key": "qa_completed", "value": "true"}], "groups": [{"description": "", "display_name": "Zimbabwe", "id": "zwe", "image_display_url": "", "name": "zwe", "title": "Zimbabwe"}], "tags": [{"display_name": "hxl", "id": "a0fbb23a-6aad-4ccc-8062-e9ef9f20e5d2", "name": "hxl", "state": "active", "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1"}, {"display_name": "operational presence", "id": "a25059f9-7e1f-49be-b629-ccccd97a95f8", "name": "operational presence", "state": "active", "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1"}, {"display_name": "who is doing what and where-3w-4w-5w", "id": "ec53893c-6dba-4656-978b-4a32289ea2eb", "name": "who is doing what and where-3w-4w-5w", "state": "active", "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1"}], "relationships_as_subject": [], "relationships_as_object": [], "is_fresh": false, "update_status": "needs_update", "x_resource_grouping": [], "resources": [{"alt_url": 
"https://data.humdata.org/dataset/0fb9ee35-c701-4f26-af4c-0ee1222e8330/resource/e66d4346-5c48-4a2c-840a-f0ffad339318/download/", "cache_last_updated": null, "cache_url": null, "created": "2021-10-21T14:21:22.318182", "datastore_active": false, "description": "Who is doing what and where in zimbabwe (3w). This is specific to operational partners responding only to the 2021 HRP targeted interventions", "download_url": "https://data.humdata.org/dataset/0fb9ee35-c701-4f26-af4c-0ee1222e8330/resource/e66d4346-5c48-4a2c-840a-f0ffad339318/download/zim_3w_august_2021.xlsx", "format": "XLSX", "hash": "", "hdx_rel_url": "/dataset/0fb9ee35-c701-4f26-af4c-0ee1222e8330/resource/e66d4346-5c48-4a2c-840a-f0ffad339318/download/zim_3w_august_2021.xlsx", "id": "e66d4346-5c48-4a2c-840a-f0ffad339318", "in_hapi": "yes", "last_modified": "2022-01-17T22:44:17.728974", "metadata_modified": "2024-12-11T06:10:11.018561", "microdata": false, "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "mimetype_inner": null, "name": "Zim_3W_August_2021.xlsx", "originalHash": "459991226", "package_id": "0fb9ee35-c701-4f26-af4c-0ee1222e8330", "pii": "false", "position": 0, "resource_type": "file.upload", "size": 202589, "state": "active", "url": "https://data.humdata.org/dataset/0fb9ee35-c701-4f26-af4c-0ee1222e8330/resource/e66d4346-5c48-4a2c-840a-f0ffad339318/download/zim_3w_august_2021.xlsx", "url_type": "upload"}]}
{"archived": false, "batch": "15ebac04-de80-4ce6-8010-90b397081b46", "creator_user_id": "e780fabb-29f0-4c65-92a9-ed8896f7faf6", "data_update_frequency": "180", "dataset_date": "[2021-01-01T00:00:00 TO 2021-08-01T23:59:59]", "dataset_preview": "first_resource", "dataset_source": "Multiple Sources (Humanitarian Partners in Zimbabwe)", "due_date": "2022-07-16T22:44:17", "has_geodata": false, "has_quickcharts": true, "has_showcases": false, "id": "0fb9ee35-c701-4f26-af4c-0ee1222e8330", "is_requestdata_type": false, "isopen": true, "last_modified": "2022-01-17T22:44:17.728974", "license_id": "cc-by", "license_title": "Creative Commons Attribution International", "license_url": "http://www.opendefinition.org/licenses/cc-by", "maintainer": "c55b890e-7492-4a74-9933-fd9497ad0843", "maintainer_email": null, "metadata_created": "2021-10-21T14:21:16.549667", "metadata_modified": "2025-02-05T21:43:20.521432", "methodology": "Registry", "name": "zimbabwe-operational-presence", "notes": "Who is doing what and where in zimbabwe", "num_resources": 1, "num_tags": 3, "organization": {"id": "37a0c6e3-3bcd-4457-aa81-6ddeb99dd203", "name": "ocha-rosea", "title": "OCHA Regional Office for Southern and Eastern Africa (ROSEA)", "type": "organization", "description": "In 2016, OCHA merged its offices in Johannesburg and Nairobi into the OCHA Regional Office for Southern and Eastern Africa (ROSEA), covering 25 countries.\r\n\r\nComprising the Horn of Africa and the Great Lakes, eastern Africa is a region in which emergencies tend to be large scale, resulting in significant displacement and other needs. For this reason, OCHA maintains country presences in Burundi, DRC, Eritrea, Ethiopia, Kenya, Somalia, South Sudan and Sudan. The southern Africa region has fewer protracted humanitarian crises, but is prone to drought and floods. For this reason, OCHA does not have country offices in the region. 
In both regions the 2015-2016 El Nino weather phenomenon continues to have a significant humanitarian impact.\r\n\r\nFor those countries where OCHA does not have a presence, it is essential that OCHA can deploy from the regional hub swiftly and effectively in times of emergency, and to otherwise ensure preparedness for potential crises. OCHA works closely with local authorities and partners to bolster national disaster preparedness in these countries and supports response.\r\n\r\nWhether we\u2019re mobilizing relief money or raising awareness of forgotten crises, it\u2019s our mandate to keep world attention focused on humanitarian issues. For this reason, we produce and release timely regional reporting and analytical products to strengthen the humanitarian case and highlight the needs of the most vulnerable in the region. OCHA ROSEA also provides a platform for the analysis of cross-border issues of humanitarian concern, such as facilitating multi-country preparedness and planning consultations. 
OCHA ROSEA also works to strengthen collaboration on emergency preparedness and response with regional bodies, such as the Intergovernmental Authority on Development (IGAD) in eastern Africa and the Southern Africa Development Community (SADC) in southern Africa.", "image_url": "", "created": "2014-04-28T17:49:00.350372", "is_organization": true, "approval_status": "approved", "state": "active"}, "overdue_date": "2022-08-15T22:44:17", "owner_org": "37a0c6e3-3bcd-4457-aa81-6ddeb99dd203", "package_creator": "marindi", "pageviews_last_14_days": 0, "private": false, "qa_completed": false, "review_date": "2021-10-21T14:23:08.552656", "solr_additions": "{\"countries\": [\"Zimbabwe\"]}", "state": "active", "subnational": "1", "title": "Zimbabwe: Operational Presence", "total_res_downloads": 0, "type": "dataset", "updated_by_script": "HDXINTERNAL:HDXPythonLibrary/6.3.2-Standardisation (2025-02-05T21:43:20.325631)", "url": null, "version": null, "extras": [{"key": "qa_checklist", "value": "{\"modified_date\": \"2022-01-14T09:58:56.387392\", \"version\": 1, \"dataProtection\": {}, \"metadata\": {}}"}, {"key": "qa_completed", "value": "true"}], "groups": [{"description": "", "display_name": "Zimbabwe", "id": "zwe", "image_display_url": "", "name": "zwe", "title": "Zimbabwe"}], "tags": [{"display_name": "hxl", "id": "a0fbb23a-6aad-4ccc-8062-e9ef9f20e5d2", "name": "hxl", "state": "active", "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1"}, {"display_name": "operational presence", "id": "a25059f9-7e1f-49be-b629-ccccd97a95f8", "name": "operational presence", "state": "active", "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1"}, {"display_name": "who is doing what and where-3w-4w-5w", "id": "ec53893c-6dba-4656-978b-4a32289ea2eb", "name": "who is doing what and where-3w-4w-5w", "state": "active", "vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1"}], "relationships_as_subject": [], "relationships_as_object": [], "is_fresh": false, "update_status": "needs_update", 
"x_resource_grouping": [], "resources": [{"alt_url": "https://stage.data-humdata-org.ahconu.org/dataset/0fb9ee35-c701-4f26-af4c-0ee1222e8330/resource/e66d4346-5c48-4a2c-840a-f0ffad339318/download/", "cache_last_updated": null, "cache_url": null, "created": "2021-10-21T14:21:22.318182", "datastore_active": false, "description": "Who is doing what and where in zimbabwe (3w). This is specific to operational partners responding only to the 2021 HRP targeted interventions", "download_url": "https://stage.data-humdata-org.ahconu.org/dataset/0fb9ee35-c701-4f26-af4c-0ee1222e8330/resource/e66d4346-5c48-4a2c-840a-f0ffad339318/download/zim_3w_august_2021.xlsx", "format": "XLSX", "hash": "", "hdx_rel_url": "/dataset/0fb9ee35-c701-4f26-af4c-0ee1222e8330/resource/e66d4346-5c48-4a2c-840a-f0ffad339318/download/zim_3w_august_2021.xlsx", "id": "e66d4346-5c48-4a2c-840a-f0ffad339318", "in_hapi": "yes", "last_modified": "2022-01-17T22:44:17.728974", "metadata_modified": "2024-12-11T06:10:11.018561", "microdata": false, "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "mimetype_inner": null, "name": "Zim_3W_August_2021.xlsx", "originalHash": "459991226", "package_id": "0fb9ee35-c701-4f26-af4c-0ee1222e8330", "pii": "false", "position": 0, "resource_type": "file.upload", "size": 202589, "state": "active", "url": "https://stage.data-humdata-org.ahconu.org/dataset/0fb9ee35-c701-4f26-af4c-0ee1222e8330/resource/e66d4346-5c48-4a2c-840a-f0ffad339318/download/zim_3w_august_2021.xlsx", "url_type": "upload"}]}
Loading

0 comments on commit 27b8e92

Please sign in to comment.