Skip to content

Commit

Permalink
Merge pull request #190 from OCHA-DAP/HDXDSYS-913-error-messages
Browse files Browse the repository at this point in the history
HDXDSYS-913 error messages
  • Loading branch information
b-j-mills authored Nov 19, 2024
2 parents 21f58f1 + 5944937 commit 10d5278
Show file tree
Hide file tree
Showing 19 changed files with 405 additions and 249 deletions.
1 change: 1 addition & 0 deletions .github/workflows/db_export.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ jobs:
PREPREFIX: ${{ secrets.HDX_PIPELINE_PREPREFIX }}
USER_AGENT: ${{ secrets.USER_AGENT }}
BASIC_AUTHS: ${{ secrets.BASIC_AUTHS }}
ERR_TO_HDX: ${{ secrets.ERR_TO_HDX }}
run: python3.11 -m hapi.pipelines.app -db "postgresql+psycopg://postgres:postgres@localhost:5432/hapi"

- name: Dump PostgreSQL Views
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.10.18] - 2024-11-19

### Changed

- Centralized error handling and added function to write errors to HDX resource metadata

## [0.10.17] - 2024-11-11

### Changed
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ requires-python = ">=3.8"

dependencies = [
"hapi-schema>= 0.9.2",
"hdx-python-api>= 6.3.4",
"hdx-python-api>= 6.3.5",
"hdx-python-country>= 3.8.1",
"hdx-python-database[postgresql]>= 1.3.4",
"hdx-python-scraper>= 2.5.0",
Expand Down
10 changes: 5 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ ckanapi==4.8
# via hdx-python-api
click==8.1.7
# via typer
coverage==7.6.4
coverage==7.6.7
# via pytest-cov
defopt==6.4.0
# via hdx-python-api
Expand Down Expand Up @@ -56,11 +56,11 @@ gspread==6.1.4
# via hdx-python-scraper
hapi-schema==0.9.3
# via hapi-pipelines (pyproject.toml)
hdx-python-api==6.3.4
hdx-python-api==6.3.5
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-scraper
hdx-python-country==3.8.3
hdx-python-country==3.8.4
# via
# hapi-pipelines (pyproject.toml)
# hdx-python-api
Expand Down Expand Up @@ -228,7 +228,7 @@ ruamel-yaml==0.18.6
# via hdx-python-utilities
ruamel-yaml-clib==0.2.12
# via ruamel-yaml
setuptools==75.4.0
setuptools==75.5.0
# via ckanapi
shellingham==1.5.4
# via typer
Expand Down Expand Up @@ -263,7 +263,7 @@ text-unidecode==1.3
# via python-slugify
typeguard==4.4.1
# via inflect
typer==0.13.0
typer==0.13.1
# via frictionless
typing-extensions==4.12.2
# via
Expand Down
15 changes: 15 additions & 0 deletions src/hapi/pipelines/app/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@ def parse_args():
action="store_true",
help="Debug",
)
parser.add_argument(
"-ehx",
"--err-to-hdx",
default=False,
action="store_true",
help="Write relevant found errors to HDX metadata",
)
return parser.parse_args()


Expand All @@ -98,6 +105,7 @@ def main(
save: bool = False,
use_saved: bool = False,
debug: bool = False,
err_to_hdx: bool = False,
**ignore,
) -> None:
"""Run HAPI. Either a database connection string (db_uri) or database
Expand All @@ -114,6 +122,7 @@ def main(
save (bool): Whether to save state for testing. Defaults to False.
use_saved (bool): Whether to use saved state for testing. Defaults to False.
debug (bool): Whether to output debug info. Defaults to False.
err_to_hdx (bool): Whether to write any errors to HDX metadata. Defaults to False.
Returns:
None
Expand Down Expand Up @@ -160,6 +169,7 @@ def main(
)
pipelines.run()
pipelines.output()
pipelines.output_errors(err_to_hdx)
if debug:
pipelines.debug("debug")
logger.info("HAPI pipelines completed!")
Expand Down Expand Up @@ -197,6 +207,9 @@ def main(
basic_auths = string_params_to_dict(ba)
else:
basic_auths = None
ehx = args.err_to_hdx
if ehx is None:
ehx = getenv("ERR_TO_HDX")
project_configs = [
"conflict_event.yaml",
"core.yaml",
Expand All @@ -213,6 +226,7 @@ def main(
project_config_dict = add_defaults(project_config_dict)
facade(
main,
hdx_read_only=False,
user_agent_config_yaml=join(expanduser("~"), ".useragents.yaml"),
user_agent_lookup=lookup,
project_config_dict=project_config_dict,
Expand All @@ -224,4 +238,5 @@ def main(
save=args.save,
use_saved=args.use_saved,
debug=args.debug,
err_to_hdx=ehx,
)
13 changes: 13 additions & 0 deletions src/hapi/pipelines/app/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from hapi.pipelines.database.sector import Sector
from hapi.pipelines.database.wfp_commodity import WFPCommodity
from hapi.pipelines.database.wfp_market import WFPMarket
from hapi.pipelines.utilities.error_handling import ErrorManager

logger = logging.getLogger(__name__)

Expand All @@ -57,6 +58,7 @@ def __init__(
countries=countries_to_run,
)
self.countries = self.locations.hapi_countries
self.error_manager = ErrorManager()
reader = Read.get_reader("hdx")
libhxl_dataset = AdminLevel.get_libhxl_dataset(
retriever=reader
Expand Down Expand Up @@ -235,6 +237,7 @@ def output_operational_presence(self):
sector=self.sector,
results=results,
config=self.configuration,
error_manager=self.error_manager,
)
operational_presence.populate()

Expand All @@ -248,6 +251,7 @@ def output_food_security(self):
admintwo=self.admintwo,
countryiso3s=self.countries,
configuration=self.configuration,
error_manager=self.error_manager,
)
food_security.populate()

Expand All @@ -262,6 +266,7 @@ def output_humanitarian_needs(self):
admins=self.admins,
sector=self.sector,
configuration=self.configuration,
error_manager=self.error_manager,
)
humanitarian_needs.populate()

Expand Down Expand Up @@ -304,6 +309,7 @@ def output_idps(self):
metadata=self.metadata,
admins=self.admins,
results=results,
error_manager=self.error_manager,
)
idps.populate()

Expand All @@ -315,6 +321,7 @@ def output_funding(self):
countryiso3s=self.countries,
locations=self.locations,
configuration=self.configuration,
error_manager=self.error_manager,
)
funding.populate()

Expand Down Expand Up @@ -343,6 +350,7 @@ def output_conflict_event(self):
admins=self.admins,
results=results,
config=self.configuration,
error_manager=self.error_manager,
)
conflict_event.populate()

Expand All @@ -361,6 +369,7 @@ def output_food_prices(self):
adminone=self.adminone,
admintwo=self.admintwo,
configuration=self.configuration,
error_manager=self.error_manager,
)
wfp_market.populate()
food_price = FoodPrice(
Expand All @@ -371,6 +380,7 @@ def output_food_prices(self):
currency=self.currency,
commodity=wfp_commodity,
market=wfp_market,
error_manager=self.error_manager,
)
food_price.populate()

Expand All @@ -396,3 +406,6 @@ def output(self):

def debug(self, folder: str) -> None:
self.org.output_org_map(folder)

def output_errors(self, err_to_hdx: bool) -> None:
self.error_manager.output_errors(err_to_hdx)
8 changes: 6 additions & 2 deletions src/hapi/pipelines/configs/conflict_event.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#Conflict event config file
conflict_event_error_messages:
colombia-acled-conflict-data: "El Tablon is miscoded, these rows have been removed"
nigeria-acled-conflict-data: "Edati and Ijebu Ode are miscoded, these rows have been removed"
colombia-acled-conflict-data|colombia_HRP_political_violence_events_and_fatalities_by_month-year: "El Tablon is miscoded"
colombia-acled-conflict-data|colombia_HRP_civilian_targeting_events_and_fatalities_by_month-year: "El Tablon is miscoded"
colombia-acled-conflict-data|colombia_HRP_demonstration_events_by_month-year: "El Tablon is miscoded"
nigeria-acled-conflict-data|nigeria_HRP_political_violence_events_and_fatalities_by_month-year: "Edati and Ijebu Ode are miscoded"
nigeria-acled-conflict-data|nigeria_HRP_civilian_targeting_events_and_fatalities_by_month-year: "Edati and Ijebu Ode are miscoded"
nigeria-acled-conflict-data|nigeria_HRP_demonstration_events_by_month-year: "Edati and Ijebu Ode are miscoded"

conflict_event_default:
scrapers_with_defaults:
Expand Down
16 changes: 11 additions & 5 deletions src/hapi/pipelines/database/admins.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
from abc import ABC
from typing import Dict, List, Literal, Optional, Set
from typing import Dict, List, Literal, Optional

import hxl
from hapi_schema.db_admin1 import DBAdmin1
Expand All @@ -14,7 +14,7 @@
from sqlalchemy import select
from sqlalchemy.orm import Session

from ..utilities.logging_helpers import add_missing_value_message
from ..utilities.error_handling import ErrorManager
from .base_uploader import BaseUploader
from .locations import Locations

Expand Down Expand Up @@ -159,15 +159,21 @@ def get_admin_level(self, pcode: str) -> _ADMIN_LEVELS_LITERAL:
raise ValueError(f"Pcode {pcode} not in admin1 or admin2 tables.")

def get_admin2_ref(
self, admin_level: str, admin_code: str, dataset_name: str, errors: Set
self,
admin_level: str,
admin_code: str,
dataset_name: str,
pipeline: str,
error_manager: ErrorManager,
) -> Optional[int]:
admin2_code = get_admin2_code_based_on_level(
admin_code=admin_code, admin_level=admin_level
)
ref = self.admin2_data.get(admin2_code)
if ref is None:
add_missing_value_message(
errors, dataset_name, "admin 2 code", admin2_code
# TODO: resolve pipeline name
error_manager.add_missing_value_message(
pipeline, dataset_name, "admin 2 code", admin2_code
)
return ref

Expand Down
28 changes: 19 additions & 9 deletions src/hapi/pipelines/database/conflict_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sqlalchemy.orm import Session

from ..utilities.batch_populate import batch_populate
from ..utilities.logging_helpers import add_message
from ..utilities.error_handling import ErrorManager
from ..utilities.provider_admin_names import get_provider_name
from . import admins
from .base_uploader import BaseUploader
Expand All @@ -26,16 +26,17 @@ def __init__(
admins: admins.Admins,
results: Dict,
config: Dict,
error_manager: ErrorManager,
):
super().__init__(session)
self._metadata = metadata
self._admins = admins
self._results = results
self._config = config
self._error_manager = error_manager

def populate(self) -> None:
logger.info("Populating conflict event table")
errors = set()
for dataset in self._results.values():
dataset_name = dataset["hdx_stub"]
conflict_event_rows = []
Expand Down Expand Up @@ -115,17 +116,26 @@ def populate(self) -> None:
conflict_event_rows.append(conflict_event_row)

if number_duplicates > 0:
add_message(
errors, dataset_name, f"{number_duplicates} duplicate rows"
self._error_manager.add_message(
"ConflictEvent",
dataset_name,
f"{number_duplicates} duplicate rows",
)
if len(conflict_event_rows) == 0:
add_message(errors, dataset_name, "no rows found")
self._error_manager.add_message(
"ConflictEvent", dataset_name, "no rows found"
)
continue
batch_populate(conflict_event_rows, self._session, DBConflictEvent)

for dataset, msg in self._config.get(
for identifier, message in self._config.get(
"conflict_event_error_messages", {}
).items():
add_message(errors, dataset, msg)
for error in sorted(errors):
logger.error(error)
dataset, resource_name = identifier.split("|")
self._error_manager.add_message(
"ConfictEvent",
dataset,
message,
resource_name=resource_name,
err_to_hdx=True,
)
14 changes: 5 additions & 9 deletions src/hapi/pipelines/database/food_price.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from hdx.utilities.dateparse import parse_date
from sqlalchemy.orm import Session

from ..utilities.logging_helpers import add_missing_value_message
from ..utilities.error_handling import ErrorManager
from .base_uploader import BaseUploader
from .currency import Currency
from .metadata import Metadata
Expand All @@ -29,6 +29,7 @@ def __init__(
currency: Currency,
commodity: WFPCommodity,
market: WFPMarket,
error_manager: ErrorManager,
):
super().__init__(session)
self._datasetinfo = datasetinfo
Expand All @@ -37,6 +38,7 @@ def __init__(
self._currency = currency
self._commodity = commodity
self._market = market
self._error_manager = error_manager

def populate(self) -> None:
logger.info("Populating WFP price table")
Expand All @@ -55,8 +57,6 @@ def populate(self) -> None:
"admin_single": countryiso3,
}
)
warnings = set()
errors = set()
for datasetinfo in datasetinfos:
headers, iterator = reader.read(datasetinfo)
hapi_dataset_metadata = datasetinfo["hapi_dataset_metadata"]
Expand All @@ -72,8 +72,8 @@ def populate(self) -> None:
market = row["market"]
market_code = self._market.get_market_code(countryiso3, market)
if not market_code:
add_missing_value_message(
errors, dataset_name, "market code", market
self._error_manager.add_missing_value_message(
"FoodPrice", dataset_name, "market code", market
)
continue
commodity_code = self._commodity.get_commodity_code(
Expand Down Expand Up @@ -109,7 +109,3 @@ def populate(self) -> None:
)
self._session.add(price_row)
self._session.commit()
for warning in sorted(warnings):
logger.warning(warning)
for error in sorted(errors):
logger.error(error)
Loading

0 comments on commit 10d5278

Please sign in to comment.