Commit

Round to 2dp
mcarans committed Dec 4, 2024
1 parent e237f7c commit f990274
Showing 10 changed files with 4,328 additions and 4,233 deletions.
20 changes: 10 additions & 10 deletions requirements.txt
@@ -20,7 +20,7 @@ ckanapi==4.8
# via hdx-python-api
click==8.1.7
# via typer
-coverage==7.6.7
+coverage==7.6.8
# via pytest-cov
defopt==6.4.0
# via hdx-python-api
@@ -55,7 +55,7 @@ hdx-python-utilities==3.7.4
# hdx-python-country
humanize==4.11.0
# via frictionless
-identify==2.6.2
+identify==2.6.3
# via pre-commit
idna==3.10
# via
@@ -121,15 +121,15 @@ pockets==0.9.1
# via sphinxcontrib-napoleon
pre-commit==4.0.1
# via hdx-scraper-ophi (pyproject.toml)
-pydantic==2.10.0
+pydantic==2.10.3
# via frictionless
-pydantic-core==2.27.0
+pydantic-core==2.27.1
# via pydantic
pygments==2.18.0
# via rich
pyphonetics==0.5.3
# via hdx-python-country
-pytest==8.3.3
+pytest==8.3.4
# via
# hdx-scraper-ophi (pyproject.toml)
# pytest-check
@@ -175,7 +175,7 @@ rfc3986==2.0.0
# via frictionless
rich==13.9.4
# via typer
-rpds-py==0.21.0
+rpds-py==0.22.3
# via
# jsonschema
# referencing
@@ -191,7 +191,7 @@ simpleeval==1.0.3
# via frictionless
simplejson==3.19.3
# via ckanapi
-six==1.16.0
+six==1.17.0
# via
# ckanapi
# pockets
@@ -213,7 +213,7 @@ text-unidecode==1.3
# via python-slugify
typeguard==4.4.1
# via inflect
-typer==0.13.1
+typer==0.15.1
# via frictionless
typing-extensions==4.12.2
# via
@@ -232,9 +232,9 @@ urllib3==2.2.3
# requests
validators==0.34.0
# via frictionless
-virtualenv==20.27.1
+virtualenv==20.28.0
# via pre-commit
-wheel==0.45.0
+wheel==0.45.1
# via libhxl
xlrd==2.0.1
# via hdx-python-utilities
13 changes: 9 additions & 4 deletions src/hdx/scraper/ophi/__main__.py
@@ -25,7 +25,7 @@
lookup = "hdx-scraper-ophi"
updated_by_script = "HDX Scraper: OPHI"

-create_country_datasets = False
+create_country_datasets = True


def main(
@@ -80,8 +80,6 @@ def update_dataset(dataset):
mpi_subnational_path,
trend_path,
)
-if create_country_datasets:
-dataset_generator.load_showcase_links(retriever)
standardised_global = pipeline.get_standardised_global()
standardised_global_trend = (
pipeline.get_standardised_global_trend()
@@ -101,6 +99,7 @@ def update_dataset(dataset):
update_dataset(dataset)

if create_country_datasets:
+dataset_generator.load_showcase_links(retriever)
for (
countryiso3,
standardised_country,
@@ -109,7 +108,7 @@
countryiso3
)
standardised_country_trend = (
-standardised_countries_trend.get(countryiso3)
+standardised_countries_trend.get(countryiso3, {})
)
dataset = dataset_generator.generate_dataset(
folder,
@@ -121,6 +120,12 @@
)
dataset.add_country_location(countryiso3)
update_dataset(dataset)
+showcase = dataset_generator.generate_showcase(
+countryiso3, countryname
+)
+if showcase:
+showcase.create_in_hdx()
+showcase.add_dataset(dataset)

logger.info("HDX Scraper OPHI pipeline completed!")

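A note on the `.get(countryiso3, {})` change in this file: previously a country with no trend data produced None, which downstream code had to guard against; with a default of `{}` it can be passed straight through. A small self-contained illustration in plain Python (the sample data is made up, not the pipeline's real structures):

# Hypothetical stand-in for the pipeline's per-country trend lookup.
standardised_countries_trend = {"AFG": {"mpi": "0.27"}}

# Old call: missing countries came back as None.
assert standardised_countries_trend.get("KEN") is None

# New call: missing countries come back as an empty container, so
# len(), iteration and truthiness checks work without a None guard.
trend_rows = standardised_countries_trend.get("KEN", {})
assert trend_rows == {} and not trend_rows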
25 changes: 20 additions & 5 deletions src/hdx/scraper/ophi/dataset_generator.py
@@ -99,16 +99,31 @@ def generate_dataset_metadata(
dataset.set_subnational(True)
return dataset

+@staticmethod
+def get_title(countryname: str) -> str:
+return f"{countryname} Multi Dimensional Poverty Index"
+
+@staticmethod
+def get_name(countryname: str) -> str:
+return f"{countryname} MPI"
+
def generate_showcase(
-self, name: str, title: str, countryiso3: str
-) -> Showcase:
+self,
+countryiso3: str,
+countryname: str,
+) -> Optional[Showcase]:
+url = self._showcase_links.get(countryiso3)
+if not url:
+return None
+name = self.get_name(countryname)
+title = self.get_title(countryname)
showcase = Showcase(
{
"name": f"{self._slugified_name(name)}-showcase",
"title": title,
"notes": self._configuration["showcaseinfo"]["notes"],
"url": self._showcase_links[countryiso3],
"image_url": "",
"image_url": "https://raw.githubusercontent.com/OCHA-DAP/hdx-scraper-ophi/main/ophi_mpi.jpg",
}
)
showcase.add_tags(self.tags)
@@ -125,8 +140,8 @@ def generate_dataset(
) -> Optional[Dataset]:
if not standardised_rows:
return None
-title = f"{countryname} Multi Dimensional Poverty Index"
-name = f"{countryname} MPI"
+title = self.get_title(countryname)
+name = self.get_name(countryname)
dataset = self.generate_dataset_metadata(title, name)
dataset.set_time_period(date_range["start"], date_range["end"])
resource_descriptions = self._configuration["resource_descriptions"]
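The two new static helpers in this file are pure string formatting, so their effect can be checked in isolation. A standalone snippet (function bodies copied from the diff; the example country name is arbitrary):

def get_title(countryname: str) -> str:
    return f"{countryname} Multi Dimensional Poverty Index"


def get_name(countryname: str) -> str:
    return f"{countryname} MPI"


# generate_dataset() and generate_showcase() now derive their strings from
# the same helpers, keeping dataset and showcase naming consistent.
assert get_title("Afghanistan") == "Afghanistan Multi Dimensional Poverty Index"
assert get_name("Afghanistan") == "Afghanistan MPI"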
120 changes: 58 additions & 62 deletions src/hdx/scraper/ophi/pipeline.py
@@ -7,11 +7,21 @@
from hdx.utilities.dateparse import parse_date_range
from hdx.utilities.dictandlist import dict_of_dicts_add
from hdx.utilities.retriever import Retrieve
+from hdx.utilities.text import number_format

logger = logging.getLogger(__name__)


class Pipeline:
+headers = (
+"mpi",
+"headcount_ratio",
+"intensity_of_deprivation",
+"vulnerable_to_poverty",
+"in_severe_poverty",
+)
+timepoints = ("t0", "t1")
+
def __init__(
self,
configuration: Configuration,
@@ -77,9 +87,22 @@ def add_row(
global_dict[key] = row
dict_of_dicts_add(country_dict, countryiso3, key, row)

+@classmethod
+def set_mpi(cls, inheaders: Tuple[str], inrow: Dict, row: Dict) -> None:
+for i, inheader in enumerate(inheaders):
+header = cls.headers[i]
+row[header] = number_format(inrow[inheader], format="%.2f")
+
def read_mpi_national_data(
self, path: str, format: str, sheet: str, headers: List[str]
) -> None:
+inheaders = (
+"Multidimensional poverty Multidimensional Poverty Index (MPI = H*A) Range 0 to 1",
+"Multidimensional poverty Headcount ratio: Population in multidimensional poverty (H) % Population",
+"Multidimensional poverty Intensity of deprivation among the poor (A) Average % of weighted deprivations",
+"Multidimensional poverty Vulnerable to poverty (who experience 20-33.32% intensity of deprivations) % Population",
+"Multidimensional poverty In severe poverty (severity 50% or higher) % Population",
+)
_, iterator = self._retriever.downloader.get_tabular_rows(
path,
format=format,
@@ -96,21 +119,7 @@ def read_mpi_national_data(
"admin1_code": "",
"admin1_name": "",
}
row["mpi"] = inrow[
"Multidimensional poverty Multidimensional Poverty Index (MPI = H*A) Range 0 to 1"
]
row["headcount_ratio"] = inrow[
"Multidimensional poverty Headcount ratio: Population in multidimensional poverty (H) % Population"
]
row["intensity_of_deprivation"] = inrow[
"Multidimensional poverty Intensity of deprivation among the poor (A) Average % of weighted deprivations"
]
row["vulnerable_to_poverty"] = inrow[
"Multidimensional poverty Vulnerable to poverty (who experience 20-33.32% intensity of deprivations) % Population"
]
row["in_severe_poverty"] = inrow[
"Multidimensional poverty In severe poverty (severity 50% or higher) % Population"
]
self.set_mpi(inheaders, inrow, row)
date_range = inrow["MPI data source Year"]
self.add_row(
countryiso3,
@@ -126,6 +135,13 @@
def read_mpi_subnational_data(
self, path: str, format: str, sheet: str, headers: List[str]
) -> None:
+inheaders = (
+"Multidimensional poverty by region Multidimensional Poverty Index (MPI = H*A) Range 0 to 1",
+"Multidimensional poverty by region Headcount ratio: Population in multidimensional poverty (H) % Population",
+"Multidimensional poverty by region Intensity of deprivation among the poor (A) Average % of weighted deprivations",
+"Multidimensional poverty by region Vulnerable to poverty % Population",
+"Multidimensional poverty by region In severe poverty % Population",
+)
_, iterator = self._retriever.downloader.get_tabular_rows(
path,
format=format,
@@ -144,21 +160,7 @@
"admin1_code": admin1_code,
"admin1_name": admin1_name,
}
row["mpi"] = inrow[
"Multidimensional poverty by region Multidimensional Poverty Index (MPI = H*A) Range 0 to 1"
]
row["headcount_ratio"] = inrow[
"Multidimensional poverty by region Headcount ratio: Population in multidimensional poverty (H) % Population"
]
row["intensity_of_deprivation"] = inrow[
"Multidimensional poverty by region Intensity of deprivation among the poor (A) Average % of weighted deprivations"
]
row["vulnerable_to_poverty"] = inrow[
"Multidimensional poverty by region Vulnerable to poverty % Population"
]
row["in_severe_poverty"] = inrow[
"Multidimensional poverty by region In severe poverty % Population"
]
self.set_mpi(inheaders, inrow, row)
date_range = inrow["MPI data source Year"]
self.add_row(
countryiso3,
@@ -174,6 +176,16 @@
def read_trends_national_data(
self, path: str, format: str, sheet: str, headers: List[str]
) -> None:
+inheaders_tn = []
+for timepoint in self.timepoints:
+inheaders = (
+f"Multidimensional Poverty Index (MPIT) {timepoint} Range 0 to 1",
+f"Multidimensional Headcount Ratio (HT) {timepoint} % pop.",
+f"Intensity of Poverty (AT) {timepoint} Avg % of weighted deprivations",
+f"Vulnerable to poverty {timepoint} % pop.",
+f"In severe poverty {timepoint} % pop.",
+)
+inheaders_tn.append(inheaders)
_, iterator = self._retriever.downloader.get_tabular_rows(
path,
format=format,
@@ -185,27 +197,14 @@
countryiso3 = inrow["ISO country code"]
if not countryiso3:
continue
-for i, timepoint in enumerate(("t0", "t1")):
+for i, timepoint in enumerate(self.timepoints):
row = {
"country_code": countryiso3,
"admin1_code": "",
"admin1_name": "",
}
row["mpi"] = inrow[
f"Multidimensional Poverty Index (MPIT) {timepoint} Range 0 to 1"
]
row["headcount_ratio"] = inrow[
f"Multidimensional Headcount Ratio (HT) {timepoint} % pop."
]
row["intensity_of_deprivation"] = inrow[
f"Intensity of Poverty (AT) {timepoint} Avg % of weighted deprivations"
]
row["vulnerable_to_poverty"] = inrow[
f"Vulnerable to poverty {timepoint} % pop."
]
row["in_severe_poverty"] = inrow[
f"In severe poverty {timepoint} % pop."
]
inheaders = inheaders_tn[i]
self.set_mpi(inheaders, inrow, row)
date_range = inrow[f"MPI data source {timepoint} Year"]
self.add_row(
countryiso3,
@@ -221,6 +220,16 @@
def read_trends_subnational_data(
self, path: str, format: str, sheet: str, headers: List[str]
) -> None:
+inheaders_tn = []
+for timepoint in self.timepoints:
+inheaders = (
+f"Multidimensional Poverty Index (MPIT) {timepoint} Range 0 to 1",
+f"Multidimensional Headcount Ratio (HT) {timepoint} % pop.",
+f"Intensity of Poverty (AT) {timepoint} Avg % of weighted deprivations",
+f"Vulnerable to poverty {timepoint} % pop.",
+f"In severe poverty {timepoint} % pop.",
+)
+inheaders_tn.append(inheaders)
_, iterator = self._retriever.downloader.get_tabular_rows(
path,
format=format,
@@ -234,27 +243,14 @@
continue
admin1_name = inrow["Region"]
admin1_code, _ = self._adminone.get_pcode(countryiso3, admin1_name)
-for i, timepoint in enumerate(("t0", "t1")):
+for i, timepoint in enumerate(self.timepoints):
row = {
"country_code": countryiso3,
"admin1_code": admin1_code,
"admin1_name": admin1_name,
}
row["mpi"] = inrow[
f"Multidimensional Poverty Index (MPIT) {timepoint} Range 0 to 1"
]
row["headcount_ratio"] = inrow[
f"Multidimensional Headcount Ratio (HT) {timepoint} % pop."
]
row["intensity_of_deprivation"] = inrow[
f"Intensity of Poverty (AT) {timepoint} Avg % of weighted deprivations"
]
row["vulnerable_to_poverty"] = inrow[
f"Vulnerable to poverty {timepoint} % pop."
]
row["in_severe_poverty"] = inrow[
f"In severe poverty {timepoint} % pop."
]
inheaders = inheaders_tn[i]
self.set_mpi(inheaders, inrow, row)
date_range = inrow[f"MPI data source {timepoint} Year"]
self.add_row(
countryiso3,
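These hunks are what the commit title refers to: instead of copying raw spreadsheet values, the new set_mpi maps each input column positionally onto the class-level headers tuple and formats the value to two decimal places with number_format(..., format="%.2f") from hdx-python-utilities. A minimal sketch of that mapping using a plain printf-style format as a stand-in for number_format (illustrative only; the real helper may treat empty or missing cells differently):

HEADERS = (
    "mpi",
    "headcount_ratio",
    "intensity_of_deprivation",
    "vulnerable_to_poverty",
    "in_severe_poverty",
)

def set_mpi(inheaders, inrow, row):
    # Same positional mapping as the new Pipeline.set_mpi, with "%.2f"
    # standing in for hdx.utilities.text.number_format.
    for i, inheader in enumerate(inheaders):
        row[HEADERS[i]] = "%.2f" % float(inrow[inheader])

# Made-up column names and values, just to show the two-decimal rounding.
inheaders = ("MPI", "H", "A", "Vulnerable", "Severe")
inrow = {"MPI": 0.186, "H": 55.123, "A": 49.987, "Vulnerable": 18.3, "Severe": 24.6}
row = {}
set_mpi(inheaders, inrow, row)
print(row)
# {'mpi': '0.19', 'headcount_ratio': '55.12', 'intensity_of_deprivation': '49.99',
#  'vulnerable_to_poverty': '18.30', 'in_severe_poverty': '24.60'}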