Skip to content

Commit

Permalink
Add Yemen to humanitarian needs (still in progress)
Browse files Browse the repository at this point in the history
  • Loading branch information
Mike committed Dec 22, 2023
1 parent 9229cf3 commit e571e0b
Show file tree
Hide file tree
Showing 10 changed files with 260 additions and 183 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ google-auth==2.25.2
# gspread
google-auth-oauthlib==1.2.0
# via gspread
greenlet==3.0.2
greenlet==3.0.3
# via sqlalchemy
gspread==5.12.3
# via hdx-python-scraper
Expand Down
11 changes: 11 additions & 0 deletions src/hapi/pipelines/app/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def parse_args():
default=None,
help="Database connection parameters. Overrides --db-uri.",
)
parser.add_argument("-th", "--themes", default=None, help="Themes to run")
parser.add_argument(
"-sc", "--scrapers", default=None, help="Scrapers to run"
)
Expand All @@ -72,6 +73,7 @@ def parse_args():
def main(
db_uri: Optional[str] = None,
db_params: Optional[str] = None,
themes_to_run: Optional[List[str]] = None,
scrapers_to_run: Optional[List[str]] = None,
save: bool = False,
use_saved: bool = False,
Expand All @@ -84,7 +86,10 @@ def main(
Args:
db_uri (Optional[str]): Database connection URI. Defaults to None.
db_params (Optional[str]): Database connection parameters. Defaults to None.
themes_to_run (Optional[List[str]]): Themes to run. Defaults to None (all themes).
scrapers_to_run (Optional[List[str]]): Scrapers to run. Defaults to None (all scrapers).
save (bool): Whether to save state for testing. Defaults to False.
use_saved (bool): Whether to use saved state for testing. Defaults to False.
Returns:
None
Expand Down Expand Up @@ -125,6 +130,7 @@ def main(
configuration,
session,
today,
themes_to_run,
scrapers_to_run,
errors_on_exit,
)
Expand Down Expand Up @@ -156,6 +162,10 @@ def main(
db_uri = getenv("DB_URI")
if db_uri and "://" not in db_uri:
db_uri = f"postgresql://{db_uri}"
if args.themes:
themes_to_run = args.themes.split(",")
else:
themes_to_run = None
if args.scrapers:
scrapers_to_run = args.scrapers.split(",")
else:
Expand All @@ -178,6 +188,7 @@ def main(
project_config_dict=project_config_dict,
db_uri=db_uri,
db_params=args.db_params,
themes_to_run=themes_to_run,
scrapers_to_run=scrapers_to_run,
save=args.save,
use_saved=args.use_saved,
Expand Down
195 changes: 105 additions & 90 deletions src/hapi/pipelines/app/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ def __init__(
configuration: Dict,
session: Session,
today: datetime,
themes_to_run: Optional[ListTuple[str]] = None,
scrapers_to_run: Optional[ListTuple[str]] = None,
errors_on_exit: Optional[ErrorsOnExit] = None,
use_live: bool = True,
):
self.configuration = configuration
self.session = session
self.themes_to_run = themes_to_run
self.locations = Locations(
configuration=configuration, session=session, use_live=use_live
)
Expand All @@ -54,17 +56,17 @@ def __init__(
self.admintwo.load_pcode_formats()
self.admintwo.set_parent_admins_from_adminlevels([self.adminone])

self.gender = Gender(
self.population_status = PopulationStatus(
session=session,
gender_descriptions=configuration["gender_descriptions"],
)
self.age_range = AgeRange(
session=session, age_range_codes=configuration["age_range_codes"]
population_status_descriptions=configuration[
"population_status_descriptions"
],
)
self.sector = Sector(
self.population_group = PopulationGroup(
session=session,
datasetinfo=configuration["sector"],
sector_map=configuration["sector_map"],
population_group_descriptions=configuration[
"population_group_descriptions"
],
)
self.org = Org(
session=session,
Expand All @@ -75,17 +77,10 @@ def __init__(
datasetinfo=configuration["org_type"],
org_type_map=configuration["org_type_map"],
)
self.population_group = PopulationGroup(
session=session,
population_group_descriptions=configuration[
"population_group_descriptions"
],
)
self.population_status = PopulationStatus(
self.sector = Sector(
session=session,
population_status_descriptions=configuration[
"population_status_descriptions"
],
datasetinfo=configuration["sector"],
sector_map=configuration["sector_map"],
)
self.ipc_phase = IpcPhase(
session=session,
Expand All @@ -96,6 +91,13 @@ def __init__(
session=session,
ipc_type_descriptions=configuration["ipc_type_descriptions"],
)
self.gender = Gender(
session=session,
gender_descriptions=configuration["gender_descriptions"],
)
self.age_range = AgeRange(
session=session, age_range_codes=configuration["age_range_codes"]
)

Sources.set_default_source_date_format("%Y-%m-%d")
self.runner = Runner(
Expand All @@ -112,21 +114,24 @@ def create_configurable_scrapers(self):
def _create_configurable_scrapers(
prefix, level, suffix_attribute=None, adminlevel=None
):
suffix = f"_{level}"
source_configuration = Sources.create_source_configuration(
suffix_attribute=suffix_attribute,
admin_sources=True,
adminlevel=adminlevel,
)
scrapers = self.runner.add_configurables(
self.configuration[f"{prefix}{suffix}"],
level,
adminlevel=adminlevel,
source_configuration=source_configuration,
suffix=suffix,
)
current_scrapers = self.configurable_scrapers.get(prefix, [])
self.configurable_scrapers[prefix] = current_scrapers + scrapers
if not self.themes_to_run or prefix in self.themes_to_run:
suffix = f"_{level}"
source_configuration = Sources.create_source_configuration(
suffix_attribute=suffix_attribute,
admin_sources=True,
adminlevel=adminlevel,
)
scrapers = self.runner.add_configurables(
self.configuration[f"{prefix}{suffix}"],
level,
adminlevel=adminlevel,
source_configuration=source_configuration,
suffix=suffix,
)
current_scrapers = self.configurable_scrapers.get(prefix, [])
self.configurable_scrapers[prefix] = (
current_scrapers + scrapers
)

_create_configurable_scrapers("population", "national")
_create_configurable_scrapers(
Expand Down Expand Up @@ -155,70 +160,80 @@ def output(self):
self.locations.populate()
self.admins.populate()
self.metadata.populate()
self.gender.populate()
self.age_range.populate()
self.sector.populate()
self.population_status.populate()
self.population_group.populate()
self.org.populate()
self.org_type.populate()
self.population_group.populate()
self.population_status.populate()
self.sector.populate()
self.ipc_phase.populate()
self.ipc_type.populate()
self.gender.populate()
self.age_range.populate()

results = self.runner.get_hapi_results(
self.configurable_scrapers["population"]
)
if not self.themes_to_run or "population" in self.themes_to_run:
results = self.runner.get_hapi_results(
self.configurable_scrapers["population"]
)

population = Population(
session=self.session,
metadata=self.metadata,
admins=self.admins,
gender=self.gender,
age_range=self.age_range,
results=results,
)
population.populate()
population = Population(
session=self.session,
metadata=self.metadata,
admins=self.admins,
gender=self.gender,
age_range=self.age_range,
results=results,
)
population.populate()

results = self.runner.get_hapi_results(
self.configurable_scrapers["operational_presence"]
)
operational_presence = OperationalPresence(
session=self.session,
metadata=self.metadata,
admins=self.admins,
org=self.org,
org_type=self.org_type,
sector=self.sector,
results=results,
)
operational_presence.populate()
if (
not self.themes_to_run
or "operational_presence" in self.themes_to_run
):
results = self.runner.get_hapi_results(
self.configurable_scrapers["operational_presence"]
)
operational_presence = OperationalPresence(
session=self.session,
metadata=self.metadata,
admins=self.admins,
org=self.org,
org_type=self.org_type,
sector=self.sector,
results=results,
)
operational_presence.populate()

results = self.runner.get_hapi_results(
self.configurable_scrapers["food_security"]
)
food_security = FoodSecurity(
session=self.session,
metadata=self.metadata,
admins=self.admins,
ipc_phase=self.ipc_phase,
ipc_type=self.ipc_type,
results=results,
)
food_security.populate()
if not self.themes_to_run or "food_security" in self.themes_to_run:
results = self.runner.get_hapi_results(
self.configurable_scrapers["food_security"]
)
food_security = FoodSecurity(
session=self.session,
metadata=self.metadata,
admins=self.admins,
ipc_phase=self.ipc_phase,
ipc_type=self.ipc_type,
results=results,
)
food_security.populate()

results = self.runner.get_hapi_results(
self.configurable_scrapers["humanitarian_needs"]
)
if (
not self.themes_to_run
or "humanitarian_needs" in self.themes_to_run
):
results = self.runner.get_hapi_results(
self.configurable_scrapers["humanitarian_needs"]
)

humanitarian_needs = HumanitarianNeeds(
session=self.session,
metadata=self.metadata,
admins=self.admins,
gender=self.gender,
age_range=self.age_range,
sector=self.sector,
population_group=self.population_group,
population_status=self.population_status,
results=results,
)
humanitarian_needs.populate()
humanitarian_needs = HumanitarianNeeds(
session=self.session,
metadata=self.metadata,
admins=self.admins,
population_status=self.population_status,
population_group=self.population_group,
sector=self.sector,
gender=self.gender,
age_range=self.age_range,
results=results,
)
humanitarian_needs.populate()
Loading

0 comments on commit e571e0b

Please sign in to comment.