From 99b877a85dcd9fc8f13ed703969fbbd1a4496ada Mon Sep 17 00:00:00 2001 From: Mike Date: Mon, 8 Jan 2024 12:31:02 +1300 Subject: [PATCH 1/2] Fix database export (wrong code being used for age ranges which only seems to come up when using postgres) --- src/hapi/pipelines/database/age_range.py | 9 +++-- src/hapi/pipelines/database/gender.py | 5 ++- .../pipelines/database/humanitarian_needs.py | 40 ++++++++++--------- .../pipelines/database/population_group.py | 5 ++- .../pipelines/database/population_status.py | 5 ++- src/hapi/pipelines/database/sector.py | 5 ++- 6 files changed, 38 insertions(+), 31 deletions(-) diff --git a/src/hapi/pipelines/database/age_range.py b/src/hapi/pipelines/database/age_range.py index 117e0156..f0aa3ba6 100644 --- a/src/hapi/pipelines/database/age_range.py +++ b/src/hapi/pipelines/database/age_range.py @@ -14,7 +14,7 @@ class AgeRange(BaseUploader): def __init__(self, session: Session, age_range_codes: List[str]): super().__init__(session) self.data = age_range_codes - self.patterns = [] + self.pattern_to_code = {} def populate(self): logger.info("Populating age ranges table") @@ -27,14 +27,15 @@ def populate_single(self, age_range_code: str): if len(ages) == 2: # Format: 0-5 age_min, age_max = int(ages[0]), int(ages[1]) - pattern_string = f"age{age_min}_{age_max}" + pattern = f"age{age_min}_{age_max}" else: # Format: 80+ age_min = int(age_range_code.replace("+", "")) age_max = None - pattern_string = f"age{age_min}plus" + pattern = f"age{age_min}plus" age_range_row = DBAgeRange( code=age_range_code, age_min=age_min, age_max=age_max ) self._session.add(age_range_row) - self.patterns.append(TagPattern.parse(f"#*+{pattern_string}")) + tagpattern = TagPattern.parse(f"#*+{pattern}") + self.pattern_to_code[tagpattern] = age_range_code diff --git a/src/hapi/pipelines/database/gender.py b/src/hapi/pipelines/database/gender.py index 535f186b..28891c4b 100644 --- a/src/hapi/pipelines/database/gender.py +++ b/src/hapi/pipelines/database/gender.py 
@@ -15,7 +15,7 @@ def __init__(self, session: Session, gender_descriptions: Dict[str, str]): super().__init__(session) self._gender_descriptions = gender_descriptions self.data = [] - self.patterns = [] + self.pattern_to_code = {} def populate(self): logger.info("Populating gender table") @@ -23,5 +23,6 @@ def populate(self): gender_row = DBGender(code=gender, description=description) self._session.add(gender_row) self.data.append(gender) - self.patterns.append(TagPattern.parse(f"#*+{gender}")) + tagpattern = TagPattern.parse(f"#*+{gender}") + self.pattern_to_code[tagpattern] = gender self._session.commit() diff --git a/src/hapi/pipelines/database/humanitarian_needs.py b/src/hapi/pipelines/database/humanitarian_needs.py index 7ca03cdd..198177eb 100644 --- a/src/hapi/pipelines/database/humanitarian_needs.py +++ b/src/hapi/pipelines/database/humanitarian_needs.py @@ -35,27 +35,25 @@ def __init__( super().__init__(session) self._metadata = metadata self._admins = admins - self.population_status_patterns = population_status.patterns - self.population_group_patterns = population_group.patterns - self.sector_patterns = sector.patterns - self.gender_patterns = gender.patterns - self.age_range_patterns = age_range.patterns + self.population_status_pattern_to_code = ( + population_status.pattern_to_code + ) + self.population_group_pattern_to_code = ( + population_group.pattern_to_code + ) + self.sector_pattern_to_code = sector.pattern_to_code + self.gender_pattern_to_code = gender.pattern_to_code + self.age_range_pattern_to_code = age_range.pattern_to_code self.disabled_pattern = TagPattern.parse("#*+disabled") self._results = results def populate(self): logger.info("Populating humanitarian needs table") - def match_column(col, patterns): - for pattern in patterns: + def match_column(col, pattern_to_code): + for pattern in pattern_to_code: if pattern.match(col): - result = pattern.tag - if result and result != "#*": - return result[1:] - result = pattern.include_attributes 
- if result: - return next(iter(result)) - break + return pattern_to_code[pattern] return None for dataset in self._results.values(): @@ -70,23 +68,27 @@ def match_column(col, patterns): column = Column.parse(hxl_tag) # "#inneed" "#affected" population_status_code = match_column( - column, self.population_status_patterns + column, self.population_status_pattern_to_code ) if not population_status_code: raise ValueError(f"Invalid HXL tag {hxl_tag}!") # "#*+idps" "#*+refugees" population_group_code = match_column( - column, self.population_group_patterns + column, self.population_group_pattern_to_code ) # "#*+wsh" "#*+pro_gbv" - sector_code = match_column(column, self.sector_patterns) + sector_code = match_column( + column, self.sector_pattern_to_code + ) if sector_code: sector_code = sector_code.upper() # "#*+f" "#*+m" - gender_code = match_column(column, self.gender_patterns) + gender_code = match_column( + column, self.gender_pattern_to_code + ) # "#*+age0_4" "#*+age80plus" age_range_code = match_column( - column, self.age_range_patterns + column, self.age_range_pattern_to_code ) # "#*+disabled" disabled_marker = self.disabled_pattern.match(column) diff --git a/src/hapi/pipelines/database/population_group.py b/src/hapi/pipelines/database/population_group.py index 841ef002..f453a04e 100644 --- a/src/hapi/pipelines/database/population_group.py +++ b/src/hapi/pipelines/database/population_group.py @@ -17,7 +17,7 @@ def __init__( super().__init__(session) self._population_group_descriptions = population_group_descriptions self.data = [] - self.patterns = [] + self.pattern_to_code = {} def populate(self): logger.info("Populating population group table") @@ -30,5 +30,6 @@ def populate(self): ) self._session.add(population_group_row) self.data.append(population_group) - self.patterns.append(TagPattern.parse(f"#*+{population_group}")) + tagpattern = TagPattern.parse(f"#*+{population_group}") + self.pattern_to_code[tagpattern] = population_group self._session.commit() diff 
--git a/src/hapi/pipelines/database/population_status.py b/src/hapi/pipelines/database/population_status.py index e64d7d0d..a4763b00 100644 --- a/src/hapi/pipelines/database/population_status.py +++ b/src/hapi/pipelines/database/population_status.py @@ -17,7 +17,7 @@ def __init__( super().__init__(session) self._population_status_descriptions = population_status_descriptions self.data = [] - self.patterns = [] + self.pattern_to_code = {} def populate(self): logger.info("Populating population status table") @@ -30,5 +30,6 @@ def populate(self): ) self._session.add(population_status_row) self.data.append(population_status) - self.patterns.append(TagPattern.parse(f"#{population_status}")) + tagpattern = TagPattern.parse(f"#{population_status}") + self.pattern_to_code[tagpattern] = population_status self._session.commit() diff --git a/src/hapi/pipelines/database/sector.py b/src/hapi/pipelines/database/sector.py index bf0244f7..fa74d2fb 100644 --- a/src/hapi/pipelines/database/sector.py +++ b/src/hapi/pipelines/database/sector.py @@ -24,7 +24,7 @@ def __init__( self._datasetinfo = datasetinfo self.data = {} self._sector_map = sector_map - self.patterns = [] + self.pattern_to_code = {} def populate(self): logger.info("Populating sector table") @@ -45,7 +45,8 @@ def populate(self): ) self._session.add(sector_row) pattern = code.lower().replace("-", "_") - self.patterns.append(TagPattern.parse(f"#*+{pattern}")) + pattern = TagPattern.parse(f"#*+{pattern}") + self.pattern_to_code[pattern] = code self._session.commit() def get_sector_code(self, sector: str) -> str: From 600011191a217316032b74c17e65fe4deff01c49 Mon Sep 17 00:00:00 2001 From: Mike Date: Mon, 8 Jan 2024 12:32:46 +1300 Subject: [PATCH 2/2] Fix database export (wrong code being used for age ranges which only seems to come up when using postgres) --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 741fadc9..a47e995e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md 
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [0.6.6] - 2024-01-08 + +### Fixed + +- Fix db export (wrong codes being used for age range) + ## [0.6.5] - 2023-01-08 ### Added