Skip to content

Commit

Permalink
Merge pull request #11 from quantifyearth/mwd-collate-more-info
Browse files: browse the repository at this point in the history
Store extra info per species for validation/lineage
  • Loading branch information
mdales authored Jan 27, 2025
2 parents 221a80a + cf653af commit 46e922c
Showing 1 changed file with 25 additions and 18 deletions.
43 changes: 25 additions & 18 deletions validation/collate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,22 @@

import pandas as pd

# Column names that a manifest must contain. collate_data asserts that this
# set is a subset of the keys found in the first manifest it loads; any
# additional keys present in the manifests are carried through as well.
ESSENTIAL_COLUMNS = {
    "id_no",
    "class_name",
    "family_name",
    "scientific_name",
    "season",
    "elevation_upper",
    "elevation_lower",
    "full_habitat_code",
    "range_total",
    "dem_total",
    "hab_total",
    "aoh_total",
    "prevalence",
}

def collate_data(
aoh_results: str,
output_path: str,
Expand All @@ -15,31 +31,22 @@ def collate_data(
print(f"Found no manifests in {aoh_results}", file=sys.stderr)
sys.exit(-1)

columns = [
"id_no",
"class_name",
"family_name",
"scientific_name",
"season",
"elevation_upper",
"elevation_lower",
"full_habitat_code",
"range_total",
"dem_total",
"hab_total",
"aoh_total",
"prevalence"
]
res = []
keys = None
for manifest in manifests:
with open(manifest, encoding="utf-8") as f:
data = json.load(f)
row = []
for c in columns:
row.append(data[c])
if keys is None:
keys = list(data.keys())
assert ESSENTIAL_COLUMNS.issubset(set(keys))
else:
assert keys == list(data.keys())
for k in keys:
row.append(data[k])
row.append(len(data['full_habitat_code'].split('|')))
res.append(row)
df = pd.DataFrame(res, columns=columns + ['n_habitats'])
df = pd.DataFrame(res, columns=keys + ['n_habitats'])
df.to_csv(output_path, index=False)

def main() -> None:
Expand Down

0 comments on commit 46e922c

Please sign in to comment.