Skip to content
This repository has been archived by the owner on Sep 4, 2024. It is now read-only.

Commit

Permalink
Merge pull request #328 from specifysystems/291-split-occurrences-too…
Browse files Browse the repository at this point in the history
…l-should-optionally-output-a-species-list

Add output species list to occ splitter
  • Loading branch information
cjgrady authored May 23, 2022
2 parents 4e05a7f + 1a43214 commit 945eaa6
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 0 deletions.
13 changes: 13 additions & 0 deletions lmpy/data_preparation/occurrence_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def __init__(
self.writer_fields = write_fields
self.max_writers = max_writers
self.writers = {}
self.seen_taxa = set()

# .......................
def __enter__(self):
Expand Down Expand Up @@ -176,12 +177,24 @@ def write_points(self, points):
"""
if points:
writer_key = self.get_writer_key(points[0])
self.seen_taxa.add(points[0].species_name)
if writer_key not in self.writers.keys():
if self.writer_fields is None:
self.writer_fields = list(points[0].attributes.keys())
self.open_writer(writer_key)
self.writers[writer_key].write_points(points)

# .......................
def write_species_list(self, species_list_filename):
    """Write the species names seen so far to a text file, one per line.

    The names come from ``self.seen_taxa``, which is populated as points are
    written (i.e. after wrangling).  Names are written in sorted order so the
    output is reproducible between runs; iterating the set directly would
    yield an arbitrary, hash-dependent order.

    Args:
        species_list_filename (str): File location to write the species list.
    """
    # Stringify first (matching the original f-string formatting) so the sort
    # cannot fail on a non-string entry, then sort for deterministic output.
    species_names = sorted(map(str, self.seen_taxa))
    with open(species_list_filename, mode='wt') as species_out:
        species_out.writelines(f'{name}\n' for name in species_names)


# .....................................................................................
__all__ = [
Expand Down
7 changes: 7 additions & 0 deletions lmpy/tools/split_occurrence_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ def build_parser():
'a species header key, an x header key, and a y header key.'
),
)
parser.add_argument(
'--species_list_filename',
type=str,
help='File location to write list of species seen (after wrangling).'
)

parser.add_argument(
'out_dir',
Expand Down Expand Up @@ -131,6 +136,8 @@ def cli():
with open(wranglers_fn, mode='rt') as in_json:
wranglers = wrangler_factory.get_wranglers(json.load(in_json))
occurrence_processor.process_reader(reader, wranglers)
if args.species_list_filename:
occurrence_processor.write_species_list(args.species_list_filename)


# .....................................................................................
Expand Down
12 changes: 12 additions & 0 deletions tests/test_data_preparation/test_occurrence_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def test_one_dwca(monkeypatch, generate_temp_filename, temp_directory):
"""
# Temporary files
dwca_filename = generate_temp_filename()
species_list_filename = generate_temp_filename(suffix='.txt')

# Generate a DWCA and wranglers
dwca_fields = [
Expand Down Expand Up @@ -91,12 +92,17 @@ def test_one_dwca(monkeypatch, generate_temp_filename, temp_directory):
splitter.process_reader(
PointDwcaReader(dwca_filename), factory.get_wranglers(wrangler_config)
)
splitter.write_species_list(species_list_filename)
splitter.close()

# Check output
assert validate_point_csvs(
glob.glob(f'{temp_directory}/*.csv'), 'taxonname', 'longitude', 'latitude'
)
# Check that species in species list are accepted
with open(species_list_filename, mode='rt') as species_in:
for line in species_in:
assert line.strip() in list(SPECIES_MAP.values())


# .....................................................................................
Expand Down Expand Up @@ -179,6 +185,7 @@ def test_complex(monkeypatch, generate_temp_filename, temp_directory):
dwca_2_filename = generate_temp_filename()
csv_1_filename = generate_temp_filename()
csv_2_filename = generate_temp_filename()
species_list_filename = generate_temp_filename(suffix='.txt')

# Reader and wrangler configurations
# DWCA 1
Expand Down Expand Up @@ -413,8 +420,13 @@ def test_complex(monkeypatch, generate_temp_filename, temp_directory):
PointCsvReader(csv_2_filename, 'taxname', 'dec_lon', 'dec_lat'),
factory.get_wranglers(csv_2_wrangler_conf)
)
splitter.write_species_list(species_list_filename)

# Check output
assert validate_point_csvs(
glob.glob(f'{temp_directory}/*.csv'), 'species', 'longitude', 'latitude'
)
# Check that species in species list are accepted
with open(species_list_filename, mode='rt') as species_in:
for line in species_in:
assert line.strip() in list(SPECIES_MAP.values())
8 changes: 8 additions & 0 deletions tests/test_tools/test_split_occurrence_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,8 @@ def test_complex(monkeypatch, generate_temp_filename, temp_directory):
wrangler_3_filename = generate_temp_filename()
wrangler_4_filename = generate_temp_filename()

species_list_filename = generate_temp_filename(suffix='.txt')

# Reader and wrangler configurations
# DWCA 1
dwca_1_fields = [
Expand Down Expand Up @@ -518,6 +520,8 @@ def test_complex(monkeypatch, generate_temp_filename, temp_directory):
'taxname',
'dec_lon',
'dec_lat',
'--species_list_filename',
species_list_filename,
temp_directory
]

Expand All @@ -528,3 +532,7 @@ def test_complex(monkeypatch, generate_temp_filename, temp_directory):
assert validate_point_csvs(
glob.glob(f'{temp_directory}/*.csv'), 'species_name', 'x', 'y'
)
# Check that species in species list are accepted
with open(species_list_filename, mode='rt') as species_in:
for line in species_in:
assert line.strip() in list(SPECIES_MAP.values())

0 comments on commit 945eaa6

Please sign in to comment.