Skip to content
This repository has been archived by the owner on Sep 4, 2024. It is now read-only.

Commit

Permalink
Aggregate occurrence data testing
Browse files Browse the repository at this point in the history
  • Loading branch information
cjgrady committed May 24, 2022
1 parent bbc4c4c commit 3a92489
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 16 deletions.
15 changes: 13 additions & 2 deletions lmpy/point.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def __init__(
y_field,
geopoint=None,
group_field='species_name',
encoding='utf8',
):
"""Constructor for a Point CSV retriever.
Expand All @@ -188,6 +189,7 @@ def __init__(
geopoint (:obj:`str`): The field name of the column containing geopoint
data.
group_field (:obj:`str`): The name of the field to use for grouping points.
encoding (str): The encoding to use when opening the file.
"""
self.filename = filename
self.file = None
Expand All @@ -199,6 +201,7 @@ def __init__(
self.group_field = group_field
self._next_points = []
self._curr_val = None
self.encoding = encoding

# .......................
def __enter__(self):
Expand Down Expand Up @@ -275,7 +278,7 @@ def __next__(self):
# .......................
def open(self):
"""Open the file and initialize."""
self.file = open(self.filename, 'r')
self.file = open(self.filename, 'r', encoding=self.encoding)
temp_lines = []
try:
for _ in range(3):
Expand Down Expand Up @@ -494,8 +497,10 @@ def __next__(self):
Raises:
StopIteration: Raised when there are no additional objects.
"""
for point_row in self.reader:
more_rows = True
while more_rows:
try:
point_row = next(self.reader)
point_dict = {
term: self.fields[term](point_row) for term in self.fields.keys()
}
Expand All @@ -514,10 +519,16 @@ def __next__(self):
return tmp
self._curr_val = test_val
self._next_points.append(pt)
except IndexError:
pass
except KeyError:
pass
except TypeError:
pass
except StopIteration:
more_rows = False
except csv.Error:
pass

if self._next_points:
tmp = self._next_points
Expand Down
4 changes: 4 additions & 0 deletions lmpy/tools/split_occurrence_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ def cli():
parser = build_parser()
args = _process_arguments(parser, 'config_file')

# Default key field is 'species_name'
if args.key_field is None:
args.key_field = ['species_name']

# Establish functions for getting writer key and filename
writer_key_func = get_writer_key_from_fields_func(*tuple(args.key_field))
writer_filename_func = get_writer_filename_func(args.out_dir)
Expand Down
49 changes: 48 additions & 1 deletion sample_data/name_map/croc_name_map.json
Original file line number Diff line number Diff line change
@@ -1 +1,48 @@
{}
{
"Crocodylus porosus": "Crocodylus porosus",
"Crocodylus johnstoni": "Crocodylus johnstoni",
"Crocodylus": "Crocodylus",
"Crocodylus niloticus cowiei (smith in hewitt, 1937)": "Crocodylus niloticus cowiei",
"Crocodylus niloticus laurenti, 1768": "Crocodylus niloticus",
"Crocodylus acutus (cuvier, 1807)": "Crocodylus acutus",
"Crocodylus siamensis schneider, 1801": "Crocodylus siamensis",
"Crocodylus moreletii (dum\u00e9ril & bibron, 1851)": "Crocodylus moreletii",
"Crocodylus porosus schneider, 1801": "Crocodylus porosus",
"Crocodylus palustris (lesson, 1831)": "Crocodylus palustris",
"Crocodylus suchus geoffroy saint-hilaire, 1807": "Crocodylus suchus",
"Crocodylus johnsoni krefft, 1873": "Crocodylus johnsoni",
"Crocodylus mindorensis schmidt, 1935": "Crocodylus mindorensis",
"Crocodylus niloticus africanus laurenti, 1768": "Crocodylus niloticus",
"Crocodylus johnstoni krefft, 1873": "Crocodylus johnstoni",
"Crocodylus niloticus pauciscutatus deraniyagala, 1948": "Crocodylus niloticus",
"Crocodylus laurenti, 1768": "Crocodylus",
"Crocodylus intermedius (graves, 1819)": "Crocodylus intermedius",
"Crocodylus rhombifer (cuvier, 1807)": "Crocodylus rhombifer",
"Crocodylus niloticus niloticus": "Crocodylus niloticus niloticus",
"Crocodylus niloticus chamses bory de saint vincent, 1824": "Crocodylus niloticus",
"Crocodylus halli murray, russo, zorilla & mcmahan, 2019": "Crocodylus halli",
"Bold:aac4128": "BOLD:AAC4128",
"Bold:aar5109": "BOLD:AAR5109",
"Bold:adc8567": "BOLD:ADC8567",
"Bold:aac4127": "BOLD:AAC4127",
"Crocodylus novaeguineae schmidt, 1928": "Crocodylus novaeguineae",
"Crocodilus antiquus leidy, 1852": "Crocodilus antiquus",
"Crocodilus ziphodon marsh, 1871": "Crocodilus ziphodon",
"Crocodylus greenwoodi suneja et al., 1977": "Crocodylus greenwoodi",
"Crocodylus falconensis scheyer, aguilera, delfino, fortier, carlini, s\u00e1nchez, carrillo-brice\u00f1o, quiroz & s\u00e1nchez-villagra, 2013": "Crocodylus falconensis",
"Crocodylus checchiai maccagno, 1947": "Crocodylus checchiai",
"Crocodylus megarhinus (andrews, 1905)": "Crocodylus megarhinus",
"Crocodylus anthropophagus brochu, njau, blumenschine & densmore, 2010": "Crocodylus anthropophagus",
"Crocodylus palaeindicus falconer, 1859": "Crocodylus palaeindicus",
"Crocodylus thorbjarnarsoni brochu & storrs, 2012": "Crocodylus thorbjarnarsoni",
"Crocodylus gariepensis pickford, 2003": "Crocodylus gariepensis",
"Crocodylus palustris palustris": "Crocodylus palustris palustris",
"Crocodylus novaeguineae mindorensis schmidt, 1935": "Crocodylus novaeguineae",
"Crocodilus laurenti, 1768": "Crocodilus",
"Crocodylus novaeguineae novaeguineae": "Crocodylus novaeguineae novaeguineae",
"Crocodylus palustris kimbula deraniyagala, 1936": "Crocodylus palustris",
"Crocodilus americanus laurenti, 1768": "Crocodilus americanus",
"Crocodylus raninus ross, 1990": "Crocodylus raninus",
"Crocodylus americanus laurenti, 1768": "Crocodylus americanus",
"Crocodilus porosus schneider, 1801": null
}
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def get_script_runner(console_script, script_module, args):
cmd_args.extend(args)

# Run command
subprocess.run(cmd_args)
subprocess.run(cmd_args, capture_output=True, check=True)

return get_script_runner

Expand Down
68 changes: 56 additions & 12 deletions tests/test_tutorials/test_aggregate_occurrence_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,7 @@ def wrangler_configs(data_dir):
'gbif': [
dict(
wrangler_type='AcceptedNameOccurrenceWrangler',
name_resolver='gbif',
out_map_filename=os.path.join(
data_dir, 'name_map/croc_name_map.json'
)
name_map=os.path.join(data_dir, 'name_map/croc_name_map.json'),
),
dict(
wrangler_type='AttributeFilterWrangler',
Expand Down Expand Up @@ -56,7 +53,7 @@ def wrangler_configs(data_dir):
'idigbio': [
dict(
wrangler_type='AcceptedNameOccurrenceWrangler',
name_map=os.path.join(data_dir, 'name_map/croc_name_map.json')
name_map=os.path.join(data_dir, 'name_map/croc_name_map.json'),
),
dict(
wrangler_type='AttributeFilterWrangler',
Expand Down Expand Up @@ -98,10 +95,7 @@ def wrangler_configs(data_dir):
'ala': [
dict(
wrangler_type='AcceptedNameOccurrenceWrangler',
out_map_filename=os.path.join(
data_dir,
'name_map/croc_name_map.json'
)
name_map=os.path.join(data_dir, 'name_map/croc_name_map.json'),
),
dict(
wrangler_type='AttributeModifierWrangler',
Expand All @@ -122,11 +116,17 @@ def wrangler_configs(data_dir):


# .....................................................................................
def test_instructions_python(tutorial_data_dir, generate_temp_filename, temp_directory):
def test_instructions_python(
tutorial_data_dir,
generate_temp_filename,
temp_directory
):
"""Test the python instructions.
Args:
tutorial_data_dir (pytest.Fixture): The tutorial data directory.
generate_temp_filename (pytest.Fixture): A fixture to generate filenames.
temp_directory (pytest.Fixture): A temporary directory to write outputs.
"""
gbif_dwca_filename = os.path.join(tutorial_data_dir, 'occurrence/gbif.zip')
idigbio_dwca_filename = os.path.join(tutorial_data_dir, 'occurrence/idigbio.zip')
Expand Down Expand Up @@ -169,10 +169,17 @@ def test_instructions_python(tutorial_data_dir, generate_temp_filename, temp_dir
writer_filename_func,
write_fields=write_fields,
) as occurrence_processor:
for reader, wranglers in readers_and_wranglers[1:2]:
for reader, wranglers in readers_and_wranglers[2:]:
occurrence_processor.process_reader(reader, wranglers)
occurrence_processor.write_species_list(species_list_filename)

# Check the outputs
_validate_outputs(
species_list_filename,
out_dir,
        species_name_map
)


# .....................................................................................
def test_instructions_console_script(
Expand Down Expand Up @@ -216,7 +223,7 @@ def test_instructions_console_script(
)

script_args = [
f'--species_list_fileanme={species_list_filename}',
f'--species_list_filename={species_list_filename}',
'--dwca',
gbif_dwca_filename,
gbif_wranglers_filename,
Expand All @@ -236,3 +243,40 @@ def test_instructions_console_script(
'lmpy.tools.split_occurrence_data',
script_args
)

# Check the outputs
_validate_outputs(
species_list_filename,
output_dir,
os.path.join(tutorial_data_dir, 'name_map/croc_name_map.json')
)


# .....................................................................................
def _validate_outputs(species_list_filename, output_dir, accepted_names_filename):
    """Validate outputs to ensure they are what we expect.

    Args:
        species_list_filename (str): File containing species seen.
        output_dir (str): Directory where outputs are stored.
        accepted_names_filename (str): File containing accepted names mapping.
    """
    # Collect the lower-cased accepted names, dropping unresolved (None) entries.
    with open(accepted_names_filename, mode='rt') as in_species:
        name_map = json.load(in_species)
    accepted_names = [
        accepted.lower() for accepted in name_map.values() if accepted is not None
    ]

    # Every species in the list must have a non-empty CSV of accepted points.
    with open(species_list_filename, mode='rt') as species_list_in:
        for raw_line in species_list_in:
            species = raw_line.strip()
            assert species.lower() not in ['null', 'none']
            species_filename = os.path.join(output_dir, f'{species}.csv')
            assert os.path.exists(species_filename)
            num_points = 0
            with PointCsvReader(species_filename, 'species_name', 'x', 'y') as reader:
                for point_group in reader:
                    for pt in point_group:
                        num_points += 1
                        # Each point's resolved name must be an accepted name.
                        assert pt.species_name.lower() in accepted_names
            assert num_points > 0

0 comments on commit 3a92489

Please sign in to comment.