Skip to content

Commit

Permalink
Add unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
romainsacchi authored and romainsacchi committed Apr 11, 2024
1 parent 7ee5e96 commit 7c5d05c
Show file tree
Hide file tree
Showing 9 changed files with 341 additions and 36 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ jobs:
- uses: conda-incubator/setup-miniconda@v2
with:
python-version: ${{ matrix.python-version }}
channels: conda-forge,cmutel,konstantinstadler,haasad,pascallesage,romainsacchi
channels: conda-forge
allow-softlinks: true
channel-priority: strict
auto-update-conda: true
Expand Down
2 changes: 1 addition & 1 deletion pathways/data_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def validate_datapackage(
validate_scenario_data(dataframe)

# Check that the mapping is valid
validate_mapping(datapackage.get_resource("mapping"), dataframe)
validate_mapping(datapackage.get_resource("mapping"))

# fetch filepaths to resources
filepaths = []
Expand Down
13 changes: 8 additions & 5 deletions pathways/lca.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,16 @@
import bw_processing as bwp
import numpy as np
import pyprind
from bw2calc import MonteCarloLCA
from bw2calc.monte_carlo import MonteCarloLCA
from bw_processing import Datapackage
from numpy import ndarray, dtype
from scipy import sparse
from scipy.sparse import csr_matrix

from .filesystem_constants import DIR_CACHED_DB
from .lcia import fill_characterization_factors_matrices
from .pathways import _group_technosphere_indices
from .utils import get_unit_conversion_factors, fetch_indices, check_unclassified_activities

from .utils import get_unit_conversion_factors, fetch_indices, check_unclassified_activities, \
_group_technosphere_indices

logging.basicConfig(
level=logging.DEBUG,
Expand Down Expand Up @@ -54,7 +53,11 @@ def read_indices_csv(file_path: Path) -> dict[tuple[str, str, str, str], int]:
with open(file_path) as read_obj:
csv_reader = csv.reader(read_obj, delimiter=";")
for row in csv_reader:
indices[(row[0], row[1], row[2], row[3])] = int(row[4])
try:
indices[(row[0], row[1], row[2], row[3])] = int(row[4])
except IndexError as err:
logging.error(f"Error reading row {row} from {file_path}: {err}. "
f"Could it be that the file uses commas instead of semicolons?")
return indices


Expand Down
31 changes: 2 additions & 29 deletions pathways/pathways.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from multiprocessing import Pool, cpu_count
from typing import List, Optional

import datapackage
import numpy as np
import pandas
import pandas as pd
Expand Down Expand Up @@ -38,32 +37,6 @@
warnings.filterwarnings("ignore")


def _group_technosphere_indices(
    technosphere_indices: dict, group_by, group_values: list
) -> dict:
    """
    Group technosphere matrix indices by an arbitrary attribute (category, location, etc.).

    Each activity is classified once with ``group_by`` and its index is appended
    to the matching group, so the activity mapping is walked a single time
    instead of once per group value.

    :param technosphere_indices: Mapping of activities to their indices in the technosphere matrix.
    :param group_by: A function that takes an activity and returns its group value
        (e.g., category or location). The returned value must be hashable.
    :param group_values: All group values to report (e.g., all categories or locations).
    :return: A dictionary mapping every entry of ``group_values`` to the list of
        integer indices of the activities belonging to it, in the iteration order
        of ``technosphere_indices``. Groups without activities map to an empty
        list; activities whose group is not in ``group_values`` are left out.
    """

    # Seed every requested group so that empty groups are still reported.
    acts_dict = {value: [] for value in group_values}

    for activity, index in technosphere_indices.items():
        members = acts_dict.get(group_by(activity))
        if members is not None:
            members.append(int(index))

    return acts_dict


def _get_mapping(data) -> dict:
"""
Read the mapping file which maps scenario variables to LCA datasets.
Expand Down Expand Up @@ -91,7 +64,7 @@ def _read_scenario_data(data: dict, scenario: str):
return pd.read_excel(filepath, index_col=0)


def _read_datapackage(datapackage: DataPackage) -> DataPackage:
def _read_datapackage(datapackage: str) -> DataPackage:
"""Read the datapackage.json file.
:return: DataPackage
Expand All @@ -111,7 +84,7 @@ class Pathways:
def __init__(self, datapackage, debug=False):
self.datapackage = datapackage
self.data, dataframe, self.filepaths = validate_datapackage(
_read_datapackage()
_read_datapackage(datapackage)
)
self.mapping = _get_mapping()
self.mapping.update(self._get_final_energy_mapping())
Expand Down
37 changes: 37 additions & 0 deletions pathways/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@
def load_classifications():
"""Load the activities classifications."""

# check if file exists
if not Path(CLASSIFICATIONS).exists():
raise FileNotFoundError(f"File {CLASSIFICATIONS} not found")

with open(CLASSIFICATIONS, "r") as f:
data = yaml.full_load(f)

Expand All @@ -53,6 +57,9 @@ def harmonize_units(scenario: xr.DataArray, variables: list) -> xr.DataArray:

units = [scenario.attrs["units"][var] for var in variables]

if len(variables) == 0:
raise ValueError("Empty list of variables")

# if not all units are the same, we need to convert
if len(set(units)) > 1:
if all(x in ["PJ/yr", "EJ/yr", "PJ/yr."] for x in units):
Expand Down Expand Up @@ -133,6 +140,10 @@ def create_lca_results_array(
:rtype: xr.DataArray
"""

# check if any of the list parameters is empty, and if so, throw an error
if not all([methods, years, regions, locations, models, scenarios]):
raise ValueError("Empty list parameter")

# Define the coordinates for the xarray DataArray
coords = {
"act_category": list(set(classifications.values())),
Expand Down Expand Up @@ -450,3 +461,29 @@ def check_unclassified_activities(
writer.writerows(missing_classifications)

return missing_classifications


def _group_technosphere_indices(
    technosphere_indices: dict, group_by, group_values: list
) -> dict:
    """
    Group technosphere matrix indices by an arbitrary attribute (category, location, etc.).

    Each activity is classified once with ``group_by`` and its index is appended
    to the matching group, so the activity mapping is walked a single time
    instead of once per group value.

    :param technosphere_indices: Mapping of activities to their indices in the technosphere matrix.
    :param group_by: A function that takes an activity and returns its group value
        (e.g., category or location). The returned value must be hashable.
    :param group_values: All group values to report (e.g., all categories or locations).
    :return: A dictionary mapping every entry of ``group_values`` to the list of
        integer indices of the activities belonging to it, in the iteration order
        of ``technosphere_indices``. Groups without activities map to an empty
        list; activities whose group is not in ``group_values`` are left out.
    """

    # Seed every requested group so that empty groups are still reported.
    acts_dict = {value: [] for value in group_values}

    for activity, index in technosphere_indices.items():
        members = acts_dict.get(group_by(activity))
        if members is not None:
            members.append(int(index))

    return acts_dict
50 changes: 50 additions & 0 deletions tests/test_lca.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import pytest
from unittest.mock import mock_open, patch
from pathways.lca import read_indices_csv, load_matrix_and_index
from pathlib import Path
import numpy as np


def test_read_indices_csv_success():
    """Check the mapping read_indices_csv builds from two semicolon-separated rows."""
    rows = [
        "activity;product;location;unit;1",
        "another_activity;another_product;another_location;another_unit;2",
    ]
    expected_dict = {
        ('activity', 'product', 'location', 'unit'): 1,
        ('another_activity', 'another_product', 'another_location', 'another_unit'): 2,
    }

    # Serve the rows through a fake file handle so nothing is read from disk.
    fake_open = mock_open(read_data="\n".join(rows))
    with patch("builtins.open", fake_open):
        result = read_indices_csv(Path("dummy_path.csv"))

    assert result == expected_dict


def test_load_matrix_and_index(tmp_path):
    """Compare the four arrays returned by load_matrix_and_index with hand-built ones."""
    csv_lines = [
        "row;col;value;uncertainty type;loc;scale;shape;minimum;maximum;negative;flip",
        "1;0;3.5;3;4;5;6;7;8;0;0",
        "1;1;0.5;3;4;5;6;7;8;0;1",
    ]

    expected_data = np.array([3.5, 0.5])
    expected_indices = np.array(
        [(0, 1), (1, 1)], dtype=[('row', 'i4'), ('col', 'i4')]
    )
    expected_flip = np.array([False, True])
    distribution_fields = [
        ('uncertainty_type', 'i4'),
        ('loc', 'f4'),
        ('scale', 'f4'),
        ('shape', 'f4'),
        ('minimum', 'f4'),
        ('maximum', 'f4'),
        ('negative', '?'),
    ]
    expected_distributions = np.array(
        [
            (3, 4.0, 5.0, 6.0, 7.0, 8.0, False),
            (3, 4.0, 5.0, 6.0, 7.0, 8.0, False),
        ],
        dtype=distribution_fields,
    )

    # The function is given a path, so the CSV is written into pytest's temporary directory.
    temp_file = tmp_path / "temp.csv"
    temp_file.write_text("\n".join(csv_lines))

    data_array, indices_array, flip_array, distributions_array = load_matrix_and_index(temp_file)

    print("distributions_array", distributions_array)
    print("expected_output", expected_distributions)

    # Values are compared with a tolerance; the other three arrays must match exactly.
    assert np.allclose(data_array, expected_data)
    assert np.array_equal(indices_array, expected_indices)
    assert np.array_equal(flip_array, expected_flip)
    assert np.array_equal(distributions_array, expected_distributions)


69 changes: 69 additions & 0 deletions tests/test_lcia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import pytest
from unittest.mock import mock_open, patch
from pathways.lcia import get_lcia_method_names, format_lcia_method_exchanges, fill_characterization_factors_matrices
from scipy.sparse import csr_matrix
import numpy as np
import json


def test_get_lcia_method_names_success():
    """Check that method name parts are joined with " - " by get_lcia_method_names."""
    mock_data = '[{"name": ["IPCC", "2021", "Global Warming Potential"]}, {"name": ["ReCiPe", "2016", "Midpoint"]} ]'
    expected_result = ["IPCC - 2021 - Global Warming Potential", "ReCiPe - 2016 - Midpoint"]

    # Both the file handle and the JSON parsing are replaced, so no real file is needed.
    open_patch = patch("builtins.open", mock_open(read_data=mock_data))
    json_patch = patch("json.load", return_value=json.loads(mock_data))
    with open_patch, json_patch:
        method_names = get_lcia_method_names()

    assert method_names == expected_result, "Method names not correctly formatted"


def test_format_lcia_method_exchanges():
    """Check the (name, category, subcategory) keys built by format_lcia_method_exchanges.

    The first exchange has a single category and is expected to get
    "unspecified" as its third key element.
    """
    carbon_dioxide = {"name": "CO2", "categories": ["air"], "amount": 1}
    methane = {
        "name": "CH4",
        "categories": ["air", "low population density, long-term"],
        "amount": 25,
    }
    method_input = {"exchanges": [carbon_dioxide, methane]}

    expected_output = {
        ("CO2", "air", "unspecified"): 1,
        ("CH4", "air", "low population density, long-term"): 25,
    }

    formatted = format_lcia_method_exchanges(method_input)
    assert formatted == expected_output, "Exchange formatting incorrect"


@pytest.fixture
def mock_lcia_methods_data():
    """Provide mock LCIA methods shaped like the return value of get_lcia_methods."""
    # Characterization factors keyed by (flow name, category, subcategory).
    factors = {
        ("CO2", "air", "unspecified"): 1,
        ("CH4", "air", "low population density, long-term"): 25,
    }
    return {"IPCC 2021 - Global Warming Potential": factors}


@pytest.fixture
def mock_biosphere_data():
    """Provide a mock biosphere matrix dict and biosphere dict, in that order."""
    # Maps biosphere_dict indices to matrix indices.
    biosphere_matrix_dict = {0: 0, 1: 1}
    flows = [
        ("CO2", "air", "unspecified"),
        ("CH4", "air", "low population density, long-term"),
    ]
    biosphere_dict = {flow: position for position, flow in enumerate(flows)}
    return biosphere_matrix_dict, biosphere_dict


def test_fill_characterization_factors_matrices(mock_lcia_methods_data, mock_biosphere_data):
    """Check type, shape and CSR content of the characterization matrix for one mocked method.

    :param mock_lcia_methods_data: fixture with the characterization factors of the mocked method.
    :param mock_biosphere_data: fixture returning (biosphere_matrix_dict, biosphere_dict).
    """
    methods = ["IPCC 2021 - Global Warming Potential"]
    biosphere_matrix_dict, biosphere_dict = mock_biosphere_data

    # get_lcia_methods is replaced by the fixture data for the duration of the call.
    with patch('pathways.lcia.get_lcia_methods', return_value=mock_lcia_methods_data):
        matrix = fill_characterization_factors_matrices(methods, biosphere_matrix_dict, biosphere_dict, debug=False)

    assert isinstance(matrix, csr_matrix), "Output is not a CSR matrix"
    assert matrix.shape == (len(methods), len(biosphere_matrix_dict)), "Matrix shape is incorrect"

    # Verifying content of the matrix through its three CSR arrays
    expected_data = np.array([1, 25])
    np.testing.assert_array_equal(matrix.data, expected_data, "Matrix data does not match expected values")
    np.testing.assert_array_equal(matrix.indices, np.array([0, 1]), "Matrix indices do not match expected values")
    # The message names indptr so that a failure points at the right CSR attribute.
    np.testing.assert_array_equal(matrix.indptr, np.array([0, 2]), "Matrix indptr does not match expected values")

25 changes: 25 additions & 0 deletions tests/test_pathways.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import pytest
from unittest.mock import Mock

from pathways.pathways import _get_mapping
from pathways.utils import _group_technosphere_indices


def test_group_technosphere_indices():
    """Two activities in two different locations end up in two one-element groups."""

    def group_by(x):
        # Group by location, the second element of the activity key.
        return x[1]

    technosphere = {('activity1', 'location1'): 0, ('activity2', 'location2'): 1}
    locations = ['location1', 'location2']

    grouped = _group_technosphere_indices(technosphere, group_by, locations)

    assert grouped == {'location1': [0], 'location2': [1]}, "Grouping does not match expected output"


def test_get_mapping():
    """Check that _get_mapping returns the parsed content of the mocked "mapping" resource."""
    # The YAML payload is spelled out with explicit newlines and indentation:
    # `dataset` must be nested under `variable1` to produce the expected
    # mapping, and the nesting must not depend on how this file is indented.
    yaml_payload = (
        "variable1:\n"
        "  dataset: [details]\n"
    )
    mock_data = Mock()
    mock_data.get_resource.return_value.raw_read.return_value = yaml_payload

    expected_mapping = {'variable1': {'dataset': ['details']}}
    assert _get_mapping(mock_data) == expected_mapping, "Mapping does not match expected dictionary"

Loading

0 comments on commit 7c5d05c

Please sign in to comment.