From 7c5d05c86ec9c55fc9cb8cde0459978f687dd656 Mon Sep 17 00:00:00 2001 From: romainsacchi Date: Thu, 11 Apr 2024 14:04:12 +0200 Subject: [PATCH] Add unit tests --- .github/workflows/main.yml | 2 +- pathways/data_validation.py | 2 +- pathways/lca.py | 13 ++-- pathways/pathways.py | 31 +------- pathways/utils.py | 37 +++++++++ tests/test_lca.py | 50 ++++++++++++ tests/test_lcia.py | 69 +++++++++++++++++ tests/test_pathways.py | 25 ++++++ tests/test_utilities.py | 148 ++++++++++++++++++++++++++++++++++++ 9 files changed, 341 insertions(+), 36 deletions(-) create mode 100644 tests/test_lca.py create mode 100644 tests/test_lcia.py create mode 100644 tests/test_pathways.py create mode 100644 tests/test_utilities.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8a83935..75e036c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -144,7 +144,7 @@ jobs: - uses: conda-incubator/setup-miniconda@v2 with: python-version: ${{ matrix.python-version }} - channels: conda-forge,cmutel,konstantinstadler,haasad,pascallesage,romainsacchi + channels: conda-forge allow-softlinks: true channel-priority: strict auto-update-conda: true diff --git a/pathways/data_validation.py b/pathways/data_validation.py index a2f9e1f..2e9dd96 100644 --- a/pathways/data_validation.py +++ b/pathways/data_validation.py @@ -69,7 +69,7 @@ def validate_datapackage( validate_scenario_data(dataframe) # Check that the mapping is valid - validate_mapping(datapackage.get_resource("mapping"), dataframe) + validate_mapping(datapackage.get_resource("mapping")) # fetch filepaths to resources filepaths = [] diff --git a/pathways/lca.py b/pathways/lca.py index c754f5f..342975b 100644 --- a/pathways/lca.py +++ b/pathways/lca.py @@ -14,7 +14,7 @@ import bw_processing as bwp import numpy as np import pyprind -from bw2calc import MonteCarloLCA +from bw2calc.monte_carlo import MonteCarloLCA from bw_processing import Datapackage from numpy import ndarray, dtype from scipy 
import sparse @@ -22,9 +22,8 @@ from .filesystem_constants import DIR_CACHED_DB from .lcia import fill_characterization_factors_matrices -from .pathways import _group_technosphere_indices -from .utils import get_unit_conversion_factors, fetch_indices, check_unclassified_activities - +from .utils import get_unit_conversion_factors, fetch_indices, check_unclassified_activities, \ + _group_technosphere_indices logging.basicConfig( level=logging.DEBUG, @@ -54,7 +53,11 @@ def read_indices_csv(file_path: Path) -> dict[tuple[str, str, str, str], int]: with open(file_path) as read_obj: csv_reader = csv.reader(read_obj, delimiter=";") for row in csv_reader: - indices[(row[0], row[1], row[2], row[3])] = int(row[4]) + try: + indices[(row[0], row[1], row[2], row[3])] = int(row[4]) + except IndexError as err: + logging.error(f"Error reading row {row} from {file_path}: {err}. " + f"Could it be that the file uses commas instead of semicolons?") return indices diff --git a/pathways/pathways.py b/pathways/pathways.py index 7f28275..57cf76d 100644 --- a/pathways/pathways.py +++ b/pathways/pathways.py @@ -10,7 +10,6 @@ from multiprocessing import Pool, cpu_count from typing import List, Optional -import datapackage import numpy as np import pandas import pandas as pd @@ -38,32 +37,6 @@ warnings.filterwarnings("ignore") -def _group_technosphere_indices( - technosphere_indices: dict, group_by, group_values: list -) -> dict: - """ - Generalized function to group technosphere indices by an arbitrary attribute (category, location, etc.). - - :param technosphere_indices: Mapping of activities to their indices in the technosphere matrix. - :param group_by: A function that takes an activity and returns its group value (e.g., category or location). - :param group_values: The set of all possible group values (e.g., all categories or locations). 
- :return: A tuple containing a list of lists of indices, a dictionary mapping group values to lists of indices, - and a 2D numpy array of indices, where rows have been padded with -1 to ensure equal lengths. - """ - - acts_dict = {} - for value in group_values: - # Collect indices for activities belonging to the current group value - x = [ - int(technosphere_indices[a]) - for a in technosphere_indices - if group_by(a) == value - ] - acts_dict[value] = x - - return acts_dict - - def _get_mapping(data) -> dict: """ Read the mapping file which maps scenario variables to LCA datasets. @@ -91,7 +64,7 @@ def _read_scenario_data(data: dict, scenario: str): return pd.read_excel(filepath, index_col=0) -def _read_datapackage(datapackage: DataPackage) -> DataPackage: +def _read_datapackage(datapackage: str) -> DataPackage: """Read the datapackage.json file. :return: DataPackage @@ -111,7 +84,7 @@ class Pathways: def __init__(self, datapackage, debug=False): self.datapackage = datapackage self.data, dataframe, self.filepaths = validate_datapackage( - _read_datapackage() + _read_datapackage(datapackage) ) self.mapping = _get_mapping() self.mapping.update(self._get_final_energy_mapping()) diff --git a/pathways/utils.py b/pathways/utils.py index 23e380e..d4368da 100644 --- a/pathways/utils.py +++ b/pathways/utils.py @@ -36,6 +36,10 @@ def load_classifications(): """Load the activities classifications.""" + # check if file exists + if not Path(CLASSIFICATIONS).exists(): + raise FileNotFoundError(f"File {CLASSIFICATIONS} not found") + with open(CLASSIFICATIONS, "r") as f: data = yaml.full_load(f) @@ -53,6 +57,9 @@ def harmonize_units(scenario: xr.DataArray, variables: list) -> xr.DataArray: units = [scenario.attrs["units"][var] for var in variables] + if len(variables) == 0: + raise ValueError("Empty list of variables") + # if not all units are the same, we need to convert if len(set(units)) > 1: if all(x in ["PJ/yr", "EJ/yr", "PJ/yr."] for x in units): @@ -133,6 +140,10 @@ def 
create_lca_results_array( :rtype: xr.DataArray """ + # check if any of the list parameters is empty, and if so, throw an error + if not all([methods, years, regions, locations, models, scenarios]): + raise ValueError("Empty list parameter") + # Define the coordinates for the xarray DataArray coords = { "act_category": list(set(classifications.values())), @@ -450,3 +461,29 @@ def check_unclassified_activities( writer.writerows(missing_classifications) return missing_classifications + + +def _group_technosphere_indices( + technosphere_indices: dict, group_by, group_values: list +) -> dict: + """ + Generalized function to group technosphere indices by an arbitrary attribute (category, location, etc.). + + :param technosphere_indices: Mapping of activities to their indices in the technosphere matrix. + :param group_by: A function that takes an activity and returns its group value (e.g., category or location). + :param group_values: The set of all possible group values (e.g., all categories or locations). + :return: A dictionary mapping each group value to the list of technosphere matrix indices of the activities + for which `group_by` returns that value (an empty list when no activity matches). 
+ """ + + acts_dict = {} + for value in group_values: + # Collect indices for activities belonging to the current group value + x = [ + int(technosphere_indices[a]) + for a in technosphere_indices + if group_by(a) == value + ] + acts_dict[value] = x + + return acts_dict diff --git a/tests/test_lca.py b/tests/test_lca.py new file mode 100644 index 0000000..dad1044 --- /dev/null +++ b/tests/test_lca.py @@ -0,0 +1,50 @@ +import pytest +from unittest.mock import mock_open, patch +from pathways.lca import read_indices_csv, load_matrix_and_index +from pathlib import Path +import numpy as np + + +def test_read_indices_csv_success(): + mock_csv_data = "activity;product;location;unit;1\nanother_activity;another_product;another_location;another_unit;2" + expected_dict = { + ('activity', 'product', 'location', 'unit'): 1, + ('another_activity', 'another_product', 'another_location', 'another_unit'): 2, + } + with patch("builtins.open", mock_open(read_data=mock_csv_data)): + result = read_indices_csv(Path("dummy_path.csv")) + assert result == expected_dict + + +def test_load_matrix_and_index(tmp_path): + mock_csv_data = ("row;col;value;uncertainty type;loc;scale;shape;minimum;maximum;negative;flip" + "\n1;0;3.5;3;4;5;6;7;8;0;0" + "\n1;1;0.5;3;4;5;6;7;8;0;1") + expected_output = ( + np.array([3.5, 0.5]), + np.array([(0, 1), (1, 1)], dtype=[('row', 'i4'), ('col', 'i4')]), + np.array([False, True]), + np.array([(3, 4.0, 5.0, 6.0, 7.0, 8.0, False), (3, 4.0, 5.0, 6.0, 7.0, 8.0, False)], + dtype=[('uncertainty_type', 'i4'), ('loc', 'f4'), ('scale', 'f4'), ('shape', 'f4'), ('minimum', 'f4'), + ('maximum', 'f4'), ('negative', '?')]) + ) + + # Write mock CSV data to a temporary file + temp_file = tmp_path / "temp.csv" + temp_file.write_text(mock_csv_data) + + # Call the function with the path to the temporary file + data_array, indices_array, flip_array, distributions_array = load_matrix_and_index(temp_file) + + print("distributions_array", distributions_array) + 
print("expected_output", expected_output[3]) + + # Check that the output matches the expected output + # but they have different dtypes + + assert np.allclose(data_array, expected_output[0]) + assert np.array_equal(indices_array, expected_output[1]) + assert np.array_equal(flip_array, expected_output[2]) + assert np.array_equal(distributions_array, expected_output[3]) + + diff --git a/tests/test_lcia.py b/tests/test_lcia.py new file mode 100644 index 0000000..3f16d2c --- /dev/null +++ b/tests/test_lcia.py @@ -0,0 +1,69 @@ +import pytest +from unittest.mock import mock_open, patch +from pathways.lcia import get_lcia_method_names, format_lcia_method_exchanges, fill_characterization_factors_matrices +from scipy.sparse import csr_matrix +import numpy as np +import json + + +def test_get_lcia_method_names_success(): + mock_data = '[{"name": ["IPCC", "2021", "Global Warming Potential"]}, {"name": ["ReCiPe", "2016", "Midpoint"]} ]' + expected_result = ["IPCC - 2021 - Global Warming Potential", "ReCiPe - 2016 - Midpoint"] + with patch("builtins.open", mock_open(read_data=mock_data)): + with patch("json.load", return_value=json.loads(mock_data)): + method_names = get_lcia_method_names() + assert method_names == expected_result, "Method names not correctly formatted" + + +def test_format_lcia_method_exchanges(): + method_input = { + "exchanges": [ + {"name": "CO2", "categories": ["air"], "amount": 1}, + {"name": "CH4", "categories": ["air", "low population density, long-term"], "amount": 25}, + ] + } + expected_output = { + ("CO2", "air", "unspecified"): 1, + ("CH4", "air", "low population density, long-term"): 25, + } + assert format_lcia_method_exchanges(method_input) == expected_output, "Exchange formatting incorrect" + + +@pytest.fixture +def mock_lcia_methods_data(): + """Returns mock LCIA methods similar to what get_lcia_methods would return.""" + return { + "IPCC 2021 - Global Warming Potential": { + ("CO2", "air", "unspecified"): 1, + ("CH4", "air", "low population 
density, long-term"): 25, + } + } + + +@pytest.fixture +def mock_biosphere_data(): + """Returns mock biosphere dictionary and matrix dict for testing.""" + biosphere_dict = { + ("CO2", "air", "unspecified"): 0, + ("CH4", "air", "low population density, long-term"): 1, + } + biosphere_matrix_dict = {0: 0, 1: 1} # Mapping of biosphere_dict indices to matrix indices + return biosphere_matrix_dict, biosphere_dict + + +def test_fill_characterization_factors_matrices(mock_lcia_methods_data, mock_biosphere_data): + methods = ["IPCC 2021 - Global Warming Potential"] + biosphere_matrix_dict, biosphere_dict = mock_biosphere_data + + with patch('pathways.lcia.get_lcia_methods', return_value=mock_lcia_methods_data): + matrix = fill_characterization_factors_matrices(methods, biosphere_matrix_dict, biosphere_dict, debug=False) + + assert isinstance(matrix, csr_matrix), "Output is not a CSR matrix" + assert matrix.shape == (len(methods), len(biosphere_matrix_dict)), "Matrix shape is incorrect" + + # Verifying content of the matrix + expected_data = np.array([1, 25]) + np.testing.assert_array_equal(matrix.data, expected_data, "Matrix data does not match expected values") + np.testing.assert_array_equal(matrix.indices, np.array([0, 1]), "Matrix indices do not match expected values") + np.testing.assert_array_equal(matrix.indptr, np.array([0, 2]), "Matrix indices does not match expected values") + diff --git a/tests/test_pathways.py b/tests/test_pathways.py new file mode 100644 index 0000000..bddd05f --- /dev/null +++ b/tests/test_pathways.py @@ -0,0 +1,25 @@ +import pytest +from unittest.mock import Mock + +from pathways.pathways import _get_mapping +from pathways.utils import _group_technosphere_indices + + +def test_group_technosphere_indices(): + indices = {('activity1', 'location1'): 0, ('activity2', 'location2'): 1} + group_by = lambda x: x[1] # Group by location + group_values = ['location1', 'location2'] + expected = {'location1': [0], 'location2': [1]} + result = 
_group_technosphere_indices(indices, group_by, group_values) + assert result == expected, "Grouping does not match expected output" + + +def test_get_mapping(): + mock_data = Mock() + mock_data.get_resource.return_value.raw_read.return_value = """ + variable1: + dataset: [details] + """ + expected_mapping = {'variable1': {'dataset': ['details']}} + assert _get_mapping(mock_data) == expected_mapping, "Mapping does not match expected dictionary" + diff --git a/tests/test_utilities.py b/tests/test_utilities.py new file mode 100644 index 0000000..ba0c787 --- /dev/null +++ b/tests/test_utilities.py @@ -0,0 +1,148 @@ +import pytest +import xarray as xr +import numpy as np +from unittest.mock import mock_open, patch +from pathways.utils import load_classifications, harmonize_units, create_lca_results_array, clean_cache_directory + + +def test_load_classifications_success(): + mock_content = """ + activity1: classification1 + activity2: classification2 + """ + with patch("builtins.open", mock_open(read_data=mock_content)): + with patch("yaml.full_load", return_value={"activity1": "classification1", "activity2": "classification2"}): + classifications = load_classifications() + assert classifications == {"activity1": "classification1", "activity2": "classification2"} + + +def test_load_classifications_file_not_found(): + with patch('pathways.utils.CLASSIFICATIONS', new='non_existent_file.yaml'): + with pytest.raises(FileNotFoundError): + load_classifications() + + +def test_harmonize_units_conversion_required(): + scenario = xr.DataArray( + np.random.rand(2, 2, 2), + dims=["variables", "x", "y"], + coords={"variables": ["var1", "var2"]}, + ) + scenario.attrs["units"] = {"var1": "PJ/yr", "var2": "EJ/yr"} + variables = ["var1", "var2"] + + harmonized_scenario = harmonize_units(scenario, variables) + assert all(harmonized_scenario.attrs["units"][var] == "EJ/yr" for var in variables), "Units not harmonized to EJ/yr" + + +def test_harmonize_units_no_conversion_required(): + 
scenario = xr.DataArray( + np.random.rand(1, 2, 2), + dims=["variables", "x", "y"], + coords={"variables": ["var1"]}, + ) + scenario.attrs["units"] = {"var1": "EJ/yr"} + variables = ["var1"] + + harmonized_scenario = harmonize_units(scenario, variables) + assert harmonized_scenario.equals(scenario), "Scenario was modified unnecessarily" + + +def test_harmonize_units_missing_units_attribute(): + scenario = xr.DataArray( + np.random.rand(1, 2, 2), + dims=["variables", "x", "y"], + coords={"variables": ["var1"]}, + ) + variables = ["var1"] + + with pytest.raises(KeyError): + harmonize_units(scenario, variables) + + +def test_harmonize_units_empty_data_array(): + scenario = xr.DataArray( + [[[1]], [[2]], [[3]]], + dims=["variables", "x", "y"], + coords={"variables": ["var1", "var2", "var3"]} + ) + scenario.attrs["units"] = {} + variables = [] + + # should return ValueError + with pytest.raises(ValueError): + harmonize_units(scenario, variables) + + +def test_create_lca_results_array_structure_and_initialization(): + methods = ['method1', 'method2'] + years = [2020, 2025] + regions = ['region1', 'region2'] + locations = ['location1', 'location2'] + models = ['model1', 'model2'] + scenarios = ['scenario1', 'scenario2'] + classifications = {'activity1': 'category1', 'activity2': 'category2'} + mapping = {'variable1': 'dataset1', 'variable2': 'dataset2'} + + result = create_lca_results_array( + methods, years, regions, locations, models, scenarios, classifications, mapping + ) + + # Check dimensions and coordinates + assert 'act_category' in result.coords + assert 'impact_category' in result.coords + assert 'year' in result.coords + assert 'region' in result.coords + assert 'model' in result.coords + assert 'scenario' in result.coords + assert set(result.coords['impact_category'].values) == set(methods) + assert set(result.coords['year'].values) == set(years) + assert set(result.coords['region'].values) == set(regions) + assert np.all(result == 0), "DataArray should be 
initialized with zeros" + + +def test_create_lca_results_array_with_distributions(): + methods = ['method1'] + years = [2020] + regions = ['region1'] + locations = ['location1'] + models = ['model1'] + scenarios = ['scenario1'] + classifications = {'activity1': 'category1'} + mapping = {'variable1': 'dataset1'} + + result = create_lca_results_array( + methods, years, regions, locations, models, scenarios, classifications, mapping, use_distributions=True + ) + + # Check for the 'quantile' dimension + assert 'quantile' in result.dims + assert result.coords['quantile'].values.tolist() == [0.05, 0.5, 0.95] + + +def test_create_lca_results_array_empty_inputs(): + with pytest.raises(Exception): # Assuming the function raises an exception for empty inputs + create_lca_results_array([], [], [], [], [], [], {}, {}) + + +def test_create_lca_results_array_input_validation(): + with pytest.raises(Exception): + create_lca_results_array(None, None, None, None, None, None, None, None) + + +def test_clean_cache_directory(tmp_path, monkeypatch): + # Use a temporary directory to simulate the cache directory + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + (cache_dir / "temp_cache_file").write_text("This is a cache file.") + non_cache_dir = tmp_path / "non_cache" + non_cache_dir.mkdir() + (non_cache_dir / "temp_non_cache_file").write_text("This should remain.") + + # Use monkeypatch to set DIR_CACHED_DB for the duration of the test + monkeypatch.setattr('pathways.utils.DIR_CACHED_DB', str(cache_dir)) + + clean_cache_directory() + + assert not (cache_dir / "temp_cache_file").exists(), "Cache file was not deleted" + assert (non_cache_dir / "temp_non_cache_file").exists(), "Non-cache file was incorrectly deleted"