Add unit tests

polca · Apr 11, 2024 · 7c5d05c · 7c5d05c
1 parent 7ee5e96
commit 7c5d05c
Show file tree

Hide file tree

Showing 9 changed files with 341 additions and 36 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -144,7 +144,7 @@ jobs:
       - uses: conda-incubator/setup-miniconda@v2
         with:
           python-version: ${{ matrix.python-version }}
-          channels: conda-forge,cmutel,konstantinstadler,haasad,pascallesage,romainsacchi
+          channels: conda-forge
           allow-softlinks: true
           channel-priority: strict
           auto-update-conda: true

diff --git a/pathways/data_validation.py b/pathways/data_validation.py
@@ -69,7 +69,7 @@ def validate_datapackage(
     validate_scenario_data(dataframe)
 
     # Check that the mapping is valid
-    validate_mapping(datapackage.get_resource("mapping"), dataframe)
+    validate_mapping(datapackage.get_resource("mapping"))
 
     # fetch filepaths to resources
     filepaths = []

diff --git a/pathways/lca.py b/pathways/lca.py
@@ -14,17 +14,16 @@
 import bw_processing as bwp
 import numpy as np
 import pyprind
-from bw2calc import MonteCarloLCA
+from bw2calc.monte_carlo import MonteCarloLCA
 from bw_processing import Datapackage
 from numpy import ndarray, dtype
 from scipy import sparse
 from scipy.sparse import csr_matrix
 
 from .filesystem_constants import DIR_CACHED_DB
 from .lcia import fill_characterization_factors_matrices
-from .pathways import _group_technosphere_indices
-from .utils import get_unit_conversion_factors, fetch_indices, check_unclassified_activities
-
+from .utils import get_unit_conversion_factors, fetch_indices, check_unclassified_activities, \
+    _group_technosphere_indices
 
 logging.basicConfig(
     level=logging.DEBUG,
@@ -54,7 +53,11 @@ def read_indices_csv(file_path: Path) -> dict[tuple[str, str, str, str], int]:
     with open(file_path) as read_obj:
         csv_reader = csv.reader(read_obj, delimiter=";")
         for row in csv_reader:
-            indices[(row[0], row[1], row[2], row[3])] = int(row[4])
+            try:
+                indices[(row[0], row[1], row[2], row[3])] = int(row[4])
+            except IndexError as err:
+                logging.error(f"Error reading row {row} from {file_path}: {err}. "
+                              f"Could it be that the file uses commas instead of semicolons?")
     return indices
 
 

diff --git a/pathways/pathways.py b/pathways/pathways.py
@@ -10,7 +10,6 @@
 from multiprocessing import Pool, cpu_count
 from typing import List, Optional
 
-import datapackage
 import numpy as np
 import pandas
 import pandas as pd
@@ -38,32 +37,6 @@
 warnings.filterwarnings("ignore")
 
 
-def _group_technosphere_indices(
-    technosphere_indices: dict, group_by, group_values: list
-) -> dict:
-    """
-    Generalized function to group technosphere indices by an arbitrary attribute (category, location, etc.).
-
-    :param technosphere_indices: Mapping of activities to their indices in the technosphere matrix.
-    :param group_by: A function that takes an activity and returns its group value (e.g., category or location).
-    :param group_values: The set of all possible group values (e.g., all categories or locations).
-    :return: A tuple containing a list of lists of indices, a dictionary mapping group values to lists of indices,
-             and a 2D numpy array of indices, where rows have been padded with -1 to ensure equal lengths.
-    """
-
-    acts_dict = {}
-    for value in group_values:
-        # Collect indices for activities belonging to the current group value
-        x = [
-            int(technosphere_indices[a])
-            for a in technosphere_indices
-            if group_by(a) == value
-        ]
-        acts_dict[value] = x
-
-    return acts_dict
-
-
 def _get_mapping(data) -> dict:
     """
     Read the mapping file which maps scenario variables to LCA datasets.
@@ -91,7 +64,7 @@ def _read_scenario_data(data: dict, scenario: str):
         return pd.read_excel(filepath, index_col=0)
 
 
-def _read_datapackage(datapackage: DataPackage) -> DataPackage:
+def _read_datapackage(datapackage: str) -> DataPackage:
     """Read the datapackage.json file.
 
     :return: DataPackage
@@ -111,7 +84,7 @@ class Pathways:
     def __init__(self, datapackage, debug=False):
         self.datapackage = datapackage
         self.data, dataframe, self.filepaths = validate_datapackage(
-            _read_datapackage()
+            _read_datapackage(datapackage)
         )
         self.mapping = _get_mapping()
         self.mapping.update(self._get_final_energy_mapping())

diff --git a/pathways/utils.py b/pathways/utils.py
@@ -36,6 +36,10 @@
 def load_classifications():
     """Load the activities classifications."""
 
+    # check if file exists
+    if not Path(CLASSIFICATIONS).exists():
+        raise FileNotFoundError(f"File {CLASSIFICATIONS} not found")
+
     with open(CLASSIFICATIONS, "r") as f:
         data = yaml.full_load(f)
 
@@ -53,6 +57,9 @@ def harmonize_units(scenario: xr.DataArray, variables: list) -> xr.DataArray:
 
     units = [scenario.attrs["units"][var] for var in variables]
 
+    if len(variables) == 0:
+        raise ValueError("Empty list of variables")
+
     # if not all units are the same, we need to convert
     if len(set(units)) > 1:
         if all(x in ["PJ/yr", "EJ/yr", "PJ/yr."] for x in units):
@@ -133,6 +140,10 @@ def create_lca_results_array(
     :rtype: xr.DataArray
     """
 
+    # check if any of the list parameters is empty, and if so, throw an error
+    if not all([methods, years, regions, locations, models, scenarios]):
+        raise ValueError("Empty list parameter")
+
     # Define the coordinates for the xarray DataArray
     coords = {
         "act_category": list(set(classifications.values())),
@@ -450,3 +461,29 @@ def check_unclassified_activities(
             writer.writerows(missing_classifications)
 
     return missing_classifications
+
+
+def _group_technosphere_indices(
+    technosphere_indices: dict, group_by, group_values: list
+) -> dict:
+    """
+    Generalized function to group technosphere indices by an arbitrary attribute (category, location, etc.).
+
+    :param technosphere_indices: Mapping of activities to their indices in the technosphere matrix.
+    :param group_by: A function that takes an activity and returns its group value (e.g., category or location).
+    :param group_values: The set of all possible group values (e.g., all categories or locations).
+    :return: A dictionary mapping each group value to the list of technosphere indices of the
+             activities belonging to that group (an empty list when no activity matches).
+    """
+
+    acts_dict = {}
+    for value in group_values:
+        # Collect indices for activities belonging to the current group value
+        x = [
+            int(technosphere_indices[a])
+            for a in technosphere_indices
+            if group_by(a) == value
+        ]
+        acts_dict[value] = x
+
+    return acts_dict
diff --git a/tests/test_lca.py b/tests/test_lca.py
@@ -0,0 +1,50 @@
+import pytest
+from unittest.mock import mock_open, patch
+from pathways.lca import read_indices_csv, load_matrix_and_index
+from pathlib import Path
+import numpy as np
+
+
+def test_read_indices_csv_success():
+    mock_csv_data = "activity;product;location;unit;1\nanother_activity;another_product;another_location;another_unit;2"
+    expected_dict = {
+        ('activity', 'product', 'location', 'unit'): 1,
+        ('another_activity', 'another_product', 'another_location', 'another_unit'): 2,
+    }
+    with patch("builtins.open", mock_open(read_data=mock_csv_data)):
+        result = read_indices_csv(Path("dummy_path.csv"))
+        assert result == expected_dict
+
+
+def test_load_matrix_and_index(tmp_path):
+    mock_csv_data = ("row;col;value;uncertainty type;loc;scale;shape;minimum;maximum;negative;flip"
+                     "\n1;0;3.5;3;4;5;6;7;8;0;0"
+                     "\n1;1;0.5;3;4;5;6;7;8;0;1")
+    expected_output = (
+        np.array([3.5, 0.5]),
+        np.array([(0, 1), (1, 1)], dtype=[('row', 'i4'), ('col', 'i4')]),
+        np.array([False, True]),
+        np.array([(3, 4.0, 5.0, 6.0, 7.0, 8.0, False), (3, 4.0, 5.0, 6.0, 7.0, 8.0, False)],
+                 dtype=[('uncertainty_type', 'i4'), ('loc', 'f4'), ('scale', 'f4'), ('shape', 'f4'), ('minimum', 'f4'),
+                        ('maximum', 'f4'), ('negative', '?')])
+    )
+
+    # Write mock CSV data to a temporary file
+    temp_file = tmp_path / "temp.csv"
+    temp_file.write_text(mock_csv_data)
+
+    # Call the function with the path to the temporary file
+    data_array, indices_array, flip_array, distributions_array = load_matrix_and_index(temp_file)
+
+    print("distributions_array", distributions_array)
+    print("expected_output", expected_output[3])
+
+    # Check that the output matches the expected output
+    # but they have different dtypes
+
+    assert np.allclose(data_array, expected_output[0])
+    assert np.array_equal(indices_array, expected_output[1])
+    assert np.array_equal(flip_array, expected_output[2])
+    assert np.array_equal(distributions_array, expected_output[3])
+
+
diff --git a/tests/test_lcia.py b/tests/test_lcia.py
@@ -0,0 +1,69 @@
+import pytest
+from unittest.mock import mock_open, patch
+from pathways.lcia import get_lcia_method_names, format_lcia_method_exchanges, fill_characterization_factors_matrices
+from scipy.sparse import csr_matrix
+import numpy as np
+import json
+
+
+def test_get_lcia_method_names_success():
+    mock_data = '[{"name": ["IPCC", "2021", "Global Warming Potential"]}, {"name": ["ReCiPe", "2016", "Midpoint"]} ]'
+    expected_result = ["IPCC - 2021 - Global Warming Potential", "ReCiPe - 2016 - Midpoint"]
+    with patch("builtins.open", mock_open(read_data=mock_data)):
+        with patch("json.load", return_value=json.loads(mock_data)):
+            method_names = get_lcia_method_names()
+            assert method_names == expected_result, "Method names not correctly formatted"
+
+
+def test_format_lcia_method_exchanges():
+    method_input = {
+        "exchanges": [
+            {"name": "CO2", "categories": ["air"], "amount": 1},
+            {"name": "CH4", "categories": ["air", "low population density, long-term"], "amount": 25},
+        ]
+    }
+    expected_output = {
+        ("CO2", "air", "unspecified"): 1,
+        ("CH4", "air", "low population density, long-term"): 25,
+    }
+    assert format_lcia_method_exchanges(method_input) == expected_output, "Exchange formatting incorrect"
+
+
+@pytest.fixture
+def mock_lcia_methods_data():
+    """Returns mock LCIA methods similar to what get_lcia_methods would return."""
+    return {
+        "IPCC 2021 - Global Warming Potential": {
+            ("CO2", "air", "unspecified"): 1,
+            ("CH4", "air", "low population density, long-term"): 25,
+        }
+    }
+
+
+@pytest.fixture
+def mock_biosphere_data():
+    """Returns mock biosphere dictionary and matrix dict for testing."""
+    biosphere_dict = {
+        ("CO2", "air", "unspecified"): 0,
+        ("CH4", "air", "low population density, long-term"): 1,
+    }
+    biosphere_matrix_dict = {0: 0, 1: 1}  # Mapping of biosphere_dict indices to matrix indices
+    return biosphere_matrix_dict, biosphere_dict
+
+
+def test_fill_characterization_factors_matrices(mock_lcia_methods_data, mock_biosphere_data):
+    methods = ["IPCC 2021 - Global Warming Potential"]
+    biosphere_matrix_dict, biosphere_dict = mock_biosphere_data
+
+    with patch('pathways.lcia.get_lcia_methods', return_value=mock_lcia_methods_data):
+        matrix = fill_characterization_factors_matrices(methods, biosphere_matrix_dict, biosphere_dict, debug=False)
+
+    assert isinstance(matrix, csr_matrix), "Output is not a CSR matrix"
+    assert matrix.shape == (len(methods), len(biosphere_matrix_dict)), "Matrix shape is incorrect"
+
+    # Verifying content of the matrix
+    expected_data = np.array([1, 25])
+    np.testing.assert_array_equal(matrix.data, expected_data, "Matrix data does not match expected values")
+    np.testing.assert_array_equal(matrix.indices, np.array([0, 1]), "Matrix indices do not match expected values")
+    np.testing.assert_array_equal(matrix.indptr, np.array([0, 2]), "Matrix indices does not match expected values")
+
diff --git a/tests/test_pathways.py b/tests/test_pathways.py
@@ -0,0 +1,25 @@
+import pytest
+from unittest.mock import Mock
+
+from pathways.pathways import _get_mapping
+from pathways.utils import _group_technosphere_indices
+
+
+def test_group_technosphere_indices():
+    indices = {('activity1', 'location1'): 0, ('activity2', 'location2'): 1}
+    group_by = lambda x: x[1]  # Group by location
+    group_values = ['location1', 'location2']
+    expected = {'location1': [0], 'location2': [1]}
+    result = _group_technosphere_indices(indices, group_by, group_values)
+    assert result == expected, "Grouping does not match expected output"
+
+
+def test_get_mapping():
+    mock_data = Mock()
+    mock_data.get_resource.return_value.raw_read.return_value = """
+    variable1:
+      dataset: [details]
+    """
+    expected_mapping = {'variable1': {'dataset': ['details']}}
+    assert _get_mapping(mock_data) == expected_mapping, "Mapping does not match expected dictionary"
+