From 7c5d05c86ec9c55fc9cb8cde0459978f687dd656 Mon Sep 17 00:00:00 2001
From: romainsacchi <romain@Romains-iMac.psi.ch>
Date: Thu, 11 Apr 2024 14:04:12 +0200
Subject: [PATCH] Add unit tests

---
 .github/workflows/main.yml  |   2 +-
 pathways/data_validation.py |   2 +-
 pathways/lca.py             |  13 ++--
 pathways/pathways.py        |  31 +-------
 pathways/utils.py           |  37 +++++++++
 tests/test_lca.py           |  50 ++++++++++++
 tests/test_lcia.py          |  69 +++++++++++++++++
 tests/test_pathways.py      |  25 ++++++
 tests/test_utilities.py     | 148 ++++++++++++++++++++++++++++++++++++
 9 files changed, 341 insertions(+), 36 deletions(-)
 create mode 100644 tests/test_lca.py
 create mode 100644 tests/test_lcia.py
 create mode 100644 tests/test_pathways.py
 create mode 100644 tests/test_utilities.py

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 8a83935..75e036c 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -144,7 +144,7 @@ jobs:
       - uses: conda-incubator/setup-miniconda@v2
         with:
           python-version: ${{ matrix.python-version }}
-          channels: conda-forge,cmutel,konstantinstadler,haasad,pascallesage,romainsacchi
+          channels: conda-forge
           allow-softlinks: true
           channel-priority: strict
           auto-update-conda: true
diff --git a/pathways/data_validation.py b/pathways/data_validation.py
index a2f9e1f..2e9dd96 100644
--- a/pathways/data_validation.py
+++ b/pathways/data_validation.py
@@ -69,7 +69,7 @@ def validate_datapackage(
     validate_scenario_data(dataframe)
 
     # Check that the mapping is valid
-    validate_mapping(datapackage.get_resource("mapping"), dataframe)
+    validate_mapping(datapackage.get_resource("mapping"))
 
     # fetch filepaths to resources
     filepaths = []
diff --git a/pathways/lca.py b/pathways/lca.py
index c754f5f..342975b 100644
--- a/pathways/lca.py
+++ b/pathways/lca.py
@@ -14,7 +14,7 @@
 import bw_processing as bwp
 import numpy as np
 import pyprind
-from bw2calc import MonteCarloLCA
+from bw2calc.monte_carlo import MonteCarloLCA
 from bw_processing import Datapackage
 from numpy import ndarray, dtype
 from scipy import sparse
@@ -22,9 +22,8 @@
 
 from .filesystem_constants import DIR_CACHED_DB
 from .lcia import fill_characterization_factors_matrices
-from .pathways import _group_technosphere_indices
-from .utils import get_unit_conversion_factors, fetch_indices, check_unclassified_activities
-
+from .utils import get_unit_conversion_factors, fetch_indices, check_unclassified_activities, \
+    _group_technosphere_indices
 
 logging.basicConfig(
     level=logging.DEBUG,
@@ -54,7 +53,11 @@ def read_indices_csv(file_path: Path) -> dict[tuple[str, str, str, str], int]:
     with open(file_path) as read_obj:
         csv_reader = csv.reader(read_obj, delimiter=";")
         for row in csv_reader:
-            indices[(row[0], row[1], row[2], row[3])] = int(row[4])
+            try:
+                indices[(row[0], row[1], row[2], row[3])] = int(row[4])
+            except IndexError as err:
+                logging.error(f"Error reading row {row} from {file_path}: {err}. "
+                              f"Could it be that the file uses commas instead of semicolons?")
     return indices
 
 
diff --git a/pathways/pathways.py b/pathways/pathways.py
index 7f28275..57cf76d 100644
--- a/pathways/pathways.py
+++ b/pathways/pathways.py
@@ -10,7 +10,6 @@
 from multiprocessing import Pool, cpu_count
 from typing import List, Optional
 
-import datapackage
 import numpy as np
 import pandas
 import pandas as pd
@@ -38,32 +37,6 @@
 warnings.filterwarnings("ignore")
 
 
-def _group_technosphere_indices(
-    technosphere_indices: dict, group_by, group_values: list
-) -> dict:
-    """
-    Generalized function to group technosphere indices by an arbitrary attribute (category, location, etc.).
-
-    :param technosphere_indices: Mapping of activities to their indices in the technosphere matrix.
-    :param group_by: A function that takes an activity and returns its group value (e.g., category or location).
-    :param group_values: The set of all possible group values (e.g., all categories or locations).
-    :return: A tuple containing a list of lists of indices, a dictionary mapping group values to lists of indices,
-             and a 2D numpy array of indices, where rows have been padded with -1 to ensure equal lengths.
-    """
-
-    acts_dict = {}
-    for value in group_values:
-        # Collect indices for activities belonging to the current group value
-        x = [
-            int(technosphere_indices[a])
-            for a in technosphere_indices
-            if group_by(a) == value
-        ]
-        acts_dict[value] = x
-
-    return acts_dict
-
-
 def _get_mapping(data) -> dict:
     """
     Read the mapping file which maps scenario variables to LCA datasets.
@@ -91,7 +64,7 @@ def _read_scenario_data(data: dict, scenario: str):
         return pd.read_excel(filepath, index_col=0)
 
 
-def _read_datapackage(datapackage: DataPackage) -> DataPackage:
+def _read_datapackage(datapackage: str) -> DataPackage:
     """Read the datapackage.json file.
 
     :return: DataPackage
@@ -111,7 +84,7 @@ class Pathways:
     def __init__(self, datapackage, debug=False):
         self.datapackage = datapackage
         self.data, dataframe, self.filepaths = validate_datapackage(
-            _read_datapackage()
+            _read_datapackage(datapackage)
         )
         self.mapping = _get_mapping()
         self.mapping.update(self._get_final_energy_mapping())
diff --git a/pathways/utils.py b/pathways/utils.py
index 23e380e..d4368da 100644
--- a/pathways/utils.py
+++ b/pathways/utils.py
@@ -36,6 +36,10 @@
 def load_classifications():
     """Load the activities classifications."""
 
+    # check if file exists
+    if not Path(CLASSIFICATIONS).exists():
+        raise FileNotFoundError(f"File {CLASSIFICATIONS} not found")
+
     with open(CLASSIFICATIONS, "r") as f:
         data = yaml.full_load(f)
 
@@ -53,6 +57,9 @@ def harmonize_units(scenario: xr.DataArray, variables: list) -> xr.DataArray:
 
     units = [scenario.attrs["units"][var] for var in variables]
 
+    if len(variables) == 0:
+        raise ValueError("Empty list of variables")
+
     # if not all units are the same, we need to convert
     if len(set(units)) > 1:
         if all(x in ["PJ/yr", "EJ/yr", "PJ/yr."] for x in units):
@@ -133,6 +140,10 @@ def create_lca_results_array(
     :rtype: xr.DataArray
     """
 
+    # check if any of the list parameters is empty, and if so, throw an error
+    if not all([methods, years, regions, locations, models, scenarios]):
+        raise ValueError("Empty list parameter")
+
     # Define the coordinates for the xarray DataArray
     coords = {
         "act_category": list(set(classifications.values())),
@@ -450,3 +461,29 @@ def check_unclassified_activities(
             writer.writerows(missing_classifications)
 
     return missing_classifications
+
+
+def _group_technosphere_indices(
+    technosphere_indices: dict, group_by, group_values: list
+) -> dict:
+    """
+    Group technosphere matrix indices by an arbitrary activity attribute (category, location, etc.).
+
+    :param technosphere_indices: Mapping of activities to their indices in the technosphere matrix.
+    :param group_by: A function that takes an activity and returns its group value (e.g., category or location).
+    :param group_values: The group values to collect indices for (e.g., all categories or locations).
+    :return: A dictionary mapping each group value to the list of integer indices of the activities
+             in that group; a group value matched by no activity maps to an empty list.
+    """
+
+    # One pass over the activities per group value: keep the index of every
+    # activity whose ``group_by`` result equals that value. Group values that
+    # match no activity are still present in the result, with an empty list.
+    return {
+        value: [
+            int(index)
+            for activity, index in technosphere_indices.items()
+            if group_by(activity) == value
+        ]
+        for value in group_values
+    }
diff --git a/tests/test_lca.py b/tests/test_lca.py
new file mode 100644
index 0000000..dad1044
--- /dev/null
+++ b/tests/test_lca.py
@@ -0,0 +1,50 @@
+import pytest
+from unittest.mock import mock_open, patch
+from pathways.lca import read_indices_csv, load_matrix_and_index
+from pathlib import Path
+import numpy as np
+
+
+def test_read_indices_csv_success():
+    """A semicolon-delimited file is parsed into a {(four-field key): int index} mapping."""
+    csv_text = "activity;product;location;unit;1\nanother_activity;another_product;another_location;another_unit;2"
+    with patch("builtins.open", mock_open(read_data=csv_text)):
+        parsed = read_indices_csv(Path("dummy_path.csv"))
+    assert parsed == {
+        ('activity', 'product', 'location', 'unit'): 1,
+        ('another_activity', 'another_product', 'another_location', 'another_unit'): 2,
+    }
+
+
+def test_load_matrix_and_index(tmp_path):
+    """Load a two-row, semicolon-delimited matrix file and compare all four returned arrays."""
+    mock_csv_data = ("row;col;value;uncertainty type;loc;scale;shape;minimum;maximum;negative;flip"
+                     "\n1;0;3.5;3;4;5;6;7;8;0;0"
+                     "\n1;1;0.5;3;4;5;6;7;8;0;1")
+
+    # Expected arrays, named after the values the function result is unpacked into below.
+    expected_data = np.array([3.5, 0.5])
+    expected_indices = np.array([(0, 1), (1, 1)], dtype=[('row', 'i4'), ('col', 'i4')])
+    expected_flip = np.array([False, True])
+    expected_distributions = np.array(
+        [(3, 4.0, 5.0, 6.0, 7.0, 8.0, False), (3, 4.0, 5.0, 6.0, 7.0, 8.0, False)],
+        dtype=[('uncertainty_type', 'i4'), ('loc', 'f4'), ('scale', 'f4'), ('shape', 'f4'), ('minimum', 'f4'),
+               ('maximum', 'f4'), ('negative', '?')],
+    )
+
+    # Write the mock CSV data to a temporary file so the function reads from a real path.
+    temp_file = tmp_path / "temp.csv"
+    temp_file.write_text(mock_csv_data)
+
+    data_array, indices_array, flip_array, distributions_array = load_matrix_and_index(temp_file)
+
+    # The values are floats, so they are compared with a tolerance; the other
+    # arrays are compared for exact equality.
+    # NOTE(review): the first expected index pair is (0, 1) while the file lists
+    # row=1, col=0 -- confirm the ordering against load_matrix_and_index.
+    assert np.allclose(data_array, expected_data)
+    assert np.array_equal(indices_array, expected_indices)
+    assert np.array_equal(flip_array, expected_flip)
+    assert np.array_equal(distributions_array, expected_distributions)
+
+
diff --git a/tests/test_lcia.py b/tests/test_lcia.py
new file mode 100644
index 0000000..3f16d2c
--- /dev/null
+++ b/tests/test_lcia.py
@@ -0,0 +1,69 @@
+import pytest
+from unittest.mock import mock_open, patch
+from pathways.lcia import get_lcia_method_names, format_lcia_method_exchanges, fill_characterization_factors_matrices
+from scipy.sparse import csr_matrix
+import numpy as np
+import json
+
+
+def test_get_lcia_method_names_success():
+    mock_data = '[{"name": ["IPCC", "2021", "Global Warming Potential"]}, {"name": ["ReCiPe", "2016", "Midpoint"]} ]'
+    expected_names = ["IPCC - 2021 - Global Warming Potential", "ReCiPe - 2016 - Midpoint"]
+    parsed_methods = json.loads(mock_data)  # what the patched json.load hands back
+    with patch("builtins.open", mock_open(read_data=mock_data)), patch("json.load", return_value=parsed_methods):
+        names = get_lcia_method_names()
+    assert names == expected_names, "Method names not correctly formatted"
+
+
+def test_format_lcia_method_exchanges():
+    """A single-element "categories" list is expected to get "unspecified" as the third key part."""
+    co2 = {"name": "CO2", "categories": ["air"], "amount": 1}
+    ch4 = {"name": "CH4", "categories": ["air", "low population density, long-term"], "amount": 25}
+
+    formatted = format_lcia_method_exchanges({"exchanges": [co2, ch4]})
+
+    # Expected keys are (name, first category, second category or "unspecified"); values are amounts.
+    assert formatted == {
+        ("CO2", "air", "unspecified"): 1,
+        ("CH4", "air", "low population density, long-term"): 25,
+    }, "Exchange formatting incorrect"
+
+
+@pytest.fixture
+def mock_lcia_methods_data():
+    """Provide a stand-in for the mapping that get_lcia_methods is patched to return."""
+    # A single method with two flows, keyed by three-part tuples.
+    gwp_factors = {
+        ("CO2", "air", "unspecified"): 1,
+        ("CH4", "air", "low population density, long-term"): 25,
+    }
+    return {"IPCC 2021 - Global Warming Potential": gwp_factors}
+
+
+@pytest.fixture
+def mock_biosphere_data():
+    """Provide the (biosphere_matrix_dict, biosphere_dict) pair used by the matrix-filling test."""
+    flow_keys = [
+        ("CO2", "air", "unspecified"),
+        ("CH4", "air", "low population density, long-term"),
+    ]
+    biosphere_dict = {key: position for position, key in enumerate(flow_keys)}
+    return {0: 0, 1: 1}, biosphere_dict  # matrix dict first: biosphere_dict index -> matrix index
+
+
+def test_fill_characterization_factors_matrices(mock_lcia_methods_data, mock_biosphere_data):
+    """With get_lcia_methods patched, one method and two flows should give a 1x2 CSR matrix."""
+    methods = ["IPCC 2021 - Global Warming Potential"]
+    biosphere_matrix_dict, biosphere_dict = mock_biosphere_data
+
+    with patch('pathways.lcia.get_lcia_methods', return_value=mock_lcia_methods_data):
+        matrix = fill_characterization_factors_matrices(methods, biosphere_matrix_dict, biosphere_dict, debug=False)
+
+    assert isinstance(matrix, csr_matrix), "Output is not a CSR matrix"
+    assert matrix.shape == (len(methods), len(biosphere_matrix_dict)), "Matrix shape is incorrect"
+
+    # Verify the CSR internals: stored values, their column indices, and the row pointer.
+    np.testing.assert_array_equal(matrix.data, np.array([1, 25]), "Matrix data does not match expected values")
+    np.testing.assert_array_equal(matrix.indices, np.array([0, 1]), "Matrix indices do not match expected values")
+    np.testing.assert_array_equal(matrix.indptr, np.array([0, 2]), "Matrix indptr does not match expected values")
+
diff --git a/tests/test_pathways.py b/tests/test_pathways.py
new file mode 100644
index 0000000..bddd05f
--- /dev/null
+++ b/tests/test_pathways.py
@@ -0,0 +1,25 @@
+import pytest
+from unittest.mock import Mock
+
+from pathways.pathways import _get_mapping
+from pathways.utils import _group_technosphere_indices
+
+
+def test_group_technosphere_indices():
+    """Two activities in two different locations end up in separate one-element groups."""
+    indices = {('activity1', 'location1'): 0, ('activity2', 'location2'): 1}
+    grouped = _group_technosphere_indices(
+        indices, group_by=lambda key: key[1], group_values=['location1', 'location2']  # group by location
+    )
+    assert grouped == {'location1': [0], 'location2': [1]}, "Grouping does not match expected output"
+
+
+def test_get_mapping():
+    raw_yaml = """
+    variable1:
+      dataset: [details]
+    """
+    datapackage_stub = Mock()  # stands in for the datapackage object handed to _get_mapping
+    datapackage_stub.get_resource.return_value.raw_read.return_value = raw_yaml
+    assert _get_mapping(datapackage_stub) == {'variable1': {'dataset': ['details']}}, "Mapping does not match expected dictionary"
+
diff --git a/tests/test_utilities.py b/tests/test_utilities.py
new file mode 100644
index 0000000..ba0c787
--- /dev/null
+++ b/tests/test_utilities.py
@@ -0,0 +1,148 @@
+import pytest
+import xarray as xr
+import numpy as np
+from unittest.mock import mock_open, patch
+from pathways.utils import load_classifications, harmonize_units, create_lca_results_array, clean_cache_directory
+
+
+def test_load_classifications_success():
+    """With open() and yaml.full_load patched, the loader is expected to return the parsed mapping."""
+    mock_content = """
+    activity1: classification1
+    activity2: classification2
+    """
+    parsed = {"activity1": "classification1", "activity2": "classification2"}
+    with patch("builtins.open", mock_open(read_data=mock_content)), patch("yaml.full_load", return_value=parsed):
+        assert load_classifications() == {"activity1": "classification1", "activity2": "classification2"}
+
+
+def test_load_classifications_file_not_found():
+    # Point the module-level path at a file that does not exist; loading must then raise.
+    with patch('pathways.utils.CLASSIFICATIONS', new='non_existent_file.yaml'), pytest.raises(FileNotFoundError):
+        load_classifications()
+
+
+def test_harmonize_units_conversion_required():
+    """Mixed PJ/yr and EJ/yr inputs are expected to come back with every unit reported as EJ/yr."""
+    scenario = xr.DataArray(
+        np.random.rand(2, 2, 2),
+        dims=["variables", "x", "y"],
+        coords={"variables": ["var1", "var2"]},
+    )
+    scenario.attrs["units"] = {"var1": "PJ/yr", "var2": "EJ/yr"}
+    variables = ["var1", "var2"]
+    harmonized_units = harmonize_units(scenario, variables).attrs["units"]
+    assert all(harmonized_units[var] == "EJ/yr" for var in variables), "Units not harmonized to EJ/yr"
+
+
+def test_harmonize_units_no_conversion_required():
+    """A scenario whose only unit is EJ/yr is expected to come back equal to the input."""
+    only_variable = ["var1"]
+    scenario = xr.DataArray(
+        np.random.rand(1, 2, 2),
+        dims=["variables", "x", "y"],
+        coords={"variables": ["var1"]},
+    )
+    scenario.attrs["units"] = {"var1": "EJ/yr"}
+    untouched = harmonize_units(scenario, only_variable)
+    assert untouched.equals(scenario), "Scenario was modified unnecessarily"
+
+
+def test_harmonize_units_missing_units_attribute():
+    """Without a "units" entry in attrs, the units lookup is expected to raise KeyError."""
+    # scenario.attrs["units"] is deliberately left unset.
+    scenario = xr.DataArray(
+        np.random.rand(1, 2, 2),
+        dims=["variables", "x", "y"],
+        coords={"variables": ["var1"]},
+    )
+    with pytest.raises(KeyError):
+        harmonize_units(scenario, ["var1"])
+
+
+def test_harmonize_units_empty_data_array():
+    """An empty ``variables`` list is expected to raise ValueError."""
+    scenario = xr.DataArray(
+        [[[1]], [[2]], [[3]]],
+        dims=["variables", "x", "y"],
+        coords={"variables": ["var1", "var2", "var3"]}
+    )
+    scenario.attrs["units"] = {}
+    no_variables = []
+
+    with pytest.raises(ValueError):
+        harmonize_units(scenario, no_variables)
+
+
+def test_create_lca_results_array_structure_and_initialization():
+    """The array is expected to expose the listed coordinates and start out filled with zeros."""
+    methods = ['method1', 'method2']
+    years = [2020, 2025]
+    regions = ['region1', 'region2']
+    locations = ['location1', 'location2']
+    models = ['model1', 'model2']
+    scenarios = ['scenario1', 'scenario2']
+    classifications = {'activity1': 'category1', 'activity2': 'category2'}
+    mapping = {'variable1': 'dataset1', 'variable2': 'dataset2'}
+
+    result = create_lca_results_array(
+        methods, years, regions, locations, models, scenarios, classifications, mapping
+    )
+
+    # Every expected coordinate name must be present.
+    for coord_name in ('act_category', 'impact_category', 'year', 'region', 'model', 'scenario'):
+        assert coord_name in result.coords
+
+    # Coordinates built from the inputs must carry exactly the input values.
+    expected_values = {'impact_category': methods, 'year': years, 'region': regions}
+    for coord_name, values in expected_values.items():
+        assert set(result.coords[coord_name].values) == set(values)
+
+    assert np.all(result == 0), "DataArray should be initialized with zeros"
+
+
+def test_create_lca_results_array_with_distributions():
+    """With use_distributions=True the result is expected to carry a three-point 'quantile' axis."""
+    methods, years, regions = ['method1'], [2020], ['region1']
+    locations, models, scenarios = ['location1'], ['model1'], ['scenario1']
+    classifications = {'activity1': 'category1'}
+    mapping = {'variable1': 'dataset1'}
+
+    result = create_lca_results_array(
+        methods, years, regions, locations, models, scenarios, classifications, mapping, use_distributions=True
+    )
+
+    # The extra dimension must exist ...
+    assert 'quantile' in result.dims
+
+    # ... and hold exactly these three quantiles, in this order.
+    quantiles = result.coords['quantile'].values.tolist()
+    assert quantiles == [0.05, 0.5, 0.95]
+
+
+def test_create_lca_results_array_empty_inputs():
+    with pytest.raises(ValueError, match="Empty list parameter"):  # raised by the function's emptiness guard
+        create_lca_results_array([], [], [], [], [], [], {}, {})
+
+
+def test_create_lca_results_array_input_validation():
+    with pytest.raises(ValueError, match="Empty list parameter"):  # None is falsy, so the same guard fires
+        create_lca_results_array(None, None, None, None, None, None, None, None)
+
+
+def test_clean_cache_directory(tmp_path, monkeypatch):
+    """Cleaning is expected to remove files from the cache directory and leave other directories alone."""
+    # Build a throwaway cache directory plus a sibling directory whose content must survive.
+    cache_dir = tmp_path / "cache"
+    non_cache_dir = tmp_path / "non_cache"
+    cache_dir.mkdir()
+    non_cache_dir.mkdir()
+    cached_file = cache_dir / "temp_cache_file"
+    kept_file = non_cache_dir / "temp_non_cache_file"
+    cached_file.write_text("This is a cache file.")
+    kept_file.write_text("This should remain.")
+
+    monkeypatch.setattr('pathways.utils.DIR_CACHED_DB', str(cache_dir))  # undone automatically after the test
+    clean_cache_directory()
+    assert not cached_file.exists(), "Cache file was not deleted"
+    assert kept_file.exists(), "Non-cache file was incorrectly deleted"