Skip to content

Commit

Permalink
Fix filepaths issue
Browse files Browse the repository at this point in the history
  • Loading branch information
romainsacchi committed Apr 9, 2024
1 parent 1ee4b33 commit 9e4f6df
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 137 deletions.
2 changes: 2 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

91 changes: 0 additions & 91 deletions dev/missing_classifications.csv

This file was deleted.

Binary file modified dev/program.prof
Binary file not shown.
Binary file removed dev/results_image_SSP2_metals.nc
Binary file not shown.
10 changes: 8 additions & 2 deletions pathways/data_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
)


def validate_datapackage(datapackage: datapackage.DataPackage):
def validate_datapackage(datapackage: datapackage.DataPackage) -> (datapackage.DataPackage, pd.DataFrame, list):
"""
Validate the datapackage.json file.
The datapackage must be valid according to the Frictionless Data.
Expand Down Expand Up @@ -69,7 +69,13 @@ def validate_datapackage(datapackage: datapackage.DataPackage):
# Check that the mapping is valid
validate_mapping(datapackage.get_resource("mapping"), dataframe)

return datapackage, dataframe
# fetch filepaths to resources
filepaths = []
for resource in datapackage.resources:
if "matrix" in resource.descriptor["name"]:
filepaths.append(resource.source)

return datapackage, dataframe, filepaths


def validate_scenario_data(dataframe: pd.DataFrame) -> bool:
Expand Down
79 changes: 45 additions & 34 deletions pathways/lca.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,54 +96,65 @@ def load_matrix_and_index(


def get_lca_matrices(
    filepaths: List[str],
    model: str,
    scenario: str,
    year: int,
) -> Tuple[Datapackage, Dict, Dict]:
    """
    Retrieve Life Cycle Assessment (LCA) matrices from disk.

    Locates the A (technosphere) and B (biosphere) matrix CSV files — and
    their index files — among *filepaths*, loads them, and packs the matrix
    data into a brightway datapackage.

    :param filepaths: A list of filepaths to the LCA matrices.
    :type filepaths: List[str]
    :param model: The name of the model.
    :type model: str
    :param scenario: The name of the scenario.
    :type scenario: str
    :param year: The year of the scenario.
    :type year: int
    :return: A brightway datapackage with the technosphere and biosphere
        matrices, plus the A- and B-matrix index dictionaries.
    :rtype: Tuple[Datapackage, Dict, Dict]
    :raises FileNotFoundError: If an expected matrix or index file is missing.
    :raises ValueError: If the number of matching files is not exactly 4.
    """

    # The correct filepaths are the strings that contain the model,
    # scenario and year, carry the expected suffix, and exist on disk.
    def filter_filepaths(suffix: str, contains: List[str]) -> List[Path]:
        # Keep only existing files whose path mentions every keyword.
        return [
            Path(fp)
            for fp in filepaths
            if all(kw in fp for kw in contains)
            and Path(fp).suffix == suffix
            and Path(fp).exists()
        ]

    def select_filepath(keyword: str, fps: List[Path]) -> Path:
        # Pick the first candidate whose filename contains *keyword*;
        # fail loudly rather than silently returning nothing.
        matches = [fp for fp in fps if keyword in fp.name]
        if not matches:
            raise FileNotFoundError(
                f"Expected file containing '{keyword}' not found."
            )
        return matches[0]

    # Exactly four CSVs are expected: A matrix, B matrix, and their indices.
    fps = filter_filepaths(".csv", [model, scenario, str(year)])
    if len(fps) != 4:
        raise ValueError(f"Expected 4 filepaths, got {len(fps)}")

    fp_A_inds = select_filepath("A_matrix_index", fps)
    fp_B_inds = select_filepath("B_matrix_index", fps)
    A_inds = read_indices_csv(fp_A_inds)
    B_inds = read_indices_csv(fp_B_inds)

    # Create brightway datapackage to hold the matrix vectors.
    dp = bwp.create_datapackage()

    # Exclude the index files first, otherwise "A_matrix" would also
    # match "A_matrix_index".
    data_fps = [fp for fp in fps if "index" not in fp.name]
    fp_A = select_filepath("A_matrix", data_fps)
    fp_B = select_filepath("B_matrix", data_fps)

    # Load matrices and add them to the datapackage. Only the technosphere
    # matrix carries a sign-flip array; the biosphere matrix does not.
    for matrix_name, fp in [("technosphere_matrix", fp_A), ("biosphere_matrix", fp_B)]:
        data, indices, sign, distributions = load_matrix_and_index(fp)
        dp.add_persistent_vector(
            matrix=matrix_name,
            indices_array=indices,
            data_array=data,
            flip_array=sign if matrix_name == "technosphere_matrix" else None,
            distributions_array=distributions,
        )

    return dp, A_inds, B_inds

Expand Down
23 changes: 13 additions & 10 deletions pathways/pathways.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int
d = []

for v, variable in enumerate(variables):

idx, dataset = vars_idx[variable]["idx"], vars_idx[variable]["dataset"]
# Compute the unit conversion vector for the given activities
dataset_unit = dataset[2]
Expand All @@ -295,16 +296,18 @@ def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int
year=year,
)

# If the total demand is zero, return None
if (
share = (
demand
/ scenarios.sel(
region=region,
model=model,
pathway=scenario,
year=year,
).sum(dim="variables")
) < demand_cutoff:
)

# If the total demand is zero, return None
if share < demand_cutoff:
continue

variables_demand[variable] = {
Expand Down Expand Up @@ -346,7 +349,7 @@ def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int
f"ref.: {dataset[1]}, unit: {dataset[2][:50]}, idx: {idx},"
f"loc.: {dataset[3]}, demand: {round(float(demand.values * float(unit_vector)), 2)}, "
f"unit conv.: {unit_vector}, "
f"impact: {round(characterized_inventory.sum(axis=-1) / (demand.values * float(unit_vector)), 3)}. "
f"impact: {np.round(characterized_inventory.sum(axis=-1) / (demand.values * float(unit_vector)), 3)}. "
)

id_array = uuid.uuid4()
Expand All @@ -371,7 +374,7 @@ def _calculate_year(args):
variables,
methods,
demand_cutoff,
datapackage,
filepaths,
mapping,
units,
lca_results,
Expand All @@ -395,7 +398,7 @@ def _calculate_year(args):
# Try to load LCA matrices for the given model, scenario, and year
try:
bw_datapackage, technosphere_indices, biosphere_indices = get_lca_matrices(
datapackage, model, scenario, year
filepaths, model, scenario, year
)

except FileNotFoundError:
Expand Down Expand Up @@ -544,7 +547,7 @@ class Pathways:

def __init__(self, datapackage, debug=False):
self.datapackage = datapackage
self.data, dataframe = validate_datapackage(self.read_datapackage())
self.data, dataframe, self.filepaths = validate_datapackage(self.read_datapackage())
self.mapping = self.get_mapping()
self.mapping.update(self.get_final_energy_mapping())
self.debug = debug
Expand Down Expand Up @@ -809,7 +812,7 @@ def calculate(
# Create xarray for storing LCA results if not already present
if self.lca_results is None:
_, technosphere_index, biosphere_index = get_lca_matrices(
self.datapackage, models[0], scenarios[0], years[0]
self.filepaths, models[0], scenarios[0], years[0]
)
locations = fetch_inventories_locations(technosphere_index)

Expand Down Expand Up @@ -841,7 +844,7 @@ def calculate(
variables,
methods,
demand_cutoff,
self.datapackage,
self.filepaths,
self.mapping,
self.units,
self.lca_results,
Expand Down Expand Up @@ -875,7 +878,7 @@ def calculate(
variables,
methods,
demand_cutoff,
self.datapackage,
self.filepaths,
self.mapping,
self.units,
self.lca_results,
Expand Down

0 comments on commit 9e4f6df

Please sign in to comment.