Skip to content

Commit

Permalink
Fix filepaths issue
Browse files Browse the repository at this point in the history
  • Loading branch information
romainsacchi committed Apr 9, 2024
1 parent 1ee4b33 commit 9e4f6df
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 137 deletions.
2 changes: 2 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

91 changes: 0 additions & 91 deletions dev/missing_classifications.csv

This file was deleted.

Binary file modified dev/program.prof
Binary file not shown.
Binary file removed dev/results_image_SSP2_metals.nc
Binary file not shown.
10 changes: 8 additions & 2 deletions pathways/data_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
)


def validate_datapackage(datapackage: datapackage.DataPackage):
def validate_datapackage(datapackage: datapackage.DataPackage) -> (datapackage.DataPackage, pd.DataFrame, list):
"""
Validate the datapackage.json file.
The datapackage must be valid according to the Frictionless Data.
Expand Down Expand Up @@ -69,7 +69,13 @@ def validate_datapackage(datapackage: datapackage.DataPackage):
# Check that the mapping is valid
validate_mapping(datapackage.get_resource("mapping"), dataframe)

return datapackage, dataframe
# fetch filepaths to resources
filepaths = []
for resource in datapackage.resources:
if "matrix" in resource.descriptor["name"]:
filepaths.append(resource.source)

return datapackage, dataframe, filepaths


def validate_scenario_data(dataframe: pd.DataFrame) -> bool:
Expand Down
79 changes: 45 additions & 34 deletions pathways/lca.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,54 +96,65 @@ def load_matrix_and_index(


def get_lca_matrices(
    filepaths: List[str],
    model: str,
    scenario: str,
    year: int,
) -> Tuple[Datapackage, Dict, Dict]:
    """
    Retrieve Life Cycle Assessment (LCA) matrices from disk.

    Locates the A (technosphere) and B (biosphere) matrix CSV files — and
    their index files — among *filepaths*, loads them, and packs the matrix
    data into a brightway datapackage.

    :param filepaths: A list of filepaths to the LCA matrices.
    :type filepaths: List[str]
    :param model: The name of the model.
    :type model: str
    :param scenario: The name of the scenario.
    :type scenario: str
    :param year: The year of the scenario.
    :type year: int
    :return: A brightway datapackage with the technosphere and biosphere
        matrices, plus the A- and B-matrix index dictionaries.
    :rtype: Tuple[Datapackage, Dict, Dict]
    :raises FileNotFoundError: If an expected matrix or index file is missing.
    :raises ValueError: If the number of matching files is not exactly 4.
    """

    # The correct filepaths are the strings that contain the model,
    # scenario and year, carry the expected suffix, and exist on disk.
    def filter_filepaths(suffix: str, contains: List[str]) -> List[Path]:
        # Keep only existing files whose path mentions every keyword.
        return [
            Path(fp)
            for fp in filepaths
            if all(kw in fp for kw in contains)
            and Path(fp).suffix == suffix
            and Path(fp).exists()
        ]

    def select_filepath(keyword: str, fps: List[Path]) -> Path:
        # Pick the first candidate whose filename contains *keyword*;
        # fail loudly rather than silently returning nothing.
        matches = [fp for fp in fps if keyword in fp.name]
        if not matches:
            raise FileNotFoundError(
                f"Expected file containing '{keyword}' not found."
            )
        return matches[0]

    # Exactly four CSVs are expected: A matrix, B matrix, and their indices.
    fps = filter_filepaths(".csv", [model, scenario, str(year)])
    if len(fps) != 4:
        raise ValueError(f"Expected 4 filepaths, got {len(fps)}")

    fp_A_inds = select_filepath("A_matrix_index", fps)
    fp_B_inds = select_filepath("B_matrix_index", fps)
    A_inds = read_indices_csv(fp_A_inds)
    B_inds = read_indices_csv(fp_B_inds)

    # Create brightway datapackage to hold the matrix vectors.
    dp = bwp.create_datapackage()

    # Exclude the index files first, otherwise "A_matrix" would also
    # match "A_matrix_index".
    data_fps = [fp for fp in fps if "index" not in fp.name]
    fp_A = select_filepath("A_matrix", data_fps)
    fp_B = select_filepath("B_matrix", data_fps)

    # Load matrices and add them to the datapackage. Only the technosphere
    # matrix carries a sign-flip array; the biosphere matrix does not.
    for matrix_name, fp in [("technosphere_matrix", fp_A), ("biosphere_matrix", fp_B)]:
        data, indices, sign, distributions = load_matrix_and_index(fp)
        dp.add_persistent_vector(
            matrix=matrix_name,
            indices_array=indices,
            data_array=data,
            flip_array=sign if matrix_name == "technosphere_matrix" else None,
            distributions_array=distributions,
        )

    return dp, A_inds, B_inds

Expand Down
23 changes: 13 additions & 10 deletions pathways/pathways.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int
d = []

for v, variable in enumerate(variables):

idx, dataset = vars_idx[variable]["idx"], vars_idx[variable]["dataset"]
# Compute the unit conversion vector for the given activities
dataset_unit = dataset[2]
Expand All @@ -295,16 +296,18 @@ def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int
year=year,
)

# If the total demand is zero, return None
if (
share = (
demand
/ scenarios.sel(
region=region,
model=model,
pathway=scenario,
year=year,
).sum(dim="variables")
) < demand_cutoff:
)

# If the total demand is zero, return None
if share < demand_cutoff:
continue

variables_demand[variable] = {
Expand Down Expand Up @@ -346,7 +349,7 @@ def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int
f"ref.: {dataset[1]}, unit: {dataset[2][:50]}, idx: {idx},"
f"loc.: {dataset[3]}, demand: {round(float(demand.values * float(unit_vector)), 2)}, "
f"unit conv.: {unit_vector}, "
f"impact: {round(characterized_inventory.sum(axis=-1) / (demand.values * float(unit_vector)), 3)}. "
f"impact: {np.round(characterized_inventory.sum(axis=-1) / (demand.values * float(unit_vector)), 3)}. "
)

id_array = uuid.uuid4()
Expand All @@ -371,7 +374,7 @@ def _calculate_year(args):
variables,
methods,
demand_cutoff,
datapackage,
filepaths,
mapping,
units,
lca_results,
Expand All @@ -395,7 +398,7 @@ def _calculate_year(args):
# Try to load LCA matrices for the given model, scenario, and year
try:
bw_datapackage, technosphere_indices, biosphere_indices = get_lca_matrices(
datapackage, model, scenario, year
filepaths, model, scenario, year
)

except FileNotFoundError:
Expand Down Expand Up @@ -544,7 +547,7 @@ class Pathways:

def __init__(self, datapackage, debug=False):
self.datapackage = datapackage
self.data, dataframe = validate_datapackage(self.read_datapackage())
self.data, dataframe, self.filepaths = validate_datapackage(self.read_datapackage())
self.mapping = self.get_mapping()
self.mapping.update(self.get_final_energy_mapping())
self.debug = debug
Expand Down Expand Up @@ -809,7 +812,7 @@ def calculate(
# Create xarray for storing LCA results if not already present
if self.lca_results is None:
_, technosphere_index, biosphere_index = get_lca_matrices(
self.datapackage, models[0], scenarios[0], years[0]
self.filepaths, models[0], scenarios[0], years[0]
)
locations = fetch_inventories_locations(technosphere_index)

Expand Down Expand Up @@ -841,7 +844,7 @@ def calculate(
variables,
methods,
demand_cutoff,
self.datapackage,
self.filepaths,
self.mapping,
self.units,
self.lca_results,
Expand Down Expand Up @@ -875,7 +878,7 @@ def calculate(
variables,
methods,
demand_cutoff,
self.datapackage,
self.filepaths,
self.mapping,
self.units,
self.lca_results,
Expand Down

0 comments on commit 9e4f6df

Please sign in to comment.