diff --git a/pathways/data/technologies_shares.yaml b/pathways/data/technologies_shares.yaml
new file mode 100644
index 0000000..aee1da2
--- /dev/null
+++ b/pathways/data/technologies_shares.yaml
@@ -0,0 +1,340 @@
+### When different technology configurations or alternatives are found,
+### we take the best-performing technology in terms of GWP.
+Wind: ### Check whether IAMs distinguish between onshore and offshore
+  onshore:
+    - name: electricity production, wind, <1MW turbine, onshore
+      reference product: electricity, high voltage
+      unit: kilowatt hour
+      2020:
+        value: 0.96
+      2050:
+        min: 0.83
+        max: 0.96
+        distribution: uniform
+      create copy: False
+  offshore:
+    - name: electricity production, wind, 1-3MW turbine, offshore
+      reference product: electricity, high voltage
+      unit: kilowatt hour
+      2020:
+        value: 0.04
+      2050:
+        min: 0.04
+        max: 0.17
+        distribution: uniform
+      create copy: False
+PV:
+  c-Si:
+    - name: electricity production, photovoltaic, at 93 kWp slanted-roof, multi-Si, laminated, integrated
+      reference product: electricity production, photovoltaic, at 93 kWp slanted-roof, multi-Si, laminated, integrated
+      unit: kilowatt hour
+      2020:
+        value: 0.9440
+      2050:
+        min: 0.6250
+        max: 0.9440
+        distribution: uniform
+      create copy: False
+  CdTe:
+    - name: electricity production, photovoltaic, at 3kWp slanted-roof, CdTe, panel, mounted
+      reference product: electricity production, photovoltaic, at 3kWp slanted-roof, CdTe, panel, mounted
+      unit: kilowatt hour
+      2020:
+        value: 0.0400
+      2050:
+        min: 0.0400
+        max: 0.2500
+        distribution: uniform
+      create copy: False
+  CIGS:
+    - name: electricity production, photovoltaic, 3kWp slanted-roof installation, CdTe, laminated, integrated # CHECK: dataset name says CdTe - confirm it is the intended proxy for CIGS
+      reference product: electricity, low voltage
+      unit: kilowatt hour
+      2020:
+        value: 0.015
+      2050:
+        min: 0.015
+        max: 0.1250
+        distribution: uniform
+      create copy: False
+  a-Si:
+    - name: electricity production, photovoltaic, 3kWp slanted-roof installation, a-Si, laminated, integrated
+      reference product: electricity, low voltage
+      unit: kilowatt hour
+      2020:
+        value: 0.0010
+      2050:
+        min: 0.0000
+        max: 0.0090
+        distribution: uniform
+      create copy: False
+  Perovskite:
+    - name: null
+      reference product: null
+      unit: kilowatt hour
+      2020:
+        value: 0.0000
+      2050:
+        min: 0.0000
+        max: 0.3000
+        distribution: uniform
+      create copy: False
+  GaAs:
+    - name: null
+      reference product: null
+      unit: kilowatt hour
+      2020:
+        value: 0.0000
+      2050:
+        min: 0.0000
+        max: 0.1500
+        distribution: uniform
+      create copy: False
+CSP:
+  Parabolic trough:
+    - name: electricity production, solar thermal parabolic trough, 50 MW
+      reference product: electricity, high voltage
+      unit: kilowatt hour
+      2020:
+        value: 0.9474
+      2050:
+        min: 0.5
+        max: 0.95
+        distribution: uniform
+      create copy: False
+  Solar tower:
+    - name: electricity production, solar tower power plant, 20 MW
+      reference product: electricity, high voltage
+      unit: kilowatt hour
+      2020:
+        value: 0.0526
+      2050:
+        min: 0.05
+        max: 0.5
+        distribution: uniform
+      create copy: False
+Fuel cell - Stationary:
+  PEMFC:
+    - name: electricity, residential, by conversion of hydrogen using fuel cell, PEM, allocated by exergy, distributed by pipeline, produced by Electrolysis, PEM using electricity from grid
+      reference product: electricity, from residential heating system
+      unit: kilowatt hour
+      2020:
+        value: 0.1053
+      2050:
+        min: 0.1150
+        max: 0.85
+        distribution: uniform
+      create copy: False
+  SOFC:
+    - name: electricity, residential, by conversion of hydrogen using fuel cell, SOFC, allocated by exergy, distributed by pipeline, produced by Electrolysis, PEM using electricity from grid
+      reference product: electricity, from residential heating system
+      unit: kilowatt hour
+      2020:
+        value: 0.3684
+      2050:
+        min: 0.117
+        max: 0.40
+        distribution: uniform
+      create copy: False
+  PAFC:
+    - name: null
+      reference product: null
+      unit: kilowatt hour
+      2020:
+        value: 0.5263
+      2050:
+        min: 0.0
+        max: 0.4750
+        distribution: uniform
+      create copy: False
+Electrolyzer:
+  PEM:
+    - name: hydrogen production, gaseous, 30 bar, from PEM electrolysis, from grid electricity
+      reference product: hydrogen, gaseous, 30 bar
+      unit: kilogram
+      2020:
+        value: 0.5
+      2050:
+        min: 0.5057
+        max: 0.75
+        distribution: uniform
+      create copy: False
+  Alkaline:
+    - name: hydrogen production, gaseous, 20 bar, from AEC electrolysis, from grid electricity
+      reference product: hydrogen, gaseous, 20 bar
+      unit: kilogram
+      2020:
+        value: 0.50
+      2050:
+        min: 0.0
+        max: 0.4943
+        distribution: uniform
+      create copy: False
+  HTEL:
+    - name: hydrogen production, gaseous, 1 bar, from SOEC electrolysis, from grid electricity
+      reference product: hydrogen, gaseous, 1 bar
+      unit: kilogram # unit was missing; assumed kilogram, consistent with the other electrolysers
+      2020:
+        value: 0.0
+      2050:
+        min: 0.0
+        max: 0.25
+        distribution: uniform
+      create copy: False
+Battery-Mobile:
+  NMC111:
+    - name: battery cell, NMC-111 # NMC alternatives w/ hyphen: version from E. Crenna et al. (2021)
+      reference product: battery cell
+      unit: kilogram
+      2020:
+        value: 0.25
+      2050:
+        min: 0.0
+        max: 0.22
+        distribution: uniform
+      create copy: False
+  NMC622:
+    - name: battery cell, NMC-622 # This is the activity currently linked to the transport needs
+      reference product: battery cell
+      unit: kilogram
+      2020:
+        value: 0.45
+      2050:
+        min: 0.0
+        max: 0.45
+        distribution: uniform
+      create copy: False
+  NMC811:
+    - name: battery cell, NMC-811
+      reference product: battery cell
+      unit: kilogram
+      2020:
+        value: 0.05
+      2050:
+        min: 0.0
+        max: 0.75
+        distribution: uniform
+      create copy: False
+  NCA:
+    - name: battery cell, NCA
+      reference product: battery cell
+      unit: kilogram
+      2020:
+        value: 0.15
+      2050:
+        min: 0.0
+        max: 0.15
+        distribution: uniform
+      create copy: False
+  LFP:
+    - name: battery cell, LFP
+      reference product: battery cell
+      unit: kilogram
+      2020:
+        value: 0.1
+      2050:
+        min: 0.0
+        max: 0.1
+        distribution: uniform
+      create copy: False
+  LiS:
+    - name: battery cell production, LiS
+      reference product: battery cell, LiS
+      unit: kilogram
+      2020:
+        value: 0.0
+      2050:
+        min: 0.0
+        max: 0.9
+        distribution: uniform
+      create copy: False
+  LiO2:
+    - name: battery cell production, Li-O2
+      reference product: battery cell, Li-O2
+      unit: kilogram
+      2020:
+        value: 0.0
+      2050:
+        min: 0.0
+        max: 0.1
+        distribution: uniform
+      create copy: False
+#### NOTE: battery shares are expressed per kilogram of cell; differences in energy density between chemistries are not accounted for.
+Battery-Stationary: ### Home storage uses NMC811 (w/o hyphen) in the current premise version
+  NMC111:
+    - name: market for battery cell, Li-ion, NMC111
+      reference product: battery cell, Li-ion, NMC111
+      unit: kilogram
+      2020:
+        value: 0.24
+      2050:
+        min: 0.0
+        max: 0.205
+        distribution: uniform
+      create copy: False
+  NMC622:
+    - name: battery cell, NMC-622 # Same activity as in Battery-Mobile; a dedicated copy must be created
+      reference product: battery cell
+      unit: kilogram
+      2020:
+        value: 0.13
+      2050:
+        min: 0.02
+        max: 0.30
+        distribution: uniform
+      create copy: True
+  NMC811:
+    - name: market for battery cell, Li-ion, NMC811 # Connected to the home storage system
+      reference product: battery cell, Li-ion, NMC811
+      unit: kilogram
+      2020:
+        value: 0.03
+      2050:
+        min: 0.039
+        max: 0.44
+        distribution: uniform
+      create copy: False
+  Lead-Acid:
+    - name: market for battery, lead acid, rechargeable, stationary
+      reference product: battery, lead acid, rechargeable, stationary
+      unit: kilogram
+      2020:
+        value: 0.10
+      2050:
+        min: 0.0
+        max: 0.095
+        distribution: uniform
+      create copy: False
+  LFP:
+    - name: market for battery cell, Li-ion, LFP
+      reference product: battery cell, Li-ion, LFP
+      unit: kilogram
+      2020:
+        value: 0.40
+      2050:
+        min: 0.17
+        max: 0.40
+        distribution: uniform
+      create copy: False
+  SIB:
+    - name: battery cell production, SIB, with NMMT cathode and fossil-HC anode
+      reference product: battery cell, SIB, with NMMT cathode and fossil-HC anode
+      unit: kilogram
+      2020:
+        value: 0.05
+      2050:
+        min: 0.05
+        max: 0.33
+        distribution: uniform
+      create copy: False
+  Redox-Flow: # CHECK WITH ROMAIN: different unit + modelling in the LCA
+    - name: vanadium-redox flow battery system assembly, 8.3 megawatt hour
+      reference product: vanadium-redox flow battery system
+      unit: unit # DIFFERENT UNIT: one unit = one 8.3 MWh system, not kilograms
+      2020:
+        value: 0.05
+      2050:
+        min: 0.05
+        max: 0.33
+        distribution: uniform
+      create copy: False
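+### Schema notes (added for documentation; the values above are unchanged):
+###   2020 -> value:                 share of the technology within its category in 2020
+###   2050 -> min/max/distribution:  bounds and distribution used to sample the 2050 share
+###   create copy:                   whether a dedicated copy of the linked dataset must be created
+### Example reading: the 2050 share of onshore wind is drawn uniformly between 0.83 and 0.96;
+### within a category, the 2020 shares of named technologies are normalised to sum to 1
+### (see adjust_subshares() in pathways/utils.py).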
diff --git a/pathways/lca.py b/pathways/lca.py
index e553855..cbfd810 100644
--- a/pathways/lca.py
+++ b/pathways/lca.py
@@ -1,4 +1,5 @@
 import csv
+import yaml
 import logging
 from pathlib import Path
 from typing import Any, Dict, List, Tuple
@@ -51,7 +52,6 @@ def read_indices_csv(file_path: Path) -> Dict[Tuple[str, str, str, str], str]:
             indices[(row[0], row[1], row[2], row[3])] = row[4]
     return indices
 
-
 def load_matrix_and_index(
     file_path: Path,
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
@@ -94,41 +94,59 @@
 
     return data_array, indices_array, flip_array, distributions_array
 
-
-def get_lca_matrices(
-    datapackage: str,
-    model: str,
-    scenario: str,
-    year: int,
-) -> Tuple[Datapackage, Dict, Dict]:
+def get_matrix_arrays(
+    dirpath: Path,
+    matrix_type: str,
+) -> list:
     """
-    Retrieve Life Cycle Assessment (LCA) matrices from disk.
+    Retrieve matrix arrays from disk.
 
     ...
 
-    :rtype: Tuple[sparse.csr_matrix, sparse.csr_matrix, Dict, Dict]
+    :rtype: List of np.ndarrays
     """
-    dirpath = (
-        Path(datapackage).parent / "inventories" / model.lower() / scenario / str(year)
+
+    matrix_filename = f"{matrix_type}_matrix.csv"
+
+    data, indices, sign, distributions = load_matrix_and_index(
+        dirpath / matrix_filename,
     )
 
-    # check that files exist
-    if not dirpath.exists():
-        raise FileNotFoundError(f"Directory {dirpath} does not exist.")
+    return [data, indices, sign, distributions]
+
+
+def get_indices(
+    dirpath: Path,
+) -> Tuple[Dict, Dict]:
+    """
+    Build the technosphere and biosphere indices.
+
+    ...
+
+    :rtype: Tuple[Dict, Dict]
+    """
+
+    A_indices = read_indices_csv(dirpath / "A_matrix_index.csv")
+    B_indices = read_indices_csv(dirpath / "B_matrix_index.csv")
+
+    return A_indices, B_indices
 
-    A_inds = read_indices_csv(dirpath / "A_matrix_index.csv")
-    B_inds = read_indices_csv(dirpath / "B_matrix_index.csv")
 
+def get_lca_matrices(
+    A_arrays: list,
+    B_arrays: list,
+) -> Datapackage:
+    """
+    Build the technosphere and biosphere matrices from matrix arrays.
+
+    ...
+
+    :rtype: Datapackage
+    """
 
     # create brightway datapackage
     dp = bwp.create_datapackage()
 
-    a_data, a_indices, a_sign, a_distributions = load_matrix_and_index(
-        dirpath / "A_matrix.csv",
-    )
-
-    b_data, b_indices, b_sign, b_distributions = load_matrix_and_index(
-        dirpath / "B_matrix.csv",
-    )
+    a_data, a_indices, a_sign, a_distributions = A_arrays
+    # B_arrays also carries a flip array; unpack all four values
+    b_data, b_indices, b_sign, b_distributions = B_arrays
 
     dp.add_persistent_vector(
         matrix="technosphere_matrix",
@@ -145,8 +163,103 @@
         distributions_array=b_distributions,
     )
 
-    return dp, A_inds, B_inds
+    return dp
+
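+# Usage sketch (illustrative; this mirrors the call site in pathways.pathways._calculate_year):
+#
+#     dirpath = get_dirpath(datapackage, model, scenario, year)  # from pathways.utils
+#     A_arrays = get_matrix_arrays(dirpath, matrix_type="A")
+#     B_arrays = get_matrix_arrays(dirpath, matrix_type="B")
+#     technosphere_indices, biosphere_indices = get_indices(dirpath)
+#     bw_datapackage = get_lca_matrices(A_arrays, B_arrays)
+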
+
+def get_subshares_matrix(
+    A_array: list,
+    indices: Dict,
+    year: int,
+) -> Datapackage:
+    """
+    Build a datapackage with correlated sub-shares samples for the technosphere matrix.
+    """
+
+    dp_correlated = bwp.create_datapackage()
+
+    a_data, a_indices, a_sign, _ = A_array
+
+    # adjusted_data = adjust_matrix_based_on_shares(data_array, indices_array, indices, year)
+
+    dp_correlated.add_persistent_array(
+        matrix="technosphere_matrix",
+        indices_array=a_indices,
+        data_array=a_data,
+        flip_array=a_sign,
+    )
+
+    return dp_correlated
+
+
+def adjust_matrix_based_on_shares(data_array, indices_array, shares_dict, year):
+    """
+    Adjust the technosphere matrix based on technology sub-shares.
+    :param data_array: Data array of the technosphere matrix.
+    :param indices_array: Indices array of the technosphere matrix.
+    :param shares_dict: Dictionary of technology categories, regions and sub-shares (see below).
+    :param year: Year for which the shares are applied.
+    :return: Modified data and indices arrays.
+    """
+    # TO-DO: currently only the 2020 shares are applied; the sampled 2050 shares still need to be added.
+    # TO-DO: return the remaining arrays needed to build the bw_processing datapackage.
+
+    index_lookup = {(row["row"], row["col"]): i for i, row in enumerate(indices_array)}
+
+    modified_data = []
+    modified_indices = []
+
+    # Determine unique product indices from shares_dict to identify which shouldn't be automatically updated/added
+    unique_product_indices_from_dict = set()
+    for _, regions in shares_dict.items():
+        for _, techs in regions.items():
+            for _, details in techs.items():
+                if "idx" in details:
+                    unique_product_indices_from_dict.add(details["idx"])
+
+    # Helper function to find the position of an exchange using the lookup dictionary
+    def find_index(activity_idx, product_idx):
+        return index_lookup.get((activity_idx, product_idx))
+
+    for tech_category, regions in shares_dict.items():
+        for region, techs in regions.items():
+            all_tech_indices = [
+                techs[tech]["idx"] for tech in techs if techs[tech]["idx"] is not None
+            ]
+            all_product_indices = set()
+
+            # Use np.isin to efficiently filter the exchanges supplied by these technologies
+            tech_indices = np.isin(indices_array["row"], all_tech_indices)
+            all_product_indices.update(indices_array["col"][tech_indices])
+
+            for product_idx in all_product_indices:
+                # Total output of the category's technologies into product_idx (excluding diagonal entries)
+                relevant_indices = [
+                    find_index(tech_idx, product_idx)
+                    for tech_idx in all_tech_indices
+                    if find_index(tech_idx, product_idx) is not None
+                    and tech_idx != product_idx
+                ]
+                total_output = np.sum(data_array[relevant_indices])
+
+                for tech, details in techs.items():
+                    share = details.get(year, {}).get("value", 0)
+                    idx = details["idx"]
+                    if idx is None or share == 0:
+                        continue
+
+                    # Calculate the new amount
+                    new_amount = total_output * share
+                    index = find_index(idx, product_idx)
+
+                    # Adjust the value or add a new exchange
+                    if index is not None and product_idx not in unique_product_indices_from_dict:
+                        # Exclude diagonal and undesired exchanges
+                        data_array[index] = new_amount
+                        # Append to modified_indices regardless of whether it is a new addition or an adjustment
+                        modified_indices.append((idx, product_idx))
+                        modified_data.append(new_amount)
+                    elif product_idx not in unique_product_indices_from_dict:
+                        # Exclude diagonal and undesired exchanges
+                        modified_data.append(new_amount)
+                        modified_indices.append((idx, product_idx))
+
+    modified_data_array = np.array(modified_data, dtype=np.float64)
+    modified_indices_array = np.array(modified_indices, dtype=bwp.INDICES_DTYPE)
+    # modified_flip_array = np.array(modified_flip, dtype=flip_array.dtype)
+
+    return modified_data_array, modified_indices_array
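+
+# Expected structure of `shares_dict` (illustrative sketch; region and index values are made up,
+# the layout mirrors the dictionary built by subshares_indices() in pathways/pathways.py):
+#
+#     {
+#         "Wind": {
+#             "GLO": {
+#                 "onshore": {
+#                     "idx": 1234,
+#                     2020: {"value": 0.96},
+#                     2050: {"min": 0.83, "max": 0.96, "distribution": "uniform"},
+#                 },
+#             },
+#         },
+#     }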
 
 
 def fill_characterization_factors_matrices(
     biosphere_flows: dict, methods, biosphere_dict, debug=False
@@ -192,40 +305,64 @@
 
     return matrix
 
 
-def remove_double_counting(
-    characterized_inventory: csr_matrix, vars_info: dict, activity_idx: int
-) -> csr_matrix:
+# def remove_double_counting(
+#     characterized_inventory: csr_matrix, vars_info: dict, activity_idx: int
+# ) -> csr_matrix:
+#     """
+#     Remove double counting from a characterized inventory matrix for all activities except
+#     the activity being evaluated, across all methods.
+#
+#     :param characterized_inventory: Characterized inventory matrix with rows for different methods and columns for different activities.
+#     :param vars_info: Dictionary with information about which indices to zero out.
+#     :param activity_idx: Index of the activity being evaluated, which should not be zeroed out.
+#     :return: Characterized inventory with double counting removed for all but the evaluated activity.
+#     """
+#
+#     print("Removing double counting")
+#     if isinstance(characterized_inventory, np.ndarray):
+#         characterized_inventory = csr_matrix(characterized_inventory)
+#     elif not isinstance(characterized_inventory, csr_matrix):
+#         raise TypeError(
+#             "characterized_inventory must be a csr_matrix or a numpy array."
+#         )
+#
+#     # Gather all indices for which we want to avoid double counting, except the evaluated activity
+#     list_of_idx_to_remove = []
+#     for region in vars_info:
+#         for variable in vars_info[region]:
+#             idx = vars_info[region][variable]
+#             if idx != activity_idx:
+#                 list_of_idx_to_remove.append(idx)
+#
+#     # Convert to lil_matrix for more efficient element-wise operations - CHECK IF THIS IS ACTUALLY FASTER
+#     characterized_inventory = characterized_inventory.tolil()
+#
+#     # Zero out the specified indices for all methods, except for the activity being evaluated
+#     for idx in list_of_idx_to_remove:
+#         characterized_inventory[:, idx] = 0
+#
+#     return characterized_inventory.tocsr()
+
+
+def remove_double_counting(A: csr_matrix, vars_info: dict) -> csr_matrix:
     """
-    Remove double counting from a characterized inventory matrix for all activities except
-    the activity being evaluated, across all methods.
-
-    :param characterized_inventory: Characterized inventory matrix with rows for different methods and columns for different activities.
+    Remove double counting from a technosphere matrix.
+
+    :param A: Technosphere matrix
     :param vars_info: Dictionary with information about which indices to zero out.
-    :param activity_idx: Index of the activity being evaluated, which should not be zeroed out.
-    :return: Characterized inventory with double counting removed for all but the evaluated activity.
+    :return: Technosphere matrix with double counting removed.
    """
 
-    print("Removing double counting")
-    if isinstance(characterized_inventory, np.ndarray):
-        characterized_inventory = csr_matrix(characterized_inventory)
-    elif not isinstance(characterized_inventory, csr_matrix):
-        raise TypeError(
-            "characterized_inventory must be a csr_matrix or a numpy array."
-        )
+    A_coo = A.tocoo()
+
+    list_of_idx = []
 
-    # Gather all indices for which we want to avoid double counting, except the evaluated activity
-    list_of_idx_to_remove = []
     for region in vars_info:
         for variable in vars_info[region]:
-            idx = vars_info[region][variable]
-            if idx != activity_idx:
-                list_of_idx_to_remove.append(idx)
-
-    # Convert to lil_matrix for more efficient element-wise operations - CHECK IF THIS IS ACTUALLY FASTER
-    characterized_inventory = characterized_inventory.tolil()
-
-    # Zero out the specified indices for all methods, except for the activity being evaluated
-    for idx in list_of_idx_to_remove:
-        characterized_inventory[:, idx] = 0
-
-    return characterized_inventory.tocsr()
+            idx = vars_info[region][variable]["idx"]
+            if idx not in list_of_idx:
+                list_of_idx.append(idx)
+                # Zero out the use of activity `idx` as an input to other activities,
+                # keeping the entries in its own column (its production exchange)
+                row_mask = np.isin(A_coo.row, idx)
+                col_mask = np.isin(A_coo.col, idx)
+                A_coo.data[row_mask & ~col_mask] = 0
+
+    A_coo.eliminate_zeros()
+    return A_coo.tocsr()
\ No newline at end of file
diff --git a/pathways/pathways.py b/pathways/pathways.py
index a545850..9cdc765 100644
--- a/pathways/pathways.py
+++ b/pathways/pathways.py
@@ -28,6 +28,9 @@ from .lca import (
     fill_characterization_factors_matrices,
     get_lca_matrices,
+    get_matrix_arrays,
+    get_indices,
+    get_subshares_matrix,
     remove_double_counting,
 )
 from .lcia import get_lcia_method_names
@@ -38,9 +41,11 @@
     display_results,
     get_unit_conversion_factors,
     harmonize_units,
+    load_subshares,
     load_classifications,
     load_numpy_array_from_disk,
     load_units_conversion,
+    get_dirpath,
 )
 
 warnings.filterwarnings("ignore")
@@ -127,14 +132,61 @@
 
     return scenario_data
 
 
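+# Note (added for clarity): technologies listed with `name: null` in technologies_shares.yaml have
+# no LCA dataset yet; load_subshares() redistributes their 2020 share over the named technologies
+# (see adjust_subshares() in pathways/utils.py) before subshares_indices() below consumes the data.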
+ """ + technologies_dict = load_subshares() + + indices_dict = {} + + for region in regions: + for tech_category, techs in technologies_dict.items(): + if tech_category not in indices_dict: + indices_dict[tech_category] = {} + for tech_type, tech_list in techs.items(): + for tech in tech_list: + name = tech.get("name") + ref_prod = tech.get("reference product") + unit = tech.get("unit") + activity = (name, ref_prod, unit, region) + activity_index = _get_activity_indices([activity], A_index, geo)[0] + + value_2020 = tech.get(2020, {}).get("value") + min_2050 = tech.get(2050, {}).get("min") + max_2050 = tech.get(2050, {}).get("max") + distribution_2050 = tech.get(2050, {}).get("distribution") + + if region not in indices_dict[tech_category]: + indices_dict[tech_category][region] = {} + indices_dict[tech_category][region][tech_type] = { + 'idx': activity_index, + 2020: { + "value": value_2020 + }, + 2050: { + "min": min_2050, + "max": max_2050, + "distribution": distribution_2050 + }, + } + + return indices_dict def fetch_indices(mapping, regions, variables, A_index, geo): """ Fetch the indices for the given activities in the technosphere matrix. - :param variables: - :param A_index: - :param geo: + :param mapping: Mapping between scenario variables and LCA datasets. + :param regions: List of regions. + :param variables: List of variables. + :param A_index: Dictionary with the indices of the activities in the technosphere matrix. + :param geo: Geomap object. :return: """ @@ -184,6 +236,7 @@ def fetch_indices(mapping, regions, variables, A_index, geo): return vars_idx + def fetch_inventories_locations(A_index: Dict[str, Tuple[str, str, str]]) -> List[str]: """ Fetch the locations of the inventories. @@ -318,7 +371,6 @@ def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int characterized_inventory = ( characterization_matrix @ lca.inventory ).toarray() - else: # Use distributions for LCA calculations # next(lca) is a generator that yields the inventory matrix @@ -380,6 +432,7 @@ def _calculate_year(args): reverse_classifications, debug, use_distributions, + subshares ) = args print(f"------ Calculating LCA results for {year}...") @@ -394,22 +447,21 @@ def _calculate_year(args): # Try to load LCA matrices for the given model, scenario, and year try: - bw_datapackage, technosphere_indices, biosphere_indices = get_lca_matrices( - datapackage, model, scenario, year - ) - + dirpath = get_dirpath(datapackage, model, scenario, year) except FileNotFoundError: # If LCA matrices can't be loaded, skip to the next iteration if debug: logging.warning(f"Skipping {model}, {scenario}, {year}, as data not found.") return + A_arrays = get_matrix_arrays(dirpath, matrix_type="A") + B_arrays = get_matrix_arrays(dirpath, matrix_type="B") + technosphere_indices, biosphere_indices = get_indices(dirpath) + bw_datapackage = get_lca_matrices(A_arrays, B_arrays) + # Fetch indices vars_info = fetch_indices(mapping, regions, variables, technosphere_indices, geo) - # Remove contribution from activities in other activities - # A = remove_double_counting(A, vars_info) - # check unclassified activities missing_classifications = check_unclassified_activities( technosphere_indices, classifications @@ -479,7 +531,7 @@ def _calculate_year(args): ], ) lca.lci(factorize=True) - else: + elif use_distributions != 0 and subshares == False: lca = MonteCarloLCA( demand={0: 1}, data_objs=[ @@ -488,6 +540,22 @@ def _calculate_year(args): use_distributions=True, ) lca.lci() + else: + + subshares_indices = 
 def fetch_indices(mapping, regions, variables, A_index, geo):
     """
     Fetch the indices for the given activities in the technosphere matrix.
 
-    :param variables:
-    :param A_index:
-    :param geo:
+    :param mapping: Mapping between scenario variables and LCA datasets.
+    :param regions: List of regions.
+    :param variables: List of variables.
+    :param A_index: Dictionary with the indices of the activities in the technosphere matrix.
+    :param geo: Geomap object.
     :return:
     """
@@ -184,6 +236,7 @@
 
     return vars_idx
 
+
 def fetch_inventories_locations(A_index: Dict[str, Tuple[str, str, str]]) -> List[str]:
     """
     Fetch the locations of the inventories.
@@ -318,7 +371,6 @@ def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int
             characterized_inventory = (
                 characterization_matrix @ lca.inventory
             ).toarray()
-
         else:
             # Use distributions for LCA calculations
             # next(lca) is a generator that yields the inventory matrix
@@ -380,6 +432,7 @@
         reverse_classifications,
         debug,
         use_distributions,
+        subshares,
     ) = args
 
     print(f"------ Calculating LCA results for {year}...")
@@ -394,22 +447,21 @@
 
     # Try to load LCA matrices for the given model, scenario, and year
     try:
-        bw_datapackage, technosphere_indices, biosphere_indices = get_lca_matrices(
-            datapackage, model, scenario, year
-        )
-
+        dirpath = get_dirpath(datapackage, model, scenario, year)
     except FileNotFoundError:
         # If LCA matrices can't be loaded, skip to the next iteration
         if debug:
             logging.warning(f"Skipping {model}, {scenario}, {year}, as data not found.")
         return
 
+    A_arrays = get_matrix_arrays(dirpath, matrix_type="A")
+    B_arrays = get_matrix_arrays(dirpath, matrix_type="B")
+    technosphere_indices, biosphere_indices = get_indices(dirpath)
+    bw_datapackage = get_lca_matrices(A_arrays, B_arrays)
+
     # Fetch indices
     vars_info = fetch_indices(mapping, regions, variables, technosphere_indices, geo)
 
-    # Remove contribution from activities in other activities
-    # A = remove_double_counting(A, vars_info)
-
     # check unclassified activities
     missing_classifications = check_unclassified_activities(
         technosphere_indices, classifications
@@ -479,7 +531,7 @@
             ],
         )
         lca.lci(factorize=True)
-    else:
+    elif use_distributions != 0 and not subshares:
         lca = MonteCarloLCA(
             demand={0: 1},
             data_objs=[
                ...
             ],
             use_distributions=True,
         )
         lca.lci()
+    else:
+        # Sample technology sub-shares together with the other uncertain exchanges
+        shares_indices = subshares_indices(regions, technosphere_indices, geo)
+
+        bw_correlated = get_subshares_matrix(A_arrays, shares_indices, year)
+
+        lca = MonteCarloLCA(
+            demand={0: 1},
+            data_objs=[
+                bw_datapackage,
+                bw_correlated,
+            ],
+            use_distributions=True,
+            use_arrays=True,
+        )
+        lca.lci()
 
     characterization_matrix = fill_characterization_factors_matrices(
         biosphere_indices, methods, lca.dicts.biosphere, debug
@@ -725,6 +793,7 @@
         multiprocessing: bool = False,
         demand_cutoff: float = 1e-3,
         use_distributions: int = 0,
+        subshares: bool = False,
     ) -> None:
         """
         Calculate Life Cycle Assessment (LCA) results for given methods, models, scenarios, regions, and years.
@@ -850,6 +919,7 @@
                     self.reverse_classifications,
                     self.debug,
                     use_distributions,
+                    subshares,
                 )
                 for year in years
             ]
@@ -884,6 +954,7 @@
                             self.reverse_classifications,
                             self.debug,
                             use_distributions,
+                            subshares,
                         )
                     )
                     for year in years
diff --git a/pathways/utils.py b/pathways/utils.py
index 347dbf0..25e0818 100644
--- a/pathways/utils.py
+++ b/pathways/utils.py
@@ -4,13 +4,36 @@
 import numpy as np
 import xarray as xr
 import yaml
+import math
 
 from .filesystem_constants import DATA_DIR, DIR_CACHED_DB
 
 CLASSIFICATIONS = DATA_DIR / "activities_classifications.yaml"
 UNITS_CONVERSION = DATA_DIR / "units_conversion.yaml"
+SUBSHARES = DATA_DIR / "technologies_shares.yaml"
+
+
+def get_dirpath(
+    datapackage: str,
+    model: str,
+    scenario: str,
+    year: int,
+) -> Path:
+    """
+    Get the inventories directory path for a given model, scenario and year.
+
+    ...
+
+    :rtype: Path
+    """
+
+    dirpath = (
+        Path(datapackage).parent / "inventories" / model.lower() / scenario / str(year)
+    )
+
+    if not dirpath.exists():
+        raise FileNotFoundError(f"Directory {dirpath} does not exist.")
+
+    return dirpath
 
 
 def load_classifications():
     """Load the activities classifications."""
@@ -279,6 +302,58 @@
 
     return np.load(filepath, allow_pickle=True)
 
 
+def load_subshares() -> dict:
+    """
+    Load the technology sub-shares YAML file (SUBSHARES) and return its content as a dictionary,
+    normalised by adjust_subshares().
+
+    :return: A dictionary with the categories, technologies and market shares data.
+    """
+    with open(SUBSHARES, "r") as stream:
+        data = yaml.safe_load(stream)
+
+    adjust_subshares(data)
+    return data
+
+
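+# Example (illustrative): after normalisation, the parsed YAML keeps its nested structure, e.g.
+#
+#     shares = load_subshares()
+#     shares["Wind"]["onshore"][0][2020]["value"]  # -> 0.96
+#     shares["Wind"]["onshore"][0][2050]["max"]    # -> 0.96
+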
+ """ + for category, technologies in data.items(): + + # Initialize totals + total_2020_value = 0 + total_adjustable_value = 0 + + # First pass to calculate totals + for subcategory, tech_list in technologies.items(): + for tech in tech_list: + if 2020 in tech: + value = tech[2020].get('value', 0) + total_2020_value += value + if tech.get('name') is not None: + total_adjustable_value += value + + # Skip adjustment if no values or all values are named + if total_2020_value == 0 or total_adjustable_value == 0: + continue + + adjustment_factor = total_2020_value / total_adjustable_value + + # Second pass to adjust values + adjusted_total = 0 + for subcategory, tech_list in technologies.items(): + for tech in tech_list: + if 2020 in tech and tech.get('name') is not None: + tech[2020]['value'] = tech[2020]['value'] * adjustment_factor + adjusted_total += tech[2020]['value'] + + # Check if the adjusted total is close to 1.00, allowing a small margin for floating-point arithmetic + if not math.isclose(adjusted_total, 1.00, rel_tol=1e-9): + print( + f"Warning: Total of adjusted '2020' values in category '{category}' does not add up to 1.00 (Total: {adjusted_total})") def get_visible_files(path): return [file for file in Path(path).iterdir() if not file.name.startswith(".")]