diff --git a/dev/generate datapackages.ipynb b/dev/generate datapackages.ipynb index c4b927d..4244c4d 100644 --- a/dev/generate datapackages.ipynb +++ b/dev/generate datapackages.ipynb @@ -198,7 +198,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, + "id": "54d3a225-e170-45ca-bb99-2cef313b7bdc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PosixPath('/Users/romain/Library/Application Support/pathways')" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pathways.filesystem_constants import USER_DATA_BASE_DIR\n", + "USER_DATA_BASE_DIR" + ] + }, + { + "cell_type": "code", + "execution_count": 1, "id": "9d57850c-04fa-4ef7-89e8-3e9de62f92e3", "metadata": {}, "outputs": [ @@ -219,27 +241,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "a081c764-a942-4627-9197-fcd373e98b62", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating LCA results for image...\n", + "--- Calculating LCA results for SSP2-RCP19...\n", + "------ Calculating LCA results for 2005...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "0% [##########################] 100% | ETA: 00:00:00\n", + "Total time elapsed: 00:00:15\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------ Calculating LCA results for 2010...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "0% [##########################] 100% | ETA: 00:00:00\n", + "Total time elapsed: 00:00:12\n", + "0% [##] 100% | ETA: 00:00:00\n", + "Total time elapsed: 00:00:44\n" + ] + } + ], "source": [ "import numpy as np\n", - "dp = p.calculate(\n", + "p.calculate(\n", " methods=[\n", " 'EF v3.1 - climate change - global warming potential (GWP100)',\n", " #'RELICS - metals extraction - Lithium',\n", " ],\n", " regions=[r for r in p.scenarios.coords[\"region\"].values if r!=\"World\"],\n", - " #regions=[\"WEU\",],\n", " scenarios=[\"SSP2-RCP19\",],\n", " variables=[\n", " v for v in p.scenarios.coords[\"variables\"].values\n", " if any(i in v for i in [\"Industry\", \"Transport\", \"Heating\"])\n", " ],\n", - " years=[2005, 2010, 2020],\n", + " years=[2005, 2010],\n", " characterization=True,\n", - " #flows=[\"Lithium - natural resource - in ground - kilogram\",],\n", " multiprocessing=False,\n", " demand_cutoff=0.01,\n", ")" @@ -247,33 +302,168 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "c9786349-3b91-4439-8494-208ba7bc3b4a", + "execution_count": 3, + "id": "162124db-232b-494e-a8ef-8b90d1b87411", "metadata": {}, "outputs": [], "source": [ - "import bw2calc as bc\n", - "import bw_processing as bwp" + "df = p.lca_results.sum(dim=\"act_category\").to_dataframe(\"value\")" ] }, { "cell_type": "code", "execution_count": 4, - "id": "dedf1bb9-47a2-497c-b788-7fa64986827a", + "id": "cb52417b-e7a3-46ed-bbe6-8ff1d1607231", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/romain/anaconda3/envs/pathways/lib/python3.11/site-packages/scikits/umfpack/umfpack.py:736: UmfpackWarning: (almost) singular matrix! (estimated cond. number: 1.21e+13)\n", - " warnings.warn(msg, UmfpackWarning)\n" - ] - }, { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value
variableyearregionlocationmodelscenarioimpact_category
Industry_Food and Tobacco_Electricity2005BRACN-NMimageSSP2-RCP19EF v3.1 - climate change - global warming potential (GWP100)5.463954e+06
BOimageSSP2-RCP19EF v3.1 - climate change - global warming potential (GWP100)1.244211e+04
HRimageSSP2-RCP19EF v3.1 - climate change - global warming potential (GWP100)1.482432e+04
Canada without QuebecimageSSP2-RCP19EF v3.1 - climate change - global warming potential (GWP100)2.628692e+01
IN-CTimageSSP2-RCP19EF v3.1 - climate change - global warming potential (GWP100)3.280474e+05
........................
Transport_Freight (train)_Liquid fossil2010WEUBR-PIimageSSP2-RCP19EF v3.1 - climate change - global warming potential (GWP100)0.000000e+00
JAPimageSSP2-RCP19EF v3.1 - climate change - global warming potential (GWP100)0.000000e+00
BR-ESimageSSP2-RCP19EF v3.1 - climate change - global warming potential (GWP100)0.000000e+00
BR-SPimageSSP2-RCP19EF v3.1 - climate change - global warming potential (GWP100)0.000000e+00
CLimageSSP2-RCP19EF v3.1 - climate change - global warming potential (GWP100)0.000000e+00
\n", + "

1810796 rows × 1 columns

\n", + "
" + ], "text/plain": [ - "20.305023566997583" + " value\n", + "variable year region location model scenario impact_category \n", + "Industry_Food and Tobacco_Electricity 2005 BRA CN-NM image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 5.463954e+06\n", + " BO image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 1.244211e+04\n", + " HR image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 1.482432e+04\n", + " Canada without Quebec image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 2.628692e+01\n", + " IN-CT image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 3.280474e+05\n", + "... ...\n", + "Transport_Freight (train)_Liquid fossil 2010 WEU BR-PI image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 0.000000e+00\n", + " JAP image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 0.000000e+00\n", + " BR-ES image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 0.000000e+00\n", + " BR-SP image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 0.000000e+00\n", + " CL image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 0.000000e+00\n", + "\n", + "[1810796 rows x 1 columns]" ] }, "execution_count": 4, @@ -282,248 +472,22 @@ } ], "source": [ - "lca = bc.LCA(\n", - " demand={1329: 1},\n", - " data_objs=[dp],\n", - ")\n", - "lca.lci()\n", - "lca.lcia()\n", - "lca.score" + "df" ] }, { "cell_type": "code", "execution_count": 5, - "id": "f501423c-de64-4604-9409-2a40c77c32bb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[array([( 0, 0), ( 0, 11778), ( 0, 17244), ...,\n", - " (36540, 21436), (36540, 19141), (36540, 24252)],\n", - " dtype=[('row', 'Industry_Food and Tobacco_Electricity\n", " 2005\n", " BRA\n", - " Europe, without Russia and Turkey\n", + " CN-NM\n", " image\n", " SSP2-RCP19\n", " EF v3.1 - climate change - global warming pote...\n", - " 1.043825e+06\n", + " 5.463954e+06\n", " \n", " \n", " 1\n", " Industry_Food and Tobacco_Electricity\n", " 2005\n", " BRA\n", - " TZ\n", + " BO\n", " image\n", " SSP2-RCP19\n", " EF v3.1 - climate change - global warming pote...\n", - " 1.149504e+06\n", + " 1.244211e+04\n", " \n", " \n", " 2\n", " Industry_Food and Tobacco_Electricity\n", " 2005\n", " BRA\n", - " IN-DL\n", + " HR\n", " image\n", " SSP2-RCP19\n", " EF v3.1 - climate change - global warming pote...\n", - " 1.264347e+04\n", + " 1.482432e+04\n", " \n", " \n", " 3\n", " Industry_Food and Tobacco_Electricity\n", " 2005\n", " BRA\n", - " BR-TO\n", + " Canada without Quebec\n", " image\n", " SSP2-RCP19\n", " EF v3.1 - climate change - global warming pote...\n", - " 1.652670e+02\n", + " 2.628692e+01\n", " \n", " \n", " 4\n", " Industry_Food and Tobacco_Electricity\n", " 2005\n", " BRA\n", - " MR\n", + " IN-CT\n", " image\n", " SSP2-RCP19\n", " EF v3.1 - climate change - global warming pote...\n", - " 1.881197e+03\n", + " 3.280474e+05\n", " \n", " \n", "\n", "" ], "text/plain": [ - " variable year region \\\n", - "0 Industry_Food and Tobacco_Electricity 2005 BRA \n", - "1 Industry_Food and Tobacco_Electricity 2005 BRA \n", - "2 Industry_Food and Tobacco_Electricity 2005 BRA \n", - "3 Industry_Food and Tobacco_Electricity 2005 BRA \n", - "4 Industry_Food and Tobacco_Electricity 2005 BRA \n", - "\n", - " location model scenario \\\n", - "0 Europe, without Russia and Turkey image SSP2-RCP19 \n", - "1 TZ image SSP2-RCP19 \n", - "2 IN-DL image SSP2-RCP19 \n", - "3 BR-TO image SSP2-RCP19 \n", - "4 MR image SSP2-RCP19 \n", - "\n", - " impact_category value \n", - "0 EF v3.1 - climate change - global warming pote... 1.043825e+06 \n", - "1 EF v3.1 - climate change - global warming pote... 1.149504e+06 \n", - "2 EF v3.1 - climate change - global warming pote... 1.264347e+04 \n", - "3 EF v3.1 - climate change - global warming pote... 1.652670e+02 \n", - "4 EF v3.1 - climate change - global warming pote... 1.881197e+03 " + " variable year region location \\\n", + "0 Industry_Food and Tobacco_Electricity 2005 BRA CN-NM \n", + "1 Industry_Food and Tobacco_Electricity 2005 BRA BO \n", + "2 Industry_Food and Tobacco_Electricity 2005 BRA HR \n", + "3 Industry_Food and Tobacco_Electricity 2005 BRA Canada without Quebec \n", + "4 Industry_Food and Tobacco_Electricity 2005 BRA IN-CT \n", + "\n", + " model scenario impact_category \\\n", + "0 image SSP2-RCP19 EF v3.1 - climate change - global warming pote... \n", + "1 image SSP2-RCP19 EF v3.1 - climate change - global warming pote... \n", + "2 image SSP2-RCP19 EF v3.1 - climate change - global warming pote... \n", + "3 image SSP2-RCP19 EF v3.1 - climate change - global warming pote... \n", + "4 image SSP2-RCP19 EF v3.1 - climate change - global warming pote... \n", + "\n", + " value \n", + "0 5.463954e+06 \n", + "1 1.244211e+04 \n", + "2 1.482432e+04 \n", + "3 2.628692e+01 \n", + "4 3.280474e+05 " ] }, - "execution_count": 153, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -705,25 +669,17 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 10, "id": "5f2c4b3b-b26c-47ee-8c36-582feb027f58", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,\n", - " 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026,\n", - " 2027, 2028, 2029, 2030, 2031, 2032, 2033, 2034, 2035, 2036, 2037,\n", - " 2038, 2039, 2040, 2041, 2042, 2043, 2044, 2045, 2046, 2047, 2048,\n", - " 2049, 2050, 2051, 2052, 2053, 2054, 2055, 2056, 2057, 2058, 2059,\n", - " 2060, 2061, 2062, 2063, 2064, 2065, 2066, 2067, 2068, 2069, 2070,\n", - " 2071, 2072, 2073, 2074, 2075, 2076, 2077, 2078, 2079, 2080, 2081,\n", - " 2082, 2083, 2084, 2085, 2086, 2087, 2088, 2089, 2090, 2091, 2092,\n", - " 2093, 2094, 2095, 2096, 2097, 2098, 2099, 2100])" + "array([2005, 2010])" ] }, - "execution_count": 154, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -734,7 +690,7 @@ }, { "cell_type": "code", - "execution_count": 155, + "execution_count": 11, "id": "1de1d921-4156-46b8-9113-f94214320f50", "metadata": {}, "outputs": [], @@ -745,7 +701,7 @@ }, { "cell_type": "code", - "execution_count": 156, + "execution_count": 12, "id": "57b097ed-0108-4d62-8aaf-0a3d7c76fc48", "metadata": {}, "outputs": [], @@ -758,7 +714,7 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": 13, "id": "c0b92aed-c162-4f5a-b200-512452d5157e", "metadata": {}, "outputs": [], @@ -768,7 +724,7 @@ }, { "cell_type": "code", - "execution_count": 158, + "execution_count": 14, "id": "829eef08-5bc8-4ee3-8d51-fd55e2beb2b0", "metadata": {}, "outputs": [], @@ -778,7 +734,7 @@ }, { "cell_type": "code", - "execution_count": 159, + "execution_count": 15, "id": "11c1861f-1a2b-49f6-a1be-94680d5a6b30", "metadata": {}, "outputs": [], @@ -790,17 +746,17 @@ }, { "cell_type": "code", - "execution_count": 160, + "execution_count": 16, "id": "9acd0483-a477-4250-a28a-aa0d2f510509", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "125945" + "23544" ] }, - "execution_count": 160, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -811,7 +767,7 @@ }, { "cell_type": "code", - "execution_count": 161, + "execution_count": 39, "id": "5b5dde7a-3eb9-4f1d-997e-3c6310e76580", "metadata": {}, "outputs": [], @@ -822,17 +778,17 @@ }, { "cell_type": "code", - "execution_count": 162, + "execution_count": 40, "id": "3361c4c8-c006-4f09-85f4-9d50c10f327f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "125945" + "11745" ] }, - "execution_count": 162, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -843,7 +799,7 @@ }, { "cell_type": "code", - "execution_count": 163, + "execution_count": 42, "id": "87646c30-91c9-4dcf-b4a0-a52471634580", "metadata": {}, "outputs": [ @@ -862,10 +818,10 @@ " " ], "text/plain": [ - "" + "" ] }, - "execution_count": 163, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } diff --git a/dev/timing.py b/dev/timing.py index 4878838..8090d50 100644 --- a/dev/timing.py +++ b/dev/timing.py @@ -13,7 +13,7 @@ ], regions=[ "WEU", - "USA", + #"USA", ], scenarios=[scenario], years=[ diff --git a/pathways/__init__.py b/pathways/__init__.py index 4e3b96d..6d5860f 100644 --- a/pathways/__init__.py +++ b/pathways/__init__.py @@ -1,8 +1,5 @@ -from pathlib import Path - __version__ = (0, 0, 1) -__all__ = ("__version__", "DATA_DIR", "Pathways") +__all__ = ("__version__", "Pathways") -DATA_DIR = Path(__file__).resolve().parent / "data" from .pathways import Pathways diff --git a/pathways/data_validation.py b/pathways/data_validation.py index d084db8..6c727cb 100644 --- a/pathways/data_validation.py +++ b/pathways/data_validation.py @@ -57,10 +57,10 @@ def validate_datapackage(datapackage: datapackage.DataPackage): dataframe = pd.DataFrame(data, columns=headers) # Check that the scenario data is valid - validate_scenario_data(dataframe) + #validate_scenario_data(dataframe) # Check that the mapping is valid - validate_mapping(datapackage.get_resource("mapping"), dataframe) + #validate_mapping(datapackage.get_resource("mapping"), dataframe) # Check that the LCA data is valid # validate_lca_data(datapackage) diff --git a/pathways/filesystem_constants.py b/pathways/filesystem_constants.py new file mode 100644 index 0000000..d17411b --- /dev/null +++ b/pathways/filesystem_constants.py @@ -0,0 +1,19 @@ +""" +This module contains constants for the filesystem paths used by Pathways. +""" + +from pathlib import Path +import platformdirs + +# Directories for data which comes with Pathways +DATA_DIR = Path(__file__).resolve().parent / "data" + +# Directories for user-created data +USER_DATA_BASE_DIR = platformdirs.user_data_path(appname="pathways", appauthor="pylca") +USER_DATA_BASE_DIR.mkdir(parents=True, exist_ok=True) + +DIR_CACHED_DB = USER_DATA_BASE_DIR / "cache" +DIR_CACHED_DB.mkdir(parents=True, exist_ok=True) + +USER_LOGS_DIR = platformdirs.user_log_path(appname="pathways", appauthor="pylca") +USER_LOGS_DIR.mkdir(parents=True, exist_ok=True) diff --git a/pathways/lca.py b/pathways/lca.py index e03ff6a..7c186dd 100644 --- a/pathways/lca.py +++ b/pathways/lca.py @@ -1,12 +1,9 @@ import csv -import warnings from pathlib import Path -from typing import Dict, List, Optional, Tuple +from typing import Dict, Tuple import bw_processing as bwp import numpy as np -import scipy.sparse -import xarray as xr from scipy import sparse from scipy.sparse import csr_matrix @@ -24,44 +21,6 @@ print("Solver: scikits.umfpack") -def create_demand_vector( - activities_idx: List[int], - A: scipy.sparse, - demand: xr.DataArray, - unit_conversion: np.ndarray, -) -> np.ndarray: - """ - Create a demand vector with the given activities indices, sparse matrix A, demand values, and unit conversion factors. - - This function multiplies the given demand with the unit conversion factors and assigns the result to the positions in - the vector corresponding to the activities indices. All other positions in the vector are set to zero. - - :param activities_idx: Indices of activities for which to create demand. These indices correspond to positions in the vector and matrix A. - :type activities_idx: List[int] - - :param A: Sparse matrix used to determine the size of the demand vector. - :type A: scipy.sparse.csr_matrix - - :param demand: Demand values for the activities, provided as a DataArray from the xarray package. - :type demand: xr.DataArray - - :param unit_conversion: Unit conversion factors corresponding to each activity in activities_idx. - :type unit_conversion: numpy.ndarray - - :return: The demand vector, represented as a 1-dimensional numpy array. - :rtype: numpy.ndarray - """ - - # Initialize the demand vector with zeros, with length equal to the number of rows in A - f = np.zeros(A.shape[0]) - - # Assign demand values to the positions in the vector corresponding to the activities indices - # Demand values are converted to the appropriate units before assignment - f[activities_idx] = float(demand) * float(unit_conversion) - - return f - - def read_indices_csv(file_path: Path) -> Dict[Tuple[str, str, str, str], str]: """ Reads a CSV file and returns its contents as a dictionary. @@ -229,49 +188,3 @@ def remove_double_counting(A: csr_matrix, vars_info: dict) -> csr_matrix: A_coo.eliminate_zeros() return A_coo.tocsr() - - -def characterize_inventory(C, lcia_matrix) -> np.ndarray: - """ - Characterize an inventory with an LCIA matrix. - :param C: Solved inventory - :param lcia_matrix: Characterization matrix - :return: Characterized inventory - """ - - # Multiply C with lcia_matrix to get D - return C[..., None] * lcia_matrix - - -def solve_inventory(A: csr_matrix, B: np.ndarray, f: np.ndarray) -> np.ndarray: - """ - Solve the inventory problem for a set of activities, given technosphere and biosphere matrices, demand vector, - LCIA matrix, and the indices of activities to consider. - - This function uses either the pypardiso or scipy library to solve the linear system, depending on the availability - of the pypardiso library. The solutions are then used to calculate LCIA scores. - - ... - - :rtype: numpy.ndarray - - """ - - if A.shape[0] != A.shape[1]: - raise ValueError("A must be a square matrix") - - if A.shape[0] != f.size: - raise ValueError("Incompatible dimensions between A and f") - - if B.shape[0] != A.shape[0]: - raise ValueError("Incompatible dimensions between A and B") - - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - # Solve the system Ax = f for x using sparse solver - A_inv = spsolve(A, f)[:, np.newaxis] - - # Compute product of A_inv and B - C = A_inv * B - - return C diff --git a/pathways/lcia.py b/pathways/lcia.py index 712570b..085e4f3 100644 --- a/pathways/lcia.py +++ b/pathways/lcia.py @@ -1,10 +1,6 @@ import json -import numpy as np -import xarray as xr -from scipy import sparse - -from . import DATA_DIR +from .filesystem_constants import DATA_DIR LCIA_METHODS = DATA_DIR / "lcia_data.json" diff --git a/pathways/pathways.py b/pathways/pathways.py index 94c416e..8f7620e 100644 --- a/pathways/pathways.py +++ b/pathways/pathways.py @@ -9,6 +9,7 @@ from multiprocessing import Pool, cpu_count from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Union +import uuid import bw2calc as bc import numpy as np @@ -19,16 +20,12 @@ from datapackage import DataPackage from numpy import dtype, ndarray from premise.geomap import Geomap -from scipy import sparse -from . import DATA_DIR +from .filesystem_constants import DATA_DIR, DIR_CACHED_DB from .data_validation import validate_datapackage from .lca import ( - characterize_inventory, - create_demand_vector, get_lca_matrices, remove_double_counting, - solve_inventory, ) from .lcia import get_lcia_method_names from .utils import ( @@ -39,6 +36,7 @@ harmonize_units, load_classifications, load_units_conversion, + load_numpy_array_from_disk ) @@ -84,12 +82,12 @@ def get_visible_files(path): def resize_scenario_data( - scenario_data: xr.DataArray, - model: List[str], - scenario: List[str], - region: List[str], - year: List[int], - variables: List[str], + scenario_data: xr.DataArray, + model: List[str], + scenario: List[str], + region: List[str], + year: List[int], + variables: List[str], ): """ Resize the scenario data to the given scenario, year, region, and variables. @@ -188,13 +186,14 @@ def fetch_inventories_locations(A_index: Dict[str, Tuple[str, str, str]]) -> Lis return list(set([act[3] for act in A_index])) -def generate_A_indices(A_index, reverse_classifications, lca_results_coords): +def group_technosphere_indices_by_category(technosphere_index, reverse_classifications, lca_results_coords) -> Tuple: # Generate a list of activity indices for each activity category acts_idx = [] + acts_dict = {} for cat in lca_results_coords["act_category"].values: - acts_idx.append( - [int(A_index[a]) for a in reverse_classifications[cat] if a in A_index] - ) + x = [int(technosphere_index[a]) for a in reverse_classifications[cat] if a in technosphere_index] + acts_idx.append(x) + acts_dict[cat] = x # Pad each list in acts_idx with -1 to make them all the same length max_len = max([len(x) for x in acts_idx]) @@ -203,11 +202,41 @@ def generate_A_indices(A_index, reverse_classifications, lca_results_coords): ) # Swap the axes of acts_idx to align with the dimensionality of D - return np.swapaxes(acts_idx, 0, 1) + return acts_idx, acts_dict, np.swapaxes(acts_idx, 0, 1) +def group_technosphere_indices_by_location(technosphere_index, locations) -> Tuple: + """ + Group the technosphere indices by location. + :param technosphere_index: + :param locations: + :return: + """ + + act_indices_list = [] + act_indices_dict = {} + for loc in locations: + x = [ + int(technosphere_index[a]) + for a in technosphere_index + if a[-1] == loc + ] + + act_indices_list.append(x) + act_indices_dict[loc] = x + + # Pad each list in act_indices_list with -1 to make them all the same length + max_len = max([len(x) for x in act_indices_list]) + act_indices_array = np.array( + [ + np.pad(x, (0, max_len - len(x)), constant_values=-1) + for x in act_indices_list + ] + ) + + return act_indices_list, act_indices_dict, act_indices_array -def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int]]: +def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int]]: ( model, scenario, @@ -255,13 +284,13 @@ def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int # If the total demand is zero, return None if ( - demand - / scenarios.sel( - region=region, - model=model, - pathway=scenario, - year=year, - ).sum(dim="variables") + demand + / scenarios.sel( + region=region, + model=model, + pathway=scenario, + year=year, + ).sum(dim="variables") ) < demand_cutoff: continue @@ -276,19 +305,20 @@ def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int d = [] for f, fu in enumerate(FU): lca.lcia(fu) - d.append(lca.supply_array) + d.append(lca.characterized_inventory.sum(axis=0)) + + id_array = uuid.uuid4() + np.save(file=DIR_CACHED_DB / f"{id_array}.npy", arr=np.vstack(d).T) # concatenate the list of sparse matrices and # add a third dimension and concatenate along it return { - "data": np.column_stack(d), - "variables": variables_demand, - "variables_idx": variables_idx, + "id_array": id_array, + "variables": {v: idx for v, idx in zip(variables_demand, variables_idx)}, } def _calculate_year(args): - ( model, scenario, @@ -338,9 +368,42 @@ def _calculate_year(args): if act not in classifications: missing_class.append(list(act)) + results = {} + locations = lca_results.coords["location"].values.tolist() location_to_index = {location: index for index, location in enumerate(locations)} - rev_technosphere_index = {int(v): k for k, v in technosphere_indices.items()} + reverse_technosphere_index = {int(v): k for k, v in technosphere_indices.items()} + loc_idx = np.array( + [ + location_to_index[act[-1]] + for act in reverse_technosphere_index.values() + if act[-1] in locations + ] + ) + + acts_category_idx_list, acts_category_idx_dict, acts_category_idx_array = group_technosphere_indices_by_category( + technosphere_indices, + reverse_classifications, + lca_results.coords, + ) + + acts_location_idx_list, acts_location_idx_dict, acts_location_idx_array = group_technosphere_indices_by_location( + technosphere_indices, + locations, + ) + + results["other"] = { + "locations": locations, + "location_to_index": location_to_index, + "reverse_technosphere_index": reverse_technosphere_index, + "activity_location_idx": loc_idx, + "acts_category_idx_list": acts_category_idx_list, + "acts_category_idx_dict": acts_category_idx_dict, + "acts_category_idx_array": acts_category_idx_array, + "acts_location_idx_list": acts_location_idx_list, + "acts_location_idx_dict": acts_location_idx_dict, + "acts_location_idx_array": acts_location_idx_array, + } lca = bc.LCA( demand={0: 1}, @@ -350,11 +413,8 @@ def _calculate_year(args): ) lca.lci(factorize=True) - results = {} - # use pyprind bar = pyprind.ProgBar(len(regions)) for region in regions: - print(region) bar.update() # Iterate over each region results[region] = process_region( @@ -376,75 +436,12 @@ def _calculate_year(args): demand_cutoff, locations, location_to_index, - rev_technosphere_index, + reverse_technosphere_index, lca, ) ) - # Populate the result array - act_locs = [a[-1] for a in rev_technosphere_index.values()] - acts_idx = generate_A_indices( - technosphere_indices, - reverse_classifications, - lca_results.coords, - ) - act_categories = lca_results.coords["act_category"].values - impact_categories = lca_results.coords["impact_category"].values - - # Generate a list of activity indices for each activity category - category_idx = [] - for cat in lca_results.coords["act_category"].values: - category_idx.append( - [ - int(technosphere_indices[a]) - for a in reverse_classifications[cat] - if a in technosphere_indices - ] - ) - - mask = np.array([act[-1] in act_locs for act in rev_technosphere_index.values()]) - loc_idx = np.array( - [ - location_to_index[act[-1]] - for act in rev_technosphere_index.values() - if act[-1] in act_locs - ] - ) - - # Initialize the new array with zeros for missing data - E = np.zeros( - ( - len(technosphere_indices), - len(locations), - len(impact_categories), - len(variables), - ) - ) - print("E.shape", E.shape) - - for region, result in results.items(): - data = result["data"] - variables_demand = result["variables"] - variables_idx = result["variables_idx"] - E[mask, loc_idx][..., variables_idx] = data[mask][:, None] - - # using acts_idx for advanced indexing - E = E[acts_idx, ...].sum(axis=0) - print("final E.shape", E.shape) - - lca_results.loc[ - { - "act_category": act_categories, - "variable": variables_demand, - "impact_category": impact_categories, - "year": year, - "region": region, - "model": model, - "scenario": scenario, - } - ] = E.transpose(0, -1, 1, 2) - - return lca_results + return results class Pathways: @@ -475,6 +472,10 @@ def __init__(self, datapackage): self.units = load_units_conversion() self.lcia_matrix = None + # clean up the cache directory + for file in get_visible_files(DIR_CACHED_DB): + file.unlink() + def read_datapackage(self) -> DataPackage: """Read the datapackage.json file. @@ -606,25 +607,25 @@ def get_scenarios(self, scenario_data: pd.DataFrame) -> xr.DataArray: units[variable] = scenario_data[ scenario_data["variables"] == self.mapping[variable]["scenario variable"] - ].iloc[0]["unit"] + ].iloc[0]["unit"] data.attrs["units"] = units return data def calculate( - self, - methods: Optional[List[str]] = None, - models: Optional[List[str]] = None, - scenarios: Optional[List[str]] = None, - regions: Optional[List[str]] = None, - years: Optional[List[int]] = None, - variables: Optional[List[str]] = None, - characterization: bool = True, - flows: Optional[List[str]] = None, - multiprocessing: bool = False, - demand_cutoff: float = 1e-3, - ) -> None: + self, + methods: Optional[List[str]] = None, + models: Optional[List[str]] = None, + scenarios: Optional[List[str]] = None, + regions: Optional[List[str]] = None, + years: Optional[List[int]] = None, + variables: Optional[List[str]] = None, + characterization: bool = True, + flows: Optional[List[str]] = None, + multiprocessing: bool = False, + demand_cutoff: float = 1e-3, + ) -> dict[Any, Any] | dict[int | Any, dict[Any, dict[str, Any]] | None]: """ Calculate Life Cycle Assessment (LCA) results for given methods, models, scenarios, regions, and years. @@ -694,14 +695,14 @@ def calculate( # Create xarray for storing LCA results if not already present if self.lca_results is None: - dp, A_index, B_index = get_lca_matrices( + dp, technosphere_index, biosphere_index = get_lca_matrices( self.datapackage, models[0], scenarios[0], years[0], methods ) - locations = fetch_inventories_locations(A_index) + locations = fetch_inventories_locations(technosphere_index) self.lca_results = create_lca_results_array( methods=methods, - B_indices=B_index, + B_indices=biosphere_index, years=years, regions=regions, locations=locations, @@ -713,6 +714,7 @@ def calculate( ) # Iterate over each combination of model, scenario, and year + results = {} for model in models: print(f"Calculating LCA results for {model}...") for scenario in scenarios: @@ -739,40 +741,93 @@ def calculate( ] # Process each region in parallel with Pool(cpu_count()) as p: - results = p.map(_calculate_year, args) + # store th results as a dictionary with years as keys + results.update({(model, scenario, year): result for year, result in + zip(years, p.map(_calculate_year, args))}) else: - results = [] - for year in years: - results.append( - _calculate_year( - ( - model, - scenario, - year, - regions, - variables, - methods, - demand_cutoff, - self.datapackage, - self.mapping, - self.units, - self.lca_results, - self.classifications, - self.scenarios, - self.reverse_classifications, - ) + results = { + (model, scenario, year): _calculate_year( + ( + model, + scenario, + year, + regions, + variables, + methods, + demand_cutoff, + self.datapackage, + self.mapping, + self.units, + self.lca_results, + self.classifications, + self.scenarios, + self.reverse_classifications, ) ) + for year in years + } - self.lca_results = xr.concat(results, dim="year") + # remove None values in results + results = {k: v for k, v in results.items() if v is not None} + + self.fill_in_result_array(results) + + def fill_in_result_array(self, results: dict): + + # Assuming DIR_CACHED_DB, results, and self.lca_results are already defined + + # Pre-loading data from disk if possible + cached_data = {data["id_array"]: load_numpy_array_from_disk(DIR_CACHED_DB / f"{data['id_array']}.npy") + for coord, result in results.items() + for region, data in result.items() if region != "other"} + # use pyprint to display progress + bar = pyprind.ProgBar(len(results)) + for coord, result in results.items(): + bar.update() + model, scenario, year = coord + acts_category_idx_dict = result["other"]["acts_category_idx_dict"] + acts_location_idx_dict = result["other"]["acts_location_idx_dict"] + + for region, data in result.items(): + if region == "other": + continue + + id_array = data["id_array"] + variables = data["variables"] + + # Use pre-loaded data + d = cached_data[id_array] + + # Attempt to perform operations more efficiently + for cat, act_cat_idx in acts_category_idx_dict.items(): + for loc, act_loc_idx in acts_location_idx_dict.items(): + # This could potentially be optimized further depending on the nature of idx + idx = np.intersect1d(act_cat_idx, act_loc_idx) + idx = idx[idx != -1] + + if idx.size > 0: + summed_data = d[idx].sum(axis=0)[None, ...].T + + # Consider if this operation can be batched or optimized + self.lca_results.loc[ + { + "region": region, + "model": model, + "scenario": scenario, + "year": year, + "act_category": cat, + "location": loc, + "variable": list(variables.keys()) + } + ] = summed_data def characterize_planetary_boundaries( - self, - models: Optional[List[str]] = None, - scenarios: Optional[List[str]] = None, - regions: Optional[List[str]] = None, - years: Optional[List[int]] = None, - variables: Optional[List[str]] = None, + self, + models: Optional[List[str]] = None, + scenarios: Optional[List[str]] = None, + regions: Optional[List[str]] = None, + years: Optional[List[int]] = None, + variables: Optional[List[str]] = None, ): self.calculate( models=models, diff --git a/pathways/utils.py b/pathways/utils.py index ffc45ef..84e5657 100644 --- a/pathways/utils.py +++ b/pathways/utils.py @@ -1,11 +1,10 @@ -import sys from typing import Any, Dict, List, Tuple, Union import numpy as np import xarray as xr import yaml -from . import DATA_DIR +from .filesystem_constants import DATA_DIR CLASSIFICATIONS = DATA_DIR / "activities_classifications.yaml" UNITS_CONVERSION = DATA_DIR / "units_conversion.yaml" @@ -266,3 +265,13 @@ def display_results( combined = combined.assign_coords({"act_category": new_act_category}) return combined + + +def load_numpy_array_from_disk(filepath): + """ + Load a numpy array from disk. + :param filepath: The path to the file containing the numpy array. + :return: numpy array + """ + + return np.load(filepath)