diff --git a/dev/generate datapackages.ipynb b/dev/generate datapackages.ipynb
index c4b927d..4244c4d 100644
--- a/dev/generate datapackages.ipynb
+++ b/dev/generate datapackages.ipynb
@@ -198,7 +198,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
+ "id": "54d3a225-e170-45ca-bb99-2cef313b7bdc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "PosixPath('/Users/romain/Library/Application Support/pathways')"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from pathways.filesystem_constants import USER_DATA_BASE_DIR\n",
+ "USER_DATA_BASE_DIR"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
"id": "9d57850c-04fa-4ef7-89e8-3e9de62f92e3",
"metadata": {},
"outputs": [
@@ -219,27 +241,60 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"id": "a081c764-a942-4627-9197-fcd373e98b62",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Calculating LCA results for image...\n",
+ "--- Calculating LCA results for SSP2-RCP19...\n",
+ "------ Calculating LCA results for 2005...\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "0% [##########################] 100% | ETA: 00:00:00\n",
+ "Total time elapsed: 00:00:15\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "------ Calculating LCA results for 2010...\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "0% [##########################] 100% | ETA: 00:00:00\n",
+ "Total time elapsed: 00:00:12\n",
+ "0% [##] 100% | ETA: 00:00:00\n",
+ "Total time elapsed: 00:00:44\n"
+ ]
+ }
+ ],
"source": [
"import numpy as np\n",
- "dp = p.calculate(\n",
+ "p.calculate(\n",
" methods=[\n",
" 'EF v3.1 - climate change - global warming potential (GWP100)',\n",
" #'RELICS - metals extraction - Lithium',\n",
" ],\n",
" regions=[r for r in p.scenarios.coords[\"region\"].values if r!=\"World\"],\n",
- " #regions=[\"WEU\",],\n",
" scenarios=[\"SSP2-RCP19\",],\n",
" variables=[\n",
" v for v in p.scenarios.coords[\"variables\"].values\n",
" if any(i in v for i in [\"Industry\", \"Transport\", \"Heating\"])\n",
" ],\n",
- " years=[2005, 2010, 2020],\n",
+ " years=[2005, 2010],\n",
" characterization=True,\n",
- " #flows=[\"Lithium - natural resource - in ground - kilogram\",],\n",
" multiprocessing=False,\n",
" demand_cutoff=0.01,\n",
")"
@@ -247,33 +302,168 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "c9786349-3b91-4439-8494-208ba7bc3b4a",
+ "execution_count": 3,
+ "id": "162124db-232b-494e-a8ef-8b90d1b87411",
"metadata": {},
"outputs": [],
"source": [
- "import bw2calc as bc\n",
- "import bw_processing as bwp"
+ "df = p.lca_results.sum(dim=\"act_category\").to_dataframe(\"value\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
- "id": "dedf1bb9-47a2-497c-b788-7fa64986827a",
+ "id": "cb52417b-e7a3-46ed-bbe6-8ff1d1607231",
"metadata": {},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/romain/anaconda3/envs/pathways/lib/python3.11/site-packages/scikits/umfpack/umfpack.py:736: UmfpackWarning: (almost) singular matrix! (estimated cond. number: 1.21e+13)\n",
- " warnings.warn(msg, UmfpackWarning)\n"
- ]
- },
{
"data": {
"text/plain": [
- "20.305023566997583"
+ " value\n",
+ "variable year region location model scenario impact_category \n",
+ "Industry_Food and Tobacco_Electricity 2005 BRA CN-NM image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 5.463954e+06\n",
+ " BO image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 1.244211e+04\n",
+ " HR image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 1.482432e+04\n",
+ " Canada without Quebec image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 2.628692e+01\n",
+ " IN-CT image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 3.280474e+05\n",
+ "... ...\n",
+ "Transport_Freight (train)_Liquid fossil 2010 WEU BR-PI image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 0.000000e+00\n",
+ " JAP image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 0.000000e+00\n",
+ " BR-ES image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 0.000000e+00\n",
+ " BR-SP image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 0.000000e+00\n",
+ " CL image SSP2-RCP19 EF v3.1 - climate change - global warming poten... 0.000000e+00\n",
+ "\n",
+ "[1810796 rows x 1 columns]"
]
},
"execution_count": 4,
@@ -282,248 +472,22 @@
}
],
"source": [
- "lca = bc.LCA(\n",
- " demand={1329: 1},\n",
- " data_objs=[dp],\n",
- ")\n",
- "lca.lci()\n",
- "lca.lcia()\n",
- "lca.score"
+ "df"
]
},
{
"cell_type": "code",
"execution_count": 5,
- "id": "f501423c-de64-4604-9409-2a40c77c32bb",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[array([( 0, 0), ( 0, 11778), ( 0, 17244), ...,\n",
- " (36540, 21436), (36540, 19141), (36540, 24252)],\n",
-    "       dtype=[('row', '<i4'), ('col', '<i4')])]"
-    ]
-   },
-   "execution_count": 5,
-   "metadata": {},
-   "output_type": "execute_result"
-  }
- ],
 {
 "data": {
"text/plain": [
- " variable year region \\\n",
- "0 Industry_Food and Tobacco_Electricity 2005 BRA \n",
- "1 Industry_Food and Tobacco_Electricity 2005 BRA \n",
- "2 Industry_Food and Tobacco_Electricity 2005 BRA \n",
- "3 Industry_Food and Tobacco_Electricity 2005 BRA \n",
- "4 Industry_Food and Tobacco_Electricity 2005 BRA \n",
- "\n",
- " location model scenario \\\n",
- "0 Europe, without Russia and Turkey image SSP2-RCP19 \n",
- "1 TZ image SSP2-RCP19 \n",
- "2 IN-DL image SSP2-RCP19 \n",
- "3 BR-TO image SSP2-RCP19 \n",
- "4 MR image SSP2-RCP19 \n",
- "\n",
- " impact_category value \n",
- "0 EF v3.1 - climate change - global warming pote... 1.043825e+06 \n",
- "1 EF v3.1 - climate change - global warming pote... 1.149504e+06 \n",
- "2 EF v3.1 - climate change - global warming pote... 1.264347e+04 \n",
- "3 EF v3.1 - climate change - global warming pote... 1.652670e+02 \n",
- "4 EF v3.1 - climate change - global warming pote... 1.881197e+03 "
+ " variable year region location \\\n",
+ "0 Industry_Food and Tobacco_Electricity 2005 BRA CN-NM \n",
+ "1 Industry_Food and Tobacco_Electricity 2005 BRA BO \n",
+ "2 Industry_Food and Tobacco_Electricity 2005 BRA HR \n",
+ "3 Industry_Food and Tobacco_Electricity 2005 BRA Canada without Quebec \n",
+ "4 Industry_Food and Tobacco_Electricity 2005 BRA IN-CT \n",
+ "\n",
+ " model scenario impact_category \\\n",
+ "0 image SSP2-RCP19 EF v3.1 - climate change - global warming pote... \n",
+ "1 image SSP2-RCP19 EF v3.1 - climate change - global warming pote... \n",
+ "2 image SSP2-RCP19 EF v3.1 - climate change - global warming pote... \n",
+ "3 image SSP2-RCP19 EF v3.1 - climate change - global warming pote... \n",
+ "4 image SSP2-RCP19 EF v3.1 - climate change - global warming pote... \n",
+ "\n",
+ " value \n",
+ "0 5.463954e+06 \n",
+ "1 1.244211e+04 \n",
+ "2 1.482432e+04 \n",
+ "3 2.628692e+01 \n",
+ "4 3.280474e+05 "
]
},
- "execution_count": 153,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -705,25 +669,17 @@
},
{
"cell_type": "code",
- "execution_count": 154,
+ "execution_count": 10,
"id": "5f2c4b3b-b26c-47ee-8c36-582feb027f58",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "array([2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,\n",
- " 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026,\n",
- " 2027, 2028, 2029, 2030, 2031, 2032, 2033, 2034, 2035, 2036, 2037,\n",
- " 2038, 2039, 2040, 2041, 2042, 2043, 2044, 2045, 2046, 2047, 2048,\n",
- " 2049, 2050, 2051, 2052, 2053, 2054, 2055, 2056, 2057, 2058, 2059,\n",
- " 2060, 2061, 2062, 2063, 2064, 2065, 2066, 2067, 2068, 2069, 2070,\n",
- " 2071, 2072, 2073, 2074, 2075, 2076, 2077, 2078, 2079, 2080, 2081,\n",
- " 2082, 2083, 2084, 2085, 2086, 2087, 2088, 2089, 2090, 2091, 2092,\n",
- " 2093, 2094, 2095, 2096, 2097, 2098, 2099, 2100])"
+ "array([2005, 2010])"
]
},
- "execution_count": 154,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -734,7 +690,7 @@
},
{
"cell_type": "code",
- "execution_count": 155,
+ "execution_count": 11,
"id": "1de1d921-4156-46b8-9113-f94214320f50",
"metadata": {},
"outputs": [],
@@ -745,7 +701,7 @@
},
{
"cell_type": "code",
- "execution_count": 156,
+ "execution_count": 12,
"id": "57b097ed-0108-4d62-8aaf-0a3d7c76fc48",
"metadata": {},
"outputs": [],
@@ -758,7 +714,7 @@
},
{
"cell_type": "code",
- "execution_count": 157,
+ "execution_count": 13,
"id": "c0b92aed-c162-4f5a-b200-512452d5157e",
"metadata": {},
"outputs": [],
@@ -768,7 +724,7 @@
},
{
"cell_type": "code",
- "execution_count": 158,
+ "execution_count": 14,
"id": "829eef08-5bc8-4ee3-8d51-fd55e2beb2b0",
"metadata": {},
"outputs": [],
@@ -778,7 +734,7 @@
},
{
"cell_type": "code",
- "execution_count": 159,
+ "execution_count": 15,
"id": "11c1861f-1a2b-49f6-a1be-94680d5a6b30",
"metadata": {},
"outputs": [],
@@ -790,17 +746,17 @@
},
{
"cell_type": "code",
- "execution_count": 160,
+ "execution_count": 16,
"id": "9acd0483-a477-4250-a28a-aa0d2f510509",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "125945"
+ "23544"
]
},
- "execution_count": 160,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -811,7 +767,7 @@
},
{
"cell_type": "code",
- "execution_count": 161,
+ "execution_count": 39,
"id": "5b5dde7a-3eb9-4f1d-997e-3c6310e76580",
"metadata": {},
"outputs": [],
@@ -822,17 +778,17 @@
},
{
"cell_type": "code",
- "execution_count": 162,
+ "execution_count": 40,
"id": "3361c4c8-c006-4f09-85f4-9d50c10f327f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "125945"
+ "11745"
]
},
- "execution_count": 162,
+ "execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
@@ -843,7 +799,7 @@
},
{
"cell_type": "code",
- "execution_count": 163,
+ "execution_count": 42,
"id": "87646c30-91c9-4dcf-b4a0-a52471634580",
"metadata": {},
"outputs": [
@@ -862,10 +818,10 @@
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 163,
+ "execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
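
Taken together, the notebook hunks above replace the manual bw2calc experiment with a single flow: run `p.calculate(...)`, then flatten `p.lca_results` into a dataframe. A minimal sketch, assuming `p` is the `Pathways` instance created earlier in the notebook (the final `groupby` roll-up is an illustration, not an actual cell):

```python
# Flatten the xarray results into a long-format dataframe, as the notebook does.
df = p.lca_results.sum(dim="act_category").to_dataframe("value")

# Hypothetical roll-up over the MultiIndex levels (region, year).
totals = df.groupby(level=["region", "year"])["value"].sum()
```
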
diff --git a/dev/timing.py b/dev/timing.py
index 4878838..8090d50 100644
--- a/dev/timing.py
+++ b/dev/timing.py
@@ -13,7 +13,7 @@
],
regions=[
"WEU",
- "USA",
+ #"USA",
],
scenarios=[scenario],
years=[
diff --git a/pathways/__init__.py b/pathways/__init__.py
index 4e3b96d..6d5860f 100644
--- a/pathways/__init__.py
+++ b/pathways/__init__.py
@@ -1,8 +1,5 @@
-from pathlib import Path
-
__version__ = (0, 0, 1)
-__all__ = ("__version__", "DATA_DIR", "Pathways")
+__all__ = ("__version__", "Pathways")
-DATA_DIR = Path(__file__).resolve().parent / "data"
from .pathways import Pathways
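
With `DATA_DIR` gone from the package root, downstream modules switch to the new `filesystem_constants` module, as `pathways/lcia.py` and `pathways/utils.py` do later in this diff:

```python
# Before this change:
# from pathways import DATA_DIR
# After:
from pathways.filesystem_constants import DATA_DIR
```
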
diff --git a/pathways/data_validation.py b/pathways/data_validation.py
index d084db8..6c727cb 100644
--- a/pathways/data_validation.py
+++ b/pathways/data_validation.py
@@ -57,10 +57,10 @@ def validate_datapackage(datapackage: datapackage.DataPackage):
dataframe = pd.DataFrame(data, columns=headers)
# Check that the scenario data is valid
- validate_scenario_data(dataframe)
+ #validate_scenario_data(dataframe)
# Check that the mapping is valid
- validate_mapping(datapackage.get_resource("mapping"), dataframe)
+ #validate_mapping(datapackage.get_resource("mapping"), dataframe)
# Check that the LCA data is valid
# validate_lca_data(datapackage)
diff --git a/pathways/filesystem_constants.py b/pathways/filesystem_constants.py
new file mode 100644
index 0000000..d17411b
--- /dev/null
+++ b/pathways/filesystem_constants.py
@@ -0,0 +1,19 @@
+"""
+This module contains constants for the filesystem paths used by Pathways.
+"""
+
+from pathlib import Path
+import platformdirs
+
+# Directory for data bundled with Pathways
+DATA_DIR = Path(__file__).resolve().parent / "data"
+
+# Directories for user-created data
+USER_DATA_BASE_DIR = platformdirs.user_data_path(appname="pathways", appauthor="pylca")
+USER_DATA_BASE_DIR.mkdir(parents=True, exist_ok=True)
+
+DIR_CACHED_DB = USER_DATA_BASE_DIR / "cache"
+DIR_CACHED_DB.mkdir(parents=True, exist_ok=True)
+
+USER_LOGS_DIR = platformdirs.user_log_path(appname="pathways", appauthor="pylca")
+USER_LOGS_DIR.mkdir(parents=True, exist_ok=True)
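
A minimal sketch of the cache round-trip these directories enable, mirroring what `process_region()` and `fill_in_result_array()` in `pathways/pathways.py` now do (the array content here is invented):

```python
import uuid

import numpy as np

from pathways.filesystem_constants import DIR_CACHED_DB

arr = np.arange(6, dtype=float).reshape(3, 2)  # stand-in for characterized results
id_array = uuid.uuid4()

# process_region() saves each region's results under a fresh UUID...
np.save(file=DIR_CACHED_DB / f"{id_array}.npy", arr=arr)

# ...and fill_in_result_array() later loads them back by the same key.
restored = np.load(DIR_CACHED_DB / f"{id_array}.npy")
assert np.array_equal(arr, restored)
```
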
diff --git a/pathways/lca.py b/pathways/lca.py
index e03ff6a..7c186dd 100644
--- a/pathways/lca.py
+++ b/pathways/lca.py
@@ -1,12 +1,9 @@
import csv
-import warnings
from pathlib import Path
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, Tuple
import bw_processing as bwp
import numpy as np
-import scipy.sparse
-import xarray as xr
from scipy import sparse
from scipy.sparse import csr_matrix
@@ -24,44 +21,6 @@
print("Solver: scikits.umfpack")
-def create_demand_vector(
- activities_idx: List[int],
- A: scipy.sparse,
- demand: xr.DataArray,
- unit_conversion: np.ndarray,
-) -> np.ndarray:
- """
- Create a demand vector with the given activities indices, sparse matrix A, demand values, and unit conversion factors.
-
- This function multiplies the given demand with the unit conversion factors and assigns the result to the positions in
- the vector corresponding to the activities indices. All other positions in the vector are set to zero.
-
- :param activities_idx: Indices of activities for which to create demand. These indices correspond to positions in the vector and matrix A.
- :type activities_idx: List[int]
-
- :param A: Sparse matrix used to determine the size of the demand vector.
- :type A: scipy.sparse.csr_matrix
-
- :param demand: Demand values for the activities, provided as a DataArray from the xarray package.
- :type demand: xr.DataArray
-
- :param unit_conversion: Unit conversion factors corresponding to each activity in activities_idx.
- :type unit_conversion: numpy.ndarray
-
- :return: The demand vector, represented as a 1-dimensional numpy array.
- :rtype: numpy.ndarray
- """
-
- # Initialize the demand vector with zeros, with length equal to the number of rows in A
- f = np.zeros(A.shape[0])
-
- # Assign demand values to the positions in the vector corresponding to the activities indices
- # Demand values are converted to the appropriate units before assignment
- f[activities_idx] = float(demand) * float(unit_conversion)
-
- return f
-
-
def read_indices_csv(file_path: Path) -> Dict[Tuple[str, str, str, str], str]:
"""
Reads a CSV file and returns its contents as a dictionary.
@@ -229,49 +188,3 @@ def remove_double_counting(A: csr_matrix, vars_info: dict) -> csr_matrix:
A_coo.eliminate_zeros()
return A_coo.tocsr()
-
-
-def characterize_inventory(C, lcia_matrix) -> np.ndarray:
- """
- Characterize an inventory with an LCIA matrix.
- :param C: Solved inventory
- :param lcia_matrix: Characterization matrix
- :return: Characterized inventory
- """
-
- # Multiply C with lcia_matrix to get D
- return C[..., None] * lcia_matrix
-
-
-def solve_inventory(A: csr_matrix, B: np.ndarray, f: np.ndarray) -> np.ndarray:
- """
- Solve the inventory problem for a set of activities, given technosphere and biosphere matrices, demand vector,
- LCIA matrix, and the indices of activities to consider.
-
- This function uses either the pypardiso or scipy library to solve the linear system, depending on the availability
- of the pypardiso library. The solutions are then used to calculate LCIA scores.
-
- ...
-
- :rtype: numpy.ndarray
-
- """
-
- if A.shape[0] != A.shape[1]:
- raise ValueError("A must be a square matrix")
-
- if A.shape[0] != f.size:
- raise ValueError("Incompatible dimensions between A and f")
-
- if B.shape[0] != A.shape[0]:
- raise ValueError("Incompatible dimensions between A and B")
-
- with warnings.catch_warnings():
- warnings.simplefilter("ignore")
- # Solve the system Ax = f for x using sparse solver
- A_inv = spsolve(A, f)[:, np.newaxis]
-
- # Compute product of A_inv and B
- C = A_inv * B
-
- return C
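
The deleted helpers (`create_demand_vector`, `solve_inventory`, `characterize_inventory`) have no one-for-one replacement: `bw2calc` now factorizes the technosphere matrix once and re-solves it per functional unit, as `pathways/pathways.py` does below. A sketch of that pattern, assuming `dp` is the datapackage returned by `get_lca_matrices()` and the second demand dict is a hypothetical functional unit:

```python
import bw2calc as bc

lca = bc.LCA(demand={0: 1}, data_objs=[dp])
lca.lci(factorize=True)  # factorize once, so repeated solves are cheap

lca.lcia({42: 1.0})  # hypothetical functional unit, re-solved against the factorization
scores = lca.characterized_inventory.sum(axis=0)  # per-activity scores
```
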
diff --git a/pathways/lcia.py b/pathways/lcia.py
index 712570b..085e4f3 100644
--- a/pathways/lcia.py
+++ b/pathways/lcia.py
@@ -1,10 +1,6 @@
import json
-import numpy as np
-import xarray as xr
-from scipy import sparse
-
-from . import DATA_DIR
+from .filesystem_constants import DATA_DIR
LCIA_METHODS = DATA_DIR / "lcia_data.json"
diff --git a/pathways/pathways.py b/pathways/pathways.py
index 94c416e..8f7620e 100644
--- a/pathways/pathways.py
+++ b/pathways/pathways.py
@@ -9,6 +9,7 @@
from multiprocessing import Pool, cpu_count
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
+import uuid
import bw2calc as bc
import numpy as np
@@ -19,16 +20,12 @@
from datapackage import DataPackage
from numpy import dtype, ndarray
from premise.geomap import Geomap
-from scipy import sparse
-from . import DATA_DIR
+from .filesystem_constants import DATA_DIR, DIR_CACHED_DB
from .data_validation import validate_datapackage
from .lca import (
- characterize_inventory,
- create_demand_vector,
get_lca_matrices,
remove_double_counting,
- solve_inventory,
)
from .lcia import get_lcia_method_names
from .utils import (
@@ -39,6 +36,7 @@
harmonize_units,
load_classifications,
load_units_conversion,
+ load_numpy_array_from_disk
)
@@ -84,12 +82,12 @@ def get_visible_files(path):
def resize_scenario_data(
- scenario_data: xr.DataArray,
- model: List[str],
- scenario: List[str],
- region: List[str],
- year: List[int],
- variables: List[str],
+ scenario_data: xr.DataArray,
+ model: List[str],
+ scenario: List[str],
+ region: List[str],
+ year: List[int],
+ variables: List[str],
):
"""
Resize the scenario data to the given scenario, year, region, and variables.
@@ -188,13 +186,14 @@ def fetch_inventories_locations(A_index: Dict[str, Tuple[str, str, str]]) -> Lis
return list(set([act[3] for act in A_index]))
-def generate_A_indices(A_index, reverse_classifications, lca_results_coords):
+def group_technosphere_indices_by_category(technosphere_index, reverse_classifications, lca_results_coords) -> Tuple:
# Generate a list of activity indices for each activity category
acts_idx = []
+ acts_dict = {}
for cat in lca_results_coords["act_category"].values:
- acts_idx.append(
- [int(A_index[a]) for a in reverse_classifications[cat] if a in A_index]
- )
+ x = [int(technosphere_index[a]) for a in reverse_classifications[cat] if a in technosphere_index]
+ acts_idx.append(x)
+ acts_dict[cat] = x
# Pad each list in acts_idx with -1 to make them all the same length
max_len = max([len(x) for x in acts_idx])
@@ -203,11 +202,41 @@ def generate_A_indices(A_index, reverse_classifications, lca_results_coords):
)
# Swap the axes of acts_idx to align with the dimensionality of D
- return np.swapaxes(acts_idx, 0, 1)
+ return acts_idx, acts_dict, np.swapaxes(acts_idx, 0, 1)
+def group_technosphere_indices_by_location(technosphere_index, locations) -> Tuple:
+ """
+ Group the technosphere indices by location.
+    :param technosphere_index: mapping of activity tuples to technosphere matrix indices
+    :param locations: list of locations to group by
+    :return: tuple of (list of index lists, dict keyed by location, -1-padded index array)
+ """
+
+ act_indices_list = []
+ act_indices_dict = {}
+ for loc in locations:
+ x = [
+ int(technosphere_index[a])
+ for a in technosphere_index
+ if a[-1] == loc
+ ]
+
+ act_indices_list.append(x)
+ act_indices_dict[loc] = x
+
+ # Pad each list in act_indices_list with -1 to make them all the same length
+ max_len = max([len(x) for x in act_indices_list])
+ act_indices_array = np.array(
+ [
+ np.pad(x, (0, max_len - len(x)), constant_values=-1)
+ for x in act_indices_list
+ ]
+ )
+
+ return act_indices_list, act_indices_dict, act_indices_array
-def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int]]:
+def process_region(data: Tuple) -> dict[str, Any]:
(
model,
scenario,
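
To make concrete what the two grouping helpers return, a toy example for the location variant (the activity tuples are invented; the helper matches on the last tuple element, the location):

```python
from pathways.pathways import group_technosphere_indices_by_location

technosphere_index = {
    ("heat, industrial", "MJ", "heat", "DE"): 0,
    ("electricity, grid", "kWh", "electricity", "DE"): 1,
    ("electricity, grid", "kWh", "electricity", "FR"): 2,
}

idx_list, idx_dict, idx_array = group_technosphere_indices_by_location(
    technosphere_index, ["DE", "FR"]
)
# idx_list  -> [[0, 1], [2]]
# idx_dict  -> {"DE": [0, 1], "FR": [2]}
# idx_array -> [[0, 1], [2, -1]]  (rows padded with -1 to equal length)
```
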
@@ -255,13 +284,13 @@ def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int
# If the total demand is zero, return None
if (
- demand
- / scenarios.sel(
- region=region,
- model=model,
- pathway=scenario,
- year=year,
- ).sum(dim="variables")
+ demand
+ / scenarios.sel(
+ region=region,
+ model=model,
+ pathway=scenario,
+ year=year,
+ ).sum(dim="variables")
) < demand_cutoff:
continue
@@ -276,19 +305,20 @@ def process_region(data: Tuple) -> dict[str, ndarray[Any, dtype[Any]] | list[int
d = []
for f, fu in enumerate(FU):
lca.lcia(fu)
- d.append(lca.supply_array)
+ d.append(lca.characterized_inventory.sum(axis=0))
+
+ id_array = uuid.uuid4()
+ np.save(file=DIR_CACHED_DB / f"{id_array}.npy", arr=np.vstack(d).T)
-    # concatenate the list of sparse matrices and
-    # add a third dimension and concatenate along it
+    # save the characterized results to disk and return a file reference
return {
- "data": np.column_stack(d),
- "variables": variables_demand,
- "variables_idx": variables_idx,
+ "id_array": id_array,
+ "variables": {v: idx for v, idx in zip(variables_demand, variables_idx)},
}
def _calculate_year(args):
-
(
model,
scenario,
@@ -338,9 +368,42 @@ def _calculate_year(args):
if act not in classifications:
missing_class.append(list(act))
+ results = {}
+
locations = lca_results.coords["location"].values.tolist()
location_to_index = {location: index for index, location in enumerate(locations)}
- rev_technosphere_index = {int(v): k for k, v in technosphere_indices.items()}
+ reverse_technosphere_index = {int(v): k for k, v in technosphere_indices.items()}
+ loc_idx = np.array(
+ [
+ location_to_index[act[-1]]
+ for act in reverse_technosphere_index.values()
+ if act[-1] in locations
+ ]
+ )
+
+ acts_category_idx_list, acts_category_idx_dict, acts_category_idx_array = group_technosphere_indices_by_category(
+ technosphere_indices,
+ reverse_classifications,
+ lca_results.coords,
+ )
+
+ acts_location_idx_list, acts_location_idx_dict, acts_location_idx_array = group_technosphere_indices_by_location(
+ technosphere_indices,
+ locations,
+ )
+
+ results["other"] = {
+ "locations": locations,
+ "location_to_index": location_to_index,
+ "reverse_technosphere_index": reverse_technosphere_index,
+ "activity_location_idx": loc_idx,
+ "acts_category_idx_list": acts_category_idx_list,
+ "acts_category_idx_dict": acts_category_idx_dict,
+ "acts_category_idx_array": acts_category_idx_array,
+ "acts_location_idx_list": acts_location_idx_list,
+ "acts_location_idx_dict": acts_location_idx_dict,
+ "acts_location_idx_array": acts_location_idx_array,
+ }
lca = bc.LCA(
demand={0: 1},
@@ -350,11 +413,8 @@ def _calculate_year(args):
)
lca.lci(factorize=True)
- results = {}
- # use pyprind
bar = pyprind.ProgBar(len(regions))
for region in regions:
- print(region)
bar.update()
# Iterate over each region
results[region] = process_region(
@@ -376,75 +436,12 @@ def _calculate_year(args):
demand_cutoff,
locations,
location_to_index,
- rev_technosphere_index,
+ reverse_technosphere_index,
lca,
)
)
- # Populate the result array
- act_locs = [a[-1] for a in rev_technosphere_index.values()]
- acts_idx = generate_A_indices(
- technosphere_indices,
- reverse_classifications,
- lca_results.coords,
- )
- act_categories = lca_results.coords["act_category"].values
- impact_categories = lca_results.coords["impact_category"].values
-
- # Generate a list of activity indices for each activity category
- category_idx = []
- for cat in lca_results.coords["act_category"].values:
- category_idx.append(
- [
- int(technosphere_indices[a])
- for a in reverse_classifications[cat]
- if a in technosphere_indices
- ]
- )
-
- mask = np.array([act[-1] in act_locs for act in rev_technosphere_index.values()])
- loc_idx = np.array(
- [
- location_to_index[act[-1]]
- for act in rev_technosphere_index.values()
- if act[-1] in act_locs
- ]
- )
-
- # Initialize the new array with zeros for missing data
- E = np.zeros(
- (
- len(technosphere_indices),
- len(locations),
- len(impact_categories),
- len(variables),
- )
- )
- print("E.shape", E.shape)
-
- for region, result in results.items():
- data = result["data"]
- variables_demand = result["variables"]
- variables_idx = result["variables_idx"]
- E[mask, loc_idx][..., variables_idx] = data[mask][:, None]
-
- # using acts_idx for advanced indexing
- E = E[acts_idx, ...].sum(axis=0)
- print("final E.shape", E.shape)
-
- lca_results.loc[
- {
- "act_category": act_categories,
- "variable": variables_demand,
- "impact_category": impact_categories,
- "year": year,
- "region": region,
- "model": model,
- "scenario": scenario,
- }
- ] = E.transpose(0, -1, 1, 2)
-
- return lca_results
+ return results
class Pathways:
@@ -475,6 +472,10 @@ def __init__(self, datapackage):
self.units = load_units_conversion()
self.lcia_matrix = None
+ # clean up the cache directory
+ for file in get_visible_files(DIR_CACHED_DB):
+ file.unlink()
+
def read_datapackage(self) -> DataPackage:
"""Read the datapackage.json file.
@@ -606,25 +607,25 @@ def get_scenarios(self, scenario_data: pd.DataFrame) -> xr.DataArray:
units[variable] = scenario_data[
scenario_data["variables"]
== self.mapping[variable]["scenario variable"]
- ].iloc[0]["unit"]
+ ].iloc[0]["unit"]
data.attrs["units"] = units
return data
def calculate(
- self,
- methods: Optional[List[str]] = None,
- models: Optional[List[str]] = None,
- scenarios: Optional[List[str]] = None,
- regions: Optional[List[str]] = None,
- years: Optional[List[int]] = None,
- variables: Optional[List[str]] = None,
- characterization: bool = True,
- flows: Optional[List[str]] = None,
- multiprocessing: bool = False,
- demand_cutoff: float = 1e-3,
- ) -> None:
+ self,
+ methods: Optional[List[str]] = None,
+ models: Optional[List[str]] = None,
+ scenarios: Optional[List[str]] = None,
+ regions: Optional[List[str]] = None,
+ years: Optional[List[int]] = None,
+ variables: Optional[List[str]] = None,
+ characterization: bool = True,
+ flows: Optional[List[str]] = None,
+ multiprocessing: bool = False,
+ demand_cutoff: float = 1e-3,
+    ) -> None:
"""
Calculate Life Cycle Assessment (LCA) results for given methods, models, scenarios, regions, and years.
@@ -694,14 +695,14 @@ def calculate(
# Create xarray for storing LCA results if not already present
if self.lca_results is None:
- dp, A_index, B_index = get_lca_matrices(
+ dp, technosphere_index, biosphere_index = get_lca_matrices(
self.datapackage, models[0], scenarios[0], years[0], methods
)
- locations = fetch_inventories_locations(A_index)
+ locations = fetch_inventories_locations(technosphere_index)
self.lca_results = create_lca_results_array(
methods=methods,
- B_indices=B_index,
+ B_indices=biosphere_index,
years=years,
regions=regions,
locations=locations,
@@ -713,6 +714,7 @@ def calculate(
)
# Iterate over each combination of model, scenario, and year
+ results = {}
for model in models:
print(f"Calculating LCA results for {model}...")
for scenario in scenarios:
@@ -739,40 +741,93 @@ def calculate(
]
# Process each region in parallel
with Pool(cpu_count()) as p:
- results = p.map(_calculate_year, args)
+                    # store the results in a dictionary keyed by (model, scenario, year)
+                    results.update({(model, scenario, year): result
+                                    for year, result in zip(years, p.map(_calculate_year, args))})
else:
- results = []
- for year in years:
- results.append(
- _calculate_year(
- (
- model,
- scenario,
- year,
- regions,
- variables,
- methods,
- demand_cutoff,
- self.datapackage,
- self.mapping,
- self.units,
- self.lca_results,
- self.classifications,
- self.scenarios,
- self.reverse_classifications,
- )
+ results = {
+ (model, scenario, year): _calculate_year(
+ (
+ model,
+ scenario,
+ year,
+ regions,
+ variables,
+ methods,
+ demand_cutoff,
+ self.datapackage,
+ self.mapping,
+ self.units,
+ self.lca_results,
+ self.classifications,
+ self.scenarios,
+ self.reverse_classifications,
)
)
+ for year in years
+ }
- self.lca_results = xr.concat(results, dim="year")
+ # remove None values in results
+ results = {k: v for k, v in results.items() if v is not None}
+
+ self.fill_in_result_array(results)
+
+ def fill_in_result_array(self, results: dict):
+
+        # Pre-load the cached result arrays from disk once, keyed by UUID,
+        # so each file is read a single time
+        cached_data = {data["id_array"]: load_numpy_array_from_disk(DIR_CACHED_DB / f"{data['id_array']}.npy")
+                       for result in results.values()
+                       for region, data in result.items() if region != "other"}
+        # use pyprind to display progress
+ bar = pyprind.ProgBar(len(results))
+ for coord, result in results.items():
+ bar.update()
+ model, scenario, year = coord
+ acts_category_idx_dict = result["other"]["acts_category_idx_dict"]
+ acts_location_idx_dict = result["other"]["acts_location_idx_dict"]
+
+ for region, data in result.items():
+ if region == "other":
+ continue
+
+ id_array = data["id_array"]
+ variables = data["variables"]
+
+ # Use pre-loaded data
+ d = cached_data[id_array]
+
+            # Aggregate the cached scores over each (activity category, location) pair
+            for cat, act_cat_idx in acts_category_idx_dict.items():
+                for loc, act_loc_idx in acts_location_idx_dict.items():
+                    # keep indices present in both groups and drop the -1 padding
+                    idx = np.intersect1d(act_cat_idx, act_loc_idx)
+                    idx = idx[idx != -1]
+
+ if idx.size > 0:
+ summed_data = d[idx].sum(axis=0)[None, ...].T
+
+                        # write the summed scores into lca_results at the matching coordinates
+ self.lca_results.loc[
+ {
+ "region": region,
+ "model": model,
+ "scenario": scenario,
+ "year": year,
+ "act_category": cat,
+ "location": loc,
+ "variable": list(variables.keys())
+ }
+ ] = summed_data
def characterize_planetary_boundaries(
- self,
- models: Optional[List[str]] = None,
- scenarios: Optional[List[str]] = None,
- regions: Optional[List[str]] = None,
- years: Optional[List[int]] = None,
- variables: Optional[List[str]] = None,
+ self,
+ models: Optional[List[str]] = None,
+ scenarios: Optional[List[str]] = None,
+ regions: Optional[List[str]] = None,
+ years: Optional[List[int]] = None,
+ variables: Optional[List[str]] = None,
):
self.calculate(
models=models,
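
The heart of `fill_in_result_array()` above is the category × location intersection; a toy version of that single step, with invented shapes:

```python
import numpy as np

d = np.arange(12, dtype=float).reshape(4, 3)  # cached array: (activities, variables)
act_cat_idx = np.array([0, 2, -1])            # one category's activities, -1-padded
act_loc_idx = np.array([2, 3])                # one location's activities

idx = np.intersect1d(act_cat_idx, act_loc_idx)  # activities in both groups
idx = idx[idx != -1]                            # drop the padding -> array([2])

summed_data = d[idx].sum(axis=0)[None, ...].T   # (n_variables, 1) column for lca_results
```
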
diff --git a/pathways/utils.py b/pathways/utils.py
index ffc45ef..84e5657 100644
--- a/pathways/utils.py
+++ b/pathways/utils.py
@@ -1,11 +1,10 @@
-import sys
from typing import Any, Dict, List, Tuple, Union
import numpy as np
import xarray as xr
import yaml
-from . import DATA_DIR
+from .filesystem_constants import DATA_DIR
CLASSIFICATIONS = DATA_DIR / "activities_classifications.yaml"
UNITS_CONVERSION = DATA_DIR / "units_conversion.yaml"
@@ -266,3 +265,13 @@ def display_results(
combined = combined.assign_coords({"act_category": new_act_category})
return combined
+
+
+def load_numpy_array_from_disk(filepath):
+ """
+ Load a numpy array from disk.
+ :param filepath: The path to the file containing the numpy array.
+ :return: numpy array
+ """
+
+ return np.load(filepath)