From ea0717ed5eebc93c89213dff302a90f2abd02637 Mon Sep 17 00:00:00 2001 From: Konstantin Stadler Date: Fri, 25 Oct 2024 12:46:30 +0200 Subject: [PATCH] fixed extension_convert for multiple extensions --- doc/source/notebooks/convert.ipynb | 1211 ++++++++++++++++++++++- doc/source/notebooks/convert.py | 86 +- doc/source/notebooks/extract_data.ipynb | 2 +- doc/source/notebooks/extract_data.py | 2 +- pymrio/core/mriosystem.py | 13 +- tests/test_core.py | 191 ++-- 6 files changed, 1365 insertions(+), 140 deletions(-) diff --git a/doc/source/notebooks/convert.ipynb b/doc/source/notebooks/convert.ipynb index 2b77a5c8..2ccad6bc 100644 --- a/doc/source/notebooks/convert.ipynb +++ b/doc/source/notebooks/convert.ipynb @@ -20,7 +20,7 @@ "- renaming the index names of results/extensions\n", "- adjusting the numerical values of the data,\n", " e.g. for unit conversion or characterisation\n", - "- finding and extracting data based on indicies across a table or an mrio(-extension).\n", + "- finding and extracting data based on indices across a table or an mrio(-extension).\n", " This can be system based on name and potentially constrained by sector/region\n", " or any other specification.\n", "- Aggregation/Summation of satellite accounts\n", @@ -103,7 +103,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "id": "ae7e3af0", "metadata": {}, "outputs": [], @@ -137,10 +137,74 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 60, "id": "f72ae844", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stressorcompartmentchem_stressor__stressorcompartment__compartmentfactor
0Carbon Dioxide[A|a]irCO2Air1.0
1Methane[A|a]irCH4Air1.0
\n", + "
" + ], + "text/plain": [ + " stressor compartment chem_stressor__stressor \\\n", + "0 Carbon Dioxide [A|a]ir CO2 \n", + "1 Methane [A|a]ir CH4 \n", + "\n", + " compartment__compartment factor \n", + "0 Air 1.0 \n", + "1 Air 1.0 " + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ghg_map = pd.DataFrame(\n", " columns=[\n", @@ -160,10 +224,75 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "id": "f73cd886", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionRegion1Region2Region3
chem_stressorcompartment
CH4Air0.50.60.7
CO2Air5.06.07.0
\n", + "
" + ], + "text/plain": [ + "region Region1 Region2 Region3\n", + "chem_stressor compartment \n", + "CH4 Air 0.5 0.6 0.7\n", + "CO2 Air 5.0 6.0 7.0" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ghg_new = pymrio.convert(ghg_result, ghg_map)\n", "ghg_new" @@ -192,10 +321,67 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 62, "id": "48688b14", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stressorcompartmentchem_stressor__stressorcompartment__compartment
0Carbon Dioxide[A|a]irCO2Air
1Methane[A|a]irCH4Air
\n", + "
" + ], + "text/plain": [ + " stressor compartment chem_stressor__stressor compartment__compartment\n", + "0 Carbon Dioxide [A|a]ir CO2 Air\n", + "1 Methane [A|a]ir CH4 Air" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ghg_map_wo_factor = pd.DataFrame(\n", " columns=[\n", @@ -214,12 +400,77 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 63, "id": "826f558a", "metadata": { "lines_to_next_cell": 2 }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionRegion1Region2Region3
chem_stressorcompartment
CH4Air0.50.60.7
CO2Air5.06.07.0
\n", + "
" + ], + "text/plain": [ + "region Region1 Region2 Region3\n", + "chem_stressor compartment \n", + "CH4 Air 0.5 0.6 0.7\n", + "CO2 Air 5.0 6.0 7.0" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ghg_new_wo_factor = pymrio.convert(ghg_result, ghg_map_wo_factor)\n", "ghg_new_wo_factor" @@ -245,10 +496,75 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 64, "id": "e4fe084e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionRegion1Region2Region3
stressorcompartment
Carbon DioxideAir5.06.07.0
Methaneair0.50.60.7
\n", + "
" + ], + "text/plain": [ + "region Region1 Region2 Region3\n", + "stressor compartment \n", + "Carbon Dioxide Air 5.0 6.0 7.0\n", + "Methane air 0.5 0.6 0.7" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ghg_result_ton = pd.DataFrame(\n", " columns=[\"Region1\", \"Region2\", \"Region3\"],\n", @@ -270,9 +586,80 @@ "id": "1ad41ad6", "metadata": {}, "source": [ - "We can get the data in kg by\n", - "\n", - "\n", + "We can get the data in kg by" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "ae5eba1e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stressorcompartmentchem_stressor__stressorcompartment__compartmentfactor
0Carbon Dioxide[A|a]irCO2Air1000
1Methane[A|a]irCH4Air1000
\n", + "
" + ], + "text/plain": [ + " stressor compartment chem_stressor__stressor \\\n", + "0 Carbon Dioxide [A|a]ir CO2 \n", + "1 Methane [A|a]ir CH4 \n", + "\n", + " compartment__compartment factor \n", + "0 Air 1000 \n", + "1 Air 1000 " + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ "ghg_map_to_kg = pd.DataFrame(\n", " columns=[\n", " \"stressor\",\n", @@ -286,8 +673,81 @@ " [\"Methane\", \"[A|a]ir\", \"CH4\", \"Air\", 1000],\n", " ],\n", ")\n", - "ghg_map_to_kg\n", - "\n", + "ghg_map_to_kg" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "77effc7a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionRegion1Region2Region3
chem_stressorcompartment
CH4Air500.0600.0700.0
CO2Air5000.06000.07000.0
\n", + "
" + ], + "text/plain": [ + "region Region1 Region2 Region3\n", + "chem_stressor compartment \n", + "CH4 Air 500.0 600.0 700.0\n", + "CO2 Air 5000.0 6000.0 7000.0" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ "ghg_new_kg = pymrio.convert(ghg_result_ton, ghg_map_to_kg)\n", "ghg_new_kg" ] @@ -340,12 +800,77 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 67, "id": "677f3872", "metadata": { "lines_to_next_cell": 2 }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionRegion1Region2Region3
chem_stressorcompartment
CH4Air500.0600.0700.0
CO2Air5000.06000.07000.0
\n", + "
" + ], + "text/plain": [ + "region Region1 Region2 Region3\n", + "chem_stressor compartment \n", + "CH4 Air 500.0 600.0 700.0\n", + "CO2 Air 5000.0 6000.0 7000.0" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ghg_new_kg" ] @@ -363,10 +888,113 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 68, "id": "289eae3b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
chem_stressorGWP__chem_stressorfactor
0CO2GWP1001
1CH4GWP10029
2NHxGWP100273
3CO2GWP201
4CH4GWP2080
5NHxGWP20273
6CO2GWP5001
7CH4GWP5008
8NHxGWP500130
\n", + "
" + ], + "text/plain": [ + " chem_stressor GWP__chem_stressor factor\n", + "0 CO2 GWP100 1\n", + "1 CH4 GWP100 29\n", + "2 NHx GWP100 273\n", + "3 CO2 GWP20 1\n", + "4 CH4 GWP20 80\n", + "5 NHx GWP20 273\n", + "6 CO2 GWP500 1\n", + "7 CH4 GWP500 8\n", + "8 NHx GWP500 130" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "GWP_characterization = pd.DataFrame(\n", " columns=[\"chem_stressor\", \"GWP__chem_stressor\", \"factor\"],\n", @@ -387,12 +1015,80 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 69, "id": "04df3552", "metadata": { "lines_to_next_cell": 2 }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionRegion1Region2Region3
GWP
GWP10019500.023400.027300.0
GWP2045000.054000.063000.0
GWP5009000.010800.012600.0
\n", + "
" + ], + "text/plain": [ + "region Region1 Region2 Region3\n", + "GWP \n", + "GWP100 19500.0 23400.0 27300.0\n", + "GWP20 45000.0 54000.0 63000.0\n", + "GWP500 9000.0 10800.0 12600.0" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "GWP_result = pymrio.convert(ghg_new_kg, GWP_characterization)\n", "GWP_result" @@ -413,13 +1109,86 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 70, "id": "ab77360e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionRegion1Region2Region3
GWPcompartment
GWP100Air19500.023400.027300.0
GWP20Air45000.054000.063000.0
GWP500Air9000.010800.012600.0
\n", + "
" + ], + "text/plain": [ + "region Region1 Region2 Region3\n", + "GWP compartment \n", + "GWP100 Air 19500.0 23400.0 27300.0\n", + "GWP20 Air 45000.0 54000.0 63000.0\n", + "GWP500 Air 9000.0 10800.0 12600.0" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "GWP_result_with_comp = pymrio.convert(\n", - " ghg_new_kg, GWP_characterization, drop_not_bridged=False\n", + " ghg_new_kg, GWP_characterization, drop_not_bridged_index=False\n", ")\n", "GWP_result_with_comp" ] @@ -457,10 +1226,99 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 71, "id": "00156b6d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionRegion1Region2Region3
stressor
Wheat3101
Maize5203
Rice01234
Pasture12349
Forest extensive322711
Forest intensive431724
\n", + "
" + ], + "text/plain": [ + "region Region1 Region2 Region3\n", + "stressor \n", + "Wheat 3 10 1\n", + "Maize 5 20 3\n", + "Rice 0 12 34\n", + "Pasture 12 34 9\n", + "Forest extensive 32 27 11\n", + "Forest intensive 43 17 24" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "land_use_result = pd.DataFrame(\n", " columns=[\"Region1\", \"Region2\", \"Region3\"],\n", @@ -500,11 +1358,142 @@ ] }, { - "cell_type": "markdown", - "id": "1f1b48ff", + "cell_type": "code", + "execution_count": 72, + "id": "e3ed4128", "metadata": { "lines_to_next_cell": 2 }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stressorBioDiv__stressorregionfactor
0Wheat|MaizeBioImpactRegion13
1WheatBioImpactRegion[2,3]4
2MaizeBioImpactRegion[2,3]7
3RiceBioImpactRegion112
4RiceBioImpactRegion212
5RiceBioImpactRegion312
6PastureBioImpactRegion[1,2,3]12
7Forest.*BioImpactRegion12
8Forest.*BioImpactRegion23
9Forest ext.*BioImpactRegion31
10Forest int.*BioImpactRegion33
\n", + "
" + ], + "text/plain": [ + " stressor BioDiv__stressor region factor\n", + "0 Wheat|Maize BioImpact Region1 3\n", + "1 Wheat BioImpact Region[2,3] 4\n", + "2 Maize BioImpact Region[2,3] 7\n", + "3 Rice BioImpact Region1 12\n", + "4 Rice BioImpact Region2 12\n", + "5 Rice BioImpact Region3 12\n", + "6 Pasture BioImpact Region[1,2,3] 12\n", + "7 Forest.* BioImpact Region1 2\n", + "8 Forest.* BioImpact Region2 3\n", + "9 Forest ext.* BioImpact Region3 1\n", + "10 Forest int.* BioImpact Region3 3" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "landuse_characterization = pd.DataFrame(\n", " columns=[\"stressor\", \"BioDiv__stressor\", \"region\", \"factor\"],\n", @@ -559,10 +1548,64 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 73, "id": "8f2e3a37", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionRegion1Region2Region3
BioDiv
BioImpact318864624
\n", + "
" + ], + "text/plain": [ + "region Region1 Region2 Region3\n", + "BioDiv \n", + "BioImpact 318 864 624" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "biodiv_result = pymrio.convert(land_use_result, landuse_characterization)\n", "biodiv_result" @@ -583,12 +1626,106 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "9d435a0a", + "execution_count": 74, + "id": "157dd099", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "stressor region \n", + "Wheat Region1 3\n", + " Region2 10\n", + " Region3 1\n", + "Maize Region1 5\n", + " Region2 20\n", + " Region3 3\n", + "Rice Region1 0\n", + " Region2 12\n", + " Region3 34\n", + "Pasture Region1 12\n", + " Region2 34\n", + " Region3 9\n", + "Forest extensive Region1 32\n", + " Region2 27\n", + " Region3 11\n", + "Forest intensive Region1 43\n", + " Region2 17\n", + " Region3 24\n", + "dtype: int64" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "land_use_result_stacked = land_use_result.stack(level=\"region\")\n", + "land_use_result_stacked" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "9d435a0a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionRegion1Region2Region3
BioDiv
BioImpact318864624
\n", + "
" + ], + "text/plain": [ + "region Region1 Region2 Region3\n", + "BioDiv \n", + "BioImpact 318 864 624" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ "biodiv_result_stacked = pymrio.convert(\n", " land_use_result_stacked, landuse_characterization, drop_not_bridged_index=False\n", ")\n", @@ -616,7 +1753,7 @@ "metadata": {}, "source": [ "Irrespectively of the table or the mrio system, the convert function always follows the same pattern.\n", - "It requires a bridge table, which contains the mapping of the indicies of the source data to the indicies of the target data.\n", + "It requires a bridge table, which contains the mapping of the indices of the source data to the indices of the target data.\n", "This bridge table has to follow a specific format, depending on the table to be converted." ] } @@ -637,7 +1774,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" + "version": "3.12.0" } }, "nbformat": 4, diff --git a/doc/source/notebooks/convert.py b/doc/source/notebooks/convert.py index a14daeec..f499c6a7 100644 --- a/doc/source/notebooks/convert.py +++ b/doc/source/notebooks/convert.py @@ -5,7 +5,7 @@ # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.16.4 +# jupytext_version: 1.15.2 # kernelspec: # display_name: Python 3 (ipykernel) # language: python @@ -23,7 +23,7 @@ # - renaming the index names of results/extensions # - adjusting the numerical values of the data, # e.g. for unit conversion or characterisation -# - finding and extracting data based on indicies across a table or an mrio(-extension). +# - finding and extracting data based on indices across a table or an mrio(-extension). # This can be system based on name and potentially constrained by sector/region # or any other specification. # - Aggregation/Summation of satellite accounts @@ -166,25 +166,26 @@ # %% [markdown] # We can get the data in kg by -# -# -# ghg_map_to_kg = pd.DataFrame( -# columns=[ -# "stressor", -# "compartment", -# "chem_stressor__stressor", -# "compartment__compartment", -# "factor", -# ], -# data=[ -# ["Carbon Dioxide", "[A|a]ir", "CO2", "Air", 1000], -# ["Methane", "[A|a]ir", "CH4", "Air", 1000], -# ], -# ) -# ghg_map_to_kg -# -# ghg_new_kg = pymrio.convert(ghg_result_ton, ghg_map_to_kg) -# ghg_new_kg + +# %% +ghg_map_to_kg = pd.DataFrame( + columns=[ + "stressor", + "compartment", + "chem_stressor__stressor", + "compartment__compartment", + "factor", + ], + data=[ + ["Carbon Dioxide", "[A|a]ir", "CO2", "Air", 1000], + ["Methane", "[A|a]ir", "CH4", "Air", 1000], + ], +) +ghg_map_to_kg + +# %% +ghg_new_kg = pymrio.convert(ghg_result_ton, ghg_map_to_kg) +ghg_new_kg # %% [markdown] # In case of unit conversion of pymrio satellite accounts, @@ -248,7 +249,7 @@ # %% GWP_result_with_comp = pymrio.convert( - ghg_new_kg, GWP_characterization, drop_not_bridged=False + ghg_new_kg, GWP_characterization, drop_not_bridged_index=False ) GWP_result_with_comp @@ -299,24 +300,24 @@ # any bridge column mapping it to a new name. Thus, the "region" can either be in the index # or in the columns of the source data - in the given case it is in the columns. -# %% [markdown] -# landuse_characterization = pd.DataFrame( -# columns=["stressor", "BioDiv__stressor", "region", "factor"], -# data=[ -# ["Wheat|Maize", "BioImpact", "Region1", 3], -# ["Wheat", "BioImpact", "Region[2,3]", 4], -# ["Maize", "BioImpact", "Region[2,3]", 7], -# ["Rice", "BioImpact", "Region1", 12], -# ["Rice", "BioImpact", "Region2", 12], -# ["Rice", "BioImpact", "Region3", 12], -# ["Pasture", "BioImpact", "Region[1,2,3]", 12], -# ["Forest.*", "BioImpact", "Region1", 2], -# ["Forest.*", "BioImpact", "Region2", 3], -# ["Forest ext.*", "BioImpact", "Region3", 1], -# ["Forest int.*", "BioImpact", "Region3", 3], -# ], -# ) -# landuse_characterization +# %% +landuse_characterization = pd.DataFrame( + columns=["stressor", "BioDiv__stressor", "region", "factor"], + data=[ + ["Wheat|Maize", "BioImpact", "Region1", 3], + ["Wheat", "BioImpact", "Region[2,3]", 4], + ["Maize", "BioImpact", "Region[2,3]", 7], + ["Rice", "BioImpact", "Region1", 12], + ["Rice", "BioImpact", "Region2", 12], + ["Rice", "BioImpact", "Region3", 12], + ["Pasture", "BioImpact", "Region[1,2,3]", 12], + ["Forest.*", "BioImpact", "Region1", 2], + ["Forest.*", "BioImpact", "Region2", 3], + ["Forest ext.*", "BioImpact", "Region3", 1], + ["Forest int.*", "BioImpact", "Region3", 3], + ], +) +landuse_characterization # %% [markdown] @@ -354,6 +355,9 @@ # %% land_use_result_stacked = land_use_result.stack(level="region") +land_use_result_stacked + +# %% biodiv_result_stacked = pymrio.convert( land_use_result_stacked, landuse_characterization, drop_not_bridged_index=False ) @@ -370,5 +374,5 @@ # %% [markdown] # Irrespectively of the table or the mrio system, the convert function always follows the same pattern. -# It requires a bridge table, which contains the mapping of the indicies of the source data to the indicies of the target data. +# It requires a bridge table, which contains the mapping of the indices of the source data to the indices of the target data. # This bridge table has to follow a specific format, depending on the table to be converted. diff --git a/doc/source/notebooks/extract_data.ipynb b/doc/source/notebooks/extract_data.ipynb index 93512e89..609243c8 100644 --- a/doc/source/notebooks/extract_data.ipynb +++ b/doc/source/notebooks/extract_data.ipynb @@ -1242,7 +1242,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" + "version": "3.12.0" } }, "nbformat": 4, diff --git a/doc/source/notebooks/extract_data.py b/doc/source/notebooks/extract_data.py index 397557fc..c6608581 100644 --- a/doc/source/notebooks/extract_data.py +++ b/doc/source/notebooks/extract_data.py @@ -5,7 +5,7 @@ # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.16.4 +# jupytext_version: 1.15.2 # kernelspec: # display_name: Python 3 (ipykernel) # language: python diff --git a/pymrio/core/mriosystem.py b/pymrio/core/mriosystem.py index ee7189c6..e2128948 100644 --- a/pymrio/core/mriosystem.py +++ b/pymrio/core/mriosystem.py @@ -3246,6 +3246,11 @@ def remove_extension(self, ext): return self + def extension_convert(): + raise NotImplementedError("TODO: extension convert as method") + # Can pass extensions a string of extensions in the IO System, get them as extensions, + # put it as new extension in pymrio, with a switch to remove converted extensions + def extension_convert( *extensions, @@ -3271,7 +3276,7 @@ def extension_convert( extensions : list of extensions Extensions to convert. All extensions passed must - have an index structure (index names) ase described in df_map. + have an index structure (index names) as described in df_map. df_map : pd.DataFrame The DataFrame with the mapping of the old to the new classification. @@ -3376,6 +3381,12 @@ def extension_convert( gather = [] for ext in extensions: + if ext.name not in df_map[extension_col_name].unique(): + warnings.warn( + f"Extension {ext.name} not found in df_map. Skipping extension." + ) + # TODO: later go to logging + continue gather.append( ext.convert( df_map=df_map[df_map[extension_col_name] == ext.name], diff --git a/tests/test_core.py b/tests/test_core.py index a4eebffe..de69fe09 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -679,13 +679,7 @@ def test_extension_convert_function(fix_testmrio): ], ) - # CONT: Something wrong with setting the index to a multiindex when compartment is passed - # Next steps: run this in interprester (with autoreload) and set breakpoint in extension_convert - # Seems to be in gather, but after that in the aggregation or Concatenate we get a problem - - # x = tt_pre.extension_convert(df_map, new_extension_name="emissions_new_pre_calc") - - # Doing two time the same extension + # CASE 1: Doing two time the same extension, must double the results ext_double = pymrio.extension_convert( tt_pre.emissions, tt_pre.emissions, @@ -693,58 +687,137 @@ def test_extension_convert_function(fix_testmrio): new_extension_name="emissions_new_pre_calc", ) - # TODO: check return type and update test - # assert ext_double.unit.loc["total_sum_tonnes", "unit"] == "t" - # assert ext_double.unit.loc["water_emissions", "unit"] == "g" - - # pdt.assert_series_equal( - # ext_double.F.loc["total_sum_tonnes"], - # tt_pre.emissions.F.sum(axis=0) * 1e-3 * 2, - # check_names=False, - # ) - # - # pdt.assert_series_equal( - # ext_double.F.loc["water_emissions"], - # tt_pre.emissions.F.loc["emission_type2", :].iloc[0, :] * 1000 * 2, - # check_names=False, - # ) - # - # tt_pre.emission_new = ext_double - # - # df_map_add_across = pd.DataFrame( - # columns=[ - # "extension", - # "stressor", - # "compartment", - # "total__stressor", - # "factor", - # "unit_orig", - # "unit_new", - # ], - # data=[ - # ["Emissions", "emission_type2", ".*", "water", 1, "kg", "kg"], - # [ - # "emission_new_pre_calc", - # "water_emissions", - # ".*", - # "water", - # 1e-3, - # "g", - # "kg", - # ], - # ], - # ) - # - # ext_across = pymrio.extension_convert( - # tt_pre.emissions, - # ext_double, - # df_map=df_map_add_across, - # new_extension_name="add_across", - # ) - - # CONT: - # make a second extensions are check running over 2 - # cleanup docstrings and write docs + assert ext_double.unit.loc["total_sum_tonnes", "unit"].values == ["t"] + assert ext_double.unit.loc["water_emissions", "unit"].values == ["g"] + + pdt.assert_series_equal( + ext_double.F.loc[("total_sum_tonnes", "total")], + tt_pre.emissions.F.sum(axis=0) * 1e-3 * 2, + check_names=False, + ) + + pdt.assert_series_equal( + ext_double.F.loc[("water_emissions", "water")], + tt_pre.emissions.F.loc["emission_type2", :].iloc[0, :] * 1000 * 2, + check_names=False, + ) + + # CASE 2: convert across 2 extensions + tt_pre.emission_new = ext_double + + df_map_add_across = pd.DataFrame( + columns=[ + "extension", + "stressor", + "compartment", + "total__stressor", + "factor", + "unit_orig", + "unit_new", + ], + data=[ + ["Emissions", "emission_type2", ".*", "water", 1, "kg", "kg"], + [ + "emissions_new_pre_calc", + "water_emissions", + ".*", + "water", + 1e-3, + "g", + "kg", + ], + ], + ) + + df_map_add_across_wrong_name = df_map_add_across.copy() + + df_map_add_across_wrong_name.loc[:, "extension"] = df_map_add_across_wrong_name.extension.str.replace("emissions_new_pre_calc", "foo") + + ext_across_correct = pymrio.extension_convert( + tt_pre.emissions, + ext_double, + df_map=df_map_add_across, + new_extension_name="add_across", + ) + + ext_across_wrong = pymrio.extension_convert( + tt_pre.emissions, + ext_double, + df_map=df_map_add_across_wrong_name, + new_extension_name="add_across", + ) + + expected_df_correct_F = tt_pre.emissions.F.loc["emission_type2", :].iloc[0, :] + ext_double.F.loc[("water_emissions", "water")] * 1e-3 + expected_df_wrong_F = tt_pre.emissions.F.loc["emission_type2", :].iloc[0, :] + expected_df_correct_F_Y = tt_pre.emissions.F_Y.loc["emission_type2", :].iloc[0, :] + ext_double.F_Y.loc[("water_emissions", "water")] * 1e-3 + expected_df_wrong_F_Y = tt_pre.emissions.F_Y.loc["emission_type2", :].iloc[0, :] + + pdt.assert_series_equal( + ext_across_correct.F.loc[("water",)], + expected_df_correct_F, + check_names=False, + ) + pdt.assert_series_equal( + ext_across_correct.F_Y.loc[("water",)], + expected_df_correct_F_Y, + check_names=False, + ) + pdt.assert_series_equal( + ext_across_wrong.F.loc[("water",)], + expected_df_wrong_F, + check_names=False, + ) + pdt.assert_series_equal( + ext_across_wrong.F_Y.loc[("water",)], + expected_df_wrong_F_Y, + check_names=False, + ) + + # CASE 3: Test for full calculated system + tt_post = fix_testmrio.testmrio.copy().calc_all() + + # when one extensions has less calculated parts then the other, these should + # silently set to None + ext_test_missing = pymrio.extension_convert( + tt_post.emissions, + ext_double, + df_map=df_map_add_across, + new_extension_name="add_across", + ) + + assert ext_test_missing.S is None + assert ext_test_missing.D_cba is None + + tt_post.add_across = ext_double + tt_post.calc_all() + + ext_test_all = pymrio.extension_convert( + tt_post.emissions, + tt_post.add_across, + df_map=df_map_add_across, + new_extension_name="add_across", + ) + + expected_df_D_cba = tt_post.emissions.D_cba.loc["emission_type2", :].iloc[0, :] + tt_post.add_across.D_cba.loc[("water_emissions", "water")] * 1e-3 + expected_df_S = tt_post.emissions.S.loc["emission_type2", :].iloc[0, :] + tt_post.add_across.S.loc[("water_emissions", "water")] * 1e-3 + expected_df_M = tt_post.emissions.M.loc["emission_type2", :].iloc[0, :] + tt_post.add_across.M.loc[("water_emissions", "water")] * 1e-3 + + pdt.assert_series_equal( + ext_test_all.D_cba.iloc[0], + expected_df_D_cba, + check_names=False, + ) + pdt.assert_series_equal( + ext_test_all.S.iloc[0], + expected_df_S, + check_names=False, + ) + pdt.assert_series_equal( + ext_test_all.M.iloc[0], + expected_df_M, + check_names=False, + ) + def test_extension_convert_test_unit_fail(fix_testmrio):