From 84d56ecf887f3b5279c18bf33654b14a368d6288 Mon Sep 17 00:00:00 2001 From: Konstantin Stadler <konstantin.stadler@ntnu.no> Date: Fri, 5 Apr 2024 18:20:04 +0200 Subject: [PATCH] continued notebook extraction description --- doc/source/notebooks/extract_data.ipynb | 1017 ++++++++++++++++++++++- doc/source/notebooks/extract_data.py | 98 ++- pymrio/core/mriosystem.py | 2 +- 3 files changed, 1089 insertions(+), 28 deletions(-) diff --git a/doc/source/notebooks/extract_data.ipynb b/doc/source/notebooks/extract_data.ipynb index faee60e7..59978606 100644 --- a/doc/source/notebooks/extract_data.ipynb +++ b/doc/source/notebooks/extract_data.ipynb @@ -13,28 +13,12 @@ "id": "bcb8ce71-6b45-4ccc-81ad-866bc0d6c6f8", "metadata": {}, "source": [ - "This notebook shows how to extract specific data from the pymrio object for further processing in Python. For exporting/saving the data to another file format [see the Notebook on loading/saving/exporting](./load_save_export.ipynb)." + "This notebook shows how to extract specific data from the pymrio object for further processing in Python. 
For exporting/saving the data to another file format see [the notebook on saving/loading/exporting data.](./load_save_export.ipynb)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "564fd5e1-8a3f-449a-9759-d36aa83473ce", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "04097807-b375-42eb-b5c8-d0c457502a70", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 1, + "execution_count": 39, "id": "4f298de3-3641-44bb-88bb-c428d9cf5693", "metadata": { "tags": [] @@ -46,23 +30,1012 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 40, "id": "a38719d5-011c-4dff-969e-f915a205fd44", "metadata": { "tags": [] }, "outputs": [], "source": [ - "mrio = pymrio.load_test()" + "mrio = pymrio.load_test().calc_all()\n", + "\n", + "### Basic pandas indexing of pymrio tables" + ] + }, + { + "cell_type": "markdown", + "id": "f50f0173", + "metadata": {}, + "source": [ + "Since pymrio is built on top of pandas, we can use the pandas functions to extract data from the pymrio object. 
For example, to access the part of the A matrix from the region 2 we can use:" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "id": "f6aadba4-ef17-40d2-aaec-845e274026b7", "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th>sector</th>\n", + " <th>food</th>\n", + " <th>mining</th>\n", + " <th>manufactoring</th>\n", + " <th>electricity</th>\n", + " <th>construction</th>\n", + " <th>trade</th>\n", + " <th>transport</th>\n", + " <th>other</th>\n", + " </tr>\n", + " <tr>\n", + " <th>sector</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>food</th>\n", + " <td>0.000486</td>\n", + " <td>0.000638</td>\n", + " <td>0.000194</td>\n", + " <td>0.000005</td>\n", + " <td>0.000019</td>\n", + " <td>0.000092</td>\n", + " <td>0.000027</td>\n", + " <td>0.000026</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mining</th>\n", + " <td>0.000006</td>\n", + " <td>0.050904</td>\n", + " <td>0.000047</td>\n", + " <td>0.000218</td>\n", + " <td>0.000203</td>\n", + " <td>0.000011</td>\n", + " <td>0.000010</td>\n", + " <td>0.000013</td>\n", + " </tr>\n", + " <tr>\n", + " <th>manufactoring</th>\n", + " <td>0.000488</td>\n", + " <td>0.069862</td>\n", + " <td>0.001529</td>\n", + " <td>0.000196</td>\n", + " <td>0.005915</td>\n", + " <td>0.001191</td>\n", + " <td>0.002294</td>\n", + " <td>0.000844</td>\n", + " </tr>\n", + " <tr>\n", + " <th>electricity</th>\n", + " <td>0.000089</td>\n", + " 
<td>0.050427</td>\n", + " <td>0.000137</td>\n", + " <td>0.000604</td>\n", + " <td>0.000146</td>\n", + " <td>0.000177</td>\n", + " <td>0.000280</td>\n", + " <td>0.000248</td>\n", + " </tr>\n", + " <tr>\n", + " <th>construction</th>\n", + " <td>0.000025</td>\n", + " <td>0.007375</td>\n", + " <td>0.000032</td>\n", + " <td>0.000109</td>\n", + " <td>0.004615</td>\n", + " <td>0.000088</td>\n", + " <td>0.000515</td>\n", + " <td>0.000422</td>\n", + " </tr>\n", + " <tr>\n", + " <th>trade</th>\n", + " <td>0.000251</td>\n", + " <td>0.028770</td>\n", + " <td>0.000531</td>\n", + " <td>0.000095</td>\n", + " <td>0.001640</td>\n", + " <td>0.000772</td>\n", + " <td>0.001372</td>\n", + " <td>0.000487</td>\n", + " </tr>\n", + " <tr>\n", + " <th>transport</th>\n", + " <td>0.000071</td>\n", + " <td>0.031839</td>\n", + " <td>0.000212</td>\n", + " <td>0.000069</td>\n", + " <td>0.000714</td>\n", + " <td>0.000579</td>\n", + " <td>0.004747</td>\n", + " <td>0.000494</td>\n", + " </tr>\n", + " <tr>\n", + " <th>other</th>\n", + " <td>0.000171</td>\n", + " <td>0.064935</td>\n", + " <td>0.000595</td>\n", + " <td>0.000291</td>\n", + " <td>0.002844</td>\n", + " <td>0.001897</td>\n", + " <td>0.003800</td>\n", + " <td>0.003936</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + "sector food mining manufactoring electricity construction \\\n", + "sector \n", + "food 0.000486 0.000638 0.000194 0.000005 0.000019 \n", + "mining 0.000006 0.050904 0.000047 0.000218 0.000203 \n", + "manufactoring 0.000488 0.069862 0.001529 0.000196 0.005915 \n", + "electricity 0.000089 0.050427 0.000137 0.000604 0.000146 \n", + "construction 0.000025 0.007375 0.000032 0.000109 0.004615 \n", + "trade 0.000251 0.028770 0.000531 0.000095 0.001640 \n", + "transport 0.000071 0.031839 0.000212 0.000069 0.000714 \n", + "other 0.000171 0.064935 0.000595 0.000291 0.002844 \n", + "\n", + "sector trade transport other \n", + "sector \n", + "food 0.000092 0.000027 0.000026 \n", + "mining 
0.000011 0.000010 0.000013 \n", + "manufactoring 0.001191 0.002294 0.000844 \n", + "electricity 0.000177 0.000280 0.000248 \n", + "construction 0.000088 0.000515 0.000422 \n", + "trade 0.000772 0.001372 0.000487 \n", + "transport 0.000579 0.004747 0.000494 \n", + "other 0.001897 0.003800 0.003936 " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "A_reg2 = mrio.A.loc[\"reg2\", \"reg2\"]\n", + "A_reg2" + ] + }, + { + "cell_type": "markdown", + "id": "1a9e6bcd", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "Most tables are indexed via a multiindex, in case of the A matrix the index is a tuple of the region and the sector.\n", + "To access all technical coefficients (column) data for mining from all regions we can use:" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "bdc4c511", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead tr th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe thead tr:last-of-type th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr>\n", + " <th></th>\n", + " <th>region</th>\n", + " <th>reg1</th>\n", + " <th>reg2</th>\n", + " <th>reg3</th>\n", + " <th>reg4</th>\n", + " <th>reg5</th>\n", + " <th>reg6</th>\n", + " </tr>\n", + " <tr>\n", + " <th></th>\n", + " <th>sector</th>\n", + " <th>mining</th>\n", + " <th>mining</th>\n", + " <th>mining</th>\n", + " <th>mining</th>\n", + " <th>mining</th>\n", + " <th>mining</th>\n", + " </tr>\n", + " <tr>\n", + " <th>region</th>\n", + " <th>sector</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " 
</tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th rowspan=\"8\" valign=\"top\">reg1</th>\n", + " <th>food</th>\n", + " <td>0.001179</td>\n", + " <td>0.000010</td>\n", + " <td>3.652734e-09</td>\n", + " <td>1.626677e-06</td>\n", + " <td>3.767567e-07</td>\n", + " <td>1.481621e-05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mining</th>\n", + " <td>0.048022</td>\n", + " <td>0.000268</td>\n", + " <td>7.486558e-07</td>\n", + " <td>6.899387e-05</td>\n", + " <td>3.628651e-05</td>\n", + " <td>8.801658e-05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>manufactoring</th>\n", + " <td>0.124366</td>\n", + " <td>0.017417</td>\n", + " <td>2.799765e-05</td>\n", + " <td>9.161688e-03</td>\n", + " <td>4.792741e-03</td>\n", + " <td>2.087445e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>electricity</th>\n", + " <td>0.037991</td>\n", + " <td>0.001099</td>\n", + " <td>2.170169e-07</td>\n", + " <td>1.150382e-08</td>\n", + " <td>1.444660e-05</td>\n", + " <td>8.892062e-06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>construction</th>\n", + " <td>0.017324</td>\n", + " <td>0.000022</td>\n", + " <td>8.884210e-08</td>\n", + " <td>1.331990e-07</td>\n", + " <td>1.129186e-05</td>\n", + " <td>7.641830e-07</td>\n", + " </tr>\n", + " <tr>\n", + " <th>trade</th>\n", + " <td>0.035429</td>\n", + " <td>0.001836</td>\n", + " <td>6.170323e-07</td>\n", + " <td>3.835340e-04</td>\n", + " <td>1.071471e-03</td>\n", + " <td>5.575273e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th>transport</th>\n", + " <td>0.060324</td>\n", + " <td>0.001544</td>\n", + " <td>4.060594e-06</td>\n", + " <td>5.820972e-04</td>\n", + " <td>1.278089e-03</td>\n", + " <td>6.555200e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>other</th>\n", + " <td>0.092059</td>\n", + " <td>0.005024</td>\n", + " <td>1.788858e-05</td>\n", + " <td>3.664017e-04</td>\n", + " <td>7.664473e-04</td>\n", + " <td>3.287131e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"8\" valign=\"top\">reg2</th>\n", + " <th>food</th>\n", + " 
<td>0.000084</td>\n", + " <td>0.000638</td>\n", + " <td>3.772203e-09</td>\n", + " <td>2.165695e-07</td>\n", + " <td>9.237430e-08</td>\n", + " <td>3.105702e-05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mining</th>\n", + " <td>0.000523</td>\n", + " <td>0.050904</td>\n", + " <td>5.883755e-06</td>\n", + " <td>5.472492e-05</td>\n", + " <td>2.212937e-05</td>\n", + " <td>3.108304e-05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>manufactoring</th>\n", + " <td>0.014563</td>\n", + " <td>0.069862</td>\n", + " <td>1.223889e-05</td>\n", + " <td>4.170428e-03</td>\n", + " <td>2.164157e-03</td>\n", + " <td>1.355740e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>electricity</th>\n", + " <td>0.000430</td>\n", + " <td>0.050427</td>\n", + " <td>1.143580e-08</td>\n", + " <td>2.061508e-08</td>\n", + " <td>3.266417e-06</td>\n", + " <td>3.535380e-07</td>\n", + " </tr>\n", + " <tr>\n", + " <th>construction</th>\n", + " <td>0.000050</td>\n", + " <td>0.007375</td>\n", + " <td>1.666888e-07</td>\n", + " <td>1.910407e-07</td>\n", + " <td>1.702833e-05</td>\n", + " <td>1.421644e-06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>trade</th>\n", + " <td>0.000769</td>\n", + " <td>0.028770</td>\n", + " <td>1.092766e-06</td>\n", + " <td>1.792846e-04</td>\n", + " <td>1.000066e-03</td>\n", + " <td>3.103879e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th>transport</th>\n", + " <td>0.003095</td>\n", + " <td>0.031839</td>\n", + " <td>1.993361e-06</td>\n", + " <td>2.990600e-05</td>\n", + " <td>1.294761e-03</td>\n", + " <td>1.368746e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>other</th>\n", + " <td>0.001584</td>\n", + " <td>0.064935</td>\n", + " <td>2.418065e-05</td>\n", + " <td>2.702272e-04</td>\n", + " <td>4.571674e-04</td>\n", + " <td>3.392091e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"8\" valign=\"top\">reg3</th>\n", + " <th>food</th>\n", + " <td>0.000037</td>\n", + " <td>0.000005</td>\n", + " <td>1.309056e-05</td>\n", + " <td>6.378397e-06</td>\n", + " <td>4.673894e-07</td>\n", + " 
<td>3.461944e-06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mining</th>\n", + " <td>0.000396</td>\n", + " <td>0.000151</td>\n", + " <td>4.337922e-04</td>\n", + " <td>3.495268e-04</td>\n", + " <td>1.564949e-04</td>\n", + " <td>4.882839e-05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>manufactoring</th>\n", + " <td>0.007782</td>\n", + " <td>0.003842</td>\n", + " <td>6.572771e-04</td>\n", + " <td>5.043959e-03</td>\n", + " <td>3.621657e-03</td>\n", + " <td>1.629835e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>electricity</th>\n", + " <td>0.000045</td>\n", + " <td>0.000107</td>\n", + " <td>2.495370e-04</td>\n", + " <td>1.401577e-04</td>\n", + " <td>5.464145e-07</td>\n", + " <td>1.165971e-05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>construction</th>\n", + " <td>0.000006</td>\n", + " <td>0.000003</td>\n", + " <td>2.786913e-04</td>\n", + " <td>2.624508e-07</td>\n", + " <td>6.239448e-06</td>\n", + " <td>1.237150e-08</td>\n", + " </tr>\n", + " <tr>\n", + " <th>trade</th>\n", + " <td>0.000150</td>\n", + " <td>0.000060</td>\n", + " <td>2.485529e-04</td>\n", + " <td>4.141811e-05</td>\n", + " <td>1.517855e-03</td>\n", + " <td>2.062017e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>transport</th>\n", + " <td>0.001768</td>\n", + " <td>0.000618</td>\n", + " <td>2.091958e-04</td>\n", + " <td>4.965269e-04</td>\n", + " <td>3.777028e-04</td>\n", + " <td>2.449723e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>other</th>\n", + " <td>0.002269</td>\n", + " <td>0.007583</td>\n", + " <td>1.861799e-03</td>\n", + " <td>2.275626e-03</td>\n", + " <td>2.430304e-03</td>\n", + " <td>1.329935e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"8\" valign=\"top\">reg4</th>\n", + " <th>food</th>\n", + " <td>0.000171</td>\n", + " <td>0.000578</td>\n", + " <td>6.340547e-10</td>\n", + " <td>1.413307e-03</td>\n", + " <td>1.378513e-07</td>\n", + " <td>2.636256e-05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mining</th>\n", + " <td>0.000311</td>\n", + " <td>0.002467</td>\n", + " 
<td>2.719214e-05</td>\n", + " <td>6.138652e-02</td>\n", + " <td>1.552800e-04</td>\n", + " <td>2.699482e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th>manufactoring</th>\n", + " <td>0.005207</td>\n", + " <td>0.003579</td>\n", + " <td>4.405268e-05</td>\n", + " <td>1.493263e-01</td>\n", + " <td>2.139451e-03</td>\n", + " <td>2.578051e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>electricity</th>\n", + " <td>0.000081</td>\n", + " <td>0.000013</td>\n", + " <td>1.295279e-06</td>\n", + " <td>4.967797e-02</td>\n", + " <td>1.543713e-08</td>\n", + " <td>1.669269e-05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>construction</th>\n", + " <td>0.000021</td>\n", + " <td>0.000008</td>\n", + " <td>6.055807e-07</td>\n", + " <td>2.337226e-03</td>\n", + " <td>7.889622e-06</td>\n", + " <td>2.033545e-06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>trade</th>\n", + " <td>0.000005</td>\n", + " <td>0.000016</td>\n", + " <td>1.595023e-07</td>\n", + " <td>2.826377e-02</td>\n", + " <td>9.701854e-05</td>\n", + " <td>7.025049e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th>transport</th>\n", + " <td>0.000356</td>\n", + " <td>0.000048</td>\n", + " <td>9.595997e-07</td>\n", + " <td>1.586496e-02</td>\n", + " <td>3.012550e-04</td>\n", + " <td>1.991747e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>other</th>\n", + " <td>0.000873</td>\n", + " <td>0.000958</td>\n", + " <td>6.314447e-06</td>\n", + " <td>4.648443e-02</td>\n", + " <td>1.117863e-04</td>\n", + " <td>6.261181e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"8\" valign=\"top\">reg5</th>\n", + " <th>food</th>\n", + " <td>0.000062</td>\n", + " <td>0.000028</td>\n", + " <td>1.108149e-07</td>\n", + " <td>1.524618e-05</td>\n", + " <td>1.996382e-04</td>\n", + " <td>2.327498e-05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mining</th>\n", + " <td>0.003143</td>\n", + " <td>0.001784</td>\n", + " <td>2.499451e-06</td>\n", + " <td>1.013051e-03</td>\n", + " <td>5.149154e-02</td>\n", + " <td>9.996927e-04</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>manufactoring</th>\n", + " <td>0.004680</td>\n", + " <td>0.002365</td>\n", + " <td>1.612636e-05</td>\n", + " <td>5.466745e-04</td>\n", + " <td>8.061924e-02</td>\n", + " <td>1.405121e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>electricity</th>\n", + " <td>0.000956</td>\n", + " <td>0.000018</td>\n", + " <td>5.676661e-08</td>\n", + " <td>2.164907e-06</td>\n", + " <td>2.545719e-02</td>\n", + " <td>1.096431e-05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>construction</th>\n", + " <td>0.000071</td>\n", + " <td>0.000034</td>\n", + " <td>5.621276e-07</td>\n", + " <td>3.027990e-08</td>\n", + " <td>1.765549e-02</td>\n", + " <td>3.883278e-06</td>\n", + " </tr>\n", + " <tr>\n", + " <th>trade</th>\n", + " <td>0.000149</td>\n", + " <td>0.000639</td>\n", + " <td>1.895713e-06</td>\n", + " <td>5.016841e-05</td>\n", + " <td>4.981458e-02</td>\n", + " <td>1.014394e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>transport</th>\n", + " <td>0.001154</td>\n", + " <td>0.000820</td>\n", + " <td>2.183364e-06</td>\n", + " <td>2.255475e-05</td>\n", + " <td>3.801613e-02</td>\n", + " <td>2.591267e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>other</th>\n", + " <td>0.000347</td>\n", + " <td>0.000512</td>\n", + " <td>4.026853e-06</td>\n", + " <td>9.871058e-05</td>\n", + " <td>6.984674e-02</td>\n", + " <td>5.676656e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"8\" valign=\"top\">reg6</th>\n", + " <th>food</th>\n", + " <td>0.000143</td>\n", + " <td>0.000093</td>\n", + " <td>1.099390e-07</td>\n", + " <td>6.355405e-05</td>\n", + " <td>1.119791e-06</td>\n", + " <td>6.959613e-04</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mining</th>\n", + " <td>0.007456</td>\n", + " <td>0.005567</td>\n", + " <td>3.220657e-05</td>\n", + " <td>2.052196e-03</td>\n", + " <td>9.791972e-04</td>\n", + " <td>1.512044e-01</td>\n", + " </tr>\n", + " <tr>\n", + " <th>manufactoring</th>\n", + " <td>0.005841</td>\n", + " <td>0.005784</td>\n", + " <td>2.046172e-05</td>\n", + " <td>2.154067e-03</td>\n", + " 
<td>6.448726e-03</td>\n", + " <td>5.043697e-02</td>\n", + " </tr>\n", + " <tr>\n", + " <th>electricity</th>\n", + " <td>0.000197</td>\n", + " <td>0.000168</td>\n", + " <td>1.470729e-07</td>\n", + " <td>1.102498e-05</td>\n", + " <td>3.010193e-05</td>\n", + " <td>6.787911e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>construction</th>\n", + " <td>0.000038</td>\n", + " <td>0.000023</td>\n", + " <td>5.264129e-07</td>\n", + " <td>5.950288e-07</td>\n", + " <td>3.829687e-05</td>\n", + " <td>2.041605e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>trade</th>\n", + " <td>0.000209</td>\n", + " <td>0.000184</td>\n", + " <td>6.734943e-07</td>\n", + " <td>1.334155e-04</td>\n", + " <td>1.324385e-04</td>\n", + " <td>4.701872e-03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>transport</th>\n", + " <td>0.002636</td>\n", + " <td>0.002308</td>\n", + " <td>4.652734e-06</td>\n", + " <td>4.705530e-05</td>\n", + " <td>2.978642e-04</td>\n", + " <td>4.032717e-02</td>\n", + " </tr>\n", + " <tr>\n", + " <th>other</th>\n", + " <td>0.000505</td>\n", + " <td>0.000804</td>\n", + " <td>3.262233e-06</td>\n", + " <td>2.790342e-04</td>\n", + " <td>2.084267e-04</td>\n", + " <td>2.052462e-02</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + "region reg1 reg2 reg3 reg4 \\\n", + "sector mining mining mining mining \n", + "region sector \n", + "reg1 food 0.001179 0.000010 3.652734e-09 1.626677e-06 \n", + " mining 0.048022 0.000268 7.486558e-07 6.899387e-05 \n", + " manufactoring 0.124366 0.017417 2.799765e-05 9.161688e-03 \n", + " electricity 0.037991 0.001099 2.170169e-07 1.150382e-08 \n", + " construction 0.017324 0.000022 8.884210e-08 1.331990e-07 \n", + " trade 0.035429 0.001836 6.170323e-07 3.835340e-04 \n", + " transport 0.060324 0.001544 4.060594e-06 5.820972e-04 \n", + " other 0.092059 0.005024 1.788858e-05 3.664017e-04 \n", + "reg2 food 0.000084 0.000638 3.772203e-09 2.165695e-07 \n", + " mining 0.000523 0.050904 5.883755e-06 5.472492e-05 \n", + " 
manufactoring 0.014563 0.069862 1.223889e-05 4.170428e-03 \n", + " electricity 0.000430 0.050427 1.143580e-08 2.061508e-08 \n", + " construction 0.000050 0.007375 1.666888e-07 1.910407e-07 \n", + " trade 0.000769 0.028770 1.092766e-06 1.792846e-04 \n", + " transport 0.003095 0.031839 1.993361e-06 2.990600e-05 \n", + " other 0.001584 0.064935 2.418065e-05 2.702272e-04 \n", + "reg3 food 0.000037 0.000005 1.309056e-05 6.378397e-06 \n", + " mining 0.000396 0.000151 4.337922e-04 3.495268e-04 \n", + " manufactoring 0.007782 0.003842 6.572771e-04 5.043959e-03 \n", + " electricity 0.000045 0.000107 2.495370e-04 1.401577e-04 \n", + " construction 0.000006 0.000003 2.786913e-04 2.624508e-07 \n", + " trade 0.000150 0.000060 2.485529e-04 4.141811e-05 \n", + " transport 0.001768 0.000618 2.091958e-04 4.965269e-04 \n", + " other 0.002269 0.007583 1.861799e-03 2.275626e-03 \n", + "reg4 food 0.000171 0.000578 6.340547e-10 1.413307e-03 \n", + " mining 0.000311 0.002467 2.719214e-05 6.138652e-02 \n", + " manufactoring 0.005207 0.003579 4.405268e-05 1.493263e-01 \n", + " electricity 0.000081 0.000013 1.295279e-06 4.967797e-02 \n", + " construction 0.000021 0.000008 6.055807e-07 2.337226e-03 \n", + " trade 0.000005 0.000016 1.595023e-07 2.826377e-02 \n", + " transport 0.000356 0.000048 9.595997e-07 1.586496e-02 \n", + " other 0.000873 0.000958 6.314447e-06 4.648443e-02 \n", + "reg5 food 0.000062 0.000028 1.108149e-07 1.524618e-05 \n", + " mining 0.003143 0.001784 2.499451e-06 1.013051e-03 \n", + " manufactoring 0.004680 0.002365 1.612636e-05 5.466745e-04 \n", + " electricity 0.000956 0.000018 5.676661e-08 2.164907e-06 \n", + " construction 0.000071 0.000034 5.621276e-07 3.027990e-08 \n", + " trade 0.000149 0.000639 1.895713e-06 5.016841e-05 \n", + " transport 0.001154 0.000820 2.183364e-06 2.255475e-05 \n", + " other 0.000347 0.000512 4.026853e-06 9.871058e-05 \n", + "reg6 food 0.000143 0.000093 1.099390e-07 6.355405e-05 \n", + " mining 0.007456 0.005567 3.220657e-05 2.052196e-03 \n", 
+ " manufactoring 0.005841 0.005784 2.046172e-05 2.154067e-03 \n", + " electricity 0.000197 0.000168 1.470729e-07 1.102498e-05 \n", + " construction 0.000038 0.000023 5.264129e-07 5.950288e-07 \n", + " trade 0.000209 0.000184 6.734943e-07 1.334155e-04 \n", + " transport 0.002636 0.002308 4.652734e-06 4.705530e-05 \n", + " other 0.000505 0.000804 3.262233e-06 2.790342e-04 \n", + "\n", + "region reg5 reg6 \n", + "sector mining mining \n", + "region sector \n", + "reg1 food 3.767567e-07 1.481621e-05 \n", + " mining 3.628651e-05 8.801658e-05 \n", + " manufactoring 4.792741e-03 2.087445e-03 \n", + " electricity 1.444660e-05 8.892062e-06 \n", + " construction 1.129186e-05 7.641830e-07 \n", + " trade 1.071471e-03 5.575273e-04 \n", + " transport 1.278089e-03 6.555200e-03 \n", + " other 7.664473e-04 3.287131e-04 \n", + "reg2 food 9.237430e-08 3.105702e-05 \n", + " mining 2.212937e-05 3.108304e-05 \n", + " manufactoring 2.164157e-03 1.355740e-03 \n", + " electricity 3.266417e-06 3.535380e-07 \n", + " construction 1.702833e-05 1.421644e-06 \n", + " trade 1.000066e-03 3.103879e-04 \n", + " transport 1.294761e-03 1.368746e-03 \n", + " other 4.571674e-04 3.392091e-04 \n", + "reg3 food 4.673894e-07 3.461944e-06 \n", + " mining 1.564949e-04 4.882839e-05 \n", + " manufactoring 3.621657e-03 1.629835e-03 \n", + " electricity 5.464145e-07 1.165971e-05 \n", + " construction 6.239448e-06 1.237150e-08 \n", + " trade 1.517855e-03 2.062017e-03 \n", + " transport 3.777028e-04 2.449723e-03 \n", + " other 2.430304e-03 1.329935e-03 \n", + "reg4 food 1.378513e-07 2.636256e-05 \n", + " mining 1.552800e-04 2.699482e-04 \n", + " manufactoring 2.139451e-03 2.578051e-03 \n", + " electricity 1.543713e-08 1.669269e-05 \n", + " construction 7.889622e-06 2.033545e-06 \n", + " trade 9.701854e-05 7.025049e-04 \n", + " transport 3.012550e-04 1.991747e-03 \n", + " other 1.117863e-04 6.261181e-04 \n", + "reg5 food 1.996382e-04 2.327498e-05 \n", + " mining 5.149154e-02 9.996927e-04 \n", + " manufactoring 
8.061924e-02 1.405121e-03 \n", + " electricity 2.545719e-02 1.096431e-05 \n", + " construction 1.765549e-02 3.883278e-06 \n", + " trade 4.981458e-02 1.014394e-03 \n", + " transport 3.801613e-02 2.591267e-03 \n", + " other 6.984674e-02 5.676656e-04 \n", + "reg6 food 1.119791e-06 6.959613e-04 \n", + " mining 9.791972e-04 1.512044e-01 \n", + " manufactoring 6.448726e-03 5.043697e-02 \n", + " electricity 3.010193e-05 6.787911e-03 \n", + " construction 3.829687e-05 2.041605e-03 \n", + " trade 1.324385e-04 4.701872e-03 \n", + " transport 2.978642e-04 4.032717e-02 \n", + " other 2.084267e-04 2.052462e-02 " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "A_mining = mrio.A.loc[:, (slice(None), \"mining\")]\n", + "A_mining" + ] + }, + { + "cell_type": "markdown", + "id": "6b77959c", + "metadata": {}, + "source": [ + "For further information on the pandas multiindex see the [pandas documentation on advanced indexing.](https://pandas.pydata.org/docs/user_guide/advanced.html)\n", + "\n", + "## Extracting data across extension tables" + ] + }, + { + "cell_type": "markdown", + "id": "6d77f052", + "metadata": {}, + "source": [ + "Pymrio includes methods for bulk extraction of data across extension tables. These can either work on a specific extension or across all extensions of the system." 
+ ] + }, + { + "cell_type": "markdown", + "id": "e9f1a370", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "### Extracting from a specific extension" + ] + }, + { + "cell_type": "markdown", + "id": "9ec046cc", + "metadata": {}, + "source": [ + "Here we use the `extract` method available in the extension object.\n", + "This expects a list of rows (index) to extract.\n", + "Here we extract some rows from the emission extension table.\n", + "To do so, we first define the rows (index) to extract:" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "debe154e", + "metadata": {}, + "outputs": [], + "source": [ + "rows_to_extract =[('emission_type1', 'air'),\n", + " ('emission_type2', 'water')]" + ] + }, + { + "cell_type": "markdown", + "id": "8051fc37", + "metadata": {}, + "source": [ + "We can now use the `extract` method to extract the data, either as a pandas DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "c945af36", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['F', 'F_Y', 'S', 'S_Y', 'M', 'M_down', 'D_cba', 'D_pba', 'D_imp', 'D_exp', 'unit', 'D_cba_reg', 'D_pba_reg', 'D_imp_reg', 'D_exp_reg', 'D_cba_cap', 'D_pba_cap', 'D_imp_cap', 'D_exp_cap'])" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_extract = mrio.emissions.extract(rows_to_extract, return_type=\"dataframe\")\n", + "df_extract.keys()" + ] + }, + { + "cell_type": "markdown", + "id": "a5aea0ad", + "metadata": {}, + "source": [ + "Or we extract into a new extension object:" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "8f596735", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Extension Emissions_extracted with parameters: name, F, F_Y, S, S_Y, M, M_down, D_cba, D_pba, D_imp, D_exp, unit, D_cba_reg, D_pba_reg, D_imp_reg, D_exp_reg, D_cba_cap, D_pba_cap, D_imp_cap, D_exp_cap'" + ] + 
}, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ext_extract = mrio.emissions.extract(rows_to_extract, return_type=\"extension\")\n", + "str(ext_extract)" + ] + }, + { + "cell_type": "markdown", + "id": "66981d62", + "metadata": {}, + "source": [ + "Note that the name of the extension object is now `Emissions_extracted`, based on the name of the original extension object.\n", + "To use another name, just pass the name as the `return_type` parameter." + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "c39cfc64", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Extension new_extension with parameters: name, F, F_Y, S, S_Y, M, M_down, D_cba, D_pba, D_imp, D_exp, unit, D_cba_reg, D_pba_reg, D_imp_reg, D_exp_reg, D_cba_cap, D_pba_cap, D_imp_cap, D_exp_cap'" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_extension = mrio.emissions.extract(rows_to_extract, return_type=\"new_extension\")\n", + "str(new_extension)" + ] + }, + { + "cell_type": "markdown", + "id": "68f6f3e8", + "metadata": {}, + "source": [ + "Extracting to dataframes is also a convenient way to convert an extension object to a dictionary:" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "b23d7415", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['F', 'F_Y', 'S', 'S_Y', 'M', 'M_down', 'D_cba', 'D_pba', 'D_imp', 'D_exp', 'unit', 'D_cba_reg', 'D_pba_reg', 'D_imp_reg', 'D_exp_reg', 'D_cba_cap', 'D_pba_cap', 'D_imp_cap', 'D_exp_cap'])" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_all = mrio.emissions.extract(mrio.emissions.get_rows(), return_type=\"dfs\")\n", + "df_all.keys()" + ] + }, + { + "cell_type": "markdown", + "id": "4357fd67", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "CONT: DESCRIBE STUFF ABOVE\n", + 
"For example, to extract the total value added for all regions and sectors we can use:" + ] } ], "metadata": { diff --git a/doc/source/notebooks/extract_data.py b/doc/source/notebooks/extract_data.py index ebd0d99b..e37d9fd4 100644 --- a/doc/source/notebooks/extract_data.py +++ b/doc/source/notebooks/extract_data.py @@ -57,11 +57,99 @@ # %% [markdown] # Here we use use the `extract` method available in the extension object. # This expect a list of rows (index) to extract. +# Here we extract some rows from the emission extension table. +# To do so, we first define the rows (index) to extract: -row = mrio.emissions.get_rows() +# %% +rows_to_extract =[('emission_type1', 'air'), + ('emission_type2', 'water')] + +# %% [markdown] +# We can now use the `extract` method to extract the data, either as a pandas DataFrame + +# %% +df_extract = mrio.emissions.extract(rows_to_extract, return_type="dataframe") +df_extract.keys() + +# %% [markdown] +# Or we extract into a new extension object: + +# %% +ext_extract = mrio.emissions.extract(rows_to_extract, return_type="extension") +str(ext_extract) + +# %% [markdown] +# Note that the name of the extension object is now `Emissions_extracted`, based on the name of the original extension object. +# To use another name, just pass the name as the `return_type` parameter. 
-df_extract = mrio.emissions.extract(row, return_type="dataframe") -ext_extract = mrio.emissions.extract(row, return_type="extension") +# %% +new_extension = mrio.emissions.extract(rows_to_extract, return_type="new_extension") +str(new_extension) + +# %% [markdown] +# Extracting to dataframes is also a convenient +# way to convert an extension object to a dictionary: + +# %% +df_all = mrio.emissions.extract(mrio.emissions.get_rows(), return_type="dfs") +df_all.keys() + + +# The method also allows extracting only some of the accounts: +df_some = mrio.emissions.extract(mrio.emissions.get_rows(), dataframes=['D_cba', 'D_pba'], return_type="dfs") +df_some.keys() + + + +# %% [markdown] +#### Extracting from all extensions + +# %% [markdown] +# We can also extract data from all extensions at once. +# This is done using the `extension_extract` method from the pymrio object. +# This expects a dict with keys based on the extension names and values as a list of rows (index) to extract. + +# %% [markdown] +# Let's assume we want to extract value added and all emissions. 
+# We first define the rows (index) to extract: + +# %% +to_extract = {'Factor Inputs': 'Value Added', + 'Emissions': [('emission_type1', 'air'), + ('emission_type2', 'water')]} + + +# %% [markdown] +# We can then use the `extension_extract` method to extract the data, either as a pandas DataFrame, +# which returns a dictionary with the extension names as keys + +# %% +df_extract_all = mrio.extension_extract(to_extract, return_type="dataframe") +df_extract_all.keys() + +# %% +df_extract_all['Factor Inputs'].keys() + +# %% [markdown] +# We can also extract into a dictionary of extension objects: + +# %% +ext_extract_all = mrio.extension_extract(to_extract, return_type="extensions") +ext_extract_all.keys() + +extracts = ext_extract_all + +r = pymrio.concate_extension(*extracts.values(), name="abc") + +# %% +str(ext_extract_all['Factor Inputs']) + +# %% [markdown] +# Or merge the extracted data into a new pymrio Extension object (when passing a new name as return_type): +ext_new = mrio.extension_extract(to_extract, return_type="new_merged_extension") +str(ext_new) + +# %% [markdown] +# CONT: Extraction to a single extension does not work. +# Issue: when only one extension row, it becomes a data series, not a dataframe. -# CONT: DESRIBE STUFF ABOVE -# For example, to extract the total value added for all regions and sectors we can use: diff --git a/pymrio/core/mriosystem.py b/pymrio/core/mriosystem.py index d838c8c5..161655d6 100644 --- a/pymrio/core/mriosystem.py +++ b/pymrio/core/mriosystem.py @@ -2428,7 +2428,7 @@ def extension_extract( Parameters ---------- index_dict : dict - A dict with the index names as keys and the values as the + A dict with the extension names as keys and the values as the corresponding index values. The values can be a single value or a list of values.