OpenGeoScales · djam-dev · Apr 9, 2021 · Apr 26, 2021
diff --git a/.DS_Store b/.DS_Store
diff --git a/.gitignore b/.gitignore
@@ -26,6 +26,7 @@ share/python-wheels/
 .installed.cfg
 *.egg
 MANIFEST
+.DS_Store
 
 # PyInstaller
 #  Usually these files are written by a python script from a template

diff --git a/data-catalog/.DS_Store b/data-catalog/.DS_Store
diff --git a/data-catalog/ghg-emissions/.DS_Store b/data-catalog/ghg-emissions/.DS_Store
diff --git a/data/.DS_Store b/data/.DS_Store
diff --git a/data/ghg-emissions/.DS_Store b/data/ghg-emissions/.DS_Store
diff --git a/data/ghg-emissions/citepa/.DS_Store b/data/ghg-emissions/citepa/.DS_Store
diff --git a/notebooks/.DS_Store b/notebooks/.DS_Store
diff --git a/notebooks/ghg-emissions/.DS_Store b/notebooks/ghg-emissions/.DS_Store
diff --git a/notebooks/ghg-emissions/edgar/EDGAR_CO2_Merge_SRC_BySector.ipynb b/notebooks/ghg-emissions/edgar/EDGAR_CO2_Merge_SRC_BySector.ipynb
@@ -0,0 +1,108 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Goal => make one file/sector concatenating each file in a sector\n",
+    "#      => modify the shape adding 3 columns: Year, Compound (gas) and Process Group (sector)\n",
+    "import glob\n",
+    "import os\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "\n",
+    "# tree structure source folder ./EDGAR_Import/\n",
+    "#                                             EDGAR_excl_short_cycle/\n",
+    "#                                                                    AGS_txt/\n",
+    "#                                                                    CHE_txt/\n",
+    "#                                                                    ...\n",
+    "#                                             EDGAR_short_cycle/\n",
+    "#                                                               AWB_txt/\n",
+    "#                                                               ENE_txt/\n",
+    "#                                                             ...\n",
+    "# tree structure destination folder ./EDGAR_merge/\n",
+    "\n",
+    "# make sure the destination folder exists, otherwise to_csv fails on the first sector\n",
+    "os.makedirs(\"./EDGAR_merge\", exist_ok=True)\n",
+    "\n",
+    "folder_list = glob.glob(\"./EDGAR_Import/*/*/\")\n",
+    "\n",
+    "for f in folder_list:\n",
+    "\n",
+    "    # get the sector from the folder name: last path component without its '_txt' part.\n",
+    "    # os.path is used instead of a fixed split(\"/\") index so that the result does not\n",
+    "    # depend on the path depth or on the path separator.\n",
+    "    sector = os.path.basename(os.path.normpath(f)).replace('_txt', '')\n",
+    "\n",
+    "    # get file list from folder; glob returns files in arbitrary order, so sort them\n",
+    "    # to make the row order of the merged file reproducible\n",
+    "    file_list = sorted(glob.glob(f + \"v50_CO2*.txt\"))\n",
+    "\n",
+    "    # nothing to merge in this folder: skip it, otherwise the data of the previous\n",
+    "    # folder would be written again under this sector name (or the names below\n",
+    "    # would be undefined on the first folder)\n",
+    "    if not file_list:\n",
+    "        continue\n",
+    "\n",
+    "    # get exclude short cycle or not: third \"_\"-separated token of the first file name\n",
+    "    emission_type = os.path.basename(file_list[0]).split(\"_\")[2]\n",
+    "\n",
+    "    # one dataframe per file, concatenated once after the loop\n",
+    "    frames = []\n",
+    "\n",
+    "    for path in file_list:\n",
+    "\n",
+    "        # skip the two first lines which are just informative\n",
+    "        df_eachdata = pd.read_csv(path, sep=\";\", skiprows=[0, 1])\n",
+    "\n",
+    "        # delete the year in column name to make unique column name\n",
+    "        df_eachdata.rename(columns={df_eachdata.columns[2]: \"emission (tons)\"}, inplace=True)\n",
+    "\n",
+    "        # get second line to make columns from part of it\n",
+    "        df_eachinfo = pd.read_csv(path, sep=\";\", skiprows=[0, 2], nrows=0)\n",
+    "\n",
+    "        # delete space from column name\n",
+    "        df_eachinfo.columns = df_eachinfo.columns.str.strip()\n",
+    "\n",
+    "        # split each column name on \":\" and keep the two parts in the a and b variables\n",
+    "        parts = df_eachinfo.columns.str.split(\":\")\n",
+    "        a = parts.str[0]\n",
+    "        b = parts.str[1]\n",
+    "\n",
+    "        # make a one-row df with a as column names and b as values\n",
+    "        df_eachinfo = pd.DataFrame(dict(zip(a, b)), index=[0])\n",
+    "\n",
+    "        # keep the Year column and the Compound column (gas). As the process group info is not in\n",
+    "        # every file, that column is created afterwards from the current folder name.\n",
+    "        df_eachinfo = df_eachinfo[[\"Year\", \"Compound\"]]\n",
+    "\n",
+    "        # cross join (through the constant helper column 'foo') to add the year and compound\n",
+    "        # columns to the original data. 'columns=' is used because the positional axis\n",
+    "        # argument of drop() is no longer accepted since pandas 2.0.\n",
+    "        df_eachdata = df_eachinfo.assign(foo=1).merge(df_eachdata.assign(foo=1)).drop(columns='foo')\n",
+    "\n",
+    "        frames.append(df_eachdata)\n",
+    "\n",
+    "    # single concatenation instead of growing the dataframe file after file\n",
+    "    df_data = pd.concat(frames)\n",
+    "\n",
+    "    # Add column \"Process Group\" from folder name\n",
+    "    df_data['Process Group'] = sector\n",
+    "\n",
+    "    df_data.to_csv(\"./EDGAR_merge/v50_CO2_\"+emission_type+\"_short-cycle_\"+sector+\"_all_year.csv\")\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/ghg-emissions/edgar/edgar-notebook.md b/notebooks/ghg-emissions/edgar/edgar-notebook.md
@@ -0,0 +1,4 @@
+
+EDGAR_CO2_Merge_SRC_BySector.ipynb merges all the yearly CO2 files by sector of activity and by whether the short cycle is excluded or not.
+
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,4 @@

		EDGAR_CO2_Merge_SRC_BySector.ipynb merges all the yearly CO2 files by sector of activity and by whether the short cycle is excluded or not.