Skip to content

Commit

Permalink
Added country example
Browse files Browse the repository at this point in the history
  • Loading branch information
Juan-Mateos committed Jul 23, 2019
1 parent 60b434e commit cfcd211
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 39 deletions.
112 changes: 73 additions & 39 deletions .ipynb_checkpoints/aux_ai_index_data-checkpoint.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,22 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import datetime\n",
"\n",
"%matplotlib inline"
"%matplotlib inline\n",
"\n",
"today = str(datetime.date.today())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -88,16 +91,16 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"arx_papers = pd.read_csv('../data/processed/23_7_2019_ai_index_papers.csv',compression='zip')"
"arx_papers = pd.read_csv('input_data/23_7_2019_ai_index_papers.csv',compression='zip')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -316,7 +319,7 @@
"[5 rows x 25 columns]"
]
},
"execution_count": 9,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -343,13 +346,15 @@
},
{
"cell_type": "code",
"execution_count": 154,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#Geography to focus on\n",
"geo = 'institute_region'\n",
"\n",
"#Alternatively, set geo = 'institute_country' to run the analysis by country\n",
"\n",
"#Rank in the year citation\n",
"high_cited = 0.5\n",
"\n",
Expand All @@ -372,7 +377,7 @@
},
{
"cell_type": "code",
"execution_count": 155,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand All @@ -386,7 +391,7 @@
"Name: institute_region, dtype: int64"
]
},
"execution_count": 155,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -399,7 +404,7 @@
},
{
"cell_type": "code",
"execution_count": 156,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -408,7 +413,7 @@
},
{
"cell_type": "code",
"execution_count": 157,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -432,7 +437,7 @@
},
{
"cell_type": "code",
"execution_count": 158,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -444,7 +449,7 @@
},
{
"cell_type": "code",
"execution_count": 159,
"execution_count": 10,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -652,29 +657,29 @@
"Shanghai 13.0 15.0 45.0 97.0 140.0 "
]
},
"execution_count": 159,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dl_counts = high_counts.loc[high_counts.sum(axis=1)>0].sort_values(2018,ascending=False)\n",
"counts_year = high_counts.loc[high_counts.sum(axis=1)>0].sort_values(2018,ascending=False)\n",
"\n",
"dl_counts.head(n=10)"
"counts_year.head(n=10)"
]
},
{
"cell_type": "code",
"execution_count": 160,
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x11e706780>"
"<matplotlib.axes._subplots.AxesSubplot at 0x10dbbc668>"
]
},
"execution_count": 160,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
},
Expand All @@ -690,7 +695,16 @@
}
],
"source": [
"dl_counts.T.plot(legend=False,cmap='seismic_r',title='Paper activity per year',alpha=0.7)"
"counts_year.T.plot(legend=False,cmap='seismic_r',title='Paper activity per year',alpha=0.7)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"counts_year.to_csv(f'output_data/{today}_counts_year.csv')"
]
},
{
Expand All @@ -702,19 +716,19 @@
},
{
"cell_type": "code",
"execution_count": 161,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"rca = pd.concat([create_lq_df(pd.crosstab(arx_high.loc[arx_high['year_created']==y,geo],\n",
"rca_year = pd.concat([create_lq_df(pd.crosstab(arx_high.loc[arx_high['year_created']==y,geo],\n",
" arx_high.loc[arx_high['year_created']==y,'dl_cat']))['dl'] for y in np.arange(2008,2019)],axis=1,sort=True).fillna(0)\n",
"\n",
"rca.columns = np.arange(2008,2019)"
"rca_year.columns = np.arange(2008,2019)"
]
},
{
"cell_type": "code",
"execution_count": 162,
"execution_count": 14,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -946,14 +960,14 @@
"California 1.162464 1.178109 "
]
},
"execution_count": 162,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#LQs focusing on the top 100 regions\n",
"rca.loc[top_locs].sort_values(2018,ascending=False).head(n=10)"
"rca_year.loc[top_locs].sort_values(2018,ascending=False).head(n=10)"
]
},
{
Expand All @@ -965,6 +979,15 @@
"Also produce the discretised table with before / after 2012"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"rca_year.to_csv(f'output_data/{today}_rca_year.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -974,7 +997,7 @@
},
{
"cell_type": "code",
"execution_count": 163,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -993,19 +1016,19 @@
},
{
"cell_type": "code",
"execution_count": 164,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"high_counts_discr = pd.concat([pd.crosstab(arx_high.loc[arx_high['discr']==discr,geo],\n",
"counts_discr = pd.concat([pd.crosstab(arx_high.loc[arx_high['discr']==discr,geo],\n",
" arx_high.loc[arx_high['discr']==discr,'dl_cat'])['dl'] for discr in discr_vars],axis=1,sort=True).fillna(0)\n",
"\n",
"high_counts_discr.columns = discr_vars"
"counts_discr.columns = discr_vars"
]
},
{
"cell_type": "code",
"execution_count": 165,
"execution_count": 18,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1102,13 +1125,22 @@
"Ontario 22.0 362.0"
]
},
"execution_count": 165,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"high_counts_discr.sort_values(discr_vars[1],ascending=False).head(n=10)"
"counts_discr.sort_values(discr_vars[1],ascending=False).head(n=10)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"counts_discr.to_csv(f'output_data/{today}_counts_discretised.csv')"
]
},
{
Expand All @@ -1120,7 +1152,7 @@
},
{
"cell_type": "code",
"execution_count": 166,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -1132,7 +1164,7 @@
},
{
"cell_type": "code",
"execution_count": 167,
"execution_count": 21,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1229,7 +1261,7 @@
"Canton of Zürich 0.923684 1.154979"
]
},
"execution_count": 167,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1240,10 +1272,12 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": []
"source": [
"lq_discr.to_csv(f'output_data/{today}_lq_discretised.csv')"
]
}
],
"metadata": {
Expand Down
2 changes: 2 additions & 0 deletions aux_ai_index_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,8 @@
"#Geography to focus on\n",
"geo = 'institute_region'\n",
"\n",
"#Alternatively, set geo = 'institute_country' to run the analysis by country\n",
"\n",
"#Rank in the year citation\n",
"high_cited = 0.5\n",
"\n",
Expand Down

0 comments on commit cfcd211

Please sign in to comment.