\n",
""
],
"text/plain": [
- " Language\n",
- "0 Java\n",
- "1 C\n",
- "2 C++\n",
- "3 Python\n",
- "4 R\n",
- "5 Fortran\n",
- "6 Ruby\n",
- "7 HTML\n",
- "8 Shell\n",
- "9 Others"
+ "Year Java C C++ Python Ruby Shell\n",
+ "2008 0.000000 0.578053 0.058089 0.361369 0.000000 0.002489\n",
+ "2009 0.000000 0.001607 0.417636 0.566579 0.000000 0.014178\n",
+ "2010 0.000000 0.009153 0.270674 0.716161 0.000000 0.004011\n",
+ "2011 0.000000 0.139155 0.024259 0.786091 0.000000 0.050494\n",
+ "2012 0.017804 0.456689 0.140129 0.333853 0.000000 0.051525\n",
+ "2013 0.240166 0.396410 0.165945 0.167947 0.014956 0.014577\n",
+ "2014 0.131448 0.373647 0.163997 0.316473 0.000000 0.014435\n",
+ "2015 0.001600 0.219418 0.209717 0.291359 0.269503 0.008403\n",
+ "2016 0.032361 0.205968 0.234869 0.489617 0.015521 0.021665\n",
+ "2017 0.000000 0.137536 0.425045 0.236715 0.000003 0.200702\n",
+ "2018 0.000708 0.186219 0.208667 0.570827 0.000005 0.033575\n",
+ "2019 0.192866 0.136325 0.153222 0.513684 0.000000 0.003903\n",
+ "2020 NaN NaN NaN NaN NaN NaN"
]
},
"metadata": {},
"output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "trying Java in year 2008\n",
- "trying C in year 2008\n",
- "trying C++ in year 2008\n",
- "trying Python in year 2008\n",
- "trying R in year 2008\n",
- "trying Fortran in year 2008\n",
- "trying Ruby in year 2008\n",
- "trying HTML in year 2008\n",
- "trying Shell in year 2008\n",
- "trying Others in year 2008\n",
- "trying Java in year 2009\n",
- "trying C in year 2009\n",
- "trying C++ in year 2009\n",
- "trying Python in year 2009\n",
- "trying R in year 2009\n",
- "trying Fortran in year 2009\n",
- "trying Ruby in year 2009\n",
- "trying HTML in year 2009\n",
- "trying Shell in year 2009\n",
- "trying Others in year 2009\n",
- "trying Java in year 2010\n",
- "trying C in year 2010\n",
- "trying C++ in year 2010\n",
- "trying Python in year 2010\n",
- "trying R in year 2010\n",
- "trying Fortran in year 2010\n",
- "trying Ruby in year 2010\n",
- "trying HTML in year 2010\n",
- "trying Shell in year 2010\n",
- "trying Others in year 2010\n",
- "trying Java in year 2011\n",
- "trying C in year 2011\n",
- "trying C++ in year 2011\n",
- "trying Python in year 2011\n",
- "trying R in year 2011\n",
- "trying Fortran in year 2011\n",
- "trying Ruby in year 2011\n",
- "trying HTML in year 2011\n",
- "trying Shell in year 2011\n",
- "trying Others in year 2011\n",
- "trying Java in year 2012\n",
- "trying C in year 2012\n",
- "trying C++ in year 2012\n",
- "trying Python in year 2012\n",
- "trying R in year 2012\n",
- "trying Fortran in year 2012\n",
- "trying Ruby in year 2012\n",
- "trying HTML in year 2012\n",
- "trying Shell in year 2012\n",
- "trying Others in year 2012\n",
- "trying Java in year 2013\n",
- "trying C in year 2013\n",
- "trying C++ in year 2013\n",
- "trying Python in year 2013\n",
- "trying R in year 2013\n",
- "trying Fortran in year 2013\n",
- "trying Ruby in year 2013\n",
- "trying HTML in year 2013\n",
- "trying Shell in year 2013\n",
- "trying Others in year 2013\n",
- "trying Java in year 2014\n",
- "trying C in year 2014\n",
- "trying C++ in year 2014\n",
- "trying Python in year 2014\n",
- "trying R in year 2014\n",
- "trying Fortran in year 2014\n",
- "trying Ruby in year 2014\n",
- "trying HTML in year 2014\n",
- "trying Shell in year 2014\n",
- "trying Others in year 2014\n",
- "trying Java in year 2015\n",
- "trying C in year 2015\n",
- "trying C++ in year 2015\n",
- "trying Python in year 2015\n",
- "trying R in year 2015\n",
- "trying Fortran in year 2015\n",
- "trying Ruby in year 2015\n",
- "trying HTML in year 2015\n",
- "trying Shell in year 2015\n",
- "trying Others in year 2015\n",
- "trying Java in year 2016\n",
- "trying C in year 2016\n",
- "trying C++ in year 2016\n",
- "trying Python in year 2016\n",
- "trying R in year 2016\n",
- "trying Fortran in year 2016\n",
- "trying Ruby in year 2016\n",
- "trying HTML in year 2016\n",
- "trying Shell in year 2016\n",
- "trying Others in year 2016\n",
- "trying Java in year 2017\n",
- "trying C in year 2017\n",
- "trying C++ in year 2017\n",
- "trying Python in year 2017\n",
- "trying R in year 2017\n",
- "trying Fortran in year 2017\n",
- "trying Ruby in year 2017\n",
- "trying HTML in year 2017\n",
- "trying Shell in year 2017\n",
- "trying Others in year 2017\n",
- "trying Java in year 2018\n",
- "trying C in year 2018\n",
- "trying C++ in year 2018\n",
- "trying Python in year 2018\n",
- "trying R in year 2018\n",
- "trying Fortran in year 2018\n",
- "trying Ruby in year 2018\n",
- "trying HTML in year 2018\n",
- "trying Shell in year 2018\n",
- "trying Others in year 2018\n",
- "trying Java in year 2019\n",
- "trying C in year 2019\n",
- "trying C++ in year 2019\n",
- "trying Python in year 2019\n",
- "trying R in year 2019\n",
- "trying Fortran in year 2019\n",
- "trying Ruby in year 2019\n",
- "trying HTML in year 2019\n",
- "trying Shell in year 2019\n",
- "trying Others in year 2019\n",
- "trying Java in year 2020\n",
- "trying C in year 2020\n",
- "trying C++ in year 2020\n",
- "trying Python in year 2020\n",
- "trying R in year 2020\n",
- "trying Fortran in year 2020\n",
- "trying Ruby in year 2020\n",
- "trying HTML in year 2020\n",
- "trying Shell in year 2020\n",
- "trying Others in year 2020\n"
- ]
- },
+ }
+ ],
+ "source": [
+ "# Percentage stacked area chart recipe followed below:\n",
+ "# https://python-graph-gallery.com/255-percentage-stacked-area-chart/\n",
+ "#\n",
+ "# x axis: repository year\n",
+ "# y axis: each language's fraction of that year's bytes\n",
+ "# so for every language we need one value per year\n",
+ "\n",
+ "# only the requested (most popular) languages are considered\n",
+ "\n",
+ "# repo_df starts with one row per language; a column is added per year holding\n",
+ "# that language's fraction of the year's bytes among the requested languages\n",
+ "\n",
+ "repo_years = np.sort(df['repo_year'].unique())\n",
+ "\n",
+ "# pick either list below, or define another subset, to change what is charted\n",
+ "most_bytes_langs = ['Java', 'C', 'C++', 'Python', 'R', 'Fortran', 'Ruby', 'HTML', 'Shell', 'Others']\n",
+ "language_subset = ['Java', 'C', 'C++', 'Python', 'Ruby', 'Shell']\n",
+ "\n",
+ "requested_langs = language_subset\n",
+ "repo_df = pd.DataFrame(requested_langs, columns=['Language'])\n",
+ "\n",
+ "def bytes_that_year(row, year):\n",
+ "    \"\"\"Sum df['Bytes'] for row['Language'] over rows whose repo_year equals year.\n",
+ "\n",
+ "    'Others' stands for every language not in requested_langs; the result is 0\n",
+ "    when no row matches.\n",
+ "    \"\"\"\n",
+ "    in_year = df['repo_year'] == year\n",
+ "    if row['Language'] == 'Others':\n",
+ "        return df.loc[in_year & ~df['Language'].isin(requested_langs), 'Bytes'].sum()\n",
+ "    # A boolean mask sums to 0 for an absent language, so the old bare except,\n",
+ "    # which also turned genuine errors (e.g. a missing column) into 0, is not needed.\n",
+ "    return df.loc[in_year & (df['Language'] == row['Language']), 'Bytes'].sum()\n",
+ "\n",
+ " \n",
+ "# Add one column per repo year. Each cell is that language's fraction of the\n",
+ "# bytes summed over all rows of repo_df for that year.\n",
+ "\n",
+ "for year in repo_years:\n",
+ "    # bytes per requested language for this year, in repo_df row order\n",
+ "    yearly_bytes = repo_df.apply(lambda lang_row: bytes_that_year(lang_row, year), axis=1)\n",
+ "    yearly_total = yearly_bytes.sum()\n",
+ "    # the column is labelled with the bare year; a year whose total is 0\n",
+ "    # divides 0 by 0 and becomes NaN (see the 2020 row of the displayed table)\n",
+ "    # NOTE(review): decide whether such years should be dropped before plotting\n",
+ "    repo_df[str(year)] = yearly_bytes / yearly_total\n",
+ "\n",
+ "#display(repo_df)\n",
+ "\n",
+ "\n",
+ "# Flip repo_df so years run down the index and languages across the columns,\n",
+ "# which is the layout the stacked area chart needs.\n",
+ "flipped = repo_df.transpose()\n",
+ "language_header = flipped.iloc[0]  # former 'Language' column, now the first row\n",
+ "repo_df = flipped.iloc[1:]\n",
+ "repo_df.columns = language_header\n",
+ "repo_df.columns.name = 'Year'  # corner label shown next to the year index\n",
+ "repo_df = repo_df.apply(pd.to_numeric, errors='coerce')  # make every column numeric again after the transpose\n",
+ "display(repo_df)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 211,
+ "metadata": {},
+ "outputs": [
{
"data": {
"text/html": [
@@ -422,234 +439,152 @@
"
\n",
" \n",
"
\n",
- "
\n",
- "
Language
\n",
- "
2008 bytes
\n",
- "
2009 bytes
\n",
- "
2010 bytes
\n",
- "
2011 bytes
\n",
- "
2012 bytes
\n",
- "
2013 bytes
\n",
- "
2014 bytes
\n",
- "
2015 bytes
\n",
- "
2016 bytes
\n",
- "
2017 bytes
\n",
- "
2018 bytes
\n",
- "
2019 bytes
\n",
- "
2020 bytes
\n",
+ "
Year
\n",
+ "
Java
\n",
+ "
C
\n",
+ "
C++
\n",
+ "
Python
\n",
+ "
Ruby
\n",
+ "
Shell
\n",
"
\n",
" \n",
" \n",
"
\n",
- "
0
\n",
- "
Java
\n",
- "
0.0
\n",
- "
0.0
\n",
- "
0.0
\n",
- "
0.0
\n",
- "
1401162.0
\n",
- "
42933030.0
\n",
- "
14083813.0
\n",
- "
296924.0
\n",
- "
2638211.0
\n",
- "
0.0
\n",
- "
2.828600e+04
\n",
- "
1732024.0
\n",
- "
0.0
\n",
+ "
2008
\n",
+ "
0.000000
\n",
+ "
0.578053
\n",
+ "
0.058089
\n",
+ "
0.361369
\n",
+ "
0.000000
\n",
+ "
0.002489
\n",
+ "
\n",
+ "
\n",
+ "
2009
\n",
+ "
0.000000
\n",
+ "
0.001634
\n",
+ "
0.424616
\n",
+ "
0.559335
\n",
+ "
0.000000
\n",
+ "
0.014415
\n",
+ "
\n",
+ "
\n",
+ "
2010
\n",
+ "
0.000000
\n",
+ "
0.018108
\n",
+ "
0.742881
\n",
+ "
0.235232
\n",
+ "
0.000000
\n",
+ "
0.003779
\n",
+ "
\n",
+ "
\n",
+ "
2011
\n",
+ "
0.000000
\n",
+ "
0.132769
\n",
+ "
0.024802
\n",
+ "
0.790838
\n",
+ "
0.000000
\n",
+ "
0.051591
\n",
"
\n",
"
\n",
- "
1
\n",
- "
C
\n",
- "
732140.0
\n",
- "
5396.0
\n",
- "
134128.0
\n",
- "
2912534.0
\n",
- "
35940777.0
\n",
- "
70863794.0
\n",
- "
40033787.0
\n",
- "
40715157.0
\n",
- "
16791222.0
\n",
- "
27935931.0
\n",
- "
7.443884e+06
\n",
- "
1224262.0
\n",
- "
0.0
\n",
+ "
2012
\n",
+ "
0.018174
\n",
+ "
0.463017
\n",
+ "
0.135944
\n",
+ "
0.330277
\n",
+ "
0.000000
\n",
+ "
0.052587
\n",
"
\n",
"
\n",
- "
2
\n",
- "
C++
\n",
- "
73573.0
\n",
- "
1402452.0
\n",
- "
3966318.0
\n",
- "
507749.0
\n",
- "
11027923.0
\n",
- "
29664973.0
\n",
- "
17571170.0
\n",
- "
38915066.0
\n",
- "
19147323.0
\n",
- "
86334220.0
\n",
- "
8.341191e+06
\n",
- "
1376004.0
\n",
- "
0.0
\n",
+ "
2013
\n",
+ "
0.245234
\n",
+ "
0.394822
\n",
+ "
0.168273
\n",
+ "
0.161516
\n",
+ "
0.015271
\n",
+ "
0.014883
\n",
"
\n",
"
\n",
- "
3
\n",
- "
Python
\n",
- "
457697.0
\n",
- "
1902611.0
\n",
- "
10494246.0
\n",
- "
16452958.0
\n",
- "
26273713.0
\n",
- "
30022866.0
\n",
- "
33908005.0
\n",
- "
54064633.0
\n",
- "
39915315.0
\n",
- "
48081048.0
\n",
- "
2.281809e+07
\n",
- "
4613109.0
\n",
- "
0.0
\n",
+ "
2014
\n",
+ "
0.135650
\n",
+ "
0.384275
\n",
+ "
0.168759
\n",
+ "
0.296789
\n",
+ "
0.000000
\n",
+ "
0.014526
\n",
"
\n",
"
\n",
- "
4
\n",
- "
R
\n",
- "
0.0
\n",
- "
0.0
\n",
- "
0.0
\n",
- "
5473.0
\n",
- "
131963.0
\n",
- "
832599.0
\n",
- "
161349.0
\n",
- "
230893.0
\n",
- "
86513.0
\n",
- "
969179.0
\n",
- "
0.000000e+00
\n",
- "
870358.0
\n",
- "
0.0
\n",
+ "
2015
\n",
+ "
0.001723
\n",
+ "
0.214573
\n",
+ "
0.225117
\n",
+ "
0.259376
\n",
+ "
0.290272
\n",
+ "
0.008939
\n",
"
\n",
"
\n",
- "
5
\n",
- "
Fortran
\n",
- "
0.0
\n",
- "
2826100.0
\n",
- "
0.0
\n",
- "
3665668.0
\n",
- "
106958019.0
\n",
- "
10561397.0
\n",
- "
11857286.0
\n",
- "
59772042.0
\n",
- "
7302514.0
\n",
- "
25221003.0
\n",
- "
3.025633e+07
\n",
- "
4989103.0
\n",
- "
1782.0
\n",
+ "
2016
\n",
+ "
0.033668
\n",
+ "
0.212853
\n",
+ "
0.236641
\n",
+ "
0.478205
\n",
+ "
0.016147
\n",
+ "
0.022486
\n",
"
\n",
"
\n",
- "
6
\n",
- "
Ruby
\n",
- "
0.0
\n",
- "
0.0
\n",
- "
0.0
\n",
- "
0.0
\n",
- "
0.0
\n",
- "
2673561.0
\n",
- "
0.0
\n",
- "
50009073.0
\n",
- "
1265301.0
\n",
- "
522.0
\n",
- "
1.850000e+02
\n",
- "
0.0
\n",
- "
0.0
\n",
+ "
2017
\n",
+ "
0.000000
\n",
+ "
0.137341
\n",
+ "
0.412399
\n",
+ "
0.233996
\n",
+ "
0.000003
\n",
+ "
0.216262
\n",
"
\n",
"
\n",
- "
7
\n",
- "
HTML
\n",
- "
0.0
\n",
- "
0.0
\n",
- "
0.0
\n",
- "
3339.0
\n",
- "
7672931.0
\n",
- "
69808148.0
\n",
- "
2625860.0
\n",
- "
129641.0
\n",
- "
7977816.0
\n",
- "
5191228.0
\n",
- "
1.898321e+06
\n",
- "
0.0
\n",
- "
0.0
\n",
+ "
2018
\n",
+ "
0.000725
\n",
+ "
0.189609
\n",
+ "
0.213774
\n",
+ "
0.561629
\n",
+ "
0.000005
\n",
+ "
0.034258
\n",
"
\n",
"
\n",
- "
8
\n",
- "
Shell
\n",
- "
3153.0
\n",
- "
47612.0
\n",
- "
58778.0
\n",
- "
1056847.0
\n",
- "
4054943.0
\n",
- "
2605824.0
\n",
- "
1546652.0
\n",
- "
1559203.0
\n",
- "
1766204.0
\n",
- "
40766099.0
\n",
- "
1.342099e+06
\n",
- "
35050.0
\n",
- "
0.0
\n",
+ "
2019
\n",
+ "
0.255698
\n",
+ "
0.172278
\n",
+ "
0.070655
\n",
+ "
0.498916
\n",
+ "
0.000000
\n",
+ "
0.002454
\n",
"
\n",
"
\n",
- "
9
\n",
- "
Others
\n",
- "
3441.0
\n",
- "
222938.0
\n",
- "
817757.0
\n",
- "
3343580.0
\n",
- "
85308437.0
\n",
- "
34670998.0
\n",
- "
50389592.0
\n",
- "
169066350.0
\n",
- "
108228261.0
\n",
- "
138933263.0
\n",
- "
1.107641e+09
\n",
- "
21152630.0
\n",
- "
2501.0
\n",
+ "
2020
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
+ "
NaN
\n",
"
\n",
" \n",
"
\n",
""
],
"text/plain": [
- " Language 2008 bytes 2009 bytes 2010 bytes 2011 bytes 2012 bytes \\\n",
- "0 Java 0.0 0.0 0.0 0.0 1401162.0 \n",
- "1 C 732140.0 5396.0 134128.0 2912534.0 35940777.0 \n",
- "2 C++ 73573.0 1402452.0 3966318.0 507749.0 11027923.0 \n",
- "3 Python 457697.0 1902611.0 10494246.0 16452958.0 26273713.0 \n",
- "4 R 0.0 0.0 0.0 5473.0 131963.0 \n",
- "5 Fortran 0.0 2826100.0 0.0 3665668.0 106958019.0 \n",
- "6 Ruby 0.0 0.0 0.0 0.0 0.0 \n",
- "7 HTML 0.0 0.0 0.0 3339.0 7672931.0 \n",
- "8 Shell 3153.0 47612.0 58778.0 1056847.0 4054943.0 \n",
- "9 Others 3441.0 222938.0 817757.0 3343580.0 85308437.0 \n",
- "\n",
- " 2013 bytes 2014 bytes 2015 bytes 2016 bytes 2017 bytes \\\n",
- "0 42933030.0 14083813.0 296924.0 2638211.0 0.0 \n",
- "1 70863794.0 40033787.0 40715157.0 16791222.0 27935931.0 \n",
- "2 29664973.0 17571170.0 38915066.0 19147323.0 86334220.0 \n",
- "3 30022866.0 33908005.0 54064633.0 39915315.0 48081048.0 \n",
- "4 832599.0 161349.0 230893.0 86513.0 969179.0 \n",
- "5 10561397.0 11857286.0 59772042.0 7302514.0 25221003.0 \n",
- "6 2673561.0 0.0 50009073.0 1265301.0 522.0 \n",
- "7 69808148.0 2625860.0 129641.0 7977816.0 5191228.0 \n",
- "8 2605824.0 1546652.0 1559203.0 1766204.0 40766099.0 \n",
- "9 34670998.0 50389592.0 169066350.0 108228261.0 138933263.0 \n",
- "\n",
- " 2018 bytes 2019 bytes 2020 bytes \n",
- "0 2.828600e+04 1732024.0 0.0 \n",
- "1 7.443884e+06 1224262.0 0.0 \n",
- "2 8.341191e+06 1376004.0 0.0 \n",
- "3 2.281809e+07 4613109.0 0.0 \n",
- "4 0.000000e+00 870358.0 0.0 \n",
- "5 3.025633e+07 4989103.0 1782.0 \n",
- "6 1.850000e+02 0.0 0.0 \n",
- "7 1.898321e+06 0.0 0.0 \n",
- "8 1.342099e+06 35050.0 0.0 \n",
- "9 1.107641e+09 21152630.0 2501.0 "
+ "Year Java C C++ Python Ruby Shell\n",
+ "2008 0.000000 0.578053 0.058089 0.361369 0.000000 0.002489\n",
+ "2009 0.000000 0.001634 0.424616 0.559335 0.000000 0.014415\n",
+ "2010 0.000000 0.018108 0.742881 0.235232 0.000000 0.003779\n",
+ "2011 0.000000 0.132769 0.024802 0.790838 0.000000 0.051591\n",
+ "2012 0.018174 0.463017 0.135944 0.330277 0.000000 0.052587\n",
+ "2013 0.245234 0.394822 0.168273 0.161516 0.015271 0.014883\n",
+ "2014 0.135650 0.384275 0.168759 0.296789 0.000000 0.014526\n",
+ "2015 0.001723 0.214573 0.225117 0.259376 0.290272 0.008939\n",
+ "2016 0.033668 0.212853 0.236641 0.478205 0.016147 0.022486\n",
+ "2017 0.000000 0.137341 0.412399 0.233996 0.000003 0.216262\n",
+ "2018 0.000725 0.189609 0.213774 0.561629 0.000005 0.034258\n",
+ "2019 0.255698 0.172278 0.070655 0.498916 0.000000 0.002454\n",
+ "2020 NaN NaN NaN NaN NaN NaN"
]
},
"metadata": {},
@@ -657,36 +592,21 @@
}
],
"source": [
- "#link to how to get stacked area chart\n",
- "#https://python-graph-gallery.com/255-percentage-stacked-area-chart/\n",
- "\n",
- "#x axis - year\n",
- "#y axis - percentage of the total\n",
- "#for each language, we need an array of languages over years\n",
- "\n",
- "#let's only take a look at the most popular languages\n",
- "\n",
- "#each language is a separate row\n",
- "#each column is a year, and each cell represents the bytes of that language in that year\n",
+ "#Now, do the same for ascl_year instead of github repo_year\n",
+ "#first drop all the codes where the ascl_year is 0 (they have no ascl-id), so 0 never counts as a year\n",
+ "df = df[df['ascl_year'] != 0]  # rebinds the shared df: cells run after this see only codes with an ascl-id\n",
"\n",
- "top_langs = most['Language']\n",
- "#display(top_langs)\n",
- "repo_years = np.sort(df.repo_year.unique())\n",
+ "ascl_years = np.sort(df['ascl_year'].unique())  # sorted distinct ASCL years of the remaining codes\n",
"\n",
- "#repo_df = pd.DataFrame({'Language': top_langs})\n",
- "most_bytes_langs = ['Java', 'C', 'C++', 'Python', 'R', 'Fortran', 'Ruby', 'HTML', 'Shell', 'Others']\n",
- "repo_df = pd.DataFrame({'Language': most_bytes_langs})\n",
- "display(repo_df)\n",
+ "ascl_df = pd.DataFrame({'Language': requested_langs})  # one row per requested language\n",
"\n",
- "#for each year in repo_years, create a column\n",
- "# each cell will be the number of bytes in that year for that language\n",
"\n",
- "def bytes_that_year(row, year):\n",
- " print('trying ' + row['Language'] + ' in year ' + str(year))\n",
+ "def bytes_that_year_ascl(row, year):\n",
+ " #print('trying ' + row['Language'] + ' in year ' + str(year))\n",
" \n",
" if row['Language'] == 'Others':\n",
- " #sum up all the non-most_bytes_langs bytes\n",
- " return df[ ~(df['Language'].isin(most_bytes_langs)) & (df['repo_year'] == year)]['Bytes'].sum()\n",
+ " #sum up all the non-request_langs bytes\n",
+ " return df[ ~(df['Language'].isin(requested_langs)) & (df['ascl_year'] == year)]['Bytes'].sum()\n",
" lang = row['Language']\n",
" \n",
" try:\n",
@@ -696,33 +616,71 @@
"\n",
"for year in repo_years:\n",
" #create columns for the total number of bytes\n",
- " arr = repo_df.apply (lambda row: bytes_that_year(row, year), axis=1)\n",
- " col_name = str(year) + \" bytes\"\n",
- " repo_df[col_name] = arr\n",
- " \n",
- " \n",
+ " arr = ascl_df.apply (lambda row: bytes_that_year_ascl(row, year), axis=1)\n",
+ " total_bytes = arr.sum()\n",
+ " pct_col_name = str(year) #+ \" pct\"\n",
+ " ascl_df[pct_col_name] = arr.divide(total_bytes)\n",
"\n",
- " \n",
- "display(repo_df)\n",
- "#display(df[df['repo_year'] == 2019].groupby('Language').sum().loc['C']['Bytes'])"
+ "# flip ascl_df the same way as repo_df: years down the index, languages across\n",
+ "flipped = ascl_df.transpose()\n",
+ "language_header = flipped.iloc[0]  # former 'Language' column, now the first row\n",
+ "ascl_df = flipped.iloc[1:]\n",
+ "ascl_df.columns = language_header\n",
+ "ascl_df.columns.name = 'Year'\n",
+ "ascl_df = ascl_df.apply(pd.to_numeric, errors='coerce')\n",
+ "display(ascl_df)\n"
]
},
{
"cell_type": "code",
- "execution_count": 159,
+ "execution_count": 212,
"metadata": {},
"outputs": [
{
"data": {
+ "image/png": "\n",
"text/plain": [
- "138933263"
+ "