diff --git a/Language-Date visualizations.ipynb b/Language-Date visualizations.ipynb index c4e8b79..3f4e30e 100644 --- a/Language-Date visualizations.ipynb +++ b/Language-Date visualizations.ipynb @@ -2,13 +2,14 @@ "cells": [ { "cell_type": "code", - "execution_count": 10, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", "\n", "df = pd.read_csv('language_data_with_dates.csv')\n", "#codes where df[df['ascl-id'] == 0] need to be dealt with when thinking about dates" @@ -16,31 +17,18 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(-1.1122943007093113,\n", - " 1.101258600491493,\n", - " -1.1010813502712005,\n", - " 1.1000514928760843)" + "\"\\nfig1, ax1 = plt.subplots()\\nax1.pie(lang[lang.columns[1]], labels=lang[lang.columns[0]], autopct='%1.1f%%',\\n shadow=True, startangle=90)\\nax1.axis('equal')\\n\"" ] }, - "execution_count": 16, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" } ], "source": [ @@ -70,15 +58,19 @@ "lang = lang.sort_values(by='% of total', ascending=False)\n", "\n", "#Pie chart time!\n", + "\n", + "#this creates a pie chart without consolidating languages\n", + "'''\n", "fig1, ax1 = plt.subplots()\n", "ax1.pie(lang[lang.columns[1]], labels=lang[lang.columns[0]], autopct='%1.1f%%',\n", " shadow=True, startangle=90)\n", - "ax1.axis('equal')" + "ax1.axis('equal')\n", + "'''" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -109,7 +101,7 @@ " 1.1000514928489695)" ] }, - "execution_count": 21, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, @@ -152,6 +144,955 @@ " shadow=True, startangle=90)\n", "ax2.axis('equal')" ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AuthorRepoascl-idrepo_dateLanguageBytesascl_yearrepo_year
0EdoardoCarlesicmbeasy1007.0042013-04-13T10:55:00ZC++231391920102013
1EdoardoCarlesicmbeasy1007.0042013-04-13T10:55:00ZC2028720102013
2EdoardoCarlesicmbeasy1007.0042013-04-13T10:55:00ZObjective-C429420102013
3daddeptrNeedlets1010.0042014-02-24T18:28:03ZFortran6358920102014
4daddeptrNeedlets1010.0042014-02-24T18:28:03ZIDL734320102014
5daddeptrNeedlets1010.0042014-02-24T18:28:03ZPerl178220102014
6piernik-devpiernik1010.0052013-06-14T11:31:14ZFortran286380920102013
7piernik-devpiernik1010.0052013-06-14T11:31:14ZPython12047720102013
8piernik-devpiernik1010.0052013-06-14T11:31:14ZShell2444620102013
9piernik-devpiernik1010.0052013-06-14T11:31:14ZRoff720120102013
10piernik-devpiernik1010.0052013-06-14T11:31:14ZGnuplot502520102013
11piernik-devpiernik1010.0052013-06-14T11:31:14ZMakefile448220102013
12piernik-devpiernik1010.0052013-06-14T11:31:14ZC317120102013
13piernik-devpiernik1010.0052013-06-14T11:31:14ZPerl106020102013
14Starlinkstarlink1407.0022012-07-24T18:15:25ZFortran9883752420142012
15Starlinkstarlink1407.0022012-07-24T18:15:25ZTeX2858871320142012
16Starlinkstarlink1407.0022012-07-24T18:15:25ZC2763139720142012
17Starlinkstarlink1407.0022012-07-24T18:15:25ZTcl762092320142012
18Starlinkstarlink1407.0022012-07-24T18:15:25ZHTML648899720142012
19Starlinkstarlink1407.0022012-07-24T18:15:25ZPerl330426220142012
20Starlinkstarlink1407.0022012-07-24T18:15:25ZShell315670320142012
21Starlinkstarlink1407.0022012-07-24T18:15:25ZPostScript268222420142012
22Starlinkstarlink1407.0022012-07-24T18:15:25ZMakefile186700820142012
23Starlinkstarlink1407.0022012-07-24T18:15:25ZC++163487720142012
24Starlinkstarlink1407.0022012-07-24T18:15:25ZEmacs Lisp128312220142012
25Starlinkstarlink1407.0022012-07-24T18:15:25ZPython119662920142012
26Starlinkstarlink1407.0022012-07-24T18:15:25ZClean101635420142012
27Starlinkstarlink1407.0022012-07-24T18:15:25ZM477862220142012
28Starlinkstarlink1407.0022012-07-24T18:15:25ZGAP23732420142012
29Starlinkstarlink1407.0022012-07-24T18:15:25ZArc18747820142012
...........................
3399CobayaSamplercobaya1910.0192018-09-16T10:52:52ZDockerfile48920192018
3400AOtoolsaotools1910.0212016-03-09T15:00:56ZPython23621220192016
3401astroboylrxPLAN1911.0012016-03-11T17:40:38ZC++43493820192016
3402astroboylrxPLAN1911.0012016-03-11T17:40:38ZPython515920192016
3403astroboylrxPLAN1911.0012016-03-11T17:40:38ZCMake363820192016
3404gmbrandtxwavecal0.0002019-09-11T23:15:01ZPython25587002019
3405soleneulmerbem0.0002019-07-22T12:39:08ZPython6140202019
3406shbhukmrexo0.0002018-04-20T15:21:01ZPython10400402018
3407Bo-NingPredicting-exoplanet-mass-and-radius-relationship0.0002017-11-15T20:36:42ZR8316902017
3408rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZMathematica206652602018
3409rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZJupyter Notebook157240202018
3410rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZTeX85761802018
3411rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZJulia41246602018
3412rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZFortran3202602018
3413rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZPython3106802018
3414rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZIDL2230802018
3415rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZShell538102018
3416rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZProlog294402018
3417rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZMakefile141802018
3418rodlugerplanetplanet0.0002017-05-09T23:33:16ZPython41581202017
3419rodlugerplanetplanet0.0002017-05-09T23:33:16ZC9471702017
3420rodlugerplanetplanet0.0002017-05-09T23:33:16ZJulia3324702017
3421rodlugerplanetplanet0.0002017-05-09T23:33:16ZShell49802017
3422mihankeathos0.0002018-09-03T07:19:26ZPython3414402018
3423pypeitPypeIt1911.0042015-06-05T22:25:37ZPython483655320192015
3424pypeitPypeIt1911.0042015-06-05T22:25:37ZTeX12933620192015
3425pypeitPypeIt1911.0042015-06-05T22:25:37ZJupyter Notebook10235520192015
3426pypeitPypeIt1911.0042015-06-05T22:25:37ZC1176720192015
3427pypeitPypeIt1911.0042015-06-05T22:25:37ZShell41220192015
3428pypeitPypeIt1911.0042015-06-05T22:25:37ZFLUX27520192015
\n", + "

3427 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " Author Repo \\\n", + "0 EdoardoCarlesi cmbeasy \n", + "1 EdoardoCarlesi cmbeasy \n", + "2 EdoardoCarlesi cmbeasy \n", + "3 daddeptr Needlets \n", + "4 daddeptr Needlets \n", + "5 daddeptr Needlets \n", + "6 piernik-dev piernik \n", + "7 piernik-dev piernik \n", + "8 piernik-dev piernik \n", + "9 piernik-dev piernik \n", + "10 piernik-dev piernik \n", + "11 piernik-dev piernik \n", + "12 piernik-dev piernik \n", + "13 piernik-dev piernik \n", + "14 Starlink starlink \n", + "15 Starlink starlink \n", + "16 Starlink starlink \n", + "17 Starlink starlink \n", + "18 Starlink starlink \n", + "19 Starlink starlink \n", + "20 Starlink starlink \n", + "21 Starlink starlink \n", + "22 Starlink starlink \n", + "23 Starlink starlink \n", + "24 Starlink starlink \n", + "25 Starlink starlink \n", + "26 Starlink starlink \n", + "27 Starlink starlink \n", + "28 Starlink starlink \n", + "29 Starlink starlink \n", + "... ... ... \n", + "3399 CobayaSampler cobaya \n", + "3400 AOtools aotools \n", + "3401 astroboylrx PLAN \n", + "3402 astroboylrx PLAN \n", + "3403 astroboylrx PLAN \n", + "3404 gmbrandt xwavecal \n", + "3405 soleneulmer bem \n", + "3406 shbhuk mrexo \n", + "3407 Bo-Ning Predicting-exoplanet-mass-and-radius-relationship \n", + "3408 rodluger Limbdark.jl \n", + "3409 rodluger Limbdark.jl \n", + "3410 rodluger Limbdark.jl \n", + "3411 rodluger Limbdark.jl \n", + "3412 rodluger Limbdark.jl \n", + "3413 rodluger Limbdark.jl \n", + "3414 rodluger Limbdark.jl \n", + "3415 rodluger Limbdark.jl \n", + "3416 rodluger Limbdark.jl \n", + "3417 rodluger Limbdark.jl \n", + "3418 rodluger planetplanet \n", + "3419 rodluger planetplanet \n", + "3420 rodluger planetplanet \n", + "3421 rodluger planetplanet \n", + "3422 mihanke athos \n", + "3423 pypeit PypeIt \n", + "3424 pypeit PypeIt \n", + "3425 pypeit PypeIt \n", + "3426 pypeit PypeIt \n", + "3427 pypeit PypeIt \n", + "3428 pypeit PypeIt \n", + "\n", + " ascl-id repo_date Language Bytes ascl_year \\\n", 
+ "0 1007.004 2013-04-13T10:55:00Z C++ 2313919 2010 \n", + "1 1007.004 2013-04-13T10:55:00Z C 20287 2010 \n", + "2 1007.004 2013-04-13T10:55:00Z Objective-C 4294 2010 \n", + "3 1010.004 2014-02-24T18:28:03Z Fortran 63589 2010 \n", + "4 1010.004 2014-02-24T18:28:03Z IDL 7343 2010 \n", + "5 1010.004 2014-02-24T18:28:03Z Perl 1782 2010 \n", + "6 1010.005 2013-06-14T11:31:14Z Fortran 2863809 2010 \n", + "7 1010.005 2013-06-14T11:31:14Z Python 120477 2010 \n", + "8 1010.005 2013-06-14T11:31:14Z Shell 24446 2010 \n", + "9 1010.005 2013-06-14T11:31:14Z Roff 7201 2010 \n", + "10 1010.005 2013-06-14T11:31:14Z Gnuplot 5025 2010 \n", + "11 1010.005 2013-06-14T11:31:14Z Makefile 4482 2010 \n", + "12 1010.005 2013-06-14T11:31:14Z C 3171 2010 \n", + "13 1010.005 2013-06-14T11:31:14Z Perl 1060 2010 \n", + "14 1407.002 2012-07-24T18:15:25Z Fortran 98837524 2014 \n", + "15 1407.002 2012-07-24T18:15:25Z TeX 28588713 2014 \n", + "16 1407.002 2012-07-24T18:15:25Z C 27631397 2014 \n", + "17 1407.002 2012-07-24T18:15:25Z Tcl 7620923 2014 \n", + "18 1407.002 2012-07-24T18:15:25Z HTML 6488997 2014 \n", + "19 1407.002 2012-07-24T18:15:25Z Perl 3304262 2014 \n", + "20 1407.002 2012-07-24T18:15:25Z Shell 3156703 2014 \n", + "21 1407.002 2012-07-24T18:15:25Z PostScript 2682224 2014 \n", + "22 1407.002 2012-07-24T18:15:25Z Makefile 1867008 2014 \n", + "23 1407.002 2012-07-24T18:15:25Z C++ 1634877 2014 \n", + "24 1407.002 2012-07-24T18:15:25Z Emacs Lisp 1283122 2014 \n", + "25 1407.002 2012-07-24T18:15:25Z Python 1196629 2014 \n", + "26 1407.002 2012-07-24T18:15:25Z Clean 1016354 2014 \n", + "27 1407.002 2012-07-24T18:15:25Z M4 778622 2014 \n", + "28 1407.002 2012-07-24T18:15:25Z GAP 237324 2014 \n", + "29 1407.002 2012-07-24T18:15:25Z Arc 187478 2014 \n", + "... ... ... ... ... ... 
\n", + "3399 1910.019 2018-09-16T10:52:52Z Dockerfile 489 2019 \n", + "3400 1910.021 2016-03-09T15:00:56Z Python 236212 2019 \n", + "3401 1911.001 2016-03-11T17:40:38Z C++ 434938 2019 \n", + "3402 1911.001 2016-03-11T17:40:38Z Python 5159 2019 \n", + "3403 1911.001 2016-03-11T17:40:38Z CMake 3638 2019 \n", + "3404 0.000 2019-09-11T23:15:01Z Python 255870 0 \n", + "3405 0.000 2019-07-22T12:39:08Z Python 61402 0 \n", + "3406 0.000 2018-04-20T15:21:01Z Python 104004 0 \n", + "3407 0.000 2017-11-15T20:36:42Z R 83169 0 \n", + "3408 0.000 2018-04-23T16:28:45Z Mathematica 2066526 0 \n", + "3409 0.000 2018-04-23T16:28:45Z Jupyter Notebook 1572402 0 \n", + "3410 0.000 2018-04-23T16:28:45Z TeX 857618 0 \n", + "3411 0.000 2018-04-23T16:28:45Z Julia 412466 0 \n", + "3412 0.000 2018-04-23T16:28:45Z Fortran 32026 0 \n", + "3413 0.000 2018-04-23T16:28:45Z Python 31068 0 \n", + "3414 0.000 2018-04-23T16:28:45Z IDL 22308 0 \n", + "3415 0.000 2018-04-23T16:28:45Z Shell 5381 0 \n", + "3416 0.000 2018-04-23T16:28:45Z Prolog 2944 0 \n", + "3417 0.000 2018-04-23T16:28:45Z Makefile 1418 0 \n", + "3418 0.000 2017-05-09T23:33:16Z Python 415812 0 \n", + "3419 0.000 2017-05-09T23:33:16Z C 94717 0 \n", + "3420 0.000 2017-05-09T23:33:16Z Julia 33247 0 \n", + "3421 0.000 2017-05-09T23:33:16Z Shell 498 0 \n", + "3422 0.000 2018-09-03T07:19:26Z Python 34144 0 \n", + "3423 1911.004 2015-06-05T22:25:37Z Python 4836553 2019 \n", + "3424 1911.004 2015-06-05T22:25:37Z TeX 129336 2019 \n", + "3425 1911.004 2015-06-05T22:25:37Z Jupyter Notebook 102355 2019 \n", + "3426 1911.004 2015-06-05T22:25:37Z C 11767 2019 \n", + "3427 1911.004 2015-06-05T22:25:37Z Shell 412 2019 \n", + "3428 1911.004 2015-06-05T22:25:37Z FLUX 275 2019 \n", + "\n", + " repo_year \n", + "0 2013 \n", + "1 2013 \n", + "2 2013 \n", + "3 2014 \n", + "4 2014 \n", + "5 2014 \n", + "6 2013 \n", + "7 2013 \n", + "8 2013 \n", + "9 2013 \n", + "10 2013 \n", + "11 2013 \n", + "12 2013 \n", + "13 2013 \n", + "14 2012 \n", + "15 2012 \n", + "16 
2012 \n", + "17 2012 \n", + "18 2012 \n", + "19 2012 \n", + "20 2012 \n", + "21 2012 \n", + "22 2012 \n", + "23 2012 \n", + "24 2012 \n", + "25 2012 \n", + "26 2012 \n", + "27 2012 \n", + "28 2012 \n", + "29 2012 \n", + "... ... \n", + "3399 2018 \n", + "3400 2016 \n", + "3401 2016 \n", + "3402 2016 \n", + "3403 2016 \n", + "3404 2019 \n", + "3405 2019 \n", + "3406 2018 \n", + "3407 2017 \n", + "3408 2018 \n", + "3409 2018 \n", + "3410 2018 \n", + "3411 2018 \n", + "3412 2018 \n", + "3413 2018 \n", + "3414 2018 \n", + "3415 2018 \n", + "3416 2018 \n", + "3417 2018 \n", + "3418 2017 \n", + "3419 2017 \n", + "3420 2017 \n", + "3421 2017 \n", + "3422 2018 \n", + "3423 2015 \n", + "3424 2015 \n", + "3425 2015 \n", + "3426 2015 \n", + "3427 2015 \n", + "3428 2015 \n", + "\n", + "[3427 rows x 8 columns]" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#link to how to get stacked area chart\n", + "#https://python-graph-gallery.com/255-percentage-stacked-area-chart/\n", + "\n", + "#x axis - year\n", + "#y axis - percentage of the total\n", + "#for each language, we need an array of languages over years\n", + "\n", + "#insert ascl year and repo year fields into the df\n", + "#ascl-id has the form YYMM.NNN, but it is loaded as a float, so leading zeros are lost\n", + "#(0101.001 is stored as 101.001) - the year has to come from the integer part, not from the text\n", + "#repo-year is the first 4 chars of repo-date\n", + "\n", + "def ascl_year(row):\n", + "    '''Return the 4-digit year encoded in the ascl-id of one row, or None when there is none.\n", + "\n", + "    row: a mapping / Series with an 'ascl-id' entry (number or numeric string).\n", + "    Two-digit years above 89 are read as 19YY, all others as 20YY.\n", + "    Raises ValueError if the id is present but not numeric.\n", + "    '''\n", + "    ascl_id = row['ascl-id']\n", + "    if pd.isna(ascl_id):\n", + "        return None\n", + "    yymm = int(float(ascl_id))\n", + "    #rows with ascl-id == 0 carry no ASCL date (see the note next to read_csv)\n", + "    if yymm == 0:\n", + "        return None\n", + "    #integer division drops the month and keeps the 2-digit year, even without leading zeros\n", + "    yy = yymm // 100\n", + "    if yy > 89:\n", + "        return 1900 + yy\n", + "    return 2000 + yy\n", + "\n", + "def repo_year(row):\n", + "    '''Return the first 4 characters (the year) of row['repo_date'], or None if it is not a string.'''\n", + "    repo_date = row['repo_date']\n", + "    if not isinstance(repo_date, str):\n", + "        return None\n", + "    return repo_date[0:4]\n", + "\n", + "#rows without a usable year are stored as 0 so both columns can be plain ints\n", + "df['ascl_year'] = df.apply(ascl_year, axis=1)\n", 
+ "df['ascl_year'] = df['ascl_year'].fillna(0).astype(int)\n", + "df['repo_year'] = df.apply(repo_year, axis=1)\n", + "df['repo_year'] = df['repo_year'].fillna(\"0\").astype(int)" + ] + } ], "metadata": {