diff --git a/Language-Date visualizations.ipynb b/Language-Date visualizations.ipynb index 3f4e30e..1834308 100644 --- a/Language-Date visualizations.ipynb +++ b/Language-Date visualizations.ipynb @@ -147,7 +147,37 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "#function to return the correct year depending on the first 2 digits of the ascl-id\n", + "def ascl_year(row):\n", + " year = str(row['ascl-id'])[0:2]\n", + " try:\n", + " year = int(year)\n", + " except:\n", + " year = float(year)\n", + " if year > 89:\n", + " return 1900+int(year)\n", + " elif year == 0:\n", + " return None\n", + " else:\n", + " return 2000+int(year)\n", + "\n", + "#now, a function to generate the repo year\n", + "def repo_year(row):\n", + " return row['repo_date'][0:4]\n", + "\n", + "df['ascl_year'] = df.apply (lambda row: ascl_year(row), axis=1)\n", + "df.ascl_year = df.ascl_year.fillna(0.0).astype(int)\n", + "df['repo_year'] = df.apply (lambda row: repo_year(row), axis=1)\n", + "df.repo_year = df.repo_year.fillna(\"0\").astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": 161, "metadata": {}, "outputs": [ { @@ -171,889 +201,459 @@ " \n", " \n", " \n", - " Author\n", - " Repo\n", - " ascl-id\n", - " repo_date\n", " Language\n", - " Bytes\n", - " ascl_year\n", - " repo_year\n", " \n", " \n", " \n", " \n", " 0\n", - " EdoardoCarlesi\n", - " cmbeasy\n", - " 1007.004\n", - " 2013-04-13T10:55:00Z\n", - " C++\n", - " 2313919\n", - " 2010\n", - " 2013\n", + " Java\n", " \n", " \n", " 1\n", - " EdoardoCarlesi\n", - " cmbeasy\n", - " 1007.004\n", - " 2013-04-13T10:55:00Z\n", " C\n", - " 20287\n", - " 2010\n", - " 2013\n", " \n", " \n", " 2\n", - " EdoardoCarlesi\n", - " cmbeasy\n", - " 1007.004\n", - " 2013-04-13T10:55:00Z\n", - " Objective-C\n", - " 4294\n", - " 2010\n", - " 2013\n", + " C++\n", " \n", " \n", " 3\n", - " daddeptr\n", - " Needlets\n", - " 1010.004\n", - " 2014-02-24T18:28:03Z\n", - " Fortran\n", - " 63589\n", - " 2010\n", - " 2014\n", + " Python\n", " \n", " \n", " 4\n", - " daddeptr\n", - " Needlets\n", - " 1010.004\n", - " 2014-02-24T18:28:03Z\n", - " IDL\n", - " 7343\n", - " 2010\n", - " 2014\n", + " R\n", " \n", " \n", " 5\n", - " daddeptr\n", - " Needlets\n", - " 1010.004\n", - " 2014-02-24T18:28:03Z\n", - " Perl\n", - " 1782\n", - " 2010\n", - " 2014\n", + " Fortran\n", " \n", " \n", " 6\n", - " piernik-dev\n", - " piernik\n", - " 1010.005\n", - " 2013-06-14T11:31:14Z\n", - " Fortran\n", - " 2863809\n", - " 2010\n", - " 2013\n", + " Ruby\n", " \n", " \n", " 7\n", - " piernik-dev\n", - " piernik\n", - " 1010.005\n", - " 2013-06-14T11:31:14Z\n", - " Python\n", - " 120477\n", - " 2010\n", - " 2013\n", + " HTML\n", " \n", " \n", " 8\n", - " piernik-dev\n", - " piernik\n", - " 1010.005\n", - " 2013-06-14T11:31:14Z\n", " Shell\n", - " 24446\n", - " 2010\n", - " 2013\n", " \n", " \n", " 9\n", - " piernik-dev\n", - " piernik\n", - " 1010.005\n", - " 2013-06-14T11:31:14Z\n", - " Roff\n", - " 7201\n", - " 2010\n", - " 2013\n", - " \n", - " \n", - " 10\n", - " piernik-dev\n", - " piernik\n", - " 1010.005\n", - " 2013-06-14T11:31:14Z\n", - " Gnuplot\n", - " 5025\n", - " 2010\n", - " 2013\n", - " \n", - " \n", - " 11\n", - " piernik-dev\n", - " piernik\n", - " 1010.005\n", - " 2013-06-14T11:31:14Z\n", - " Makefile\n", - " 4482\n", - " 2010\n", - " 2013\n", - " \n", - " \n", - " 12\n", - " piernik-dev\n", - " piernik\n", - " 1010.005\n", - " 2013-06-14T11:31:14Z\n", - " C\n", - " 3171\n", - " 2010\n", - " 2013\n", - " \n", - " \n", - " 13\n", - " piernik-dev\n", - " piernik\n", - " 1010.005\n", - " 2013-06-14T11:31:14Z\n", - " Perl\n", - " 1060\n", - " 2010\n", - " 2013\n", + " Others\n", " \n", - " \n", - " 14\n", - " Starlink\n", - " starlink\n", - " 1407.002\n", - " 2012-07-24T18:15:25Z\n", - " Fortran\n", - " 98837524\n", - " 2014\n", - " 2012\n", + " \n", + "\n", + "" + ], + "text/plain": [ + " Language\n", + "0 Java\n", + "1 C\n", + "2 C++\n", + "3 Python\n", + "4 R\n", + "5 Fortran\n", + "6 Ruby\n", + "7 HTML\n", + "8 Shell\n", + "9 Others" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "trying Java in year 2008\n", + "trying C in year 2008\n", + "trying C++ in year 2008\n", + "trying Python in year 2008\n", + "trying R in year 2008\n", + "trying Fortran in year 2008\n", + "trying Ruby in year 2008\n", + "trying HTML in year 2008\n", + "trying Shell in year 2008\n", + "trying Others in year 2008\n", + "trying Java in year 2009\n", + "trying C in year 2009\n", + "trying C++ in year 2009\n", + "trying Python in year 2009\n", + "trying R in year 2009\n", + "trying Fortran in year 2009\n", + "trying Ruby in year 2009\n", + "trying HTML in year 2009\n", + "trying Shell in year 2009\n", + "trying Others in year 2009\n", + "trying Java in year 2010\n", + "trying C in year 2010\n", + "trying C++ in year 2010\n", + "trying Python in year 2010\n", + "trying R in year 2010\n", + "trying Fortran in year 2010\n", + "trying Ruby in year 2010\n", + "trying HTML in year 2010\n", + "trying Shell in year 2010\n", + "trying Others in year 2010\n", + "trying Java in year 2011\n", + "trying C in year 2011\n", + "trying C++ in year 2011\n", + "trying Python in year 2011\n", + "trying R in year 2011\n", + "trying Fortran in year 2011\n", + "trying Ruby in year 2011\n", + "trying HTML in year 2011\n", + "trying Shell in year 2011\n", + "trying Others in year 2011\n", + "trying Java in year 2012\n", + "trying C in year 2012\n", + "trying C++ in year 2012\n", + "trying Python in year 2012\n", + "trying R in year 2012\n", + "trying Fortran in year 2012\n", + "trying Ruby in year 2012\n", + "trying HTML in year 2012\n", + "trying Shell in year 2012\n", + "trying Others in year 2012\n", + "trying Java in year 2013\n", + "trying C in year 2013\n", + "trying C++ in year 2013\n", + "trying Python in year 2013\n", + "trying R in year 2013\n", + "trying Fortran in year 2013\n", + "trying Ruby in year 2013\n", + "trying HTML in year 2013\n", + "trying Shell in year 2013\n", + "trying Others in year 2013\n", + "trying Java in year 2014\n", + "trying C in year 2014\n", + "trying C++ in year 2014\n", + "trying Python in year 2014\n", + "trying R in year 2014\n", + "trying Fortran in year 2014\n", + "trying Ruby in year 2014\n", + "trying HTML in year 2014\n", + "trying Shell in year 2014\n", + "trying Others in year 2014\n", + "trying Java in year 2015\n", + "trying C in year 2015\n", + "trying C++ in year 2015\n", + "trying Python in year 2015\n", + "trying R in year 2015\n", + "trying Fortran in year 2015\n", + "trying Ruby in year 2015\n", + "trying HTML in year 2015\n", + "trying Shell in year 2015\n", + "trying Others in year 2015\n", + "trying Java in year 2016\n", + "trying C in year 2016\n", + "trying C++ in year 2016\n", + "trying Python in year 2016\n", + "trying R in year 2016\n", + "trying Fortran in year 2016\n", + "trying Ruby in year 2016\n", + "trying HTML in year 2016\n", + "trying Shell in year 2016\n", + "trying Others in year 2016\n", + "trying Java in year 2017\n", + "trying C in year 2017\n", + "trying C++ in year 2017\n", + "trying Python in year 2017\n", + "trying R in year 2017\n", + "trying Fortran in year 2017\n", + "trying Ruby in year 2017\n", + "trying HTML in year 2017\n", + "trying Shell in year 2017\n", + "trying Others in year 2017\n", + "trying Java in year 2018\n", + "trying C in year 2018\n", + "trying C++ in year 2018\n", + "trying Python in year 2018\n", + "trying R in year 2018\n", + "trying Fortran in year 2018\n", + "trying Ruby in year 2018\n", + "trying HTML in year 2018\n", + "trying Shell in year 2018\n", + "trying Others in year 2018\n", + "trying Java in year 2019\n", + "trying C in year 2019\n", + "trying C++ in year 2019\n", + "trying Python in year 2019\n", + "trying R in year 2019\n", + "trying Fortran in year 2019\n", + "trying Ruby in year 2019\n", + "trying HTML in year 2019\n", + "trying Shell in year 2019\n", + "trying Others in year 2019\n", + "trying Java in year 2020\n", + "trying C in year 2020\n", + "trying C++ in year 2020\n", + "trying Python in year 2020\n", + "trying R in year 2020\n", + "trying Fortran in year 2020\n", + "trying Ruby in year 2020\n", + "trying HTML in year 2020\n", + "trying Shell in year 2020\n", + "trying Others in year 2020\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
Language2008 bytes2009 bytes2010 bytes2011 bytes2012 bytes2013 bytes2014 bytes2015 bytes2016 bytes2017 bytes2018 bytes2019 bytes2020 bytes
15Starlinkstarlink1407.0022012-07-24T18:15:25ZTeX28588713201420120Java0.00.00.00.01401162.042933030.014083813.0296924.02638211.00.02.828600e+041732024.00.0
16Starlinkstarlink1407.0022012-07-24T18:15:25Z1C2763139720142012
17Starlinkstarlink1407.0022012-07-24T18:15:25ZTcl762092320142012
18Starlinkstarlink1407.0022012-07-24T18:15:25ZHTML648899720142012
19Starlinkstarlink1407.0022012-07-24T18:15:25ZPerl330426220142012
20Starlinkstarlink1407.0022012-07-24T18:15:25ZShell315670320142012
21Starlinkstarlink1407.0022012-07-24T18:15:25ZPostScript268222420142012
22Starlinkstarlink1407.0022012-07-24T18:15:25ZMakefile186700820142012
23Starlinkstarlink1407.0022012-07-24T18:15:25ZC++163487720142012
24Starlinkstarlink1407.0022012-07-24T18:15:25ZEmacs Lisp128312220142012732140.05396.0134128.02912534.035940777.070863794.040033787.040715157.016791222.027935931.07.443884e+061224262.00.0
25Starlinkstarlink1407.0022012-07-24T18:15:25ZPython119662920142012
26Starlinkstarlink1407.0022012-07-24T18:15:25ZClean101635420142012
27Starlinkstarlink1407.0022012-07-24T18:15:25ZM477862220142012
28Starlinkstarlink1407.0022012-07-24T18:15:25ZGAP23732420142012
29Starlinkstarlink1407.0022012-07-24T18:15:25ZArc18747820142012
...........................
3399CobayaSamplercobaya1910.0192018-09-16T10:52:52ZDockerfile48920192018
3400AOtoolsaotools1910.0212016-03-09T15:00:56ZPython23621220192016
3401astroboylrxPLAN1911.0012016-03-11T17:40:38Z2C++4349382019201673573.01402452.03966318.0507749.011027923.029664973.017571170.038915066.019147323.086334220.08.341191e+061376004.00.0
3402astroboylrxPLAN1911.0012016-03-11T17:40:38ZPython515920192016
3403astroboylrxPLAN1911.0012016-03-11T17:40:38ZCMake363820192016
3404gmbrandtxwavecal0.0002019-09-11T23:15:01ZPython25587002019
3405soleneulmerbem0.0002019-07-22T12:39:08ZPython6140202019
3406shbhukmrexo0.0002018-04-20T15:21:01Z3Python10400402018457697.01902611.010494246.016452958.026273713.030022866.033908005.054064633.039915315.048081048.02.281809e+074613109.00.0
3407Bo-NingPredicting-exoplanet-mass-and-radius-relationship0.0002017-11-15T20:36:42Z4R83169020170.00.00.05473.0131963.0832599.0161349.0230893.086513.0969179.00.000000e+00870358.00.0
3408rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZMathematica206652602018
3409rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZJupyter Notebook157240202018
3410rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZTeX85761802018
3411rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZJulia41246602018
3412rodlugerLimbdark.jl0.0002018-04-23T16:28:45Z5Fortran3202602018
3413rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZPython3106802018
3414rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZIDL2230802018
3415rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZShell538102018
3416rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZProlog294402018
3417rodlugerLimbdark.jl0.0002018-04-23T16:28:45ZMakefile141802018
3418rodlugerplanetplanet0.0002017-05-09T23:33:16ZPython41581202017
3419rodlugerplanetplanet0.0002017-05-09T23:33:16ZC9471702017
3420rodlugerplanetplanet0.0002017-05-09T23:33:16ZJulia33247020170.02826100.00.03665668.0106958019.010561397.011857286.059772042.07302514.025221003.03.025633e+074989103.01782.0
3421rodlugerplanetplanet0.0002017-05-09T23:33:16ZShell49802017
3422mihankeathos0.0002018-09-03T07:19:26ZPython3414402018
3423pypeitPypeIt1911.0042015-06-05T22:25:37ZPython483655320192015
3424pypeitPypeIt1911.0042015-06-05T22:25:37ZTeX12933620192015
3425pypeitPypeIt1911.0042015-06-05T22:25:37ZJupyter Notebook102355201920156Ruby0.00.00.00.00.02673561.00.050009073.01265301.0522.01.850000e+020.00.0
3426pypeitPypeIt1911.0042015-06-05T22:25:37ZC11767201920157HTML0.00.00.03339.07672931.069808148.02625860.0129641.07977816.05191228.01.898321e+060.00.0
3427pypeitPypeIt1911.0042015-06-05T22:25:37Z8Shell412201920153153.047612.058778.01056847.04054943.02605824.01546652.01559203.01766204.040766099.01.342099e+0635050.00.0
3428pypeitPypeIt1911.0042015-06-05T22:25:37ZFLUX275201920159Others3441.0222938.0817757.03343580.085308437.034670998.050389592.0169066350.0108228261.0138933263.01.107641e+0921152630.02501.0
\n", - "

3427 rows × 8 columns

\n", "
" ], "text/plain": [ - " Author Repo \\\n", - "0 EdoardoCarlesi cmbeasy \n", - "1 EdoardoCarlesi cmbeasy \n", - "2 EdoardoCarlesi cmbeasy \n", - "3 daddeptr Needlets \n", - "4 daddeptr Needlets \n", - "5 daddeptr Needlets \n", - "6 piernik-dev piernik \n", - "7 piernik-dev piernik \n", - "8 piernik-dev piernik \n", - "9 piernik-dev piernik \n", - "10 piernik-dev piernik \n", - "11 piernik-dev piernik \n", - "12 piernik-dev piernik \n", - "13 piernik-dev piernik \n", - "14 Starlink starlink \n", - "15 Starlink starlink \n", - "16 Starlink starlink \n", - "17 Starlink starlink \n", - "18 Starlink starlink \n", - "19 Starlink starlink \n", - "20 Starlink starlink \n", - "21 Starlink starlink \n", - "22 Starlink starlink \n", - "23 Starlink starlink \n", - "24 Starlink starlink \n", - "25 Starlink starlink \n", - "26 Starlink starlink \n", - "27 Starlink starlink \n", - "28 Starlink starlink \n", - "29 Starlink starlink \n", - "... ... ... \n", - "3399 CobayaSampler cobaya \n", - "3400 AOtools aotools \n", - "3401 astroboylrx PLAN \n", - "3402 astroboylrx PLAN \n", - "3403 astroboylrx PLAN \n", - "3404 gmbrandt xwavecal \n", - "3405 soleneulmer bem \n", - "3406 shbhuk mrexo \n", - "3407 Bo-Ning Predicting-exoplanet-mass-and-radius-relationship \n", - "3408 rodluger Limbdark.jl \n", - "3409 rodluger Limbdark.jl \n", - "3410 rodluger Limbdark.jl \n", - "3411 rodluger Limbdark.jl \n", - "3412 rodluger Limbdark.jl \n", - "3413 rodluger Limbdark.jl \n", - "3414 rodluger Limbdark.jl \n", - "3415 rodluger Limbdark.jl \n", - "3416 rodluger Limbdark.jl \n", - "3417 rodluger Limbdark.jl \n", - "3418 rodluger planetplanet \n", - "3419 rodluger planetplanet \n", - "3420 rodluger planetplanet \n", - "3421 rodluger planetplanet \n", - "3422 mihanke athos \n", - "3423 pypeit PypeIt \n", - "3424 pypeit PypeIt \n", - "3425 pypeit PypeIt \n", - "3426 pypeit PypeIt \n", - "3427 pypeit PypeIt \n", - "3428 pypeit PypeIt \n", - "\n", - " ascl-id repo_date Language Bytes ascl_year \\\n", - "0 1007.004 2013-04-13T10:55:00Z C++ 2313919 2010 \n", - "1 1007.004 2013-04-13T10:55:00Z C 20287 2010 \n", - "2 1007.004 2013-04-13T10:55:00Z Objective-C 4294 2010 \n", - "3 1010.004 2014-02-24T18:28:03Z Fortran 63589 2010 \n", - "4 1010.004 2014-02-24T18:28:03Z IDL 7343 2010 \n", - "5 1010.004 2014-02-24T18:28:03Z Perl 1782 2010 \n", - "6 1010.005 2013-06-14T11:31:14Z Fortran 2863809 2010 \n", - "7 1010.005 2013-06-14T11:31:14Z Python 120477 2010 \n", - "8 1010.005 2013-06-14T11:31:14Z Shell 24446 2010 \n", - "9 1010.005 2013-06-14T11:31:14Z Roff 7201 2010 \n", - "10 1010.005 2013-06-14T11:31:14Z Gnuplot 5025 2010 \n", - "11 1010.005 2013-06-14T11:31:14Z Makefile 4482 2010 \n", - "12 1010.005 2013-06-14T11:31:14Z C 3171 2010 \n", - "13 1010.005 2013-06-14T11:31:14Z Perl 1060 2010 \n", - "14 1407.002 2012-07-24T18:15:25Z Fortran 98837524 2014 \n", - "15 1407.002 2012-07-24T18:15:25Z TeX 28588713 2014 \n", - "16 1407.002 2012-07-24T18:15:25Z C 27631397 2014 \n", - "17 1407.002 2012-07-24T18:15:25Z Tcl 7620923 2014 \n", - "18 1407.002 2012-07-24T18:15:25Z HTML 6488997 2014 \n", - "19 1407.002 2012-07-24T18:15:25Z Perl 3304262 2014 \n", - "20 1407.002 2012-07-24T18:15:25Z Shell 3156703 2014 \n", - "21 1407.002 2012-07-24T18:15:25Z PostScript 2682224 2014 \n", - "22 1407.002 2012-07-24T18:15:25Z Makefile 1867008 2014 \n", - "23 1407.002 2012-07-24T18:15:25Z C++ 1634877 2014 \n", - "24 1407.002 2012-07-24T18:15:25Z Emacs Lisp 1283122 2014 \n", - "25 1407.002 2012-07-24T18:15:25Z Python 1196629 2014 \n", - "26 1407.002 2012-07-24T18:15:25Z Clean 1016354 2014 \n", - "27 1407.002 2012-07-24T18:15:25Z M4 778622 2014 \n", - "28 1407.002 2012-07-24T18:15:25Z GAP 237324 2014 \n", - "29 1407.002 2012-07-24T18:15:25Z Arc 187478 2014 \n", - "... ... ... ... ... ... \n", - "3399 1910.019 2018-09-16T10:52:52Z Dockerfile 489 2019 \n", - "3400 1910.021 2016-03-09T15:00:56Z Python 236212 2019 \n", - "3401 1911.001 2016-03-11T17:40:38Z C++ 434938 2019 \n", - "3402 1911.001 2016-03-11T17:40:38Z Python 5159 2019 \n", - "3403 1911.001 2016-03-11T17:40:38Z CMake 3638 2019 \n", - "3404 0.000 2019-09-11T23:15:01Z Python 255870 0 \n", - "3405 0.000 2019-07-22T12:39:08Z Python 61402 0 \n", - "3406 0.000 2018-04-20T15:21:01Z Python 104004 0 \n", - "3407 0.000 2017-11-15T20:36:42Z R 83169 0 \n", - "3408 0.000 2018-04-23T16:28:45Z Mathematica 2066526 0 \n", - "3409 0.000 2018-04-23T16:28:45Z Jupyter Notebook 1572402 0 \n", - "3410 0.000 2018-04-23T16:28:45Z TeX 857618 0 \n", - "3411 0.000 2018-04-23T16:28:45Z Julia 412466 0 \n", - "3412 0.000 2018-04-23T16:28:45Z Fortran 32026 0 \n", - "3413 0.000 2018-04-23T16:28:45Z Python 31068 0 \n", - "3414 0.000 2018-04-23T16:28:45Z IDL 22308 0 \n", - "3415 0.000 2018-04-23T16:28:45Z Shell 5381 0 \n", - "3416 0.000 2018-04-23T16:28:45Z Prolog 2944 0 \n", - "3417 0.000 2018-04-23T16:28:45Z Makefile 1418 0 \n", - "3418 0.000 2017-05-09T23:33:16Z Python 415812 0 \n", - "3419 0.000 2017-05-09T23:33:16Z C 94717 0 \n", - "3420 0.000 2017-05-09T23:33:16Z Julia 33247 0 \n", - "3421 0.000 2017-05-09T23:33:16Z Shell 498 0 \n", - "3422 0.000 2018-09-03T07:19:26Z Python 34144 0 \n", - "3423 1911.004 2015-06-05T22:25:37Z Python 4836553 2019 \n", - "3424 1911.004 2015-06-05T22:25:37Z TeX 129336 2019 \n", - "3425 1911.004 2015-06-05T22:25:37Z Jupyter Notebook 102355 2019 \n", - "3426 1911.004 2015-06-05T22:25:37Z C 11767 2019 \n", - "3427 1911.004 2015-06-05T22:25:37Z Shell 412 2019 \n", - "3428 1911.004 2015-06-05T22:25:37Z FLUX 275 2019 \n", + " Language 2008 bytes 2009 bytes 2010 bytes 2011 bytes 2012 bytes \\\n", + "0 Java 0.0 0.0 0.0 0.0 1401162.0 \n", + "1 C 732140.0 5396.0 134128.0 2912534.0 35940777.0 \n", + "2 C++ 73573.0 1402452.0 3966318.0 507749.0 11027923.0 \n", + "3 Python 457697.0 1902611.0 10494246.0 16452958.0 26273713.0 \n", + "4 R 0.0 0.0 0.0 5473.0 131963.0 \n", + "5 Fortran 0.0 2826100.0 0.0 3665668.0 106958019.0 \n", + "6 Ruby 0.0 0.0 0.0 0.0 0.0 \n", + "7 HTML 0.0 0.0 0.0 3339.0 7672931.0 \n", + "8 Shell 3153.0 47612.0 58778.0 1056847.0 4054943.0 \n", + "9 Others 3441.0 222938.0 817757.0 3343580.0 85308437.0 \n", "\n", - " repo_year \n", - "0 2013 \n", - "1 2013 \n", - "2 2013 \n", - "3 2014 \n", - "4 2014 \n", - "5 2014 \n", - "6 2013 \n", - "7 2013 \n", - "8 2013 \n", - "9 2013 \n", - "10 2013 \n", - "11 2013 \n", - "12 2013 \n", - "13 2013 \n", - "14 2012 \n", - "15 2012 \n", - "16 2012 \n", - "17 2012 \n", - "18 2012 \n", - "19 2012 \n", - "20 2012 \n", - "21 2012 \n", - "22 2012 \n", - "23 2012 \n", - "24 2012 \n", - "25 2012 \n", - "26 2012 \n", - "27 2012 \n", - "28 2012 \n", - "29 2012 \n", - "... ... \n", - "3399 2018 \n", - "3400 2016 \n", - "3401 2016 \n", - "3402 2016 \n", - "3403 2016 \n", - "3404 2019 \n", - "3405 2019 \n", - "3406 2018 \n", - "3407 2017 \n", - "3408 2018 \n", - "3409 2018 \n", - "3410 2018 \n", - "3411 2018 \n", - "3412 2018 \n", - "3413 2018 \n", - "3414 2018 \n", - "3415 2018 \n", - "3416 2018 \n", - "3417 2018 \n", - "3418 2017 \n", - "3419 2017 \n", - "3420 2017 \n", - "3421 2017 \n", - "3422 2018 \n", - "3423 2015 \n", - "3424 2015 \n", - "3425 2015 \n", - "3426 2015 \n", - "3427 2015 \n", - "3428 2015 \n", + " 2013 bytes 2014 bytes 2015 bytes 2016 bytes 2017 bytes \\\n", + "0 42933030.0 14083813.0 296924.0 2638211.0 0.0 \n", + "1 70863794.0 40033787.0 40715157.0 16791222.0 27935931.0 \n", + "2 29664973.0 17571170.0 38915066.0 19147323.0 86334220.0 \n", + "3 30022866.0 33908005.0 54064633.0 39915315.0 48081048.0 \n", + "4 832599.0 161349.0 230893.0 86513.0 969179.0 \n", + "5 10561397.0 11857286.0 59772042.0 7302514.0 25221003.0 \n", + "6 2673561.0 0.0 50009073.0 1265301.0 522.0 \n", + "7 69808148.0 2625860.0 129641.0 7977816.0 5191228.0 \n", + "8 2605824.0 1546652.0 1559203.0 1766204.0 40766099.0 \n", + "9 34670998.0 50389592.0 169066350.0 108228261.0 138933263.0 \n", "\n", - "[3427 rows x 8 columns]" + " 2018 bytes 2019 bytes 2020 bytes \n", + "0 2.828600e+04 1732024.0 0.0 \n", + "1 7.443884e+06 1224262.0 0.0 \n", + "2 8.341191e+06 1376004.0 0.0 \n", + "3 2.281809e+07 4613109.0 0.0 \n", + "4 0.000000e+00 870358.0 0.0 \n", + "5 3.025633e+07 4989103.0 1782.0 \n", + "6 1.850000e+02 0.0 0.0 \n", + "7 1.898321e+06 0.0 0.0 \n", + "8 1.342099e+06 35050.0 0.0 \n", + "9 1.107641e+09 21152630.0 2501.0 " ] }, - "execution_count": 52, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ @@ -1064,35 +664,65 @@ "#y axis - percentage of the total\n", "#for each language, we need an array of languages over years\n", "\n", - "#display(df)\n", - "#print (len(df[df['ascl-id'] == 0]))\n", - "#insert ascl year and repo year field into the df\n", - "#ascl-year is '20' + first 2 digits of ascl-id\n", - "#repo-year is first 4 chars of repo-date\n", + "#let's only take a look at the most popular languages\n", "\n", - "#function to return the correct year depending on the first 2 digits of the ascl-id\n", - "def ascl_year(row):\n", - " year = str(row['ascl-id'])[0:2]\n", + "#each language is a separate row\n", + "#each column is a year, and each cell represents the bytes of that language in that year\n", + "\n", + "top_langs = most['Language']\n", + "#display(top_langs)\n", + "repo_years = np.sort(df.repo_year.unique())\n", + "\n", + "#repo_df = pd.DataFrame({'Language': top_langs})\n", + "most_bytes_langs = ['Java', 'C', 'C++', 'Python', 'R', 'Fortran', 'Ruby', 'HTML', 'Shell', 'Others']\n", + "repo_df = pd.DataFrame({'Language': most_bytes_langs})\n", + "display(repo_df)\n", + "\n", + "#for each year in repo_years, create a column\n", + "# each cell will be the number of bytes in that year for that language\n", + "\n", + "def bytes_that_year(row, year):\n", + " print('trying ' + row['Language'] + ' in year ' + str(year))\n", + " \n", + " if row['Language'] == 'Others':\n", + " #sum up all the non-most_bytes_langs bytes\n", + " return df[ ~(df['Language'].isin(most_bytes_langs)) & (df['repo_year'] == year)]['Bytes'].sum()\n", + " lang = row['Language']\n", + " \n", " try:\n", - " year = int(year)\n", + " return df[df['repo_year'] == year].groupby('Language').sum().loc[lang]['Bytes']\n", " except:\n", - " year = float(year)\n", - " if year > 89:\n", - " return 1900+int(year)\n", - " elif year == 0:\n", - " return None\n", - " else:\n", - " return 2000+int(year)\n", + " return 0\n", "\n", - "#now, a function to generate the repo year\n", - "def repo_year(row):\n", - " return row['repo_date'][0:4]\n", + "for year in repo_years:\n", + " #create columns for the total number of bytes\n", + " arr = repo_df.apply (lambda row: bytes_that_year(row, year), axis=1)\n", + " col_name = str(year) + \" bytes\"\n", + " repo_df[col_name] = arr\n", + " \n", + " \n", "\n", - "df['ascl_year'] = df.apply (lambda row: ascl_year(row), axis=1)\n", - "df.ascl_year = df.ascl_year.fillna(0.0).astype(int)\n", - "df['repo_year'] = df.apply (lambda row: repo_year(row), axis=1)\n", - "df.repo_year = df.repo_year.fillna(\"0\").astype(int)" + " \n", + "display(repo_df)\n", + "#display(df[df['repo_year'] == 2019].groupby('Language').sum().loc['C']['Bytes'])" ] + }, + { + "cell_type": "code", + "execution_count": 159, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "138933263" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [] } ], "metadata": {