diff --git a/Language-Date visualizations.ipynb b/Language-Date visualizations.ipynb
index 3f4e30e..1834308 100644
--- a/Language-Date visualizations.ipynb
+++ b/Language-Date visualizations.ipynb
@@ -147,7 +147,37 @@
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#function to return the correct year depending on the first 2 digits of the ascl-id\n",
+ "def ascl_year(row):\n",
+ "    #ascl-id is yymm.nnn; returns the 4-digit year, or None when the id is 0 or unparsable\n",
+ "    try:\n",
+ "        ident = float(row['ascl-id'])\n",
+ "    except (TypeError, ValueError):\n",
+ "        return None\n",
+ "    if not ident > 0:  #an id of 0 is treated as missing (as before); this also rejects NaN\n",
+ "        return None\n",
+ "    #re-pad to yymm.nnn: a numeric id such as 907.001 has lost its leading zero\n",
+ "    year = int('{:08.3f}'.format(ident)[0:2])\n",
+ "    #two-digit years above 89 belong to the 1900s, everything else to the 2000s\n",
+ "    return 1900 + year if year > 89 else 2000 + year\n",
+ "\n",
+ "#now, a function to extract the repo year (the first 4 characters of repo_date)\n",
+ "def repo_year(row):\n",
+ " return row['repo_date'][0:4]\n",
+ "\n",
+ "df['ascl_year'] = df.apply(ascl_year, axis=1)\n",
+ "df['ascl_year'] = df['ascl_year'].fillna(0.0).astype(int)\n",
+ "df['repo_year'] = df.apply(repo_year, axis=1)\n",
+ "df['repo_year'] = df['repo_year'].fillna(\"0\").astype(int)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 161,
"metadata": {},
"outputs": [
{
@@ -171,889 +201,459 @@
" \n",
" \n",
" | \n",
- " Author | \n",
- " Repo | \n",
- " ascl-id | \n",
- " repo_date | \n",
" Language | \n",
- " Bytes | \n",
- " ascl_year | \n",
- " repo_year | \n",
"
\n",
" \n",
"
\n",
" \n",
" 0 | \n",
- " EdoardoCarlesi | \n",
- " cmbeasy | \n",
- " 1007.004 | \n",
- " 2013-04-13T10:55:00Z | \n",
- " C++ | \n",
- " 2313919 | \n",
- " 2010 | \n",
- " 2013 | \n",
+ " Java | \n",
"
\n",
" \n",
" 1 | \n",
- " EdoardoCarlesi | \n",
- " cmbeasy | \n",
- " 1007.004 | \n",
- " 2013-04-13T10:55:00Z | \n",
" C | \n",
- " 20287 | \n",
- " 2010 | \n",
- " 2013 | \n",
"
\n",
" \n",
" 2 | \n",
- " EdoardoCarlesi | \n",
- " cmbeasy | \n",
- " 1007.004 | \n",
- " 2013-04-13T10:55:00Z | \n",
- " Objective-C | \n",
- " 4294 | \n",
- " 2010 | \n",
- " 2013 | \n",
+ " C++ | \n",
"
\n",
" \n",
" 3 | \n",
- " daddeptr | \n",
- " Needlets | \n",
- " 1010.004 | \n",
- " 2014-02-24T18:28:03Z | \n",
- " Fortran | \n",
- " 63589 | \n",
- " 2010 | \n",
- " 2014 | \n",
+ " Python | \n",
"
\n",
" \n",
" 4 | \n",
- " daddeptr | \n",
- " Needlets | \n",
- " 1010.004 | \n",
- " 2014-02-24T18:28:03Z | \n",
- " IDL | \n",
- " 7343 | \n",
- " 2010 | \n",
- " 2014 | \n",
+ " R | \n",
"
\n",
" \n",
" 5 | \n",
- " daddeptr | \n",
- " Needlets | \n",
- " 1010.004 | \n",
- " 2014-02-24T18:28:03Z | \n",
- " Perl | \n",
- " 1782 | \n",
- " 2010 | \n",
- " 2014 | \n",
+ " Fortran | \n",
"
\n",
" \n",
" 6 | \n",
- " piernik-dev | \n",
- " piernik | \n",
- " 1010.005 | \n",
- " 2013-06-14T11:31:14Z | \n",
- " Fortran | \n",
- " 2863809 | \n",
- " 2010 | \n",
- " 2013 | \n",
+ " Ruby | \n",
"
\n",
" \n",
" 7 | \n",
- " piernik-dev | \n",
- " piernik | \n",
- " 1010.005 | \n",
- " 2013-06-14T11:31:14Z | \n",
- " Python | \n",
- " 120477 | \n",
- " 2010 | \n",
- " 2013 | \n",
+ " HTML | \n",
"
\n",
" \n",
" 8 | \n",
- " piernik-dev | \n",
- " piernik | \n",
- " 1010.005 | \n",
- " 2013-06-14T11:31:14Z | \n",
" Shell | \n",
- " 24446 | \n",
- " 2010 | \n",
- " 2013 | \n",
"
\n",
" \n",
" 9 | \n",
- " piernik-dev | \n",
- " piernik | \n",
- " 1010.005 | \n",
- " 2013-06-14T11:31:14Z | \n",
- " Roff | \n",
- " 7201 | \n",
- " 2010 | \n",
- " 2013 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " piernik-dev | \n",
- " piernik | \n",
- " 1010.005 | \n",
- " 2013-06-14T11:31:14Z | \n",
- " Gnuplot | \n",
- " 5025 | \n",
- " 2010 | \n",
- " 2013 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " piernik-dev | \n",
- " piernik | \n",
- " 1010.005 | \n",
- " 2013-06-14T11:31:14Z | \n",
- " Makefile | \n",
- " 4482 | \n",
- " 2010 | \n",
- " 2013 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " piernik-dev | \n",
- " piernik | \n",
- " 1010.005 | \n",
- " 2013-06-14T11:31:14Z | \n",
- " C | \n",
- " 3171 | \n",
- " 2010 | \n",
- " 2013 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " piernik-dev | \n",
- " piernik | \n",
- " 1010.005 | \n",
- " 2013-06-14T11:31:14Z | \n",
- " Perl | \n",
- " 1060 | \n",
- " 2010 | \n",
- " 2013 | \n",
+ " Others | \n",
"
\n",
- " \n",
- " 14 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " Fortran | \n",
- " 98837524 | \n",
- " 2014 | \n",
- " 2012 | \n",
+ "
\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ " Language\n",
+ "0 Java\n",
+ "1 C\n",
+ "2 C++\n",
+ "3 Python\n",
+ "4 R\n",
+ "5 Fortran\n",
+ "6 Ruby\n",
+ "7 HTML\n",
+ "8 Shell\n",
+ "9 Others"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "trying Java in year 2008\n",
+ "trying C in year 2008\n",
+ "trying C++ in year 2008\n",
+ "trying Python in year 2008\n",
+ "trying R in year 2008\n",
+ "trying Fortran in year 2008\n",
+ "trying Ruby in year 2008\n",
+ "trying HTML in year 2008\n",
+ "trying Shell in year 2008\n",
+ "trying Others in year 2008\n",
+ "trying Java in year 2009\n",
+ "trying C in year 2009\n",
+ "trying C++ in year 2009\n",
+ "trying Python in year 2009\n",
+ "trying R in year 2009\n",
+ "trying Fortran in year 2009\n",
+ "trying Ruby in year 2009\n",
+ "trying HTML in year 2009\n",
+ "trying Shell in year 2009\n",
+ "trying Others in year 2009\n",
+ "trying Java in year 2010\n",
+ "trying C in year 2010\n",
+ "trying C++ in year 2010\n",
+ "trying Python in year 2010\n",
+ "trying R in year 2010\n",
+ "trying Fortran in year 2010\n",
+ "trying Ruby in year 2010\n",
+ "trying HTML in year 2010\n",
+ "trying Shell in year 2010\n",
+ "trying Others in year 2010\n",
+ "trying Java in year 2011\n",
+ "trying C in year 2011\n",
+ "trying C++ in year 2011\n",
+ "trying Python in year 2011\n",
+ "trying R in year 2011\n",
+ "trying Fortran in year 2011\n",
+ "trying Ruby in year 2011\n",
+ "trying HTML in year 2011\n",
+ "trying Shell in year 2011\n",
+ "trying Others in year 2011\n",
+ "trying Java in year 2012\n",
+ "trying C in year 2012\n",
+ "trying C++ in year 2012\n",
+ "trying Python in year 2012\n",
+ "trying R in year 2012\n",
+ "trying Fortran in year 2012\n",
+ "trying Ruby in year 2012\n",
+ "trying HTML in year 2012\n",
+ "trying Shell in year 2012\n",
+ "trying Others in year 2012\n",
+ "trying Java in year 2013\n",
+ "trying C in year 2013\n",
+ "trying C++ in year 2013\n",
+ "trying Python in year 2013\n",
+ "trying R in year 2013\n",
+ "trying Fortran in year 2013\n",
+ "trying Ruby in year 2013\n",
+ "trying HTML in year 2013\n",
+ "trying Shell in year 2013\n",
+ "trying Others in year 2013\n",
+ "trying Java in year 2014\n",
+ "trying C in year 2014\n",
+ "trying C++ in year 2014\n",
+ "trying Python in year 2014\n",
+ "trying R in year 2014\n",
+ "trying Fortran in year 2014\n",
+ "trying Ruby in year 2014\n",
+ "trying HTML in year 2014\n",
+ "trying Shell in year 2014\n",
+ "trying Others in year 2014\n",
+ "trying Java in year 2015\n",
+ "trying C in year 2015\n",
+ "trying C++ in year 2015\n",
+ "trying Python in year 2015\n",
+ "trying R in year 2015\n",
+ "trying Fortran in year 2015\n",
+ "trying Ruby in year 2015\n",
+ "trying HTML in year 2015\n",
+ "trying Shell in year 2015\n",
+ "trying Others in year 2015\n",
+ "trying Java in year 2016\n",
+ "trying C in year 2016\n",
+ "trying C++ in year 2016\n",
+ "trying Python in year 2016\n",
+ "trying R in year 2016\n",
+ "trying Fortran in year 2016\n",
+ "trying Ruby in year 2016\n",
+ "trying HTML in year 2016\n",
+ "trying Shell in year 2016\n",
+ "trying Others in year 2016\n",
+ "trying Java in year 2017\n",
+ "trying C in year 2017\n",
+ "trying C++ in year 2017\n",
+ "trying Python in year 2017\n",
+ "trying R in year 2017\n",
+ "trying Fortran in year 2017\n",
+ "trying Ruby in year 2017\n",
+ "trying HTML in year 2017\n",
+ "trying Shell in year 2017\n",
+ "trying Others in year 2017\n",
+ "trying Java in year 2018\n",
+ "trying C in year 2018\n",
+ "trying C++ in year 2018\n",
+ "trying Python in year 2018\n",
+ "trying R in year 2018\n",
+ "trying Fortran in year 2018\n",
+ "trying Ruby in year 2018\n",
+ "trying HTML in year 2018\n",
+ "trying Shell in year 2018\n",
+ "trying Others in year 2018\n",
+ "trying Java in year 2019\n",
+ "trying C in year 2019\n",
+ "trying C++ in year 2019\n",
+ "trying Python in year 2019\n",
+ "trying R in year 2019\n",
+ "trying Fortran in year 2019\n",
+ "trying Ruby in year 2019\n",
+ "trying HTML in year 2019\n",
+ "trying Shell in year 2019\n",
+ "trying Others in year 2019\n",
+ "trying Java in year 2020\n",
+ "trying C in year 2020\n",
+ "trying C++ in year 2020\n",
+ "trying Python in year 2020\n",
+ "trying R in year 2020\n",
+ "trying Fortran in year 2020\n",
+ "trying Ruby in year 2020\n",
+ "trying HTML in year 2020\n",
+ "trying Shell in year 2020\n",
+ "trying Others in year 2020\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Language | \n",
+ " 2008 bytes | \n",
+ " 2009 bytes | \n",
+ " 2010 bytes | \n",
+ " 2011 bytes | \n",
+ " 2012 bytes | \n",
+ " 2013 bytes | \n",
+ " 2014 bytes | \n",
+ " 2015 bytes | \n",
+ " 2016 bytes | \n",
+ " 2017 bytes | \n",
+ " 2018 bytes | \n",
+ " 2019 bytes | \n",
+ " 2020 bytes | \n",
"
\n",
+ " \n",
+ " \n",
" \n",
- " 15 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " TeX | \n",
- " 28588713 | \n",
- " 2014 | \n",
- " 2012 | \n",
+ " 0 | \n",
+ " Java | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1401162.0 | \n",
+ " 42933030.0 | \n",
+ " 14083813.0 | \n",
+ " 296924.0 | \n",
+ " 2638211.0 | \n",
+ " 0.0 | \n",
+ " 2.828600e+04 | \n",
+ " 1732024.0 | \n",
+ " 0.0 | \n",
"
\n",
" \n",
- " 16 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
+ " 1 | \n",
" C | \n",
- " 27631397 | \n",
- " 2014 | \n",
- " 2012 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " Tcl | \n",
- " 7620923 | \n",
- " 2014 | \n",
- " 2012 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " HTML | \n",
- " 6488997 | \n",
- " 2014 | \n",
- " 2012 | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " Perl | \n",
- " 3304262 | \n",
- " 2014 | \n",
- " 2012 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " Shell | \n",
- " 3156703 | \n",
- " 2014 | \n",
- " 2012 | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " PostScript | \n",
- " 2682224 | \n",
- " 2014 | \n",
- " 2012 | \n",
- "
\n",
- " \n",
- " 22 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " Makefile | \n",
- " 1867008 | \n",
- " 2014 | \n",
- " 2012 | \n",
- "
\n",
- " \n",
- " 23 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " C++ | \n",
- " 1634877 | \n",
- " 2014 | \n",
- " 2012 | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " Emacs Lisp | \n",
- " 1283122 | \n",
- " 2014 | \n",
- " 2012 | \n",
+ " 732140.0 | \n",
+ " 5396.0 | \n",
+ " 134128.0 | \n",
+ " 2912534.0 | \n",
+ " 35940777.0 | \n",
+ " 70863794.0 | \n",
+ " 40033787.0 | \n",
+ " 40715157.0 | \n",
+ " 16791222.0 | \n",
+ " 27935931.0 | \n",
+ " 7.443884e+06 | \n",
+ " 1224262.0 | \n",
+ " 0.0 | \n",
"
\n",
" \n",
- " 25 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " Python | \n",
- " 1196629 | \n",
- " 2014 | \n",
- " 2012 | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " Clean | \n",
- " 1016354 | \n",
- " 2014 | \n",
- " 2012 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " M4 | \n",
- " 778622 | \n",
- " 2014 | \n",
- " 2012 | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " GAP | \n",
- " 237324 | \n",
- " 2014 | \n",
- " 2012 | \n",
- "
\n",
- " \n",
- " 29 | \n",
- " Starlink | \n",
- " starlink | \n",
- " 1407.002 | \n",
- " 2012-07-24T18:15:25Z | \n",
- " Arc | \n",
- " 187478 | \n",
- " 2014 | \n",
- " 2012 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 3399 | \n",
- " CobayaSampler | \n",
- " cobaya | \n",
- " 1910.019 | \n",
- " 2018-09-16T10:52:52Z | \n",
- " Dockerfile | \n",
- " 489 | \n",
- " 2019 | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 3400 | \n",
- " AOtools | \n",
- " aotools | \n",
- " 1910.021 | \n",
- " 2016-03-09T15:00:56Z | \n",
- " Python | \n",
- " 236212 | \n",
- " 2019 | \n",
- " 2016 | \n",
- "
\n",
- " \n",
- " 3401 | \n",
- " astroboylrx | \n",
- " PLAN | \n",
- " 1911.001 | \n",
- " 2016-03-11T17:40:38Z | \n",
+ " 2 | \n",
" C++ | \n",
- " 434938 | \n",
- " 2019 | \n",
- " 2016 | \n",
+ " 73573.0 | \n",
+ " 1402452.0 | \n",
+ " 3966318.0 | \n",
+ " 507749.0 | \n",
+ " 11027923.0 | \n",
+ " 29664973.0 | \n",
+ " 17571170.0 | \n",
+ " 38915066.0 | \n",
+ " 19147323.0 | \n",
+ " 86334220.0 | \n",
+ " 8.341191e+06 | \n",
+ " 1376004.0 | \n",
+ " 0.0 | \n",
"
\n",
" \n",
- " 3402 | \n",
- " astroboylrx | \n",
- " PLAN | \n",
- " 1911.001 | \n",
- " 2016-03-11T17:40:38Z | \n",
- " Python | \n",
- " 5159 | \n",
- " 2019 | \n",
- " 2016 | \n",
- "
\n",
- " \n",
- " 3403 | \n",
- " astroboylrx | \n",
- " PLAN | \n",
- " 1911.001 | \n",
- " 2016-03-11T17:40:38Z | \n",
- " CMake | \n",
- " 3638 | \n",
- " 2019 | \n",
- " 2016 | \n",
- "
\n",
- " \n",
- " 3404 | \n",
- " gmbrandt | \n",
- " xwavecal | \n",
- " 0.000 | \n",
- " 2019-09-11T23:15:01Z | \n",
- " Python | \n",
- " 255870 | \n",
- " 0 | \n",
- " 2019 | \n",
- "
\n",
- " \n",
- " 3405 | \n",
- " soleneulmer | \n",
- " bem | \n",
- " 0.000 | \n",
- " 2019-07-22T12:39:08Z | \n",
- " Python | \n",
- " 61402 | \n",
- " 0 | \n",
- " 2019 | \n",
- "
\n",
- " \n",
- " 3406 | \n",
- " shbhuk | \n",
- " mrexo | \n",
- " 0.000 | \n",
- " 2018-04-20T15:21:01Z | \n",
+ " 3 | \n",
" Python | \n",
- " 104004 | \n",
- " 0 | \n",
- " 2018 | \n",
+ " 457697.0 | \n",
+ " 1902611.0 | \n",
+ " 10494246.0 | \n",
+ " 16452958.0 | \n",
+ " 26273713.0 | \n",
+ " 30022866.0 | \n",
+ " 33908005.0 | \n",
+ " 54064633.0 | \n",
+ " 39915315.0 | \n",
+ " 48081048.0 | \n",
+ " 2.281809e+07 | \n",
+ " 4613109.0 | \n",
+ " 0.0 | \n",
"
\n",
" \n",
- " 3407 | \n",
- " Bo-Ning | \n",
- " Predicting-exoplanet-mass-and-radius-relationship | \n",
- " 0.000 | \n",
- " 2017-11-15T20:36:42Z | \n",
+ " 4 | \n",
" R | \n",
- " 83169 | \n",
- " 0 | \n",
- " 2017 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 5473.0 | \n",
+ " 131963.0 | \n",
+ " 832599.0 | \n",
+ " 161349.0 | \n",
+ " 230893.0 | \n",
+ " 86513.0 | \n",
+ " 969179.0 | \n",
+ " 0.000000e+00 | \n",
+ " 870358.0 | \n",
+ " 0.0 | \n",
"
\n",
" \n",
- " 3408 | \n",
- " rodluger | \n",
- " Limbdark.jl | \n",
- " 0.000 | \n",
- " 2018-04-23T16:28:45Z | \n",
- " Mathematica | \n",
- " 2066526 | \n",
- " 0 | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 3409 | \n",
- " rodluger | \n",
- " Limbdark.jl | \n",
- " 0.000 | \n",
- " 2018-04-23T16:28:45Z | \n",
- " Jupyter Notebook | \n",
- " 1572402 | \n",
- " 0 | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 3410 | \n",
- " rodluger | \n",
- " Limbdark.jl | \n",
- " 0.000 | \n",
- " 2018-04-23T16:28:45Z | \n",
- " TeX | \n",
- " 857618 | \n",
- " 0 | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 3411 | \n",
- " rodluger | \n",
- " Limbdark.jl | \n",
- " 0.000 | \n",
- " 2018-04-23T16:28:45Z | \n",
- " Julia | \n",
- " 412466 | \n",
- " 0 | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 3412 | \n",
- " rodluger | \n",
- " Limbdark.jl | \n",
- " 0.000 | \n",
- " 2018-04-23T16:28:45Z | \n",
+ " 5 | \n",
" Fortran | \n",
- " 32026 | \n",
- " 0 | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 3413 | \n",
- " rodluger | \n",
- " Limbdark.jl | \n",
- " 0.000 | \n",
- " 2018-04-23T16:28:45Z | \n",
- " Python | \n",
- " 31068 | \n",
- " 0 | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 3414 | \n",
- " rodluger | \n",
- " Limbdark.jl | \n",
- " 0.000 | \n",
- " 2018-04-23T16:28:45Z | \n",
- " IDL | \n",
- " 22308 | \n",
- " 0 | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 3415 | \n",
- " rodluger | \n",
- " Limbdark.jl | \n",
- " 0.000 | \n",
- " 2018-04-23T16:28:45Z | \n",
- " Shell | \n",
- " 5381 | \n",
- " 0 | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 3416 | \n",
- " rodluger | \n",
- " Limbdark.jl | \n",
- " 0.000 | \n",
- " 2018-04-23T16:28:45Z | \n",
- " Prolog | \n",
- " 2944 | \n",
- " 0 | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 3417 | \n",
- " rodluger | \n",
- " Limbdark.jl | \n",
- " 0.000 | \n",
- " 2018-04-23T16:28:45Z | \n",
- " Makefile | \n",
- " 1418 | \n",
- " 0 | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 3418 | \n",
- " rodluger | \n",
- " planetplanet | \n",
- " 0.000 | \n",
- " 2017-05-09T23:33:16Z | \n",
- " Python | \n",
- " 415812 | \n",
- " 0 | \n",
- " 2017 | \n",
- "
\n",
- " \n",
- " 3419 | \n",
- " rodluger | \n",
- " planetplanet | \n",
- " 0.000 | \n",
- " 2017-05-09T23:33:16Z | \n",
- " C | \n",
- " 94717 | \n",
- " 0 | \n",
- " 2017 | \n",
- "
\n",
- " \n",
- " 3420 | \n",
- " rodluger | \n",
- " planetplanet | \n",
- " 0.000 | \n",
- " 2017-05-09T23:33:16Z | \n",
- " Julia | \n",
- " 33247 | \n",
- " 0 | \n",
- " 2017 | \n",
+ " 0.0 | \n",
+ " 2826100.0 | \n",
+ " 0.0 | \n",
+ " 3665668.0 | \n",
+ " 106958019.0 | \n",
+ " 10561397.0 | \n",
+ " 11857286.0 | \n",
+ " 59772042.0 | \n",
+ " 7302514.0 | \n",
+ " 25221003.0 | \n",
+ " 3.025633e+07 | \n",
+ " 4989103.0 | \n",
+ " 1782.0 | \n",
"
\n",
" \n",
- " 3421 | \n",
- " rodluger | \n",
- " planetplanet | \n",
- " 0.000 | \n",
- " 2017-05-09T23:33:16Z | \n",
- " Shell | \n",
- " 498 | \n",
- " 0 | \n",
- " 2017 | \n",
- "
\n",
- " \n",
- " 3422 | \n",
- " mihanke | \n",
- " athos | \n",
- " 0.000 | \n",
- " 2018-09-03T07:19:26Z | \n",
- " Python | \n",
- " 34144 | \n",
- " 0 | \n",
- " 2018 | \n",
- "
\n",
- " \n",
- " 3423 | \n",
- " pypeit | \n",
- " PypeIt | \n",
- " 1911.004 | \n",
- " 2015-06-05T22:25:37Z | \n",
- " Python | \n",
- " 4836553 | \n",
- " 2019 | \n",
- " 2015 | \n",
- "
\n",
- " \n",
- " 3424 | \n",
- " pypeit | \n",
- " PypeIt | \n",
- " 1911.004 | \n",
- " 2015-06-05T22:25:37Z | \n",
- " TeX | \n",
- " 129336 | \n",
- " 2019 | \n",
- " 2015 | \n",
- "
\n",
- " \n",
- " 3425 | \n",
- " pypeit | \n",
- " PypeIt | \n",
- " 1911.004 | \n",
- " 2015-06-05T22:25:37Z | \n",
- " Jupyter Notebook | \n",
- " 102355 | \n",
- " 2019 | \n",
- " 2015 | \n",
+ " 6 | \n",
+ " Ruby | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 2673561.0 | \n",
+ " 0.0 | \n",
+ " 50009073.0 | \n",
+ " 1265301.0 | \n",
+ " 522.0 | \n",
+ " 1.850000e+02 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
"
\n",
" \n",
- " 3426 | \n",
- " pypeit | \n",
- " PypeIt | \n",
- " 1911.004 | \n",
- " 2015-06-05T22:25:37Z | \n",
- " C | \n",
- " 11767 | \n",
- " 2019 | \n",
- " 2015 | \n",
+ " 7 | \n",
+ " HTML | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 3339.0 | \n",
+ " 7672931.0 | \n",
+ " 69808148.0 | \n",
+ " 2625860.0 | \n",
+ " 129641.0 | \n",
+ " 7977816.0 | \n",
+ " 5191228.0 | \n",
+ " 1.898321e+06 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
"
\n",
" \n",
- " 3427 | \n",
- " pypeit | \n",
- " PypeIt | \n",
- " 1911.004 | \n",
- " 2015-06-05T22:25:37Z | \n",
+ " 8 | \n",
" Shell | \n",
- " 412 | \n",
- " 2019 | \n",
- " 2015 | \n",
+ " 3153.0 | \n",
+ " 47612.0 | \n",
+ " 58778.0 | \n",
+ " 1056847.0 | \n",
+ " 4054943.0 | \n",
+ " 2605824.0 | \n",
+ " 1546652.0 | \n",
+ " 1559203.0 | \n",
+ " 1766204.0 | \n",
+ " 40766099.0 | \n",
+ " 1.342099e+06 | \n",
+ " 35050.0 | \n",
+ " 0.0 | \n",
"
\n",
" \n",
- " 3428 | \n",
- " pypeit | \n",
- " PypeIt | \n",
- " 1911.004 | \n",
- " 2015-06-05T22:25:37Z | \n",
- " FLUX | \n",
- " 275 | \n",
- " 2019 | \n",
- " 2015 | \n",
+ " 9 | \n",
+ " Others | \n",
+ " 3441.0 | \n",
+ " 222938.0 | \n",
+ " 817757.0 | \n",
+ " 3343580.0 | \n",
+ " 85308437.0 | \n",
+ " 34670998.0 | \n",
+ " 50389592.0 | \n",
+ " 169066350.0 | \n",
+ " 108228261.0 | \n",
+ " 138933263.0 | \n",
+ " 1.107641e+09 | \n",
+ " 21152630.0 | \n",
+ " 2501.0 | \n",
"
\n",
" \n",
"
\n",
- "
3427 rows × 8 columns
\n",
"
"
],
"text/plain": [
- " Author Repo \\\n",
- "0 EdoardoCarlesi cmbeasy \n",
- "1 EdoardoCarlesi cmbeasy \n",
- "2 EdoardoCarlesi cmbeasy \n",
- "3 daddeptr Needlets \n",
- "4 daddeptr Needlets \n",
- "5 daddeptr Needlets \n",
- "6 piernik-dev piernik \n",
- "7 piernik-dev piernik \n",
- "8 piernik-dev piernik \n",
- "9 piernik-dev piernik \n",
- "10 piernik-dev piernik \n",
- "11 piernik-dev piernik \n",
- "12 piernik-dev piernik \n",
- "13 piernik-dev piernik \n",
- "14 Starlink starlink \n",
- "15 Starlink starlink \n",
- "16 Starlink starlink \n",
- "17 Starlink starlink \n",
- "18 Starlink starlink \n",
- "19 Starlink starlink \n",
- "20 Starlink starlink \n",
- "21 Starlink starlink \n",
- "22 Starlink starlink \n",
- "23 Starlink starlink \n",
- "24 Starlink starlink \n",
- "25 Starlink starlink \n",
- "26 Starlink starlink \n",
- "27 Starlink starlink \n",
- "28 Starlink starlink \n",
- "29 Starlink starlink \n",
- "... ... ... \n",
- "3399 CobayaSampler cobaya \n",
- "3400 AOtools aotools \n",
- "3401 astroboylrx PLAN \n",
- "3402 astroboylrx PLAN \n",
- "3403 astroboylrx PLAN \n",
- "3404 gmbrandt xwavecal \n",
- "3405 soleneulmer bem \n",
- "3406 shbhuk mrexo \n",
- "3407 Bo-Ning Predicting-exoplanet-mass-and-radius-relationship \n",
- "3408 rodluger Limbdark.jl \n",
- "3409 rodluger Limbdark.jl \n",
- "3410 rodluger Limbdark.jl \n",
- "3411 rodluger Limbdark.jl \n",
- "3412 rodluger Limbdark.jl \n",
- "3413 rodluger Limbdark.jl \n",
- "3414 rodluger Limbdark.jl \n",
- "3415 rodluger Limbdark.jl \n",
- "3416 rodluger Limbdark.jl \n",
- "3417 rodluger Limbdark.jl \n",
- "3418 rodluger planetplanet \n",
- "3419 rodluger planetplanet \n",
- "3420 rodluger planetplanet \n",
- "3421 rodluger planetplanet \n",
- "3422 mihanke athos \n",
- "3423 pypeit PypeIt \n",
- "3424 pypeit PypeIt \n",
- "3425 pypeit PypeIt \n",
- "3426 pypeit PypeIt \n",
- "3427 pypeit PypeIt \n",
- "3428 pypeit PypeIt \n",
- "\n",
- " ascl-id repo_date Language Bytes ascl_year \\\n",
- "0 1007.004 2013-04-13T10:55:00Z C++ 2313919 2010 \n",
- "1 1007.004 2013-04-13T10:55:00Z C 20287 2010 \n",
- "2 1007.004 2013-04-13T10:55:00Z Objective-C 4294 2010 \n",
- "3 1010.004 2014-02-24T18:28:03Z Fortran 63589 2010 \n",
- "4 1010.004 2014-02-24T18:28:03Z IDL 7343 2010 \n",
- "5 1010.004 2014-02-24T18:28:03Z Perl 1782 2010 \n",
- "6 1010.005 2013-06-14T11:31:14Z Fortran 2863809 2010 \n",
- "7 1010.005 2013-06-14T11:31:14Z Python 120477 2010 \n",
- "8 1010.005 2013-06-14T11:31:14Z Shell 24446 2010 \n",
- "9 1010.005 2013-06-14T11:31:14Z Roff 7201 2010 \n",
- "10 1010.005 2013-06-14T11:31:14Z Gnuplot 5025 2010 \n",
- "11 1010.005 2013-06-14T11:31:14Z Makefile 4482 2010 \n",
- "12 1010.005 2013-06-14T11:31:14Z C 3171 2010 \n",
- "13 1010.005 2013-06-14T11:31:14Z Perl 1060 2010 \n",
- "14 1407.002 2012-07-24T18:15:25Z Fortran 98837524 2014 \n",
- "15 1407.002 2012-07-24T18:15:25Z TeX 28588713 2014 \n",
- "16 1407.002 2012-07-24T18:15:25Z C 27631397 2014 \n",
- "17 1407.002 2012-07-24T18:15:25Z Tcl 7620923 2014 \n",
- "18 1407.002 2012-07-24T18:15:25Z HTML 6488997 2014 \n",
- "19 1407.002 2012-07-24T18:15:25Z Perl 3304262 2014 \n",
- "20 1407.002 2012-07-24T18:15:25Z Shell 3156703 2014 \n",
- "21 1407.002 2012-07-24T18:15:25Z PostScript 2682224 2014 \n",
- "22 1407.002 2012-07-24T18:15:25Z Makefile 1867008 2014 \n",
- "23 1407.002 2012-07-24T18:15:25Z C++ 1634877 2014 \n",
- "24 1407.002 2012-07-24T18:15:25Z Emacs Lisp 1283122 2014 \n",
- "25 1407.002 2012-07-24T18:15:25Z Python 1196629 2014 \n",
- "26 1407.002 2012-07-24T18:15:25Z Clean 1016354 2014 \n",
- "27 1407.002 2012-07-24T18:15:25Z M4 778622 2014 \n",
- "28 1407.002 2012-07-24T18:15:25Z GAP 237324 2014 \n",
- "29 1407.002 2012-07-24T18:15:25Z Arc 187478 2014 \n",
- "... ... ... ... ... ... \n",
- "3399 1910.019 2018-09-16T10:52:52Z Dockerfile 489 2019 \n",
- "3400 1910.021 2016-03-09T15:00:56Z Python 236212 2019 \n",
- "3401 1911.001 2016-03-11T17:40:38Z C++ 434938 2019 \n",
- "3402 1911.001 2016-03-11T17:40:38Z Python 5159 2019 \n",
- "3403 1911.001 2016-03-11T17:40:38Z CMake 3638 2019 \n",
- "3404 0.000 2019-09-11T23:15:01Z Python 255870 0 \n",
- "3405 0.000 2019-07-22T12:39:08Z Python 61402 0 \n",
- "3406 0.000 2018-04-20T15:21:01Z Python 104004 0 \n",
- "3407 0.000 2017-11-15T20:36:42Z R 83169 0 \n",
- "3408 0.000 2018-04-23T16:28:45Z Mathematica 2066526 0 \n",
- "3409 0.000 2018-04-23T16:28:45Z Jupyter Notebook 1572402 0 \n",
- "3410 0.000 2018-04-23T16:28:45Z TeX 857618 0 \n",
- "3411 0.000 2018-04-23T16:28:45Z Julia 412466 0 \n",
- "3412 0.000 2018-04-23T16:28:45Z Fortran 32026 0 \n",
- "3413 0.000 2018-04-23T16:28:45Z Python 31068 0 \n",
- "3414 0.000 2018-04-23T16:28:45Z IDL 22308 0 \n",
- "3415 0.000 2018-04-23T16:28:45Z Shell 5381 0 \n",
- "3416 0.000 2018-04-23T16:28:45Z Prolog 2944 0 \n",
- "3417 0.000 2018-04-23T16:28:45Z Makefile 1418 0 \n",
- "3418 0.000 2017-05-09T23:33:16Z Python 415812 0 \n",
- "3419 0.000 2017-05-09T23:33:16Z C 94717 0 \n",
- "3420 0.000 2017-05-09T23:33:16Z Julia 33247 0 \n",
- "3421 0.000 2017-05-09T23:33:16Z Shell 498 0 \n",
- "3422 0.000 2018-09-03T07:19:26Z Python 34144 0 \n",
- "3423 1911.004 2015-06-05T22:25:37Z Python 4836553 2019 \n",
- "3424 1911.004 2015-06-05T22:25:37Z TeX 129336 2019 \n",
- "3425 1911.004 2015-06-05T22:25:37Z Jupyter Notebook 102355 2019 \n",
- "3426 1911.004 2015-06-05T22:25:37Z C 11767 2019 \n",
- "3427 1911.004 2015-06-05T22:25:37Z Shell 412 2019 \n",
- "3428 1911.004 2015-06-05T22:25:37Z FLUX 275 2019 \n",
+ " Language 2008 bytes 2009 bytes 2010 bytes 2011 bytes 2012 bytes \\\n",
+ "0 Java 0.0 0.0 0.0 0.0 1401162.0 \n",
+ "1 C 732140.0 5396.0 134128.0 2912534.0 35940777.0 \n",
+ "2 C++ 73573.0 1402452.0 3966318.0 507749.0 11027923.0 \n",
+ "3 Python 457697.0 1902611.0 10494246.0 16452958.0 26273713.0 \n",
+ "4 R 0.0 0.0 0.0 5473.0 131963.0 \n",
+ "5 Fortran 0.0 2826100.0 0.0 3665668.0 106958019.0 \n",
+ "6 Ruby 0.0 0.0 0.0 0.0 0.0 \n",
+ "7 HTML 0.0 0.0 0.0 3339.0 7672931.0 \n",
+ "8 Shell 3153.0 47612.0 58778.0 1056847.0 4054943.0 \n",
+ "9 Others 3441.0 222938.0 817757.0 3343580.0 85308437.0 \n",
"\n",
- " repo_year \n",
- "0 2013 \n",
- "1 2013 \n",
- "2 2013 \n",
- "3 2014 \n",
- "4 2014 \n",
- "5 2014 \n",
- "6 2013 \n",
- "7 2013 \n",
- "8 2013 \n",
- "9 2013 \n",
- "10 2013 \n",
- "11 2013 \n",
- "12 2013 \n",
- "13 2013 \n",
- "14 2012 \n",
- "15 2012 \n",
- "16 2012 \n",
- "17 2012 \n",
- "18 2012 \n",
- "19 2012 \n",
- "20 2012 \n",
- "21 2012 \n",
- "22 2012 \n",
- "23 2012 \n",
- "24 2012 \n",
- "25 2012 \n",
- "26 2012 \n",
- "27 2012 \n",
- "28 2012 \n",
- "29 2012 \n",
- "... ... \n",
- "3399 2018 \n",
- "3400 2016 \n",
- "3401 2016 \n",
- "3402 2016 \n",
- "3403 2016 \n",
- "3404 2019 \n",
- "3405 2019 \n",
- "3406 2018 \n",
- "3407 2017 \n",
- "3408 2018 \n",
- "3409 2018 \n",
- "3410 2018 \n",
- "3411 2018 \n",
- "3412 2018 \n",
- "3413 2018 \n",
- "3414 2018 \n",
- "3415 2018 \n",
- "3416 2018 \n",
- "3417 2018 \n",
- "3418 2017 \n",
- "3419 2017 \n",
- "3420 2017 \n",
- "3421 2017 \n",
- "3422 2018 \n",
- "3423 2015 \n",
- "3424 2015 \n",
- "3425 2015 \n",
- "3426 2015 \n",
- "3427 2015 \n",
- "3428 2015 \n",
+ " 2013 bytes 2014 bytes 2015 bytes 2016 bytes 2017 bytes \\\n",
+ "0 42933030.0 14083813.0 296924.0 2638211.0 0.0 \n",
+ "1 70863794.0 40033787.0 40715157.0 16791222.0 27935931.0 \n",
+ "2 29664973.0 17571170.0 38915066.0 19147323.0 86334220.0 \n",
+ "3 30022866.0 33908005.0 54064633.0 39915315.0 48081048.0 \n",
+ "4 832599.0 161349.0 230893.0 86513.0 969179.0 \n",
+ "5 10561397.0 11857286.0 59772042.0 7302514.0 25221003.0 \n",
+ "6 2673561.0 0.0 50009073.0 1265301.0 522.0 \n",
+ "7 69808148.0 2625860.0 129641.0 7977816.0 5191228.0 \n",
+ "8 2605824.0 1546652.0 1559203.0 1766204.0 40766099.0 \n",
+ "9 34670998.0 50389592.0 169066350.0 108228261.0 138933263.0 \n",
"\n",
- "[3427 rows x 8 columns]"
+ " 2018 bytes 2019 bytes 2020 bytes \n",
+ "0 2.828600e+04 1732024.0 0.0 \n",
+ "1 7.443884e+06 1224262.0 0.0 \n",
+ "2 8.341191e+06 1376004.0 0.0 \n",
+ "3 2.281809e+07 4613109.0 0.0 \n",
+ "4 0.000000e+00 870358.0 0.0 \n",
+ "5 3.025633e+07 4989103.0 1782.0 \n",
+ "6 1.850000e+02 0.0 0.0 \n",
+ "7 1.898321e+06 0.0 0.0 \n",
+ "8 1.342099e+06 35050.0 0.0 \n",
+ "9 1.107641e+09 21152630.0 2501.0 "
]
},
- "execution_count": 52,
"metadata": {},
- "output_type": "execute_result"
+ "output_type": "display_data"
}
],
"source": [
@@ -1064,35 +664,65 @@
"#y axis - percentage of the total\n",
"#for each language, we need an array of languages over years\n",
"\n",
- "#display(df)\n",
- "#print (len(df[df['ascl-id'] == 0]))\n",
- "#insert ascl year and repo year field into the df\n",
- "#ascl-year is '20' + first 2 digits of ascl-id\n",
- "#repo-year is first 4 chars of repo-date\n",
+ "#let's only take a look at the most popular languages\n",
"\n",
- "#function to return the correct year depending on the first 2 digits of the ascl-id\n",
- "def ascl_year(row):\n",
- " year = str(row['ascl-id'])[0:2]\n",
+ "#each language is a separate row\n",
+ "#each column is a year, and each cell represents the bytes of that language in that year\n",
+ "\n",
+ "top_langs = most['Language']\n",
+ "#top_langs is not used further in this cell; the rows come from the fixed list below\n",
+ "repo_years = np.sort(df['repo_year'].unique())\n",
+ "\n",
+ "#one row per language; every language outside this list is pooled under 'Others'\n",
+ "most_bytes_langs = ['Java', 'C', 'C++', 'Python', 'R', 'Fortran', 'Ruby', 'HTML', 'Shell', 'Others']\n",
+ "repo_df = pd.DataFrame(data={'Language': most_bytes_langs})\n",
+ "display(repo_df)\n",
+ "\n",
+ "#for each year in repo_years, create a column\n",
+ "# each cell will be the number of bytes in that year for that language\n",
+ "\n",
+ "def bytes_that_year(row, year):\n",
+ " print('trying ' + row['Language'] + ' in year ' + str(year))\n",
+ " \n",
+ " if row['Language'] == 'Others':\n",
+ "        #'Others' row: sum the bytes of every language not in most_bytes_langs for this year\n",
+ " return df[ ~(df['Language'].isin(most_bytes_langs)) & (df['repo_year'] == year)]['Bytes'].sum()\n",
+ " lang = row['Language']\n",
+ " \n",
" try:\n",
- " year = int(year)\n",
+ " return df[df['repo_year'] == year].groupby('Language').sum().loc[lang]['Bytes']\n",
" except:\n",
- " year = float(year)\n",
- " if year > 89:\n",
- " return 1900+int(year)\n",
- " elif year == 0:\n",
- " return None\n",
- " else:\n",
- " return 2000+int(year)\n",
+ " return 0\n",
"\n",
- "#now, a function to generate the repo year\n",
- "def repo_year(row):\n",
- " return row['repo_date'][0:4]\n",
+ "for year in repo_years:\n",
+ "    #add one '<year> bytes' column holding each language's byte total for that repo year\n",
+ "    arr = repo_df.apply(bytes_that_year, axis=1, args=(year,))\n",
+ "    col_name = str(year) + \" bytes\"\n",
+ "    repo_df[col_name] = arr\n",
+ " \n",
+ " \n",
"\n",
- "df['ascl_year'] = df.apply (lambda row: ascl_year(row), axis=1)\n",
- "df.ascl_year = df.ascl_year.fillna(0.0).astype(int)\n",
- "df['repo_year'] = df.apply (lambda row: repo_year(row), axis=1)\n",
- "df.repo_year = df.repo_year.fillna(\"0\").astype(int)"
+ " \n",
+ "display(repo_df)\n",
+ "#display(df[df['repo_year'] == 2019].groupby('Language').sum().loc['C']['Bytes'])"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 159,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "138933263"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": []
}
],
"metadata": {