diff --git a/arxiv_productivity_covid.ipynb b/arxiv_productivity_covid.ipynb index 36e6505..d06e31f 100644 --- a/arxiv_productivity_covid.ipynb +++ b/arxiv_productivity_covid.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -10,23 +10,24 @@ "# URL form is\n", "# https://arxiv.org/search/advanced?advanced=1&date-from_date=2019-03-16&date-to_date=2019-03-17&terms-0-operator=AND&terms-0-field=title&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range\n", "#\n", - "# Written by BMG 19 Mar 2020" + "# Written by BMG 19 Mar 2020\n", + "# Updated by MDJH Apr, Dec 2020" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Install required packages\n", "\n", - "#%pip install -r requirements.txt\n" + "#%pip install -r requirements.txt" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -37,7 +38,6 @@ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from matplotlib.pyplot import figure\n", - "import matplotlib.dates as mdates\n", "\n", "\n", "def daterange(start_date, end_date):\n", @@ -47,11 +47,11 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Using info from https://web.archive.org/web/20200319144443/https://hackersandslackers.com/scraping-urls-with-beautifulsoup/\n", + "# Using info from https://hackersandslackers.com/scraping-urls-with-beautifulsoup/ (archived at https://web.archive.org/web/20200319144443/https://hackersandslackers.com/scraping-urls-with-beautifulsoup/)\n", "\n", "# Set headers to not trigger robot alert\n", "headers = requests.utils.default_headers()\n", @@ -60,14 +60,16 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "scrolled": false }, "outputs": 
[], "source": [ - "# Loop through years\n", + "# Define empty dictionary for storage\n", + "submissions = {}\n", "\n", + "# Loop through years\n", "num2020 = np.array([])\n", "date2020 = np.array([])\n", "num2019 = np.array([])\n", @@ -100,12 +102,16 @@ " date2020 = np.append(date2020,date2)\n", " num2020 = np.append(num2020,num1)\n", " date2019 = np.append(date2019,date4)\n", - " num2019 = np.append(num2019,num2)\n" + " num2019 = np.append(num2019,num2)\n", + "\n", + "# Store weekly submissions in dictionary (duplicate entries will just be overwritten)\n", + " submissions.update({date2:num1})\n", + " submissions.update({date4:num2})" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -116,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "scrolled": true }, @@ -129,17 +135,17 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "day2020 = [datetime.strptime(x, '%Y-%m-%d') for x in date2020]\n", - "day2019 = [datetime.strptime(x, '%Y-%m-%d') for x in date2019]\n" + "day2019 = [datetime.strptime(x, '%Y-%m-%d') for x in date2019]" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -185,7 +191,40 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Calculate monthly submissions\n", + "yrs = ['2018-','2019-','2020-']\n", + "mths = ['{:02d}'.format(x) for x in list(range(1,13))]\n", + "months = [y+m for m in mths for y in yrs]\n", + "\n", + "subs = []\n", + "\n", + "for month in months:\n", + " num = [int(value) for key, value in submissions.items() if month in key]\n", + " total = np.sum(num, dtype=int)\n", + "\n", + " subs.append(total)\n", + "\n", + "monthly_submissions = np.reshape(np.array(subs), (-1, len(yrs)))" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Display monthly submissions\n", + "yrs = [y.strip('-') for y in yrs]\n", + "row_format ='{:>10}' * (len(yrs) + 1)\n", + "\n", + "print('Submissions by month:')\n", + "print('')\n", + "print(row_format.format('', *yrs))\n", + "print(row_format.format('','====','====','===='))\n", + "for month, row in zip(mths, monthly_submissions):\n", + " print(row_format.format(month, *row))" + ] }, { "cell_type": "code",