Skip to content

Commit

Permalink
Added table of submissions by month
Browse files Browse the repository at this point in the history
  • Loading branch information
M-Hollis committed Dec 9, 2020
1 parent cbda600 commit d08bc07
Showing 1 changed file with 56 additions and 17 deletions.
73 changes: 56 additions & 17 deletions arxiv_productivity_covid.ipynb
Expand Up @@ -2,31 +2,32 @@
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Script to scrape arXiv to compute number of astro papers published each day\n",
"# URL form is\n",
"# https://arxiv.org/search/advanced?advanced=1&date-from_date=2019-03-16&date-to_date=2019-03-17&terms-0-operator=AND&terms-0-field=title&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range\n",
"#\n",
"# Written by BMG 19 Mar 2020"
"# Written by BMG 19 Mar 2020\n",
"# Updated by MDJH Apr, Dec 2020"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install required packages\n",
"\n",
"#%pip install -r requirements.txt\n"
"#%pip install -r requirements.txt"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -37,7 +38,6 @@
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib.pyplot import figure\n",
"import matplotlib.dates as mdates\n",
"\n",
"\n",
"def daterange(start_date, end_date):\n",
Expand All @@ -47,11 +47,11 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Using info from https://web.archive.org/web/20200319144443/https://hackersandslackers.com/scraping-urls-with-beautifulsoup/\n",
"# Using info from https://hackersandslackers.com/scraping-urls-with-beautifulsoup/ (archived at https://web.archive.org/web/20200319144443/https://hackersandslackers.com/scraping-urls-with-beautifulsoup/)\n",
"\n",
"# Set headers to not trigger robot alert\n",
"headers = requests.utils.default_headers()\n",
Expand All @@ -60,14 +60,16 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# Loop through years\n",
"# Define empty dictionary for storage\n",
"submissions = {}\n",
"\n",
"# Loop through years\n",
"num2020 = np.array([])\n",
"date2020 = np.array([])\n",
"num2019 = np.array([])\n",
Expand Down Expand Up @@ -100,12 +102,16 @@
" date2020 = np.append(date2020,date2)\n",
" num2020 = np.append(num2020,num1)\n",
" date2019 = np.append(date2019,date4)\n",
" num2019 = np.append(num2019,num2)\n"
" num2019 = np.append(num2019,num2)\n",
"\n",
"# Store weekly submissions in dictionary (duplicate entries will just be overwritten)\n",
" submissions.update({date2:num1})\n",
" submissions.update({date4:num2})"
]
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -116,7 +122,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"metadata": {
"scrolled": true
},
Expand All @@ -129,17 +135,17 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"day2020 = [datetime.strptime(x, '%Y-%m-%d') for x in date2020]\n",
"day2019 = [datetime.strptime(x, '%Y-%m-%d') for x in date2019]\n"
"day2019 = [datetime.strptime(x, '%Y-%m-%d') for x in date2019]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -185,7 +191,40 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"# Tally submissions per calendar month\n",
"# A 'YYYY-MM' label is built for every (month, year) pair; each entry of\n",
"# `submissions` whose key contains that label contributes to the month's total.\n",
"yrs = ['2018-','2019-','2020-']\n",
"mths = ['{:02d}'.format(m) for m in range(1,13)]\n",
"\n",
"# Month-major ordering: all years for month 01, then all years for month 02, ...\n",
"months = [yr + mth for mth in mths for yr in yrs]\n",
"\n",
"# One integer total per label (0 when no key matches the label)\n",
"subs = [\n",
"    np.sum([int(count) for date, count in submissions.items() if label in date], dtype=int)\n",
"    for label in months\n",
"]\n",
"\n",
"# Fold the flat list into a table: one row per month, one column per year\n",
"monthly_submissions = np.array(subs).reshape(-1, len(yrs))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Display monthly submissions as a plain-text table (rows = months, columns = years)\n",
"\n",
"# Drop the trailing '-' from the year labels so they can serve as column headers.\n",
"# NOTE: this rebinds `yrs`; strip() is idempotent, so re-running this cell is safe.\n",
"yrs = [y.strip('-') for y in yrs]\n",
"\n",
"# One right-aligned, 10-character column for the month label plus one per year\n",
"row_format = '{:>10}' * (len(yrs) + 1)\n",
"\n",
"print('Submissions by month:')\n",
"print('')\n",
"print(row_format.format('', *yrs))\n",
"# Build the underline row from len(yrs) rather than hard-coding three entries,\n",
"# so the table keeps working if years are added to or removed from `yrs`.\n",
"print(row_format.format('', *(['===='] * len(yrs))))\n",
"for month, row in zip(mths, monthly_submissions):\n",
"    print(row_format.format(month, *row))"
]
},
{
"cell_type": "code",
Expand Down

0 comments on commit d08bc07

Please sign in to comment.