Skip to content

Commit

Permalink
Added switch for accepted vs submitted papers. Resources: https://www…
Browse files Browse the repository at this point in the history
  • Loading branch information
M-Hollis committed Dec 9, 2020
1 parent d08bc07 commit 251a36a
Showing 1 changed file with 31 additions and 8 deletions.
39 changes: 31 additions & 8 deletions arxiv_productivity_covid.ipynb
Expand Up @@ -8,7 +8,7 @@
"source": [
"# Script to scrape arXiv and count the astro-ph papers (cross-lists included) submitted in each 7-day window, for comparison with the same week 52 weeks earlier\n",
"# URL form is\n",
"# https://arxiv.org/search/advanced?advanced=1&date-from_date=2019-03-16&date-to_date=2019-03-17&terms-0-operator=AND&terms-0-field=title&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range\n",
"# https://arxiv.org/search/advanced?advanced=&terms-0-field=doi&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range&date-from_date=2019-03-16&date-to_date=2019-03-17&date-date_type=submitted_date&abstracts=show&size=50&order=-announced_date_first\n",
"#\n",
"# Written by BMG 19 Mar 2020\n",
"# Updated by MDJH Apr, Dec 2020"
Expand Down Expand Up @@ -42,7 +42,11 @@
"\n",
"def daterange(start_date, end_date):\n",
" for n in range(0,int ((end_date - start_date).days),7):\n",
" yield start_date + timedelta(n)"
" yield start_date + timedelta(n)\n",
"\n",
"def insert_str(string, str_to_insert, after_string):\n",
" index = string.find(after_string)\n",
" return string[:index] + str_to_insert + string[index:]"
]
},
{
Expand All @@ -58,6 +62,18 @@
"headers.update({ 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'})"
]
},
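{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# USER INPUT\n",
"\n",
"# Choose whether to search accepted papers (ACCEPTED = 'Yes') or all submitted papers (ACCEPTED = 'No').\n",
"# 'Accepted' is approximated by requiring a DOI: the search is restricted to entries whose DOI field matches 10.*\n",
"# Only the exact string 'Yes' enables the filter; any other value is treated as 'No'.\n",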
"ACCEPTED = 'No'"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -82,12 +98,16 @@
" date2 = (single_date - timedelta(days=7)).strftime(\"%Y-%m-%d\")\n",
" date3 = (single_date - timedelta(days=52*7)).strftime(\"%Y-%m-%d\")\n",
" date4 = (single_date - timedelta(days=52*7+7)).strftime(\"%Y-%m-%d\")\n",
"\n",
"# Define search URLs\n",
"# Astronomy\n",
" url1 = \"https://arxiv.org/search/advanced?advanced=1&date-from_date=\"+date2+\"&\"+\"date-to_date=\"+date1+\"&terms-0-operator=AND&terms-0-field=title&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range\"\n",
" url2 = \"https://arxiv.org/search/advanced?advanced=1&date-from_date=\"+date4+\"&\"+\"date-to_date=\"+date3+\"&terms-0-operator=AND&terms-0-field=title&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range\"\n",
"# Physics \n",
"# url1 = \"https://arxiv.org/search/advanced?advanced=1&date-from_date=\"+date2+\"&\"+\"date-to_date=\"+date1+\"&terms-0-operator=AND&terms-0-field=title&classification-physics=y&classification-include_cross_list=include&date-year=&date-filter_by=date_range\"\n",
"# url2 = \"https://arxiv.org/search/advanced?advanced=1&date-from_date=\"+date4+\"&\"+\"date-to_date=\"+date3+\"&terms-0-operator=AND&terms-0-field=title&classification-physics=y&classification-include_cross_list=include&date-year=&date-filter_by=date_range\"\n",
" url1 = 'https://arxiv.org/search/advanced?advanced=&terms-0-field=doi&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range&date-from_date='+date2+'&'+'date-to_date='+date1+'&date-date_type=submitted_date&abstracts=show&size=50&order=-announced_date_first'\n",
" url2 = 'https://arxiv.org/search/advanced?advanced=&terms-0-field=doi&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range&date-from_date='+date4+'&'+'date-to_date='+date3+'&date-date_type=submitted_date&abstracts=show&size=50&order=-announced_date_first'\n",
"\n",
" if ACCEPTED == \"Yes\":\n",
" url1 = insert_str(url1, '&terms-0-operator=AND&terms-0-term=10.*', '&terms-0-field=doi')\n",
" url2 = insert_str(url2, '&terms-0-operator=AND&terms-0-term=10.*', '&terms-0-field=doi')\n",
"\n",
" req1 = requests.get(url1, headers)\n",
" req2 = requests.get(url2, headers)\n",
" soup1 = BeautifulSoup(req1.content, 'html.parser')\n",
Expand Down Expand Up @@ -172,7 +192,10 @@
"plt.plot(day2020,numratio)\n",
"plt.plot(day2020,numratio,'bo')\n",
"plt.xlabel('Date', fontsize=18,labelpad=20)\n",
"plt.ylabel('Number of astrophysics papers submitted to arXiv \\n relative to same week in previous year', fontsize=16, labelpad=20)\n",
"if ACCEPTED == 'Yes':\n",
" plt.ylabel('Number of accepted astrophysics papers in arXiv \\n relative to same week in previous year', fontsize=16, labelpad=20)\n",
"else:\n",
" plt.ylabel('Number of astrophysics papers submitted to arXiv \\n relative to same week in previous year', fontsize=16, labelpad=20)\n",
"#plt.plot(day2020,num2019,label='2019')\n",
"#plt.plot(day2020,num2020,label='2020')\n",
"#plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc='lower left', ncol=2, mode=\"expand\", borderaxespad=0.)\n",
Expand Down

0 comments on commit 251a36a

Please sign in to comment.