diff --git a/arxiv_productivity_covid.ipynb b/arxiv_productivity_covid.ipynb
index d06e31f..0008e42 100644
--- a/arxiv_productivity_covid.ipynb
+++ b/arxiv_productivity_covid.ipynb
@@ -8,7 +8,7 @@
    "source": [
     "# Script to scrape arXiv to compute number of astro papers published each day\n",
     "# URL form is\n",
-    "# https://arxiv.org/search/advanced?advanced=1&date-from_date=2019-03-16&date-to_date=2019-03-17&terms-0-operator=AND&terms-0-field=title&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range\n",
+    "# https://arxiv.org/search/advanced?advanced=&terms-0-field=doi&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range&date-from_date=2019-03-16&date-to_date=2019-03-17&date-date_type=submitted_date&abstracts=show&size=50&order=-announced_date_first\n",
     "#\n",
     "# Written by BMG 19 Mar 2020\n",
     "# Updated by MDJH Apr, Dec 2020"
@@ -42,7 +42,14 @@
     "\n",
     "def daterange(start_date, end_date):\n",
     "    for n in range(0,int ((end_date - start_date).days),7):\n",
-    "        yield start_date + timedelta(n)"
+    "        yield start_date + timedelta(n)\n",
+    "\n",
+    "def insert_str(string, str_to_insert, after_string):\n",
+    "    # Return string with str_to_insert placed right after the first after_string\n",
+    "    head, marker, tail = string.partition(after_string)\n",
+    "    if not marker:\n",
+    "        raise ValueError('after_string not found: ' + repr(after_string))\n",
+    "    return head + marker + str_to_insert + tail"
    ]
   },
   {
@@ -58,6 +65,18 @@
     "headers.update({ 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'})"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# USER INPUT\n",
+    "\n",
+    "# Choose whether to search accepted papers (ACCEPTED = 'Yes') or just submitted papers (ACCEPTED = 'No')\n",
+    "ACCEPTED = 'No'"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -82,12 +101,16 @@
     "    date2 = (single_date - timedelta(days=7)).strftime(\"%Y-%m-%d\")\n",
     "    date3 = (single_date - timedelta(days=52*7)).strftime(\"%Y-%m-%d\")\n",
     "    date4 = (single_date - timedelta(days=52*7+7)).strftime(\"%Y-%m-%d\")\n",
+    "\n",
+    "# Define search URLs\n",
     "# Astronomy\n",
-    "    url1 = \"https://arxiv.org/search/advanced?advanced=1&date-from_date=\"+date2+\"&\"+\"date-to_date=\"+date1+\"&terms-0-operator=AND&terms-0-field=title&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range\"\n",
-    "    url2 = \"https://arxiv.org/search/advanced?advanced=1&date-from_date=\"+date4+\"&\"+\"date-to_date=\"+date3+\"&terms-0-operator=AND&terms-0-field=title&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range\"\n",
-    "# Physics \n",
-    "#    url1 = \"https://arxiv.org/search/advanced?advanced=1&date-from_date=\"+date2+\"&\"+\"date-to_date=\"+date1+\"&terms-0-operator=AND&terms-0-field=title&classification-physics=y&classification-include_cross_list=include&date-year=&date-filter_by=date_range\"\n",
-    "#    url2 = \"https://arxiv.org/search/advanced?advanced=1&date-from_date=\"+date4+\"&\"+\"date-to_date=\"+date3+\"&terms-0-operator=AND&terms-0-field=title&classification-physics=y&classification-include_cross_list=include&date-year=&date-filter_by=date_range\"\n",
+    "    url1 = 'https://arxiv.org/search/advanced?advanced=&terms-0-field=doi&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range&date-from_date='+date2+'&'+'date-to_date='+date1+'&date-date_type=submitted_date&abstracts=show&size=50&order=-announced_date_first'\n",
+    "    url2 = 'https://arxiv.org/search/advanced?advanced=&terms-0-field=doi&classification-physics=y&classification-physics_archives=astro-ph&classification-include_cross_list=include&date-year=&date-filter_by=date_range&date-from_date='+date4+'&'+'date-to_date='+date3+'&date-date_type=submitted_date&abstracts=show&size=50&order=-announced_date_first'\n",
+    "\n",
+    "    if ACCEPTED == \"Yes\":\n",
+    "        url1 = insert_str(url1, '&terms-0-operator=AND&terms-0-term=10.*', '&terms-0-field=doi')\n",
+    "        url2 = insert_str(url2, '&terms-0-operator=AND&terms-0-term=10.*', '&terms-0-field=doi')\n",
+    "\n",
-    "    req1 = requests.get(url1, headers)\n",
-    "    req2 = requests.get(url2, headers)\n",
+    "    req1 = requests.get(url1, headers=headers)\n",
+    "    req2 = requests.get(url2, headers=headers)\n",
     "    soup1 = BeautifulSoup(req1.content, 'html.parser')\n",
@@ -172,7 +195,10 @@
     "plt.plot(day2020,numratio)\n",
     "plt.plot(day2020,numratio,'bo')\n",
     "plt.xlabel('Date', fontsize=18,labelpad=20)\n",
-    "plt.ylabel('Number of astrophysics papers submitted to arXiv \n relative to same week in previous year', fontsize=16, labelpad=20)\n",
+    "if ACCEPTED == 'Yes':\n",
+    "    plt.ylabel('Number of accepted astrophysics papers in arXiv \n relative to same week in previous year', fontsize=16, labelpad=20)\n",
+    "else:\n",
+    "    plt.ylabel('Number of astrophysics papers submitted to arXiv \n relative to same week in previous year', fontsize=16, labelpad=20)\n",
     "#plt.plot(day2020,num2019,label='2019')\n",
     "#plt.plot(day2020,num2020,label='2020')\n",
     "#plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc='lower left', ncol=2, mode=\"expand\", borderaxespad=0.)\n",