-
Notifications
You must be signed in to change notification settings - Fork 3
/
get_springer_books.py
29 lines (20 loc) · 1 KB
/
get_springer_books.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import os
import re

import pandas as pd
import progressbar
import requests
from bs4 import BeautifulSoup, SoupStrainer
# Download every book listed in "Springer Ebooks.xlsx" as a PDF into ./books/,
# naming each file "<title> by <author>.pdf".  The run stops at the first
# failed download and reports which book failed and why.

# Directory the PDFs are written to (created on demand below).
OUTPUT_DIR = "./books/"
# Seconds to wait for the server before giving up; without a timeout a single
# stalled connection would block the whole run indefinitely.
REQUEST_TIMEOUT = 30


def _as_text(value):
    """Return *value* as the interpreter's native string type.

    On Python 2 this is the UTF-8 encoded byte string the script has always
    used.  Python 3 has no ``unicode`` builtin, so ``str`` is used there.
    """
    try:
        return unicode(value).encode('UTF8')  # noqa: F821 - Python 2 builtin
    except NameError:
        return str(value)


def _safe_name(text):
    """Return *text* reduced to alphanumeric characters and spaces."""
    return ''.join(ch for ch in text if ch.isalnum() or ch == ' ')


def _pdf_url(book_url):
    """Turn a resolved book page URL into the URL of the book's PDF."""
    # Rewrite only the "/book/" path segment, and only once: replacing the
    # bare substring "book" would also mangle any other "book" in the URL.
    return book_url.replace("/book/", "/content/pdf/", 1) + ".pdf"


df = pd.read_excel(r"Springer Ebooks.xlsx", header=1)
books = [
    (_as_text(title), open_url, _as_text(author))
    for title, open_url, author in zip(df['Book Title'], df['OpenURL'], df['Author'])
]

# open() does not create missing directories, so make sure the target exists.
if not os.path.isdir(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

bar = progressbar.ProgressBar(maxval=len(books)).start()
index = 0
try:
    for index, book in enumerate(books):
        title, open_url, author = book
        # Follow the OpenURL redirects to find the book's final page URL.
        url = requests.head(open_url, allow_redirects=True,
                            timeout=REQUEST_TIMEOUT).url
        url = _pdf_url(url)
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Raise on 4xx/5xx instead of saving an error page as a ".pdf" file.
        response.raise_for_status()
        target = OUTPUT_DIR + _safe_name(title) + " by " + _safe_name(author) + ".pdf"
        with open(target, 'wb') as f:
            f.write(response.content)
        print("{0} {1} {2}".format(index, title, author))
        # index is 0-based; report the number of finished books so the bar
        # can actually reach its maximum.
        bar.update(index + 1)
except (requests.RequestException, IOError, OSError) as err:
    # Network failures, HTTP error statuses and file-system errors end the
    # run; anything else (including Ctrl-C) propagates normally.
    print("Download unsuccessful: book {0} ({1}): {2}".format(
        index, books[index][0], err))
else:
    # Only reached when every book was downloaded without an error, so a
    # failure on the last book can no longer be reported as success.
    bar.finish()
    print("Downloading " + str(len(books)) + " books complete")