Skip to content

Commit

Permalink
Added loop for more PacktPub pages to solve issue ozzieperez#26
Browse files Browse the repository at this point in the history
Added loop for more PacktPub pages to solve issue ozzieperez#26 as a workaround for new PacktPub ebooks pages structure.
  • Loading branch information
vlaxa committed Jul 20, 2018
1 parent b9b7592 commit daef833
Showing 1 changed file with 25 additions and 15 deletions.
40 changes: 25 additions & 15 deletions downloader.py
Expand Up @@ -316,21 +316,31 @@ def main(argv):
if book_assets:

# get the list of books
books_page = session.get("https://www.packtpub.com/account/my-ebooks", verify=True, headers=headers)
books_tree = html.fromstring(books_page.content)
book_nodes = books_tree.xpath("//div[@id='product-account-list']/div[contains(@class,'product-line unseen')]")

print('###########################################################################')
print("FOUND {0} BOOKS: STARTING DOWNLOADS".format(len(book_nodes)))
print('###########################################################################')

# loop through the books
for book in book_nodes:

# download the book
books_directory = os.path.join(root_directory, "books")
download_book(book, books_directory, book_assets, session, headers)

page = 1
books_page = session.get("https://www.packtpub.com/account/my-ebooks?page={0}".format(page), verify=True, headers=headers)
pages_tree = html.fromstring(books_page.content)
pages_nodes = pages_tree.xpath("//*[contains(@class,'solr-page-page-selector-page')]")
pages_max = (len(pages_nodes)) + 1
# loop over every page of the PacktPub ebooks listing
for page in range((pages_max)):
page += 1
url = 'https://www.packtpub.com/account/my-ebooks?page='
url = (url + str(page))
books_page = session.get(url, verify=True, headers=headers)
books_tree = html.fromstring(books_page.content)
book_nodes = books_tree.xpath("//div[@id='product-account-list']/div[contains(@class,'product-line unseen')]")

print('###########################################################################')
print("FOUND {0} BOOKS: STARTING DOWNLOADS".format(len(book_nodes)))
print('###########################################################################')

# loop through the books on this page
for book in book_nodes:

# download the book
books_directory = os.path.join(root_directory, "books")
download_book(book, books_directory, book_assets, session, headers)

if video_assets:

# get the list of videos
Expand Down

0 comments on commit daef833

Please sign in to comment.