Skip to content

Commit

Permalink
fix: [favicon] crawler favicon
Browse files (browse the repository at this point in the history)
  • Loading branch information
Terrtia committed Feb 21, 2024
1 parent c219feb commit 81c4dde
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions bin/crawlers/Crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from lib.objects import CookiesNames
from lib.objects import Etags
from lib.objects.Domains import Domain
from lib.objects import Favicons
from lib.objects.Items import Item
from lib.objects import Screenshots
from lib.objects import Titles
Expand Down Expand Up @@ -198,6 +199,7 @@ def enqueue_capture(self, task_uuid, priority):
user_agent=task.get_user_agent(),
proxy=task.get_proxy(),
cookies=task.get_cookies(),
with_favicon=True,
force=force,
general_timeout_in_sec=90) # TODO increase timeout if onion ????

Expand Down Expand Up @@ -245,6 +247,7 @@ def compute(self, capture):
parent_id = task.get_parent()

entries = self.lacus.get_capture(capture.uuid)

print(entries.get('status'))
self.har = task.get_har()
self.screenshot = task.get_screenshot()
Expand Down Expand Up @@ -369,6 +372,12 @@ def save_capture_response(self, parent_id, entries):
etag.add(self.date.replace('/', ''), self.domain)
crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', ''))

# FAVICON
if entries.get('potential_favicons'):
for favicon in entries['potential_favicons']:
fav = Favicons.create(favicon)
fav.add(item.get_date(), item)

# Next Children
entries_children = entries.get('children')
if entries_children:
Expand Down

0 comments on commit 81c4dde

Please sign in to comment.