Skip to content

Commit

Permalink
fix: html with metadata image (#328)
Browse files (browse the repository at this point in the history)
  • Loading branch information
andremacola committed Apr 21, 2023
1 parent 3919970 commit 82043f7
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
3 changes: 3 additions & 0 deletions tests/metadata_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,9 @@ def test_images():
assert metadata.image == 'https://example.org/example-opengraph.jpg'
metadata = extract_metadata('<html><head><meta property="twitter:image" content="https://example.org/example-twitter.jpg"></html>')
assert metadata.image == 'https://example.org/example-twitter.jpg'
+ '''Without Image'''
+ metadata = extract_metadata('<html><head><meta name="robots" content="index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1" /></html>')
+ assert metadata.image == None

def test_Document_as_dict():
"""Tests that the dict serialization works and preserves data."""
Expand Down
2 changes: 1 addition & 1 deletion trafilatura/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ def extract_image(tree):
'''Search meta tags following the OpenGraph guidelines (https://ogp.me/)
and search meta tags with Twitter Image'''

- for elem in tree.xpath('.//head/meta[@property="og:image" or "og:image:url"][@content]'):
+ for elem in tree.xpath('.//head/meta[@property="og:image" or @property="og:image:url"][@content]'):
return elem.get('content')

for elem in tree.xpath('.//head/meta[@property="twitter:image" or @property="twitter:image:src"][@content]'):
Expand Down

0 comments on commit 82043f7

Please sign in to comment.