Skip to content

Commit

Permalink
Implement minimal EXIF and IPTC tag support
Browse files Browse the repository at this point in the history
Use PIL's TAGS to handle EXIF tags and
iptcinfo3 to handle IPTC tags. This allows
them to be used as fallback sources for the
copyright and caption of photos.

The default behaviour does not change unless
the tag_map, exif or iptc setting is specified
in the configuration file.
  • Loading branch information
sebastiaan-lampo committed Jun 13, 2022
1 parent d1b32a6 commit cb39946
Show file tree
Hide file tree
Showing 7 changed files with 213 additions and 12 deletions.
50 changes: 50 additions & 0 deletions docs/README.md
Expand Up @@ -24,6 +24,9 @@ The following options are available in the ``hugophotoswipe.yml`` file:
| jpeg_progressive | False | Output progressive JPEGs |
| jpeg_optimize | False | Optimize JPEG output |
| jpeg_quality | 75 | JPEG quality factor |
| tag_map | None | Dictionary map of exif/iptc tags to photo properties |
| exif | None | List of tags to be included or excluded |
| iptc | None | List of tags to be included or excluded |

Naturally, the jpeg options are only applied when ``output_format`` is
``jpg``.
Expand All @@ -42,6 +45,53 @@ unchanged. When a single number is given, the *maximum* dimension of the photo
will be reduced to the given number, and the other dimension is chosen
according to the aspect ratio.

EXIF/IPTC Tags
--------------

You can use the metadata embedded in the photos as a starter to fill out captions
and copyright information. HugoPhotoSwipe will use this information when you run
`hps update` in the following manner:
1. Prefer the information already specified in the album file
2. Use the information in the metadata

Using metadata requires a mapping for each field you want to be populated. Currently
only caption and copyright are supported. In the configuration file, add the `tag_map`
setting as follows:

```yaml
tag_map:
  caption: exif.ImageDescription
  copyright: exif.Artist
```

Caption will be saved to the album yaml file with the photo information. You can then
edit it there if you wish. Because the album file is given priority, your changes will
not be overridden, even after an update.

Copyright information is loaded from the photos in the album and populated into the
album copyright. All unique photo copyright values are added to the album, comma
separated.

The format of the option is: `property: iptc/exif.tag`. Tag may include spaces. A full
list of tags can be found here:
* [IPTC tags](https://github.com/jamesacampbell/iptcinfo3/blob/a9cea6cb1981e4ad29cf317d44419e4fd45c2170/iptcinfo3.py#L445)
* [EXIF tags](https://github.com/python-pillow/Pillow/blob/master/src/PIL/ExifTags.py)

The following two configuration file options allow you more granular control over what
metadata HugoPhotoSwipe loads from the file. The intended use is to reduce the number of
tags that are saved. If you want to use a tag in the `tag_map`, it must be included or
not be excluded. Specifying `include: []` will result in no data being loaded.
Not specifying either option will result in all tags being loaded.

```yaml
iptc:
  include: ['tag1', 'tag2', ...]
  exclude: ['tag1', 'tag2', ...]
exif:
  include: ['tag1', 'tag2', ...]
  exclude: ['tag1', 'tag2', ...]
```

Shortcodes
==========

Expand Down
6 changes: 2 additions & 4 deletions hugophotoswipe/album.py
Expand Up @@ -232,10 +232,8 @@ def load(cls, album_dir):
all_photos = []
for p in album.photos:
photo_path = os.path.join(album_dir, settings.photo_dir, p["file"])
caption = (
"" if p.get("caption", None) is None else p["caption"].strip()
)
alt = "" if p.get("alt", None) is None else p["alt"].strip()
caption = p.get("caption", "").strip()
alt = p.get("alt", "").strip()
photo = Photo(
album_name=album.name,
original_path=photo_path,
Expand Down
3 changes: 3 additions & 0 deletions hugophotoswipe/config.py
Expand Up @@ -53,6 +53,9 @@
"jpeg_quality": 75,
"fast": False,
"verbose": False,
"exif": None,
"iptc": None,
"tag_map": None,
}

DONT_DUMP = ["verbose", "fast"]
Expand Down
116 changes: 109 additions & 7 deletions hugophotoswipe/photo.py
Expand Up @@ -17,8 +17,10 @@
import smartcrop
import tempfile

from PIL.TiffImagePlugin import IFDRational
from PIL import Image
from PIL import ExifTags
from PIL.ExifTags import TAGS, GPSTAGS
import iptcinfo3 as iptc3
from functools import total_ordering
from textwrap import wrap
from textwrap import indent
Expand Down Expand Up @@ -50,11 +52,13 @@ def __init__(
# names
self.name = name
self.alt = alt
self.caption = caption
self._caption = caption

# other
self.copyright = copyright
self._copyright = copyright
self.cover_path = None
self._exif = None
self._iptc = None

# caching
self._original_img = None
Expand All @@ -78,14 +82,13 @@ def original_image(self):
def _load_original_image(self):
img = Image.open(self.original_path)
# if there is no exif data, simply return the image
exif = img._getexif()
self._load_exif(img)
exif = self._exif
if exif is None:
return img

# get the orientation tag code from the ExifTags dict
orientation = next(
(k for k, v in ExifTags.TAGS.items() if v == "Orientation"), None
)
orientation = exif.get('Orientation')
if orientation is None:
print("Couldn't find orientation tag in ExifTags.TAGS")
return img
Expand All @@ -105,13 +108,105 @@ def _load_original_image(self):
# fallback for unhandled rotation tags
return img

def _load_exif(self, image):
    """Read the EXIF metadata of ``image`` and cache it on ``self._exif``.

    The result is a dict keyed by decoded tag name. When ``settings.exif``
    is set, only tags passing its ``include``/``exclude`` lists are kept;
    otherwise every tag named in ``TAGS`` is kept. Values that are
    ``IFDRational`` instances are skipped, and the entries of a retained
    ``GPSInfo`` tag are merged into the top level under their ``GPSTAGS``
    names (or the raw key when no name is known).

    Args:
        image: An opened PIL image.
    """
    if settings.exif:
        allowed = set(_filter_tags(TAGS.values(),
                                   settings.exif.get('include'),
                                   settings.exif.get('exclude')))
        allowed.add('Orientation')  # always need this for resize
    else:
        # A set gives constant-time membership tests in the loop below.
        allowed = set(TAGS.values())

    # Not every Pillow image class provides _getexif(); treat a missing
    # method the same as an image without EXIF data instead of crashing.
    read_exif = getattr(image, '_getexif', None)
    raw = (read_exif() if read_exif is not None else None) or {}

    exif_data = {}
    for tag_id, value in raw.items():
        name = TAGS.get(tag_id)
        # Filter complex data values
        if name in allowed and not isinstance(value, IFDRational):
            exif_data[name] = value

    # Flatten the nested GPS block into the top-level dictionary.
    for gps_id, value in exif_data.pop('GPSInfo', {}).items():
        exif_data[GPSTAGS.get(gps_id, gps_id)] = value

    self._exif = exif_data

def free(self):
    """Close and drop the cached original image, if there is one."""
    cached = getattr(self, "_original_img", None)
    if cached:
        cached.close()
        del self._original_img
    # Leave the cache slot in its "not loaded" state.
    self._original_img = None

@property
def iptc(self):
    """Return the photo's IPTC metadata as a dict, loading it on first use.

    The tag names to read come from ``iptc3.c_datasets_r``, narrowed by the
    ``include``/``exclude`` lists of ``settings.iptc`` when that setting is
    present. ``bytes`` values (and ``bytes`` items inside list values) are
    decoded as UTF-8; any other value is stored unchanged. The result is
    cached on ``self._iptc``.
    """
    if self._iptc is None:
        if settings.iptc:
            tags = _filter_tags(iptc3.c_datasets_r.keys(),
                                settings.iptc.get('include'),
                                settings.iptc.get('exclude'))
        else:
            tags = iptc3.c_datasets_r.keys()

        def _as_text(value):
            # Replace undecodable bytes rather than raising: one badly
            # encoded tag should not make all metadata unavailable.
            if isinstance(value, bytes):
                return value.decode('utf-8', errors='replace')
            return value

        info = iptc3.IPTCInfo(self.original_path)
        decoded = {}
        for key in tags:
            value = info[key]
            if isinstance(value, list):
                # List items get the same treatment as scalar values, so
                # items that are already text no longer crash on .decode.
                decoded[key] = [_as_text(item) for item in value]
            else:
                decoded[key] = _as_text(value)
        self._iptc = decoded

    return self._iptc

@property
def exif(self):
    """Return the cached EXIF dict, loading the original image if needed.

    ``self._exif`` stays ``None`` until the image has been opened once.
    The check is against ``None`` rather than truthiness so that a photo
    whose EXIF dict is empty is not re-opened on every access.
    """
    if self._exif is None:
        _ = self.original_image  # Trigger loading image and exif data
    return self._exif

def _get_tag_value(self, tag):
    """Look up a ``source.tag`` reference in the photo's metadata.

    Args:
        tag: Reference of the form ``exif.<name>`` or ``iptc.<name>``. The
            source part is matched case-insensitively; the name part is
            used as the dictionary key as given.

    Returns:
        The stored value, or ``""`` when the tag is absent or the selected
        metadata source is ``None``.

    Raises:
        ValueError: If ``tag`` is not a string with exactly one ``.``, or
            if the source part is neither ``exif`` nor ``iptc``.
    """
    malformed = (
        f"Tag improperly formatted. Should be of format (exif/iptc).tag Provided: ({tag})"
    )
    # Validate explicitly: an assert would be stripped under ``python -O``.
    if not isinstance(tag, str):
        raise ValueError(malformed)

    parts = tag.split('.')
    if len(parts) != 2:
        logging.warning("Expected exactly one '.' in tag %r", tag)
        raise ValueError(malformed)

    obj, name = parts
    source = obj.lower()
    if source not in ('exif', 'iptc'):
        raise ValueError(f"Tags can only reference iptc or exif data. ({tag})")

    data = getattr(self, source, {})
    if data is None:
        logging.warning('Tag "%s" specified but %s not loaded. Returning "".', tag, obj)
        data = {}
    return data.get(name, "")

@property
def caption(self):
    """Caption of the photo.

    An explicitly stored caption wins. Otherwise the tag mapped to
    ``caption`` in ``settings.tag_map`` is looked up and returned as a
    string; without such a mapping the result is ``""``.
    """
    if self._caption:
        return self._caption
    tag_map = settings.tag_map
    mapped = tag_map.get('caption') if tag_map else None
    if not mapped:
        return ""
    return str(self._get_tag_value(mapped))

@property
def copyright(self):
    """Copyright notice of the photo.

    An explicitly stored copyright wins. Otherwise the tag mapped to
    ``copyright`` in ``settings.tag_map`` is looked up and returned as a
    string; without such a mapping the result is ``""``.
    """
    if self._copyright:
        return self._copyright
    tag_map = settings.tag_map
    mapped = tag_map.get('copyright') if tag_map else None
    if not mapped:
        return ""
    return str(self._get_tag_value(mapped))

def __getattribute__(self, attr):
    """Resolve attributes normally, falling back to ``settings.tag_map``.

    When the regular lookup raises ``AttributeError`` and ``attr`` has a
    non-empty entry in ``settings.tag_map``, the mapped metadata value is
    returned; otherwise the original error is re-raised.
    """
    try:
        return super().__getattribute__(attr)
    except AttributeError as missing:
        tag_map = settings.tag_map
        mapped = tag_map.get(attr) if tag_map else None
        if mapped:
            return self._get_tag_value(mapped)
        raise missing

def has_sizes(self):
""" Check if all necessary sizes exist on disk """
if self.name is None:
Expand Down Expand Up @@ -435,3 +530,10 @@ def __eq__(self, other):

def __del__(self):
self.free()


def _filter_tags(tags, include=None, exclude=None):
    """Lazily yield the entries of ``tags`` allowed by include/exclude.

    Args:
        tags: Iterable of tag names to filter.
        include: Optional container of tag names to keep. ``None`` means no
            whitelist is applied; an empty container keeps nothing.
        exclude: Optional container of tag names to drop. ``None`` or an
            empty container drops nothing.

    Returns:
        A single-use iterator over the surviving tags in their original
        order.
    """
    # Compare ``include`` against None rather than truthiness: an explicit
    # empty list means "load no tags", not "no filter configured".
    return (
        tag
        for tag in tags
        if (include is None or tag in include)
        and not (exclude and tag in exclude)
    )

1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -27,6 +27,7 @@
"pyyaml",
"tqdm",
"smartcrop",
"iptcinfo3",
]

docs_require = []
Expand Down
6 changes: 6 additions & 0 deletions tests/test_config.py
Expand Up @@ -32,6 +32,8 @@ def test_config_1(self):
self.assertEqual(line(fp), "dirname_large: large")
self.assertEqual(line(fp), "dirname_small: small")
self.assertEqual(line(fp), "dirname_thumb: thumb")
self.assertEqual(line(fp), "exif:")
self.assertEqual(line(fp), "iptc:")
self.assertEqual(line(fp), "jpeg_optimize: False")
self.assertEqual(line(fp), "jpeg_progressive: False")
self.assertEqual(line(fp), "jpeg_quality: 75")
Expand All @@ -40,6 +42,7 @@ def test_config_1(self):
self.assertEqual(line(fp), "output_format: jpg")
self.assertEqual(line(fp), "photo_dir: photos")
self.assertEqual(line(fp), "smartcrop_js_path:")
self.assertEqual(line(fp), "tag_map:")
self.assertEqual(line(fp), "url_prefix:")
self.assertEqual(line(fp), "use_smartcrop_js: False")

Expand All @@ -65,6 +68,8 @@ def test_config_2(self):
self.assertEqual(line(fp), "dirname_large: large")
self.assertEqual(line(fp), "dirname_small: small")
self.assertEqual(line(fp), "dirname_thumb: thumb")
self.assertEqual(line(fp), "exif:")
self.assertEqual(line(fp), "iptc:")
self.assertEqual(line(fp), "jpeg_optimize: False")
self.assertEqual(line(fp), "jpeg_progressive: True")
self.assertEqual(line(fp), "jpeg_quality: 75")
Expand All @@ -73,6 +78,7 @@ def test_config_2(self):
self.assertEqual(line(fp), "output_format: jpg")
self.assertEqual(line(fp), "photo_dir: photo_files")
self.assertEqual(line(fp), "smartcrop_js_path:")
self.assertEqual(line(fp), "tag_map:")
self.assertEqual(line(fp), "url_prefix:")
self.assertEqual(line(fp), "use_smartcrop_js: False")

Expand Down
43 changes: 42 additions & 1 deletion tests/test_photo.py
Expand Up @@ -196,7 +196,48 @@ def test_thumb(self):

def test_sha256sum(self):
self.assertEqual(self.photo.sha256sum(),
"c2fdf14c548a08032fd06e6036197fc7e9c262e6d06fac40e54ec5dd2ce6912f")
"c2fdf14c548a08032fd06e6036197fc7e9c262e6d06fac40e54ec5dd2ce6912f")


class PhotoTagsTestCase(unittest.TestCase):
    """EXIF/IPTC tags may back-fill caption and copyright, never override them."""

    def setUp(self):
        here = os.path.dirname(os.path.realpath(__file__))
        test_file = os.path.join(here, "data", "dogs", "dog-2.jpg")
        self.photo = Photo(
            album_name="test_album",
            original_path=test_file,
            name="dog_2",
            alt="Alt text",
            caption="caption text",
            copyright=None,
        )
        self._tmpdir = tempfile.mkdtemp(prefix="hps_photo_")
        # Start every test from the default settings.
        settings.__init__()

    def tearDown(self):
        shutil.rmtree(self._tmpdir)
        # Restore the defaults so the tag_map set by a test does not leak
        # into test cases that run afterwards.
        settings.__init__()

    def test_iptc_tags_retain_default_behaviour(self):
        # An explicit caption wins over the mapped IPTC tag.
        settings.tag_map = {"caption": "iptc.headline"}
        self.assertEqual(self.photo.caption, "caption text")

    def test_iptc_tags_backfill(self):
        # Without an explicit caption the mapped IPTC tag is used.
        self.photo._caption = None
        settings.tag_map = {"caption": "iptc.headline"}
        self.assertEqual(
            self.photo.caption,
            "A photo by Andrew Branch. unsplash.com/photos/owWHkSUmCCQ",
        )

    def test_exif_tags_retain_default_behaviour(self):
        # An explicit copyright wins over the mapped EXIF tag.
        settings.tag_map = {"copyright": "exif.Copyright"}
        self.photo._copyright = "The other dog"
        self.assertEqual(self.photo.copyright, "The other dog")

    def test_exif_tags_backfill(self):
        # Configure Tag Map to point copyright to a filled-out EXIF tag.
        settings.tag_map = {"copyright": "exif.Copyright"}

        self.assertEqual(self.photo.exif.get("Make"), "Canon")
        self.assertEqual(
            self.photo.copyright,
            "Free (do whatever you want) high-resolution photos. unsplash.com/license",
        )


if __name__ == "__main__":
Expand Down

0 comments on commit cb39946

Please sign in to comment.