Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement minimal EXIF and IPTC tag support #40

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
47 changes: 47 additions & 0 deletions docs/README.md
Expand Up @@ -24,6 +24,9 @@ The following options are available in the ``hugophotoswipe.yml`` file:
| jpeg_progressive | False | Output progressive JPEGs |
| jpeg_optimize | False | Optimize JPEG output |
| jpeg_quality | 75 | JPEG quality factor |
| tag_map | None | Dictionary mapping photo properties to exif/iptc tags |
| exif | None | EXIF tags to include or exclude when loading metadata |
| iptc | None | IPTC tags to include or exclude when loading metadata |

Naturally, the jpeg options are only applied when ``output_format`` is
``jpg``.
Expand All @@ -42,6 +45,50 @@ unchanged. When a single number is given, the *maximum* dimension of the photo
will be reduced to the given number, and the other dimension is chosen
according to the aspect ratio.

EXIF/IPTC Tags
--------------

You can use the metadata embedded in the photos as a starter to fill out captions
and copyright information. HugoPhotoSwipe will use this information when you run
`hps update` in the following manner:
1. Prefer the information already specified in the album file
2. Use the information in the metadata

Using metadata requires a mapping for each field you want to be populated. Currently
only caption and copyright are supported. In the configuration file, add the `tag_map`
setting as follows:

```yaml
tag_map:
  caption: exif.ImageDescription
  copyright: exif.Artist
```

Caption will be saved to the album yaml file with the photo information. You can then
edit it there if you wish. Because the album file is given priority, your changes will
not be overridden, even after an update. Copyright is not yet used as this is currently
an album-level property.

The format of the option is: `<property_name>: iptc.<tag_name>` or `<property_name>: exif.<tag_name>`
(for example: `copyright: exif.Artist`). Tag names may include spaces. A full list of tags can be found here:
* [IPTC tags](https://github.com/jamesacampbell/iptcinfo3/blob/a9cea6cb1981e4ad29cf317d44419e4fd45c2170/iptcinfo3.py#L445)
* [EXIF tags](https://github.com/python-pillow/Pillow/blob/master/src/PIL/ExifTags.py)

The following two configuration file options allow you more granular control over what
metadata HugoPhotoSwipe loads from the file. The intended use is to reduce the number of
tags that are saved. If you want to use a tag in the `tag_map`, it must be included or
not be excluded. Specifying `include: []` will result in no data being loaded.
Not specifying either option will result in all tags being loaded.

```yaml
iptc:
  include: ['tag1', 'tag2', ...]
  exclude: ['tag1', 'tag2', ...]
exif:
  include: ['tag1', 'tag2', ...]
  exclude: ['tag1', 'tag2', ...]
```

Shortcodes
==========

Expand Down
3 changes: 3 additions & 0 deletions hugophotoswipe/config.py
Expand Up @@ -53,6 +53,9 @@
"jpeg_quality": 75,
"fast": False,
"verbose": False,
"exif": None,
"iptc": None,
"tag_map": None,
}

DONT_DUMP = ["verbose", "fast"]
Expand Down
141 changes: 118 additions & 23 deletions hugophotoswipe/photo.py
Expand Up @@ -17,8 +17,10 @@
import smartcrop
import tempfile

from PIL.TiffImagePlugin import IFDRational
from PIL import Image
from PIL import ExifTags
from PIL.ExifTags import TAGS, GPSTAGS
import iptcinfo3 as iptc3
from functools import total_ordering
from textwrap import wrap
from textwrap import indent
Expand All @@ -27,17 +29,19 @@
from .config import settings
from .utils import cached_property

# logging.getLogger('iptcinfo').setLevel('ERROR') # Avoid warnings about missing / undecoded IPTC tags.


@total_ordering
class Photo(object):
def __init__(
self,
album_name=None,
original_path=None,
name=None,
alt=None,
caption=None,
copyright=None,
self,
album_name=None,
original_path=None,
name=None,
alt=None,
caption=None,
copyright=None,
):
# album
self.album_name = album_name
Expand All @@ -50,11 +54,13 @@ def __init__(
# names
self.name = name
self.alt = alt
self.caption = caption
self._caption = caption

# other
self.copyright = copyright
self._copyright = copyright
self.cover_path = None
self._exif = None
self._iptc = None

# caching
self._original_img = None
Expand All @@ -77,29 +83,27 @@ def original_image(self):

def _load_original_image(self):
img = Image.open(self.original_path)
self._load_exif(img)
self._load_iptc()

# if there is no exif data, simply return the image
exif = img._getexif()
exif = self._exif
if exif is None:
return img

# get the orientation tag code from the ExifTags dict
orientation = next(
(k for k, v in ExifTags.TAGS.items() if v == "Orientation"), None
)
orientation = exif.get('Orientation')
if orientation is None:
print("Couldn't find orientation tag in ExifTags.TAGS")
return img

# if no orientation is defined in the exif, return the image
if not orientation in exif:
# if no orientation is defined in the exif, return the image
return img

logging.warning(f"Issue with orientation: ({orientation}).")
# rotate the image according to the exif
if exif[orientation] == 3:
if orientation == 3:
return img.rotate(180, expand=True)
elif exif[orientation] == 6:
elif orientation == 6:
return img.rotate(270, expand=True)
elif exif[orientation] == 8:
elif orientation == 8:
return img.rotate(90, expand=True)

# fallback for unhandled rotation tags
Expand All @@ -112,6 +116,91 @@ def free(self):
del self._original_img
self._original_img = None

def _load_exif(self, image):
    """Read the EXIF tags of ``image`` into ``self._exif``.

    Tags are stored under their readable names from ``PIL.ExifTags.TAGS``.
    When ``settings.exif`` is set, its ``include`` / ``exclude`` entries
    restrict which tags are kept; ``Orientation`` is always kept. Values
    that are ``IFDRational`` instances are dropped. If a ``GPSInfo`` tag
    was kept, its entries are merged into the top level under their
    ``GPSTAGS`` names (falling back to the raw key) and ``GPSInfo`` itself
    is removed.

    :param image: an opened PIL image. An image object that does not
        provide the private ``_getexif`` helper is treated as having no
        EXIF data.
    """
    if settings.exif:
        wanted = set(
            _filter_tags(
                TAGS.values(),
                settings.exif.get("include"),
                settings.exif.get("exclude"),
            )
        )
        wanted.add("Orientation")  # always need this for resize
    else:
        # A set keeps the per-tag membership test below constant-time
        # instead of scanning the dict view for every tag.
        wanted = set(TAGS.values())

    # ``_getexif`` is a private helper; guard against image objects that
    # do not have it rather than failing with AttributeError.
    getexif = getattr(image, "_getexif", None)
    raw = (getexif() if callable(getexif) else None) or {}

    exif_data = {}
    for code, value in raw.items():
        name = TAGS.get(code)
        # Filter complex data values
        if name in wanted and not isinstance(value, IFDRational):
            exif_data[name] = value

    # Flatten the nested GPS block into the top-level dictionary.
    for code, value in exif_data.pop("GPSInfo", {}).items():
        exif_data[GPSTAGS.get(code, code)] = value

    self._exif = exif_data

def _load_iptc(self):
    """Read the IPTC tags of the original file into ``self._iptc``.

    The tag names come from ``iptc3.c_datasets_r``; when ``settings.iptc``
    is set, its ``include`` / ``exclude`` entries restrict them. Every
    selected tag gets an entry in the result, holding whatever
    ``IPTCInfo`` returns for it. ``bytes`` values, including ``bytes``
    elements of list values, are decoded to ``str``.
    """
    if settings.iptc:
        tags = _filter_tags(
            iptc3.c_datasets_r.keys(),
            settings.iptc.get("include"),
            settings.iptc.get("exclude"),
        )
    else:
        tags = iptc3.c_datasets_r.keys()

    def _to_text(value):
        # Metadata written in another charset must not abort loading the
        # photo, so undecodable bytes are replaced instead of raising.
        if isinstance(value, bytes):
            return value.decode("utf-8", errors="replace")
        return value

    info = iptc3.IPTCInfo(self.original_path)
    iptc = {}
    for name in tags:
        value = info[name]  # look the tag up once
        if isinstance(value, list):
            iptc[name] = [_to_text(item) for item in value]
        else:
            iptc[name] = _to_text(value)
    self._iptc = iptc

@property
def exif(self):
    """Return the EXIF dictionary, loading the original image on first use.

    ``None`` means "not loaded yet". An empty dictionary is a valid loaded
    result (a photo without EXIF data) and must not trigger another load.
    """
    if self._exif is None:
        _ = self.original_image  # Trigger loading image and exif data
    return self._exif

@property
def iptc(self):
    """Return the IPTC dictionary, loading the original image on first use.

    ``None`` means "not loaded yet". An empty dictionary is a valid loaded
    result and must not trigger another load.
    """
    if self._iptc is None:
        _ = self.original_image  # Trigger loading image and iptc data
    return self._iptc

def _get_tag_value(self, tag):
    """Return the metadata value referenced by ``tag``.

    :param tag: a string of the form ``exif.<tag_name>`` or
        ``iptc.<tag_name>``. The namespace is matched case-insensitively;
        the tag name is used as given.
    :return: the stored value, or ``""`` when the tag is absent, stored as
        ``None``, or the metadata dictionary is not loaded.
    :raises ValueError: if ``tag`` is not a string, does not consist of
        exactly two dot-separated parts, or names a namespace other than
        ``exif`` / ``iptc``.
    """
    # Explicit validation instead of ``assert``, which is stripped under -O.
    parts = tag.split(".") if isinstance(tag, str) else []
    if len(parts) != 2:
        raise ValueError(
            f"Tag(s) improperly formatted. "
            f"Tags should be of format iptc.<tag_name> or exif.<tag_name>. Provided: ({tag})")
    obj, name = parts
    source = obj.lower()
    if source not in ("exif", "iptc"):
        raise ValueError(
            f"Tags can only reference iptc or exif data. "
            f"Tags should be of format: iptc.<tag_name> or exif.<tag_name>. Provided: ({tag})")
    data = getattr(self, source, {})
    if data is None:
        logging.warning(f'Tag "{tag}" specified but {obj} not loaded. Returning "".')
        data = {}
    # A tag can be present with a None value; report it as missing so that
    # callers applying str() do not end up with the text "None".
    value = data.get(name)
    return "" if value is None else value

@property
def caption(self):
    """Return the caption of the photo as a string.

    A caption set explicitly on the photo takes precedence. Otherwise the
    tag configured under ``caption`` in ``settings.tag_map`` is looked up
    in the photo metadata. Returns ``""`` when neither yields a value.
    """
    if self._caption:
        return self._caption
    tag_map = settings.tag_map
    tag = tag_map.get('caption') if tag_map else None
    if not tag:
        return ""
    value = self._get_tag_value(tag)
    # Guard against a None value: str(None) would produce the caption "None".
    return "" if value is None else str(value)

@property
def copyright(self):
    """Return the copyright notice of the photo as a string.

    A copyright set explicitly on the photo takes precedence. Otherwise the
    tag configured under ``copyright`` in ``settings.tag_map`` is looked up
    in the photo metadata. Returns ``""`` when neither yields a value.
    """
    if self._copyright:
        return self._copyright
    tag_map = settings.tag_map
    tag = tag_map.get('copyright') if tag_map else None
    if not tag:
        return ""
    value = self._get_tag_value(tag)
    # Guard against a None value: str(None) would produce the text "None".
    return "" if value is None else str(value)

def has_sizes(self):
""" Check if all necessary sizes exist on disk """
if self.name is None:
Expand All @@ -123,7 +212,7 @@ def has_sizes(self):
if not os.path.exists(self.thumb_path):
return False
if (not self.cover_path is None) and (
not os.path.exists(self.cover_path)
not os.path.exists(self.cover_path)
):
return False
return True
Expand Down Expand Up @@ -435,3 +524,9 @@ def __eq__(self, other):

def __del__(self):
    """Call ``free()`` when the object is finalized."""
    self.free()


def _filter_tags(tags, include=None, exclude=None):
    """Lazily select the entries of ``tags`` allowed by the two filters.

    :param tags: iterable of tag names.
    :param include: when not ``None``, only tags contained in it are kept.
        An empty collection therefore keeps nothing, as the README states
        for ``include: []``.
    :param exclude: when given and non-empty, tags contained in it are
        dropped.
    :return: an iterator over the selected tags, in their original order.
    """
    def _allowed(tag):
        # ``None`` means "no include filter"; an empty list is a filter
        # that matches nothing, so the test must be ``is not None``.
        if include is not None and tag not in include:
            return False
        return not exclude or tag not in exclude

    return filter(_allowed, tags)
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -27,6 +27,7 @@
"pyyaml",
"tqdm",
"smartcrop",
"iptcinfo3",
]

docs_require = []
Expand Down
6 changes: 6 additions & 0 deletions tests/test_config.py
Expand Up @@ -32,6 +32,8 @@ def test_config_1(self):
self.assertEqual(line(fp), "dirname_large: large")
self.assertEqual(line(fp), "dirname_small: small")
self.assertEqual(line(fp), "dirname_thumb: thumb")
self.assertEqual(line(fp), "exif:")
self.assertEqual(line(fp), "iptc:")
self.assertEqual(line(fp), "jpeg_optimize: False")
self.assertEqual(line(fp), "jpeg_progressive: False")
self.assertEqual(line(fp), "jpeg_quality: 75")
Expand All @@ -40,6 +42,7 @@ def test_config_1(self):
self.assertEqual(line(fp), "output_format: jpg")
self.assertEqual(line(fp), "photo_dir: photos")
self.assertEqual(line(fp), "smartcrop_js_path:")
self.assertEqual(line(fp), "tag_map:")
self.assertEqual(line(fp), "url_prefix:")
self.assertEqual(line(fp), "use_smartcrop_js: False")

Expand All @@ -65,6 +68,8 @@ def test_config_2(self):
self.assertEqual(line(fp), "dirname_large: large")
self.assertEqual(line(fp), "dirname_small: small")
self.assertEqual(line(fp), "dirname_thumb: thumb")
self.assertEqual(line(fp), "exif:")
self.assertEqual(line(fp), "iptc:")
self.assertEqual(line(fp), "jpeg_optimize: False")
self.assertEqual(line(fp), "jpeg_progressive: True")
self.assertEqual(line(fp), "jpeg_quality: 75")
Expand All @@ -73,6 +78,7 @@ def test_config_2(self):
self.assertEqual(line(fp), "output_format: jpg")
self.assertEqual(line(fp), "photo_dir: photo_files")
self.assertEqual(line(fp), "smartcrop_js_path:")
self.assertEqual(line(fp), "tag_map:")
self.assertEqual(line(fp), "url_prefix:")
self.assertEqual(line(fp), "use_smartcrop_js: False")

Expand Down
61 changes: 60 additions & 1 deletion tests/test_photo.py
Expand Up @@ -196,7 +196,66 @@ def test_thumb(self):

def test_sha256sum(self):
self.assertEqual(self.photo.sha256sum(),
"c2fdf14c548a08032fd06e6036197fc7e9c262e6d06fac40e54ec5dd2ce6912f")
"c2fdf14c548a08032fd06e6036197fc7e9c262e6d06fac40e54ec5dd2ce6912f")


class PhotoTagsTestCase(unittest.TestCase):
    """Tests for filling photo properties from EXIF/IPTC tags via ``tag_map``."""

    def setUp(self):
        here = os.path.dirname(os.path.realpath(__file__))
        test_file = os.path.join(here, "data", "dogs", "dog-2.jpg")
        self.photo = Photo(
            album_name="test_album",
            original_path=test_file,
            name="dog_2",
            alt="Alt text",
            caption="caption text",
            copyright=None,
        )
        self._tmpdir = tempfile.mkdtemp(prefix="hps_photo_")
        # Start every test from the default settings.
        self._reset_settings()

    def tearDown(self):
        shutil.rmtree(self._tmpdir)
        # Also reset afterwards, so a tag_map set by the last test does not
        # leak into test cases that run later in the same process.
        self._reset_settings()

    @staticmethod
    def _reset_settings():
        # Re-running the initializer without arguments restores the defaults.
        settings.__init__(**dict())

    def test_iptc_tags_retain_default_behaviour(self):
        # An explicitly set caption wins over the mapped IPTC tag.
        setattr(settings, "tag_map", {"caption": "iptc.headline"})
        self.assertEqual("caption text", self.photo.caption)

    def test_iptc_tags_backfill(self):
        # Without an explicit caption the mapped IPTC tag is used.
        setattr(self.photo, "_caption", None)
        setattr(settings, "tag_map", {"caption": "iptc.headline"})
        self.assertEqual(
            "A photo by Andrew Branch. unsplash.com/photos/owWHkSUmCCQ",
            self.photo.caption,
        )

    def test_exif_tags_retain_default_behaviour(self):
        # An explicitly set copyright wins over the mapped EXIF tag.
        setattr(settings, "tag_map", {"copyright": "exif.Copyright"})
        setattr(self.photo, "_copyright", "The other dog")
        self.assertEqual("The other dog", self.photo.copyright)

    def test_exif_tags_backfill(self):
        # Configure Tag Map to point copyright to a filled-out EXIF tag.
        setattr(settings, "tag_map", {"copyright": "exif.Copyright"})
        self.assertEqual(
            "Free (do whatever you want) high-resolution photos. unsplash.com/license",
            self.photo.copyright,
        )

    def test_incorrect_tags_namespace_value_error(self):
        # A namespace other than exif/iptc must be rejected.
        setattr(settings, "tag_map", {"copyright": "bad.Copyright"})
        with self.assertRaises(ValueError) as cm:
            _ = self.photo.copyright
        self.assertEqual(
            cm.exception.args[0],
            "Tags can only reference iptc or exif data. "
            "Tags should be of format: iptc.<tag_name> or exif.<tag_name>. Provided: (bad.Copyright)",
        )

    def test_incorrect_tags_format_value_error(self):
        # A tag without the "<namespace>.<tag_name>" separator must be rejected.
        setattr(settings, "tag_map", {"copyright": "exif_Copyright"})
        with self.assertRaises(ValueError) as cm:
            _ = self.photo.copyright
        self.assertEqual(
            cm.exception.args[0],
            "Tag(s) improperly formatted. "
            "Tags should be of format iptc.<tag_name> or exif.<tag_name>. Provided: (exif_Copyright)",
        )


if __name__ == "__main__":
Expand Down