Skip to content

Commit

Permalink
Add area and parameter hashing to overlay caching (#54)
Browse files Browse the repository at this point in the history
* Update 'add_overlay_from_dict' with improved cache filenames

* Add parameter hashing to caching

* Refactor caching to be a little cleaner

* Add more parameter changes to cache test

* Use sha1 for dict hashing to make deepcode happy

* Use sha256 for dict hashing to make deepcode happy

* Add docstring to hash_dict function

* Add warning about caching font objects in docstring

* Rearrange mention of caching in add_overlay_from_dict docstring
  • Loading branch information
djhoese committed Aug 16, 2021
1 parent 716294a commit bc3b5cd
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 73 deletions.
135 changes: 95 additions & 40 deletions pycoast/cw_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
"""Base class for contour writers."""

import os
import hashlib
import json
import shapefile
import numpy as np
from PIL import Image
Expand Down Expand Up @@ -58,6 +60,14 @@ def get_resolution_from_area(area_def):
return "f"


def hash_dict(dict_to_hash: dict) -> str:
    """Return a SHA-256 hex digest identifying the content of a dict.

    The dict is serialized to JSON with its keys sorted, so dicts with equal
    content give the same digest regardless of key insertion order. All
    values therefore have to be JSON-serializable.
    """
    serialized = json.dumps(dict_to_hash, sort_keys=True)
    return hashlib.sha256(serialized.encode()).hexdigest()


class Proj(pyproj.Proj):
"""Wrapper around pyproj to add in 'is_latlong'."""

Expand Down Expand Up @@ -709,6 +719,15 @@ def _config_to_dict(self, config_file):
def add_overlay_from_dict(self, overlays, area_def, cache_epoch=None, background=None):
"""Create and return a transparent image adding all the overlays contained in the `overlays` dict.
Optionally caches overlay results for faster rendering of images with
the same provided AreaDefinition and parameters. Cached results are
identified by hashing the AreaDefinition and the overlays dictionary.
.. warning::
Font objects are ignored in parameter hashing as they can't be easily hashed.
Therefore font changes will not trigger a new rendering for the cache.
:Parameters:
overlays : dict
overlays configuration
Expand Down Expand Up @@ -745,24 +764,15 @@ def add_overlay_from_dict(self, overlays, area_def, cache_epoch=None, background
# Cache management
cache_file = None
if 'cache' in overlays:
cache_file = overlays['cache']['file'] + '_' + area_def.area_id + '.png'

try:
config_time = cache_epoch or 0
cache_time = os.path.getmtime(cache_file)
# Cache file will be used only if it's newer than config file
cache_is_recent = config_time is not None and config_time < cache_time
should_regen = overlays['cache'].get('regenerate', False)
if cache_is_recent and not should_regen:
foreground = Image.open(cache_file)
logger.info('Using image in cache %s', cache_file)
if background is not None:
background.paste(foreground, mask=foreground.split()[-1])
return foreground
else:
logger.info("Regenerating cache file.")
except OSError:
logger.info("No overlay image found, new overlay image will be saved in cache.")
cache_file = self._generate_cache_filename(
overlays['cache']['file'],
area_def,
overlays,
)
regenerate = overlays['cache'].get('regenerate', False)
foreground = self._apply_cached_image(cache_file, cache_epoch, background, regenerate=regenerate)
if foreground is not None:
return foreground

x_size = area_def.width
y_size = area_def.height
Expand All @@ -787,24 +797,22 @@ def add_overlay_from_dict(self, overlays, area_def, cache_epoch=None, background
'resolution': default_resolution}

for section, fun in zip(['coasts', 'rivers', 'borders'],
[self.add_coastlines,
self.add_rivers,
self.add_borders]):
if section in overlays:

params = DEFAULT.copy()
params.update(overlays[section])
[self.add_coastlines, self.add_rivers, self.add_borders]):
if section not in overlays:
continue
params = DEFAULT.copy()
params.update(overlays[section])

if section != "coasts":
params.pop('fill_opacity', None)
params.pop('fill', None)
if section != "coasts":
params.pop('fill_opacity', None)
params.pop('fill', None)

if not is_agg:
for key in ['width', 'outline_opacity', 'fill_opacity']:
params.pop(key, None)
if not is_agg:
for key in ['width', 'outline_opacity', 'fill_opacity']:
params.pop(key, None)

fun(foreground, area_def, **params)
logger.info("%s added", section.capitalize())
fun(foreground, area_def, **params)
logger.info("%s added", section.capitalize())

# Cities management
if 'cities' in overlays:
Expand Down Expand Up @@ -855,6 +863,10 @@ def add_overlay_from_dict(self, overlays, area_def, cache_epoch=None, background
lat_minor = float(overlays['grid'].get('lat_minor', 2.0))
font = overlays['grid'].get('font', None)
font_size = int(overlays['grid'].get('font_size', 10))
grid_kwargs = {}
if is_agg:
width = float(overlays['grid'].get('width', 1.0))
grid_kwargs["width"] = width

write_text = overlays['grid'].get('write_text', True)
if isinstance(write_text, str):
Expand All @@ -881,17 +893,60 @@ def add_overlay_from_dict(self, overlays, area_def, cache_epoch=None, background
outline=outline, minor_outline=minor_outline,
minor_is_tick=minor_is_tick,
lon_placement=lon_placement,
lat_placement=lat_placement)
lat_placement=lat_placement,
**grid_kwargs)

if cache_file is not None:
try:
foreground.save(cache_file)
except IOError as e:
logger.error("Can't save cache: %s", str(e))
if background is not None:
background.paste(foreground, mask=foreground.split()[-1])
self._write_and_apply_new_cached_image(cache_file, foreground, background)
return foreground

@staticmethod
def _apply_cached_image(cache_file, cache_epoch, background, regenerate=False):
    """Load the cached overlay image if it exists and is still usable.

    The cache is used only when ``cache_file`` can be stat'ed, its
    modification time is later than ``cache_epoch`` (a falsy epoch counts
    as 0) and ``regenerate`` is false. When the cache is used and
    ``background`` is given, the cached image is pasted onto it with the
    last band returned by ``split()`` as the mask.

    Returns the cached image, or ``None`` when a new overlay must be rendered.
    """
    try:
        # ``or 0`` guarantees a number, so no separate None check is needed
        newest_config = cache_epoch or 0
        cache_mtime = os.path.getmtime(cache_file)
        # Cache file will be used only if it's newer than config file
        if regenerate or not newest_config < cache_mtime:
            logger.info("Regenerating cache file.")
            return None
        cached = Image.open(cache_file)
        logger.info('Using image in cache %s', cache_file)
        if background is not None:
            background.paste(cached, mask=cached.split()[-1])
        return cached
    except OSError:
        # raised by getmtime/open when the cache file is missing or unreadable
        logger.info("No overlay image found, new overlay image will be saved in cache.")
        return None

@staticmethod
def _write_and_apply_new_cached_image(cache_file, foreground, background):
    """Save a newly rendered overlay to the cache and paste it on the background.

    A failure to write ``cache_file`` is only logged as an error, so the
    overlay is still pasted onto ``background`` (when one is given) using
    the last band returned by ``foreground.split()`` as the mask.
    """
    try:
        foreground.save(cache_file)
    except OSError as err:  # IOError is an alias of OSError
        logger.error("Can't save cache: %s", str(err))

    if background is None:
        return
    background.paste(foreground, mask=foreground.split()[-1])

def _generate_cache_filename(self, cache_prefix, area_def, overlays_dict):
    """Build the cache file path for an area and a set of overlay parameters.

    The file is placed in the directory part of ``cache_prefix`` and named
    ``<prefix>_<area hash>_<parameter hash>.png``, where the parameter hash
    is computed from ``overlays_dict`` without its cache and font settings.
    """
    # NOTE(review): filename stability across runs relies on ``area_def``
    # providing a deterministic ``__hash__`` - confirm for custom area types.
    area_hash = hash(area_def)
    base_dir, file_prefix = os.path.split(cache_prefix)
    param_hash = hash_dict(self._prepare_hashable_dict(overlays_dict))
    cache_name = f"{file_prefix}_{area_hash}_{param_hash}.png"
    return os.path.join(base_dir, cache_name)

@staticmethod
def _prepare_hashable_dict(nonhashable_dict):
    """Build a copy of the overlays dict that is suitable for hashing.

    The ``'cache'`` entry is left out and the ``'font'`` option is removed
    from the ``'cities'``, ``'points'`` and ``'grid'`` entries. Each
    overlay's options are shallow-copied first, so the options dicts passed
    in keep their ``'font'`` entries.
    """
    # two levels of copying are enough here and cheaper than a deep copy
    hashable = {
        name: options.copy()
        for name, options in nonhashable_dict.items()
        if name != 'cache'
    }
    # font objects are not hashable, so they are excluded from the hash
    for font_section in ('cities', 'points', 'grid'):
        if font_section in hashable:
            hashable[font_section].pop('font', None)
    return hashable

def add_overlay_from_config(self, config_file, area_def, background=None):
"""Create and return a transparent image adding all the overlays contained in a configuration file.
Expand Down
111 changes: 78 additions & 33 deletions pycoast/tests/test_pycoast.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import os
import unittest
from glob import glob

import numpy as np
from PIL import Image, ImageFont
Expand Down Expand Up @@ -847,7 +848,7 @@ def test_coastlines_convert_to_rgba_agg(self):
self.assertTrue(image_mode == 'RGBA', 'Conversion to RGBA failed.')


class FakeAreaDef():
class FakeAreaDef:
"""A fake area definition object."""

def __init__(self, proj4_string, area_extent, x_size, y_size):
Expand All @@ -858,7 +859,7 @@ def __init__(self, proj4_string, area_extent, x_size, y_size):
self.area_id = 'fakearea'


class TestFromConfig(TestPycoast):
class TestFromConfig:
"""Test burning overlays from a config file."""

def test_foreground(self):
Expand All @@ -878,83 +879,127 @@ def test_foreground(self):
img = cw.add_overlay_from_config(config_file, area_def)

res = np.array(img)
self.assertTrue(fft_metric(euro_data, res),
'Writing of contours failed')
assert fft_metric(euro_data, res), 'Writing of contours failed'

overlays = {'coasts': {'level': [1, 2, 3, 4], 'resolution': 'l'},
'borders': {'outline': (255, 0, 0), 'resolution': 'c'},
'rivers': {'outline': 'blue', 'resolution': 'c', 'level': 5}}

img = cw.add_overlay_from_dict(overlays, area_def)
res = np.array(img)
self.assertTrue(fft_metric(euro_data, res),
'Writing of contours failed')
assert fft_metric(euro_data, res), 'Writing of contours failed'

def test_cache(self):
def test_cache(self, tmpdir):
"""Test generating a transparent foreground and cache it."""
from pycoast import ContourWriterPIL
from tempfile import gettempdir
euro_img = Image.open(os.path.join(os.path.dirname(__file__),
'contours_europe_alpha.png'))
euro_data = np.array(euro_img)

# img = Image.new('RGB', (640, 480))
proj4_string = \
'+proj=stere +lon_0=8.00 +lat_0=50.00 +lat_ts=50.00 +ellps=WGS84'
area_extent = (-3363403.31, -2291879.85, 2630596.69, 2203620.1)
area_def = FakeAreaDef(proj4_string, area_extent, 640, 480)
cw = ContourWriterPIL(gshhs_root_dir)

tmp = gettempdir()

overlays = {'cache': {'file': os.path.join(tmp, 'pycoast_cache')},
overlays = {'cache': {'file': os.path.join(tmpdir, 'pycoast_cache')},
'coasts': {'level': 4, 'resolution': 'l'},
'borders': {'outline': (255, 0, 0), 'resolution': 'c'},
'rivers': {'outline': 'blue', 'resolution': 'c', 'level': 5}}

# Create the original cache file
cache_filename = os.path.join(tmp, 'pycoast_cache_fakearea.png')
img = cw.add_overlay_from_dict(overlays, area_def)
res = np.array(img)
self.assertTrue(fft_metric(euro_data, res),
'Writing of contours failed')
self.assertTrue(os.path.isfile(cache_filename))
cache_glob = glob(os.path.join(tmpdir, 'pycoast_cache_*.png'))
assert len(cache_glob) == 1
cache_filename = cache_glob[0]
assert fft_metric(euro_data, res), 'Writing of contours failed'
assert os.path.isfile(cache_filename)
mtime = os.path.getmtime(cache_filename)

# Reuse the generated cache file
img = cw.add_overlay_from_dict(overlays, area_def)
res = np.array(img)
self.assertTrue(fft_metric(euro_data, res),
'Writing of contours failed')
self.assertTrue(os.path.isfile(cache_filename))
self.assertEqual(os.path.getmtime(cache_filename), mtime)
assert fft_metric(euro_data, res), 'Writing of contours failed'
assert os.path.isfile(cache_filename)
assert os.path.getmtime(cache_filename) == mtime

# Regenerate cache file
current_time = time.time()
cw.add_overlay_from_dict(overlays, area_def, current_time)
mtime = os.path.getmtime(cache_filename)
self.assertGreater(mtime, current_time)
self.assertTrue(fft_metric(euro_data, res),
'Writing of contours failed')
assert mtime > current_time
assert fft_metric(euro_data, res), 'Writing of contours failed'

cw.add_overlay_from_dict(overlays, area_def, current_time)
self.assertEqual(os.path.getmtime(cache_filename), mtime)
self.assertTrue(fft_metric(euro_data, res),
'Writing of contours failed')
assert os.path.getmtime(cache_filename) == mtime
assert fft_metric(euro_data, res), 'Writing of contours failed'
overlays['cache']['regenerate'] = True
cw.add_overlay_from_dict(overlays, area_def)

self.assertNotEqual(os.path.getmtime(cache_filename), mtime)
self.assertTrue(fft_metric(euro_data, res),
'Writing of contours failed')
assert os.path.getmtime(cache_filename) != mtime
assert fft_metric(euro_data, res), 'Writing of contours failed'

overlays.pop('cache')
overlays['grid'] = {'outline': (255, 255, 255), 'outline_opacity': 175,
'minor_outline': (200, 200, 200), 'minor_outline_opacity': 127,
'width': 1.0, 'minor_width': 0.5, 'minor_is_tick': True,
'write_text': True, 'lat_placement': 'lr', 'lon_placement': 'b'}
img = cw.add_overlay_from_dict(overlays, area_def)
os.remove(os.path.join(tmp, 'pycoast_cache_fakearea.png'))
cw.add_overlay_from_dict(overlays, area_def)
os.remove(cache_filename)

def test_caching_with_param_changes(self, tmpdir):
"""Testing caching when changing parameters."""
from pycoast import ContourWriterPIL

# img = Image.new('RGB', (640, 480))
proj4_string = \
'+proj=stere +lon_0=8.00 +lat_0=50.00 +lat_ts=50.00 +ellps=WGS84'
area_extent = (-3363403.31, -2291879.85, 2630596.69, 2203620.1)
area_def = FakeAreaDef(proj4_string, area_extent, 640, 480)
cw = ContourWriterPIL(gshhs_root_dir)

font = ImageFont.truetype(os.path.join(
os.path.dirname(__file__), 'test_data', 'DejaVuSerif.ttf'))
overlays = {'cache': {'file': os.path.join(tmpdir, 'pycoast_cache')},
'grid': {'font': font}}

# Create the original cache file
cw.add_overlay_from_dict(overlays, area_def)
cache_glob = glob(os.path.join(tmpdir, 'pycoast_cache_*.png'))
assert len(cache_glob) == 1
cache_filename = cache_glob[0]
assert os.path.isfile(cache_filename)
mtime = os.path.getmtime(cache_filename)

# Reuse the generated cache file
cw.add_overlay_from_dict(overlays, area_def)
cache_glob = glob(os.path.join(tmpdir, 'pycoast_cache_*.png'))
assert len(cache_glob) == 1
assert os.path.isfile(cache_filename)
assert os.path.getmtime(cache_filename) == mtime

# Remove the font option, should produce the same result
# font is not considered when caching
del overlays['grid']['font']
cw.add_overlay_from_dict(overlays, area_def)
cache_glob = glob(os.path.join(tmpdir, 'pycoast_cache_*.png'))
assert len(cache_glob) == 1
assert os.path.isfile(cache_filename)
assert os.path.getmtime(cache_filename) == mtime

# Changing a parameter should create a new cache file
overlays = {'cache': {'file': os.path.join(tmpdir, 'pycoast_cache')},
'grid': {'width': 2.0}}
cw.add_overlay_from_dict(overlays, area_def)
cache_glob = glob(os.path.join(tmpdir, 'pycoast_cache_*.png'))
assert len(cache_glob) == 2
assert os.path.isfile(cache_filename)
new_cache_filename = cache_glob[0] if cache_glob[0] != cache_filename else cache_glob[1]
# original cache file should be unchanged
assert os.path.getmtime(cache_filename) == mtime
# new cache file should be...new
assert os.path.getmtime(new_cache_filename) != mtime

def test_get_resolution(self):
"""Get the automagical resolution computation."""
Expand All @@ -963,6 +1008,6 @@ def test_get_resolution(self):
'+proj=stere +lon_0=8.00 +lat_0=50.00 +lat_ts=50.00 +ellps=WGS84'
area_extent = (-3363403.31, -2291879.85, 2630596.69, 2203620.1)
area_def = FakeAreaDef(proj4_string, area_extent, 640, 480)
self.assertEqual(get_resolution_from_area(area_def), 'l')
assert get_resolution_from_area(area_def) == 'l'
area_def = FakeAreaDef(proj4_string, area_extent, 6400, 4800)
self.assertEqual(get_resolution_from_area(area_def), 'h')
assert get_resolution_from_area(area_def) == 'h'

0 comments on commit bc3b5cd

Please sign in to comment.