Merge pull request #194 from astrofrog/vector-comparison

Added support for EPS, PDF, and SVG image comparison
matplotlib · Apr 1, 2023 · df3a8fe · df3a8fe
2 parents dfe07f9 + 70b5d55
commit df3a8fe
Show file tree

Hide file tree

Showing 15 changed files with 1,442 additions and 34 deletions.
diff --git a/.github/workflows/test_and_publish.yml b/.github/workflows/test_and_publish.yml
@@ -17,6 +17,10 @@ jobs:
   test:
     uses: OpenAstronomy/github-actions-workflows/.github/workflows/tox.yml@v1
     with:
+      libraries: |
+        apt:
+          - ghostscript
+          - inkscape
       envs: |
         # Test the oldest and newest configuration on Mac and Windows
         - macos: py36-test-mpl20

diff --git a/README.rst b/README.rst
@@ -90,7 +90,6 @@ When generating a hash library, the tests will also be run as usual against the
 existing hash library specified by ``--mpl-hash-library`` or the keyword argument.
 However, generating baseline images will always result in the tests being skipped.
 
-
 Hybrid Mode: Hashes and Images
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -278,6 +277,81 @@ decorator:
 This will make the test insensitive to changes in e.g. the freetype
 library.
 
+Supported formats and deterministic output
+------------------------------------------
+
+By default, pytest-mpl will save and compare figures in PNG format. However,
+it is possible to set the format to use by setting e.g. ``savefig_kwargs={'format': 'pdf'}``
+in ``mpl_image_compare``. Supported formats are ``'eps'``, ``'pdf'``, ``'png'``, and ``'svg'``.
+Note that Ghostscript is required to be installed for comparing PDF and EPS figures, while
+Inkscape is required for SVG comparison.
+
+By default, Matplotlib does not produce deterministic output that will have a
+consistent hash every time it is run, or over different Matplotlib versions. In
+order to enforce that the output is deterministic, you will need to set metadata
+as described in the following subsections.
+
+PNG
+^^^
+
+For PNG files, the output can be made deterministic by setting:
+
+.. code:: python
+
+    @pytest.mark.mpl_image_compare(savefig_kwargs={'metadata': {"Software": None}})
+
+PDF
+^^^
+
+For PDF files, the output can be made deterministic by setting:
+
+.. code:: python
+
+    @pytest.mark.mpl_image_compare(savefig_kwargs={'format': 'pdf',
+                                                   'metadata': {"Creator": None,
+                                                                "Producer": None,
+                                                                "CreationDate": None}})
+
+Note that deterministic PDF output can only be achieved with Matplotlib 2.1 and above.
+
+EPS
+^^^
+
+For EPS files, the output can be made deterministic by setting:
+
+.. code:: python
+
+    @pytest.mark.mpl_image_compare(savefig_kwargs={'format': 'eps',
+                                                   'metadata': {"Creator": "test"}})
+
+and in addition you will need to set the ``SOURCE_DATE_EPOCH`` environment variable to
+a constant value (this is a Unix timestamp):
+
+.. code:: python
+
+    os.environ['SOURCE_DATE_EPOCH'] = '1680254601'
+
+You could do this inside the test.
+
+Note that deterministic EPS output can only be achieved with Matplotlib 2.1 and above.
+
+SVG
+^^^
+
+For SVG files, the output can be made deterministic by setting:
+
+.. code:: python
+
+    @pytest.mark.mpl_image_compare(savefig_kwargs={'format': 'svg', 'metadata': {"Date": None}})
+
+and in addition, you should make sure the following rcParam is set to a constant string:
+
+.. code:: python
+
+    plt.rcParams['svg.hashsalt'] = 'test'
+
+Note that SVG files can only be used in pytest-mpl with Matplotlib 3.3 and above.
+
 Test failure example
 --------------------
 

diff --git a/pytest_mpl/plugin.py b/pytest_mpl/plugin.py
@@ -52,6 +52,12 @@
   Actual shape: {actual_shape}
     {actual_path}"""
 
+# The following are the subsets of formats supported by the Matplotlib image
+# comparison machinery
+RASTER_IMAGE_FORMATS = ['png']
+VECTOR_IMAGE_FORMATS = ['eps', 'pdf', 'svg']
+ALL_IMAGE_FORMATS = RASTER_IMAGE_FORMATS + VECTOR_IMAGE_FORMATS
+
 
 def _hash_file(in_stream):
     """
@@ -70,8 +76,8 @@ def pathify(path):
     """
     path = Path(path)
     ext = ''
-    if path.suffixes[-1] == '.png':
-        ext = '.png'
+    if path.suffixes[-1][1:] in ALL_IMAGE_FORMATS:
+        ext = path.suffixes[-1]
         path = str(path).split(ext)[0]
     path = str(path)
     path = path.replace('[', '_').replace(']', '_')
@@ -315,18 +321,24 @@ def __init__(self,
         self.logger.setLevel(level)
         self.logger.addHandler(handler)
 
+    def _file_extension(self, item):
+        compare = get_compare(item)
+        savefig_kwargs = compare.kwargs.get('savefig_kwargs', {})
+        return savefig_kwargs.get('format', 'png')
+
     def generate_filename(self, item):
         """
         Given a pytest item, generate the figure filename.
         """
+        ext = self._file_extension(item)
         if self.config.getini('mpl-use-full-test-name'):
-            filename = generate_test_name(item) + '.png'
+            filename = generate_test_name(item) + f'.{ext}'
         else:
             compare = get_compare(item)
             # Find test name to use as plot name
             filename = compare.kwargs.get('filename', None)
             if filename is None:
-                filename = item.name + '.png'
+                filename = item.name + f'.{ext}'
 
         filename = str(pathify(filename))
         return filename
@@ -441,10 +453,10 @@ def generate_image_hash(self, item, fig):
         compare = get_compare(item)
         savefig_kwargs = compare.kwargs.get('savefig_kwargs', {})
 
-        imgdata = io.BytesIO()
+        ext = self._file_extension(item)
 
+        imgdata = io.BytesIO()
         fig.savefig(imgdata, **savefig_kwargs)
-
         out = _hash_file(imgdata)
         imgdata.close()
 
@@ -465,11 +477,17 @@ def compare_image_to_baseline(self, item, fig, result_dir, summary=None):
         tolerance = compare.kwargs.get('tolerance', 2)
         savefig_kwargs = compare.kwargs.get('savefig_kwargs', {})
 
+        ext = self._file_extension(item)
+
         baseline_image_ref = self.obtain_baseline_image(item, result_dir)
 
-        test_image = (result_dir / "result.png").absolute()
+        test_image = (result_dir / f"result.{ext}").absolute()
         fig.savefig(str(test_image), **savefig_kwargs)
-        summary['result_image'] = test_image.relative_to(self.results_dir).as_posix()
+
+        if ext in ['png', 'svg']:  # Use original file
+            summary['result_image'] = test_image.relative_to(self.results_dir).as_posix()
+        else:
+            summary['result_image'] = (result_dir / f"result_{ext}.png").relative_to(self.results_dir).as_posix()
 
         if not os.path.exists(baseline_image_ref):
             summary['status'] = 'failed'
@@ -484,26 +502,33 @@ def compare_image_to_baseline(self, item, fig, result_dir, summary=None):
 
         # setuptools may put the baseline images in non-accessible places,
         # copy to our tmpdir to be sure to keep them in case of failure
-        baseline_image = (result_dir / "baseline.png").absolute()
+        baseline_image = (result_dir / f"baseline.{ext}").absolute()
         shutil.copyfile(baseline_image_ref, baseline_image)
-        summary['baseline_image'] = baseline_image.relative_to(self.results_dir).as_posix()
+
+        if ext in ['png', 'svg']:  # Use original file
+            summary['baseline_image'] = baseline_image.relative_to(self.results_dir).as_posix()
+        else:
+            summary['baseline_image'] = (result_dir / f"baseline_{ext}.png").relative_to(self.results_dir).as_posix()
 
         # Compare image size ourselves since the Matplotlib
         # exception is a bit cryptic in this case and doesn't show
-        # the filenames
-        expected_shape = imread(str(baseline_image)).shape[:2]
-        actual_shape = imread(str(test_image)).shape[:2]
-        if expected_shape != actual_shape:
-            summary['status'] = 'failed'
-            summary['image_status'] = 'diff'
-            error_message = SHAPE_MISMATCH_ERROR.format(expected_path=baseline_image,
-                                                        expected_shape=expected_shape,
-                                                        actual_path=test_image,
-                                                        actual_shape=actual_shape)
-            summary['status_msg'] = error_message
-            return error_message
+        # the filenames. However imread won't work for vector graphics so we
+        # only do this for raster files.
+        if ext in RASTER_IMAGE_FORMATS:
+            expected_shape = imread(str(baseline_image)).shape[:2]
+            actual_shape = imread(str(test_image)).shape[:2]
+            if expected_shape != actual_shape:
+                summary['status'] = 'failed'
+                summary['image_status'] = 'diff'
+                error_message = SHAPE_MISMATCH_ERROR.format(expected_path=baseline_image,
+                                                            expected_shape=expected_shape,
+                                                            actual_path=test_image,
+                                                            actual_shape=actual_shape)
+                summary['status_msg'] = error_message
+                return error_message
 
         results = compare_images(str(baseline_image), str(test_image), tol=tolerance, in_decorator=True)
+
         summary['tolerance'] = tolerance
         if results is None:
             summary['status'] = 'passed'
@@ -514,8 +539,7 @@ def compare_image_to_baseline(self, item, fig, result_dir, summary=None):
             summary['status'] = 'failed'
             summary['image_status'] = 'diff'
             summary['rms'] = results['rms']
-            diff_image = (result_dir / 'result-failed-diff.png').absolute()
-            summary['diff_image'] = diff_image.relative_to(self.results_dir).as_posix()
+            summary['diff_image'] = Path(results['diff']).relative_to(self.results_dir).as_posix()
             template = ['Error: Image files did not match.',
                         'RMS Value: {rms}',
                         'Expected:  \n    {expected}',
@@ -538,6 +562,8 @@ def compare_image_to_hash_library(self, item, fig, result_dir, summary=None):
         compare = get_compare(item)
         savefig_kwargs = compare.kwargs.get('savefig_kwargs', {})
 
+        ext = self._file_extension(item)
+
         if not self.results_hash_library_name:
             # Use hash library name of current test as results hash library name
             self.results_hash_library_name = Path(compare.kwargs.get("hash_library", "")).name
@@ -574,7 +600,7 @@ def compare_image_to_hash_library(self, item, fig, result_dir, summary=None):
                                      f"{hash_library_filename} for test {hash_name}.")
 
         # Save the figure for later summary (will be removed later if not needed)
-        test_image = (result_dir / "result.png").absolute()
+        test_image = (result_dir / f"result.{ext}").absolute()
         fig.savefig(str(test_image), **savefig_kwargs)
         summary['result_image'] = test_image.relative_to(self.results_dir).as_posix()
 
@@ -627,6 +653,8 @@ def pytest_runtest_call(self, item):  # noqa
         remove_text = compare.kwargs.get('remove_text', False)
         backend = compare.kwargs.get('backend', 'agg')
 
+        ext = self._file_extension(item)
+
         with plt.style.context(style, after_reset=True), switch_backend(backend):
 
             # Run test and get figure object
@@ -665,7 +693,7 @@ def pytest_runtest_call(self, item):  # noqa
                 summary['status_msg'] = 'Skipped test, since generating image.'
                 generate_image = self.generate_baseline_image(item, fig)
                 if self.results_always:  # Make baseline image available in HTML
-                    result_image = (result_dir / "baseline.png").absolute()
+                    result_image = (result_dir / f"baseline.{ext}").absolute()
                     shutil.copy(generate_image, result_image)
                     summary['baseline_image'] = \
                         result_image.relative_to(self.results_dir).as_posix()