Merge pull request #75 from tovrstra/npz-cleaning

Small cleanups in `NDArraysRegressionFixture`
ESSS · Jan 4, 2022 · ff069c4 · ff069c4
2 parents 41f1d48 + fd0de21
commit ff069c4
Show file tree

Hide file tree

Showing 3 changed files with 108 additions and 40 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -2,6 +2,7 @@
 ------------------
 
 * `#54 <https://github.com/ESSS/pytest-regressions/pull/54>`__: New ``--with-test-class-names`` command-line flag to consider test class names when composing the expected and obtained data filenames. Needed when the same module contains different classes with the same method names.
+* `#72 <https://github.com/ESSS/pytest-regressions/pull/72>`__: New ``ndarrays_regression``, for comparing NumPy arrays with arbitrary shape.
 * `#74 <https://github.com/ESSS/pytest-regressions/pull/74>`__: Fix ``empty string bug`` on dataframe regression.
 
 2.2.0 (2020-01-27)

diff --git a/src/pytest_regressions/ndarrays_regression.py b/src/pytest_regressions/ndarrays_regression.py
@@ -1,3 +1,5 @@
+import zipfile
+
 from pytest_regressions.common import import_error_message
 from pytest_regressions.common import perform_regression_check
 
@@ -52,9 +54,9 @@ def _check_data_types(self, key, obtained_array, expected_array):
             # In case they are not, assume they are not comparable
             error_msg = (
                 "Data types are not the same.\n"
-                f"key: {key}\n"
-                f"Obtained: {obtained_array.dtype}\n"
-                f"Expected: {expected_array.dtype}\n"
+                f"  key: {key}\n"
+                f"  Obtained: {obtained_array.dtype}\n"
+                f"  Expected: {expected_array.dtype}\n"
             )
             raise AssertionError(error_msg)
 
@@ -68,9 +70,9 @@ def _check_data_shapes(self, key, obtained_array, expected_array):
         if obtained_array.shape != expected_array.shape:
             error_msg = (
                 "Shapes are not the same.\n"
-                f"Key: {key}\n"
-                f"Obtained: {obtained_array.shape}\n"
-                f"Expected: {expected_array.shape}\n"
+                f"  Key: {key}\n"
+                f"  Obtained: {obtained_array.shape}\n"
+                f"  Expected: {expected_array.shape}\n"
             )
             raise AssertionError(error_msg)
 
@@ -89,8 +91,8 @@ def _check_fn(self, obtained_filename, expected_filename):
         __tracebackhide__ = True
 
         # Turn result of np.load into a dictionary, such that the files are closed immediately.
-        obtained_data = dict(np.load(str(obtained_filename)))
-        expected_data = dict(np.load(str(expected_filename)))
+        expected_data = self._load_fn(expected_filename)
+        obtained_data = self._load_fn(obtained_filename)
 
         # Check mismatches in the keys.
         if set(obtained_data) != set(expected_data):
@@ -224,11 +226,35 @@ def _check_fn(self, obtained_filename, expected_filename):
                         else "",
                     )
                 error_msg += "\n"
+
             raise AssertionError(error_msg)
 
+    def _load_fn(self, filename):
+        """
+        Load dict contents from the given filename.
+
+        :param str filename:
+        """
+        try:
+            import numpy as np
+        except ModuleNotFoundError:
+            raise ModuleNotFoundError(import_error_message("NumPy"))
+
+        try:
+            # Open the file ourselves in a context manager so it is always closed,
+            # even if loading fails. Leaving that to np.load caused avoidable error
+            # messages in the unit tests.
+            with open(filename, "rb") as f:
+                result = dict(np.load(f))
+        except (zipfile.BadZipFile, ValueError) as e:
+            raise IOError(
+                f"NPZ file {filename} could not be loaded. Corrupt file?"
+            ) from e
+        return result
+
     def _dump_fn(self, data_object, filename):
         """
-        Dump dict contents to the given filename
+        Dump dict contents to the given filename.
 
         :param Dict[str, np.ndarray] data_object:
         :param str filename:
@@ -237,6 +263,7 @@ def _dump_fn(self, data_object, filename):
             import numpy as np
         except ModuleNotFoundError:
             raise ModuleNotFoundError(import_error_message("NumPy"))
+
         np.savez_compressed(str(filename), **data_object)
 
     def check(
@@ -302,10 +329,9 @@ def test_some_data(ndarrays_regression):
                 "Object with type '{}' was given.".format(str(type(data_dict)))
             )
         for key, array in data_dict.items():
-            assert isinstance(
-                key, str
-            ), "The dictionary keys must be strings. " "Found key with type '%s'" % (
-                str(type(key))
+            assert isinstance(key, str), (
+                "The dictionary keys must be strings. "
+                "Found key with type '{}'".format(str(type(key)))
             )
             data_dict[key] = np.asarray(array)
 

diff --git a/tests/test_ndarrays_regression.py b/tests/test_ndarrays_regression.py
@@ -1,3 +1,4 @@
+import os
 import re
 
 import numpy as np
@@ -291,11 +292,18 @@ def test_different_data_types(ndarrays_regression, no_regen):
 
     # Run check with incompatible type.
     data = {"data1": np.array([True] * 10)}
-    with pytest.raises(
-        AssertionError,
-        match="Data types are not the same.\nkey: data1\nObtained: bool\nExpected: int64\n",
-    ):
+    with pytest.raises(AssertionError) as excinfo:
         ndarrays_regression.check(data)
+    obtained_error_msg = str(excinfo.value)
+    expected = "\n".join(
+        [
+            "Data types are not the same.",
+            "  key: data1",
+            "  Obtained: bool",
+            "  Expected: int64",
+        ]
+    )
+    assert expected in obtained_error_msg
 
 
 class Foo:
@@ -305,12 +313,16 @@ def __init__(self, bar):
 
 def test_object_dtype(ndarrays_regression, no_regen):
     data1 = {"data1": np.array([Foo(i) for i in range(4)], dtype=object)}
-    with pytest.raises(
-        TypeError,
-        match="Only numeric or unicode data is supported on ndarrays_regression fixture.\n"
-        "Array 'data1' with type 'object' was given.",
-    ):
+    with pytest.raises(TypeError) as excinfo:
         ndarrays_regression.check(data1)
+    obtained_error_msg = str(excinfo.value)
+    expected = "\n".join(
+        [
+            "Only numeric or unicode data is supported on ndarrays_regression fixture.",
+            "Array 'data1' with type 'object' was given.",
+        ]
+    )
+    assert expected in obtained_error_msg
 
 
 def test_integer_values_smoke_test(ndarrays_regression, no_regen):
@@ -400,20 +412,27 @@ def test_arrays_with_different_sizes_1d(ndarrays_regression, no_regen):
 
     # Original NPY file contains 11 elements.
     data = {"data1": np.ones(10, dtype=np.float64)}
-    expected = re.escape(
-        "Shapes are not the same.\nKey: data1\nObtained: (10,)\nExpected: (11,)\n"
-    )
-    with pytest.raises(AssertionError, match=expected):
+    with pytest.raises(AssertionError) as excinfo:
         ndarrays_regression.check(data)
+    obtained_error_msg = str(excinfo.value)
+    expected = "\n".join(
+        [
+            "Shapes are not the same.",
+            "  Key: data1",
+            "  Obtained: (10,)",
+            "  Expected: (11,)",
+        ]
+    )
+    assert expected in obtained_error_msg
 
 
 def test_arrays_of_same_shape(ndarrays_regression):
-    same_size_int_arrays = {
+    data = {
         "2d": np.zeros((3, 4), dtype=int),
         "3d": np.ones((7, 8, 9), dtype=float),
         "4d": np.full((2, 1, 1, 4), 3, dtype=int),
     }
-    ndarrays_regression.check(same_size_int_arrays)
+    ndarrays_regression.check(data)
 
 
 def test_arrays_with_different_shapes(ndarrays_regression):
@@ -429,9 +448,9 @@ def test_arrays_with_different_shapes(ndarrays_regression):
     expected = "\n".join(
         [
             "Shapes are not the same.",
-            "Key: 2d",
-            "Obtained: (3, 2)",
-            "Expected: (3, 4)",
+            "  Key: 2d",
+            "  Obtained: (3, 2)",
+            "  Expected: (3, 4)",
         ]
     )
     assert expected in obtained_error_msg
@@ -450,9 +469,9 @@ def test_scalars(ndarrays_regression):
     expected = "\n".join(
         [
             "Shapes are not the same.",
-            "Key: data1",
-            "Obtained: (1,)",
-            "Expected: ()",
+            "  Key: data1",
+            "  Obtained: (1,)",
+            "  Expected: ()",
         ]
     )
     assert expected in obtained_error_msg
@@ -529,13 +548,19 @@ def test_string_array(ndarrays_regression):
 
 def test_non_dict(ndarrays_regression):
     data = np.ones(shape=(10, 10))
-    with pytest.raises(
-        TypeError,
-        match="Only dictionaries with NumPy arrays or array-like objects are supported "
-        "on ndarray_regression fixture.\nObject with type '%s' was given."
-        % (str(type(data)),),
-    ):
+    with pytest.raises(TypeError) as excinfo:
         ndarrays_regression.check(data)
+    obtained_error_msg = str(excinfo.value)
+    expected = "\n".join(
+        [
+            "Only dictionaries with NumPy arrays or array-like objects are supported "
+            "on ndarray_regression fixture.",
+            "Object with type '{}' was given.".format(
+                str(type(data)),
+            ),
+        ]
+    )
+    assert expected in obtained_error_msg
 
 
 def test_structured_array(ndarrays_regression):
@@ -597,3 +622,19 @@ def test_missing_obtained(ndarrays_regression):
         ]
     )
     assert expected in obtained_error_msg
+
+
+@pytest.mark.parametrize("prefix", [True, False])
+def test_corrupt_npz(ndarrays_regression, tmpdir, prefix):
+    data = {"data1": np.array([4, 5])}
+    fn_npz = os.path.join(tmpdir, "corrupt.npz")
+    # Write random bytes to a file
+    with open(fn_npz, "wb") as f:
+        if prefix:
+            f.write(b"PK\x03\x04")
+        np.random.randint(0, 256, 1000, dtype=np.ubyte).tofile(f)
+    with pytest.raises(IOError) as excinfo:
+        ndarrays_regression.check(data, fullpath=fn_npz)
+    obtained_error_msg = str(excinfo.value)
+    expected = f"NPZ file {fn_npz} could not be loaded. Corrupt file?"
+    assert expected in obtained_error_msg