Skip to content

Commit

Permalink
Merge pull request #75 from tovrstra/npz-cleaning
Browse files Browse the repository at this point in the history
Small cleanups in `NDArraysRegressionFixture`
  • Loading branch information
tadeu committed Jan 4, 2022
2 parents 41f1d48 + fd0de21 commit ff069c4
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 40 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
------------------

* `#54 <https://github.com/ESSS/pytest-regressions/pull/54>`__: New ``--with-test-class-names`` command-line flag to consider test class names when composing the expected and obtained data filenames. Needed when the same module contains different classes with the same method names.
* `#72 <https://github.com/ESSS/pytest-regressions/pull/72>`__: New ``ndarrays_regression``, for comparing NumPy arrays with arbitrary shape.
* `#74 <https://github.com/ESSS/pytest-regressions/pull/74>`__: Fix ``empty string bug`` on dataframe regression.

2.2.0 (2020-01-27)
Expand Down
52 changes: 39 additions & 13 deletions src/pytest_regressions/ndarrays_regression.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import zipfile

from pytest_regressions.common import import_error_message
from pytest_regressions.common import perform_regression_check

Expand Down Expand Up @@ -52,9 +54,9 @@ def _check_data_types(self, key, obtained_array, expected_array):
# In case they are not, assume they are not comparable
error_msg = (
"Data types are not the same.\n"
f"key: {key}\n"
f"Obtained: {obtained_array.dtype}\n"
f"Expected: {expected_array.dtype}\n"
f" key: {key}\n"
f" Obtained: {obtained_array.dtype}\n"
f" Expected: {expected_array.dtype}\n"
)
raise AssertionError(error_msg)

Expand All @@ -68,9 +70,9 @@ def _check_data_shapes(self, key, obtained_array, expected_array):
if obtained_array.shape != expected_array.shape:
error_msg = (
"Shapes are not the same.\n"
f"Key: {key}\n"
f"Obtained: {obtained_array.shape}\n"
f"Expected: {expected_array.shape}\n"
f" Key: {key}\n"
f" Obtained: {obtained_array.shape}\n"
f" Expected: {expected_array.shape}\n"
)
raise AssertionError(error_msg)

Expand All @@ -89,8 +91,8 @@ def _check_fn(self, obtained_filename, expected_filename):
__tracebackhide__ = True

# Turn result of np.load into a dictionary, such that the files are closed immediately.
obtained_data = dict(np.load(str(obtained_filename)))
expected_data = dict(np.load(str(expected_filename)))
expected_data = self._load_fn(expected_filename)
obtained_data = self._load_fn(obtained_filename)

# Check mismatches in the keys.
if set(obtained_data) != set(expected_data):
Expand Down Expand Up @@ -224,11 +226,35 @@ def _check_fn(self, obtained_filename, expected_filename):
else "",
)
error_msg += "\n"

raise AssertionError(error_msg)

def _load_fn(self, filename):
    """
    Read the given file with ``np.load`` and return its contents as a dict.

    :param str filename: path of the file to read.
    :returns: a plain dictionary built from the object returned by ``np.load``.
    :raises ModuleNotFoundError: when NumPy cannot be imported.
    :raises IOError: when loading fails with ``zipfile.BadZipFile`` or
        ``ValueError``; the original exception is chained as the cause.
    """
    try:
        import numpy as np
    except ModuleNotFoundError:
        raise ModuleNotFoundError(import_error_message("NumPy"))

    # Failures treated as "this is not a readable NPZ file".
    unreadable = (zipfile.BadZipFile, ValueError)
    try:
        # The stream is opened (and therefore closed) here rather than by
        # np.load itself, which avoids spurious error messages in the unit
        # tests. Converting to a dict inside the ``with`` block pulls all the
        # data out while the stream is still open.
        with open(filename, "rb") as stream:
            return dict(np.load(stream))
    except unreadable as exc:
        message = f"NPZ file {filename} could not be loaded. Corrupt file?"
        raise IOError(message) from exc

def _dump_fn(self, data_object, filename):
"""
Dump dict contents to the given filename
Dump dict contents to the given filename.
:param Dict[str, np.ndarray] data_object:
:param str filename:
Expand All @@ -237,6 +263,7 @@ def _dump_fn(self, data_object, filename):
import numpy as np
except ModuleNotFoundError:
raise ModuleNotFoundError(import_error_message("NumPy"))

np.savez_compressed(str(filename), **data_object)

def check(
Expand Down Expand Up @@ -302,10 +329,9 @@ def test_some_data(ndarrays_regression):
"Object with type '{}' was given.".format(str(type(data_dict)))
)
for key, array in data_dict.items():
assert isinstance(
key, str
), "The dictionary keys must be strings. " "Found key with type '%s'" % (
str(type(key))
assert isinstance(key, str), (
"The dictionary keys must be strings. "
"Found key with type '{}'".format(str(type(key)))
)
data_dict[key] = np.asarray(array)

Expand Down
95 changes: 68 additions & 27 deletions tests/test_ndarrays_regression.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import re

import numpy as np
Expand Down Expand Up @@ -291,11 +292,18 @@ def test_different_data_types(ndarrays_regression, no_regen):

# Run check with incompatible type.
data = {"data1": np.array([True] * 10)}
with pytest.raises(
AssertionError,
match="Data types are not the same.\nkey: data1\nObtained: bool\nExpected: int64\n",
):
with pytest.raises(AssertionError) as excinfo:
ndarrays_regression.check(data)
obtained_error_msg = str(excinfo.value)
expected = "\n".join(
[
"Data types are not the same.",
" key: data1",
" Obtained: bool",
" Expected: int64",
]
)
assert expected in obtained_error_msg


class Foo:
Expand All @@ -305,12 +313,16 @@ def __init__(self, bar):

def test_object_dtype(ndarrays_regression, no_regen):
data1 = {"data1": np.array([Foo(i) for i in range(4)], dtype=object)}
with pytest.raises(
TypeError,
match="Only numeric or unicode data is supported on ndarrays_regression fixture.\n"
"Array 'data1' with type 'object' was given.",
):
with pytest.raises(TypeError) as excinfo:
ndarrays_regression.check(data1)
obtained_error_msg = str(excinfo.value)
expected = "\n".join(
[
"Only numeric or unicode data is supported on ndarrays_regression fixture.",
"Array 'data1' with type 'object' was given.",
]
)
assert expected in obtained_error_msg


def test_integer_values_smoke_test(ndarrays_regression, no_regen):
Expand Down Expand Up @@ -400,20 +412,27 @@ def test_arrays_with_different_sizes_1d(ndarrays_regression, no_regen):

# Original NPY file contains 11 elements.
data = {"data1": np.ones(10, dtype=np.float64)}
expected = re.escape(
"Shapes are not the same.\nKey: data1\nObtained: (10,)\nExpected: (11,)\n"
)
with pytest.raises(AssertionError, match=expected):
with pytest.raises(AssertionError) as excinfo:
ndarrays_regression.check(data)
obtained_error_msg = str(excinfo.value)
expected = "\n".join(
[
"Shapes are not the same.",
" Key: data1",
" Obtained: (10,)",
" Expected: (11,)",
]
)
assert expected in obtained_error_msg


def test_arrays_of_same_shape(ndarrays_regression):
same_size_int_arrays = {
data = {
"2d": np.zeros((3, 4), dtype=int),
"3d": np.ones((7, 8, 9), dtype=float),
"4d": np.full((2, 1, 1, 4), 3, dtype=int),
}
ndarrays_regression.check(same_size_int_arrays)
ndarrays_regression.check(data)


def test_arrays_with_different_shapes(ndarrays_regression):
Expand All @@ -429,9 +448,9 @@ def test_arrays_with_different_shapes(ndarrays_regression):
expected = "\n".join(
[
"Shapes are not the same.",
"Key: 2d",
"Obtained: (3, 2)",
"Expected: (3, 4)",
" Key: 2d",
" Obtained: (3, 2)",
" Expected: (3, 4)",
]
)
assert expected in obtained_error_msg
Expand All @@ -450,9 +469,9 @@ def test_scalars(ndarrays_regression):
expected = "\n".join(
[
"Shapes are not the same.",
"Key: data1",
"Obtained: (1,)",
"Expected: ()",
" Key: data1",
" Obtained: (1,)",
" Expected: ()",
]
)
assert expected in obtained_error_msg
Expand Down Expand Up @@ -529,13 +548,19 @@ def test_string_array(ndarrays_regression):

def test_non_dict(ndarrays_regression):
data = np.ones(shape=(10, 10))
with pytest.raises(
TypeError,
match="Only dictionaries with NumPy arrays or array-like objects are supported "
"on ndarray_regression fixture.\nObject with type '%s' was given."
% (str(type(data)),),
):
with pytest.raises(TypeError) as excinfo:
ndarrays_regression.check(data)
obtained_error_msg = str(excinfo.value)
expected = "\n".join(
[
"Only dictionaries with NumPy arrays or array-like objects are supported "
"on ndarray_regression fixture.",
"Object with type '{}' was given.".format(
str(type(data)),
),
]
)
assert expected in obtained_error_msg


def test_structured_array(ndarrays_regression):
Expand Down Expand Up @@ -597,3 +622,19 @@ def test_missing_obtained(ndarrays_regression):
]
)
assert expected in obtained_error_msg


@pytest.mark.parametrize("prefix", [True, False])
def test_corrupt_npz(ndarrays_regression, tmpdir, prefix):
    """
    A file full of random bytes must be reported as an ``IOError`` naming the file.

    The junk is written both with and without a leading ``PK`` marker
    (presumably so that more than one failure path of the loader is
    exercised -- confirm against the loader implementation).
    """
    data = {"data1": np.array([4, 5])}
    fn_npz = os.path.join(tmpdir, "corrupt.npz")

    # Build the garbage payload first, then write it out in one go.
    noise = np.random.randint(0, 256, 1000, dtype=np.ubyte)
    with open(fn_npz, "wb") as f:
        if prefix:
            f.write(b"PK\x03\x04")
        noise.tofile(f)

    with pytest.raises(IOError) as excinfo:
        ndarrays_regression.check(data, fullpath=fn_npz)

    # The message must point at the offending file.
    assert (
        f"NPZ file {fn_npz} could not be loaded. Corrupt file?"
        in str(excinfo.value)
    )

0 comments on commit ff069c4

Please sign in to comment.