From 62e86ed3149574b87eeb0e7ef1f0799d8e5ee56e Mon Sep 17 00:00:00 2001 From: Toon Verstraelen Date: Sat, 4 Sep 2021 00:00:13 +0200 Subject: [PATCH 1/4] NDArraysRegressionFixture: regression on arrays with arbitrary shape. Fixes #71 This is a first attempt, including unit tests. Also some minor issues in DataFrameRegressionFixture were fixed, which was used as a starting point for this PR. The new fixture only uses NumPy, so also the error message makes no use for pandas for formatting the errors. --- .../dataframe_regression.py | 10 +- src/pytest_regressions/ndarrays_regression.py | 290 +++++++++++++++++ src/pytest_regressions/plugin.py | 31 +- tests/test_dataframe_regression.py | 6 +- tests/test_ndarrays_regression.py | 301 ++++++++++++++++++ .../test_arrays_of_same_shape.npz | Bin 0 -> 603 bytes .../test_arrays_of_same_size_1d.npz | Bin 0 -> 392 bytes .../test_arrays_with_different_shapes.npz | Bin 0 -> 203 bytes .../test_arrays_with_different_sizes_1d.npz | Bin 0 -> 212 bytes .../test_bool_array.npz | Bin 0 -> 207 bytes .../test_common_case.npz | Bin 0 -> 580 bytes .../test_common_case_nd.npz | Bin 0 -> 455 bytes .../test_different_data_types.npz | Bin 0 -> 211 bytes .../test_integer_values_smoke_test.npz | Bin 0 -> 211 bytes .../test_non_numeric_data_array0_.npz | Bin 0 -> 226 bytes .../test_number_formats.npz | Bin 0 -> 223 bytes .../test_string_array.npz | Bin 0 -> 250 bytes 17 files changed, 629 insertions(+), 9 deletions(-) create mode 100644 src/pytest_regressions/ndarrays_regression.py create mode 100644 tests/test_ndarrays_regression.py create mode 100644 tests/test_ndarrays_regression/test_arrays_of_same_shape.npz create mode 100644 tests/test_ndarrays_regression/test_arrays_of_same_size_1d.npz create mode 100644 tests/test_ndarrays_regression/test_arrays_with_different_shapes.npz create mode 100644 tests/test_ndarrays_regression/test_arrays_with_different_sizes_1d.npz create mode 100644 tests/test_ndarrays_regression/test_bool_array.npz create mode 100644 tests/test_ndarrays_regression/test_common_case.npz create mode 100644 tests/test_ndarrays_regression/test_common_case_nd.npz create mode 100644 tests/test_ndarrays_regression/test_different_data_types.npz create mode 100644 tests/test_ndarrays_regression/test_integer_values_smoke_test.npz create mode 100644 tests/test_ndarrays_regression/test_non_numeric_data_array0_.npz create mode 100644 tests/test_ndarrays_regression/test_number_formats.npz create mode 100644 tests/test_ndarrays_regression/test_string_array.npz diff --git a/src/pytest_regressions/dataframe_regression.py b/src/pytest_regressions/dataframe_regression.py index 37fde1d..a6d4e5b 100644 --- a/src/pytest_regressions/dataframe_regression.py +++ b/src/pytest_regressions/dataframe_regression.py @@ -42,7 +42,7 @@ def _check_data_types(self, key, obtained_column, expected_column): try: import numpy as np except ModuleNotFoundError: - raise ModuleNotFoundError(import_error_message("Numpy")) + raise ModuleNotFoundError(import_error_message("NumPy")) __tracebackhide__ = True obtained_data_type = obtained_column.values.dtype @@ -89,7 +89,7 @@ def _check_fn(self, obtained_filename, expected_filename): try: import numpy as np except ModuleNotFoundError: - raise ModuleNotFoundError(import_error_message("Numpy")) + raise ModuleNotFoundError(import_error_message("NumPy")) try: import pandas as pd except ModuleNotFoundError: @@ -199,7 +199,7 @@ def check( will ignore embed_data completely, being useful if a reference file is located in the session data dir for example. - :param dict tolerances: dict mapping keys from the data_dict to tolerance settings for the + :param dict tolerances: dict mapping keys from the data_frame to tolerance settings for the given data. Example:: tolerances={'U': Tolerance(atol=1e-2)} @@ -223,7 +223,7 @@ def check( __tracebackhide__ = True assert type(data_frame) is pd.DataFrame, ( - "Only pandas DataFrames are supported on on dataframe_regression fixture.\n" + "Only pandas DataFrames are supported on dataframe_regression fixture.\n" "Object with type '%s' was given." % (str(type(data_frame)),) ) @@ -235,7 +235,7 @@ def check( # Rejected: timedelta, datetime, objects, zero-terminated bytes, unicode strings and raw data assert array.dtype not in ["m", "M", "O", "S", "a", "U", "V"], ( "Only numeric data is supported on dataframe_regression fixture.\n" - "Array with type '%s' was given.\n" % (str(array.dtype),) + "Array with type '%s' was given." % (str(array.dtype),) ) if tolerances is None: diff --git a/src/pytest_regressions/ndarrays_regression.py b/src/pytest_regressions/ndarrays_regression.py new file mode 100644 index 0000000..433f73d --- /dev/null +++ b/src/pytest_regressions/ndarrays_regression.py @@ -0,0 +1,290 @@ +from pytest_regressions.common import perform_regression_check, import_error_message + + +class NDArraysRegressionFixture: + """ + NumPy NPZ regression fixture implementation used on ndarrays_regression fixture. + """ + + THRESHOLD = 100 + ROWFORMAT = "{:>10s} {:>20s} {:>20s} {:>20s}\n" + + def __init__(self, datadir, original_datadir, request): + """ + :type datadir: Path + :type original_datadir: Path + :type request: FixtureRequest + """ + self._tolerances_dict = {} + self._default_tolerance = {} + + self.request = request + self.datadir = datadir + self.original_datadir = original_datadir + self._force_regen = False + self._with_test_class_names = False + + def _check_data_types(self, key, obtained_array, expected_array): + """ + Check if data type of obtained and expected arrays are the same. Fail if not. + Helper method used in _check_fn method. + """ + try: + import numpy as np + except ModuleNotFoundError: + raise ModuleNotFoundError(import_error_message("NumPy")) + + __tracebackhide__ = True + + obtained_data_type = obtained_array.dtype + expected_data_type = expected_array.dtype + if obtained_data_type != expected_data_type: + # Check if both data types are comparable as numbers (float, int, short, bytes, etc...) + if np.issubdtype(obtained_data_type, np.number) and np.issubdtype( + expected_data_type, np.number + ): + return + + # In case they are not, assume they are not comparable + error_msg = ( + "Data types are not the same.\n" + "key: %s\n" + "Obtained: %s\n" + "Expected: %s\n" % (key, obtained_data_type, expected_data_type) + ) + raise AssertionError(error_msg) + + def _check_data_shapes(self, key, obtained_array, expected_array): + """ + Check if obtained and expected arrays have the same size. + Helper method used in _check_fn method. + """ + __tracebackhide__ = True + + obtained_data_shape = obtained_array.shape + expected_data_shape = expected_array.shape + if obtained_data_shape != expected_data_shape: + error_msg = ( + "Shapes are not the same.\n" + "Key: %s\n" + "Obtained: %s\n" + "Expected: %s\n" % (key, obtained_data_shape, expected_data_shape) + ) + raise AssertionError(error_msg) + + def _check_fn(self, obtained_filename, expected_filename): + """ + Check if dict contents dumped to a file match the contents in expected file. + + :param str obtained_filename: + :param str expected_filename: + """ + try: + import numpy as np + except ModuleNotFoundError: + raise ModuleNotFoundError(import_error_message("NumPy")) + + __tracebackhide__ = True + + # Turn result of np.load into a dictionary, such that the files are closed immediately. + obtained_data = dict(np.load(str(obtained_filename))) + expected_data = dict(np.load(str(expected_filename))) + + comparison_tables_dict = {} + for k in obtained_data.keys(): + obtained_array = obtained_data[k] + expected_array = expected_data.get(k) + + if expected_array is None: + error_msg = f"Could not find key '{k}' in the expected results.\n" + error_msg += "Keys in the obtained data table: [" + for k in obtained_data.keys(): + error_msg += f"'{k}', " + error_msg += "]\n" + error_msg += "Keys in the expected data table: [" + for k in expected_data.keys(): + error_msg += f"'{k}', " + error_msg += "]\n" + error_msg += "To update values, use --force-regen option.\n\n" + raise AssertionError(error_msg) + + tolerance_args = self._tolerances_dict.get(k, self._default_tolerance) + + self._check_data_types(k, obtained_array, expected_array) + self._check_data_shapes(k, obtained_array, expected_array) + + data_type = obtained_array.dtype + if data_type in [float, np.float16, np.float32, np.float64]: + not_close_mask = ~np.isclose( + obtained_array, + expected_array, + equal_nan=True, + **tolerance_args, + ) + else: + not_close_mask = obtained_array != expected_array + + if np.any(not_close_mask): + diff_ids = np.nonzero(not_close_mask) + comparison_tables_dict[k] = ( + np.array(diff_ids).T, + obtained_array[diff_ids], + expected_array[diff_ids], + ) + + if len(comparison_tables_dict) > 0: + error_msg = "Values are not sufficiently close.\n" + error_msg += "To update values, use --force-regen option.\n\n" + for k, ( + diff_ids, + obtained_array, + expected_array, + ) in comparison_tables_dict.items(): + if len(diff_ids) > self.THRESHOLD: + error_msg += f"Only showing first {self.THRESHOLD} mismatches.\n" + diff_ids = diff_ids[: self.THRESHOLD] + obtained_array = obtained_array[: self.THRESHOLD] + expected_array = expected_array[: self.THRESHOLD] + error_msg += f"{k}:\n" + error_msg += self.ROWFORMAT.format( + "Index", + "Obtained", + "Expected", + "Difference", + ) + for diff_id, obtained, expected in zip( + diff_ids, obtained_array, expected_array + ): + error_msg += self.ROWFORMAT.format( + ",".join(str(i) for i in diff_id), + str(obtained), + str(expected), + str(obtained - expected) + if isinstance(obtained, np.number) + else "", + ) + error_msg += "\n\n" + raise AssertionError(error_msg) + + def _dump_fn(self, data_object, filename): + """ + Dump dict contents to the given filename + + :param Dict[str, np.ndarray] data_object: + :param str filename: + """ + try: + import numpy as np + except ModuleNotFoundError: + raise ModuleNotFoundError(import_error_message("NumPy")) + np.savez_compressed(str(filename), **data_object) + + def check( + self, + data_dict, + basename=None, + fullpath=None, + tolerances=None, + default_tolerance=None, + ): + """ + Checks a dictionary of NumPy ndarrays, containing only numeric data, against a previously recorded version, or generate a new file. + + Example:: + + def test_some_data(ndarrays_regression): + points, values = some_function() + ndarrays_regression.check( + { + 'points': points, # array with shape (100, 3) + 'values': values, # array with shape (100,) + }, + default_tolerance=dict(atol=1e-8, rtol=1e-8) + ) + + :param Dict[str, numpy.ndarray] data_dict: dictionary of NumPy ndarrays containing + data for regression check. The arrays can have any shape. + + :param str basename: basename of the file to test/record. If not given the name + of the test is used. + + :param str fullpath: complete path to use as a reference file. This option + will ignore embed_data completely, being useful if a reference file is located + in the session data dir for example. + + :param dict tolerances: dict mapping keys from the data_dict to tolerance settings + for the given data. Example:: + + tolerances={'U': Tolerance(atol=1e-2)} + + :param dict default_tolerance: dict mapping the default tolerance for the current + check call. Example:: + + default_tolerance=dict(atol=1e-7, rtol=1e-18). + + If not provided, will use defaults from numpy's ``isclose`` function. + + ``basename`` and ``fullpath`` are exclusive. + """ + try: + import numpy as np + except ModuleNotFoundError: + raise ModuleNotFoundError(import_error_message("NumPy")) + + import functools + + __tracebackhide__ = True + + assert isinstance(data_dict, dict), ( + "Only dictionaries with NumPy arrays or array-like objects are " + "supported on ndarray_regression fixture.\n" + "Object with type '%s' was given. " % (str(type(data_dict)),) + ) + for key, array in data_dict.items(): + assert isinstance( + key, str + ), "The dictionary keys must be strings. " "Found key with type '%s'" % ( + str(type(key)) + ) + data_dict[key] = np.asarray(array) + + for key, array in data_dict.items(): + # Skip assertion if an array of strings + if (array.dtype == "O") and (type(array[0]) is str): + continue + # Rejected: timedelta, datetime, objects, zero-terminated bytes, unicode strings and raw data + assert array.dtype not in [ + "m", + "M", + "O", + "S", + "a", + "U", + "V", + ], "Only numeric data is supported on ndarrays_regression fixture.\n" "Array '%s' with type '%s' was given.\n" % ( + key, + str(array.dtype), + ) + + if tolerances is None: + tolerances = {} + self._tolerances_dict = tolerances + + if default_tolerance is None: + default_tolerance = {} + self._default_tolerance = default_tolerance + + dump_fn = functools.partial(self._dump_fn, data_dict) + + perform_regression_check( + datadir=self.datadir, + original_datadir=self.original_datadir, + request=self.request, + check_fn=self._check_fn, + dump_fn=dump_fn, + extension=".npz", + basename=basename, + fullpath=fullpath, + force_regen=self._force_regen, + with_test_class_names=self._with_test_class_names, + ) diff --git a/src/pytest_regressions/plugin.py b/src/pytest_regressions/plugin.py index db136b6..3c5d828 100644 --- a/src/pytest_regressions/plugin.py +++ b/src/pytest_regressions/plugin.py @@ -51,7 +51,7 @@ def dataframe_regression(datadir, original_datadir, request): """ Example usage: - def testSomeData(num_regression): + def testSomeData(dataframe_regression): dataframe_regression.check( pandas.DataFrame.from_dict( { @@ -75,6 +75,35 @@ def testSomeData(num_regression): return DataFrameRegressionFixture(datadir, original_datadir, request) +@pytest.fixture +def ndarrays_regression(datadir, original_datadir, request): + """ + Similar to num_regression, but supports numpy arrays with arbitrary shape. The + dictionary is stored as an NPZ file. The values of the dictionary must be accepted + by ``np.asarray``. + + Example:: + + def test_some_data(ndarrays_regression): + points, values = some_function() + ndarrays_regression.check( + { + 'points': points, # array with shape (100, 3) + 'values': values, # array with shape (100,) + }, + default_tolerance=dict(atol=1e-8, rtol=1e-8) + ) + + :type embed_data: _EmbedDataFixture + :type request: FixtureRequest + :rtype: DataRegressionFixture + :return: Data regression fixture. + """ + from .ndarrays_regression import NDArraysRegressionFixture + + return NDArraysRegressionFixture(datadir, original_datadir, request) + + @pytest.fixture def file_regression(datadir, original_datadir, request): """ diff --git a/tests/test_dataframe_regression.py b/tests/test_dataframe_regression.py index ed2ddeb..416c81d 100644 --- a/tests/test_dataframe_regression.py +++ b/tests/test_dataframe_regression.py @@ -179,7 +179,7 @@ def test_non_numeric_data(dataframe_regression, array, no_regen): with pytest.raises( AssertionError, match="Only numeric data is supported on dataframe_regression fixture.\n" - " Array with type '%s' was given." % (str(data1["data1"].dtype),), + " *Array with type '%s' was given." % (str(data1["data1"].dtype),), ): dataframe_regression.check(data1) @@ -243,7 +243,7 @@ def test_non_pandas_dataframe(dataframe_regression): data = np.ones(shape=(10, 10)) with pytest.raises( AssertionError, - match="Only pandas DataFrames are supported on on dataframe_regression fixture.\n" - " Object with type '%s' was given." % (str(type(data)),), + match="Only pandas DataFrames are supported on dataframe_regression fixture.\n" + " *Object with type '%s' was given." % (str(type(data)),), ): dataframe_regression.check(data) diff --git a/tests/test_ndarrays_regression.py b/tests/test_ndarrays_regression.py new file mode 100644 index 0000000..2dcb607 --- /dev/null +++ b/tests/test_ndarrays_regression.py @@ -0,0 +1,301 @@ +import re + +import numpy as np +import pytest + +from pytest_regressions.testing import check_regression_fixture_workflow + + +@pytest.fixture +def no_regen(ndarrays_regression, request): + if ndarrays_regression._force_regen or request.config.getoption("force_regen"): + pytest.fail("--force-regen should not be used on this test.") + + +def test_usage_workflow(testdir, monkeypatch): + """ + :type testdir: _pytest.pytester.TmpTestdir + + :type monkeypatch: _pytest.monkeypatch.monkeypatch + """ + + import sys + + monkeypatch.setattr( + sys, "testing_get_data", lambda: {"data": 1.1 * np.ones(50)}, raising=False + ) + source = """ + import sys + def test_1(ndarrays_regression): + contents = sys.testing_get_data() + ndarrays_regression.check(contents) + """ + + def get_npz_contents(): + filename = testdir.tmpdir / "test_file" / "test_1.npz" + return dict(np.load(str(filename))) + + def compare_arrays(obtained, expected): + assert (obtained["data"] == expected["data"]).all() + + check_regression_fixture_workflow( + testdir, + source=source, + data_getter=get_npz_contents, + data_modifier=lambda: monkeypatch.setattr( + sys, "testing_get_data", lambda: {"data": 1.2 * np.ones(50)}, raising=False + ), + expected_data_1={"data": 1.1 * np.ones(50)}, + expected_data_2={"data": 1.2 * np.ones(50)}, + compare_fn=compare_arrays, + ) + + +def test_common_case(ndarrays_regression, no_regen): + # Most common case: Data is valid, is present and should pass + data1 = 1.1 * np.ones(5000) + data2 = 2.2 * np.ones(5000) + ndarrays_regression.check({"data1": data1, "data2": data2}) + + # Assertion error case 1: Data has one invalid place + data1 = 1.1 * np.ones(5000) + data2 = 2.2 * np.ones(5000) + data1[500] += 0.1 + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check({"data1": data1, "data2": data2}) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Values are not sufficiently close.", + "To update values, use --force-regen option.", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data1:", + " Index Obtained Expected Difference", + " 500 1.2000000000000002 1.1 0.10000000000000009", + ] + ) + assert expected in obtained_error_msg + + # Assertion error case 2: More than one invalid data + data1 = 1.1 * np.ones(5000) + data2 = 2.2 * np.ones(5000) + data1[500] += 0.1 + data1[600] += 0.2 + data2[700] += 0.3 + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check({"data1": data1, "data2": data2}) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Values are not sufficiently close.", + "To update values, use --force-regen option.", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data1:", + " Index Obtained Expected Difference", + " 500 1.2000000000000002 1.1 0.10000000000000009", + " 600 1.3 1.1 0.19999999999999996", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data2:", + " Index Obtained Expected Difference", + " 700 2.5 2.2 0.2999999999999998", + ] + ) + assert expected in obtained_error_msg + + +def test_common_case_nd(ndarrays_regression, no_regen): + # Most common case: Data is valid, is present and should pass + data1 = 1.1 * np.ones((50, 20)) + data2 = 2.2 * np.ones((3, 4, 5)) + ndarrays_regression.check({"data1": data1, "data2": data2}) + + # Assertion error case 1: Data has one invalid place + data1 = 1.1 * np.ones((50, 20)) + data2 = 2.2 * np.ones((3, 4, 5)) + data1[30, 2] += 0.1 + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check({"data1": data1, "data2": data2}) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Values are not sufficiently close.", + "To update values, use --force-regen option.", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data1:", + " Index Obtained Expected Difference", + " 30,2 1.2000000000000002 1.1 0.10000000000000009", + ] + ) + assert expected in obtained_error_msg + + # Assertion error case 2: More than one invalid data + data1 = 1.1 * np.ones((50, 20)) + data2 = 2.2 * np.ones((3, 4, 5)) + data1[20, 15] += 0.1 + data1[0, 9] = 1.43248324e35 + data2[2, 3, 4] += 0.3 + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check({"data1": data1, "data2": data2}) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Values are not sufficiently close.", + "To update values, use --force-regen option.", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data1:", + " Index Obtained Expected Difference", + " 0,9 1.43248324e+35 1.1 1.43248324e+35", + " 20,15 1.2000000000000002 1.1 0.10000000000000009", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data2:", + " Index Obtained Expected Difference", + " 2,3,4 2.5 2.2 0.2999999999999998", + ] + ) + assert expected in obtained_error_msg + + +def test_different_data_types(ndarrays_regression, no_regen): + # Original NPZ file contains integer data + data1 = np.array([True] * 10) + with pytest.raises( + AssertionError, + match="Data types are not the same.\nkey: data1\nObtained: bool\nExpected: int64\n", + ): + ndarrays_regression.check({"data1": data1}) + + +class Foo: + def __init__(self, bar): + self.bar = bar + + +def test_object_dtype(ndarrays_regression, no_regen): + data1 = {"data1": np.array([Foo(i) for i in range(4)], dtype=object)} + with pytest.raises( + AssertionError, + match="Only numeric data is supported on ndarrays_regression fixture.\n" + " *Array 'data1' with type '%s' was given." % (str(data1["data1"].dtype),), + ): + ndarrays_regression.check(data1) + + +def test_integer_values_smoke_test(ndarrays_regression, no_regen): + data1 = np.ones(11, dtype=int) + ndarrays_regression.check({"data1": data1}) + + +def test_number_formats(ndarrays_regression): + data1 = np.array([1.2345678e50, 1.2345678e-50, 0.0]) + ndarrays_regression.check({"data1": data1}) + + +def test_bool_array(ndarrays_regression, no_regen): + data1 = np.array([True, True, True], dtype=bool) + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check({"data1": data1}) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Values are not sufficiently close.", + "To update values, use --force-regen option.", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data1:", + " Index Obtained Expected Difference", + " 0 True False ", + " 1 True False ", + " 2 True False ", + ] + ) + assert expected in obtained_error_msg + + +def test_arrays_of_same_size_1d(ndarrays_regression): + same_size_int_arrays = { + "hello": np.zeros((1,), dtype=int), + "world": np.zeros((1,), dtype=int), + } + ndarrays_regression.check(same_size_int_arrays) + + +def test_arrays_with_different_sizes_1d(ndarrays_regression, no_regen): + # Original NPY file contains 11 elements. + data1 = np.ones(10, dtype=np.float64) + expected = re.escape( + "Shapes are not the same.\nKey: data1\nObtained: (10,)\nExpected: (11,)\n" + ) + with pytest.raises(AssertionError, match=expected): + ndarrays_regression.check({"data1": data1}) + + +def test_arrays_of_same_shape(ndarrays_regression): + same_size_int_arrays = { + "2d": np.zeros((3, 4), dtype=int), + "3d": np.ones((7, 8, 9), dtype=float), + "4d": np.full((2, 1, 1, 4), 3, dtype=int), + } + ndarrays_regression.check(same_size_int_arrays) + + +def test_arrays_with_different_shapes(ndarrays_regression): + same_size_int_arrays = { + # Originally with shape (3, 4) + "2d": np.zeros((3, 2), dtype=int), + } + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(same_size_int_arrays) + obtained_error_msg = str(excinfo.value) + print(obtained_error_msg) + expected = "\n".join( + [ + "Shapes are not the same.", + "Key: 2d", + "Obtained: (3, 2)", + "Expected: (3, 4)", + ] + ) + assert expected in obtained_error_msg + + +def test_string_array(ndarrays_regression): + data1 = {"potato": ["delicious", "nutritive", "yummy"]} + ndarrays_regression.check(data1) + + +def test_non_dict(ndarrays_regression): + data = np.ones(shape=(10, 10)) + with pytest.raises( + AssertionError, + match="Only dictionaries with NumPy arrays or array-like objects are supported " + "on ndarray_regression fixture.\n *Object with type '%s' was given." + % (str(type(data)),), + ): + ndarrays_regression.check(data) diff --git a/tests/test_ndarrays_regression/test_arrays_of_same_shape.npz b/tests/test_ndarrays_regression/test_arrays_of_same_shape.npz new file mode 100644 index 0000000000000000000000000000000000000000..61d99b0f753134f15c1165335b874d8418643b32 GIT binary patch literal 603 zcmWIWW@Zs#U|`??Vnv2eQNI>@0$C4$n2kY%!6-#9ub`5VK>#cQ6l4I&g23!I;;*Cy zPM!!j7O-~Sq^LPDOY)X3$eWzGaNXhs^QOgz1WlPCK0kiSBrc(H?=zZytX<;iDq>5R zeo7s);!>Ms#kG|yg;|eHikTq*#R03@w9jP$jdu`$IKUY00FVd>fMh`chXc|GI^dZV zmzWjTG_V_c6!3i-s*0>cLV*R2LqKeZJlgX#W8q1_hA`+=UCtokGppub604xX;X8O@o0U0Iv(m^NVs)Xzl_=CJ|;_p#bqLh-_d4k?=4H@FrGU Y6Hpu6BLUv5Y#?)(fG`h8vw%$n0E~ZKv;Y7A literal 0 HcmV?d00001 diff --git a/tests/test_ndarrays_regression/test_arrays_with_different_shapes.npz b/tests/test_ndarrays_regression/test_arrays_with_different_shapes.npz new file mode 100644 index 0000000000000000000000000000000000000000..0daa7dde9c106cf8089fb65e01226959e4672ebe GIT binary patch literal 203 zcmWIWW@Zs#U|`??Vnv2eQNI>@0$C4$n2kY%!6-#9ub`5VK>#cQ6l4I&g23!I;;*Cy zPM!!j7O-~Sq^LPDOY)X3$eWzGaNXhs^QOgz1WlPCK0kiSBrc(H?=zZytX<;iDq>5R zeo7s);!>Ms#kG|yg;|eHikTt6n~_O`8J80vmVw9yMi2>cP=Gfp8;HjUgeE|`5yW8t E0OrUp)&Kwi literal 0 HcmV?d00001 diff --git a/tests/test_ndarrays_regression/test_arrays_with_different_sizes_1d.npz b/tests/test_ndarrays_regression/test_arrays_with_different_sizes_1d.npz new file mode 100644 index 0000000000000000000000000000000000000000..4ea4fc5dce2a9dbcd6c91299dc268d62037f7655 GIT binary patch literal 212 zcmWIWW@Zs#U|`??Vnv1(Dm+hpfUFxp%*i0akdj!EXsDN0P|3(302Ty_Gk_F;!0b2T zucQS|o(MP=uy)?0s5vo9@|G^ho1C_A-QorFrp1Q@O_?D+KYq$2E}?SoGn#&^UE=8~ zVoR8QO0~6F&9dU!%Jqa67H-3Z&o&t03#5Z K1L<}UhXDYi{xQ4& literal 0 HcmV?d00001 diff --git a/tests/test_ndarrays_regression/test_bool_array.npz b/tests/test_ndarrays_regression/test_bool_array.npz new file mode 100644 index 0000000000000000000000000000000000000000..c2c1f5a1eafc6833a49d5442a1052d25cc9d4700 GIT binary patch literal 207 zcmWIWW@Zs#U|`??Vnv4SCq2))16j>L%*i0akdj!EXsDN0P|3(302Ty_Gk_F;!0b2T zucQS|o(MP=uy)?0s5vo9@|G@$o1C&@-QorFrp1Q@O_?D+KYq$2E}?SoGn#&^UE=8~ zVoR8QN*%MBWyQ6XON2p>jg5^Vz?+dtgc+9`AXb6M21XDGcTs>hD;r3F5eUtJbQ6ff F000g;F696K literal 0 HcmV?d00001 diff --git a/tests/test_ndarrays_regression/test_common_case.npz b/tests/test_ndarrays_regression/test_common_case.npz new file mode 100644 index 0000000000000000000000000000000000000000..df5b7b2814cf83a420a260ce8ce3f9a3be7e9d93 GIT binary patch literal 580 zcmWIWW@Zs#U|`??Vnv2?QBD_@0a*v;Ffed3h%lrimLwYL}^?OF?PPO_A<{Du;6=j+|5z`zWHe_i5gPGS<(Zr?$VZ&Hq08 zZlX<9&%BKqw}XYJE3RAR>f^NL)w6p~t|#9Wd3xdc&!xJ*@6G!hee?fI8{f+*(^|rR zluv%29x;8QisvNq(3?&20Vo~}yZu3Y3DE7x9yCJtAV>iSygjjy>reng>%+CbM7GDa zwu(9)*^m;f!#0^`d!m5)?1&@F4PIW}wYTY~_0z|H-oLHeY`x^9a^-jbAEHASlq+=zWe4EW;&ATITcSM-MRp8 zMkWzvTrmjo2Z(H71d;IgMAwEIE+C~4(7^Z*$N*^rhf;tyD;r3H2?+Cm^Z~G`0L%@< A<^TWy literal 0 HcmV?d00001 diff --git a/tests/test_ndarrays_regression/test_common_case_nd.npz b/tests/test_ndarrays_regression/test_common_case_nd.npz new file mode 100644 index 0000000000000000000000000000000000000000..1837b3cf5a8e0fd596eacef2387700a5e26d8ba1 GIT binary patch literal 455 zcmWIWW@Zs#U|`??Vnv2`LR)&OfUE=Z3=EtMA`B^sC5eW5c?Fe>3<6+5pd1580SLT3 zv5~7OK%y-%dUsd-Z{BkXP6|Rhc8coq`4yf}=37|Qx5Xp;Vo3Q-*(KXA-1@D{fB&~l z=<(vco$oHL>E#Yzzw^byvsXH{HP7GkR=+v_&Moh1?!1=^H!qH^+vDG}IOKyh^S{L^cKf$m9QLO9w8=4g;02!IrT!0b2TucQS|o(MP=uy)?0 zs5vo9@|G^ho1C_A-QorFrp1Q@O_?D+KYq$2E}?SoGn#&^UE=8~VoR8QN*%M}QnTV( zHp_}@E7udob&uu5bHo>10#rpyDGq&l?^1o2!!T9 Jx(&o(004&-FJJ%w literal 0 HcmV?d00001 diff --git a/tests/test_ndarrays_regression/test_non_numeric_data_array0_.npz b/tests/test_ndarrays_regression/test_non_numeric_data_array0_.npz new file mode 100644 index 0000000000000000000000000000000000000000..31b11cfdad6994b4ae8d6119d1f459eb866e9da8 GIT binary patch literal 226 zcmWIWW@Zs#U|`??Vnv2c#};Zu0a*(e85lSjL>N*MOA-zB@(L;$83e$BKsg4G0uY$} zM*Nkuz{wK<#{$;Qn-n!CW=Y=C1$mP*7p_~pVBWO&kf13u#OKFPnZzYj?tMnnkF`rY zT}5mO(@&{1E3ULzR$N=TQkeA=**G4wUiuRd#qHJYoT;f{d+7=@Lx49UlL#{|XF+TS bkqtm+G9Vlo;LXYg5?};Eb09qx#9;scsFF7m literal 0 HcmV?d00001 diff --git a/tests/test_ndarrays_regression/test_number_formats.npz b/tests/test_ndarrays_regression/test_number_formats.npz new file mode 100644 index 0000000000000000000000000000000000000000..379d2612c93662e5f9cddd6f75d4568e8ce91a82 GIT binary patch literal 223 zcmWIWW@Zs#U|`??Vnv3wxZF$OK-LT(=423INJ%V7G}Oy0sAOai01E=e89)j^VD=mF zSJDC}PXrtbSUYc0)SQ?lc}o}MO-@_5Zt;S7)8a#drpyqZA3tRhmr%L)8BIUdF7b2~ zu_a7DrH)z6vf|pxCBm>yQa=3X&6BNFE3ZrV)dgFgoWjV(5a7+oB*Ki#O%SU=WCJ6J Wgu5`no0SbDzzBrqKzb60!vFv}l{Z8H literal 0 HcmV?d00001 diff --git a/tests/test_ndarrays_regression/test_string_array.npz b/tests/test_ndarrays_regression/test_string_array.npz new file mode 100644 index 0000000000000000000000000000000000000000..c2fbb224e9dc8c14b6189cf3c69e0c57b734b9fb GIT binary patch literal 250 zcmWIWW@Zs#U|`??Vnqhaef&oXfUGw_%*7zWP>^4eSdy=oS5V2wAOIExN-%&_fWYiG z;;*CyPM!!j7O-~Sq^LPDOY)X3$P><5xo+`-dDG%Uf~L$6pC3PE5|>c9_Zdw;)-LgM z6|p5uKc$XY&9dU!$|b_EPK1rEEkxlQ<0BshzqU(19b&kS9Cet(o;sbOP%va4i`R6= xui;KtpEwzI^z$(Ucr!AIFynG9#7!Wwfe}O^+#cY~$_5f)1VRfSy#~Z#008uALP`Jt literal 0 HcmV?d00001 From fade52f796de54b5b803e3981bdd4a479f8f5153 Mon Sep 17 00:00:00 2001 From: Toon Verstraelen Date: Tue, 7 Sep 2021 20:36:32 +0200 Subject: [PATCH 2/4] Improve NDArraysRegression code Changes: - Add statistics of differing elements. - Add support for complex and string arrays. - Add support for 0-D arrays. - Add more unit tests. - Also raise AssertionError when new items are present in the dictionary. - Simplified code and clarified error messages. --- .../dataframe_regression.py | 5 +- src/pytest_regressions/ndarrays_regression.py | 160 ++++++---- tests/test_dataframe_regression.py | 5 + tests/test_ndarrays_regression.py | 290 +++++++++++++++--- .../test_common_case.npz | Bin 580 -> 8082 bytes .../test_common_case_nd.npz | Bin 455 -> 567 bytes .../test_complex_array.npz | Bin 0 -> 228 bytes .../test_missing_obtained.npz | Bin 0 -> 397 bytes .../test_new_obtained.npz | Bin 0 -> 210 bytes .../test_object_dtype.npz | Bin 0 -> 404 bytes .../test_ndarrays_regression/test_scalars.npz | Bin 0 -> 391 bytes 11 files changed, 365 insertions(+), 95 deletions(-) create mode 100644 tests/test_ndarrays_regression/test_complex_array.npz create mode 100644 tests/test_ndarrays_regression/test_missing_obtained.npz create mode 100644 tests/test_ndarrays_regression/test_new_obtained.npz create mode 100644 tests/test_ndarrays_regression/test_object_dtype.npz create mode 100644 tests/test_ndarrays_regression/test_scalars.npz diff --git a/src/pytest_regressions/dataframe_regression.py b/src/pytest_regressions/dataframe_regression.py index a6d4e5b..bfe4617 100644 --- a/src/pytest_regressions/dataframe_regression.py +++ b/src/pytest_regressions/dataframe_regression.py @@ -123,8 +123,7 @@ def _check_fn(self, obtained_filename, expected_filename): self._check_data_types(k, obtained_column, expected_column) self._check_data_shapes(obtained_column, expected_column) - data_type = obtained_column.values.dtype - if data_type in [float, np.float16, np.float32, np.float64]: + if np.issubdtype(obtained_column.values.dtype, np.inexact): not_close_mask = ~np.isclose( obtained_column.values, expected_column.values, @@ -138,7 +137,7 @@ def _check_fn(self, obtained_filename, expected_filename): diff_ids = np.where(not_close_mask)[0] diff_obtained_data = obtained_column[diff_ids] diff_expected_data = expected_column[diff_ids] - if data_type == bool: + if obtained_column.values.dtype == bool: diffs = np.logical_xor(obtained_column, expected_column)[diff_ids] else: diffs = np.abs(obtained_column - expected_column)[diff_ids] diff --git a/src/pytest_regressions/ndarrays_regression.py b/src/pytest_regressions/ndarrays_regression.py index 433f73d..9ff3b50 100644 --- a/src/pytest_regressions/ndarrays_regression.py +++ b/src/pytest_regressions/ndarrays_regression.py @@ -7,7 +7,7 @@ class NDArraysRegressionFixture: """ THRESHOLD = 100 - ROWFORMAT = "{:>10s} {:>20s} {:>20s} {:>20s}\n" + ROWFORMAT = "{:>15s} {:>20s} {:>20s} {:>20s}\n" def __init__(self, datadir, original_datadir, request): """ @@ -36,21 +36,24 @@ def _check_data_types(self, key, obtained_array, expected_array): __tracebackhide__ = True - obtained_data_type = obtained_array.dtype - expected_data_type = expected_array.dtype - if obtained_data_type != expected_data_type: + if obtained_array.dtype != expected_array.dtype: # Check if both data types are comparable as numbers (float, int, short, bytes, etc...) - if np.issubdtype(obtained_data_type, np.number) and np.issubdtype( - expected_data_type, np.number + if np.issubdtype(obtained_array.dtype, np.number) and np.issubdtype( + expected_array.dtype, np.number + ): + return + # Check if both data types are comparable as strings + if np.issubdtype(obtained_array.dtype, str) and np.issubdtype( + expected_array.dtype, str ): return # In case they are not, assume they are not comparable error_msg = ( "Data types are not the same.\n" - "key: %s\n" - "Obtained: %s\n" - "Expected: %s\n" % (key, obtained_data_type, expected_data_type) + f"key: {key}\n" + f"Obtained: {obtained_array.dtype}\n" + f"Expected: {expected_array.dtype}\n" ) raise AssertionError(error_msg) @@ -61,14 +64,12 @@ def _check_data_shapes(self, key, obtained_array, expected_array): """ __tracebackhide__ = True - obtained_data_shape = obtained_array.shape - expected_data_shape = expected_array.shape - if obtained_data_shape != expected_data_shape: + if obtained_array.shape != expected_array.shape: error_msg = ( "Shapes are not the same.\n" - "Key: %s\n" - "Obtained: %s\n" - "Expected: %s\n" % (key, obtained_data_shape, expected_data_shape) + f"Key: {key}\n" + f"Obtained: {obtained_array.shape}\n" + f"Expected: {expected_array.shape}\n" ) raise AssertionError(error_msg) @@ -90,31 +91,33 @@ def _check_fn(self, obtained_filename, expected_filename): obtained_data = dict(np.load(str(obtained_filename))) expected_data = dict(np.load(str(expected_filename))) + # Check mismatches in the keys. + if set(obtained_data) != set(expected_data): + error_msg = ( + "They keys in the obtained results differ from the expected results.\n" + ) + error_msg += " Matching keys: " + error_msg += str(list(set(obtained_data) & set(expected_data))) + error_msg += "\n" + error_msg += " New in obtained: " + error_msg += str(list(set(obtained_data) - set(expected_data))) + error_msg += "\n" + error_msg += " Missing from obtained: " + error_msg += str(list(set(expected_data) - set(obtained_data))) + error_msg += "\n" + error_msg += "To update values, use --force-regen option.\n\n" + raise AssertionError(error_msg) + + # Compare the contents of the arrays. comparison_tables_dict = {} - for k in obtained_data.keys(): - obtained_array = obtained_data[k] + for k, obtained_array in obtained_data.items(): expected_array = expected_data.get(k) - - if expected_array is None: - error_msg = f"Could not find key '{k}' in the expected results.\n" - error_msg += "Keys in the obtained data table: [" - for k in obtained_data.keys(): - error_msg += f"'{k}', " - error_msg += "]\n" - error_msg += "Keys in the expected data table: [" - for k in expected_data.keys(): - error_msg += f"'{k}', " - error_msg += "]\n" - error_msg += "To update values, use --force-regen option.\n\n" - raise AssertionError(error_msg) - tolerance_args = self._tolerances_dict.get(k, self._default_tolerance) self._check_data_types(k, obtained_array, expected_array) self._check_data_shapes(k, obtained_array, expected_array) - data_type = obtained_array.dtype - if data_type in [float, np.float16, np.float32, np.float64]: + if np.issubdtype(obtained_array.dtype, np.inexact): not_close_mask = ~np.isclose( obtained_array, expected_array, @@ -125,27 +128,77 @@ def _check_fn(self, obtained_filename, expected_filename): not_close_mask = obtained_array != expected_array if np.any(not_close_mask): - diff_ids = np.nonzero(not_close_mask) + if not_close_mask.ndim == 0: + diff_ids = [()] + else: + diff_ids = np.array(np.nonzero(not_close_mask)).T comparison_tables_dict[k] = ( - np.array(diff_ids).T, - obtained_array[diff_ids], - expected_array[diff_ids], + expected_array.size, + expected_array.shape, + diff_ids, + obtained_array[not_close_mask], + expected_array[not_close_mask], ) if len(comparison_tables_dict) > 0: error_msg = "Values are not sufficiently close.\n" error_msg += "To update values, use --force-regen option.\n\n" for k, ( + size, + shape, diff_ids, obtained_array, expected_array, ) in comparison_tables_dict.items(): + # Summary + error_msg += f"{k}:\n Shape: {shape}\n" + pct = 100 * len(diff_ids) / size + error_msg += ( + f" Number of differences: {len(diff_ids)} / {size} ({pct:.1f}%)\n" + ) + if np.issubdtype(obtained_array.dtype, np.number) and len(diff_ids) > 1: + error_msg += ( + " Statistics are computed for differing elements only.\n" + ) + + abs_errors = abs(obtained_array - expected_array) + error_msg += " Stats for abs(obtained - expected):\n" + error_msg += f" Max: {abs_errors.max()}\n" + error_msg += f" Mean: {abs_errors.mean()}\n" + error_msg += f" Median: {np.median(abs_errors)}\n" + + error_msg += ( + f" Stats for abs(obtained - expected) / abs(expected):\n" + ) + expected_nonzero = np.array(np.nonzero(expected_array)).T + rel_errors = abs( + ( + obtained_array[expected_nonzero] + - expected_array[expected_nonzero] + ) + / expected_array[expected_nonzero] + ) + if len(rel_errors) != len(abs_errors): + pct = 100 * len(rel_errors) / len(abs_errors) + error_msg += f" Number of (differing) non-zero expected results: {len(rel_errors)} / {len(abs_errors)} ({pct:.1f}%)\n" + error_msg += f" Relative errors are computed for the non-zero expected results.\n" + else: + rel_errors = abs( + (obtained_array - expected_array) / expected_array + ) + error_msg += f" Max: {rel_errors.max()}\n" + error_msg += f" Mean: {rel_errors.mean()}\n" + error_msg += f" Median: {np.median(rel_errors)}\n" + + # Details results + error_msg += " Individual errors:\n" if len(diff_ids) > self.THRESHOLD: - error_msg += f"Only showing first {self.THRESHOLD} mismatches.\n" + error_msg += ( + f" Only showing first {self.THRESHOLD} mismatches.\n" + ) diff_ids = diff_ids[: self.THRESHOLD] obtained_array = obtained_array[: self.THRESHOLD] expected_array = expected_array[: self.THRESHOLD] - error_msg += f"{k}:\n" error_msg += self.ROWFORMAT.format( "Index", "Obtained", @@ -155,15 +208,18 @@ def _check_fn(self, obtained_filename, expected_filename): for diff_id, obtained, expected in zip( diff_ids, obtained_array, expected_array ): + diff_id_str = ", ".join(str(i) for i in diff_id) + if len(diff_id) != 1: + diff_id_str = f"({diff_id_str})" error_msg += self.ROWFORMAT.format( - ",".join(str(i) for i in diff_id), + diff_id_str, str(obtained), str(expected), str(obtained - expected) if isinstance(obtained, np.number) else "", ) - error_msg += "\n\n" + error_msg += "\n" raise AssertionError(error_msg) def _dump_fn(self, data_object, filename): @@ -249,22 +305,12 @@ def test_some_data(ndarrays_regression): data_dict[key] = np.asarray(array) for key, array in data_dict.items(): - # Skip assertion if an array of strings - if (array.dtype == "O") and (type(array[0]) is str): - continue - # Rejected: timedelta, datetime, objects, zero-terminated bytes, unicode strings and raw data - assert array.dtype not in [ - "m", - "M", - "O", - "S", - "a", - "U", - "V", - ], "Only numeric data is supported on ndarrays_regression fixture.\n" "Array '%s' with type '%s' was given.\n" % ( - key, - str(array.dtype), - ) + # Rejected: timedelta, datetime, objects, zero-terminated bytes and raw data + if array.dtype in ["m", "M", "O", "S", "a", "V"]: + raise TypeError( + "Only numeric data is supported on ndarrays_regression fixture.\n" + f"Array '{key}' with type '{array.dtype}' was given.\n" + ) if tolerances is None: tolerances = {} diff --git a/tests/test_dataframe_regression.py b/tests/test_dataframe_regression.py index 416c81d..687c000 100644 --- a/tests/test_dataframe_regression.py +++ b/tests/test_dataframe_regression.py @@ -238,6 +238,11 @@ def test_string_array(dataframe_regression): data1 = {"potato": ["delicious", "nutritive", "yummy"]} dataframe_regression.check(pd.DataFrame.from_dict(data1)) + # TODO: The following fails with a confusing error message. + # Try wrong data + # data1 = {"potato": ["delicious", "nutritive", "yikes"]} + # dataframe_regression.check(pd.DataFrame.from_dict(data1)) + def test_non_pandas_dataframe(dataframe_regression): data = np.ones(shape=(10, 10)) diff --git a/tests/test_ndarrays_regression.py b/tests/test_ndarrays_regression.py index 2dcb607..0cb89a2 100644 --- a/tests/test_ndarrays_regression.py +++ b/tests/test_ndarrays_regression.py @@ -53,13 +53,13 @@ def compare_arrays(obtained, expected): def test_common_case(ndarrays_regression, no_regen): # Most common case: Data is valid, is present and should pass - data1 = 1.1 * np.ones(5000) - data2 = 2.2 * np.ones(5000) + data1 = np.full(5000, 1.1, dtype=float) + data2 = np.arange(5000, dtype=int) ndarrays_regression.check({"data1": data1, "data2": data2}) # Assertion error case 1: Data has one invalid place - data1 = 1.1 * np.ones(5000) - data2 = 2.2 * np.ones(5000) + data1 = np.full(5000, 1.1, dtype=float) + data2 = np.arange(5000, dtype=int) data1[500] += 0.1 with pytest.raises(AssertionError) as excinfo: ndarrays_regression.check({"data1": data1, "data2": data2}) @@ -74,18 +74,22 @@ def test_common_case(ndarrays_regression, no_regen): expected = "\n".join( [ "data1:", - " Index Obtained Expected Difference", - " 500 1.2000000000000002 1.1 0.10000000000000009", + " Shape: (5000,)", + " Number of differences: 1 / 5000 (0.0%)", + " Individual errors:", + " Index Obtained Expected Difference", + " 500 1.2000000000000002 1.1 0.10000000000000009", ] ) assert expected in obtained_error_msg # Assertion error case 2: More than one invalid data - data1 = 1.1 * np.ones(5000) - data2 = 2.2 * np.ones(5000) + data1 = np.full(5000, 1.1, dtype=float) + data2 = np.arange(5000, dtype=int) data1[500] += 0.1 data1[600] += 0.2 - data2[700] += 0.3 + data2[0] += 5 + data2[700:900] += 5 with pytest.raises(AssertionError) as excinfo: ndarrays_regression.check({"data1": data1, "data2": data2}) obtained_error_msg = str(excinfo.value) @@ -99,17 +103,46 @@ def test_common_case(ndarrays_regression, no_regen): expected = "\n".join( [ "data1:", - " Index Obtained Expected Difference", - " 500 1.2000000000000002 1.1 0.10000000000000009", - " 600 1.3 1.1 0.19999999999999996", + " Shape: (5000,)", + " Number of differences: 2 / 5000 (0.0%)", + " Statistics are computed for differing elements only.", + " Stats for abs(obtained - expected):", + " Max: 0.19999999999999996", + " Mean: 0.15000000000000002", + " Median: 0.15000000000000002", + " Stats for abs(obtained - expected) / abs(expected):", + " Max: 0.18181818181818177", + " Mean: 0.13636363636363638", + " Median: 0.13636363636363638", + " Individual errors:", + " Index Obtained Expected Difference", + " 500 1.2000000000000002 1.1 0.10000000000000009", + " 600 1.3 1.1 0.19999999999999996", ] ) assert expected in obtained_error_msg expected = "\n".join( [ "data2:", - " Index Obtained Expected Difference", - " 700 2.5 2.2 0.2999999999999998", + " Shape: (5000,)", + " Number of differences: 201 / 5000 (4.0%)", + " Statistics are computed for differing elements only.", + " Stats for abs(obtained - expected):", + " Max: 5", + " Mean: 5.0", + " Median: 5.0", + " Stats for abs(obtained - expected) / abs(expected):", + " Number of (differing) non-zero expected results: 200 / 201 (99.5%)", + " Relative errors are computed for the non-zero expected results.", + " Max: 0.007142857142857143", + " Mean: 0.006286830640674575", + " Median: 0.006253911138923655", + " Individual errors:", + " Only showing first 100 mismatches.", + " Index Obtained Expected Difference", + " 0 5 0 5", + " 700 705 700 5", + " 701 706 701 5", ] ) assert expected in obtained_error_msg @@ -117,13 +150,13 @@ def test_common_case(ndarrays_regression, no_regen): def test_common_case_nd(ndarrays_regression, no_regen): # Most common case: Data is valid, is present and should pass - data1 = 1.1 * np.ones((50, 20)) - data2 = 2.2 * np.ones((3, 4, 5)) + data1 = np.full((50, 20), 1.1, dtype=float) + data2 = np.arange(60, dtype=int).reshape((3, 4, 5)) ndarrays_regression.check({"data1": data1, "data2": data2}) # Assertion error case 1: Data has one invalid place - data1 = 1.1 * np.ones((50, 20)) - data2 = 2.2 * np.ones((3, 4, 5)) + data1 = np.full((50, 20), 1.1, dtype=float) + data2 = np.arange(60, dtype=int).reshape((3, 4, 5)) data1[30, 2] += 0.1 with pytest.raises(AssertionError) as excinfo: ndarrays_regression.check({"data1": data1, "data2": data2}) @@ -138,18 +171,21 @@ def test_common_case_nd(ndarrays_regression, no_regen): expected = "\n".join( [ "data1:", - " Index Obtained Expected Difference", - " 30,2 1.2000000000000002 1.1 0.10000000000000009", + " Shape: (50, 20)", + " Number of differences: 1 / 1000 (0.1%)", + " Individual errors:", + " Index Obtained Expected Difference", + " (30, 2) 1.2000000000000002 1.1 0.10000000000000009", ] ) assert expected in obtained_error_msg # Assertion error case 2: More than one invalid data - data1 = 1.1 * np.ones((50, 20)) - data2 = 2.2 * np.ones((3, 4, 5)) + data1 = np.full((50, 20), 1.1, dtype=float) + data2 = np.arange(60, dtype=int).reshape((3, 4, 5)) data1[20, 15] += 0.1 data1[0, 9] = 1.43248324e35 - data2[2, 3, 4] += 0.3 + data2[:2, 0, [0, 2, 4]] += 71 with pytest.raises(AssertionError) as excinfo: ndarrays_regression.check({"data1": data1, "data2": data2}) obtained_error_msg = str(excinfo.value) @@ -163,17 +199,48 @@ def test_common_case_nd(ndarrays_regression, no_regen): expected = "\n".join( [ "data1:", - " Index Obtained Expected Difference", - " 0,9 1.43248324e+35 1.1 1.43248324e+35", - " 20,15 1.2000000000000002 1.1 0.10000000000000009", + " Shape: (50, 20)", + " Number of differences: 2 / 1000 (0.2%)", + " Statistics are computed for differing elements only.", + " Stats for abs(obtained - expected):", + " Max: 1.43248324e+35", + " Mean: 7.1624162e+34", + " Median: 7.1624162e+34", + " Stats for abs(obtained - expected) / abs(expected):", + " Max: 1.3022574909090907e+35", + " Mean: 6.511287454545454e+34", + " Median: 6.511287454545454e+34", + " Individual errors:", + " Index Obtained Expected Difference", + " (0, 9) 1.43248324e+35 1.1 1.43248324e+35", + " (20, 15) 1.2000000000000002 1.1 0.10000000000000009", ] ) assert expected in obtained_error_msg expected = "\n".join( [ "data2:", - " Index Obtained Expected Difference", - " 2,3,4 2.5 2.2 0.2999999999999998", + " Shape: (3, 4, 5)", + " Number of differences: 6 / 60 (10.0%)", + " Statistics are computed for differing elements only.", + " Stats for abs(obtained - expected):", + " Max: 71", + " Mean: 71.0", + " Median: 71.0", + " Stats for abs(obtained - expected) / abs(expected):", + " Number of (differing) non-zero expected results: 5 / 6 (83.3%)", + " Relative errors are computed for the non-zero expected results.", + " Max: 35.5", + " Mean: 12.597121212121213", + " Median: 3.55", + " Individual errors:", + " Index Obtained Expected Difference", + " (0, 0, 0) 71 0 71", + " (0, 0, 2) 73 2 71", + " (0, 0, 4) 75 4 71", + " (1, 0, 0) 91 20 71", + " (1, 0, 2) 93 22 71", + " (1, 0, 4) 95 24 71", ] ) assert expected in obtained_error_msg @@ -197,7 +264,7 @@ def __init__(self, bar): def test_object_dtype(ndarrays_regression, no_regen): data1 = {"data1": np.array([Foo(i) for i in range(4)], dtype=object)} with pytest.raises( - AssertionError, + TypeError, match="Only numeric data is supported on ndarrays_regression fixture.\n" " *Array 'data1' with type '%s' was given." % (str(data1["data1"].dtype),), ): @@ -215,7 +282,12 @@ def test_number_formats(ndarrays_regression): def test_bool_array(ndarrays_regression, no_regen): - data1 = np.array([True, True, True], dtype=bool) + # Correct data + data1 = np.array([False, False, False], dtype=bool) + ndarrays_regression.check({"data1": data1}) + + # Data with errors + data1 = np.array([True, True, False], dtype=bool) with pytest.raises(AssertionError) as excinfo: ndarrays_regression.check({"data1": data1}) obtained_error_msg = str(excinfo.value) @@ -229,15 +301,49 @@ def test_bool_array(ndarrays_regression, no_regen): expected = "\n".join( [ "data1:", - " Index Obtained Expected Difference", - " 0 True False ", - " 1 True False ", - " 2 True False ", + " Shape: (3,)", + " Number of differences: 2 / 3 (66.7%)", + " Individual errors:", + " Index Obtained Expected Difference", + " 0 True False ", + " 1 True False ", ] ) assert expected in obtained_error_msg +def test_complex_array(ndarrays_regression, no_regen): + # Correct data + data1 = np.array([3.0 + 2.5j, -0.5, -1.879j]) + ndarrays_regression.check({"data1": data1}) + + # Data with errors + data1 = np.array([3.0 + 2.5j, 0.5, -1.879]) + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check({"data1": data1}) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "data1:", + " Shape: (3,)", + " Number of differences: 2 / 3 (66.7%)", + " Statistics are computed for differing elements only.", + " Stats for abs(obtained - expected):", + " Max: 2.6573072836990455", + " Mean: 1.8286536418495227", + " Median: 1.8286536418495227", + " Stats for abs(obtained - expected) / abs(expected):", + " Max: 2.0", + " Mean: 1.7071067811865475", + " Median: 1.7071067811865475", + " Individual errors:", + " Index Obtained Expected Difference", + " 1 (0.5+0j) (-0.5+0j) (1+0j)", + " 2 (-1.879+0j) (-0-1.879j) (-1.879+1.879j)", + ] + ) + + def test_arrays_of_same_size_1d(ndarrays_regression): same_size_int_arrays = { "hello": np.zeros((1,), dtype=int), @@ -273,7 +379,6 @@ def test_arrays_with_different_shapes(ndarrays_regression): with pytest.raises(AssertionError) as excinfo: ndarrays_regression.check(same_size_int_arrays) obtained_error_msg = str(excinfo.value) - print(obtained_error_msg) expected = "\n".join( [ "Shapes are not the same.", @@ -285,10 +390,91 @@ def test_arrays_with_different_shapes(ndarrays_regression): assert expected in obtained_error_msg +def test_scalars(ndarrays_regression): + data = {"data1": 4.0, "data2": 42} + ndarrays_regression.check(data) + + data = {"data1": np.array([4.0]), "data2": np.array([42, 21])} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Shapes are not the same.", + "Key: data1", + "Obtained: (1,)", + "Expected: ()", + ] + ) + assert expected in obtained_error_msg + + data = {"data1": 5.0, "data2": 21} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "data1:", + " Shape: ()", + " Number of differences: 1 / 1 (100.0%)", + " Individual errors:", + " Index Obtained Expected Difference", + " () 5.0 4.0 1.0", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data2:", + " Shape: ()", + " Number of differences: 1 / 1 (100.0%)", + " Individual errors:", + " Index Obtained Expected Difference", + " () 21 42 -21", + ] + ) + assert expected in obtained_error_msg + + def test_string_array(ndarrays_regression): data1 = {"potato": ["delicious", "nutritive", "yummy"]} ndarrays_regression.check(data1) + # Try wrong data + data1 = {"potato": ["delicious", "nutritive", "yikes"]} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data1) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "potato:", + " Shape: (3,)", + " Number of differences: 1 / 3 (33.3%)", + " Individual errors:", + " Index Obtained Expected Difference", + " 2 yikes yummy ", + ] + ) + assert expected in obtained_error_msg + + # Try data with incompatible dtype + data1 = {"potato": ["disgusting", "nutritive", "yikes"]} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data1) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "potato:", + " Shape: (3,)", + " Number of differences: 2 / 3 (66.7%)", + " Individual errors:", + " Index Obtained Expected Difference", + " 0 disgusting delicious ", + " 2 yikes yummy ", + ] + ) + assert expected in obtained_error_msg + def test_non_dict(ndarrays_regression): data = np.ones(shape=(10, 10)) @@ -299,3 +485,37 @@ def test_non_dict(ndarrays_regression): % (str(type(data)),), ): ndarrays_regression.check(data) + + +def test_new_obtained(ndarrays_regression): + # The original array only contains ar1. + data1 = {"ar1": np.array([2.3, 9.4]), "ar2": np.array([3, 4, 9])} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data1) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "They keys in the obtained results differ from the expected results.", + " Matching keys: ['ar1']", + " New in obtained: ['ar2']", + " Missing from obtained: []", + ] + ) + assert expected in obtained_error_msg + + +def test_missing_obtained(ndarrays_regression): + # The original array also contains ar2. + data1 = {"ar1": np.array([2.3, 9.4])} # , "ar2": np.array([3, 4, 9])} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data1) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "They keys in the obtained results differ from the expected results.", + " Matching keys: ['ar1']", + " New in obtained: []", + " Missing from obtained: ['ar2']", + ] + ) + assert expected in obtained_error_msg diff --git a/tests/test_ndarrays_regression/test_common_case.npz b/tests/test_ndarrays_regression/test_common_case.npz index df5b7b2814cf83a420a260ce8ce3f9a3be7e9d93..2251dad4fb0dd17fa01f84e98f51bc255447f3ef 100644 GIT binary patch literal 8082 zcmeI1doKNn?YnoWokIJ_gZJ2*=wCY_E~$KuCCAcS?#k2t)ul(?lHo0xZ&{6(%;IPQUUTf>|tul2!(?qAM zFI~Bn_PXgM4*fZU(|K$<+NRO9{ZHp2&bIwoHM=y4e`%}yZc_i1_nTRcE?&OM>uR$wpUn$zzKg#T@UI|)0{{4&JmFnZc_>e=<_ z?*BP{T4p_3LFZw+Q1y_m3+ENt4^Zd^pi_{$Z$7os!W4$*RzZJ`zSv ze0^0Yb#nar$z(fT;julJ{k`dvKtFw+FwigUyMEF3bQH(^)~`Q5ivDBOoaff|oc&tf z6$4YkVN=opQzF!qymu=AcdUd2EFmIG$i|_BqoE|*P~yo@GC)9p2uNT75iKC^@gN-W zARY7|!ad0QS_vmwNrzgAr&`ITB!UNtWKSaclgO4Tf{%*itRe=f$cFX=S9_ARJrQP4 zHg6ytZyXJvQ z%05-;pQ^T8Uh-L1IWH@Nmeq#NORml;YiA|QS#8c)I?hpnI7$>pZIZa;o~W`*RQe^V z_s=eQ&#Ihem8WObMnOw%K`NUdB_c>|(ZA%?uX5~H2KKAXa+f@FRSvnzfLyiJ){^g* z%4JI#y!C9=4EvsGGS(D#(G;Ivg8iT*`I8b{TnRoq6#IT?G9eWAaVY*X0X9OAOcCG` z1o#{e>{*ZGWDnd&9{7w_?1!z%pSI$zwBkP}VM9sDBoZ!$gwIrABUQ<%DqNxppKFgj zW1oz($6d0=QyZ}78*0X*713k^Y2+MVl5XgS}M}3 z=0B)f{G@6ju1b*|Isbm-A|Z0&<4DD4()kGKB1O88AXVh}%%AmHO!is$$VZXUKL26+ z;-~EkSK1Yyr_P6_E|O9gVp0{E%kz=Ti>b>CiOY&y=lL_vi#X?nOU?=^XZ}2A@fv3V z!%@%^=g%cB5)&6LCn|2t&R>{aygs{dbyjgJXg(xp5g)V=9i*W3&xiLfrt~kw_bYDZ z&WGhLl5-bga}`-z^HE!iX=T9*$PmRb#*qwhHbZ6{G5UUlegAwp^!A^JFC{A7gqvj`bLG8!R~KqO-n zi4-gmB}m565^;`1wg)nL79u$U8B2yp4?;vALB`<_aRx-TuWj_hHpz*$u}|BihuTC} z+Qv_{i9c_XnNmhWDH0FL7>Odar-))G&XGFgh+=cb z19HS!IWnuw(Wp&{@8($ArqpFql(ab>yeZDxT=^o{zzBZ82(h;i4lG0%hrn$^5CA?L z%t!2TgCBH5?0Wz|^Z;Rshuh;3mI}DD0%2$ax3)o;*TF$`2$N{IT{L3<3%Jt@gb@O6 zgFskt;f`E{84d10Ls;qHE;_D(Mb`lf?%s+nU9SAZTE6OXe#JF(?6P*`nm2WUnz$zMU3T%@{nK4e(_Ev#E}KBE#q%!5=UlU! zT@E+7R&Tmo-p~xpG7gy0_LgP3SP4b)>#a!IS%N|^41*kbXMDIF5A!Ntp@V}!-D|c z7XZOLVCXJj%a;RxOzBLqtk zL-!&^{yl>Kb%gL|$sj{AERpcqBm#|Oh%FhJk?{K@!cEBFZOAYW!utb6um~Bdg^c_g zgx>`buC)!8w+)ZA@&42%_;cIP{kD<+XygBDoA6i4U*GnJ`qJF!DnJ|CXtn zWV(<4yifQhXRtJ9SeV0mm?Kc<3^nGAOy=;Pf8QyU;yz`{E10wE3dvzS~>V!Y;IPtjiR7!_ON~iy;4xd+@L5>}+ zj-9Zkj^j<8sQ3=|_)fp+4)5vC(}5jsft`rw9bV5n18;VC-s}u`)8YH3GuSNsuo)Fv zntrU5iVRCX8b&=SOos@mXwUQ`o>ch5^b-%Mr^x9ZWU9Y9-A7Fga!7Y|pu!r{k2g|L zvFYxyRKLk|?@8+EfONM2D&kqX*E4EhR=Q^vHQ;r+?`vwX)#_m@Ewp;|ShW@zwR$v4 zds4Ovk!jJst4Dmb@TaRMo@!5}t$L(s{a03fRK3d;46J$$XanLfBse9v7Z11VsCsX&YPc{5B#c`UdX`W(TovMF1)qHxY z?%S!RTT_j{O|=*y?-nANA;^1tB-;&H`v7?#k8DsNIX1|eIwUI^S^omrj6l|LkxevY zqYl|((Q~(=hZ){;Pu#=y>ZyI)b3di0;Z+aEv8Se~hZWybKi$(D*i-ksr|D)-G26Z;aA-kqAn-!Z~KbhSekX`pIyD2NX@pX2K)yCcG z4QABFJ=q4^ccb>{#{IO7hLsJD%SKJh1}kZ!er}^Vc%yD$qbYBraeHIVY_}D9x2=$G zkLPYH^iEqL3ysC!*9vVPJg_SS|5zdTVs@9v-YJp!?h@HMCNeLHO}-)MP64Rs42UrV zy6p-o{}NP!163`63amk8HK5{4pvvz-g)mS>C#aMPs@Agd%~^NKSViYq3=!-0aaQ>w zR>?J1)sL(K5UY&ED#oxX<*Y&!tD=`xN@rE;G5IE#JH?oya~Q@5=C(Vg{3}ce5mTka z6xd

M_NaF_k}H3jHt@-(X5_V5)zS=kJ%_sgxI8kTb^Rx4q@%Pvj-nU(HAG_E9dlu!TO2;eQBP)dfRU4 z?-YVvCjJLA@jk8TE)%~?Cf??4yX+#_J4LdAz1v~GvuH{PyAI9&uS4UzxNW_w#qU&$ zD}K&1{^yZ`ZKI3fhI?#{{x?7TudeXUoNu@@|F=8-x7U8}GVi4RHS!G?-Y#~o{l#;> U7;a*^bFk;_llt~+>9Vu?FC%A6=>Px# delta 176 zcmbPae}rYid$zFKAH#VVlge zJyAe?cEpk81}`t~+S~Ng`sw38@88yKwq9~lx$@YmcmA<4Z9B5W47oRE?ye4x>(33p znjAlU`aZd;dcFLwAC`Z%IlnnlEbifb-+l87GaV O6qX!-nt_ys(h`z5r+Q<($RY&6l&qtFEoJjkF#A-IHS>C!a&9 zg#2*5U(fn}b3NNR>{IK@clNu#vVNR>H+o9+l`xx1Y{hXo)rgJ(3}OgRFpNwQjA9J` z56|!%Vb<#Tr`H5eJfRRB%m4wi--y4G7C3n#;8?)gd6S~%#4O2Mx*%_I+QM~< z7tEU$9}+ZWhWPyWDU-N_%DvBM`muJ2r>lr9Vfra`%!*6Rifh>{E3U0vPZ-xl8ap;z bOq?P&&xB#}W=402MU($C8nHPuf@}f+Q3Nrj diff --git a/tests/test_ndarrays_regression/test_complex_array.npz b/tests/test_ndarrays_regression/test_complex_array.npz new file mode 100644 index 0000000000000000000000000000000000000000..96bfda2494d8f9ef1284f456668afc34696ae973 GIT binary patch literal 228 zcmWIWW@Zs#U|`??Vnv1n$&Y@<09hM=n3F+-AtkXS(NHh1ppub604xX;X8h-t_f;C}e=`PnGct)V<8m0p ceh}Hf2qNLm4De=U0|_t!p*fJA4&pEX01L4`o&W#< literal 0 HcmV?d00001 diff --git a/tests/test_ndarrays_regression/test_missing_obtained.npz b/tests/test_ndarrays_regression/test_missing_obtained.npz new file mode 100644 index 0000000000000000000000000000000000000000..3c44c649a5edefb6cbdb4cdbdc7f3d8db77167f4 GIT binary patch literal 397 zcmWIWW@Zs#U|`??Vnv32z3>h{AZr2;vonY=Bo-O!oVBWO&kf13u#OKFPnZzYj?tMnnkF`rYT}5mO z(@!Zkt65fDTe(CS)|nV26j@ZWu}qO+2taYhd=2}lzCb%>05QZFMlff9gg^i!4FWiv zkx9@Q$6(ITOJn0mXuLEhKnmyzMkWzvT)_dc5JWaGf=FoKpld>PJV+q~G%&UU8Hq(; UuLO9rvVnw|fG`V4^MXwT0GG>K_5c6? literal 0 HcmV?d00001 diff --git a/tests/test_ndarrays_regression/test_new_obtained.npz b/tests/test_ndarrays_regression/test_new_obtained.npz new file mode 100644 index 0000000000000000000000000000000000000000..ea7acbfd6cb9cc74eaa798a9d37e1539ccb3ff97 GIT binary patch literal 210 zcmWIWW@Zs#U|`??Vnv32z3>h{AZr2;vonY=Bo-O!oVBWO&kf13u#OKFPnZzYj?tMnnkF`rYT}5mO z(@!Zkt65fDTe(CS)|nV26j@ZWu}qO+2=HcP5@E*W5QvQ+vVjpqLY)=h&B_MiG6JC~ JkZuQY7yt|7E;;}J literal 0 HcmV?d00001 diff --git a/tests/test_ndarrays_regression/test_object_dtype.npz b/tests/test_ndarrays_regression/test_object_dtype.npz new file mode 100644 index 0000000000000000000000000000000000000000..a0ce5a2c194c986139db6b202e6dea5e933199d6 GIT binary patch literal 404 zcmWIWW@Zs#U|`??Vnv3+x2kMBj0_AlK+MS?!jO_!l4z)xS5V2wAOIEws$u{s0D+}_ zzTSr&1lr6!|3=xr{%*K^i9pdS$2}DSRRxAqn*{iU$|heGxs{VvuFN8R!Fi?a@8;6% z&-`-^ANYLc^qCs>{_p($VpY!XC`CW&=Ukc@xHI9DTY&#BCl zyubf05Z}@Jd;TGLzo?MQoA`niD=pTlRrxGkV(O>j@49fynxJ0BwZS`1Tnpa7F{`;Y z+ps|Fg5<$Af1OJ+52s0=DTyuOiQd>NAQgA;+ox9FH_VLoY#F4@g(ft;~TBB#TD?P#35$9s*K zzqEyM#`cPP2U;E74c9%K)Ve>wn~_O`8CO6;!V5$;fWrwt^# literal 0 HcmV?d00001 diff --git a/tests/test_ndarrays_regression/test_scalars.npz b/tests/test_ndarrays_regression/test_scalars.npz new file mode 100644 index 0000000000000000000000000000000000000000..518fb61f79f600e77c89e8b1c0c3dc8bd4840d0a GIT binary patch literal 391 zcmWIWW@Zs#U|`??Vnv38 Date: Wed, 8 Sep 2021 07:42:45 +0200 Subject: [PATCH 3/4] Improve error messages and add tests for corner cases --- src/pytest_regressions/ndarrays_regression.py | 58 +++++++++++------ tests/test_ndarrays_regression.py | 65 +++++++++++++++++-- 2 files changed, 98 insertions(+), 25 deletions(-) diff --git a/src/pytest_regressions/ndarrays_regression.py b/src/pytest_regressions/ndarrays_regression.py index 9ff3b50..4f74417 100644 --- a/src/pytest_regressions/ndarrays_regression.py +++ b/src/pytest_regressions/ndarrays_regression.py @@ -167,9 +167,6 @@ def _check_fn(self, obtained_filename, expected_filename): error_msg += f" Mean: {abs_errors.mean()}\n" error_msg += f" Median: {np.median(abs_errors)}\n" - error_msg += ( - f" Stats for abs(obtained - expected) / abs(expected):\n" - ) expected_nonzero = np.array(np.nonzero(expected_array)).T rel_errors = abs( ( @@ -178,17 +175,23 @@ def _check_fn(self, obtained_filename, expected_filename): ) / expected_array[expected_nonzero] ) - if len(rel_errors) != len(abs_errors): - pct = 100 * len(rel_errors) / len(abs_errors) - error_msg += f" Number of (differing) non-zero expected results: {len(rel_errors)} / {len(abs_errors)} ({pct:.1f}%)\n" - error_msg += f" Relative errors are computed for the non-zero expected results.\n" + if len(rel_errors) == 0: + error_msg += " Relative errors are not reported because all expected values are zero.\n" else: - rel_errors = abs( - (obtained_array - expected_array) / expected_array + error_msg += ( + f" Stats for abs(obtained - expected) / abs(expected):\n" ) - error_msg += f" Max: {rel_errors.max()}\n" - error_msg += f" Mean: {rel_errors.mean()}\n" - error_msg += f" Median: {np.median(rel_errors)}\n" + if len(rel_errors) != len(abs_errors): + pct = 100 * len(rel_errors) / len(abs_errors) + error_msg += f" Number of (differing) non-zero expected results: {len(rel_errors)} / {len(abs_errors)} ({pct:.1f}%)\n" + error_msg += f" Relative errors are computed for the non-zero expected results.\n" + else: + rel_errors = abs( + (obtained_array - expected_array) / expected_array + ) + error_msg += f" Max: {rel_errors.max()}\n" + error_msg += f" Mean: {rel_errors.mean()}\n" + error_msg += f" Median: {np.median(rel_errors)}\n" # Details results error_msg += " Individual errors:\n" @@ -291,11 +294,12 @@ def test_some_data(ndarrays_regression): __tracebackhide__ = True - assert isinstance(data_dict, dict), ( - "Only dictionaries with NumPy arrays or array-like objects are " - "supported on ndarray_regression fixture.\n" - "Object with type '%s' was given. " % (str(type(data_dict)),) - ) + if not isinstance(data_dict, dict): + raise TypeError( + "Only dictionaries with NumPy arrays or array-like objects are " + "supported on ndarray_regression fixture.\n" + "Object with type '{}' was given.".format(str(type(data_dict))) + ) for key, array in data_dict.items(): assert isinstance( key, str @@ -305,11 +309,23 @@ def test_some_data(ndarrays_regression): data_dict[key] = np.asarray(array) for key, array in data_dict.items(): - # Rejected: timedelta, datetime, objects, zero-terminated bytes and raw data - if array.dtype in ["m", "M", "O", "S", "a", "V"]: + # Accepted: + # - b: boolean + # - i: signed integer + # - u: unsigned integer + # - f: floating-point number + # - c: complex floating-point number + # - U: unicode string + # Rejected: + # - m: timedelta + # - M: datetime + # - O: objects + # - S: zero-terminated bytes + # - V: void (raw data, structured arrays) + if array.dtype.kind not in ["b", "i", "u", "f", "c", "U"]: raise TypeError( - "Only numeric data is supported on ndarrays_regression fixture.\n" - f"Array '{key}' with type '{array.dtype}' was given.\n" + "Only numeric or unicode data is supported on ndarrays_regression " + f"fixture.\nArray '{key}' with type '{array.dtype}' was given." ) if tolerances is None: diff --git a/tests/test_ndarrays_regression.py b/tests/test_ndarrays_regression.py index 0cb89a2..db5a319 100644 --- a/tests/test_ndarrays_regression.py +++ b/tests/test_ndarrays_regression.py @@ -246,6 +246,44 @@ def test_common_case_nd(ndarrays_regression, no_regen): assert expected in obtained_error_msg +def test_common_case_zero_expected(ndarrays_regression, no_regen): + # Most common case: Data is valid, is present and should pass + data = {"data1": np.array([0, 0, 2, 3, 0, 5, 0, 7])} + ndarrays_regression.check(data) + + # Assertion error case: Only some zeros are not reproduced. + data = {"data1": np.array([1, 5, 2, 3, 0, 5, 3, 7])} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Values are not sufficiently close.", + "To update values, use --force-regen option.", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data1:", + " Shape: (8,)", + " Number of differences: 3 / 8 (37.5%)", + " Statistics are computed for differing elements only.", + " Stats for abs(obtained - expected):", + " Max: 5", + " Mean: 3.0", + " Median: 3.0", + " Relative errors are not reported because all expected values are zero.", + " Individual errors:", + " Index Obtained Expected Difference", + " 0 1 0 1", + " 1 5 0 5", + " 6 3 0 3", + ] + ) + assert expected in obtained_error_msg + + def test_different_data_types(ndarrays_regression, no_regen): # Original NPZ file contains integer data data1 = np.array([True] * 10) @@ -265,8 +303,8 @@ def test_object_dtype(ndarrays_regression, no_regen): data1 = {"data1": np.array([Foo(i) for i in range(4)], dtype=object)} with pytest.raises( TypeError, - match="Only numeric data is supported on ndarrays_regression fixture.\n" - " *Array 'data1' with type '%s' was given." % (str(data1["data1"].dtype),), + match="Only numeric or unicode data is supported on ndarrays_regression fixture.\n" + "Array 'data1' with type 'object' was given.", ): ndarrays_regression.check(data1) @@ -479,14 +517,33 @@ def test_string_array(ndarrays_regression): def test_non_dict(ndarrays_regression): data = np.ones(shape=(10, 10)) with pytest.raises( - AssertionError, + TypeError, match="Only dictionaries with NumPy arrays or array-like objects are supported " - "on ndarray_regression fixture.\n *Object with type '%s' was given." + "on ndarray_regression fixture.\nObject with type '%s' was given." % (str(type(data)),), ): ndarrays_regression.check(data) +def test_structured_array(ndarrays_regression): + data = { + "array": np.array( + [("spam", 1, 3.0), ("egg", 0, 4.3)], + dtype=[("item", "U5"), ("count", "i4"), ("price", "f8")], + ) + } + with pytest.raises(TypeError) as excinfo: + ndarrays_regression.check(data) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Only numeric or unicode data is supported on ndarrays_regression fixture.", + "Array 'array' with type '{}' was given.".format(data["array"].dtype), + ] + ) + assert expected in obtained_error_msg + + def test_new_obtained(ndarrays_regression): # The original array only contains ar1. data1 = {"ar1": np.array([2.3, 9.4]), "ar2": np.array([3, 4, 9])} From f03cdce163ff53d561964ca487c2d7599a4aebc8 Mon Sep 17 00:00:00 2001 From: Toon Verstraelen Date: Wed, 8 Sep 2021 21:10:32 +0200 Subject: [PATCH 4/4] Fix broken tests - Add missing files - Remove unused ones - Facilitate regeneration of all NPZ files. --- tests/test_ndarrays_regression.py | 61 ++++++++++++------ .../test_common_case_zero_expected.npz | Bin 0 -> 218 bytes .../test_different_data_types.npz | Bin 211 -> 211 bytes ...s.npz => test_float_values_smoke_test.npz} | Bin .../test_non_numeric_data_array0_.npz | Bin 226 -> 0 bytes .../test_object_dtype.npz | Bin 404 -> 0 bytes 6 files changed, 41 insertions(+), 20 deletions(-) create mode 100644 tests/test_ndarrays_regression/test_common_case_zero_expected.npz rename tests/test_ndarrays_regression/{test_number_formats.npz => test_float_values_smoke_test.npz} (100%) delete mode 100644 tests/test_ndarrays_regression/test_non_numeric_data_array0_.npz delete mode 100644 tests/test_ndarrays_regression/test_object_dtype.npz diff --git a/tests/test_ndarrays_regression.py b/tests/test_ndarrays_regression.py index db5a319..1fcbb85 100644 --- a/tests/test_ndarrays_regression.py +++ b/tests/test_ndarrays_regression.py @@ -285,13 +285,17 @@ def test_common_case_zero_expected(ndarrays_regression, no_regen): def test_different_data_types(ndarrays_regression, no_regen): - # Original NPZ file contains integer data - data1 = np.array([True] * 10) + # Generate data with integer array. + data = {"data1": np.array([1] * 10)} + ndarrays_regression.check(data) + + # Run check with incompatible type. + data = {"data1": np.array([True] * 10)} with pytest.raises( AssertionError, match="Data types are not the same.\nkey: data1\nObtained: bool\nExpected: int64\n", ): - ndarrays_regression.check({"data1": data1}) + ndarrays_regression.check(data) class Foo: @@ -314,7 +318,7 @@ def test_integer_values_smoke_test(ndarrays_regression, no_regen): ndarrays_regression.check({"data1": data1}) -def test_number_formats(ndarrays_regression): +def test_float_values_smoke_test(ndarrays_regression): data1 = np.array([1.2345678e50, 1.2345678e-50, 0.0]) ndarrays_regression.check({"data1": data1}) @@ -383,21 +387,24 @@ def test_complex_array(ndarrays_regression, no_regen): def test_arrays_of_same_size_1d(ndarrays_regression): - same_size_int_arrays = { + data = { "hello": np.zeros((1,), dtype=int), "world": np.zeros((1,), dtype=int), } - ndarrays_regression.check(same_size_int_arrays) + ndarrays_regression.check(data) def test_arrays_with_different_sizes_1d(ndarrays_regression, no_regen): + data = {"data1": np.ones(11, dtype=np.float64)} + ndarrays_regression.check(data) + # Original NPY file contains 11 elements. - data1 = np.ones(10, dtype=np.float64) + data = {"data1": np.ones(10, dtype=np.float64)} expected = re.escape( "Shapes are not the same.\nKey: data1\nObtained: (10,)\nExpected: (11,)\n" ) with pytest.raises(AssertionError, match=expected): - ndarrays_regression.check({"data1": data1}) + ndarrays_regression.check(data) def test_arrays_of_same_shape(ndarrays_regression): @@ -410,12 +417,14 @@ def test_arrays_of_same_shape(ndarrays_regression): def test_arrays_with_different_shapes(ndarrays_regression): - same_size_int_arrays = { - # Originally with shape (3, 4) - "2d": np.zeros((3, 2), dtype=int), - } + # Prepare data with one shape. + data = {"2d": np.zeros((3, 4), dtype=int)} + ndarrays_regression.check(data) + + # Check with other shape. + data = {"2d": np.zeros((3, 2), dtype=int)} with pytest.raises(AssertionError) as excinfo: - ndarrays_regression.check(same_size_int_arrays) + ndarrays_regression.check(data) obtained_error_msg = str(excinfo.value) expected = "\n".join( [ @@ -429,9 +438,11 @@ def test_arrays_with_different_shapes(ndarrays_regression): def test_scalars(ndarrays_regression): + # Initial data with scalars. data = {"data1": 4.0, "data2": 42} ndarrays_regression.check(data) + # Run check with non-scalar data. data = {"data1": np.array([4.0]), "data2": np.array([42, 21])} with pytest.raises(AssertionError) as excinfo: ndarrays_regression.check(data) @@ -446,6 +457,7 @@ def test_scalars(ndarrays_regression): ) assert expected in obtained_error_msg + # Other test case. data = {"data1": 5.0, "data2": 21} with pytest.raises(AssertionError) as excinfo: ndarrays_regression.check(data) @@ -475,10 +487,11 @@ def test_scalars(ndarrays_regression): def test_string_array(ndarrays_regression): + # Initial data. data1 = {"potato": ["delicious", "nutritive", "yummy"]} ndarrays_regression.check(data1) - # Try wrong data + # Run check with wrong data. data1 = {"potato": ["delicious", "nutritive", "yikes"]} with pytest.raises(AssertionError) as excinfo: ndarrays_regression.check(data1) @@ -545,10 +558,14 @@ def test_structured_array(ndarrays_regression): def test_new_obtained(ndarrays_regression): - # The original array only contains ar1. - data1 = {"ar1": np.array([2.3, 9.4]), "ar2": np.array([3, 4, 9])} + # Prepare data with one array. + data = {"ar1": np.array([2.3, 9.4])} + ndarrays_regression.check(data) + + # Run check with two arrays. + data = {"ar1": np.array([2.3, 9.4]), "ar2": np.array([3, 4, 9])} with pytest.raises(AssertionError) as excinfo: - ndarrays_regression.check(data1) + ndarrays_regression.check(data) obtained_error_msg = str(excinfo.value) expected = "\n".join( [ @@ -562,10 +579,14 @@ def test_new_obtained(ndarrays_regression): def test_missing_obtained(ndarrays_regression): - # The original array also contains ar2. - data1 = {"ar1": np.array([2.3, 9.4])} # , "ar2": np.array([3, 4, 9])} + # Prepare data with two arrays. + data = {"ar1": np.array([2.3, 9.4]), "ar2": np.array([3, 4, 9])} + ndarrays_regression.check(data) + + # Run check with just one array. + data = {"ar1": np.array([2.3, 9.4])} with pytest.raises(AssertionError) as excinfo: - ndarrays_regression.check(data1) + ndarrays_regression.check(data) obtained_error_msg = str(excinfo.value) expected = "\n".join( [ diff --git a/tests/test_ndarrays_regression/test_common_case_zero_expected.npz b/tests/test_ndarrays_regression/test_common_case_zero_expected.npz new file mode 100644 index 0000000000000000000000000000000000000000..87d95f47e6430700e4a6a8a99fd74357efb0e402 GIT binary patch literal 218 zcmWIWW@Zs#U|`??Vnv3m%$F)bK-K{u=423INJ%V7G}Oy0sAOai01E=e89)j^VD=mF zSJDC}PXrtbSUYc0)SQ?lc}o}MP0n1nZt;S7)8a#drpyqZA3tRhmr%L)8BIUdF7b2~ zu_a7DrNpdeS#fRU5@FC|GdaMrbmqk)kr#iw36Nq4@MdHZVaDYgh^-*9fe}Q)9T(uu R$_5f(1VVEl-3#I{004_LH1Plc literal 0 HcmV?d00001 diff --git a/tests/test_ndarrays_regression/test_different_data_types.npz b/tests/test_ndarrays_regression/test_different_data_types.npz index 1c2a833232e1f84da50398432eea3665a42859da..2bca5eecdab1694efa46f63f16151babcffd8fa3 100644 GIT binary patch delta 39 qcmcc2c$rZsz?+#xgn@y91BewFmh_#jn<$ja=r*xIj0Mb@WeEVg7YlX( delta 39 qcmcc2c$rZsz?+#xgn@y91BewF`13s{O%%#y44c>>#scQdvIGF14+%m5 diff --git a/tests/test_ndarrays_regression/test_number_formats.npz b/tests/test_ndarrays_regression/test_float_values_smoke_test.npz similarity index 100% rename from tests/test_ndarrays_regression/test_number_formats.npz rename to tests/test_ndarrays_regression/test_float_values_smoke_test.npz diff --git a/tests/test_ndarrays_regression/test_non_numeric_data_array0_.npz b/tests/test_ndarrays_regression/test_non_numeric_data_array0_.npz deleted file mode 100644 index 31b11cfdad6994b4ae8d6119d1f459eb866e9da8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 226 zcmWIWW@Zs#U|`??Vnv2c#};Zu0a*(e85lSjL>N*MOA-zB@(L;$83e$BKsg4G0uY$} zM*Nkuz{wK<#{$;Qn-n!CW=Y=C1$mP*7p_~pVBWO&kf13u#OKFPnZzYj?tMnnkF`rY zT}5mO(@&{1E3ULzR$N=TQkeA=**G4wUiuRd#qHJYoT;f{d+7=@Lx49UlL#{|XF+TS bkqtm+G9Vlo;LXYg5?};Eb09qx#9;scsFF7m diff --git a/tests/test_ndarrays_regression/test_object_dtype.npz b/tests/test_ndarrays_regression/test_object_dtype.npz deleted file mode 100644 index a0ce5a2c194c986139db6b202e6dea5e933199d6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 404 zcmWIWW@Zs#U|`??Vnv3+x2kMBj0_AlK+MS?!jO_!l4z)xS5V2wAOIEws$u{s0D+}_ zzTSr&1lr6!|3=xr{%*K^i9pdS$2}DSRRxAqn*{iU$|heGxs{VvuFN8R!Fi?a@8;6% z&-`-^ANYLc^qCs>{_p($VpY!XC`CW&=Ukc@xHI9DTY&#BCl zyubf05Z}@Jd;TGLzo?MQoA`niD=pTlRrxGkV(O>j@49fynxJ0BwZS`1Tnpa7F{`;Y z+ps|Fg5<$Af1OJ+52s0=DTyuOiQd>NAQgA;+ox9FH_VLoY#F4@g(ft;~TBB#TD?P#35$9s*K zzqEyM#`cPP2U;E74c9%K)Ve>wn~_O`8CO6;!V5$;fWrwt^#