diff --git a/src/pytest_regressions/dataframe_regression.py b/src/pytest_regressions/dataframe_regression.py index 37fde1d..bfe4617 100644 --- a/src/pytest_regressions/dataframe_regression.py +++ b/src/pytest_regressions/dataframe_regression.py @@ -42,7 +42,7 @@ def _check_data_types(self, key, obtained_column, expected_column): try: import numpy as np except ModuleNotFoundError: - raise ModuleNotFoundError(import_error_message("Numpy")) + raise ModuleNotFoundError(import_error_message("NumPy")) __tracebackhide__ = True obtained_data_type = obtained_column.values.dtype @@ -89,7 +89,7 @@ def _check_fn(self, obtained_filename, expected_filename): try: import numpy as np except ModuleNotFoundError: - raise ModuleNotFoundError(import_error_message("Numpy")) + raise ModuleNotFoundError(import_error_message("NumPy")) try: import pandas as pd except ModuleNotFoundError: @@ -123,8 +123,7 @@ def _check_fn(self, obtained_filename, expected_filename): self._check_data_types(k, obtained_column, expected_column) self._check_data_shapes(obtained_column, expected_column) - data_type = obtained_column.values.dtype - if data_type in [float, np.float16, np.float32, np.float64]: + if np.issubdtype(obtained_column.values.dtype, np.inexact): not_close_mask = ~np.isclose( obtained_column.values, expected_column.values, @@ -138,7 +137,7 @@ def _check_fn(self, obtained_filename, expected_filename): diff_ids = np.where(not_close_mask)[0] diff_obtained_data = obtained_column[diff_ids] diff_expected_data = expected_column[diff_ids] - if data_type == bool: + if obtained_column.values.dtype == bool: diffs = np.logical_xor(obtained_column, expected_column)[diff_ids] else: diffs = np.abs(obtained_column - expected_column)[diff_ids] @@ -199,7 +198,7 @@ def check( will ignore embed_data completely, being useful if a reference file is located in the session data dir for example. 
class NDArraysRegressionFixture:
    """
    NumPy NPZ regression fixture implementation used on ndarrays_regression fixture.

    A dictionary of arrays is dumped to a compressed ``.npz`` file and compared, key by
    key, against a previously recorded file. Floating-point and complex arrays are
    compared with ``numpy.isclose``; every other accepted dtype is compared exactly.
    """

    # Maximum number of individual mismatches listed per array in a failure report.
    THRESHOLD = 100
    # Row layout of the mismatch table: index, obtained, expected, difference.
    ROWFORMAT = "{:>15s} {:>20s} {:>20s} {:>20s}\n"

    def __init__(self, datadir, original_datadir, request):
        """
        :type datadir: Path
        :type original_datadir: Path
        :type request: FixtureRequest
        """
        # Both are replaced on every call to ``check``.
        self._tolerances_dict = {}
        self._default_tolerance = {}

        self.request = request
        self.datadir = datadir
        self.original_datadir = original_datadir
        self._force_regen = False
        self._with_test_class_names = False

    @staticmethod
    def _import_numpy():
        """
        Import NumPy lazily, so that it is only needed when this fixture is used.

        :return: the ``numpy`` module.
        :raises ModuleNotFoundError: with the message built by ``import_error_message``
            when NumPy cannot be imported.
        """
        __tracebackhide__ = True
        try:
            import numpy as np
        except ModuleNotFoundError:
            from pytest_regressions.common import import_error_message

            raise ModuleNotFoundError(import_error_message("NumPy"))
        return np

    def _check_data_types(self, key, obtained_array, expected_array):
        """
        Check if data type of obtained and expected arrays are the same. Fail if not.
        Helper method used in _check_fn method.

        Different dtypes are tolerated when both are numeric or both are strings.

        :param str key: name of the array, only used in the error message.
        :raises AssertionError: if the dtypes differ and are not comparable.
        """
        np = self._import_numpy()

        __tracebackhide__ = True

        if obtained_array.dtype == expected_array.dtype:
            return

        # Comparable families: numbers (float, int, short, bytes, etc...) and strings.
        for family in (np.number, str):
            if np.issubdtype(obtained_array.dtype, family) and np.issubdtype(
                expected_array.dtype, family
            ):
                return

        # In case they are not, assume they are not comparable
        raise AssertionError(
            "Data types are not the same.\n"
            f"key: {key}\n"
            f"Obtained: {obtained_array.dtype}\n"
            f"Expected: {expected_array.dtype}\n"
        )

    def _check_data_shapes(self, key, obtained_array, expected_array):
        """
        Check if obtained and expected arrays have the same shape.
        Helper method used in _check_fn method.

        :param str key: name of the array, only used in the error message.
        :raises AssertionError: if the shapes differ.
        """
        __tracebackhide__ = True

        if obtained_array.shape != expected_array.shape:
            raise AssertionError(
                "Shapes are not the same.\n"
                f"Key: {key}\n"
                f"Obtained: {obtained_array.shape}\n"
                f"Expected: {expected_array.shape}\n"
            )

    def _check_fn(self, obtained_filename, expected_filename):
        """
        Check if dict contents dumped to a file match the contents in expected file.

        :param str obtained_filename:
        :param str expected_filename:
        :raises AssertionError: if the keys, dtypes, shapes or values do not match. For
            value mismatches the message contains a per-array report.
        """
        np = self._import_numpy()

        __tracebackhide__ = True

        # Copy the arrays into plain dictionaries inside a ``with`` block, so that the
        # NPZ files are really closed before the comparison starts.
        with np.load(str(obtained_filename)) as npz_file:
            obtained_data = dict(npz_file)
        with np.load(str(expected_filename)) as npz_file:
            expected_data = dict(npz_file)

        # Check mismatches in the keys.
        obtained_keys = set(obtained_data)
        expected_keys = set(expected_data)
        if obtained_keys != expected_keys:
            raise AssertionError(
                "They keys in the obtained results differ from the expected results.\n"
                f" Matching keys: {list(obtained_keys & expected_keys)}\n"
                f" New in obtained: {list(obtained_keys - expected_keys)}\n"
                f" Missing from obtained: {list(expected_keys - obtained_keys)}\n"
                "To update values, use --force-regen option.\n\n"
            )

        # Compare the contents of the arrays.
        reports = []
        for k, obtained_array in obtained_data.items():
            expected_array = expected_data[k]
            tolerance_args = self._tolerances_dict.get(k, self._default_tolerance)

            self._check_data_types(k, obtained_array, expected_array)
            self._check_data_shapes(k, obtained_array, expected_array)

            if np.issubdtype(obtained_array.dtype, np.inexact):
                not_close_mask = ~np.isclose(
                    obtained_array,
                    expected_array,
                    equal_nan=True,
                    **tolerance_args,
                )
            else:
                not_close_mask = obtained_array != expected_array

            if not np.any(not_close_mask):
                continue

            if not_close_mask.ndim == 0:
                # Scalars have no index; use an empty tuple, printed as "()".
                diff_ids = [()]
            else:
                diff_ids = np.array(np.nonzero(not_close_mask)).T
            reports.append(
                self._describe_differences(
                    np,
                    k,
                    expected_array.size,
                    expected_array.shape,
                    diff_ids,
                    # Boolean indexing flattens the mismatches into 1-D arrays.
                    obtained_array[not_close_mask],
                    expected_array[not_close_mask],
                )
            )

        if reports:
            raise AssertionError(
                "Values are not sufficiently close.\n"
                "To update values, use --force-regen option.\n\n" + "".join(reports)
            )

    def _describe_differences(
        self, np, key, size, shape, diff_ids, obtained_array, expected_array
    ):
        """
        Build the report section for one array: summary, statistics and mismatch table.

        :param np: the ``numpy`` module.
        :param str key: name of the array.
        :param int size: total number of elements of the full array.
        :param tuple shape: shape of the full array.
        :param diff_ids: indices of the mismatching elements.
        :param obtained_array: 1-D array with the mismatching obtained values.
        :param expected_array: 1-D array with the mismatching expected values.
        :rtype: str
        """
        n_diffs = len(diff_ids)
        pct = 100 * n_diffs / size
        parts = [
            f"{key}:\n Shape: {shape}\n",
            f" Number of differences: {n_diffs} / {size} ({pct:.1f}%)\n",
        ]

        # Statistics are only meaningful for numbers and for more than one mismatch.
        if np.issubdtype(obtained_array.dtype, np.number) and n_diffs > 1:
            parts.append(self._describe_statistics(np, obtained_array, expected_array))

        # Details results
        parts.append(" Individual errors:\n")
        if n_diffs > self.THRESHOLD:
            parts.append(f" Only showing first {self.THRESHOLD} mismatches.\n")
            diff_ids = diff_ids[: self.THRESHOLD]
            obtained_array = obtained_array[: self.THRESHOLD]
            expected_array = expected_array[: self.THRESHOLD]
        parts.append(
            self.ROWFORMAT.format("Index", "Obtained", "Expected", "Difference")
        )
        for diff_id, obtained, expected in zip(
            diff_ids, obtained_array, expected_array
        ):
            diff_id_str = ", ".join(str(i) for i in diff_id)
            if len(diff_id) != 1:
                diff_id_str = f"({diff_id_str})"

            if not isinstance(obtained, np.number):
                # Booleans and strings have no meaningful difference.
                difference = ""
            elif isinstance(obtained, np.unsignedinteger) and isinstance(
                expected, np.unsignedinteger
            ):
                # Unsigned subtraction wraps around (1 - 2 == 255 for uint8), so the
                # difference is computed with Python integers instead.
                difference = str(int(obtained) - int(expected))
            else:
                difference = str(obtained - expected)
            parts.append(
                self.ROWFORMAT.format(
                    diff_id_str, str(obtained), str(expected), difference
                )
            )
        parts.append("\n")
        return "".join(parts)

    @staticmethod
    def _describe_statistics(np, obtained_array, expected_array):
        """
        Build the absolute and relative error statistics for mismatching numbers.

        :param np: the ``numpy`` module.
        :param obtained_array: 1-D numeric array with the mismatching obtained values.
        :param expected_array: 1-D numeric array with the mismatching expected values.
        :rtype: str
        """
        # When the subtraction would be carried out in an unsigned type it wraps around
        # instead of going negative; compute the distance without subtracting a larger
        # value from a smaller one.
        wraps_around = np.result_type(obtained_array, expected_array).kind == "u"
        if wraps_around:
            abs_errors = np.maximum(obtained_array, expected_array) - np.minimum(
                obtained_array, expected_array
            )
        else:
            abs_errors = abs(obtained_array - expected_array)

        text = (
            " Statistics are computed for differing elements only.\n"
            " Stats for abs(obtained - expected):\n"
            f" Max: {abs_errors.max()}\n"
            f" Mean: {abs_errors.mean()}\n"
            f" Median: {np.median(abs_errors)}\n"
        )

        # Relative errors are only defined where the expected value is not zero.
        nonzero = expected_array != 0
        n_nonzero = int(np.count_nonzero(nonzero))
        if n_nonzero == 0:
            return (
                text
                + " Relative errors are not reported because all expected values are zero.\n"
            )

        if wraps_around:
            # Unsigned expected values are never negative: abs(expected) == expected.
            rel_errors = abs_errors[nonzero] / expected_array[nonzero]
        else:
            rel_errors = abs(
                (obtained_array[nonzero] - expected_array[nonzero])
                / expected_array[nonzero]
            )

        text += " Stats for abs(obtained - expected) / abs(expected):\n"
        if n_nonzero != len(abs_errors):
            pct = 100 * n_nonzero / len(abs_errors)
            text += f" Number of (differing) non-zero expected results: {n_nonzero} / {len(abs_errors)} ({pct:.1f}%)\n"
            text += " Relative errors are computed for the non-zero expected results.\n"
        text += f" Max: {rel_errors.max()}\n"
        text += f" Mean: {rel_errors.mean()}\n"
        text += f" Median: {np.median(rel_errors)}\n"
        return text

    def _dump_fn(self, data_object, filename):
        """
        Dump dict contents to the given filename

        :param Dict[str, np.ndarray] data_object:
        :param str filename:
        """
        np = self._import_numpy()
        np.savez_compressed(str(filename), **data_object)

    def check(
        self,
        data_dict,
        basename=None,
        fullpath=None,
        tolerances=None,
        default_tolerance=None,
    ):
        """
        Checks a dictionary of NumPy ndarrays, containing only numeric, boolean or unicode
        string data, against a previously recorded version, or generate a new file.

        Example::

            def test_some_data(ndarrays_regression):
                points, values = some_function()
                ndarrays_regression.check(
                    {
                        'points': points,  # array with shape (100, 3)
                        'values': values,  # array with shape (100,)
                    },
                    default_tolerance=dict(atol=1e-8, rtol=1e-8)
                )

        :param Dict[str, numpy.ndarray] data_dict: dictionary of NumPy ndarrays containing
            data for regression check. The arrays can have any shape. Values are converted
            with ``np.asarray``; the given dictionary itself is not modified.

        :param str basename: basename of the file to test/record. If not given the name
            of the test is used.

        :param str fullpath: complete path to use as a reference file, being useful if a
            reference file is located in the session data dir for example.

        :param dict tolerances: dict mapping keys from the data_dict to tolerance settings
            for the given data. Each value is a dict of keyword arguments for numpy's
            ``isclose`` function. Example::

                tolerances={'U': dict(atol=1e-2)}

        :param dict default_tolerance: dict mapping the default tolerance for the current
            check call. Example::

                default_tolerance=dict(atol=1e-7, rtol=1e-18).

            If not provided, will use defaults from numpy's ``isclose`` function.

        ``basename`` and ``fullpath`` are exclusive.

        :raises TypeError: if ``data_dict`` is not a dictionary or if an array has an
            unsupported dtype.
        :raises AssertionError: if a key is not a string, or if the regression check fails.
        """
        np = self._import_numpy()

        import functools

        __tracebackhide__ = True

        if not isinstance(data_dict, dict):
            raise TypeError(
                "Only dictionaries with NumPy arrays or array-like objects are "
                "supported on ndarray_regression fixture.\n"
                "Object with type '{}' was given.".format(str(type(data_dict)))
            )

        # Convert into a new dictionary, leaving the caller's object untouched.
        arrays = {}
        for key, array in data_dict.items():
            # Raised explicitly, so that the validation is not stripped by ``python -O``.
            if not isinstance(key, str):
                raise AssertionError(
                    "The dictionary keys must be strings. "
                    "Found key with type '%s'" % (str(type(key)))
                )
            arrays[key] = np.asarray(array)

        for key, array in arrays.items():
            # Accepted:
            #  - b: boolean
            #  - i: signed integer
            #  - u: unsigned integer
            #  - f: floating-point number
            #  - c: complex floating-point number
            #  - U: unicode string
            # Rejected:
            #  - m: timedelta
            #  - M: datetime
            #  - O: objects
            #  - S: zero-terminated bytes
            #  - V: void (raw data, structured arrays)
            if array.dtype.kind not in ["b", "i", "u", "f", "c", "U"]:
                raise TypeError(
                    "Only numeric or unicode data is supported on ndarrays_regression "
                    f"fixture.\nArray '{key}' with type '{array.dtype}' was given."
                )

        self._tolerances_dict = {} if tolerances is None else tolerances
        self._default_tolerance = {} if default_tolerance is None else default_tolerance

        dump_fn = functools.partial(self._dump_fn, arrays)

        from pytest_regressions.common import perform_regression_check

        perform_regression_check(
            datadir=self.datadir,
            original_datadir=self.original_datadir,
            request=self.request,
            check_fn=self._check_fn,
            dump_fn=dump_fn,
            extension=".npz",
            basename=basename,
            fullpath=fullpath,
            force_regen=self._force_regen,
            with_test_class_names=self._with_test_class_names,
        )
The values of the dictionary must be accepted + by ``np.asarray``. + + Example:: + + def test_some_data(ndarrays_regression): + points, values = some_function() + ndarrays_regression.check( + { + 'points': points, # array with shape (100, 3) + 'values': values, # array with shape (100,) + }, + default_tolerance=dict(atol=1e-8, rtol=1e-8) + ) + + :type embed_data: _EmbedDataFixture + :type request: FixtureRequest + :rtype: DataRegressionFixture + :return: Data regression fixture. + """ + from .ndarrays_regression import NDArraysRegressionFixture + + return NDArraysRegressionFixture(datadir, original_datadir, request) + + @pytest.fixture def file_regression(datadir, original_datadir, request): """ diff --git a/tests/test_dataframe_regression.py b/tests/test_dataframe_regression.py index ed2ddeb..687c000 100644 --- a/tests/test_dataframe_regression.py +++ b/tests/test_dataframe_regression.py @@ -179,7 +179,7 @@ def test_non_numeric_data(dataframe_regression, array, no_regen): with pytest.raises( AssertionError, match="Only numeric data is supported on dataframe_regression fixture.\n" - " Array with type '%s' was given." % (str(data1["data1"].dtype),), + " *Array with type '%s' was given." % (str(data1["data1"].dtype),), ): dataframe_regression.check(data1) @@ -238,12 +238,17 @@ def test_string_array(dataframe_regression): data1 = {"potato": ["delicious", "nutritive", "yummy"]} dataframe_regression.check(pd.DataFrame.from_dict(data1)) + # TODO: The following fails with a confusing error message. + # Try wrong data + # data1 = {"potato": ["delicious", "nutritive", "yikes"]} + # dataframe_regression.check(pd.DataFrame.from_dict(data1)) + def test_non_pandas_dataframe(dataframe_regression): data = np.ones(shape=(10, 10)) with pytest.raises( AssertionError, - match="Only pandas DataFrames are supported on on dataframe_regression fixture.\n" - " Object with type '%s' was given." 
% (str(type(data)),), + match="Only pandas DataFrames are supported on dataframe_regression fixture.\n" + " *Object with type '%s' was given." % (str(type(data)),), ): dataframe_regression.check(data) diff --git a/tests/test_ndarrays_regression.py b/tests/test_ndarrays_regression.py new file mode 100644 index 0000000..1fcbb85 --- /dev/null +++ b/tests/test_ndarrays_regression.py @@ -0,0 +1,599 @@ +import re + +import numpy as np +import pytest + +from pytest_regressions.testing import check_regression_fixture_workflow + + +@pytest.fixture +def no_regen(ndarrays_regression, request): + if ndarrays_regression._force_regen or request.config.getoption("force_regen"): + pytest.fail("--force-regen should not be used on this test.") + + +def test_usage_workflow(testdir, monkeypatch): + """ + :type testdir: _pytest.pytester.TmpTestdir + + :type monkeypatch: _pytest.monkeypatch.monkeypatch + """ + + import sys + + monkeypatch.setattr( + sys, "testing_get_data", lambda: {"data": 1.1 * np.ones(50)}, raising=False + ) + source = """ + import sys + def test_1(ndarrays_regression): + contents = sys.testing_get_data() + ndarrays_regression.check(contents) + """ + + def get_npz_contents(): + filename = testdir.tmpdir / "test_file" / "test_1.npz" + return dict(np.load(str(filename))) + + def compare_arrays(obtained, expected): + assert (obtained["data"] == expected["data"]).all() + + check_regression_fixture_workflow( + testdir, + source=source, + data_getter=get_npz_contents, + data_modifier=lambda: monkeypatch.setattr( + sys, "testing_get_data", lambda: {"data": 1.2 * np.ones(50)}, raising=False + ), + expected_data_1={"data": 1.1 * np.ones(50)}, + expected_data_2={"data": 1.2 * np.ones(50)}, + compare_fn=compare_arrays, + ) + + +def test_common_case(ndarrays_regression, no_regen): + # Most common case: Data is valid, is present and should pass + data1 = np.full(5000, 1.1, dtype=float) + data2 = np.arange(5000, dtype=int) + ndarrays_regression.check({"data1": data1, "data2": 
data2}) + + # Assertion error case 1: Data has one invalid place + data1 = np.full(5000, 1.1, dtype=float) + data2 = np.arange(5000, dtype=int) + data1[500] += 0.1 + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check({"data1": data1, "data2": data2}) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Values are not sufficiently close.", + "To update values, use --force-regen option.", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data1:", + " Shape: (5000,)", + " Number of differences: 1 / 5000 (0.0%)", + " Individual errors:", + " Index Obtained Expected Difference", + " 500 1.2000000000000002 1.1 0.10000000000000009", + ] + ) + assert expected in obtained_error_msg + + # Assertion error case 2: More than one invalid data + data1 = np.full(5000, 1.1, dtype=float) + data2 = np.arange(5000, dtype=int) + data1[500] += 0.1 + data1[600] += 0.2 + data2[0] += 5 + data2[700:900] += 5 + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check({"data1": data1, "data2": data2}) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Values are not sufficiently close.", + "To update values, use --force-regen option.", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data1:", + " Shape: (5000,)", + " Number of differences: 2 / 5000 (0.0%)", + " Statistics are computed for differing elements only.", + " Stats for abs(obtained - expected):", + " Max: 0.19999999999999996", + " Mean: 0.15000000000000002", + " Median: 0.15000000000000002", + " Stats for abs(obtained - expected) / abs(expected):", + " Max: 0.18181818181818177", + " Mean: 0.13636363636363638", + " Median: 0.13636363636363638", + " Individual errors:", + " Index Obtained Expected Difference", + " 500 1.2000000000000002 1.1 0.10000000000000009", + " 600 1.3 1.1 0.19999999999999996", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data2:", + " 
Shape: (5000,)", + " Number of differences: 201 / 5000 (4.0%)", + " Statistics are computed for differing elements only.", + " Stats for abs(obtained - expected):", + " Max: 5", + " Mean: 5.0", + " Median: 5.0", + " Stats for abs(obtained - expected) / abs(expected):", + " Number of (differing) non-zero expected results: 200 / 201 (99.5%)", + " Relative errors are computed for the non-zero expected results.", + " Max: 0.007142857142857143", + " Mean: 0.006286830640674575", + " Median: 0.006253911138923655", + " Individual errors:", + " Only showing first 100 mismatches.", + " Index Obtained Expected Difference", + " 0 5 0 5", + " 700 705 700 5", + " 701 706 701 5", + ] + ) + assert expected in obtained_error_msg + + +def test_common_case_nd(ndarrays_regression, no_regen): + # Most common case: Data is valid, is present and should pass + data1 = np.full((50, 20), 1.1, dtype=float) + data2 = np.arange(60, dtype=int).reshape((3, 4, 5)) + ndarrays_regression.check({"data1": data1, "data2": data2}) + + # Assertion error case 1: Data has one invalid place + data1 = np.full((50, 20), 1.1, dtype=float) + data2 = np.arange(60, dtype=int).reshape((3, 4, 5)) + data1[30, 2] += 0.1 + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check({"data1": data1, "data2": data2}) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Values are not sufficiently close.", + "To update values, use --force-regen option.", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data1:", + " Shape: (50, 20)", + " Number of differences: 1 / 1000 (0.1%)", + " Individual errors:", + " Index Obtained Expected Difference", + " (30, 2) 1.2000000000000002 1.1 0.10000000000000009", + ] + ) + assert expected in obtained_error_msg + + # Assertion error case 2: More than one invalid data + data1 = np.full((50, 20), 1.1, dtype=float) + data2 = np.arange(60, dtype=int).reshape((3, 4, 5)) + data1[20, 15] += 0.1 + data1[0, 9] = 1.43248324e35 + 
data2[:2, 0, [0, 2, 4]] += 71 + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check({"data1": data1, "data2": data2}) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Values are not sufficiently close.", + "To update values, use --force-regen option.", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data1:", + " Shape: (50, 20)", + " Number of differences: 2 / 1000 (0.2%)", + " Statistics are computed for differing elements only.", + " Stats for abs(obtained - expected):", + " Max: 1.43248324e+35", + " Mean: 7.1624162e+34", + " Median: 7.1624162e+34", + " Stats for abs(obtained - expected) / abs(expected):", + " Max: 1.3022574909090907e+35", + " Mean: 6.511287454545454e+34", + " Median: 6.511287454545454e+34", + " Individual errors:", + " Index Obtained Expected Difference", + " (0, 9) 1.43248324e+35 1.1 1.43248324e+35", + " (20, 15) 1.2000000000000002 1.1 0.10000000000000009", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data2:", + " Shape: (3, 4, 5)", + " Number of differences: 6 / 60 (10.0%)", + " Statistics are computed for differing elements only.", + " Stats for abs(obtained - expected):", + " Max: 71", + " Mean: 71.0", + " Median: 71.0", + " Stats for abs(obtained - expected) / abs(expected):", + " Number of (differing) non-zero expected results: 5 / 6 (83.3%)", + " Relative errors are computed for the non-zero expected results.", + " Max: 35.5", + " Mean: 12.597121212121213", + " Median: 3.55", + " Individual errors:", + " Index Obtained Expected Difference", + " (0, 0, 0) 71 0 71", + " (0, 0, 2) 73 2 71", + " (0, 0, 4) 75 4 71", + " (1, 0, 0) 91 20 71", + " (1, 0, 2) 93 22 71", + " (1, 0, 4) 95 24 71", + ] + ) + assert expected in obtained_error_msg + + +def test_common_case_zero_expected(ndarrays_regression, no_regen): + # Most common case: Data is valid, is present and should pass + data = {"data1": np.array([0, 0, 2, 3, 0, 5, 0, 7])} + 
ndarrays_regression.check(data) + + # Assertion error case: Only some zeros are not reproduced. + data = {"data1": np.array([1, 5, 2, 3, 0, 5, 3, 7])} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Values are not sufficiently close.", + "To update values, use --force-regen option.", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data1:", + " Shape: (8,)", + " Number of differences: 3 / 8 (37.5%)", + " Statistics are computed for differing elements only.", + " Stats for abs(obtained - expected):", + " Max: 5", + " Mean: 3.0", + " Median: 3.0", + " Relative errors are not reported because all expected values are zero.", + " Individual errors:", + " Index Obtained Expected Difference", + " 0 1 0 1", + " 1 5 0 5", + " 6 3 0 3", + ] + ) + assert expected in obtained_error_msg + + +def test_different_data_types(ndarrays_regression, no_regen): + # Generate data with integer array. + data = {"data1": np.array([1] * 10)} + ndarrays_regression.check(data) + + # Run check with incompatible type. 
+ data = {"data1": np.array([True] * 10)} + with pytest.raises( + AssertionError, + match="Data types are not the same.\nkey: data1\nObtained: bool\nExpected: int64\n", + ): + ndarrays_regression.check(data) + + +class Foo: + def __init__(self, bar): + self.bar = bar + + +def test_object_dtype(ndarrays_regression, no_regen): + data1 = {"data1": np.array([Foo(i) for i in range(4)], dtype=object)} + with pytest.raises( + TypeError, + match="Only numeric or unicode data is supported on ndarrays_regression fixture.\n" + "Array 'data1' with type 'object' was given.", + ): + ndarrays_regression.check(data1) + + +def test_integer_values_smoke_test(ndarrays_regression, no_regen): + data1 = np.ones(11, dtype=int) + ndarrays_regression.check({"data1": data1}) + + +def test_float_values_smoke_test(ndarrays_regression): + data1 = np.array([1.2345678e50, 1.2345678e-50, 0.0]) + ndarrays_regression.check({"data1": data1}) + + +def test_bool_array(ndarrays_regression, no_regen): + # Correct data + data1 = np.array([False, False, False], dtype=bool) + ndarrays_regression.check({"data1": data1}) + + # Data with errors + data1 = np.array([True, True, False], dtype=bool) + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check({"data1": data1}) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Values are not sufficiently close.", + "To update values, use --force-regen option.", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data1:", + " Shape: (3,)", + " Number of differences: 2 / 3 (66.7%)", + " Individual errors:", + " Index Obtained Expected Difference", + " 0 True False ", + " 1 True False ", + ] + ) + assert expected in obtained_error_msg + + +def test_complex_array(ndarrays_regression, no_regen): + # Correct data + data1 = np.array([3.0 + 2.5j, -0.5, -1.879j]) + ndarrays_regression.check({"data1": data1}) + + # Data with errors + data1 = np.array([3.0 + 2.5j, 0.5, -1.879]) + with 
pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check({"data1": data1}) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "data1:", + " Shape: (3,)", + " Number of differences: 2 / 3 (66.7%)", + " Statistics are computed for differing elements only.", + " Stats for abs(obtained - expected):", + " Max: 2.6573072836990455", + " Mean: 1.8286536418495227", + " Median: 1.8286536418495227", + " Stats for abs(obtained - expected) / abs(expected):", + " Max: 2.0", + " Mean: 1.7071067811865475", + " Median: 1.7071067811865475", + " Individual errors:", + " Index Obtained Expected Difference", + " 1 (0.5+0j) (-0.5+0j) (1+0j)", + " 2 (-1.879+0j) (-0-1.879j) (-1.879+1.879j)", + ] + ) + + +def test_arrays_of_same_size_1d(ndarrays_regression): + data = { + "hello": np.zeros((1,), dtype=int), + "world": np.zeros((1,), dtype=int), + } + ndarrays_regression.check(data) + + +def test_arrays_with_different_sizes_1d(ndarrays_regression, no_regen): + data = {"data1": np.ones(11, dtype=np.float64)} + ndarrays_regression.check(data) + + # Original NPY file contains 11 elements. + data = {"data1": np.ones(10, dtype=np.float64)} + expected = re.escape( + "Shapes are not the same.\nKey: data1\nObtained: (10,)\nExpected: (11,)\n" + ) + with pytest.raises(AssertionError, match=expected): + ndarrays_regression.check(data) + + +def test_arrays_of_same_shape(ndarrays_regression): + same_size_int_arrays = { + "2d": np.zeros((3, 4), dtype=int), + "3d": np.ones((7, 8, 9), dtype=float), + "4d": np.full((2, 1, 1, 4), 3, dtype=int), + } + ndarrays_regression.check(same_size_int_arrays) + + +def test_arrays_with_different_shapes(ndarrays_regression): + # Prepare data with one shape. + data = {"2d": np.zeros((3, 4), dtype=int)} + ndarrays_regression.check(data) + + # Check with other shape. 
+ data = {"2d": np.zeros((3, 2), dtype=int)} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Shapes are not the same.", + "Key: 2d", + "Obtained: (3, 2)", + "Expected: (3, 4)", + ] + ) + assert expected in obtained_error_msg + + +def test_scalars(ndarrays_regression): + # Initial data with scalars. + data = {"data1": 4.0, "data2": 42} + ndarrays_regression.check(data) + + # Run check with non-scalar data. + data = {"data1": np.array([4.0]), "data2": np.array([42, 21])} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Shapes are not the same.", + "Key: data1", + "Obtained: (1,)", + "Expected: ()", + ] + ) + assert expected in obtained_error_msg + + # Other test case. + data = {"data1": 5.0, "data2": 21} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "data1:", + " Shape: ()", + " Number of differences: 1 / 1 (100.0%)", + " Individual errors:", + " Index Obtained Expected Difference", + " () 5.0 4.0 1.0", + ] + ) + assert expected in obtained_error_msg + expected = "\n".join( + [ + "data2:", + " Shape: ()", + " Number of differences: 1 / 1 (100.0%)", + " Individual errors:", + " Index Obtained Expected Difference", + " () 21 42 -21", + ] + ) + assert expected in obtained_error_msg + + +def test_string_array(ndarrays_regression): + # Initial data. + data1 = {"potato": ["delicious", "nutritive", "yummy"]} + ndarrays_regression.check(data1) + + # Run check with wrong data. 
+ data1 = {"potato": ["delicious", "nutritive", "yikes"]} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data1) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "potato:", + " Shape: (3,)", + " Number of differences: 1 / 3 (33.3%)", + " Individual errors:", + " Index Obtained Expected Difference", + " 2 yikes yummy ", + ] + ) + assert expected in obtained_error_msg + + # Try data with incompatible dtype + data1 = {"potato": ["disgusting", "nutritive", "yikes"]} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data1) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "potato:", + " Shape: (3,)", + " Number of differences: 2 / 3 (66.7%)", + " Individual errors:", + " Index Obtained Expected Difference", + " 0 disgusting delicious ", + " 2 yikes yummy ", + ] + ) + assert expected in obtained_error_msg + + +def test_non_dict(ndarrays_regression): + data = np.ones(shape=(10, 10)) + with pytest.raises( + TypeError, + match="Only dictionaries with NumPy arrays or array-like objects are supported " + "on ndarray_regression fixture.\nObject with type '%s' was given." + % (str(type(data)),), + ): + ndarrays_regression.check(data) + + +def test_structured_array(ndarrays_regression): + data = { + "array": np.array( + [("spam", 1, 3.0), ("egg", 0, 4.3)], + dtype=[("item", "U5"), ("count", "i4"), ("price", "f8")], + ) + } + with pytest.raises(TypeError) as excinfo: + ndarrays_regression.check(data) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "Only numeric or unicode data is supported on ndarrays_regression fixture.", + "Array 'array' with type '{}' was given.".format(data["array"].dtype), + ] + ) + assert expected in obtained_error_msg + + +def test_new_obtained(ndarrays_regression): + # Prepare data with one array. + data = {"ar1": np.array([2.3, 9.4])} + ndarrays_regression.check(data) + + # Run check with two arrays. 
+ data = {"ar1": np.array([2.3, 9.4]), "ar2": np.array([3, 4, 9])} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "They keys in the obtained results differ from the expected results.", + " Matching keys: ['ar1']", + " New in obtained: ['ar2']", + " Missing from obtained: []", + ] + ) + assert expected in obtained_error_msg + + +def test_missing_obtained(ndarrays_regression): + # Prepare data with two arrays. + data = {"ar1": np.array([2.3, 9.4]), "ar2": np.array([3, 4, 9])} + ndarrays_regression.check(data) + + # Run check with just one array. + data = {"ar1": np.array([2.3, 9.4])} + with pytest.raises(AssertionError) as excinfo: + ndarrays_regression.check(data) + obtained_error_msg = str(excinfo.value) + expected = "\n".join( + [ + "They keys in the obtained results differ from the expected results.", + " Matching keys: ['ar1']", + " New in obtained: []", + " Missing from obtained: ['ar2']", + ] + ) + assert expected in obtained_error_msg diff --git a/tests/test_ndarrays_regression/test_arrays_of_same_shape.npz b/tests/test_ndarrays_regression/test_arrays_of_same_shape.npz new file mode 100644 index 0000000..61d99b0 Binary files /dev/null and b/tests/test_ndarrays_regression/test_arrays_of_same_shape.npz differ diff --git a/tests/test_ndarrays_regression/test_arrays_of_same_size_1d.npz b/tests/test_ndarrays_regression/test_arrays_of_same_size_1d.npz new file mode 100644 index 0000000..e609076 Binary files /dev/null and b/tests/test_ndarrays_regression/test_arrays_of_same_size_1d.npz differ diff --git a/tests/test_ndarrays_regression/test_arrays_with_different_shapes.npz b/tests/test_ndarrays_regression/test_arrays_with_different_shapes.npz new file mode 100644 index 0000000..0daa7dd Binary files /dev/null and b/tests/test_ndarrays_regression/test_arrays_with_different_shapes.npz differ diff --git 
a/tests/test_ndarrays_regression/test_arrays_with_different_sizes_1d.npz b/tests/test_ndarrays_regression/test_arrays_with_different_sizes_1d.npz new file mode 100644 index 0000000..4ea4fc5 Binary files /dev/null and b/tests/test_ndarrays_regression/test_arrays_with_different_sizes_1d.npz differ diff --git a/tests/test_ndarrays_regression/test_bool_array.npz b/tests/test_ndarrays_regression/test_bool_array.npz new file mode 100644 index 0000000..c2c1f5a Binary files /dev/null and b/tests/test_ndarrays_regression/test_bool_array.npz differ diff --git a/tests/test_ndarrays_regression/test_common_case.npz b/tests/test_ndarrays_regression/test_common_case.npz new file mode 100644 index 0000000..2251dad Binary files /dev/null and b/tests/test_ndarrays_regression/test_common_case.npz differ diff --git a/tests/test_ndarrays_regression/test_common_case_nd.npz b/tests/test_ndarrays_regression/test_common_case_nd.npz new file mode 100644 index 0000000..ddbd379 Binary files /dev/null and b/tests/test_ndarrays_regression/test_common_case_nd.npz differ diff --git a/tests/test_ndarrays_regression/test_common_case_zero_expected.npz b/tests/test_ndarrays_regression/test_common_case_zero_expected.npz new file mode 100644 index 0000000..87d95f4 Binary files /dev/null and b/tests/test_ndarrays_regression/test_common_case_zero_expected.npz differ diff --git a/tests/test_ndarrays_regression/test_complex_array.npz b/tests/test_ndarrays_regression/test_complex_array.npz new file mode 100644 index 0000000..96bfda2 Binary files /dev/null and b/tests/test_ndarrays_regression/test_complex_array.npz differ diff --git a/tests/test_ndarrays_regression/test_different_data_types.npz b/tests/test_ndarrays_regression/test_different_data_types.npz new file mode 100644 index 0000000..2bca5ee Binary files /dev/null and b/tests/test_ndarrays_regression/test_different_data_types.npz differ diff --git a/tests/test_ndarrays_regression/test_float_values_smoke_test.npz 
b/tests/test_ndarrays_regression/test_float_values_smoke_test.npz new file mode 100644 index 0000000..379d261 Binary files /dev/null and b/tests/test_ndarrays_regression/test_float_values_smoke_test.npz differ diff --git a/tests/test_ndarrays_regression/test_integer_values_smoke_test.npz b/tests/test_ndarrays_regression/test_integer_values_smoke_test.npz new file mode 100644 index 0000000..94df3ae Binary files /dev/null and b/tests/test_ndarrays_regression/test_integer_values_smoke_test.npz differ diff --git a/tests/test_ndarrays_regression/test_missing_obtained.npz b/tests/test_ndarrays_regression/test_missing_obtained.npz new file mode 100644 index 0000000..3c44c64 Binary files /dev/null and b/tests/test_ndarrays_regression/test_missing_obtained.npz differ diff --git a/tests/test_ndarrays_regression/test_new_obtained.npz b/tests/test_ndarrays_regression/test_new_obtained.npz new file mode 100644 index 0000000..ea7acbf Binary files /dev/null and b/tests/test_ndarrays_regression/test_new_obtained.npz differ diff --git a/tests/test_ndarrays_regression/test_scalars.npz b/tests/test_ndarrays_regression/test_scalars.npz new file mode 100644 index 0000000..518fb61 Binary files /dev/null and b/tests/test_ndarrays_regression/test_scalars.npz differ diff --git a/tests/test_ndarrays_regression/test_string_array.npz b/tests/test_ndarrays_regression/test_string_array.npz new file mode 100644 index 0000000..c2fbb22 Binary files /dev/null and b/tests/test_ndarrays_regression/test_string_array.npz differ