diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index de45ee47bd..ca83b8350b 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -58,10 +58,15 @@ set_source_files_properties( ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp PROPERTIES COMPILE_OPTIONS "${_clang_prefix}-fno-fast-math") +if (UNIX) + set_source_files_properties( + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp + PROPERTIES COMPILE_DEFINITIONS "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES") +endif() target_compile_options(${python_module_name} PRIVATE -fno-sycl-id-queries-fit-in-int) target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel) if(UNIX) - # this option is support on Linux only + # this option is supported on Linux only target_link_options(${python_module_name} PRIVATE -fsycl-link-huge-device-code) endif() target_include_directories(${python_module_name} diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index f1ec439f3a..7438fb8a67 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -158,6 +158,7 @@ trunc, ) from ._reduction import sum +from ._testing import allclose __all__ = [ "Device", @@ -301,4 +302,5 @@ "tan", "tanh", "trunc", + "allclose", ] diff --git a/dpctl/tensor/_testing.py b/dpctl/tensor/_testing.py new file mode 100644 index 0000000000..4d4a73408c --- /dev/null +++ b/dpctl/tensor/_testing.py @@ -0,0 +1,152 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np + +import dpctl.tensor as dpt +import dpctl.utils as du + +from ._manipulation_functions import _broadcast_shape_impl +from ._type_utils import _to_device_supported_dtype + + +def _allclose_complex_fp(z1, z2, atol, rtol, equal_nan): + z1r = dpt.real(z1) + z1i = dpt.imag(z1) + z2r = dpt.real(z2) + z2i = dpt.imag(z2) + if equal_nan: + check1 = dpt.all(dpt.isnan(z1r) == dpt.isnan(z2r)) and dpt.all( + dpt.isnan(z1i) == dpt.isnan(z2i) + ) + else: + check1 = ( + dpt.logical_not(dpt.any(dpt.isnan(z1r))) + and dpt.logical_not(dpt.any(dpt.isnan(z1i))) + ) and ( + dpt.logical_not(dpt.any(dpt.isnan(z2r))) + and dpt.logical_not(dpt.any(dpt.isnan(z2i))) + ) + if not check1: + return check1 + mr = dpt.isinf(z1r) + mi = dpt.isinf(z1i) + check2 = dpt.all(mr == dpt.isinf(z2r)) and dpt.all(mi == dpt.isinf(z2i)) + if not check2: + return check2 + check3 = dpt.all(z1r[mr] == z2r[mr]) and dpt.all(z1i[mi] == z2i[mi]) + if not check3: + return check3 + mr = dpt.isfinite(z1r) + mi = dpt.isfinite(z1i) + mv1 = z1r[mr] + mv2 = z2r[mr] + check4 = dpt.all( + dpt.abs(mv1 - mv2) + < dpt.maximum(atol, rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2))) + ) + if not check4: + return check4 + mv1 = z1i[mi] + mv2 = z2i[mi] + check5 = dpt.all( + dpt.abs(mv1 - mv2) + <= dpt.maximum(atol, rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2))) + ) + return check5 + + +def _allclose_real_fp(r1, r2, atol, rtol, equal_nan): + if equal_nan: + check1 = dpt.all(dpt.isnan(r1) == dpt.isnan(r2)) + else: + check1 = dpt.logical_not(dpt.any(dpt.isnan(r1))) and dpt.logical_not( + dpt.any(dpt.isnan(r2)) + ) + if not check1: + return check1 + mr = dpt.isinf(r1) + check2 = dpt.all(mr == dpt.isinf(r2)) + if not check2: + return check2 + check3 = dpt.all(r1[mr] == r2[mr]) + if not check3: + return check3 + m = dpt.isfinite(r1) + mv1 = r1[m] + mv2 = r2[m] + check4 = dpt.all( + dpt.abs(mv1 - mv2) + <= dpt.maximum(atol, rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2))) + ) + return check4 + + +def _allclose_others(r1, r2): + return dpt.all(r1 == r2) + + +def allclose(a1, a2, atol=1e-8, rtol=1e-5, equal_nan=False): + """allclose(a1, a2, atol=1e-8, rtol=1e-5, equal_nan=False) + + Returns True if two arrays are element-wise equal within tolerances. + + The testing is based on the following elementwise comparison: + + abs(a - b) <= max(atol, rtol * max(abs(a), abs(b))) + """ + if not isinstance(a1, dpt.usm_ndarray): + raise TypeError( + f"Expected dpctl.tensor.usm_ndarray type, got {type(a1)}." + ) + if not isinstance(a2, dpt.usm_ndarray): + raise TypeError( + f"Expected dpctl.tensor.usm_ndarray type, got {type(a2)}." + ) + atol = float(atol) + rtol = float(rtol) + if atol < 0.0 or rtol < 0.0: + raise ValueError( + "Absolute and relative tolerances must be non-negative" + ) + equal_nan = bool(equal_nan) + exec_q = du.get_execution_queue(tuple(a.sycl_queue for a in (a1, a2))) + if exec_q is None: + raise du.ExecutionPlacementError( + "Execution placement can not be unambiguously inferred " + "from input arguments." + ) + res_sh = _broadcast_shape_impl([a1.shape, a2.shape]) + b1 = a1 + b2 = a2 + if b1.dtype == b2.dtype: + res_dt = b1.dtype + else: + res_dt = np.promote_types(b1.dtype, b2.dtype) + res_dt = _to_device_supported_dtype(res_dt, exec_q.sycl_device) + b1 = dpt.astype(b1, res_dt) + b2 = dpt.astype(b2, res_dt) + + b1 = dpt.broadcast_to(b1, res_sh) + b2 = dpt.broadcast_to(b2, res_sh) + + k = b1.dtype.kind + if k == "c": + return _allclose_complex_fp(b1, b2, atol, rtol, equal_nan) + elif k == "f": + return _allclose_real_fp(b1, b2, atol, rtol, equal_nan) + else: + return _allclose_others(b1, b2) diff --git a/dpctl/tensor/_usmarray.pxd b/dpctl/tensor/_usmarray.pxd index 603b296103..120ad7a066 100644 --- a/dpctl/tensor/_usmarray.pxd +++ b/dpctl/tensor/_usmarray.pxd @@ -58,7 +58,6 @@ cdef api class usm_ndarray [object PyUSMArrayObject, type PyUSMArrayType]: cdef void _reset(usm_ndarray self) cdef void _cleanup(usm_ndarray self) - cdef usm_ndarray _clone(usm_ndarray self) cdef Py_ssize_t get_offset(usm_ndarray self) except * cdef char* get_data(self) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 1336063323..4fc90fbf94 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -44,58 +44,6 @@ include "_types.pxi" include "_slicing.pxi" -def _dispatch_unary_elementwise(ary, name): - try: - mod = ary.__array_namespace__() - except AttributeError: - return NotImplemented - if mod is None and "dpnp" in sys.modules: - fn = getattr(sys.modules["dpnp"], name) - if callable(fn): - return fn(ary) - elif hasattr(mod, name): - fn = getattr(mod, name) - if callable(fn): - return fn(ary) - - return NotImplemented - - -def _dispatch_binary_elementwise(ary, name, other): - try: - mod = ary.__array_namespace__() - except AttributeError: - return NotImplemented - if mod is None and "dpnp" in sys.modules: - fn = getattr(sys.modules["dpnp"], name) - if callable(fn): - return fn(ary, other) - elif hasattr(mod, name): - fn = getattr(mod, name) - if callable(fn): - return fn(ary, other) - - return NotImplemented - - -def _dispatch_binary_elementwise2(other, name, ary): - try: - mod = ary.__array_namespace__() - except AttributeError: - return NotImplemented - mod = ary.__array_namespace__() - if mod is None and "dpnp" in sys.modules: - fn = getattr(sys.modules["dpnp"], name) - if callable(fn): - return fn(other, ary) - elif hasattr(mod, name): - fn = getattr(mod, name) - if callable(fn): - return fn(other, ary) - - return NotImplemented - - cdef class InternalUSMArrayError(Exception): """ A InternalError exception is raised when internal @@ -200,28 +148,6 @@ cdef class usm_ndarray: PyMem_Free(self.strides_) self._reset() - cdef usm_ndarray _clone(usm_ndarray self): - """ - Provides a copy of Python object pointing to the same data - """ - cdef Py_ssize_t offset_elems = self.get_offset() - cdef usm_ndarray res = usm_ndarray.__new__( - usm_ndarray, _make_int_tuple(self.nd_, self.shape_), - dtype=_make_typestr(self.typenum_), - strides=( - _make_int_tuple(self.nd_, self.strides_) if (self.strides_) - else None), - buffer=self.base_, - offset=offset_elems, - order=('C' if (self.flags_ & USM_ARRAY_C_CONTIGUOUS) else 'F') - ) - res.flags_ = self.flags_ - res.array_namespace_ = self.array_namespace_ - if (res.data_ != self.data_): - raise InternalUSMArrayError( - "Data pointers of cloned and original objects are different.") - return res - def __cinit__(self, shape, dtype=None, strides=None, buffer='device', Py_ssize_t offset=0, order='C', buffer_ctor_kwargs=dict(), @@ -256,7 +182,10 @@ cdef class usm_ndarray: raise TypeError("Argument shape must be a list or a tuple.") nd = len(shape) if dtype is None: - q = buffer_ctor_kwargs.get("queue") + if isinstance(buffer, (dpmem._memory._Memory, usm_ndarray)): + q = buffer.sycl_queue + else: + q = buffer_ctor_kwargs.get("queue") if q is not None: dtype = default_device_fp_type(q) else: @@ -966,32 +895,17 @@ cdef class usm_ndarray: raise IndexError("only integer arrays are valid indices") def __abs__(self): - return _dispatch_unary_elementwise(self, "abs") + return dpctl.tensor.abs(self) def __add__(first, other): """ - Cython 0.* never calls `__radd__`, always calls `__add__` - but first argument need not be an instance of this class, - so dispatching is needed. - - This changes in Cython 3.0, where first is guaranteed to - be `self`. - - [1] http://docs.cython.org/en/latest/src/userguide/special_methods.html + Implementation for operator.add """ - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "add", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "add", other) - return NotImplemented + return dpctl.tensor.add(first, other) def __and__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "bitwise_and", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "bitwise_and", other) - return NotImplemented + "Implementation for operator.and" + return dpctl.tensor.bitwise_and(first, other) def __dlpack__(self, stream=None): """ @@ -1037,27 +951,22 @@ cdef class usm_ndarray: ) def __eq__(self, other): - return _dispatch_binary_elementwise(self, "equal", other) + return dpctl.tensor.equal(self, other) def __floordiv__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "floor_divide", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "floor_divide", other) - return NotImplemented + return dpctl.tensor.floor_divide(first, other) def __ge__(self, other): - return _dispatch_binary_elementwise(self, "greater_equal", other) + return dpctl.tensor.greater_equal(self, other) def __gt__(self, other): - return _dispatch_binary_elementwise(self, "greater", other) + return dpctl.tensor.greater(self, other) def __invert__(self): - return _dispatch_unary_elementwise(self, "bitwise_invert") + return dpctl.tensor.bitwise_invert(self) def __le__(self, other): - return _dispatch_binary_elementwise(self, "less_equal", other) + return dpctl.tensor.less_equal(self, other) def __len__(self): if (self.nd_): @@ -1067,72 +976,40 @@ cdef class usm_ndarray: def __lshift__(first, other): "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "bitwise_left_shift", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "bitwise_left_shift", other) - return NotImplemented + return dpctl.tensor.bitwise_left_shift(first, other) def __lt__(self, other): - return _dispatch_binary_elementwise(self, "less", other) + return dpctl.tensor.less(self, other) def __matmul__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "matmul", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "matmul", other) return NotImplemented def __mod__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "remainder", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "remainder", other) - return NotImplemented + return dpctl.tensor.remainder(first, other) def __mul__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "multiply", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "multiply", other) - return NotImplemented + return dpctl.tensor.multiply(first, other) def __ne__(self, other): - return _dispatch_binary_elementwise(self, "not_equal", other) + return dpctl.tensor.not_equal(self, other) def __neg__(self): - return _dispatch_unary_elementwise(self, "negative") + return dpctl.tensor.negative(self) def __or__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "bitwise_or", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "bitwise_or", other) - return NotImplemented + return dpctl.tensor.bitwise_or(first, other) def __pos__(self): - return self # _dispatch_unary_elementwise(self, "positive") + return dpctl.tensor.positive(self) def __pow__(first, other, mod): - "See comment in __add__" if mod is None: - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "pow", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise(first, "pow", other) - return NotImplemented + return dpctl.tensor.pow(first, other) + else: + return NotImplemented def __rshift__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "bitwise_right_shift", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "bitwise_right_shift", other) - return NotImplemented + return dpctl.tensor.bitwise_right_shift(first, other) def __setitem__(self, key, rhs): cdef tuple _meta @@ -1223,67 +1100,52 @@ cdef class usm_ndarray: def __sub__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "subtract", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "subtract", other) - return NotImplemented + return dpctl.tensor.subtract(first, other) def __truediv__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "divide", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "divide", other) - return NotImplemented + return dpctl.tensor.divide(first, other) def __xor__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "bitwise_xor", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "bitwise_xor", other) - return NotImplemented + return dpctl.tensor.bitwise_xor(first, other) def __radd__(self, other): - return _dispatch_binary_elementwise(self, "add", other) + return dpctl.tensor.add(other, self) def __rand__(self, other): - return _dispatch_binary_elementwise(self, "bitwise_and", other) + return dpctl.tensor.bitwise_and(other, self) def __rfloordiv__(self, other): - return _dispatch_binary_elementwise2(other, "floor_divide", self) + return dpctl.tensor.floor_divide(other, self) def __rlshift__(self, other): - return _dispatch_binary_elementwise2(other, "bitwise_left_shift", self) + return dpctl.tensor.bitwise_left_shift(other, self) def __rmatmul__(self, other): - return _dispatch_binary_elementwise2(other, "matmul", self) + return NotImplemented def __rmod__(self, other): - return _dispatch_binary_elementwise2(other, "remainder", self) + return dpctl.tensor.remainder(other, self) def __rmul__(self, other): - return _dispatch_binary_elementwise(self, "multiply", other) + return dpctl.tensor.multiply(other, self) def __ror__(self, other): - return _dispatch_binary_elementwise(self, "bitwise_or", other) + return dpctl.tensor.bitwise_or(other, self) def __rpow__(self, other): - return _dispatch_binary_elementwise2(other, "pow", self) + return dpctl.tensor.pow(other, self) def __rrshift__(self, other): - return _dispatch_binary_elementwise2(other, "bitwise_right_shift", self) + return dpctl.tensor.bitwise_right_shift(other, self) def __rsub__(self, other): - return _dispatch_binary_elementwise2(other, "subtract", self) + return dpctl.tensor.subtract(other, self) def __rtruediv__(self, other): - return _dispatch_binary_elementwise2(other, "divide", self) + return dpctl.tensor.divide(other, self) def __rxor__(self, other): - return _dispatch_binary_elementwise2(other, "bitwise_xor", self) + return dpctl.tensor.bitwise_xor(other, self) def __iadd__(self, other): from ._elementwise_funcs import add diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp index c734fd54ec..9c42ba1bb4 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp @@ -25,8 +25,10 @@ #pragma once #include #include +#include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -36,8 +38,6 @@ #include "utils/type_utils.hpp" #include -#include - namespace dpctl { namespace tensor @@ -72,7 +72,58 @@ template struct AbsFunctor return x; } else { - return std::abs(x); + if constexpr (is_complex::value) { + return cabs(x); + } + else if constexpr (std::is_same_v || + std::is_floating_point_v) + { + return (std::signbit(x) ? -x : x); + } + else { + return std::abs(x); + } + } + } + +private: + template realT cabs(std::complex const &z) const + { + // Special values for cabs( x + y * 1j): + // * If x is either +infinity or -infinity and y is any value + // (including NaN), the result is +infinity. + // * If x is any value (including NaN) and y is either +infinity or + // -infinity, the result is +infinity. + // * If x is either +0 or -0, the result is equal to abs(y). + // * If y is either +0 or -0, the result is equal to abs(x). + // * If x is NaN and y is a finite number, the result is NaN. + // * If x is a finite number and y is NaN, the result is NaN. + // * If x is NaN and y is NaN, the result is NaN. + + const realT x = std::real(z); + const realT y = std::imag(z); + + constexpr realT q_nan = std::numeric_limits::quiet_NaN(); + constexpr realT p_inf = std::numeric_limits::infinity(); + + if (std::isinf(x)) { + return p_inf; + } + else if (std::isinf(y)) { + return p_inf; + } + else if (std::isnan(x)) { + return q_nan; + } + else if (std::isnan(y)) { + return q_nan; + } + else { +#ifdef USE_STD_ABS_FOR_COMPLEX_TYPES + return std::abs(z); +#else + return std::hypot(std::real(z), std::imag(z)); +#endif } } }; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp index 98ce88557a..830794680c 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp @@ -37,8 +37,6 @@ #include "utils/type_utils.hpp" #include -#include - namespace dpctl { namespace tensor diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp index f75b93d0f5..d5fab29315 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp @@ -37,8 +37,6 @@ #include "utils/type_utils.hpp" #include -#include - namespace dpctl { namespace tensor diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp index fb387bdcb2..8f0488746e 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp @@ -37,8 +37,6 @@ #include "utils/type_utils.hpp" #include -#include - namespace dpctl { namespace tensor diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp index 7853428227..808e82539e 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp @@ -26,8 +26,10 @@ #pragma once #include #include +#include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -66,7 +68,186 @@ template struct SqrtFunctor resT operator()(const argT &in) { - return std::sqrt(in); + if constexpr (is_complex::value) { + // #ifdef _WINDOWS + // return csqrt(in); + // #else + // return std::sqrt(in); + // #endif + return csqrt(in); + } + else { + return std::sqrt(in); + } + } + +private: + template std::complex csqrt(std::complex const &z) const + { + // csqrt(x + y*1j) + // * csqrt(x - y * 1j) = conj(csqrt(x + y * 1j)) + // * If x is either +0 or -0 and y is +0, the result is +0 + 0j. + // * If x is any value (including NaN) and y is +infinity, the result + // is +infinity + infinity j. + // * If x is a finite number and y is NaN, the result is NaN + NaN j. + + // * If x -infinity and y is a positive (i.e., greater than 0) finite + // number, the result is NaN + NaN j. + // * If x is +infinity and y is a positive (i.e., greater than 0) + // finite number, the result is +0 + infinity j. + // * If x is -infinity and y is NaN, the result is NaN + infinity j + // (sign of the imaginary component is unspecified). + // * If x is +infinity and y is NaN, the result is +infinity + NaN j. + // * If x is NaN and y is any value, the result is NaN + NaN j. + + using realT = T; + constexpr realT q_nan = std::numeric_limits::quiet_NaN(); + constexpr realT p_inf = std::numeric_limits::infinity(); + constexpr realT zero = realT(0); + + realT x = std::real(z); + realT y = std::imag(z); + + if (std::isinf(y)) { + return {p_inf, y}; + } + else if (std::isnan(x)) { + return {x, q_nan}; + } + else if (std::isinf(x)) { // x is an infinity + // y is either finite, or nan + if (std::signbit(x)) { // x == -inf + return {(std::isfinite(y) ? zero : y), std::copysign(p_inf, y)}; + } + else { + return {p_inf, (std::isfinite(y) ? std::copysign(zero, y) : y)}; + } + } + else { // x is finite + if (std::isfinite(y)) { +#ifdef USE_STD_SQRT_FOR_COMPLEX_TYPES + return std::sqrt(z); +#else + return csqrt_finite(x, y); +#endif + } + else { + return {q_nan, y}; + } + } + } + + int get_normal_scale_float(const float &v) const + { + constexpr int float_significant_bits = 23; + constexpr std::uint32_t exponent_mask = 0xff; + constexpr int exponent_bias = 127; + const int scale = static_cast( + (sycl::bit_cast(v) >> float_significant_bits) & + exponent_mask); + return scale - exponent_bias; + } + + int get_normal_scale_double(const double &v) const + { + constexpr int float_significant_bits = 53; + constexpr std::uint64_t exponent_mask = 0x7ff; + constexpr int exponent_bias = 1023; + const int scale = static_cast( + (sycl::bit_cast(v) >> float_significant_bits) & + exponent_mask); + return scale - exponent_bias; + } + + template int get_normal_scale(const T &v) const + { + static_assert(std::is_same_v || std::is_same_v); + + if constexpr (std::is_same_v) { + return get_normal_scale_float(v); + } + else { + return get_normal_scale_double(v); + } + } + + template + std::complex csqrt_finite_scaled(T const &x, T const &y) const + { + // csqrt(x + y*1j) = + // sqrt((cabs(x, y) + x) / 2) + + // 1j * copysign(sqrt((cabs(x, y) - x) / 2), y) + + using realT = T; + constexpr realT half = realT(0x1.0p-1f); // 1/2 + constexpr realT zero = realT(0); + + const int exp_x = get_normal_scale(x); + const int exp_y = get_normal_scale(y); + + int sc = std::max(exp_x, exp_y) / 2; + const realT xx = sycl::ldexp(x, -sc * 2); + const realT yy = sycl::ldexp(y, -sc * 2); + + if (std::signbit(xx)) { + const realT m = std::hypot(xx, yy); + const realT d = std::sqrt((m - xx) * half); + const realT res_re = (d == zero ? zero : std::abs(yy) / d * half); + const realT res_im = std::copysign(d, yy); + return {sycl::ldexp(res_re, sc), sycl::ldexp(res_im, sc)}; + } + else { + const realT m = std::hypot(xx, yy); + const realT d = std::sqrt((m + xx) * half); + const realT res_im = + (d == zero) ? std::copysign(zero, yy) : yy * half / d; + return {sycl::ldexp(d, sc), sycl::ldexp(res_im, sc)}; + } + } + + template + std::complex csqrt_finite_unscaled(T const &x, T const &y) const + { + // csqrt(x + y*1j) = + // sqrt((cabs(x, y) + x) / 2) + + // 1j * copysign(sqrt((cabs(x, y) - x) / 2), y) + + using realT = T; + constexpr realT half = realT(0x1.0p-1f); // 1/2 + constexpr realT zero = realT(0); + + if (std::signbit(x)) { + const realT m = std::hypot(x, y); + const realT d = std::sqrt((m - x) * half); + const realT res_re = (d == zero ? zero : std::abs(y) / d * half); + const realT res_im = std::copysign(d, y); + return {res_re, res_im}; + } + else { + const realT m = std::hypot(x, y); + const realT d = std::sqrt((m + x) * half); + const realT res_im = + (d == zero) ? std::copysign(zero, y) : y * half / d; + return {d, res_im}; + } + } + + template T scaling_threshold() const + { + if constexpr (std::is_same_v) { + return T(0x1.0p+126f); + } + else { + return T(0x1.0p+1022); + } + } + + template + std::complex csqrt_finite(T const &x, T const &y) const + { + return (std::max(std::abs(x), std::abs(y)) < scaling_threshold()) + ? csqrt_finite_unscaled(x, y) + : csqrt_finite_scaled(x, y); } }; diff --git a/dpctl/tests/elementwise/test_abs.py b/dpctl/tests/elementwise/test_abs.py index ab0d34d54d..9c800af812 100644 --- a/dpctl/tests/elementwise/test_abs.py +++ b/dpctl/tests/elementwise/test_abs.py @@ -15,6 +15,7 @@ # limitations under the License. import itertools +import warnings import numpy as np import pytest @@ -22,7 +23,13 @@ import dpctl.tensor as dpt from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported -from .utils import _all_dtypes, _no_complex_dtypes, _usm_types +from .utils import ( + _all_dtypes, + _complex_fp_dtypes, + _no_complex_dtypes, + _real_fp_dtypes, + _usm_types, +) @pytest.mark.parametrize("dtype", _all_dtypes) @@ -135,3 +142,50 @@ def test_abs_out_overlap(dtype): assert Y is not X assert np.allclose(dpt.asnumpy(X), Xnp) assert np.allclose(dpt.asnumpy(Y), Ynp) + + +@pytest.mark.parametrize("dtype", _real_fp_dtypes) +def test_abs_real_fp_special_values(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + nans_ = [dpt.nan, -dpt.nan] + infs_ = [dpt.inf, -dpt.inf] + finites_ = [-1.0, -0.0, 0.0, 1.0] + inps_ = nans_ + infs_ + finites_ + + x = dpt.asarray(inps_, dtype=dtype) + r = dpt.abs(x) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + expected_np = np.abs(np.asarray(inps_, dtype=dtype)) + + expected = dpt.asarray(expected_np, dtype=dtype) + tol = dpt.finfo(r.dtype).resolution + + assert dpt.allclose(r, expected, atol=tol, rtol=tol, equal_nan=True) + + +@pytest.mark.parametrize("dtype", _complex_fp_dtypes) +def test_abs_complex_fp_special_values(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + nans_ = [dpt.nan, -dpt.nan] + infs_ = [dpt.inf, -dpt.inf] + finites_ = [-1.0, -0.0, 0.0, 1.0] + inps_ = nans_ + infs_ + finites_ + c_ = [complex(*v) for v in itertools.product(inps_, repeat=2)] + + z = dpt.asarray(c_, dtype=dtype) + r = dpt.abs(z) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + expected_np = np.abs(np.asarray(c_, dtype=dtype)) + + expected = dpt.asarray(expected_np, dtype=dtype) + tol = dpt.finfo(r.dtype).resolution + + assert dpt.allclose(r, expected, atol=tol, rtol=tol, equal_nan=True) diff --git a/dpctl/tests/elementwise/test_sqrt.py b/dpctl/tests/elementwise/test_sqrt.py index a15f5262a7..07b97733e3 100644 --- a/dpctl/tests/elementwise/test_sqrt.py +++ b/dpctl/tests/elementwise/test_sqrt.py @@ -15,6 +15,7 @@ # limitations under the License. import itertools +import warnings import numpy as np import pytest @@ -23,7 +24,13 @@ import dpctl.tensor as dpt from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported -from .utils import _all_dtypes, _map_to_device_dtype, _usm_types +from .utils import ( + _all_dtypes, + _complex_fp_dtypes, + _map_to_device_dtype, + _real_fp_dtypes, + _usm_types, +) @pytest.mark.parametrize("dtype", _all_dtypes) @@ -115,18 +122,6 @@ def test_sqrt_order(dtype): assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) -@pytest.mark.usefixtures("suppress_invalid_numpy_warnings") -def test_sqrt_special_cases(): - q = get_queue_or_skip() - - X = dpt.asarray( - [dpt.nan, -1.0, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4", sycl_queue=q - ) - Xnp = dpt.asnumpy(X) - - assert_equal(dpt.asnumpy(dpt.sqrt(X)), np.sqrt(Xnp)) - - @pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) def test_sqrt_out_overlap(dtype): q = get_queue_or_skip() @@ -149,3 +144,62 @@ def test_sqrt_out_overlap(dtype): assert Y is not X assert_allclose(dpt.asnumpy(X), Xnp, atol=tol, rtol=tol) assert_allclose(dpt.asnumpy(Y), Ynp, atol=tol, rtol=tol) + + +@pytest.mark.usefixtures("suppress_invalid_numpy_warnings") +def test_sqrt_special_cases(): + q = get_queue_or_skip() + + X = dpt.asarray( + [dpt.nan, -1.0, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4", sycl_queue=q + ) + Xnp = dpt.asnumpy(X) + + assert_equal(dpt.asnumpy(dpt.sqrt(X)), np.sqrt(Xnp)) + + +@pytest.mark.parametrize("dtype", _real_fp_dtypes) +def test_sqrt_real_fp_special_values(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + nans_ = [dpt.nan, -dpt.nan] + infs_ = [dpt.inf, -dpt.inf] + finites_ = [-1.0, -0.0, 0.0, 1.0] + inps_ = nans_ + infs_ + finites_ + + x = dpt.asarray(inps_, dtype=dtype) + r = dpt.sqrt(x) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + expected_np = np.sqrt(np.asarray(inps_, dtype=dtype)) + + expected = dpt.asarray(expected_np, dtype=dtype) + tol = dpt.finfo(r.dtype).resolution + + assert dpt.allclose(r, expected, atol=tol, rtol=tol, equal_nan=True) + + +@pytest.mark.parametrize("dtype", _complex_fp_dtypes) +def test_sqrt_complex_fp_special_values(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + nans_ = [dpt.nan, -dpt.nan] + infs_ = [dpt.inf, -dpt.inf] + finites_ = [-1.0, -0.0, 0.0, 1.0] + inps_ = nans_ + infs_ + finites_ + c_ = [complex(*v) for v in itertools.product(inps_, repeat=2)] + + z = dpt.asarray(c_, dtype=dtype) + r = dpt.sqrt(z) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + expected_np = np.sqrt(np.asarray(c_, dtype=dtype)) + + expected = dpt.asarray(expected_np, dtype=dtype) + tol = dpt.finfo(r.dtype).resolution + + assert dpt.allclose(r, expected, atol=tol, rtol=tol, equal_nan=True) diff --git a/dpctl/tests/test_tensor_testing.py b/dpctl/tests/test_tensor_testing.py new file mode 100644 index 0000000000..d1cb4df4ab --- /dev/null +++ b/dpctl/tests/test_tensor_testing.py @@ -0,0 +1,149 @@ +import itertools + +import pytest + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +_all_dtypes = [ + "?", + "i1", + "u1", + "i2", + "u2", + "i4", + "u4", + "i8", + "u8", + "f2", + "f4", + "f8", + "c8", + "c16", +] + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_allclose(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + a1 = dpt.ones(10, dtype=dtype) + a2 = dpt.ones(10, dtype=dtype) + + assert dpt.allclose(a1, a2) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8"]) +def test_allclose_real_fp(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + v = [dpt.nan, -dpt.nan, dpt.inf, -dpt.inf, -0.0, 0.0, 1.0, -1.0] + a1 = dpt.asarray(v[2:], dtype=dtype) + a2 = dpt.asarray(v[2:], dtype=dtype) + + tol = dpt.finfo(a1.dtype).resolution + assert dpt.allclose(a1, a2, atol=tol, rtol=tol) + + a1 = dpt.asarray(v, dtype=dtype) + a2 = dpt.asarray(v, dtype=dtype) + + assert not dpt.allclose(a1, a2, atol=tol, rtol=tol) + assert dpt.allclose(a1, a2, atol=tol, rtol=tol, equal_nan=True) + + +@pytest.mark.parametrize("dtype", ["c8", "c16"]) +def test_allclose_complex_fp(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + v = [dpt.nan, -dpt.nan, dpt.inf, -dpt.inf, -0.0, 0.0, 1.0, -1.0] + + not_nans = [complex(*xy) for xy in itertools.product(v[2:], repeat=2)] + z1 = dpt.asarray(not_nans, dtype=dtype) + z2 = dpt.asarray(not_nans, dtype=dtype) + + tol = dpt.finfo(z1.dtype).resolution + assert dpt.allclose(z1, z2, atol=tol, rtol=tol) + + both = [complex(*xy) for xy in itertools.product(v, repeat=2)] + z1 = dpt.asarray(both, dtype=dtype) + z2 = dpt.asarray(both, dtype=dtype) + + tol = dpt.finfo(z1.dtype).resolution + assert not dpt.allclose(z1, z2, atol=tol, rtol=tol) + assert dpt.allclose(z1, z2, atol=tol, rtol=tol, equal_nan=True) + + +def test_allclose_validation(): + with pytest.raises(TypeError): + dpt.allclose(True, False) + + get_queue_or_skip() + x = dpt.asarray(True) + with pytest.raises(TypeError): + dpt.allclose(x, False) + + +def test_allclose_type_promotion(): + get_queue_or_skip() + + x1 = dpt.ones(10, dtype="i4") + x2 = dpt.ones(10, dtype="i8") + + assert dpt.allclose(x1, x2) + + +def test_allclose_tolerance(): + get_queue_or_skip() + + x = dpt.zeros(10, dtype="f4") + atol = 1e-5 + y = dpt.full_like(x, atol) + assert dpt.allclose(x, y, atol=atol, rtol=0) + + # about 8e-6 + tol = float.fromhex("0x1.0p-17") + x = dpt.ones(10, dtype="f4") + y = x - tol + assert dpt.allclose(x, y, atol=0, rtol=tol) + + +def test_allclose_real_fp_early_exists(): + get_queue_or_skip() + + x1 = dpt.asarray([0.0, dpt.inf, -dpt.inf], dtype="f4") + x2 = dpt.asarray([dpt.inf, 0.0, -dpt.inf], dtype="f4") + + # early exists, inf positions are different + assert not dpt.allclose(x1, x2) + + x2 = dpt.asarray([0.0, -dpt.inf, dpt.inf], dtype="f4") + + # early exists, inf positions are the same, but signs differ + assert not dpt.allclose(x1, x2) + + +def test_allclose_complex_fp_early_exists(): + get_queue_or_skip() + + x1 = dpt.asarray([0.0, dpt.inf, -dpt.inf], dtype="c8") + x2 = dpt.asarray([dpt.inf, 0.0, -dpt.inf], dtype="c8") + + # early exists, inf positions of real parts are different + assert not dpt.allclose(x1, x2) + + x2 = dpt.asarray([0.0, -dpt.inf, dpt.inf], dtype="c8") + + # early exists, inf positions of real parts are the same, but signs differ + assert not dpt.allclose(x1, x2) + + x1 = dpt.asarray([0.0, dpt.inf * 1j, -dpt.inf * 1j], dtype="c8") + x2 = dpt.asarray([dpt.inf * 1j, 0.0, -dpt.inf * 1j], dtype="c8") + + # early exists, inf positions of imag parts are different + assert not dpt.allclose(x1, x2) + + x2 = dpt.asarray([0.0, -dpt.inf * 1j, dpt.inf * 1j], dtype="c8") + assert not dpt.allclose(x1, x2)