From 48c2ad2041e969c8b390d339fa7ba2e8ffd76332 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 14 Aug 2023 12:14:14 -0500 Subject: [PATCH 01/15] Clean up of operator special methods 1. Removed unused usm_ndarray._clone static C-only method 2. Removed _dispatch* utilities 3. Used direct calls to unary/binary operators in implementation of special methods --- dpctl/tensor/_usmarray.pxd | 1 - dpctl/tensor/_usmarray.pyx | 219 +++++++------------------------------ 2 files changed, 39 insertions(+), 181 deletions(-) diff --git a/dpctl/tensor/_usmarray.pxd b/dpctl/tensor/_usmarray.pxd index 603b296103..120ad7a066 100644 --- a/dpctl/tensor/_usmarray.pxd +++ b/dpctl/tensor/_usmarray.pxd @@ -58,7 +58,6 @@ cdef api class usm_ndarray [object PyUSMArrayObject, type PyUSMArrayType]: cdef void _reset(usm_ndarray self) cdef void _cleanup(usm_ndarray self) - cdef usm_ndarray _clone(usm_ndarray self) cdef Py_ssize_t get_offset(usm_ndarray self) except * cdef char* get_data(self) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 98986d84d6..2b16df1060 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -44,58 +44,6 @@ include "_types.pxi" include "_slicing.pxi" -def _dispatch_unary_elementwise(ary, name): - try: - mod = ary.__array_namespace__() - except AttributeError: - return NotImplemented - if mod is None and "dpnp" in sys.modules: - fn = getattr(sys.modules["dpnp"], name) - if callable(fn): - return fn(ary) - elif hasattr(mod, name): - fn = getattr(mod, name) - if callable(fn): - return fn(ary) - - return NotImplemented - - -def _dispatch_binary_elementwise(ary, name, other): - try: - mod = ary.__array_namespace__() - except AttributeError: - return NotImplemented - if mod is None and "dpnp" in sys.modules: - fn = getattr(sys.modules["dpnp"], name) - if callable(fn): - return fn(ary, other) - elif hasattr(mod, name): - fn = getattr(mod, name) - if callable(fn): - return fn(ary, other) - - return NotImplemented - - -def _dispatch_binary_elementwise2(other, name, ary): - try: - mod = ary.__array_namespace__() - except AttributeError: - return NotImplemented - mod = ary.__array_namespace__() - if mod is None and "dpnp" in sys.modules: - fn = getattr(sys.modules["dpnp"], name) - if callable(fn): - return fn(other, ary) - elif hasattr(mod, name): - fn = getattr(mod, name) - if callable(fn): - return fn(other, ary) - - return NotImplemented - - cdef class InternalUSMArrayError(Exception): """ A InternalError exception is raised when internal @@ -200,28 +148,6 @@ cdef class usm_ndarray: PyMem_Free(self.strides_) self._reset() - cdef usm_ndarray _clone(usm_ndarray self): - """ - Provides a copy of Python object pointing to the same data - """ - cdef Py_ssize_t offset_elems = self.get_offset() - cdef usm_ndarray res = usm_ndarray.__new__( - usm_ndarray, _make_int_tuple(self.nd_, self.shape_), - dtype=_make_typestr(self.typenum_), - strides=( - _make_int_tuple(self.nd_, self.strides_) if (self.strides_) - else None), - buffer=self.base_, - offset=offset_elems, - order=('C' if (self.flags_ & USM_ARRAY_C_CONTIGUOUS) else 'F') - ) - res.flags_ = self.flags_ - res.array_namespace_ = self.array_namespace_ - if (res.data_ != self.data_): - raise InternalUSMArrayError( - "Data pointers of cloned and original objects are different.") - return res - def __cinit__(self, shape, dtype=None, strides=None, buffer='device', Py_ssize_t offset=0, order='C', buffer_ctor_kwargs=dict(), @@ -966,32 +892,17 @@ cdef class usm_ndarray: raise IndexError("only integer arrays are valid indices") def __abs__(self): - return _dispatch_unary_elementwise(self, "abs") + return dpctl.tensor.abs(self) def __add__(first, other): """ - Cython 0.* never calls `__radd__`, always calls `__add__` - but first argument need not be an instance of this class, - so dispatching is needed. - - This changes in Cython 3.0, where first is guaranteed to - be `self`. - - [1] http://docs.cython.org/en/latest/src/userguide/special_methods.html + Implementation for operator.add """ - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "add", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "add", other) - return NotImplemented + return dpctl.tensor.add(first, other) def __and__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "bitwise_and", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "bitwise_and", other) - return NotImplemented + "Implementation for operator.and" + return dpctl.tensor.bitwise_and(first, other) def __dlpack__(self, stream=None): """ @@ -1037,27 +948,22 @@ cdef class usm_ndarray: ) def __eq__(self, other): - return _dispatch_binary_elementwise(self, "equal", other) + return dpctl.tensor.equal(self, other) def __floordiv__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "floor_divide", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "floor_divide", other) - return NotImplemented + return dpctl.tensor.floor_divide(first, other) def __ge__(self, other): - return _dispatch_binary_elementwise(self, "greater_equal", other) + return dpctl.tensor.greater_equal(self, other) def __gt__(self, other): - return _dispatch_binary_elementwise(self, "greater", other) + return dpctl.tensor.greater(self, other) def __invert__(self): - return _dispatch_unary_elementwise(self, "bitwise_invert") + return dpctl.tensor.bitwise_invert(self) def __le__(self, other): - return _dispatch_binary_elementwise(self, "less_equal", other) + return dpctl.tensor.less_equal(self, other) def __len__(self): if (self.nd_): @@ -1067,72 +973,40 @@ cdef class usm_ndarray: def __lshift__(first, other): "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "bitwise_left_shift", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "bitwise_left_shift", other) - return NotImplemented + return dpctl.tensor.bitwise_left_shift(first, other) def __lt__(self, other): - return _dispatch_binary_elementwise(self, "less", other) + return dpctl.tensor.less(self, other) def __matmul__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "matmul", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "matmul", other) return NotImplemented def __mod__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "remainder", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "remainder", other) - return NotImplemented + return dpctl.tensor.remainder(first, other) def __mul__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "multiply", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "multiply", other) - return NotImplemented + return dpctl.tensor.multiply(first, other) def __ne__(self, other): - return _dispatch_binary_elementwise(self, "not_equal", other) + return dpctl.tensor.not_equal(self, other) def __neg__(self): - return _dispatch_unary_elementwise(self, "negative") + return dpctl.tensor.negative(self) def __or__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "bitwise_or", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "bitwise_or", other) - return NotImplemented + return dpctl.tensor.bitwise_or(first, other) def __pos__(self): - return self # _dispatch_unary_elementwise(self, "positive") + return dpctl.tensor.positive(self) def __pow__(first, other, mod): - "See comment in __add__" if mod is None: - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "pow", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise(first, "pow", other) - return NotImplemented + return dpctl.tensor.pow(first, other) + else: + return NotImplemented def __rshift__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "bitwise_right_shift", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "bitwise_right_shift", other) - return NotImplemented + return dpctl.tensor.bitwise_right_shift(first, other) def __setitem__(self, key, rhs): cdef tuple _meta @@ -1223,67 +1097,52 @@ cdef class usm_ndarray: def __sub__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "subtract", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "subtract", other) - return NotImplemented + return dpctl.tensor.subtract(first, other) def __truediv__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "divide", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "divide", other) - return NotImplemented + return dpctl.tensor.divide(first, other) def __xor__(first, other): - "See comment in __add__" - if isinstance(first, usm_ndarray): - return _dispatch_binary_elementwise(first, "bitwise_xor", other) - elif isinstance(other, usm_ndarray): - return _dispatch_binary_elementwise2(first, "bitwise_xor", other) - return NotImplemented + return dpctl.tensor.bitwise_xor(first, other) def __radd__(self, other): - return _dispatch_binary_elementwise(self, "add", other) + return dpctl.tensor.add(other, self) def __rand__(self, other): - return _dispatch_binary_elementwise(self, "bitwise_and", other) + return dpctl.tensor.bitwise_and(other, self) def __rfloordiv__(self, other): - return _dispatch_binary_elementwise2(other, "floor_divide", self) + return dpctl.tensor.floor_divide(other, self) def __rlshift__(self, other): - return _dispatch_binary_elementwise2(other, "bitwise_left_shift", self) + return dpctl.tensor.bitwise_left_shift(other, self) def __rmatmul__(self, other): - return _dispatch_binary_elementwise2(other, "matmul", self) + return NotImplemented def __rmod__(self, other): - return _dispatch_binary_elementwise2(other, "remainder", self) + return dpctl.tensor.remainder(other, self) def __rmul__(self, other): - return _dispatch_binary_elementwise(self, "multiply", other) + return dpctl.tensor.multiply(other, self) def __ror__(self, other): - return _dispatch_binary_elementwise(self, "bitwise_or", other) + return dpctl.tensor.bitwise_or(other, self) def __rpow__(self, other): - return _dispatch_binary_elementwise2(other, "pow", self) + return dpctl.tensor.pow(other, self) def __rrshift__(self, other): - return _dispatch_binary_elementwise2(other, "bitwise_right_shift", self) + return dpctl.tensor.bitwise_right_shift(other, self) def __rsub__(self, other): - return _dispatch_binary_elementwise2(other, "subtract", self) + return dpctl.tensor.subtract(other, self) def __rtruediv__(self, other): - return _dispatch_binary_elementwise2(other, "divide", self) + return dpctl.tensor.divide(other, self) def __rxor__(self, other): - return _dispatch_binary_elementwise2(other, "bitwise_xor", self) + return dpctl.tensor.bitwise_xor(other, self) def __iadd__(self, other): from ._elementwise_funcs import add From 7d9974ef339b057950aa047e760a726630fb2d54 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 15 Aug 2023 09:22:22 -0500 Subject: [PATCH 02/15] Removed leftover include iostream --- .../include/kernels/elementwise_functions/negative.hpp | 2 -- .../libtensor/include/kernels/elementwise_functions/sign.hpp | 2 -- 2 files changed, 4 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp index 98ce88557a..830794680c 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp @@ -37,8 +37,6 @@ #include "utils/type_utils.hpp" #include -#include - namespace dpctl { namespace tensor diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp index fb387bdcb2..8f0488746e 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp @@ -37,8 +37,6 @@ #include "utils/type_utils.hpp" #include -#include - namespace dpctl { namespace tensor From 5f298e6d405cec2f6701148bc03d4e5c4455ea8c Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 13 Aug 2023 05:42:09 -0500 Subject: [PATCH 03/15] Closes gh-1279 Provide cabs private method implementating abs for complex types, paying attention to array-API mandated special values. To work-around gh-1279, use std::hypot to compute value for finite inputs. Compile with -DUSE_STD_ABS_FOR_COMPLEX_TYPES to use std::abs(z) instead of std::hypot(std::real(z), std::imag(z)). --- .../kernels/elementwise_functions/abs.hpp | 57 ++++++++++++++++++- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp index c734fd54ec..9c42ba1bb4 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp @@ -25,8 +25,10 @@ #pragma once #include #include +#include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -36,8 +38,6 @@ #include "utils/type_utils.hpp" #include -#include - namespace dpctl { namespace tensor @@ -72,7 +72,58 @@ template struct AbsFunctor return x; } else { - return std::abs(x); + if constexpr (is_complex::value) { + return cabs(x); + } + else if constexpr (std::is_same_v || + std::is_floating_point_v) + { + return (std::signbit(x) ? -x : x); + } + else { + return std::abs(x); + } + } + } + +private: + template realT cabs(std::complex const &z) const + { + // Special values for cabs( x + y * 1j): + // * If x is either +infinity or -infinity and y is any value + // (including NaN), the result is +infinity. + // * If x is any value (including NaN) and y is either +infinity or + // -infinity, the result is +infinity. + // * If x is either +0 or -0, the result is equal to abs(y). + // * If y is either +0 or -0, the result is equal to abs(x). + // * If x is NaN and y is a finite number, the result is NaN. + // * If x is a finite number and y is NaN, the result is NaN. + // * If x is NaN and y is NaN, the result is NaN. + + const realT x = std::real(z); + const realT y = std::imag(z); + + constexpr realT q_nan = std::numeric_limits::quiet_NaN(); + constexpr realT p_inf = std::numeric_limits::infinity(); + + if (std::isinf(x)) { + return p_inf; + } + else if (std::isinf(y)) { + return p_inf; + } + else if (std::isnan(x)) { + return q_nan; + } + else if (std::isnan(y)) { + return q_nan; + } + else { +#ifdef USE_STD_ABS_FOR_COMPLEX_TYPES + return std::abs(z); +#else + return std::hypot(std::real(z), std::imag(z)); +#endif } } }; From 4a2578fffdc147c03732f7b89df29f021d4b232e Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 15 Aug 2023 10:13:19 -0500 Subject: [PATCH 04/15] Closes gh-1279 for dpt.sqrt This change provides private method csqrt to evaluate square-root for complex types. It handles special values as mandated by array API. The finite input, it provides its own implementation based on std::hypot and std::sqrt for real types instead of calling std::sqrt on finite input of complex type. Compile with -DUSE_STD_SQRT_FOR_COMPLEX_TYPES to use std::sqrt instead of custom implementation. Cursory performance study suggests that custom implementation is at least not worse than std::sqrt one. --- .../kernels/elementwise_functions/sqrt.hpp | 94 ++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp index 7853428227..f72b5d963b 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp @@ -26,8 +26,10 @@ #pragma once #include #include +#include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -66,7 +68,97 @@ template struct SqrtFunctor resT operator()(const argT &in) { - return std::sqrt(in); + if constexpr (is_complex::value) { + // #ifdef _WINDOWS + // return csqrt(in); + // #else + // return std::sqrt(in); + // #endif + return csqrt(in); + } + else { + return std::sqrt(in); + } + } + +private: + template std::complex csqrt(std::complex const &z) const + { + // csqrt(x + y*1j) + // * csqrt(x - y * 1j) = conj(csqrt(x + y * 1j)) + // * If x is either +0 or -0 and y is +0, the result is +0 + 0j. + // * If x is any value (including NaN) and y is +infinity, the result + // is +infinity + infinity j. + // * If x is a finite number and y is NaN, the result is NaN + NaN j. + + // * If x -infinity and y is a positive (i.e., greater than 0) finite + // number, the result is NaN + NaN j. + // * If x is +infinity and y is a positive (i.e., greater than 0) + // finite number, the result is +0 + infinity j. + // * If x is -infinity and y is NaN, the result is NaN + infinity j + // (sign of the imaginary component is unspecified). + // * If x is +infinity and y is NaN, the result is +infinity + NaN j. + // * If x is NaN and y is any value, the result is NaN + NaN j. + + using realT = T; + constexpr realT q_nan = std::numeric_limits::quiet_NaN(); + constexpr realT p_inf = std::numeric_limits::infinity(); + constexpr realT zero = realT(0); + + realT x = std::real(z); + realT y = std::imag(z); + + if (std::isinf(y)) { + return {p_inf, y}; + } + else if (std::isnan(x)) { + return {x, q_nan}; + } + else if (std::isinf(x)) { // x is an infinity + // y is either finite, or nan + if (std::signbit(x)) { // x == -inf + return {(std::isfinite(y) ? zero : y), std::copysign(p_inf, y)}; + } + else { + return {p_inf, (std::isfinite(y) ? std::copysign(zero, y) : y)}; + } + } + else { // x is finite + if (std::isfinite(y)) { +#ifdef USE_STD_SQRT_FOR_COMPLEX_TYPES + return std::sqrt(z); +#else + return csqrt_finite(x, y); +#endif + } + else { + return {q_nan, y}; + } + } + } + + template + std::complex csqrt_finite(T const &x, T const &y) const + { + // csqrt(x + y*1j) = + // sqrt((cabs(x, y) + x) / 2) + + // 1j * copysign(sqrt((cabs(x, y) - x) / 2), y) + + using realT = T; + constexpr realT half = realT(0x1.0p-1f); // 1/2 + constexpr realT zero = realT(0); + + if (std::signbit(x)) { + realT m = std::hypot(x, y); + realT d = std::sqrt((m - x) * half); + return {(d == zero ? zero : std::abs(y) / d * half), + std::copysign(d, y)}; + } + else { + realT m = std::hypot(x, y); + realT d = std::sqrt((m + x) * half); + return {d, (d == zero) ? std::copysign(zero, y) : y * half / d}; + } } }; From 26862b44a7cec8ca91bcd86b017be59f36482e5a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 15 Aug 2023 10:16:49 -0500 Subject: [PATCH 05/15] Implements dpctl.tensor.allclose This utility function is based on symmetric check, unlike numpy.allclose, and verifies that abs(x1-x2) < atol + rtol * max(abs(x1), abs(x2)) This way allclose(x1, x2) is symmetric, and allclose(x1,x2) implies allclose(x2, x1). --- dpctl/tensor/__init__.py | 2 + dpctl/tensor/_testing.py | 144 +++++++++++++++++++++++++++++ dpctl/tests/test_tensor_testing.py | 86 +++++++++++++++++ 3 files changed, 232 insertions(+) create mode 100644 dpctl/tensor/_testing.py create mode 100644 dpctl/tests/test_tensor_testing.py diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index f1ec439f3a..7438fb8a67 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -158,6 +158,7 @@ trunc, ) from ._reduction import sum +from ._testing import allclose __all__ = [ "Device", @@ -301,4 +302,5 @@ "tan", "tanh", "trunc", + "allclose", ] diff --git a/dpctl/tensor/_testing.py b/dpctl/tensor/_testing.py new file mode 100644 index 0000000000..185ed816a4 --- /dev/null +++ b/dpctl/tensor/_testing.py @@ -0,0 +1,144 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np + +import dpctl.tensor as dpt +import dpctl.utils as du + +from ._manipulation_functions import _broadcast_shape_impl +from ._type_utils import _to_device_supported_dtype + + +def _allclose_complex_fp(z1, z2, atol, rtol, equal_nan): + z1r = dpt.real(z1) + z1i = dpt.imag(z1) + z2r = dpt.real(z2) + z2i = dpt.imag(z2) + if equal_nan: + check1 = dpt.all(dpt.isnan(z1r) == dpt.isnan(z2r)) and dpt.all( + dpt.isnan(z1i) == dpt.isnan(z2i) + ) + else: + check1 = ( + dpt.logical_not(dpt.any(dpt.isnan(z1r))) + and dpt.logical_not(dpt.any(dpt.isnan(z1i))) + ) and ( + dpt.logical_not(dpt.any(dpt.isnan(z2r))) + and dpt.logical_not(dpt.any(dpt.isnan(z2i))) + ) + if not check1: + return check1 + mr = dpt.isinf(z1r) + mi = dpt.isinf(z1i) + check2 = dpt.all(mr == dpt.isinf(z2r)) and dpt.all(mi == dpt.isinf(z2i)) + if not check2: + return check2 + check3 = dpt.all(z1r[mr] == z2r[mr]) and dpt.all(z1i[mi] == z2i[mi]) + if not check3: + return check3 + mr = dpt.isfinite(z1r) + mi = dpt.isfinite(z1i) + mv1 = z1r[mr] + mv2 = z2r[mr] + check4 = dpt.all( + dpt.abs(mv1 - mv2) + < atol + rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2)) + ) + if not check4: + return check4 + mv1 = z1i[mi] + mv2 = z2i[mi] + check5 = dpt.all( + dpt.abs(mv1 - mv2) + < atol + rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2)) + ) + return check5 + + +def _allclose_real_fp(r1, r2, atol, rtol, equal_nan): + if equal_nan: + check1 = dpt.all(dpt.isnan(r1) == dpt.isnan(r2)) + else: + check1 = dpt.logical_not(dpt.any(dpt.isnan(r1))) and dpt.logical_not( + dpt.any(dpt.isnan(r2)) + ) + if not check1: + return check1 + mr = dpt.isinf(r1) + check2 = dpt.all(mr == dpt.isinf(r2)) + if not check2: + return check2 + check3 = dpt.all(r1[mr] == r2[mr]) + if not check3: + return check3 + m = dpt.isfinite(r1) + mv1 = r1[m] + mv2 = r2[m] + check4 = dpt.all( + dpt.abs(mv1 - mv2) + < atol + rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2)) + ) + return check4 + + +def _allclose_others(r1, r2): + return dpt.all(r1 == r2) + + +def allclose(a1, a2, atol=1e-5, rtol=1e-8, equal_nan=False): + """allclose(a1, a2, atol=1e-5, rtol=1e-8) + + Returns True if two arrays are element-wise equal within tolerance. + """ + if not isinstance(a1, dpt.usm_ndarray): + raise TypeError( + f"Expected dpctl.tensor.usm_ndarray type, got {type(a1)}." + ) + if not isinstance(a2, dpt.usm_ndarray): + raise TypeError( + f"Expected dpctl.tensor.usm_ndarray type, got {type(a2)}." + ) + atol = float(atol) + rtol = float(rtol) + equal_nan = bool(equal_nan) + exec_q = du.get_execution_queue(tuple(a.sycl_queue for a in (a1, a2))) + if exec_q is None: + raise du.ExecutionPlacementError( + "Execution placement can not be unambiguously inferred " + "from input arguments." + ) + res_sh = _broadcast_shape_impl([a1.shape, a2.shape]) + b1 = a1 + b2 = a2 + if b1.dtype == b2.dtype: + res_dt = b1.dtype + else: + res_dt = np.promote_types(b1.dtype, b2.dtype) + res_dt = _to_device_supported_dtype(res_dt, exec_q.sycl_device) + b1 = dpt.astype(b1, res_dt) + b2 = dpt.astype(b2, res_dt) + + b1 = dpt.broadcast_to(b1, res_sh) + b2 = dpt.broadcast_to(b2, res_sh) + + k = b1.dtype.kind + if k == "c": + return _allclose_complex_fp(b1, b2, atol, rtol, equal_nan) + elif k == "f": + return _allclose_real_fp(b1, b2, atol, rtol, equal_nan) + else: + return _allclose_others(b1, b2) diff --git a/dpctl/tests/test_tensor_testing.py b/dpctl/tests/test_tensor_testing.py new file mode 100644 index 0000000000..21437459d9 --- /dev/null +++ b/dpctl/tests/test_tensor_testing.py @@ -0,0 +1,86 @@ +import itertools + +import pytest + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +_all_dtypes = [ + "?", + "i1", + "u1", + "i2", + "u2", + "i4", + "u4", + "i8", + "u8", + "f2", + "f4", + "f8", + "c8", + "c16", +] + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_allclose(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + a1 = dpt.ones(10, dtype=dtype) + a2 = dpt.ones(10, dtype=dtype) + + assert dpt.allclose(a1, a2) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8"]) +def test_allclose_real_fp(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + v = [dpt.nan, -dpt.nan, dpt.inf, -dpt.inf, -0.0, 0.0, 1.0, -1.0] + a1 = dpt.asarray(v[2:], dtype=dtype) + a2 = dpt.asarray(v[2:], dtype=dtype) + + tol = dpt.finfo(a1.dtype).resolution + assert dpt.allclose(a1, a2, atol=tol, rtol=tol) + + a1 = dpt.asarray(v, dtype=dtype) + a2 = dpt.asarray(v, dtype=dtype) + + assert not dpt.allclose(a1, a2, atol=tol, rtol=tol) + assert dpt.allclose(a1, a2, atol=tol, rtol=tol, equal_nan=True) + + +@pytest.mark.parametrize("dtype", ["c8", "c16"]) +def test_allclose_complex_fp(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + v = [dpt.nan, -dpt.nan, dpt.inf, -dpt.inf, -0.0, 0.0, 1.0, -1.0] + + not_nans = [complex(*xy) for xy in itertools.product(v[2:], repeat=2)] + z1 = dpt.asarray(not_nans, dtype=dtype) + z2 = dpt.asarray(not_nans, dtype=dtype) + + tol = dpt.finfo(z1.dtype).resolution + assert dpt.allclose(z1, z2, atol=tol, rtol=tol) + + both = [complex(*xy) for xy in itertools.product(v, repeat=2)] + z1 = dpt.asarray(both, dtype=dtype) + z2 = dpt.asarray(both, dtype=dtype) + + tol = dpt.finfo(z1.dtype).resolution + assert not dpt.allclose(z1, z2, atol=tol, rtol=tol) + assert dpt.allclose(z1, z2, atol=tol, rtol=tol, equal_nan=True) + + +def test_allclose_validation(): + with pytest.raises(TypeError): + dpt.allclose(True, False) + + get_queue_or_skip() + x = dpt.asarray(True) + with pytest.raises(TypeError): + dpt.allclose(x, False) From b121d67eb69a9a106d67a81fd1ea119918ca6753 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 15 Aug 2023 13:45:09 -0500 Subject: [PATCH 06/15] Adds tests for special FP values for dpt.abs and dpt.sqrt --- dpctl/tests/elementwise/test_abs.py | 56 ++++++++++++++++++- dpctl/tests/elementwise/test_sqrt.py | 80 +++++++++++++++++++++++----- 2 files changed, 122 insertions(+), 14 deletions(-) diff --git a/dpctl/tests/elementwise/test_abs.py b/dpctl/tests/elementwise/test_abs.py index ab0d34d54d..9c800af812 100644 --- a/dpctl/tests/elementwise/test_abs.py +++ b/dpctl/tests/elementwise/test_abs.py @@ -15,6 +15,7 @@ # limitations under the License. import itertools +import warnings import numpy as np import pytest @@ -22,7 +23,13 @@ import dpctl.tensor as dpt from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported -from .utils import _all_dtypes, _no_complex_dtypes, _usm_types +from .utils import ( + _all_dtypes, + _complex_fp_dtypes, + _no_complex_dtypes, + _real_fp_dtypes, + _usm_types, +) @pytest.mark.parametrize("dtype", _all_dtypes) @@ -135,3 +142,50 @@ def test_abs_out_overlap(dtype): assert Y is not X assert np.allclose(dpt.asnumpy(X), Xnp) assert np.allclose(dpt.asnumpy(Y), Ynp) + + +@pytest.mark.parametrize("dtype", _real_fp_dtypes) +def test_abs_real_fp_special_values(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + nans_ = [dpt.nan, -dpt.nan] + infs_ = [dpt.inf, -dpt.inf] + finites_ = [-1.0, -0.0, 0.0, 1.0] + inps_ = nans_ + infs_ + finites_ + + x = dpt.asarray(inps_, dtype=dtype) + r = dpt.abs(x) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + expected_np = np.abs(np.asarray(inps_, dtype=dtype)) + + expected = dpt.asarray(expected_np, dtype=dtype) + tol = dpt.finfo(r.dtype).resolution + + assert dpt.allclose(r, expected, atol=tol, rtol=tol, equal_nan=True) + + +@pytest.mark.parametrize("dtype", _complex_fp_dtypes) +def test_abs_complex_fp_special_values(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + nans_ = [dpt.nan, -dpt.nan] + infs_ = [dpt.inf, -dpt.inf] + finites_ = [-1.0, -0.0, 0.0, 1.0] + inps_ = nans_ + infs_ + finites_ + c_ = [complex(*v) for v in itertools.product(inps_, repeat=2)] + + z = dpt.asarray(c_, dtype=dtype) + r = dpt.abs(z) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + expected_np = np.abs(np.asarray(c_, dtype=dtype)) + + expected = dpt.asarray(expected_np, dtype=dtype) + tol = dpt.finfo(r.dtype).resolution + + assert dpt.allclose(r, expected, atol=tol, rtol=tol, equal_nan=True) diff --git a/dpctl/tests/elementwise/test_sqrt.py b/dpctl/tests/elementwise/test_sqrt.py index a15f5262a7..07b97733e3 100644 --- a/dpctl/tests/elementwise/test_sqrt.py +++ b/dpctl/tests/elementwise/test_sqrt.py @@ -15,6 +15,7 @@ # limitations under the License. import itertools +import warnings import numpy as np import pytest @@ -23,7 +24,13 @@ import dpctl.tensor as dpt from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported -from .utils import _all_dtypes, _map_to_device_dtype, _usm_types +from .utils import ( + _all_dtypes, + _complex_fp_dtypes, + _map_to_device_dtype, + _real_fp_dtypes, + _usm_types, +) @pytest.mark.parametrize("dtype", _all_dtypes) @@ -115,18 +122,6 @@ def test_sqrt_order(dtype): assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) -@pytest.mark.usefixtures("suppress_invalid_numpy_warnings") -def test_sqrt_special_cases(): - q = get_queue_or_skip() - - X = dpt.asarray( - [dpt.nan, -1.0, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4", sycl_queue=q - ) - Xnp = dpt.asnumpy(X) - - assert_equal(dpt.asnumpy(dpt.sqrt(X)), np.sqrt(Xnp)) - - @pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) def test_sqrt_out_overlap(dtype): q = get_queue_or_skip() @@ -149,3 +144,62 @@ def test_sqrt_out_overlap(dtype): assert Y is not X assert_allclose(dpt.asnumpy(X), Xnp, atol=tol, rtol=tol) assert_allclose(dpt.asnumpy(Y), Ynp, atol=tol, rtol=tol) + + +@pytest.mark.usefixtures("suppress_invalid_numpy_warnings") +def test_sqrt_special_cases(): + q = get_queue_or_skip() + + X = dpt.asarray( + [dpt.nan, -1.0, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4", sycl_queue=q + ) + Xnp = dpt.asnumpy(X) + + assert_equal(dpt.asnumpy(dpt.sqrt(X)), np.sqrt(Xnp)) + + +@pytest.mark.parametrize("dtype", _real_fp_dtypes) +def test_sqrt_real_fp_special_values(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + nans_ = [dpt.nan, -dpt.nan] + infs_ = [dpt.inf, -dpt.inf] + finites_ = [-1.0, -0.0, 0.0, 1.0] + inps_ = nans_ + infs_ + finites_ + + x = dpt.asarray(inps_, dtype=dtype) + r = dpt.sqrt(x) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + expected_np = np.sqrt(np.asarray(inps_, dtype=dtype)) + + expected = dpt.asarray(expected_np, dtype=dtype) + tol = dpt.finfo(r.dtype).resolution + + assert dpt.allclose(r, expected, atol=tol, rtol=tol, equal_nan=True) + + +@pytest.mark.parametrize("dtype", _complex_fp_dtypes) +def test_sqrt_complex_fp_special_values(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + nans_ = [dpt.nan, -dpt.nan] + infs_ = [dpt.inf, -dpt.inf] + finites_ = [-1.0, -0.0, 0.0, 1.0] + inps_ = nans_ + infs_ + finites_ + c_ = [complex(*v) for v in itertools.product(inps_, repeat=2)] + + z = dpt.asarray(c_, dtype=dtype) + r = dpt.sqrt(z) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + expected_np = np.sqrt(np.asarray(c_, dtype=dtype)) + + expected = dpt.asarray(expected_np, dtype=dtype) + tol = dpt.finfo(r.dtype).resolution + + assert dpt.allclose(r, expected, atol=tol, rtol=tol, equal_nan=True) From d60d58ef41cc2a3756f436797f33c2cf45a1c9a5 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 15 Aug 2023 15:44:50 -0500 Subject: [PATCH 07/15] Added tests for type promotion in tensor.allclose --- dpctl/tests/test_tensor_testing.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dpctl/tests/test_tensor_testing.py b/dpctl/tests/test_tensor_testing.py index 21437459d9..426d4bb586 100644 --- a/dpctl/tests/test_tensor_testing.py +++ b/dpctl/tests/test_tensor_testing.py @@ -84,3 +84,12 @@ def test_allclose_validation(): x = dpt.asarray(True) with pytest.raises(TypeError): dpt.allclose(x, False) + + +def test_all_close_promotion(): + get_queue_or_skip() + + x1 = dpt.ones(10, dtype="i4") + x2 = dpt.ones(10, dtype="i8") + + assert dpt.allclose(x1, x2) From ea6dd27c65065f40f78a8f196db52c754953c2f8 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 16 Aug 2023 13:30:05 -0500 Subject: [PATCH 08/15] Fixes per PR review feedback 1. Aligned default values with those of np.allclose 2. Replaced less test with less_equal to align with NumPy. --- dpctl/tensor/_testing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dpctl/tensor/_testing.py b/dpctl/tensor/_testing.py index 185ed816a4..1aa7d82099 100644 --- a/dpctl/tensor/_testing.py +++ b/dpctl/tensor/_testing.py @@ -64,7 +64,7 @@ def _allclose_complex_fp(z1, z2, atol, rtol, equal_nan): mv2 = z2i[mi] check5 = dpt.all( dpt.abs(mv1 - mv2) - < atol + rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2)) + <= atol + rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2)) ) return check5 @@ -90,7 +90,7 @@ def _allclose_real_fp(r1, r2, atol, rtol, equal_nan): mv2 = r2[m] check4 = dpt.all( dpt.abs(mv1 - mv2) - < atol + rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2)) + <= atol + rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2)) ) return check4 @@ -99,10 +99,10 @@ def _allclose_others(r1, r2): return dpt.all(r1 == r2) -def allclose(a1, a2, atol=1e-5, rtol=1e-8, equal_nan=False): - """allclose(a1, a2, atol=1e-5, rtol=1e-8) +def allclose(a1, a2, atol=1e-8, rtol=1e-5, equal_nan=False): + """allclose(a1, a2, atol=1e-8, rtol=1e-5, equal_nan=False) - Returns True if two arrays are element-wise equal within tolerance. + Returns True if two arrays are element-wise equal within tolerances. """ if not isinstance(a1, dpt.usm_ndarray): raise TypeError( From f36af57cc90810540a0e51338e72bb33bea09c15 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 16 Aug 2023 13:31:07 -0500 Subject: [PATCH 09/15] Adds tests for atol/rtol Also added tests for early exits to improve coverage. --- dpctl/tests/test_tensor_testing.py | 56 +++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/dpctl/tests/test_tensor_testing.py b/dpctl/tests/test_tensor_testing.py index 426d4bb586..d1cb4df4ab 100644 --- a/dpctl/tests/test_tensor_testing.py +++ b/dpctl/tests/test_tensor_testing.py @@ -86,10 +86,64 @@ def test_allclose_validation(): dpt.allclose(x, False) -def test_all_close_promotion(): +def test_allclose_type_promotion(): get_queue_or_skip() x1 = dpt.ones(10, dtype="i4") x2 = dpt.ones(10, dtype="i8") assert dpt.allclose(x1, x2) + + +def test_allclose_tolerance(): + get_queue_or_skip() + + x = dpt.zeros(10, dtype="f4") + atol = 1e-5 + y = dpt.full_like(x, atol) + assert dpt.allclose(x, y, atol=atol, rtol=0) + + # about 8e-6 + tol = float.fromhex("0x1.0p-17") + x = dpt.ones(10, dtype="f4") + y = x - tol + assert dpt.allclose(x, y, atol=0, rtol=tol) + + +def test_allclose_real_fp_early_exists(): + get_queue_or_skip() + + x1 = dpt.asarray([0.0, dpt.inf, -dpt.inf], dtype="f4") + x2 = dpt.asarray([dpt.inf, 0.0, -dpt.inf], dtype="f4") + + # early exists, inf positions are different + assert not dpt.allclose(x1, x2) + + x2 = dpt.asarray([0.0, -dpt.inf, dpt.inf], dtype="f4") + + # early exists, inf positions are the same, but signs differ + assert not dpt.allclose(x1, x2) + + +def test_allclose_complex_fp_early_exists(): + get_queue_or_skip() + + x1 = dpt.asarray([0.0, dpt.inf, -dpt.inf], dtype="c8") + x2 = dpt.asarray([dpt.inf, 0.0, -dpt.inf], dtype="c8") + + # early exists, inf positions of real parts are different + assert not dpt.allclose(x1, x2) + + x2 = dpt.asarray([0.0, -dpt.inf, dpt.inf], dtype="c8") + + # early exists, inf positions of real parts are the same, but signs differ + assert not dpt.allclose(x1, x2) + + x1 = dpt.asarray([0.0, dpt.inf * 1j, -dpt.inf * 1j], dtype="c8") + x2 = dpt.asarray([dpt.inf * 1j, 0.0, -dpt.inf * 1j], dtype="c8") + + # early exists, inf positions of imag parts are different + assert not dpt.allclose(x1, x2) + + x2 = dpt.asarray([0.0, -dpt.inf * 1j, dpt.inf * 1j], dtype="c8") + assert not dpt.allclose(x1, x2) From a75fff8f8394c4931f0f1e25414daeb695fa85e0 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 16 Aug 2023 13:41:41 -0500 Subject: [PATCH 10/15] tensor.allclose to use abs(a-b) < max(atol, rtol*max(abs(a), abs(b))) --- dpctl/tensor/_testing.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/dpctl/tensor/_testing.py b/dpctl/tensor/_testing.py index 1aa7d82099..4d4a73408c 100644 --- a/dpctl/tensor/_testing.py +++ b/dpctl/tensor/_testing.py @@ -56,7 +56,7 @@ def _allclose_complex_fp(z1, z2, atol, rtol, equal_nan): mv2 = z2r[mr] check4 = dpt.all( dpt.abs(mv1 - mv2) - < atol + rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2)) + < dpt.maximum(atol, rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2))) ) if not check4: return check4 @@ -64,7 +64,7 @@ def _allclose_complex_fp(z1, z2, atol, rtol, equal_nan): mv2 = z2i[mi] check5 = dpt.all( dpt.abs(mv1 - mv2) - <= atol + rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2)) + <= dpt.maximum(atol, rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2))) ) return check5 @@ -90,7 +90,7 @@ def _allclose_real_fp(r1, r2, atol, rtol, equal_nan): mv2 = r2[m] check4 = dpt.all( dpt.abs(mv1 - mv2) - <= atol + rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2)) + <= dpt.maximum(atol, rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2))) ) return check4 @@ -103,6 +103,10 @@ def allclose(a1, a2, atol=1e-8, rtol=1e-5, equal_nan=False): """allclose(a1, a2, atol=1e-8, rtol=1e-5, equal_nan=False) Returns True if two arrays are element-wise equal within tolerances. + + The testing is based on the following elementwise comparison: + + abs(a - b) <= max(atol, rtol * max(abs(a), abs(b))) """ if not isinstance(a1, dpt.usm_ndarray): raise TypeError( @@ -114,6 +118,10 @@ def allclose(a1, a2, atol=1e-8, rtol=1e-5, equal_nan=False): ) atol = float(atol) rtol = float(rtol) + if atol < 0.0 or rtol < 0.0: + raise ValueError( + "Absolute and relative tolerances must be non-negative" + ) equal_nan = bool(equal_nan) exec_q = du.get_execution_queue(tuple(a.sycl_queue for a in (a1, a2))) if exec_q is None: From ff3c680202551e486e44b59c7091eb5bd09158ae Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 16 Aug 2023 16:12:47 -0500 Subject: [PATCH 11/15] Completion of fix for gh-1058 This changes builds up on gh-1265 and takes into account queue from the pre-allocated buffer, if provided. --- dpctl/tensor/_usmarray.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 2b16df1060..7573109c65 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -182,7 +182,10 @@ cdef class usm_ndarray: raise TypeError("Argument shape must be a list or a tuple.") nd = len(shape) if dtype is None: - q = buffer_ctor_kwargs.get("queue") + if isinstance(buffer, (dpmem._memory._Memory, usm_ndarray)): + q = buffer.sycl_queue + else: + q = buffer_ctor_kwargs.get("queue") if q is not None: dtype = default_device_fp_type(q) else: From c4312cb08c40d2de4c88617ee0219ebe20d67172 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 16 Aug 2023 18:56:46 -0500 Subject: [PATCH 12/15] Scale down arguments and scale back the result This improves accuracy at extremes of supported range. Use sycl:: namespace ldexp and ilogb to prevent problem with VS 2017 headers. --- .../kernels/elementwise_functions/sqrt.hpp | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp index f72b5d963b..a50859595a 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp @@ -148,16 +148,26 @@ template struct SqrtFunctor constexpr realT half = realT(0x1.0p-1f); // 1/2 constexpr realT zero = realT(0); - if (std::signbit(x)) { - realT m = std::hypot(x, y); - realT d = std::sqrt((m - x) * half); - return {(d == zero ? zero : std::abs(y) / d * half), - std::copysign(d, y)}; + const int exp_x = sycl::ilogb(x); + const int exp_y = sycl::ilogb(y); + + int sc = std::max(exp_x, exp_y) / 2; + const realT xx = sycl::ldexp(x, -sc * 2); + const realT yy = sycl::ldexp(y, -sc * 2); + + if (std::signbit(xx)) { + const realT m = std::hypot(xx, yy); + const realT d = std::sqrt((m - xx) * half); + const realT res_re = (d == zero ? zero : std::abs(yy) / d * half); + const realT res_im = std::copysign(d, yy); + return {sycl::ldexp(res_re, sc), sycl::ldexp(res_im, sc)}; } else { - realT m = std::hypot(x, y); - realT d = std::sqrt((m + x) * half); - return {d, (d == zero) ? std::copysign(zero, y) : y * half / d}; + const realT m = std::hypot(xx, yy); + const realT d = std::sqrt((m + xx) * half); + const realT res_im = + (d == zero) ? std::copysign(zero, yy) : yy * half / d; + return {sycl::ldexp(d, sc), sycl::ldexp(res_im, sc)}; } } }; From bb52bb1774e0295b13de8792d41f0ec4efeb5a83 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 17 Aug 2023 11:49:04 -0500 Subject: [PATCH 13/15] Avoid using sycl::ilogb, but use own implementation ilogb would have to pay attention to correctly computing scale of denormal floats, while simpler code suffices. Also use unscaled version in most cases, and scaled version only for very large inputs. --- .../kernels/elementwise_functions/sqrt.hpp | 85 ++++++++++++++++++- 1 file changed, 82 insertions(+), 3 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp index a50859595a..808e82539e 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp @@ -137,8 +137,42 @@ template struct SqrtFunctor } } + int get_normal_scale_float(const float &v) const + { + constexpr int float_significant_bits = 23; + constexpr std::uint32_t exponent_mask = 0xff; + constexpr int exponent_bias = 127; + const int scale = static_cast( + (sycl::bit_cast(v) >> float_significant_bits) & + exponent_mask); + return scale - exponent_bias; + } + + int get_normal_scale_double(const double &v) const + { + constexpr int float_significant_bits = 53; + constexpr std::uint64_t exponent_mask = 0x7ff; + constexpr int exponent_bias = 1023; + const int scale = static_cast( + (sycl::bit_cast(v) >> float_significant_bits) & + exponent_mask); + return scale - exponent_bias; + } + + template int get_normal_scale(const T &v) const + { + static_assert(std::is_same_v || std::is_same_v); + + if constexpr (std::is_same_v) { + return get_normal_scale_float(v); + } + else { + return get_normal_scale_double(v); + } + } + template - std::complex csqrt_finite(T const &x, T const &y) const + std::complex csqrt_finite_scaled(T const &x, T const &y) const { // csqrt(x + y*1j) = // sqrt((cabs(x, y) + x) / 2) + @@ -148,8 +182,8 @@ template struct SqrtFunctor constexpr realT half = realT(0x1.0p-1f); // 1/2 constexpr realT zero = realT(0); - const int exp_x = sycl::ilogb(x); - const int exp_y = sycl::ilogb(y); + const int exp_x = get_normal_scale(x); + const int exp_y = get_normal_scale(y); int sc = std::max(exp_x, exp_y) / 2; const realT xx = sycl::ldexp(x, -sc * 2); @@ -170,6 +204,51 @@ template struct SqrtFunctor return {sycl::ldexp(d, sc), sycl::ldexp(res_im, sc)}; } } + + template + std::complex csqrt_finite_unscaled(T const &x, T const &y) const + { + // csqrt(x + y*1j) = + // sqrt((cabs(x, y) + x) / 2) + + // 1j * copysign(sqrt((cabs(x, y) - x) / 2), y) + + using realT = T; + constexpr realT half = realT(0x1.0p-1f); // 1/2 + constexpr realT zero = realT(0); + + if (std::signbit(x)) { + const realT m = std::hypot(x, y); + const realT d = std::sqrt((m - x) * half); + const realT res_re = (d == zero ? zero : std::abs(y) / d * half); + const realT res_im = std::copysign(d, y); + return {res_re, res_im}; + } + else { + const realT m = std::hypot(x, y); + const realT d = std::sqrt((m + x) * half); + const realT res_im = + (d == zero) ? std::copysign(zero, y) : y * half / d; + return {d, res_im}; + } + } + + template T scaling_threshold() const + { + if constexpr (std::is_same_v) { + return T(0x1.0p+126f); + } + else { + return T(0x1.0p+1022); + } + } + + template + std::complex csqrt_finite(T const &x, T const &y) const + { + return (std::max(std::abs(x), std::abs(y)) < scaling_threshold()) + ? csqrt_finite_unscaled(x, y) + : csqrt_finite_scaled(x, y); + } }; template Date: Thu, 17 Aug 2023 12:44:47 -0500 Subject: [PATCH 14/15] Set defines to use std::abs and std::sqrt on Linux We work around issues with these functions when their implementation is taken from VS 2017 headers on Windows though. --- dpctl/tensor/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index de45ee47bd..ca83b8350b 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -58,10 +58,15 @@ set_source_files_properties( ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp PROPERTIES COMPILE_OPTIONS "${_clang_prefix}-fno-fast-math") +if (UNIX) + set_source_files_properties( + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp + PROPERTIES COMPILE_DEFINITIONS "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES") +endif() target_compile_options(${python_module_name} PRIVATE -fno-sycl-id-queries-fit-in-int) target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel) if(UNIX) - # this option is support on Linux only + # this option is supported on Linux only target_link_options(${python_module_name} PRIVATE -fsycl-link-huge-device-code) endif() target_include_directories(${python_module_name} From 142190fe35f32577b526bd2a3b45db4b0ce2493b Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 17 Aug 2023 12:52:47 -0500 Subject: [PATCH 15/15] Removed stray include iostream --- .../include/kernels/elementwise_functions/positive.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp index f75b93d0f5..d5fab29315 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp @@ -37,8 +37,6 @@ #include "utils/type_utils.hpp" #include -#include - namespace dpctl { namespace tensor