Skip to content

Commit

Permalink
use tag dispatch
Browse files Browse the repository at this point in the history
  • Loading branch information
pijyoi committed Apr 27, 2024
1 parent 4c66277 commit c5d3d23
Showing 1 changed file with 57 additions and 68 deletions.
125 changes: 57 additions & 68 deletions numpy/_core/src/umath/clip.cpp
@@ -1,6 +1,8 @@
/**
* This module provides the inner loops for the clip ufunc
*/
#include <type_traits>

#define _UMATHMODULE
#define _MULTIARRAYMODULE
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
Expand Down Expand Up @@ -150,95 +152,82 @@ _NPY_CLIP(T x, T min, T max)
return _NPY_MIN<Tag>(_NPY_MAX<Tag>((x), (min)), (max));
}

template <class Tag, class T = typename Tag::type>
static void
_npy_clip_(T **args, npy_intp const *dimensions, npy_intp const *steps)
{
npy_intp n = dimensions[0];
if (steps[1] == 0 && steps[2] == 0) {
/* min and max are constant throughout the loop, the most common case
*/
/* NOTE: it may be possible to optimize these checks for nan */
T min_val = *args[1];
T max_val = *args[2];
template <class Tag, class T>
void
_npy_clip_const_minmax_(
T *ip, npy_intp is, T *op, npy_intp os, npy_intp n, T min_val, T max_val,
std::false_type /* non-floating point */
)
{
/* contiguous, branch to let the compiler optimize */
if (is == 1 && os == 1) {
for (npy_intp i = 0; i < n; i++, ip++, op++) {
*op = _NPY_CLIP<Tag>(*ip, min_val, max_val);
}
}
else {
for (npy_intp i = 0; i < n; i++, ip += is, op += os) {
*op = _NPY_CLIP<Tag>(*ip, min_val, max_val);
}
}
}

T *ip1 = args[0], *op1 = args[3];
npy_intp is1 = steps[0] / sizeof(T), os1 = steps[3] / sizeof(T);
template <class Tag, class T>
void
_npy_clip_const_minmax_(
T *ip, npy_intp is, T *op, npy_intp os, npy_intp n, T min_val, T max_val,
std::true_type /* floating point */
)
{
if (!npy_isnan(min_val) && !npy_isnan(max_val)) {
/* min_val and max_val are not nans */
/* nans in the input will be propagated naturally */

/* contiguous, branch to let the compiler optimize */
if (is1 == 1 && os1 == 1) {
for (npy_intp i = 0; i < n; i++, ip1++, op1++) {
*op1 = _NPY_CLIP<Tag>(*ip1, min_val, max_val);
if (is == 1 && os == 1) {
for (npy_intp i = 0; i < n; i++, ip++, op++) {
T x = *ip;
if (x < min_val) x = min_val;
if (x > max_val) x = max_val;
*op = x;
}
}
else {
for (npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) {
*op1 = _NPY_CLIP<Tag>(*ip1, min_val, max_val);
for (npy_intp i = 0; i < n; i++, ip += is, op += os) {
T x = *ip;
if (x < min_val) x = min_val;
if (x > max_val) x = max_val;
*op = x;
}
}
}
else {
T *ip1 = args[0], *ip2 = args[1], *ip3 = args[2], *op1 = args[3];
npy_intp is1 = steps[0] / sizeof(T), is2 = steps[1] / sizeof(T),
is3 = steps[2] / sizeof(T), os1 = steps[3] / sizeof(T);
for (npy_intp i = 0; i < n;
i++, ip1 += is1, ip2 += is2, ip3 += is3, op1 += os1)
*op1 = _NPY_CLIP<Tag>(*ip1, *ip2, *ip3);
/* min_val and/or max_val are nans */
T x = npy_isnan(min_val) ? min_val : max_val;
for (npy_intp i = 0; i < n; i++, op += os) {
*op = x;
}
}
npy_clear_floatstatus_barrier((char *)dimensions);
}

template <class Tag>
static void
_npy_clip(char **args, npy_intp const *dimensions, npy_intp const *steps)
{
using T = typename Tag::type;
return _npy_clip_<Tag>((T **)args, dimensions, steps);
}

template <class Tag, class T = typename Tag::type>
static void
_npy_clip_floating_(T **args, npy_intp const *dimensions, npy_intp const *steps)
_npy_clip_(T **args, npy_intp const *dimensions, npy_intp const *steps)
{
npy_intp n = dimensions[0];
if (steps[1] == 0 && steps[2] == 0) {
/* min and max are constant throughout the loop, the most common case
*/
/* NOTE: it may be possible to optimize these checks for nan */
T min_val = *args[1];
T max_val = *args[2];

T *ip1 = args[0], *op1 = args[3];
npy_intp is1 = steps[0] / sizeof(T), os1 = steps[3] / sizeof(T);

if (!npy_isnan(min_val) && !npy_isnan(max_val)) {
/* min_val and max_val are not nans */
/* nans in the input will be propagated naturally */

/* contiguous, branch to let the compiler optimize */
if (is1 == 1 && os1 == 1) {
for (npy_intp i = 0; i < n; i++, ip1++, op1++) {
T x = *ip1;
if (x < min_val) x = min_val;
if (x > max_val) x = max_val;
*op1 = x;
}
}
else {
for (npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) {
T x = *ip1;
if (x < min_val) x = min_val;
if (x > max_val) x = max_val;
*op1 = x;
}
}
}
else {
/* min_val and/or max_val are nans */
T x = npy_isnan(min_val) ? min_val : max_val;
for (npy_intp i = 0; i < n; i++, op1 += os1) {
*op1 = x;
}
}
_npy_clip_const_minmax_<Tag, T>(ip1, is1, op1, os1, n, min_val, max_val,
std::is_base_of<npy::floating_point_tag, Tag>{}
);
}
else {
T *ip1 = args[0], *ip2 = args[1], *ip3 = args[2], *op1 = args[3];
Expand All @@ -253,10 +242,10 @@ _npy_clip_floating_(T **args, npy_intp const *dimensions, npy_intp const *steps)

template <class Tag>
static void
_npy_clip_floating(char **args, npy_intp const *dimensions, npy_intp const *steps)
_npy_clip(char **args, npy_intp const *dimensions, npy_intp const *steps)
{
using T = typename Tag::type;
return _npy_clip_floating_<Tag>((T **)args, dimensions, steps);
return _npy_clip_<Tag>((T **)args, dimensions, steps);
}

extern "C" {
Expand Down Expand Up @@ -336,19 +325,19 @@ NPY_NO_EXPORT void
FLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
void *NPY_UNUSED(func))
{
return _npy_clip_floating<npy::float_tag>(args, dimensions, steps);
return _npy_clip<npy::float_tag>(args, dimensions, steps);
}
NPY_NO_EXPORT void
DOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
void *NPY_UNUSED(func))
{
return _npy_clip_floating<npy::double_tag>(args, dimensions, steps);
return _npy_clip<npy::double_tag>(args, dimensions, steps);
}
NPY_NO_EXPORT void
LONGDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
void *NPY_UNUSED(func))
{
return _npy_clip_floating<npy::longdouble_tag>(args, dimensions, steps);
return _npy_clip<npy::longdouble_tag>(args, dimensions, steps);
}
NPY_NO_EXPORT void
CFLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
Expand Down

0 comments on commit c5d3d23

Please sign in to comment.