Skip to content

Commit

Permalink
specialize float clip
Browse files Browse the repository at this point in the history
  • Loading branch information
pijyoi committed Apr 15, 2024
1 parent 48950ad commit 50dd260
Showing 1 changed file with 65 additions and 2 deletions.
67 changes: 65 additions & 2 deletions numpy/_core/src/umath/clip.cpp
Expand Up @@ -196,6 +196,69 @@ _npy_clip(char **args, npy_intp const *dimensions, npy_intp const *steps)
return _npy_clip_<Tag>((T **)args, dimensions, steps);
}

template <class Tag, class T = typename Tag::type>
static void
_npy_clip_floating_(T **args, npy_intp const *dimensions, npy_intp const *steps)
{
npy_intp n = dimensions[0];
if (steps[1] == 0 && steps[2] == 0) {
/* min and max are constant throughout the loop, the most common case
*/
T min_val = *args[1];
T max_val = *args[2];

T *ip1 = args[0], *op1 = args[3];
npy_intp is1 = steps[0] / sizeof(T), os1 = steps[3] / sizeof(T);

if (!npy_isnan(min_val) && !npy_isnan(max_val)) {
/* min_val and max_val are not nans */
/* nans in the input will be propagated naturally */

/* contiguous, branch to let the compiler optimize */
if (is1 == 1 && os1 == 1) {
for (npy_intp i = 0; i < n; i++, ip1++, op1++) {
T x = *ip1;
if (x < min_val) x = min_val;
if (x > max_val) x = max_val;
*op1 = x;
}
}
else {
for (npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) {
T x = *ip1;
if (x < min_val) x = min_val;
if (x > max_val) x = max_val;
*op1 = x;
}
}
}
else {
/* min_val and/or max_val are nans */
T x = npy_isnan(min_val) ? min_val : max_val;
for (npy_intp i = 0; i < n; i++, op1 += os1) {
*op1 = x;
}
}
}
else {
T *ip1 = args[0], *ip2 = args[1], *ip3 = args[2], *op1 = args[3];
npy_intp is1 = steps[0] / sizeof(T), is2 = steps[1] / sizeof(T),
is3 = steps[2] / sizeof(T), os1 = steps[3] / sizeof(T);
for (npy_intp i = 0; i < n;
i++, ip1 += is1, ip2 += is2, ip3 += is3, op1 += os1)
*op1 = _NPY_CLIP<Tag>(*ip1, *ip2, *ip3);
}
npy_clear_floatstatus_barrier((char *)dimensions);
}

/*
 * Untyped entry point for the floating-point clip loop: reinterprets the
 * `char *` argument array as pointers to the element type named by Tag and
 * hands everything to _npy_clip_floating_.
 */
template <class Tag>
static void
_npy_clip_floating(char **args, npy_intp const *dimensions, npy_intp const *steps)
{
    typedef typename Tag::type elem_t;
    elem_t **typed_args = (elem_t **)args;
    _npy_clip_floating_<Tag>(typed_args, dimensions, steps);
}

extern "C" {
NPY_NO_EXPORT void
BOOL_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
Expand Down Expand Up @@ -273,13 +336,13 @@ NPY_NO_EXPORT void
FLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
           void *NPY_UNUSED(func))
{
    /*
     * Clip loop for npy::float_tag.  Dispatch to the floating-point
     * specialisation only; a leftover call to the generic _npy_clip placed
     * before it would return first and leave the specialisation
     * unreachable.
     */
    return _npy_clip_floating<npy::float_tag>(args, dimensions, steps);
}
/*
 * Clip loop for npy::double_tag.  Dispatches to the floating-point
 * specialisation only; a leftover call to the generic _npy_clip placed
 * before it would return first and leave the specialisation unreachable.
 */
NPY_NO_EXPORT void
DOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
            void *NPY_UNUSED(func))
{
    return _npy_clip_floating<npy::double_tag>(args, dimensions, steps);
}
NPY_NO_EXPORT void
LONGDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
Expand Down

0 comments on commit 50dd260

Please sign in to comment.