Skip to content

Commit

Permalink
use char pointers and step by bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
pijyoi committed Apr 28, 2024
1 parent c5d3d23 commit 024c4f9
Showing 1 changed file with 22 additions and 32 deletions.
54 changes: 22 additions & 32 deletions numpy/_core/src/umath/clip.cpp
Expand Up @@ -155,27 +155,27 @@ _NPY_CLIP(T x, T min, T max)
/*
 * Clip `n` elements against the constant bounds `min_val` / `max_val`
 * (overload selected by the std::false_type tag, i.e. non-floating point).
 * Each output element is set to _NPY_CLIP<Tag>(input, min_val, max_val).
 *
 * NOTE(review): this hunk appears to show the removed and the added lines
 * of the commit interleaved without +/- markers -- confirm against the
 * real file before editing.
 *   - `T *ip` / `T *op` variant: `is` and `os` are used as element counts
 *     (pointer arithmetic on T *), and the contiguous case is is == os == 1.
 *   - `char *ip` / `char *op` variant: `is` and `os` are byte steps added to
 *     char pointers, the contiguous case is is == os == sizeof(T), and every
 *     access goes through a (T *) cast.
 */
template <class Tag, class T>
void
_npy_clip_const_minmax_(
T *ip, npy_intp is, T *op, npy_intp os, npy_intp n, T min_val, T max_val,
char *ip, npy_intp is, char *op, npy_intp os, npy_intp n, T min_val, T max_val,
std::false_type /* non-floating point */
)
{
/* contiguous, branch to let the compiler optimize */
if (is == 1 && os == 1) {
for (npy_intp i = 0; i < n; i++, ip++, op++) {
*op = _NPY_CLIP<Tag>(*ip, min_val, max_val);
if (is == sizeof(T) && os == sizeof(T)) {
for (npy_intp i = 0; i < n; i++, ip += sizeof(T), op += sizeof(T)) {
*(T *)op = _NPY_CLIP<Tag>(*(T *)ip, min_val, max_val);
}
}
else {
/* general case: advance input and output by their own steps */
for (npy_intp i = 0; i < n; i++, ip += is, op += os) {
*op = _NPY_CLIP<Tag>(*ip, min_val, max_val);
*(T *)op = _NPY_CLIP<Tag>(*(T *)ip, min_val, max_val);
}
}
}

/*
 * Clip `n` elements against the constant bounds `min_val` / `max_val`
 * (overload selected by the std::true_type tag, i.e. floating point).
 *
 * In the first branch each element is replaced by min_val when it compares
 * less than min_val and by max_val when it compares greater than max_val.
 * In the final `else` branch (min_val and/or max_val is nan) every output
 * element is filled with min_val if min_val is nan, otherwise with max_val;
 * the input is not read there.
 *
 * NOTE(review): the lines that open the first branch are hidden behind the
 * "Expand All" marker below, so the exact guarding condition is not visible
 * here -- presumably it checks that neither bound is nan; confirm.
 * NOTE(review): this hunk appears to show the removed (`T *`, element
 * steps) and the added (`char *`, byte steps, (T *) casts) lines of the
 * commit interleaved without +/- markers.
 */
template <class Tag, class T>
void
_npy_clip_const_minmax_(
T *ip, npy_intp is, T *op, npy_intp os, npy_intp n, T min_val, T max_val,
char *ip, npy_intp is, char *op, npy_intp os, npy_intp n, T min_val, T max_val,
std::true_type /* floating point */
)
{
Expand All @@ -184,70 +184,60 @@ _npy_clip_const_minmax_(
/* nans in the input will be propagated naturally */

/* contiguous, branch to let the compiler optimize */
if (is == 1 && os == 1) {
for (npy_intp i = 0; i < n; i++, ip++, op++) {
T x = *ip;
if (is == sizeof(T) && os == sizeof(T)) {
for (npy_intp i = 0; i < n; i++, ip += sizeof(T), op += sizeof(T)) {
T x = *(T *)ip;
if (x < min_val) x = min_val;
if (x > max_val) x = max_val;
*op = x;
*(T *)op = x;
}
}
else {
/* general case: advance input and output by their own steps */
for (npy_intp i = 0; i < n; i++, ip += is, op += os) {
T x = *ip;
T x = *(T *)ip;
if (x < min_val) x = min_val;
if (x > max_val) x = max_val;
*op = x;
*(T *)op = x;
}
}
}
else {
/* min_val and/or max_val are nans */
T x = npy_isnan(min_val) ? min_val : max_val;
/* only the output pointer advances: the fill value does not depend on the input */
for (npy_intp i = 0; i < n; i++, op += os) {
*op = x;
*(T *)op = x;
}
}
}

/*
 * Clip loop body for element type T (defaults to Tag::type).
 *
 * args[0] is the input, args[1] the minimum, args[2] the maximum and
 * args[3] the output; steps[k] is the step paired with args[k], and
 * dimensions[0] is the element count.
 *
 * When steps[1] and steps[2] are both 0 the bounds are read once and the
 * work is handed to _npy_clip_const_minmax_, choosing the overload with
 * std::is_base_of<npy::floating_point_tag, Tag>. Otherwise the bounds are
 * re-read for every element and passed to _NPY_CLIP<Tag>.
 *
 * NOTE(review): this hunk appears to show the removed and the added lines
 * of the commit interleaved without +/- markers. The `T **args` variant
 * divides each step by sizeof(T) to step typed pointers; the `char **args`
 * variant keeps the steps as-is, advances char pointers, and casts to
 * (T *) at each access. Confirm against the real file.
 */
template <class Tag, class T = typename Tag::type>
static void
_npy_clip_(T **args, npy_intp const *dimensions, npy_intp const *steps)
_npy_clip(char **args, npy_intp const *dimensions, npy_intp const *steps)
{
npy_intp n = dimensions[0];
if (steps[1] == 0 && steps[2] == 0) {
/* min and max are constant throughout the loop, the most common case
*/
/* NOTE: it may be possible to optimize these checks for nan */
T min_val = *args[1];
T max_val = *args[2];

T *ip1 = args[0], *op1 = args[3];
npy_intp is1 = steps[0] / sizeof(T), os1 = steps[3] / sizeof(T);
T min_val = *(T *)args[1];
T max_val = *(T *)args[2];

_npy_clip_const_minmax_<Tag, T>(ip1, is1, op1, os1, n, min_val, max_val,
_npy_clip_const_minmax_<Tag, T>(
args[0], steps[0], args[3], steps[3], n, min_val, max_val,
std::is_base_of<npy::floating_point_tag, Tag>{}
);
}
else {
/* min and/or max vary per element: step all four pointers together */
T *ip1 = args[0], *ip2 = args[1], *ip3 = args[2], *op1 = args[3];
npy_intp is1 = steps[0] / sizeof(T), is2 = steps[1] / sizeof(T),
is3 = steps[2] / sizeof(T), os1 = steps[3] / sizeof(T);
char *ip1 = args[0], *ip2 = args[1], *ip3 = args[2], *op1 = args[3];
npy_intp is1 = steps[0], is2 = steps[1],
is3 = steps[2], os1 = steps[3];
for (npy_intp i = 0; i < n;
i++, ip1 += is1, ip2 += is2, ip3 += is3, op1 += os1)
*op1 = _NPY_CLIP<Tag>(*ip1, *ip2, *ip3);
*(T *)op1 = _NPY_CLIP<Tag>(*(T *)ip1, *(T *)ip2, *(T *)ip3);
}
/* NOTE(review): presumably clears floating-point status flags raised by the loop -- confirm against the helper's definition */
npy_clear_floatstatus_barrier((char *)dimensions);
}

/*
 * Thin wrapper: reinterprets the char ** argument array as T ** (with
 * T = Tag::type) and forwards to _npy_clip_<Tag>.
 *
 * NOTE(review): this looks like a block removed by the commit -- the
 * function above already carries the name _npy_clip with a char **
 * signature in its added line. Confirm against the real file.
 */
template <class Tag>
static void
_npy_clip(char **args, npy_intp const *dimensions, npy_intp const *steps)
{
using T = typename Tag::type;
return _npy_clip_<Tag>((T **)args, dimensions, steps);
}

extern "C" {
NPY_NO_EXPORT void
BOOL_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
Expand Down

0 comments on commit 024c4f9

Please sign in to comment.