Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAINT: Rewrote scipy.ndimage.find_object function in cython #5006

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -155,6 +155,7 @@ scipy/linalg/fblas.pyf
scipy/linalg/flapack.pyf
scipy/linalg/src/id_dist/src/*_subr_*.f
scipy/ndimage/src/_ni_label.c
scipy/ndimage/src/_ni_measure.c
scipy/optimize/cobyla/_cobylamodule.c
scipy/optimize/lbfgsb/_lbfgsbmodule.c
scipy/optimize/minpack2/minpack2module.c
Expand Down
6 changes: 5 additions & 1 deletion scipy/ndimage/measurements.py
Expand Up @@ -35,6 +35,7 @@
from . import _ni_support
from . import _ni_label
from . import _nd_image
from . import _ni_measure
from . import morphology

__all__ = ['label', 'find_objects', 'labeled_comprehension', 'sum', 'mean',
Expand Down Expand Up @@ -264,10 +265,13 @@ def find_objects(input, max_label=0):
if numpy.iscomplexobj(input):
raise TypeError('Complex type not supported')

if input.dtype == np.bool:
input = input.astype(int)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't input = input.view(np.uint8) work too? It would save a copy of the array.


if max_label < 1:
max_label = input.max()

return _nd_image.find_objects(input, max_label)
return _ni_measure._findObjects(input, max_label)


def labeled_comprehension(input, labels, index, func, out_dtype, default, pass_positions=False):
Expand Down
5 changes: 5 additions & 0 deletions scipy/ndimage/setup.py
Expand Up @@ -23,6 +23,11 @@ def configuration(parent_package='', top_path=None):
include_dirs=['src']+[get_include()],
)

config.add_extension("_ni_measure",
sources=["src/_ni_measure.c",],
include_dirs=['src']+[get_include()],
)

config.add_data_dir('tests')

return config
Expand Down
173 changes: 173 additions & 0 deletions scipy/ndimage/src/_ni_measure.pyx
@@ -0,0 +1,173 @@
######################################################################
# Cython version of scipy.ndimage.measurement.find_objects() function.
######################################################################

cimport cython
from cython cimport sizeof
import numpy as np
cimport numpy as np

np.import_array()

cdef extern from *:
ctypedef int Py_intptr_t

ctypedef void (*PyArray_CopySwapFunc)(void *, void *, int, void *)

cdef extern from "numpy/arrayobject.h" nogil:
ctypedef struct PyArrayIterObject:
np.intp_t *coordinates
char *dataptr
np.npy_bool contiguous

ctypedef struct PyArrayObject:
int nd
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not being used anywhere right now. If there is an advantage in using this over ndarray.ndim lets do it, if not we can remove it from here.


void *PyDataMem_NEW(size_t)
void PyDataMem_FREE(void *)


######################################################################
# Use of Cython's type templates for type specialization
######################################################################

# Only integer values are allowed.
ctypedef fused data_t:
np.int8_t
np.int16_t
np.int32_t
np.int64_t
np.uint8_t
np.uint16_t
np.uint32_t
np.uint64_t

#####################################################################
# Function Specializers and asociate function for using fused type
#####################################################################

ctypedef void (*func_p)(void *data, np.flatiter iti, np.intp_t max_label,
int *regions, int rank) nogil

def get_funcs(np.ndarray[data_t] input):
return (<Py_intptr_t> findObjectsPoint[data_t])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the casting here? You are converting a function pointer to an integer type?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jaimefrio That is required. Removing it from line 53 gives error while compiling the code and, in line 103 removing it gives segmentation fault.


######################################################################
# Update Regions According to Input Data Type
######################################################################

cdef inline findObjectsPoint(data_t *data, np.flatiter _iti, np.intp_t max_label,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the right approach is to do:

cdef inline findObjectsPoint(void *raw_data, ...):
    cdef:
        data_t *data = <data_t *>raw_data

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jaimefrio Compiler crashes if I declare the argument type as void * giving warning:
_ni_measure.pyx:50:42: Can only parameterize template functions.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jaimefrio
Defining something like
`

...
cdef inline findObjectsPoint(void *data, np.flatiter _iti, np.intp_t max_label, 
                             int *regions, int rank, np.ndarray[data_t] input):
...

compiles successfully but when I run it over test cases, compiler crashes.

`

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of np.flatiter _iti, you could accept a np.intp_t *cc, and then call this function with iti.coordinates.

int *regions, int rank):
cdef int kk =0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing space after =.

cdef np.intp_t cc

# only integer or boolean values are allowed, since s_index is being used in indexing
cdef np.intp_t s_index = <np.intp_t> (<data_t *> data)[0] - 1

if s_index >=0 and s_index < max_label:
if rank > 0:
s_index *= 2 * rank
if regions[s_index] < 0:
for kk in range(rank):
cc = (<PyArrayIterObject *>_iti).coordinates[kk]
regions[s_index + kk] = cc
regions[s_index + kk + rank] = cc + 1

else:
for kk in range(rank):
cc = (<PyArrayIterObject *>_iti).coordinates[kk]
if cc < regions[s_index + kk]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you make this condition if regions[s_index] < 0 or cc < regions[s_index + kk]: and similarly two lines below, you can get rid of the if regions[s_index] < 0 branch.

regions[s_index + kk] = cc
if cc + 1 > regions[s_index + kk + rank]:
regions[s_index + kk + rank] = cc + 1
else:
regions[s_index] = 1

return 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function is void, should have no return.


######################################################################
# Implementaion of find_Objects function:-
######################################################################

cpdef _findObjects(np.ndarray input_raw, np.intp_t max_label):
cdef:
funcs = get_funcs(input_raw.take([0]))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't you need to specify the type of funcs here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not:

cpdef _findObjects(np.ndarray[data_t] input_raw, np.intp_t max_label):
    cdef
        func_p findObjectsPoint = funcs[data_t]

If this works, you can get rid of the get_funcs function altogether.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jaimefrio No.It is not working. It gives error while compiling: 'data_t' is not a constant, variable or function identifier.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also in line 94 funcs = get_funcs(input_raw.take([0])), take[0] is required. Without it, I am getting error: TypeError: No matching signature found


int ii, rank, size_regions
int start, jj, idx, end
int *regions = NULL
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of these should be np.intp_t, not int. I know we had issues with things working properly on 64 bit systems, but reverting everything to int is not a valid option for the final version.


np.flatiter _iti
PyArrayIterObject *iti

func_p findObjectsPoint = <func_p> <void *> <Py_intptr_t> funcs
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the <Py_intptr_t> casting for?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jaimefrio This casting is required. Without it cython compiler gives error: Python objects can't be casted to void *.


int flags = np.NPY_CONTIGUOUS | np.NPY_NOTSWAPPED | np.NPY_ALIGNED
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think requiring the array to be contiguous is too strict: if we are going to make it unconditionally contiguous, we can probably get rid of using the iterator altogether.


input = np.PyArray_FROM_OF(input_raw, flags)

rank = input.ndim

# Array Iterator defining and Initialization:
_iti = np.PyArray_IterNew(input)
iti = <PyArrayIterObject *> _iti

# If iterator is contiguous, PyArray_ITER_NEXT will treat it as 1D Array
iti.contiguous = 0

# Declaring output array
size_regions = 0

if max_label < 0:
max_label = 0
elif max_label > 0:
if rank > 0:
size_regions = 2 * max_label * rank

else:
size_regions = max_label

regions = <int *> PyDataMem_NEW(size_regions * sizeof(int))
if regions == NULL:
raise MemoryError()

else:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you need this branch? regions gets initialized to NULL and is unchanged before here.

regions = NULL

try:
with nogil:
if rank > 0:
for jj in range(size_regions):
regions[jj] = -1

# Iteration over array:
while np.PyArray_ITER_NOTDONE(_iti):
findObjectsPoint(np.PyArray_ITER_DATA(_iti), _iti, max_label, regions, rank)
np.PyArray_ITER_NEXT(_iti)

result = []

for ii in range(max_label):
if rank > 0:
idx = 2 * rank * ii

else:
idx = ii

if regions[idx] >= 0:
slc = ()
for jj in range(rank):
start = regions[idx + jj]
end = regions[idx + jj + rank]

slc += (slice(start, end),)
result.append(slc)

else:
result.append(None)
except:
# clean up and re-raise
PyDataMem_FREE(regions)
raise

return result
91 changes: 0 additions & 91 deletions scipy/ndimage/src/nd_image.c
Expand Up @@ -629,95 +629,6 @@ static PyObject *Py_ZoomShift(PyObject *obj, PyObject *args)
return PyErr_Occurred() ? NULL : Py_BuildValue("");
}

static PyObject *Py_FindObjects(PyObject *obj, PyObject *args)
{
PyArrayObject *input = NULL;
PyObject *result = NULL, *tuple = NULL, *start = NULL, *end = NULL;
PyObject *slc = NULL;
int jj;
npy_intp max_label;
npy_intp ii, *regions = NULL;

if (!PyArg_ParseTuple(args, "O&n",
NI_ObjectToInputArray, &input, &max_label))
goto exit;

if (max_label < 0)
max_label = 0;
if (max_label > 0) {
if (input->nd > 0) {
regions = (npy_intp*)malloc(2 * max_label * input->nd *
sizeof(npy_intp));
} else {
regions = (npy_intp*)malloc(max_label * sizeof(npy_intp));
}
if (!regions) {
PyErr_NoMemory();
goto exit;
}
}

if (!NI_FindObjects(input, max_label, regions))
goto exit;

result = PyList_New(max_label);
if (!result) {
PyErr_NoMemory();
goto exit;
}

for(ii = 0; ii < max_label; ii++) {
npy_intp idx = input->nd > 0 ? 2 * input->nd * ii : ii;
if (regions[idx] >= 0) {
PyObject *tuple = PyTuple_New(input->nd);
if (!tuple) {
PyErr_NoMemory();
goto exit;
}
for(jj = 0; jj < input->nd; jj++) {
start = PyLong_FromSsize_t(regions[idx + jj]);
end = PyLong_FromSsize_t(regions[idx + jj + input->nd]);
if (!start || !end) {
PyErr_NoMemory();
goto exit;
}
slc = PySlice_New(start, end, NULL);
if (!slc) {
PyErr_NoMemory();
goto exit;
}
Py_XDECREF(start);
Py_XDECREF(end);
start = end = NULL;
PyTuple_SetItem(tuple, jj, slc);
slc = NULL;
}
PyList_SetItem(result, ii, tuple);
tuple = NULL;
} else {
Py_INCREF(Py_None);
PyList_SetItem(result, ii, Py_None);
}
}

Py_INCREF(result);

exit:
Py_XDECREF(input);
Py_XDECREF(result);
Py_XDECREF(tuple);
Py_XDECREF(start);
Py_XDECREF(end);
Py_XDECREF(slc);
free(regions);
if (PyErr_Occurred()) {
Py_XDECREF(result);
return NULL;
} else {
return result;
}
}

static PyObject *Py_WatershedIFT(PyObject *obj, PyObject *args)
{
PyArrayObject *input = NULL, *output = NULL, *markers = NULL;
Expand Down Expand Up @@ -917,8 +828,6 @@ static PyMethodDef methods[] = {
METH_VARARGS, NULL},
{"zoom_shift", (PyCFunction)Py_ZoomShift,
METH_VARARGS, NULL},
{"find_objects", (PyCFunction)Py_FindObjects,
METH_VARARGS, NULL},
{"watershed_ift", (PyCFunction)Py_WatershedIFT,
METH_VARARGS, NULL},
{"distance_transform_bf", (PyCFunction)Py_DistanceTransformBruteForce,
Expand Down