/
dummyarray.py
453 lines (374 loc) · 13.9 KB
/
dummyarray.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
from collections import namedtuple
import itertools
import functools
import operator
import ctypes
import numpy as np
from numba import _helperlib
from numba.core import config
Extent = namedtuple("Extent", ["begin", "end"])
attempt_nocopy_reshape = ctypes.CFUNCTYPE(
ctypes.c_int,
ctypes.c_long, # nd
np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # dims
np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # strides
ctypes.c_long, # newnd
np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # newdims
np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # newstrides
ctypes.c_long, # itemsize
ctypes.c_int, # is_f_order
)(_helperlib.c_helpers['attempt_nocopy_reshape'])
class Dim(object):
"""A single dimension of the array
Attributes
----------
start:
start offset
stop:
stop offset
size:
number of items
stride:
item stride
"""
__slots__ = 'start', 'stop', 'size', 'stride', 'single'
def __init__(self, start, stop, size, stride, single):
self.start = start
self.stop = stop
self.size = size
self.stride = stride
self.single = single
assert not single or size == 1
def __getitem__(self, item):
if isinstance(item, slice):
start, stop, step = item.indices(self.size)
stride = step * self.stride
start = self.start + start * abs(self.stride)
stop = self.start + stop * abs(self.stride)
if stride == 0:
size = 1
else:
size = _compute_size(start, stop, stride)
ret = Dim(
start=start,
stop=stop,
size=size,
stride=stride,
single=False
)
return ret
else:
sliced = self[item:item + 1] if item != -1 else self[-1:]
if sliced.size != 1:
raise IndexError
return Dim(
start=sliced.start,
stop=sliced.stop,
size=sliced.size,
stride=sliced.stride,
single=True,
)
def get_offset(self, idx):
return self.start + idx * self.stride
def __repr__(self):
strfmt = "Dim(start=%s, stop=%s, size=%s, stride=%s)"
return strfmt % (self.start, self.stop, self.size, self.stride)
def normalize(self, base):
return Dim(start=self.start - base, stop=self.stop - base,
size=self.size, stride=self.stride, single=self.single)
def copy(self, start=None, stop=None, size=None, stride=None, single=None):
if start is None:
start = self.start
if stop is None:
stop = self.stop
if size is None:
size = self.size
if stride is None:
stride = self.stride
if single is None:
single = self.single
return Dim(start, stop, size, stride, single)
def is_contiguous(self, itemsize):
return self.stride == itemsize
def compute_index(indices, dims):
return sum(d.get_offset(i) for i, d in zip(indices, dims))
class Element(object):
is_array = False
def __init__(self, extent):
self.extent = extent
def iter_contiguous_extent(self):
yield self.extent
class Array(object):
"""A dummy numpy array-like object. Consider it an array without the
actual data, but offset from the base data pointer.
Attributes
----------
dims: tuple of Dim
describing each dimension of the array
ndim: int
number of dimension
shape: tuple of int
size of each dimension
strides: tuple of int
stride of each dimension
itemsize: int
itemsize
extent: (start, end)
start and end offset containing the memory region
"""
is_array = True
@classmethod
def from_desc(cls, offset, shape, strides, itemsize):
dims = []
for ashape, astride in zip(shape, strides):
dim = Dim(offset, offset + ashape * astride, ashape, astride,
single=False)
dims.append(dim)
offset = 0 # offset only applies to first dimension
return cls(dims, itemsize)
def __init__(self, dims, itemsize):
self.dims = tuple(dims)
self.ndim = len(self.dims)
self.shape = tuple(dim.size for dim in self.dims)
self.strides = tuple(dim.stride for dim in self.dims)
self.itemsize = itemsize
self.size = functools.reduce(operator.mul, self.shape, 1)
self.extent = self._compute_extent()
self.flags = self._compute_layout()
def _compute_layout(self):
# The logic here is based on that in _UpdateContiguousFlags from
# numpy/core/src/multiarray/flagsobject.c in NumPy v1.19.1 (commit
# 13661ac70).
# https://github.com/numpy/numpy/blob/maintenance/1.19.x/numpy/core/src/multiarray/flagsobject.c#L123-L191
# Records have no dims, and we can treat them as contiguous
if not self.dims:
return {'C_CONTIGUOUS': True, 'F_CONTIGUOUS': True}
# If this is a broadcast array then it is not contiguous
if any([dim.stride == 0 for dim in self.dims]):
return {'C_CONTIGUOUS': False, 'F_CONTIGUOUS': False}
flags = {'C_CONTIGUOUS': True, 'F_CONTIGUOUS': True}
# Check C contiguity
sd = self.itemsize
for dim in reversed(self.dims):
if dim.size == 0:
# Contiguous by definition
return {'C_CONTIGUOUS': True, 'F_CONTIGUOUS': True}
if dim.size != 1:
if dim.stride != sd:
flags['C_CONTIGUOUS'] = False
sd *= dim.size
# Check F contiguity
sd = self.itemsize
for dim in self.dims:
if dim.size != 1:
if dim.stride != sd:
flags['F_CONTIGUOUS'] = False
return flags
sd *= dim.size
return flags
def _compute_extent(self):
firstidx = [0] * self.ndim
lastidx = [s - 1 for s in self.shape]
start = compute_index(firstidx, self.dims)
stop = compute_index(lastidx, self.dims) + self.itemsize
stop = max(stop, start) # ensure positive extent
return Extent(start, stop)
def __repr__(self):
return '<Array dims=%s itemsize=%s>' % (self.dims, self.itemsize)
def __getitem__(self, item):
if not isinstance(item, tuple):
item = [item]
else:
item = list(item)
nitem = len(item)
ndim = len(self.dims)
if nitem > ndim:
raise IndexError("%d extra indices given" % (nitem - ndim,))
# Add empty slices for missing indices
while len(item) < ndim:
item.append(slice(None, None))
dims = [dim.__getitem__(it) for dim, it in zip(self.dims, item)]
newshape = [d.size for d in dims if not d.single]
arr = Array(dims, self.itemsize)
if newshape:
return arr.reshape(*newshape)[0]
else:
return Element(arr.extent)
@property
def is_c_contig(self):
return self.flags['C_CONTIGUOUS']
@property
def is_f_contig(self):
return self.flags['F_CONTIGUOUS']
def iter_contiguous_extent(self):
""" Generates extents
"""
if self.is_c_contig or self.is_f_contig:
yield self.extent
else:
if self.dims[0].stride < self.dims[-1].stride:
innerdim = self.dims[0]
outerdims = self.dims[1:]
outershape = self.shape[1:]
else:
innerdim = self.dims[-1]
outerdims = self.dims[:-1]
outershape = self.shape[:-1]
if innerdim.is_contiguous(self.itemsize):
oslen = [range(s) for s in outershape]
for indices in itertools.product(*oslen):
base = compute_index(indices, outerdims)
yield base + innerdim.start, base + innerdim.stop
else:
oslen = [range(s) for s in self.shape]
for indices in itertools.product(*oslen):
offset = compute_index(indices, self.dims)
yield offset, offset + self.itemsize
def reshape(self, *newdims, **kws):
oldnd = self.ndim
newnd = len(newdims)
if newdims == self.shape:
return self, None
order = kws.pop('order', 'C')
if kws:
raise TypeError('unknown keyword arguments %s' % kws.keys())
if order not in 'CFA':
raise ValueError('order not C|F|A')
# check for exactly one instance of -1 in newdims
# https://github.com/numpy/numpy/blob/623bc1fae1d47df24e7f1e29321d0c0ba2771ce0/numpy/core/src/multiarray/shape.c#L470-L515 # noqa: E501
unknownidx = -1
knownsize = 1
for i, dim in enumerate(newdims):
if dim < 0:
if unknownidx == -1:
unknownidx = i
else:
raise ValueError("can only specify one unknown dimension")
else:
knownsize *= dim
# compute the missing dimension
if unknownidx >= 0:
if knownsize == 0 or self.size % knownsize != 0:
raise ValueError("cannot infer valid shape for unknown dimension")
else:
newdims = newdims[0:unknownidx] \
+ (self.size // knownsize,) \
+ newdims[unknownidx + 1:]
newsize = functools.reduce(operator.mul, newdims, 1)
if order == 'A':
order = 'F' if self.is_f_contig else 'C'
if newsize != self.size:
raise ValueError("reshape changes the size of the array")
if self.is_c_contig or self.is_f_contig:
if order == 'C':
newstrides = list(iter_strides_c_contig(self, newdims))
elif order == 'F':
newstrides = list(iter_strides_f_contig(self, newdims))
else:
raise AssertionError("unreachable")
else:
newstrides = np.empty(newnd, np.ctypeslib.c_intp)
# need to keep these around in variables, not temporaries, so they
# don't get GC'ed before we call into the C code
olddims = np.array(self.shape, dtype=np.ctypeslib.c_intp)
oldstrides = np.array(self.strides, dtype=np.ctypeslib.c_intp)
newdims = np.array(newdims, dtype=np.ctypeslib.c_intp)
if not attempt_nocopy_reshape(
oldnd,
olddims,
oldstrides,
newnd,
newdims,
newstrides,
self.itemsize,
order == 'F',
):
raise NotImplementedError('reshape would require copy')
ret = self.from_desc(self.extent.begin, shape=newdims,
strides=newstrides, itemsize=self.itemsize)
return ret, list(self.iter_contiguous_extent())
def squeeze(self, axis=None):
newshape, newstrides = [], []
if axis is None:
for length, stride in zip(self.shape, self.strides):
if length != 1:
newshape.append(length)
newstrides.append(stride)
else:
if not isinstance(axis, tuple):
axis = (axis,)
for ax in axis:
if self.shape[ax] != 1:
raise ValueError(
"cannot select an axis to squeeze out which has size not equal "
"to one"
)
for i, (length, stride) in enumerate(zip(self.shape, self.strides)):
if i not in axis:
newshape.append(length)
newstrides.append(stride)
newarr = self.from_desc(
self.extent.begin,
shape=newshape,
strides=newstrides,
itemsize=self.itemsize,
)
return newarr, list(self.iter_contiguous_extent())
def ravel(self, order='C'):
if order not in 'CFA':
raise ValueError('order not C|F|A')
if (order in 'CA' and self.is_c_contig
or order in 'FA' and self.is_f_contig):
newshape = (self.size,)
newstrides = (self.itemsize,)
arr = self.from_desc(self.extent.begin, newshape, newstrides,
self.itemsize)
return arr, list(self.iter_contiguous_extent())
else:
raise NotImplementedError("ravel on non-contiguous array")
def iter_strides_f_contig(arr, shape=None):
"""yields the f-contiguous strides
"""
shape = arr.shape if shape is None else shape
itemsize = arr.itemsize
yield itemsize
sum = 1
for s in shape[:-1]:
sum *= s
yield sum * itemsize
def iter_strides_c_contig(arr, shape=None):
"""yields the c-contiguous strides
"""
shape = arr.shape if shape is None else shape
itemsize = arr.itemsize
def gen():
yield itemsize
sum = 1
for s in reversed(shape[1:]):
sum *= s
yield sum * itemsize
for i in reversed(list(gen())):
yield i
def is_element_indexing(item, ndim):
if isinstance(item, slice):
return False
elif isinstance(item, tuple):
if len(item) == ndim:
if not any(isinstance(it, slice) for it in item):
return True
else:
return True
return False
def _compute_size(start, stop, step):
"""Algorithm adapted from cpython rangeobject.c
"""
if step > 0:
lo = start
hi = stop
else:
lo = stop
hi = start
step = -step
if lo >= hi:
return 0
return (hi - lo - 1) // step + 1