/
BCBetweenArray.h
354 lines (300 loc) · 13.4 KB
/
BCBetweenArray.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
/*
**
* BEGIN_COPYRIGHT
*
* Copyright (C) 2008-2015 SciDB, Inc.
* All Rights Reserved.
*
* SciDB is free software: you can redistribute it and/or modify
* it under the terms of the AFFERO GNU General Public License as published by
* the Free Software Foundation.
*
* SciDB is distributed "AS-IS" AND WITHOUT ANY WARRANTY OF ANY KIND,
* INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,
* NON-INFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. See
* the AFFERO GNU General Public License for the complete license terms.
*
* You should have received a copy of the AFFERO GNU General Public License
* along with SciDB. If not, see <http://www.gnu.org/licenses/agpl-3.0.html>
*
* END_COPYRIGHT
*/
/**
* @file BetweenArray.h
*
* @brief The implementation of the array iterator for the between operator
*
* The array iterator for the between maps incoming getChunks calls into the
* appropriate getChunks calls for its input array. Then, if the requested chunk
* fits in the between range, the entire chunk is returned as-is. Otherwise,
* the appropriate piece of the chunk is carved out.
*
* NOTE: In the current implementation if the between window stretches beyond the
* limits of the input array, the behavior of the operator is undefined.
*
* The top-level array object simply serves as a factory for the iterators.
*/
#ifndef BC_BETWEEN_ARRAY_H_
#define BC_BETWEEN_ARRAY_H_
#include <string>
#include <array/DelegateArray.h>
#include <array/Metadata.h>
#include <array/SpatialRangesChunkPosIterator.h>
#include <query/Operator.h>
#include <vector>
namespace scidb
{
using namespace std;
class BCBetweenArray;
class BCBetweenArrayIterator;
class BCBetweenChunkIterator;
typedef std::shared_ptr<SpatialRanges> SpatialRangesPtr;
typedef std::shared_ptr<SpatialRangesChunkPosIterator> SpatialRangesChunkPosIteratorPtr;
std::string coordinateToString(Coordinates const& coor);
class BCBetweenChunk : public DelegateChunk
{
friend class BCBetweenChunkIterator;
public:
// Cannot move this function to BCBetweenArray::createChunkIterator()
// It needs _fullyInside member variable but, BCBetweenArray cannot know.
virtual std::shared_ptr<ConstChunkIterator> getConstIterator(int iterationMode) const;
virtual void setInputChunk(ConstChunk const& inputChunk);
BCBetweenChunk(BCBetweenArray const& array, DelegateArrayIterator const& iterator, AttributeID attrID);
private:
BCBetweenArray const& _array;
SpatialRange _myRange; // the firstPosition and lastPosition of this _chunk.
bool _fullyInside;
bool _fullyOutside;
std::shared_ptr<ConstArrayIterator> _emptyBitmapIterator;
};
class BCBetweenChunkIterator : public DelegateChunkIterator, CoordinatesMapper
{
protected:
Value& evaluate();
bool filter();
void moveNext();
void advancedMoveNext();
void nextVisible();
public:
int getMode() const {
return _mode;
}
virtual Value const& getItem();
virtual bool isEmpty() const;
virtual bool end();
virtual void operator ++();
virtual Coordinates const& getPosition();
virtual bool setPosition(Coordinates const& pos);
virtual void restart();
virtual ConstChunk const& getChunk();
std::shared_ptr<Query> getQuery() { return _query; }
BCBetweenChunkIterator(BCBetweenArrayIterator const& arrayIterator,
BCBetweenChunk const& chunk, int iterationMode);
protected:
protected:
BCBetweenArray const& _array;
BCBetweenChunk const& _chunk;
Coordinates _curPos;
int _mode;
bool _hasCurrent;
bool _ignoreEmptyCells;
MemChunk _shapeChunk;
std::shared_ptr<ConstChunkIterator> _emptyBitmapIterator;
TypeId _type;
/**
* Several member functions of class SpatialRanges takes a hint, on where the last successful search.
*/
mutable size_t _hintForSpatialRanges;
// For filter boundary
ExpressionContext _params;
std::vector<std::shared_ptr<ConstChunkIterator>> _iterators;
Value _tileValue;
private:
std::shared_ptr<Query> _query;
};
class ExistedBitmapBCBetweenChunkIterator : public BCBetweenChunkIterator
{
public:
virtual Value const& getItem();
ExistedBitmapBCBetweenChunkIterator(BCBetweenArrayIterator const& arrayIterator, BCBetweenChunk const& chunk, int iterationMode);
private:
Value _value;
};
class NewBitmapBCBetweenChunkIterator : public BCBetweenChunkIterator
{
public:
virtual Value const& getItem();
NewBitmapBCBetweenChunkIterator(BCBetweenArrayIterator const& arrayIterator, BCBetweenChunk const& chunk, int iterationMode);
protected:
Value _value;
};
class EmptyBitmapBCBetweenChunkIterator : public NewBitmapBCBetweenChunkIterator
{
public:
virtual Value const& getItem();
virtual bool isEmpty() const;
EmptyBitmapBCBetweenChunkIterator(BCBetweenArrayIterator const& arrayIterator, BCBetweenChunk const& chunk, int iterationMode);
};
/**
* ====== NOTE FROM Donghui Z. ON UNIFYING THE TWO ITERATORS ===========
*
* Prior to the 14.8 release, there were two iterators for BetweenArray.
* They differ in their way to find the next chunk that has data and intersects the between ranges.
* - A "random" iterator computes the next chunkPos purely from the between ranges, and asks inputArray whether the chunk exists.
* - A "sequential" iterator asks inputArray for the next chunk, and checks to see if its range intersects the between ranges.
* There was a threshold parameter BetweenArray::BETWEEN_SEQUENTIAL_INTERATOR_THRESHOLD = 6000.
*
* Donghui Z. believes the separation is artificial and non-optimal. It is possible that when running a query,
* sometimes the "random" iterator can find the next chunk faster and sometimes the "sequential" iterator can find faster.
* So Donghui decided to creatively integrate the two iterator into one, as follows:
* - A "combined" iterator alternates in asking inputArray for the next chunk and computing the next chunkPos from the
* between ranges, and use whichever gets there first.
*
* Also, this class uses a SpatialRangesChunkIterator to iterate over the chunkPos in the logical space.
* Per THE REQUEST TO JUSTIFY LOGICAL-SPACE ITERATION (see RegionCoordinatesIterator.h),
* here is why this is ok.
* The above described "combined" iterator will not forever iterate over the logical space (until a valid chunkPos is found).
* Each iteration step is accompanied with a probing, of whether the next existing chunk intersects the query range.
*
* ====== BELOW ARE Alex P.'s ORIGINAL NOTE DESCRIBING THE TWO-ITERATOR APPROACH ============
*
* Between Array has two ArrayIterator types:
* 1. BetweenArrayIterator advances chunks (operator++) by finding the next chunk inside the between box
* and probing input to see if that chunk exists. Assume the between box describes b logical chunks,
* and the underlying input array has n chunks - the iteration using this iterator will run in O( b * lg(n))
*
* 2. BetweenArraySequentialIterator advances chunks by asking input for its next chunk, and, if that chunk does
* not overlap with the between box, continuing to ask for the next input chunk until we either find a chunk
* that fits or we run out of chunks. If the input has n chunks present, this iteration will run in O(n).
*
* Sometimes b is small (selecting just a few cells) and sometimes b is large (selecting a 10-20 chunks
* from a very sparse array). The number n is a count of actual (not logical) chunks and we don't know how big
* that is, but assuming about 1TB storage per SciDB instance and 10MB per chunk, we can expect upper bound on
* n to be about 100,000. I've never seen real arrays from customers above 5,000 chunks.
*
* 100,000 / lg(100,000) ~= 6,000. So if b is below that number, use BetweenArrayIterator. Otherwise, use
* BetweenArraySequentialIterator. [poliocough, 4/14/12]
*/
class BCBetweenArrayIterator : public DelegateArrayIterator
{
friend class BCBetweenChunkIterator;
public:
/***
* Constructor for the between iterator
* Here we initialize the current position vector to all zeros, and obtain an iterator for the appropriate
* attribute in the input array.
*/
BCBetweenArrayIterator(BCBetweenArray const& between, AttributeID attrID, AttributeID inputAttrID);
/***
* The end call checks whether we're operating with the last chunk of the between
* window.
*/
virtual bool end();
/***
* The ++ operator advances the current position to the next chunk of the between
* window.
*/
virtual void operator ++();
/***
* Simply returns the current position
* Initial position is a vector of zeros of appropriate dimensionality
*/
virtual Coordinates const& getPosition();
/***
* Here we only need to check that we're not moving beyond the bounds of the between window
*/
virtual bool setPosition(Coordinates const& pos);
/***
* Reset simply changes the current position to all zeros
*/
virtual void restart();
virtual ConstChunk const& getChunk();
protected:
bool setAllIteratorsPosition(Coordinates const& pos);
void moveNext();
protected:
BCBetweenArray const& _array;
SpatialRangesChunkPosIteratorPtr _spatialRangesChunkPosIteratorPtr;
Coordinates _curPos;
bool _hasCurrent;
/**
* @see BetweenChunkIterator::_hintForSpatialRanges
*/
size_t _hintForSpatialRanges;
/**
* Increment inputIterator at least once,
* then advance the two iterators to the next chunk that (a) exists in the database; and (b) intersects a query range.
* - Upon success: hasCurrent = true; pos = both iterators' position; chunkInitialized = false;
* - Upon failure: hasCurrent = false.
*
* @preconditions:
* - inputIterator is pointing to a chunk that exists in the database.
* (It may or may NOT intersect any query range.)
* - spatialRangesChunkPosIteratorPtr is pointing to a chunk intersecting some query range.
* (It may or may NOT exist in the database.)
*
* @note: by "exists in the database", we mean in the local SciDB instance.
* @note: in restart(), do NOT call this function if the initial position is already valid.
*/
void advanceToNextChunkInRange();
private:
std::vector< std::shared_ptr<ConstArrayIterator> > _iterators;
std::shared_ptr<ConstArrayIterator> _emptyBitmapIterator;
AttributeID _inputAttrID;
};
class BCBetweenArrayEmptyBitmapIterator : public BCBetweenArrayIterator
{
BCBetweenArray& _array;
public:
virtual ConstChunk const& getChunk();
BCBetweenArrayEmptyBitmapIterator(BCBetweenArray const& array, AttributeID attrID, AttributeID inputAttrID);
};
class BCBetweenArray : public DelegateArray
{
friend class BCBetweenChunk;
friend class BCBetweenChunkIterator;
friend class BCBetweenArrayIterator;
friend class ExistedBitmapBCBetweenChunkIterator;
friend class NewBitmapBCBetweenChunkIterator;
public:
BCBetweenArray(ArrayDesc const& desc,
SpatialRangesPtr const& spatialRangesPtr,
SpatialRangesPtr const& innerSpatialRangesPtr,
std::shared_ptr<Array> const& input,
std::shared_ptr<Expression> expr,
std::shared_ptr<Query>& query,
bool tileMode);
virtual DelegateChunk* createChunk(DelegateArrayIterator const* iterator, AttributeID attrID) const;
virtual DelegateArrayIterator* createArrayIterator(AttributeID attrID) const;
std::shared_ptr<DelegateChunk> getEmptyBitmapChunk(BCBetweenArrayEmptyBitmapIterator* iterator);
private:
/**
* The original spatial ranges.
*/
SpatialRangesPtr _spatialRangesPtr;
/**
* Inner spatial ranges.
* No filter ranges.
*/
SpatialRangesPtr _innerSpatialRnagesPtr;
/**
* The modified spatial ranges where every SpatialRange._low is reduced by (interval-1).
* The goal is to quickly tell, from a chunk's chunkPos, whether the chunk overlaps a spatial range.
* In particular, a chunk overlaps, if and only if the extended spatial range contains the chunkPos.
* E.g. Let there be chunk with chunkPos=0 and interval 10. A range [8, 19] intersects the chunk's space,
* equivalently, the modified range [-1, 19] contains 0.
*/
SpatialRangesPtr _extendedSpatialRangesPtr;
/**
* For filter boundary
*/
std::map<Coordinates, std::shared_ptr<DelegateChunk>, CoordinatesLess > cache;
Mutex mutex;
std::shared_ptr<Expression> expression;
std::vector<BindInfo> bindings;
bool _tileMode;
size_t cacheSize;
AttributeID emptyAttrID;
};
} //namespace
#endif /* BC_BETWEEN_ARRAY_H_ */