Skip to content

Commit

Permalink
Support NOT in StarTree Index (#12988)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jackie-Jiang committed Apr 26, 2024
1 parent cb68783 commit 5fc89ce
Show file tree
Hide file tree
Showing 17 changed files with 357 additions and 315 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

import com.google.common.base.Preconditions;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.pinot.core.common.BlockDocIdSet;
Expand Down Expand Up @@ -90,10 +89,7 @@ protected BlockDocIdSet getNextBlockWithoutNullHandling() {
return new SortedDocIdSet(Collections.singletonList(docIdRange));
}
} else {
// Sort the dictIds in ascending order so that their respective docIdRanges are adjacent if they are adjacent
Arrays.sort(dictIds);

// Merge adjacent docIdRanges
// Merge adjacent docIdRanges (dictIds are already sorted)
List<IntPair> docIdRanges = new ArrayList<>();
IntPair lastDocIdRange = _sortedIndexReader.getDocIds(dictIds[0]);
for (int i = 1; i < numDictIds; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,34 @@
*/
package org.apache.pinot.core.operator.filter.predicate;

import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import java.math.BigDecimal;
import org.apache.pinot.common.request.context.predicate.Predicate;
import org.apache.pinot.segment.spi.index.reader.Dictionary;
import org.apache.pinot.spi.data.FieldSpec.DataType;


public abstract class BaseDictionaryBasedPredicateEvaluator extends BasePredicateEvaluator {
protected final Dictionary _dictionary;
protected boolean _alwaysTrue;
protected boolean _alwaysFalse;
protected int[] _matchingDictIds;
protected int[] _nonMatchingDictIds;

protected BaseDictionaryBasedPredicateEvaluator(Predicate predicate) {
protected BaseDictionaryBasedPredicateEvaluator(Predicate predicate, Dictionary dictionary) {
super(predicate);
_dictionary = dictionary;
}

/**
 * Always returns {@code true}. The method is declared final, so subclasses cannot override it.
 */
@Override
public final boolean isDictionaryBased() {
return true;
}

/**
 * Reports {@code DataType.INT} as the data type of this evaluator.
 *
 * <p>NOTE(review): presumably INT because a dictionary-based evaluator operates on int dictIds rather than
 * on the column's raw values -- confirm against the PredicateEvaluator contract.
 */
@Override
public DataType getDataType() {
return DataType.INT;
}

@Override
Expand All @@ -42,13 +59,33 @@ public boolean isAlwaysFalse() {
}

@Override
public final boolean isDictionaryBased() {
return true;
public int[] getMatchingDictIds() {
if (_matchingDictIds == null) {
_matchingDictIds = calculateMatchingDictIds();
}
return _matchingDictIds;
}

@Override
public DataType getDataType() {
return DataType.INT;
/**
 * Builds the matching dictIds by probing every dictId of the dictionary with {@code applySV(int)}.
 *
 * <p>DictIds are visited from 0 up to (but excluding) the dictionary length, so the returned array is in
 * ascending order. Subclasses that already hold the matching dictIds in another form can override this
 * method to avoid the full scan.
 *
 * @return the dictIds accepted by {@code applySV(int)}, in ascending order
 */
protected int[] calculateMatchingDictIds() {
  int numDictIds = _dictionary.length();
  IntList accepted = new IntArrayList();
  int candidate = 0;
  while (candidate < numDictIds) {
    if (applySV(candidate)) {
      accepted.add(candidate);
    }
    candidate++;
  }
  return accepted.toIntArray();
}

/**
 * Returns the dictIds that do not match the predicate, computing them on first access.
 *
 * <p>The result of {@code calculateNonMatchingDictIds()} is stored in {@code _nonMatchingDictIds} and
 * handed back on later calls. If the field has already been populated (it is a protected, non-final
 * field), nothing is computed here.
 *
 * @return the cached array of non-matching dictIds
 */
public int[] getNonMatchingDictIds() {
  if (_nonMatchingDictIds != null) {
    return _nonMatchingDictIds;
  }
  _nonMatchingDictIds = calculateNonMatchingDictIds();
  return _nonMatchingDictIds;
}

/**
 * Derives the non-matching dictIds from the matching dictIds and the dictionary length.
 *
 * <p>NOTE(review): {@code PredicateUtils.flipDictIds} is not visible here; presumably it returns the dictIds
 * in [0, dictionary length) that are absent from the given array -- confirm in PredicateUtils.
 *
 * @return the array produced by {@code PredicateUtils.flipDictIds}
 */
protected int[] calculateNonMatchingDictIds() {
  int[] matching = getMatchingDictIds();
  int dictionarySize = _dictionary.length();
  return PredicateUtils.flipDictIds(matching, dictionarySize);
}

@Override
Expand Down Expand Up @@ -106,12 +143,6 @@ public final boolean applyMV(byte[][] values, int length) {
throw new UnsupportedOperationException();
}

// NOTE: override it for exclusive predicate
@Override
public int[] getNonMatchingDictIds() {
throw new UnsupportedOperationException();
}

/**
* Apply a single-value entry to the predicate.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,4 @@ public Predicate.Type getPredicateType() {
public final boolean isExclusive() {
return getPredicateType().isExclusive();
}

@Override
public int getNumMatchingDictIds() {
return getMatchingDictIds().length;
}

@Override
public int getNumNonMatchingDictIds() {
return getNonMatchingDictIds().length;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,7 @@ public static BaseDictionaryBasedPredicateEvaluator newDictionaryBasedEvaluator(
* @param dataType Data type for the column
* @return Raw value based EQ predicate evaluator
*/
public static EqRawPredicateEvaluator newRawValueBasedEvaluator(EqPredicate eqPredicate,
DataType dataType) {
public static EqRawPredicateEvaluator newRawValueBasedEvaluator(EqPredicate eqPredicate, DataType dataType) {
String value = eqPredicate.getValue();
switch (dataType) {
case INT:
Expand Down Expand Up @@ -92,10 +91,9 @@ public static EqRawPredicateEvaluator newRawValueBasedEvaluator(EqPredicate eqPr
private static final class DictionaryBasedEqPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator
implements IntValue {
final int _matchingDictId;
final int[] _matchingDictIds;

DictionaryBasedEqPredicateEvaluator(EqPredicate eqPredicate, Dictionary dictionary, DataType dataType) {
super(eqPredicate);
super(eqPredicate, dictionary);
String predicateValue = PredicateUtils.getStoredValue(eqPredicate.getValue(), dataType);
_matchingDictId = dictionary.indexOf(predicateValue);
if (_matchingDictId >= 0) {
Expand All @@ -109,6 +107,11 @@ private static final class DictionaryBasedEqPredicateEvaluator extends BaseDicti
}
}

/**
 * Computes the non-matching dictIds from the dictionary length and the single matching dictId, replacing
 * the inherited implementation that flips the matching dictId array.
 *
 * <p>NOTE(review): assumes {@code PredicateUtils.getDictIds(length, excludedDictId)} returns every dictId in
 * [0, length) except the excluded one, and copes with a negative {@code _matchingDictId} (predicate value
 * not found in the dictionary) -- confirm in PredicateUtils.
 */
@Override
protected int[] calculateNonMatchingDictIds() {
return PredicateUtils.getDictIds(_dictionary.length(), _matchingDictId);
}

@Override
public int getNumMatchingItems() {
return 1;
Expand All @@ -132,11 +135,6 @@ public int applySV(int limit, int[] docIds, int[] values) {
return matches;
}

@Override
public int[] getMatchingDictIds() {
return _matchingDictIds;
}

@Override
public int getInt() {
return _matchingDictId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,30 +50,29 @@ public static BaseDictionaryBasedPredicateEvaluator newFSTBasedEvaluator(RegexpL
* Matches regexp query using FSTIndexReader.
*/
private static class FSTBasedRegexpPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator {
final Dictionary _dictionary;
final ImmutableRoaringBitmap _dictIds;
final ImmutableRoaringBitmap _matchingDictIdBitmap;

public FSTBasedRegexpPredicateEvaluator(RegexpLikePredicate regexpLikePredicate, TextIndexReader fstIndexReader,
Dictionary dictionary) {
super(regexpLikePredicate);
_dictionary = dictionary;
super(regexpLikePredicate, dictionary);
String searchQuery = RegexpPatternConverterUtils.regexpLikeToLuceneRegExp(regexpLikePredicate.getValue());
_dictIds = fstIndexReader.getDictIds(searchQuery);
}

@Override
public boolean isAlwaysFalse() {
return _dictIds.isEmpty();
_matchingDictIdBitmap = fstIndexReader.getDictIds(searchQuery);
int numMatchingDictIds = _matchingDictIdBitmap.getCardinality();
if (numMatchingDictIds == 0) {
_alwaysFalse = true;
} else if (dictionary.length() == numMatchingDictIds) {
_alwaysTrue = true;
}
}

@Override
public boolean isAlwaysTrue() {
return _dictIds.getCardinality() == _dictionary.length();
/**
 * Returns the matching dictIds by converting the bitmap obtained from the FST index reader into an int
 * array, so no per-dictId probing of the dictionary is needed.
 */
protected int[] calculateMatchingDictIds() {
return _matchingDictIdBitmap.toArray();
}

@Override
public boolean applySV(int dictId) {
return _dictIds.contains(dictId);
return _matchingDictIdBitmap.contains(dictId);
}

@Override
Expand All @@ -88,10 +87,5 @@ public int applySV(int limit, int[] docIds, int[] values) {
}
return matches;
}

@Override
public int[] getMatchingDictIds() {
return _dictIds.toArray();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,7 @@ public static BaseDictionaryBasedPredicateEvaluator newDictionaryBasedEvaluator(
* @param dataType Data type for the column
* @return Raw value based IN predicate evaluator
*/
public static InRawPredicateEvaluator newRawValueBasedEvaluator(InPredicate inPredicate,
DataType dataType) {
public static InRawPredicateEvaluator newRawValueBasedEvaluator(InPredicate inPredicate, DataType dataType) {
switch (dataType) {
case INT: {
int[] intValues = inPredicate.getIntValues();
Expand Down Expand Up @@ -157,42 +156,34 @@ public static InRawPredicateEvaluator newRawValueBasedEvaluator(InPredicate inPr

private static final class DictionaryBasedInPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator {
final IntSet _matchingDictIdSet;
final int _numMatchingDictIds;
int[] _matchingDictIds;

DictionaryBasedInPredicateEvaluator(InPredicate inPredicate, Dictionary dictionary, DataType dataType,
@Nullable QueryContext queryContext) {
super(inPredicate);
super(inPredicate, dictionary);
_matchingDictIdSet = PredicateUtils.getDictIdSet(inPredicate, dictionary, dataType, queryContext);
_numMatchingDictIds = _matchingDictIdSet.size();
if (_numMatchingDictIds == 0) {
int numMatchingDictIds = _matchingDictIdSet.size();
if (numMatchingDictIds == 0) {
_alwaysFalse = true;
} else if (dictionary.length() == _numMatchingDictIds) {
} else if (dictionary.length() == numMatchingDictIds) {
_alwaysTrue = true;
}
}

@Override
public boolean applySV(int dictId) {
return _matchingDictIdSet.contains(dictId);
}

@Override
public int getNumMatchingDictIds() {
return _numMatchingDictIds;
/**
 * Returns the matching dictIds as an array sorted in ascending order.
 *
 * <p>The dictIds are copied out of {@code _matchingDictIdSet} and then sorted in place.
 * NOTE(review): the explicit sort suggests the set's own ordering is not relied upon and that consumers
 * expect ascending dictIds (e.g. for merging adjacent docId ranges on a sorted index) -- confirm against
 * the callers of getMatchingDictIds().
 */
protected int[] calculateMatchingDictIds() {
int[] matchingDictIds = _matchingDictIdSet.toIntArray();
// Sort the copy, not the set: toIntArray() hands back a fresh array that is safe to reorder
Arrays.sort(matchingDictIds);
return matchingDictIds;
}

@Override
public int getNumMatchingItems() {
return getNumMatchingDictIds();
return _matchingDictIdSet.size();
}

@Override
public int[] getMatchingDictIds() {
if (_matchingDictIds == null) {
_matchingDictIds = _matchingDictIdSet.toIntArray();
}
return _matchingDictIds;
public boolean applySV(int dictId) {
return _matchingDictIdSet.contains(dictId);
}

@Override
Expand Down Expand Up @@ -477,9 +468,7 @@ public boolean applySV(byte[] value) {

@Override
public <R> R accept(MultiValueVisitor<R> visitor) {
byte[][] bytes = _matchingValues.stream()
.map(ByteArray::getBytes)
.toArray(byte[][]::new);
byte[][] bytes = _matchingValues.stream().map(ByteArray::getBytes).toArray(byte[][]::new);
return visitor.visitBytes(bytes);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ public static BaseDictionaryBasedPredicateEvaluator newDictionaryBasedEvaluator(
* @param dataType Data type for the column
* @return Raw value based NOT_EQ predicate evaluator
*/
public static NeqRawPredicateEvaluator newRawValueBasedEvaluator(NotEqPredicate notEqPredicate,
DataType dataType) {
public static NeqRawPredicateEvaluator newRawValueBasedEvaluator(NotEqPredicate notEqPredicate, DataType dataType) {
String value = notEqPredicate.getValue();
switch (dataType) {
case INT:
Expand Down Expand Up @@ -87,12 +86,9 @@ public static NeqRawPredicateEvaluator newRawValueBasedEvaluator(NotEqPredicate

private static final class DictionaryBasedNeqPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator {
final int _nonMatchingDictId;
final int[] _nonMatchingDictIds;
final Dictionary _dictionary;
int[] _matchingDictIds;

DictionaryBasedNeqPredicateEvaluator(NotEqPredicate notEqPredicate, Dictionary dictionary, DataType dataType) {
super(notEqPredicate);
super(notEqPredicate, dictionary);
String predicateValue = PredicateUtils.getStoredValue(notEqPredicate.getValue(), dataType);
_nonMatchingDictId = dictionary.indexOf(predicateValue);
if (_nonMatchingDictId >= 0) {
Expand All @@ -104,7 +100,11 @@ private static final class DictionaryBasedNeqPredicateEvaluator extends BaseDict
_nonMatchingDictIds = new int[0];
_alwaysTrue = true;
}
_dictionary = dictionary;
}

/**
 * Computes the matching dictIds from the dictionary length and the single non-matching dictId, replacing
 * the inherited implementation.
 *
 * <p>NOTE(review): assumes {@code PredicateUtils.getDictIds(length, excludedDictId)} returns every dictId in
 * [0, length) except the excluded one, and returns all dictIds when {@code _nonMatchingDictId} is negative
 * (predicate value not found in the dictionary) -- confirm in PredicateUtils.
 */
@Override
protected int[] calculateMatchingDictIds() {
return PredicateUtils.getDictIds(_dictionary.length(), _nonMatchingDictId);
}

@Override
Expand All @@ -129,33 +129,6 @@ public int applySV(int limit, int[] docIds, int[] values) {
}
return matches;
}

@Override
public int[] getMatchingDictIds() {
if (_matchingDictIds == null) {
int dictionarySize = _dictionary.length();
if (_nonMatchingDictId >= 0) {
_matchingDictIds = new int[dictionarySize - 1];
int index = 0;
for (int dictId = 0; dictId < dictionarySize; dictId++) {
if (dictId != _nonMatchingDictId) {
_matchingDictIds[index++] = dictId;
}
}
} else {
_matchingDictIds = new int[dictionarySize];
for (int dictId = 0; dictId < dictionarySize; dictId++) {
_matchingDictIds[dictId] = dictId;
}
}
}
return _matchingDictIds;
}

@Override
public int[] getNonMatchingDictIds() {
return _nonMatchingDictIds;
}
}

public static abstract class NeqRawPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator {
Expand Down

0 comments on commit 5fc89ce

Please sign in to comment.