Backport SOLR-14765 to branch_8_11 #2682

Open · wants to merge 5 commits into base: branch_8_11
5 changes: 5 additions & 0 deletions solr/CHANGES.txt
@@ -42,12 +42,17 @@ Bug Fixes
* SOLR-17098: ZK Credentials and ACLs are no longer sent to all ZK Servers when using Streaming Expressions.
They will only be used when sent to the default ZK Host. (Houston Putman, Jan Høydahl, David Smiley, Gus Heck, Qing Xu)

* SOLR-16585: Fixed NPE when paginating MatchAllDocs with non-zero start offset, like q=*:*&start=10 (Michael Gibney)

Optimizations
---------------------
* SOLR-16555: SolrIndexSearcher - FilterCache intersections/andNot should not clone bitsets repeatedly (Kevin Risden, David Smiley)

* SOLR-17004: ZkStateReader waitForState should check clusterState before using watchers (Kevin Risden)

* SOLR-14765: Optimize DocList creation for sort-irrelevant cases. This issue also reworks the building and caching of liveDocs
in SolrIndexSearcher, and refines/clarifies the effect of `useFilterForSortedQuery` (Michael Gibney, Mike Drob, David Smiley)

Other Changes
---------------------
* SOLR-16141: Upgrade Apache Tika to 1.28.4 (Kevin Risden)
84 changes: 60 additions & 24 deletions solr/core/src/java/org/apache/solr/search/DocSetUtil.java
@@ -31,12 +31,12 @@
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
-import org.apache.solr.common.SolrException;

/** @lucene.experimental */
public class DocSetUtil {
@@ -76,16 +76,10 @@ public static boolean equals(DocSet a, DocSet b) {
* @lucene.experimental
*/
public static DocSet getDocSet(DocSetCollector collector, SolrIndexSearcher searcher) {
-if (collector.size() == searcher.numDocs()) {
-  if (!searcher.isLiveDocsInstantiated()) {
-    searcher.setLiveDocs( collector.getDocSet() );
-  }
-  try {
-    return searcher.getLiveDocSet();
-  } catch (IOException e) {
-    // should be impossible... liveDocs should exist, so no IO should be necessary
-    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
-  }
+final int size = collector.size();
+if (size == searcher.numDocs()) {
+  // see comment under similar block in `getDocSet(DocSet, SolrIndexSearcher)`
+  return searcher.offerLiveDocs(collector::getDocSet, size);
}

return collector.getDocSet();
@@ -97,18 +91,15 @@ public static DocSet getDocSet(DocSetCollector collector, SolrIndexSearcher sear
* @lucene.experimental
*/
public static DocSet getDocSet(DocSet docs, SolrIndexSearcher searcher) {
-if (docs.size() == searcher.numDocs()) {
-  if (!searcher.isLiveDocsInstantiated()) {
-    searcher.setLiveDocs( docs );
-  }
-  try {
-    // if this docset has the same cardinality as liveDocs, return liveDocs instead
-    // so this set will be short lived garbage.
-    return searcher.getLiveDocSet();
-  } catch (IOException e) {
-    // should be impossible... liveDocs should exist, so no IO should be necessary
-    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
-  }
+final int size = docs.size();
+if (size == searcher.numDocs()) {
+  // if this docset has the same cardinality as liveDocs, return liveDocs instead
+  // so this set will be short lived garbage.
+  // This could be a very broad query, or some unusual way of running MatchAllDocsQuery?
+  // In any event, we already _have_ a viable `liveDocs` DocSet, so offer it to the
+  // SolrIndexSearcher, and use whatever canonical `liveDocs` instance the searcher returns
+  // (which may or may not be derived from the set that we offered)
+  return searcher.offerLiveDocs(() -> docs, size);
}

return docs;
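The `offerLiveDocs` call used in both blocks above is added to `SolrIndexSearcher` elsewhere in this PR and does not appear in this excerpt. A minimal sketch of the intended contract, written as hypothetical members of `SolrIndexSearcher` with illustrative names (the PR's actual field names, synchronization, and return type may differ): the searcher keeps at most one canonical live-docs `DocSet`; if one is already cached it wins, otherwise the offered set is adopted.

```java
import java.util.function.Supplier;

// Hypothetical sketch only; not the code added by this PR.
private volatile DocSet cachedLiveDocs; // lazily populated canonical live-docs set

public DocSet offerLiveDocs(Supplier<DocSet> docSetSupplier, int suppliedSize) {
  DocSet cached = cachedLiveDocs;
  if (cached != null) {
    return cached;                       // a canonical instance already exists; ignore the offer
  }
  DocSet offered = docSetSupplier.get(); // the caller has already paid the cost of computing this
  assert offered.size() == suppliedSize;
  cachedLiveDocs = offered;              // adopt the offered set as the canonical instance
  return offered;
}
```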
@@ -129,6 +120,10 @@ public static DocSet createDocSet(SolrIndexSearcher searcher, Query query, DocSe
DocSet set = ((DocSetProducer) query).createDocSet(searcher);
// assert equals(set, createDocSetGeneric(searcher, query));
return set;
} else if (query instanceof MatchAllDocsQuery) {
DocSet set = searcher.getLiveDocSet();
// assert equals(set, createDocSetGeneric(searcher, query));
return set;
}

return createDocSetGeneric(searcher, query);
@@ -279,4 +274,45 @@ public static void collectSortedDocSet(DocSet docs, IndexReader reader, Collecto
}
}

-}
/**
* Utility method to copy a specified range of {@link Bits} to a specified offset in a destination
* {@link FixedBitSet}. This can be useful, e.g., for translating per-segment bits ranges to
* composite DocSet bits ranges.
*
* @param src source Bits
* @param srcOffset start offset (inclusive) in source Bits
* @param srcLimit end offset (exclusive) in source Bits
* @param dest destination FixedBitSet
* @param destOffset start offset of range in destination
*/
static void copyTo(
Bits src, final int srcOffset, int srcLimit, FixedBitSet dest, int destOffset) {
/*
NOTE: `adjustedSegDocBase` +1 to compensate for the fact that `segOrd` always has to "read
ahead" by 1. Adding 1 to set `adjustedSegDocBase` once allows us to use `segOrd` as-is (with
no "pushback") for both `startIndex` and `endIndex` args to `dest.set(startIndex, endIndex)`
*/
final int adjustedSegDocBase = destOffset - srcOffset + 1;
int segOrd = srcLimit;
do {
/*
NOTE: we check deleted range before live range in the outer loop in order to not have
to explicitly guard against `dest.set(maxDoc, maxDoc)` in the event that the global max doc
is a delete (this case would trigger a bounds-checking AssertionError in
`FixedBitSet.set(int, int)`).
*/
do {
// consume deleted range
if (--segOrd < srcOffset) {
// we're currently in a "deleted" run, so just return; no need to do anything further
return;
}
} while (!src.get(segOrd));
final int limit = segOrd; // set highest ord (inclusive) of live range
while (segOrd-- > srcOffset && src.get(segOrd)) {
// consume live range
}
dest.set(adjustedSegDocBase + segOrd, adjustedSegDocBase + limit);
} while (segOrd > srcOffset);
}
}
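As a usage illustration (hypothetical helper, not code from this PR): the package-private `copyTo` above translates one segment's live-docs `Bits` into the corresponding doc-id range of a top-level `FixedBitSet`, which is roughly the job of assembling a composite liveDocs bitset over all segments. The caller below is assumed to live in the same `org.apache.solr.search` package, since `copyTo` is package-private.

```java
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

// Hypothetical composite liveDocs builder, for illustration only.
static FixedBitSet buildCompositeLiveDocs(DirectoryReader reader) {
  FixedBitSet composite = new FixedBitSet(reader.maxDoc());
  for (LeafReaderContext ctx : reader.leaves()) {
    LeafReader leaf = ctx.reader();
    Bits segLiveDocs = leaf.getLiveDocs(); // null means the segment has no deletions
    if (segLiveDocs == null) {
      composite.set(ctx.docBase, ctx.docBase + leaf.maxDoc()); // the whole segment is live
    } else {
      // copy only the live positions of this segment into the composite bitset
      DocSetUtil.copyTo(segLiveDocs, 0, leaf.maxDoc(), composite, ctx.docBase);
    }
  }
  return composite;
}
```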
38 changes: 38 additions & 0 deletions solr/core/src/java/org/apache/solr/search/QueryUtils.java
@@ -24,6 +24,7 @@
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;

@@ -44,6 +45,36 @@ public static boolean isNegative(Query q) {
return true;
}

/**
* Recursively unwraps the specified query to determine whether it is capable of producing a score
* that varies across different documents. Returns true if this query is not capable of producing
* a varying score (i.e., it is a constant score query).
*/
public static boolean isConstantScoreQuery(Query q) {
while (true) {
if (q instanceof BoostQuery) {
q = ((BoostQuery) q).getQuery();
} else if (q instanceof WrappedQuery) {
q = ((WrappedQuery) q).getWrappedQuery();
} else if (q instanceof ConstantScoreQuery) {
return true;
} else if (q instanceof MatchAllDocsQuery) {
return true;
} else if (q instanceof MatchNoDocsQuery) {
return true;
} else if (q instanceof BooleanQuery) {
// NOTE: this check can be very simple because:
// 1. there's no need to check `q == clause.getQuery()` because BooleanQuery is final, with
// a builder that prevents direct loops.
// 2. we don't bother recursing to second-guess a nominally "scoring" clause that actually
// wraps a constant-score query.
return ((BooleanQuery) q).clauses().stream().noneMatch(BooleanClause::isScoring);
} else {
return false;
}
}
}
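For illustration (hypothetical field and term values; not test code from this PR), the new check unwraps boost and wrapper layers and, for `BooleanQuery`, looks only at whether any clause is scoring (i.e., has `Occur.MUST` or `Occur.SHOULD`):

```java
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.search.QueryUtils;

class IsConstantScoreQueryExamples {
  static void examples() {
    // boost and wrapper layers are unwrapped before the check
    Query wrapped = new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("f", "x"))), 2.0f);
    assert QueryUtils.isConstantScoreQuery(wrapped);

    // a BooleanQuery whose clauses are all FILTER/MUST_NOT has no scoring clause
    BooleanQuery filtersOnly =
        new BooleanQuery.Builder()
            .add(new TermQuery(new Term("f", "x")), Occur.FILTER)
            .add(new TermQuery(new Term("f", "y")), Occur.MUST_NOT)
            .build();
    assert QueryUtils.isConstantScoreQuery(filtersOnly);

    // an ordinary scoring query is not constant-score
    assert !QueryUtils.isConstantScoreQuery(new TermQuery(new Term("f", "x")));
  }
}
```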

/** Returns the original query if it was already a positive query, otherwise
* return the negative of the query (i.e., a positive query).
* <p>
@@ -155,6 +186,13 @@ public static Query combineQueryAndFilter(Query scoreQuery, Query filterQuery) {
if (filterQuery == null) {
return new MatchAllDocsQuery(); // default if nothing -- match everything
} else {
/*
NOTE: we _must_ wrap filter in a ConstantScoreQuery (default score `1f`) in order to
guarantee score parity with the actual user-specified scoreQuery (i.e., MatchAllDocsQuery).
This should only matter if score is _explicitly_ requested to be returned, but we don't know
that here, and it's probably not worth jumping through the necessary hoops simply to avoid
wrapping in the case where `true==isConstantScoreQuery(filterQuery)`
*/
return new ConstantScoreQuery(filterQuery);
}
} else {
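The branch shown above appears to handle the case where the user's score query is effectively a match-all or constant-score query; its enclosing condition is outside this excerpt. As an illustrative comparison of the score-parity point in the NOTE (hypothetical field names, not code from this PR): a `MatchAllDocsQuery` scores every matching document 1.0f, and `new ConstantScoreQuery(filter)` with its default boost also scores 1.0f, so substituting the wrapped filter preserves returned scores.

```java
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

class ScoreParityExample {
  static void example() {
    // What `q=*:*&fq=f:x` means logically: a match-all query plus a non-scoring filter clause.
    Query matchAllPlusFilter =
        new BooleanQuery.Builder()
            .add(new MatchAllDocsQuery(), Occur.MUST)             // contributes 1.0f per doc
            .add(new TermQuery(new Term("f", "x")), Occur.FILTER) // contributes no score
            .build();

    // What the branch above produces instead: the filter wrapped in a ConstantScoreQuery,
    // whose default score is also 1.0f per matching doc.
    Query wrappedFilter = new ConstantScoreQuery(new TermQuery(new Term("f", "x")));

    // Both forms match the same documents and score each of them 1.0f, so the substitution
    // is safe even if the client explicitly asks for scores to be returned.
  }
}
```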