Skip to content

Commit

Permalink
Reduce FST block size for BlockTreeTermsWriter (#12604)
Browse files Browse the repository at this point in the history
  • Loading branch information
risdenk committed Oct 4, 2023
1 parent 43739e2 commit 0e6c29e
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 1 deletion.
5 changes: 5 additions & 0 deletions lucene/CHANGES.txt
Expand Up @@ -26,6 +26,11 @@ Bug Fixes
* GITHUB#12352: [Tessellator] Improve the checks that validate the diagonal between two polygon nodes so
the resulting polygons are valid counter clockwise polygons. (Ignacio Vera)

Optimizations
---------------------
* GITHUB#12604: Estimate the final FST size in BlockTreeTermsWriter and use it to pick the block size
of the FST BytesStore, reducing GC load during indexing. (Guo Feng)

======================= Lucene 8.11.2 =======================

Bug Fixes
Expand Down
Expand Up @@ -55,6 +55,7 @@
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;

/*
TODO:
Expand Down Expand Up @@ -427,10 +428,22 @@ public void compileIndex(List<PendingBlock> blocks, RAMOutputStream scratchBytes
}
}

long estimateSize = prefix.length;
for (PendingBlock block : blocks) {
if (block.subIndices != null) {
for (FST<BytesRef> subIndex : block.subIndices) {
estimateSize += subIndex.numBytes();
}
}
}
int estimateBitsRequired = PackedInts.bitsRequired(estimateSize);
int pageBits = Math.min(15, Math.max(6, estimateBitsRequired));


final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
- outputs, true, 15);
+ outputs, true, pageBits);
//if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
//}
Expand Down
4 changes: 4 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/fst/FST.java
Expand Up @@ -489,6 +489,10 @@ void finish(long newStartNode) throws IOException {
startNode = newStartNode;
bytes.finish();
}

/**
 * Returns the current position of the backing {@code bytes} store.
 *
 * <p>Callers sum this value across FSTs as a size-in-bytes estimate.
 *
 * @return the value reported by {@code bytes.getPosition()}
 */
public long numBytes() {
  final long position = bytes.getPosition();
  return position;
}

public T getEmptyOutput() {
return emptyOutput;
Expand Down

0 comments on commit 0e6c29e

Please sign in to comment.