lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From broust...@apache.org
Subject [lucene-solr] branch master updated: LUCENE-9106: UniformSplit postings format allows extension of block/line serializers.
Date Tue, 31 Dec 2019 09:17:43 GMT
This is an automated email from the ASF dual-hosted git repository.

broustant pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 1851779  LUCENE-9106: UniformSplit postings format allows extension of block/line serializers.
1851779 is described below

commit 1851779ddbfd8ed3148b5d20114bcf2b3651459d
Author: Bruno Roustant <broustant@salesforce.com>
AuthorDate: Tue Dec 31 10:13:11 2019 +0100

    LUCENE-9106: UniformSplit postings format allows extension of block/line serializers.
    
    Closes #1106
---
 lucene/CHANGES.txt                                 |   2 +
 .../lucene/codecs/uniformsplit/BlockHeader.java    |  70 +++++-----
 .../lucene/codecs/uniformsplit/BlockLine.java      |  10 +-
 .../lucene/codecs/uniformsplit/BlockReader.java    |  20 ++-
 .../lucene/codecs/uniformsplit/BlockWriter.java    |  24 +++-
 .../lucene/codecs/uniformsplit/FieldMetadata.java  | 151 +++++++++++----------
 .../uniformsplit/UniformSplitTermsReader.java      |  16 ++-
 .../uniformsplit/UniformSplitTermsWriter.java      |   8 +-
 .../uniformsplit/sharedterms/STBlockLine.java      |   8 +-
 .../uniformsplit/sharedterms/STBlockReader.java    |   7 +-
 .../uniformsplit/sharedterms/STBlockWriter.java    |   9 +-
 .../sharedterms/STIntersectBlockReader.java        |   7 +-
 .../sharedterms/STMergingBlockReader.java          |   2 +-
 .../sharedterms/STUniformSplitTermsReader.java     |  12 +-
 .../sharedterms/STUniformSplitTermsWriter.java     |   9 +-
 15 files changed, 213 insertions(+), 142 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index db80c93..3fa7dc4 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -104,6 +104,8 @@ Improvements
 
 * LUCENE-9105: UniformSplit postings format detects corrupted index and better handles IO exceptions. (Bruno Roustant)
 
+* LUCENE-9106: UniformSplit postings format allows extension of block/line serializers. (Bruno Roustant)
+
 Optimizations
 ---------------------
 (No changes)
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockHeader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockHeader.java
index d512fdc..cfdf2e9 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockHeader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockHeader.java
@@ -134,43 +134,49 @@ public class BlockHeader implements Accountable {
     return basePayloadsFP;
   }
 
-  public void write(DataOutput output) throws IOException {
-    assert linesCount > 0 : "block header does not seem to be initialized";
-    output.writeVInt(linesCount);
-
-    output.writeVLong(baseDocsFP);
-    output.writeVLong(basePositionsFP);
-    output.writeVLong(basePayloadsFP);
-
-    output.writeVInt(termStatesBaseOffset);
-    output.writeVInt(middleLineOffset);
+  @Override
+  public long ramBytesUsed() {
+    return RAM_USAGE;
   }
 
-  public static BlockHeader read(DataInput input, BlockHeader reuse) throws IOException {
-    int linesCount = input.readVInt();
-    if (linesCount <= 0 || linesCount > UniformSplitTermsWriter.MAX_NUM_BLOCK_LINES)
{
-      throw new CorruptIndexException("Illegal number of lines in a block: " + linesCount,
input);
-    }
+  /**
+   * Reads/writes block header.
+   */
+  public static class Serializer {
 
-    long baseDocsFP = input.readVLong();
-    long basePositionsFP = input.readVLong();
-    long basePayloadsFP = input.readVLong();
+    public void write(DataOutput output, BlockHeader blockHeader) throws IOException {
+      assert blockHeader.linesCount > 0 : "Block header is not initialized";
+      output.writeVInt(blockHeader.linesCount);
 
-    int termStatesBaseOffset = input.readVInt();
-    if (termStatesBaseOffset < 0) {
-      throw new CorruptIndexException("Illegal termStatesBaseOffset= " + termStatesBaseOffset,
input);
-    }
-    int middleTermOffset = input.readVInt();
-    if (middleTermOffset < 0) {
-      throw new CorruptIndexException("Illegal middleTermOffset= " + middleTermOffset, input);
-    }
+      output.writeVLong(blockHeader.baseDocsFP);
+      output.writeVLong(blockHeader.basePositionsFP);
+      output.writeVLong(blockHeader.basePayloadsFP);
 
-    BlockHeader blockHeader = reuse == null ? new BlockHeader() : reuse;
-    return blockHeader.reset(linesCount, baseDocsFP, basePositionsFP, basePayloadsFP, termStatesBaseOffset,
middleTermOffset);
-  }
+      output.writeVInt(blockHeader.termStatesBaseOffset);
+      output.writeVInt(blockHeader.middleLineOffset);
+    }
 
-  @Override
-  public long ramBytesUsed() {
-    return RAM_USAGE;
+    public BlockHeader read(DataInput input, BlockHeader reuse) throws IOException {
+      int linesCount = input.readVInt();
+      if (linesCount <= 0 || linesCount > UniformSplitTermsWriter.MAX_NUM_BLOCK_LINES)
{
+        throw new CorruptIndexException("Illegal number of lines in block: " + linesCount,
input);
+      }
+
+      long baseDocsFP = input.readVLong();
+      long basePositionsFP = input.readVLong();
+      long basePayloadsFP = input.readVLong();
+
+      int termStatesBaseOffset = input.readVInt();
+      if (termStatesBaseOffset < 0) {
+        throw new CorruptIndexException("Illegal termStatesBaseOffset= " + termStatesBaseOffset,
input);
+      }
+      int middleTermOffset = input.readVInt();
+      if (middleTermOffset < 0) {
+        throw new CorruptIndexException("Illegal middleTermOffset= " + middleTermOffset,
input);
+      }
+
+      BlockHeader blockHeader = reuse == null ? new BlockHeader() : reuse;
+      return blockHeader.reset(linesCount, baseDocsFP, basePositionsFP, basePayloadsFP, termStatesBaseOffset,
middleTermOffset);
+    }
   }
 }
\ No newline at end of file
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockLine.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockLine.java
index cea8e93..e39cfc6 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockLine.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockLine.java
@@ -107,7 +107,7 @@ public class BlockLine implements Accountable {
   }
 
   /**
-   * Reads block lines with terms encoded incrementally inside a block.
+   * Reads/writes block lines with terms encoded incrementally inside a block.
    * This class keeps a state of the previous term read to decode the next term.
    */
   public static class Serializer implements Accountable {
@@ -149,7 +149,7 @@ public class BlockLine implements Accountable {
      *                                  the incremental encoding. {@code true} for the first
      *                                  and middle term, {@code false} for other terms.
      */
-    public static void writeLine(DataOutput blockOutput, BlockLine line, BlockLine previousLine,
+    public void writeLine(DataOutput blockOutput, BlockLine line, BlockLine previousLine,
                                  int termStateRelativeOffset, boolean isIncrementalEncodingSeed)
throws IOException {
       blockOutput.writeVInt(termStateRelativeOffset);
       writeIncrementallyEncodedTerm(line.getTermBytes(), previousLine == null ? null : previousLine.getTermBytes(),
@@ -161,13 +161,13 @@ public class BlockLine implements Accountable {
      *
      * @param termStatesOutput The output pointing to the details region.
      */
-    protected static void writeLineTermState(DataOutput termStatesOutput, BlockLine line,
+    protected void writeLineTermState(DataOutput termStatesOutput, BlockLine line,
                                    FieldInfo fieldInfo, DeltaBaseTermStateSerializer encoder)
throws IOException {
       assert line.termState != null;
       encoder.writeTermState(termStatesOutput, fieldInfo, line.termState);
     }
 
-    protected static void writeIncrementallyEncodedTerm(TermBytes termBytes, TermBytes previousTermBytes,
+    protected void writeIncrementallyEncodedTerm(TermBytes termBytes, TermBytes previousTermBytes,
                                                       boolean isIncrementalEncodingSeed,
DataOutput blockOutput) throws IOException {
       BytesRef term = termBytes.getTerm();
       assert term.offset == 0;
@@ -240,7 +240,7 @@ public class BlockLine implements Accountable {
      * Reads {@code length} bytes from the given {@link DataInput} and stores
      * them at {@code offset} in {@code bytes.bytes}.
      */
-    protected static void readBytes(DataInput input, BytesRef bytes, int offset, int length)
throws IOException {
+    protected void readBytes(DataInput input, BytesRef bytes, int offset, int length) throws
IOException {
       assert bytes.offset == 0;
       bytes.length = offset + length;
       bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length);
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockReader.java
index 5c9c840b..8d4bfc0 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockReader.java
@@ -60,6 +60,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
   protected final FieldMetadata fieldMetadata;
   protected final BlockDecoder blockDecoder;
 
+  protected BlockHeader.Serializer blockHeaderReader;
   protected BlockLine.Serializer blockLineReader;
   /**
    * In-memory read buffer for the current block.
@@ -406,14 +407,27 @@ public class BlockReader extends BaseTermsEnum implements Accountable
{
   protected void initializeBlockReadLazily() throws IOException {
     if (blockStartFP == -1) {
       blockInput = blockInput.clone();
-      blockLineReader = new BlockLine.Serializer();
+      blockHeaderReader = createBlockHeaderSerializer();
+      blockLineReader = createBlockLineSerializer();
       blockReadBuffer = new ByteArrayDataInput();
       termStatesReadBuffer = new ByteArrayDataInput();
-      termStateSerializer = new DeltaBaseTermStateSerializer();
+      termStateSerializer = createDeltaBaseTermStateSerializer();
       scratchBlockBytes = new BytesRef();
     }
   }
 
+  protected BlockHeader.Serializer createBlockHeaderSerializer() {
+    return new BlockHeader.Serializer();
+  }
+
+  protected BlockLine.Serializer createBlockLineSerializer() {
+    return new BlockLine.Serializer();
+  }
+
+  protected DeltaBaseTermStateSerializer createDeltaBaseTermStateSerializer() {
+    return new DeltaBaseTermStateSerializer();
+  }
+
   /**
    * Reads the block header.
    * Sets {@link #blockHeader}.
@@ -428,7 +442,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable
{
     BytesRef blockBytesRef = decodeBlockBytesIfNeeded(numBlockBytes);
     blockReadBuffer.reset(blockBytesRef.bytes, blockBytesRef.offset, blockBytesRef.length);
     termStatesReadBuffer.reset(blockBytesRef.bytes, blockBytesRef.offset, blockBytesRef.length);
-    return blockHeader = BlockHeader.read(blockReadBuffer, blockHeader);
+    return blockHeader = blockHeaderReader.read(blockReadBuffer, blockHeader);
   }
 
   protected BytesRef decodeBlockBytesIfNeeded(int numBlockBytes) throws IOException {
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockWriter.java
index acc397d..a1c3d70 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/BlockWriter.java
@@ -60,6 +60,8 @@ public class BlockWriter {
   protected final ByteBuffersDataOutput blockLinesWriteBuffer;
   protected final ByteBuffersDataOutput termStatesWriteBuffer;
 
+  protected final BlockHeader.Serializer blockHeaderWriter;
+  protected final BlockLine.Serializer blockLineWriter;
   protected final DeltaBaseTermStateSerializer termStateSerializer;
   protected final BlockEncoder blockEncoder;
   protected final ByteBuffersDataOutput blockWriteBuffer;
@@ -81,7 +83,9 @@ public class BlockWriter {
     this.blockEncoder = blockEncoder;
 
     this.blockLines = new ArrayList<>(targetNumBlockLines);
-    this.termStateSerializer = new DeltaBaseTermStateSerializer();
+    this.blockHeaderWriter = createBlockHeaderSerializer();
+    this.blockLineWriter = createBlockLineSerializer();
+    this.termStateSerializer = createDeltaBaseTermStateSerializer();
 
     this.blockLinesWriteBuffer = ByteBuffersDataOutput.newResettableInstance();
     this.termStatesWriteBuffer = ByteBuffersDataOutput.newResettableInstance();
@@ -91,6 +95,18 @@ public class BlockWriter {
     this.scratchBytesRef = new BytesRef();
   }
 
+  protected BlockHeader.Serializer createBlockHeaderSerializer() {
+    return new BlockHeader.Serializer();
+  }
+
+  protected BlockLine.Serializer createBlockLineSerializer() {
+    return new BlockLine.Serializer();
+  }
+
+  protected DeltaBaseTermStateSerializer createDeltaBaseTermStateSerializer() {
+    return new DeltaBaseTermStateSerializer();
+  }
+
   /**
    * Adds a new {@link BlockLine} term for the current field.
    * <p>
@@ -196,7 +212,7 @@ public class BlockWriter {
 
     reusableBlockHeader.reset(blockLines.size(), termStateSerializer.getBaseDocStartFP(),
termStateSerializer.getBasePosStartFP(),
         termStateSerializer.getBasePayStartFP(), Math.toIntExact(blockLinesWriteBuffer.size()),
middleOffset);
-    reusableBlockHeader.write(blockWriteBuffer);
+    blockHeaderWriter.write(blockWriteBuffer, reusableBlockHeader);
 
     blockLinesWriteBuffer.copyTo(blockWriteBuffer);
     termStatesWriteBuffer.copyTo(blockWriteBuffer);
@@ -236,8 +252,8 @@ public class BlockWriter {
 
   protected void writeBlockLine(boolean isIncrementalEncodingSeed, BlockLine line, BlockLine
previousLine) throws IOException {
     assert fieldMetadata != null;
-    BlockLine.Serializer.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()),
isIncrementalEncodingSeed);
-    BlockLine.Serializer.writeLineTermState(termStatesWriteBuffer, line, fieldMetadata.getFieldInfo(),
termStateSerializer);
+    blockLineWriter.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()),
isIncrementalEncodingSeed);
+    blockLineWriter.writeLineTermState(termStatesWriteBuffer, line, fieldMetadata.getFieldInfo(),
termStateSerializer);
   }
 
   /**
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/FieldMetadata.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/FieldMetadata.java
index f4cede0..8e2fc84 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/FieldMetadata.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/FieldMetadata.java
@@ -194,88 +194,99 @@ public class FieldMetadata implements Accountable {
         + (docsSeen == null ? 0 : docsSeen.ramBytesUsed());
   }
 
-  public static FieldMetadata read(DataInput input, FieldInfos fieldInfos, int maxNumDocs)
throws IOException {
-    int fieldId = input.readVInt();
-    FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldId);
-    if (fieldInfo == null) {
-      throw new CorruptIndexException("Illegal field id= " + fieldId, input);
-    }
-    FieldMetadata fieldMetadata = new FieldMetadata(fieldInfo, 0, false);
+  /**
+   * Reads/writes field metadata.
+   */
+  public static class Serializer {
 
-    fieldMetadata.numTerms = input.readVInt();
-    if (fieldMetadata.numTerms <= 0) {
-      throw new CorruptIndexException("Illegal number of terms= " + fieldMetadata.numTerms
+ " for field= " + fieldId, input);
-    }
+    /**
+     * Stateless singleton.
+     */
+    public static final Serializer INSTANCE = new Serializer();
 
-    fieldMetadata.sumDocFreq = input.readVInt();
-    fieldMetadata.sumTotalTermFreq = fieldMetadata.sumDocFreq;
-    if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS)
>= 0) {
-      fieldMetadata.sumTotalTermFreq += input.readVInt();
-      if (fieldMetadata.sumTotalTermFreq < fieldMetadata.sumDocFreq) {
-        // #positions must be >= #postings.
-        throw new CorruptIndexException("Illegal sumTotalTermFreq= " + fieldMetadata.sumTotalTermFreq
-            + " sumDocFreq= " + fieldMetadata.sumDocFreq + " for field= " + fieldId, input);
-      }
-    }
+    public void write(DataOutput output, FieldMetadata fieldMetadata) throws IOException
{
+      assert fieldMetadata.dictionaryStartFP >= 0;
+      assert fieldMetadata.firstBlockStartFP >= 0;
+      assert fieldMetadata.lastBlockStartFP >= 0;
+      assert fieldMetadata.numTerms > 0 : "There should be at least one term for field
" + fieldMetadata.fieldInfo.name + ": " + fieldMetadata.numTerms;
+      assert fieldMetadata.firstBlockStartFP <= fieldMetadata.lastBlockStartFP : "start:
" + fieldMetadata.firstBlockStartFP + " end: " + fieldMetadata.lastBlockStartFP;
+      assert fieldMetadata.lastTerm != null : "you must set the last term";
 
-    fieldMetadata.docCount = input.readVInt();
-    if (fieldMetadata.docCount < 0 || fieldMetadata.docCount > maxNumDocs) {
-      // #docs with field must be <= #docs.
-      throw new CorruptIndexException("Illegal number of docs= " + fieldMetadata.docCount
-          + " maxNumDocs= " + maxNumDocs + " for field=" + fieldId, input);
-    }
-    if (fieldMetadata.sumDocFreq < fieldMetadata.docCount) {
-      // #postings must be >= #docs with field.
-      throw new CorruptIndexException("Illegal sumDocFreq= " + fieldMetadata.sumDocFreq
-          + " docCount= " + fieldMetadata.docCount + " for field= " + fieldId, input);
-    }
+      output.writeVInt(fieldMetadata.fieldInfo.number);
 
-    fieldMetadata.dictionaryStartFP = input.readVLong();
-    fieldMetadata.firstBlockStartFP = input.readVLong();
-    fieldMetadata.lastBlockStartFP = input.readVLong();
-
-    int lastTermLength = input.readVInt();
-    BytesRef lastTerm = new BytesRef(lastTermLength);
-    if (lastTermLength > 0) {
-      input.readBytes(lastTerm.bytes, 0, lastTermLength);
-      lastTerm.length = lastTermLength;
-    } else if (lastTermLength < 0) {
-      throw new CorruptIndexException("Illegal last term length= " + lastTermLength + " for
field= " + fieldId, input);
-    }
-    fieldMetadata.setLastTerm(lastTerm);
-
-    return fieldMetadata;
-  }
+      output.writeVInt(fieldMetadata.numTerms);
+      output.writeVInt(fieldMetadata.sumDocFreq);
 
-  public void write(DataOutput output) throws IOException {
-    assert dictionaryStartFP >= 0;
-    assert firstBlockStartFP >= 0;
-    assert lastBlockStartFP >= 0;
-    assert numTerms > 0 : "There should be at least one term for field " + fieldInfo.name
+ ": " + numTerms;
-    assert firstBlockStartFP <= lastBlockStartFP : "start: " + firstBlockStartFP + " end:
" + lastBlockStartFP;
-    assert lastTerm != null : "you must set the last term";
+      if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS)
>= 0) {
+        assert fieldMetadata.sumTotalTermFreq >= fieldMetadata.sumDocFreq : "sumTotalFQ:
" + fieldMetadata.sumTotalTermFreq + " sumDocFQ: " + fieldMetadata.sumDocFreq;
+        output.writeVInt(fieldMetadata.sumTotalTermFreq - fieldMetadata.sumDocFreq);
+      }
 
-    output.writeVInt(fieldInfo.number);
+      output.writeVInt(fieldMetadata.getDocCount());
 
-    output.writeVInt(numTerms);
-    output.writeVInt(sumDocFreq);
+      output.writeVLong(fieldMetadata.dictionaryStartFP);
+      output.writeVLong(fieldMetadata.firstBlockStartFP);
+      output.writeVLong(fieldMetadata.lastBlockStartFP);
 
-    if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
-      assert sumTotalTermFreq >= sumDocFreq : "sumTotalFQ: " + sumTotalTermFreq + " sumDocFQ:
" + sumDocFreq;
-      output.writeVInt(sumTotalTermFreq - sumDocFreq);
+      if (fieldMetadata.lastTerm.length > 0) {
+        output.writeVInt(fieldMetadata.lastTerm.length);
+        output.writeBytes(fieldMetadata.lastTerm.bytes, fieldMetadata.lastTerm.offset, fieldMetadata.lastTerm.length);
+      } else {
+        output.writeVInt(0);
+      }
     }
 
-    output.writeVInt(getDocCount());
+    public FieldMetadata read(DataInput input, FieldInfos fieldInfos, int maxNumDocs) throws
IOException {
+      int fieldId = input.readVInt();
+      FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldId);
+      if (fieldInfo == null) {
+        throw new CorruptIndexException("Illegal field id= " + fieldId, input);
+      }
+      FieldMetadata fieldMetadata = new FieldMetadata(fieldInfo, 0, false);
+
+      fieldMetadata.numTerms = input.readVInt();
+      if (fieldMetadata.numTerms <= 0) {
+        throw new CorruptIndexException("Illegal number of terms= " + fieldMetadata.numTerms
+ " for field= " + fieldId, input);
+      }
 
-    output.writeVLong(dictionaryStartFP);
-    output.writeVLong(firstBlockStartFP);
-    output.writeVLong(lastBlockStartFP);
+      fieldMetadata.sumDocFreq = input.readVInt();
+      fieldMetadata.sumTotalTermFreq = fieldMetadata.sumDocFreq;
+      if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS)
>= 0) {
+        fieldMetadata.sumTotalTermFreq += input.readVInt();
+        if (fieldMetadata.sumTotalTermFreq < fieldMetadata.sumDocFreq) {
+          // #positions must be >= #postings.
+          throw new CorruptIndexException("Illegal sumTotalTermFreq= " + fieldMetadata.sumTotalTermFreq
+              + " sumDocFreq= " + fieldMetadata.sumDocFreq + " for field= " + fieldId, input);
+        }
+      }
+
+      fieldMetadata.docCount = input.readVInt();
+      if (fieldMetadata.docCount < 0 || fieldMetadata.docCount > maxNumDocs) {
+        // #docs with field must be <= #docs.
+        throw new CorruptIndexException("Illegal number of docs= " + fieldMetadata.docCount
+            + " maxNumDocs= " + maxNumDocs + " for field=" + fieldId, input);
+      }
+      if (fieldMetadata.sumDocFreq < fieldMetadata.docCount) {
+        // #postings must be >= #docs with field.
+        throw new CorruptIndexException("Illegal sumDocFreq= " + fieldMetadata.sumDocFreq
+            + " docCount= " + fieldMetadata.docCount + " for field= " + fieldId, input);
+      }
+
+      fieldMetadata.dictionaryStartFP = input.readVLong();
+      fieldMetadata.firstBlockStartFP = input.readVLong();
+      fieldMetadata.lastBlockStartFP = input.readVLong();
+
+      int lastTermLength = input.readVInt();
+      BytesRef lastTerm = new BytesRef(lastTermLength);
+      if (lastTermLength > 0) {
+        input.readBytes(lastTerm.bytes, 0, lastTermLength);
+        lastTerm.length = lastTermLength;
+      } else if (lastTermLength < 0) {
+        throw new CorruptIndexException("Illegal last term length= " + lastTermLength + "
for field= " + fieldId, input);
+      }
+      fieldMetadata.setLastTerm(lastTerm);
 
-    if (lastTerm.length > 0) {
-      output.writeVInt(lastTerm.length);
-      output.writeBytes(lastTerm.bytes, lastTerm.offset, lastTerm.length);
-    } else {
-      output.writeVInt(0);
+      return fieldMetadata;
     }
   }
 }
\ No newline at end of file
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsReader.java
index f116eee..bda0406 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsReader.java
@@ -69,7 +69,7 @@ public class UniformSplitTermsReader extends FieldsProducer {
    *                     It can be used for decompression or decryption.
    */
   public UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
BlockDecoder blockDecoder) throws IOException {
-    this(postingsReader, state, blockDecoder, NAME, VERSION_START, VERSION_CURRENT,
+    this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE, NAME, VERSION_START,
VERSION_CURRENT,
         TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
    }
    
@@ -77,8 +77,10 @@ public class UniformSplitTermsReader extends FieldsProducer {
    * @param blockDecoder Optional block decoder, may be null if none.
    *                     It can be used for decompression or decryption.
    */
-  protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
BlockDecoder blockDecoder,
-                                     String codecName, int versionStart, int versionCurrent,
String termsBlocksExtension, String dictionaryExtension) throws IOException {
+  protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
+                                    BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader,
+                                     String codecName, int versionStart, int versionCurrent,
+                                    String termsBlocksExtension, String dictionaryExtension)
throws IOException {
      IndexInput dictionaryInput = null;
      IndexInput blockInput = null;
      boolean success = false;
@@ -100,7 +102,7 @@ public class UniformSplitTermsReader extends FieldsProducer {
        CodecUtil.retrieveChecksum(blockInput);
 
        seekFieldsMetadata(blockInput);
-       Collection<FieldMetadata> fieldMetadataCollection = parseFieldsMetadata(blockInput,
state.fieldInfos, state.segmentInfo.maxDoc());
+       Collection<FieldMetadata> fieldMetadataCollection = parseFieldsMetadata(blockInput,
state.fieldInfos, fieldMetadataReader, state.segmentInfo.maxDoc());
 
        fieldToTermsMap = new HashMap<>();
        this.blockInput = blockInput;
@@ -133,19 +135,19 @@ public class UniformSplitTermsReader extends FieldsProducer {
    * @param indexInput {@link IndexInput} must be positioned to the fields metadata
    *                   details by calling {@link #seekFieldsMetadata(IndexInput)} before
this call.
    */
-  protected static Collection<FieldMetadata> parseFieldsMetadata(IndexInput indexInput,
FieldInfos fieldInfos, int maxNumDocs) throws IOException {
+  protected static Collection<FieldMetadata> parseFieldsMetadata(IndexInput indexInput,
FieldInfos fieldInfos,
+                                                                 FieldMetadata.Serializer
fieldMetadataReader, int maxNumDocs) throws IOException {
     int numFields = indexInput.readVInt();
     if (numFields < 0) {
       throw new CorruptIndexException("Illegal number of fields= " + numFields, indexInput);
     }
     Collection<FieldMetadata> fieldMetadataCollection = new ArrayList<>(numFields);
     for (int i = 0; i < numFields; i++) {
-      fieldMetadataCollection.add(FieldMetadata.read(indexInput, fieldInfos, maxNumDocs));
+      fieldMetadataCollection.add(fieldMetadataReader.read(indexInput, fieldInfos, maxNumDocs));
     }
     return fieldMetadataCollection;
   }
 
-
   @Override
   public void close() throws IOException {
     try {
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsWriter.java
index 5adf74d..101b6b5 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/UniformSplitTermsWriter.java
@@ -128,6 +128,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
   protected final int deltaNumLines;
 
   protected final BlockEncoder blockEncoder;
+  protected final FieldMetadata.Serializer fieldMetadataWriter;
   protected final IndexOutput blockOutput;
   protected final IndexOutput dictionaryOutput;
 
@@ -146,7 +147,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
    */
   public UniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
                           int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder)
throws IOException {
-    this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder,
+    this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, FieldMetadata.Serializer.INSTANCE,
         NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
   }
 
@@ -164,7 +165,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
    *                            It can be used for compression or encryption.
    */
   protected UniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState
state,
-                          int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
+                          int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
FieldMetadata.Serializer fieldMetadataWriter,
                           String codecName, int versionCurrent, String termsBlocksExtension,
String dictionaryExtension) throws IOException {
     validateSettings(targetNumBlockLines, deltaNumLines);
     IndexOutput blockOutput = null;
@@ -177,6 +178,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
       this.targetNumBlockLines = targetNumBlockLines;
       this.deltaNumLines = deltaNumLines;
       this.blockEncoder = blockEncoder;
+      this.fieldMetadataWriter = fieldMetadataWriter;
 
       String termsName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix,
termsBlocksExtension);
       blockOutput = state.directory.createOutput(termsName, state.context);
@@ -278,7 +280,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
 
     if (fieldMetadata.getNumTerms() > 0) {
       fieldMetadata.setLastTerm(lastTerm);
-      fieldMetadata.write(fieldsOutput);
+      fieldMetadataWriter.write(fieldsOutput, fieldMetadata);
       writeDictionary(dictionaryBuilder);
       return 1;
     }
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockLine.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockLine.java
index 46f6ab7..7dfe312 100755
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockLine.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockLine.java
@@ -75,7 +75,7 @@ public class STBlockLine extends BlockLine {
     /**
      * Writes all the {@link BlockTermState} of the provided {@link STBlockLine} to the given
output.
      */
-    public static void writeLineTermStates(DataOutput termStatesOutput, STBlockLine line,
+    public void writeLineTermStates(DataOutput termStatesOutput, STBlockLine line,
                                     DeltaBaseTermStateSerializer encoder) throws IOException
{
 
       FieldMetadataTermState fieldMetadataTermState;
@@ -111,7 +111,7 @@ public class STBlockLine extends BlockLine {
      * @return The {@link BlockTermState} corresponding to the provided field id; or null
if the field
      * does not occur in the line.
      */
-    public static BlockTermState readTermStateForField(int fieldId, DataInput termStatesInput,
+    public BlockTermState readTermStateForField(int fieldId, DataInput termStatesInput,
                                                 DeltaBaseTermStateSerializer termStateSerializer,
                                                 BlockHeader blockHeader, FieldInfos fieldInfos,
                                                 BlockTermState reuse) throws IOException
{
@@ -161,7 +161,7 @@ public class STBlockLine extends BlockLine {
      * @param fieldTermStatesMap Map filled with the term states for each field. It is cleared
first.
      * @see #readTermStateForField
      */
-    public static void readFieldTermStatesMap(DataInput termStatesInput,
+    public void readFieldTermStatesMap(DataInput termStatesInput,
                                        DeltaBaseTermStateSerializer termStateSerializer,
                                        BlockHeader blockHeader,
                                        FieldInfos fieldInfos,
@@ -183,7 +183,7 @@ public class STBlockLine extends BlockLine {
     /**
      * Reads all the field ids in the current block line of the provided input.
      */
-    public static int[] readFieldIds(DataInput termStatesInput, int numFields) throws IOException
{
+    public int[] readFieldIds(DataInput termStatesInput, int numFields) throws IOException
{
       int[] fieldIds = new int[numFields];
       for (int i = 0; i < numFields; i++) {
         fieldIds[i] = termStatesInput.readVInt();
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockReader.java
b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockReader.java
index 3f04d92..6d7c79d 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockReader.java
@@ -117,6 +117,11 @@ public class STBlockReader extends BlockReader {
     return blockStartFP > fieldMetadata.getLastBlockStartFP() || super.isBeyondLastTerm(searchedTerm,
blockStartFP);
   }
 
+  @Override
+  protected STBlockLine.Serializer createBlockLineSerializer() {
+    return new STBlockLine.Serializer();
+  }
+
   /**
    * Reads the {@link BlockTermState} on the current line for this reader's field.
    *
@@ -125,7 +130,7 @@ public class STBlockReader extends BlockReader {
   @Override
   protected BlockTermState readTermState() throws IOException {
     termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset()
+ blockLine.getTermStateRelativeOffset());
-    return termState = STBlockLine.Serializer.readTermStateForField(
+    return termState = ((STBlockLine.Serializer) blockLineReader).readTermStateForField(
         fieldMetadata.getFieldInfo().number,
         termStatesReadBuffer,
         termStateSerializer,
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockWriter.java
b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockWriter.java
index 4c37a4e..15e8d54 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockWriter.java
@@ -85,9 +85,14 @@ public class STBlockWriter extends BlockWriter {
   }
 
   @Override
+  protected BlockLine.Serializer createBlockLineSerializer() {
+    return new STBlockLine.Serializer();
+  }
+
+  @Override
   protected void writeBlockLine(boolean isIncrementalEncodingSeed, BlockLine line, BlockLine
previousLine) throws IOException {
-    STBlockLine.Serializer.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()),
isIncrementalEncodingSeed);
-    STBlockLine.Serializer.writeLineTermStates(termStatesWriteBuffer, (STBlockLine) line,
termStateSerializer);
+    blockLineWriter.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()),
isIncrementalEncodingSeed);
+    ((STBlockLine.Serializer) blockLineWriter).writeLineTermStates(termStatesWriteBuffer,
(STBlockLine) line, termStateSerializer);
     ((STBlockLine) line).collectFields(fieldsInBlock);
   }
 
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STIntersectBlockReader.java
b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STIntersectBlockReader.java
index 577aae1..099b6c3 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STIntersectBlockReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STIntersectBlockReader.java
@@ -91,6 +91,11 @@ public class STIntersectBlockReader extends IntersectBlockReader {
     return super.nextBlockMatchingPrefix() && blockHeader != null;
   }
 
+  @Override
+  protected STBlockLine.Serializer createBlockLineSerializer() {
+    return new STBlockLine.Serializer();
+  }
+
   /**
    * Reads the {@link BlockTermState} on the current line for the specific field
   * corresponding to this reader.
@@ -100,7 +105,7 @@ public class STIntersectBlockReader extends IntersectBlockReader {
   @Override
   protected BlockTermState readTermState() throws IOException {
     termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset()
+ blockLine.getTermStateRelativeOffset());
-    return STBlockLine.Serializer.readTermStateForField(
+    return ((STBlockLine.Serializer) blockLineReader).readTermStateForField(
         fieldMetadata.getFieldInfo().number,
         termStatesReadBuffer,
         termStateSerializer,
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STMergingBlockReader.java
b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STMergingBlockReader.java
index 5f135bb..fe6bdbe 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STMergingBlockReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STMergingBlockReader.java
@@ -98,7 +98,7 @@ public class STMergingBlockReader extends STBlockReader {
   public void readFieldTermStatesMap(Map<String, BlockTermState> fieldTermStatesMap)
throws IOException {
     if (term() != null) {
       termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset()
+ blockLine.getTermStateRelativeOffset());
-      STBlockLine.Serializer.readFieldTermStatesMap(
+      ((STBlockLine.Serializer) blockLineReader).readFieldTermStatesMap(
           termStatesReadBuffer,
           termStateSerializer,
           blockHeader,
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsReader.java
b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsReader.java
index 84360b9..50a17bc 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsReader.java
@@ -47,13 +47,15 @@ import static org.apache.lucene.codecs.uniformsplit.sharedterms.STUniformSplitPo
 public class STUniformSplitTermsReader extends UniformSplitTermsReader {
 
   public STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
BlockDecoder blockDecoder) throws IOException {
-    super(postingsReader, state, blockDecoder, NAME, VERSION_START,
-        VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
+    this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE,
+        NAME, VERSION_START, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
   }
 
-  protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState
state, BlockDecoder blockDecoder,
-                                      String codecName, int versionStart, int versionCurrent,
String termsBlocksExtension, String dictionaryExtension) throws IOException {
-    super(postingsReader, state, blockDecoder, codecName, versionStart, versionCurrent, termsBlocksExtension,
dictionaryExtension);
+  protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState
state,
+                                      BlockDecoder blockDecoder, FieldMetadata.Serializer
fieldMetadataReader,
+                                      String codecName, int versionStart, int versionCurrent,
+                                      String termsBlocksExtension, String dictionaryExtension)
throws IOException {
+    super(postingsReader, state, blockDecoder, fieldMetadataReader, codecName, versionStart,
versionCurrent, termsBlocksExtension, dictionaryExtension);
   }
 
   @Override
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsWriter.java
b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsWriter.java
index d121301..ca15d6a 100755
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STUniformSplitTermsWriter.java
@@ -88,13 +88,14 @@ public class STUniformSplitTermsWriter extends UniformSplitTermsWriter
{
 
   public STUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
                                    int targetNumBlockLines, int deltaNumLines, BlockEncoder
blockEncoder) throws IOException {
-    this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, NAME, VERSION_CURRENT,
TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
+    this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, FieldMetadata.Serializer.INSTANCE,
+        NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
   }
 
   protected STUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState
state,
-                                      int targetNumBlockLines, int deltaNumLines, BlockEncoder
blockEncoder,
+                                      int targetNumBlockLines, int deltaNumLines, BlockEncoder
blockEncoder, FieldMetadata.Serializer fieldMetadataWriter,
                                       String codecName, int versionCurrent, String termsBlocksExtension,
String dictionaryExtension) throws IOException {
-    super(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, codecName,
versionCurrent, termsBlocksExtension, dictionaryExtension);
+    super(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, fieldMetadataWriter,
codecName, versionCurrent, termsBlocksExtension, dictionaryExtension);
   }
 
   @Override
@@ -200,7 +201,7 @@ public class STUniformSplitTermsWriter extends UniformSplitTermsWriter
{
     int fieldsNumber = 0;
     for (FieldMetadata fieldMetadata : fieldMetadataList) {
       if (fieldMetadata.getNumTerms() > 0) {
-        fieldMetadata.write(fieldsOutput);
+        fieldMetadataWriter.write(fieldsOutput, fieldMetadata);
         fieldsNumber++;
       }
     }


Mime
View raw message