lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jpou...@apache.org
Subject [lucene-solr] branch branch_8x updated: LUCENE-9056: Fewer conditionals in #advance. (#1021)
Date Thu, 21 Nov 2019 17:52:34 GMT
This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 559e730  LUCENE-9056: Fewer conditionals in #advance. (#1021)
559e730 is described below

commit 559e73080aa2b09b9edee9a4f50220d89b730c2d
Author: Adrien Grand <jpountz@gmail.com>
AuthorDate: Thu Nov 21 18:20:12 2019 +0100

    LUCENE-9056: Fewer conditionals in #advance. (#1021)
---
 .../lucene/codecs/lucene84/ForDeltaUtil.java       |  2 +-
 .../codecs/lucene84/Lucene84PostingsReader.java    | 68 +++++++++++++---------
 2 files changed, 43 insertions(+), 27 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene84/ForDeltaUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene84/ForDeltaUtil.java
index 862d049..904d865 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene84/ForDeltaUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene84/ForDeltaUtil.java
@@ -38,7 +38,7 @@ public class ForDeltaUtil {
   private static void prefixSumOfOnes(long[] arr, long base) {
     System.arraycopy(IDENTITY_PLUS_ONE, 0, arr, 0, ForUtil.BLOCK_SIZE);
     // This loop gets auto-vectorized
-    for (int i = 0; i < arr.length; ++i) {
+    for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
       arr[i] += base;
     }
   }
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsReader.java
index c49b0ce..fe4514d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsReader.java
@@ -267,7 +267,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
     final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
     final PForUtil pforUtil = new PForUtil(forUtil);
 
-    private final long[] docBuffer = new long[BLOCK_SIZE];
+    private final long[] docBuffer = new long[BLOCK_SIZE+1];
     private final long[] freqBuffer = new long[BLOCK_SIZE];
 
     private int docBufferUpto;
@@ -285,7 +285,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
 
     private int docFreq;                              // number of docs in this posting list
     private long totalTermFreq;                       // sum of freqBuffer in this posting list (or docFreq when omitted)
-    private int docUpto;                              // how many docs we've read
+    private int blockUpto;                            // number of docs in or before the current block
     private int doc;                                  // doc we last read
     private long accum;                               // accumulator for doc deltas
 
@@ -313,7 +313,9 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
       indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
       indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
       indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
-      indexHasPayloads = fieldInfo.hasPayloads(); 
+      indexHasPayloads = fieldInfo.hasPayloads();
+      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in advance()
+      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
     }
 
     public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
@@ -346,7 +348,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
         }
       }
       accum = 0;
-      docUpto = 0;
+      blockUpto = 0;
       nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
       docBufferUpto = BLOCK_SIZE;
       skipped = false;
@@ -394,7 +396,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
         isFreqsRead = true;
       }
       
-      final int left = docFreq - docUpto;
+      final int left = docFreq - blockUpto;
       assert left >= 0;
 
       if (left >= BLOCK_SIZE) {
@@ -407,18 +409,22 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
             pforUtil.skip(docIn); // skip over freqBuffer if we don't need them at all
           }
         }
+        blockUpto += BLOCK_SIZE;
       } else if (docFreq == 1) {
         docBuffer[0] = singletonDocID;
         freqBuffer[0] = totalTermFreq;
         Arrays.fill(docBuffer, 1, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
+        blockUpto++;
       } else {
         // Read vInts:
         readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
         prefixSum(docBuffer, left, accum);
         Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
+        blockUpto += left;
       }
       accum = docBuffer[BLOCK_SIZE - 1];
       docBufferUpto = 0;
+      assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
     }
 
     @Override
@@ -428,7 +434,6 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
       }
 
       doc = (int) docBuffer[docBufferUpto];
-      docUpto++;
       docBufferUpto++;
       return doc;
     }
@@ -460,10 +465,10 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
         // is a little different from MultiLevelSkipListReader
         final int newDocUpto = skipper.skipTo(target) + 1; 
 
-        if (newDocUpto > docUpto) {
+        if (newDocUpto >= blockUpto) {
           // Skipper moved
           assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
-          docUpto = newDocUpto;
+          blockUpto = newDocUpto;
 
           // Force to read next block
           docBufferUpto = BLOCK_SIZE;
@@ -487,14 +492,11 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
       long doc;
       while (true) {
         doc = docBuffer[docBufferUpto];
-        docUpto++;
 
         if (doc >= target) {
           break;
         }
-        if (++docBufferUpto == BLOCK_SIZE) {
-          return this.doc = NO_MORE_DOCS;
-        }
+        ++docBufferUpto;
       }
 
       docBufferUpto++;
@@ -989,7 +991,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
     final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
     final PForUtil pforUtil = new PForUtil(forUtil);
 
-    private final long[] docBuffer = new long[BLOCK_SIZE];
+    private final long[] docBuffer = new long[BLOCK_SIZE+1];
     private final long[] freqBuffer = new long[BLOCK_SIZE];
 
     private int docBufferUpto;
@@ -1001,16 +1003,18 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
     final boolean indexHasFreqs;
 
     private int docFreq;                              // number of docs in this posting list
-    private int docUpto;                              // how many docs we've read
+    private int blockUpto;                            // number of documents in or before the current block
     private int doc;                                  // doc we last read
     private long accum;                               // accumulator for doc deltas
-    private int freq;                                 // freq we last read
-
 
     private int nextSkipDoc = -1;
 
     private long seekTo = -1;
 
+    // as we read freqBuffer lazily, isFreqsRead shows if freqBuffer are read for the current block
+    // always true when we don't have freqBuffer (indexHasFreq=false) or don't need freqBuffer (needsFreq=false)
+    private boolean isFreqsRead;
+
     public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState) throws IOException {
       indexHasFreqs = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
       final boolean indexHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
@@ -1024,7 +1028,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
 
       doc = -1;
       accum = 0;
-      docUpto = 0;
+      blockUpto = 0;
       docBufferUpto = BLOCK_SIZE;
 
       skipper = new Lucene84ScoreSkipReader(docIn.clone(),
@@ -1034,6 +1038,9 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
           indexHasPayloads);
       skipper.init(termState.docStartFP+termState.skipOffset, termState.docStartFP, termState.posStartFP, termState.payStartFP, docFreq);
 
+      // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in advance()
+      docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
+      this.isFreqsRead = true;
       if (indexHasFreqs == false) {
         Arrays.fill(freqBuffer, 1L);
       }
@@ -1041,7 +1048,11 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
 
     @Override
     public int freq() throws IOException {
-      return freq;
+      if (isFreqsRead == false) {
+        pforUtil.decode(docIn, freqBuffer); // read freqBuffer for this block
+        isFreqsRead = true;
+      }
+      return (int) freqBuffer[docBufferUpto-1];
     }
 
     @Override
@@ -1050,7 +1061,13 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
     }
 
     private void refillDocs() throws IOException {
-      final int left = docFreq - docUpto;
+      // Check if we skipped reading the previous block of freqBuffer, and if yes, position docIn after it
+      if (isFreqsRead == false) {
+        pforUtil.skip(docIn);
+        isFreqsRead = true;
+      }
+
+      final int left = docFreq - blockUpto;
       assert left >= 0;
 
       if (left >= BLOCK_SIZE) {
@@ -1058,13 +1075,16 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
         if (indexHasFreqs) {
           pforUtil.decode(docIn, freqBuffer);
         }
+        blockUpto += BLOCK_SIZE;
       } else {
         readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs);
         prefixSum(docBuffer, left, accum);
         Arrays.fill(docBuffer, left, BLOCK_SIZE, DocIdSetIterator.NO_MORE_DOCS);
+        blockUpto += left;
       }
       accum = docBuffer[BLOCK_SIZE - 1];
       docBufferUpto = 0;
+      assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
     }
 
     @Override
@@ -1074,10 +1094,10 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
         // is a little different from MultiLevelSkipListReader
         final int newDocUpto = skipper.skipTo(target) + 1;
 
-        if (newDocUpto > docUpto) {
+        if (newDocUpto >= blockUpto) {
           // Skipper moved
           assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
-          docUpto = newDocUpto;
+          blockUpto = newDocUpto;
 
           // Force to read next block
           docBufferUpto = BLOCK_SIZE;
@@ -1110,18 +1130,14 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
       if (docBufferUpto == BLOCK_SIZE) {
         if (seekTo >= 0) {
           docIn.seek(seekTo);
+          isFreqsRead = true; // reset isFreqsRead
           seekTo = -1;
         }
         refillDocs();
       }
 
       int next = findFirstGreater(docBuffer, target, docBufferUpto);
-      if (next == BLOCK_SIZE) {
-        return doc = NO_MORE_DOCS;
-      }
       this.doc = (int) docBuffer[next];
-      this.freq = (int) freqBuffer[next];
-      docUpto += next - docBufferUpto + 1;
       docBufferUpto = next + 1;
       return doc;
     }


Mime
View raw message