Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 5CD35D101 for ; Wed, 8 Aug 2012 23:02:54 +0000 (UTC) Received: (qmail 60888 invoked by uid 500); 8 Aug 2012 23:02:54 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 60880 invoked by uid 99); 8 Aug 2012 23:02:54 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 08 Aug 2012 23:02:54 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 08 Aug 2012 23:02:50 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id F23F9238890D; Wed, 8 Aug 2012 23:02:05 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1371010 - in /lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block: BlockPostingsReader.java BlockSkipWriter.java gendecompress.py Date: Wed, 08 Aug 2012 23:02:05 -0000 To: commits@lucene.apache.org From: mikemccand@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120808230205.F23F9238890D@eris.apache.org> Author: mikemccand Date: Wed Aug 8 23:02:05 2012 New Revision: 1371010 URL: http://svn.apache.org/viewvc?rev=1371010&view=rev Log: LUCENE-4283: further optimize scan-after-advance Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java?rev=1371010&r1=1371009&r2=1371010&view=diff ============================================================================== --- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java (original) +++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java Wed Aug 8 23:02:05 2012 @@ -412,7 +412,6 @@ public final class BlockPostingsReader e } private void refillDocs() throws IOException { - //System.out.println("["+docFreq+"]"+" refillDoc"); final int left = docFreq - docUpto; assert left > 0; @@ -451,7 +450,6 @@ public final class BlockPostingsReader e } return doc = NO_MORE_DOCS; } - //System.out.println("["+docFreq+"]"+" nextDoc"); if (docBufferUpto == BLOCK_SIZE) { refillDocs(); } @@ -510,15 +508,15 @@ public final class BlockPostingsReader e skipped = true; } - final int newDocUpto = skipper.skipTo(target); + final int newDocUpto = skipper.skipTo(target) + 1; if (newDocUpto > docUpto) { // Skipper moved if (DEBUG) { System.out.println("skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer()); } - assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE-1): "got " + newDocUpto; - docUpto = newDocUpto+1; + assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; + docUpto = newDocUpto; // Force to read next block docBufferUpto = BLOCK_SIZE; @@ -527,6 +525,12 @@ public final class BlockPostingsReader e } nextSkipDoc = skipper.getNextSkipDoc(); } + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } + if (docBufferUpto == BLOCK_SIZE) { + refillDocs(); + } // Now scan... this is an inlined/pared down version // of nextDoc(): @@ -534,18 +538,6 @@ public final class BlockPostingsReader e if (DEBUG) { System.out.println(" scan doc=" + accum + " docBufferUpto=" + docBufferUpto); } - if (docUpto == docFreq) { - return doc = NO_MORE_DOCS; - } - - // nocommit: in theory we should not hit this? ie - // skipper should already have moved us to the block - // containing the doc? yet assert false trips ... i - // think because if you advance w/o having done a - // nextDoc yet()... can we assert/remove this? - if (docBufferUpto == BLOCK_SIZE) { - refillDocs(); - } accum += docDeltaBuffer[docBufferUpto]; docUpto++; @@ -553,6 +545,9 @@ public final class BlockPostingsReader e break; } docBufferUpto++; + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } } if (liveDocs == null || liveDocs.get(accum)) { @@ -692,9 +687,9 @@ public final class BlockPostingsReader e } private void refillDocs() throws IOException { - //System.out.println("["+docFreq+"]"+" refillDoc"); final int left = docFreq - docUpto; assert left > 0; + if (left >= BLOCK_SIZE) { if (DEBUG) { System.out.println(" fill doc block from fp=" + docIn.getFilePointer()); @@ -761,7 +756,6 @@ public final class BlockPostingsReader e if (docUpto == docFreq) { return doc = NO_MORE_DOCS; } - //System.out.println("["+docFreq+"]"+" nextDoc"); if (docBufferUpto == BLOCK_SIZE) { refillDocs(); } @@ -823,7 +817,7 @@ public final class BlockPostingsReader e skipped = true; } - final int newDocUpto = skipper.skipTo(target); + final int newDocUpto = skipper.skipTo(target) + 1; if (newDocUpto > docUpto) { // Skipper moved @@ -831,8 +825,8 @@ public final class BlockPostingsReader e System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto()); } - assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE-1): "got " + newDocUpto; - docUpto = newDocUpto+1; + assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; + docUpto = newDocUpto; // Force to read next block docBufferUpto = BLOCK_SIZE; @@ -843,6 +837,12 @@ public final class BlockPostingsReader e } nextSkipDoc = skipper.getNextSkipDoc(); } + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } + if (docBufferUpto == BLOCK_SIZE) { + refillDocs(); + } // Now scan... this is an inlined/pared down version // of nextDoc(): @@ -853,16 +853,6 @@ public final class BlockPostingsReader e if (docUpto == docFreq) { return doc = NO_MORE_DOCS; } - // nocommit: in theory we should not hit this? ie - // skipper should already have moved us to the block - // containing the doc? yet assert false trips ... i - // think because if you advance w/o having done a - // nextDoc yet()... can we assert/remove this? - if (docBufferUpto == BLOCK_SIZE) { - // nocommit hmm skip freq? but: we don't ever - // scan over more than one block? - refillDocs(); - } accum += docDeltaBuffer[docBufferUpto]; freq = freqBuffer[docBufferUpto]; posPendingCount += freq; @@ -872,6 +862,9 @@ public final class BlockPostingsReader e if (accum >= target) { break; } + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } } if (liveDocs == null || liveDocs.get(accum)) { @@ -1138,7 +1131,6 @@ public final class BlockPostingsReader e } private void refillDocs() throws IOException { - //System.out.println("["+docFreq+"]"+" refillDoc"); final int left = docFreq - docUpto; assert left > 0; @@ -1254,7 +1246,6 @@ public final class BlockPostingsReader e if (docUpto == docFreq) { return doc = NO_MORE_DOCS; } - //System.out.println("["+docFreq+"]"+" nextDoc"); if (docBufferUpto == BLOCK_SIZE) { refillDocs(); } @@ -1320,15 +1311,15 @@ public final class BlockPostingsReader e skipped = true; } - final int newDocUpto = skipper.skipTo(target); + final int newDocUpto = skipper.skipTo(target) + 1; if (newDocUpto > docUpto) { // Skipper moved if (DEBUG) { System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto() + " pay.fp=" + skipper.getPayPointer() + " lastStartOffset=" + lastStartOffset); } - assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE-1): "got " + newDocUpto; - docUpto = newDocUpto+1; + assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; + docUpto = newDocUpto; // Force to read next block docBufferUpto = BLOCK_SIZE; @@ -1342,24 +1333,50 @@ public final class BlockPostingsReader e } nextSkipDoc = skipper.getNextSkipDoc(); } - - // nocommit inline nextDoc here + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } + if (docBufferUpto == BLOCK_SIZE) { + refillDocs(); + } // Now scan: - while (nextDoc() != NO_MORE_DOCS) { - if (doc >= target) { - if (DEBUG) { - System.out.println(" advance return doc=" + doc); - } - return doc; + while (true) { + if (DEBUG) { + System.out.println(" scan doc=" + accum + " docBufferUpto=" + docBufferUpto); } - } + accum += docDeltaBuffer[docBufferUpto]; + freq = freqBuffer[docBufferUpto]; + posPendingCount += freq; + docBufferUpto++; + docUpto++; - if (DEBUG) { - System.out.println(" advance return doc=END"); + if (accum >= target) { + break; + } + if (docUpto == docFreq) { + return doc = NO_MORE_DOCS; + } } - return NO_MORE_DOCS; + if (liveDocs == null || liveDocs.get(accum)) { + if (DEBUG) { + System.out.println(" return doc=" + accum); + } + if (indexHasPayloads) { + payloadByteUpto += payloadLength; + payloadLength = 0; + } + position = 0; + payloadLength = 0; + lastStartOffset = 0; + return doc = accum; + } else { + if (DEBUG) { + System.out.println(" now do nextDoc()"); + } + return nextDoc(); + } } // nocommit in theory we could avoid loading frq block Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java?rev=1371010&r1=1371009&r2=1371010&view=diff ============================================================================== --- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java (original) +++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java Wed Aug 8 23:02:05 2012 @@ -37,7 +37,7 @@ import org.apache.lucene.codecs.MultiLev * block, only record skip data at the start its start point(if it exist). * * For each skip point, we will record: - * 1. lastDocID, + * 1. docID in former position, i.e. for position 12, record docID[11], etc. * 2. its related file points(position, payload), * 3. related numbers or uptos(position, payload). * 4. start offset. Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py?rev=1371010&r1=1371009&r2=1371010&view=diff ============================================================================== --- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py (original) +++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py Wed Aug 8 23:02:05 2012 @@ -81,7 +81,7 @@ def genDecompress(): w(' Arrays.fill(output, compressedBuffer.get());\n') w(' }\n') - for numFrameBits in xrange(1, 33): + for numFrameBits in xrange(1, 32): w(' public static void decode%d(final IntBuffer compressedBuffer, final int[] output) {\n' % numFrameBits) w(' final int numFrameBits = %d;\n' % numFrameBits) w(' final int mask = (int) ((1L<