Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 4AC0211204 for ; Fri, 16 May 2014 14:15:33 +0000 (UTC) Received: (qmail 64153 invoked by uid 500); 16 May 2014 11:32:15 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 45746 invoked by uid 99); 16 May 2014 11:14:34 -0000 Received: from Unknown (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 16 May 2014 11:14:34 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 15 May 2014 18:02:19 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id DBAFE2388868; Thu, 15 May 2014 18:01:54 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1594991 - in /lucene/dev/branches/lucene5675/lucene: codecs/src/java/org/apache/lucene/codecs/idversion/ codecs/src/java/org/apache/lucene/codecs/pulsing/ codecs/src/resources/META-INF/services/ core/src/java/org/apache/lucene/codecs/ core... Date: Thu, 15 May 2014 18:01:54 -0000 To: commits@lucene.apache.org From: mikemccand@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140515180154.DBAFE2388868@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: mikemccand Date: Thu May 15 18:01:53 2014 New Revision: 1594991 URL: http://svn.apache.org/r1594991 Log: LUCENE-5675: move BlockTree* under its own package Added: lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/ lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java - copied, changed from r1594985, lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java - copied, changed from r1594970, lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java (with props) Removed: lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java Modified: lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java lucene/dev/branches/lucene5675/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java Modified: lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff ============================================================================== --- lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java (original) +++ lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java Thu May 15 18:01:53 2014 @@ -19,13 +19,13 @@ package org.apache.lucene.codecs.idversi import java.io.IOException; -import org.apache.lucene.codecs.BlockTreeTermsReader; -import org.apache.lucene.codecs.BlockTreeTermsWriter; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.PostingsWriterBase; +import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader; +import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.IOUtils; Modified: lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff ============================================================================== --- lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java (original) +++ lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java Thu May 15 18:01:53 2014 @@ -17,7 +17,7 @@ package org.apache.lucene.codecs.pulsing * limitations under the License. */ -import org.apache.lucene.codecs.BlockTreeTermsWriter; +import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter; import org.apache.lucene.codecs.lucene41.Lucene41PostingsBaseFormat; import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs Modified: lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff ============================================================================== --- lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java (original) +++ lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java Thu May 15 18:01:53 2014 @@ -19,14 +19,14 @@ package org.apache.lucene.codecs.pulsing import java.io.IOException; -import org.apache.lucene.codecs.BlockTreeTermsReader; -import org.apache.lucene.codecs.BlockTreeTermsWriter; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsBaseFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.PostingsWriterBase; +import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader; +import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.IOUtils; Modified: lucene/dev/branches/lucene5675/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat?rev=1594991&r1=1594990&r2=1594991&view=diff ============================================================================== --- lucene/dev/branches/lucene5675/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (original) +++ lucene/dev/branches/lucene5675/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat Thu May 15 18:01:53 2014 @@ -22,4 +22,5 @@ org.apache.lucene.codecs.memory.FSTPulsi org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat org.apache.lucene.codecs.memory.FSTPostingsFormat org.apache.lucene.codecs.memory.FSTOrdPostingsFormat -org.apache.lucene.codecs.idversion.IDVersionPostingsFormat + +#org.apache.lucene.codecs.idversion.IDVersionPostingsFormat Copied: lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java (from r1594985, lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java) URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java?p2=lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java&p1=lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java&r1=1594985&r2=1594991&rev=1594991&view=diff ============================================================================== --- lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original) +++ lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java Thu May 15 18:01:53 2014 @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs; +package org.apache.lucene.codecs.blocktree; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -17,15 +17,16 @@ package org.apache.lucene.codecs; * limitations under the License. */ -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.PrintStream; -import java.io.UnsupportedEncodingException; import java.util.Collections; import java.util.Iterator; -import java.util.Locale; import java.util.TreeMap; +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; @@ -290,177 +291,6 @@ public class BlockTreeTermsReader extend } } - /** - * BlockTree statistics for a single field - * returned by {@link FieldReader#computeStats()}. - */ - public static class Stats { - /** How many nodes in the index FST. */ - public long indexNodeCount; - - /** How many arcs in the index FST. */ - public long indexArcCount; - - /** Byte size of the index. */ - public long indexNumBytes; - - /** Total number of terms in the field. */ - public long totalTermCount; - - /** Total number of bytes (sum of term lengths) across all terms in the field. */ - public long totalTermBytes; - - /** The number of normal (non-floor) blocks in the terms file. */ - public int nonFloorBlockCount; - - /** The number of floor blocks (meta-blocks larger than the - * allowed {@code maxItemsPerBlock}) in the terms file. */ - public int floorBlockCount; - - /** The number of sub-blocks within the floor blocks. */ - public int floorSubBlockCount; - - /** The number of "internal" blocks (that have both - * terms and sub-blocks). */ - public int mixedBlockCount; - - /** The number of "leaf" blocks (blocks that have only - * terms). */ - public int termsOnlyBlockCount; - - /** The number of "internal" blocks that do not contain - * terms (have only sub-blocks). */ - public int subBlocksOnlyBlockCount; - - /** Total number of blocks. */ - public int totalBlockCount; - - /** Number of blocks at each prefix depth. */ - public int[] blockCountByPrefixLen = new int[10]; - private int startBlockCount; - private int endBlockCount; - - /** Total number of bytes used to store term suffixes. */ - public long totalBlockSuffixBytes; - - /** Total number of bytes used to store term stats (not - * including what the {@link PostingsBaseFormat} - * stores. */ - public long totalBlockStatsBytes; - - /** Total bytes stored by the {@link PostingsBaseFormat}, - * plus the other few vInts stored in the frame. */ - public long totalBlockOtherBytes; - - /** Segment name. */ - public final String segment; - - /** Field name. */ - public final String field; - - Stats(String segment, String field) { - this.segment = segment; - this.field = field; - } - - void startBlock(FieldReader.SegmentTermsEnum.Frame frame, boolean isFloor) { - totalBlockCount++; - if (isFloor) { - if (frame.fp == frame.fpOrig) { - floorBlockCount++; - } - floorSubBlockCount++; - } else { - nonFloorBlockCount++; - } - - if (blockCountByPrefixLen.length <= frame.prefix) { - blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1+frame.prefix); - } - blockCountByPrefixLen[frame.prefix]++; - startBlockCount++; - totalBlockSuffixBytes += frame.suffixesReader.length(); - totalBlockStatsBytes += frame.statsReader.length(); - } - - void endBlock(FieldReader.SegmentTermsEnum.Frame frame) { - final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd; - final int subBlockCount = frame.entCount - termCount; - totalTermCount += termCount; - if (termCount != 0 && subBlockCount != 0) { - mixedBlockCount++; - } else if (termCount != 0) { - termsOnlyBlockCount++; - } else if (subBlockCount != 0) { - subBlocksOnlyBlockCount++; - } else { - throw new IllegalStateException(); - } - endBlockCount++; - final long otherBytes = frame.fpEnd - frame.fp - frame.suffixesReader.length() - frame.statsReader.length(); - assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd; - totalBlockOtherBytes += otherBytes; - } - - void term(BytesRef term) { - totalTermBytes += term.length; - } - - void finish() { - assert startBlockCount == endBlockCount: "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount; - assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount: "floorSubBlockCount=" + floorSubBlockCount + " nonFloorBlockCount=" + nonFloorBlockCount + " totalBlockCount=" + totalBlockCount; - assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount: "totalBlockCount=" + totalBlockCount + " mixedBlockCount=" + mixedBlockCount + " subBlocksOnlyBlockCount=" + subBlocksOnlyBlockCount + " termsOnlyBlockCount=" + termsOnlyBlockCount; - } - - @Override - public String toString() { - final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); - PrintStream out; - try { - out = new PrintStream(bos, false, IOUtils.UTF_8); - } catch (UnsupportedEncodingException bogus) { - throw new RuntimeException(bogus); - } - - out.println(" index FST:"); - out.println(" " + indexNodeCount + " nodes"); - out.println(" " + indexArcCount + " arcs"); - out.println(" " + indexNumBytes + " bytes"); - out.println(" terms:"); - out.println(" " + totalTermCount + " terms"); - out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : "")); - out.println(" blocks:"); - out.println(" " + totalBlockCount + " blocks"); - out.println(" " + termsOnlyBlockCount + " terms-only blocks"); - out.println(" " + subBlocksOnlyBlockCount + " sub-block-only blocks"); - out.println(" " + mixedBlockCount + " mixed blocks"); - out.println(" " + floorBlockCount + " floor blocks"); - out.println(" " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks"); - out.println(" " + floorSubBlockCount + " floor sub-blocks"); - out.println(" " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : "")); - out.println(" " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : "")); - out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : "")); - if (totalBlockCount != 0) { - out.println(" by prefix length:"); - int total = 0; - for(int prefix=0;prefix fstOutputs = ByteSequenceOutputs.getSingleton(); final BytesRef NO_OUTPUT = fstOutputs.getNoOutput(); @@ -1310,7 +1140,7 @@ public class BlockTreeTermsReader extend } // Iterates through terms in this field - private final class SegmentTermsEnum extends TermsEnum { + final class SegmentTermsEnum extends TermsEnum { private IndexInput in; private Frame[] stack; @@ -2308,7 +2138,7 @@ public class BlockTreeTermsReader extend // Not static -- references term, postingsReader, // fieldInfo, in - private final class Frame { + final class Frame { // Our index in stack[]: final int ord; Copied: lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java (from r1594970, lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java) URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java?p2=lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java&p1=lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java&r1=1594970&r2=1594991&rev=1594991&view=diff ============================================================================== --- lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (original) +++ lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java Thu May 15 18:01:53 2014 @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs; +package org.apache.lucene.codecs.blocktree; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -21,6 +21,10 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.codecs.PostingsWriterBase; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; Added: lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java?rev=1594991&view=auto ============================================================================== --- lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java (added) +++ lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java Thu May 15 18:01:53 2014 @@ -0,0 +1,198 @@ +package org.apache.lucene.codecs.blocktree; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.io.UnsupportedEncodingException; +import java.util.Locale; + +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; + +/** + * BlockTree statistics for a single field + * returned by {@link FieldReader#computeStats()}. + */ +public class Stats { + /** How many nodes in the index FST. */ + public long indexNodeCount; + + /** How many arcs in the index FST. */ + public long indexArcCount; + + /** Byte size of the index. */ + public long indexNumBytes; + + /** Total number of terms in the field. */ + public long totalTermCount; + + /** Total number of bytes (sum of term lengths) across all terms in the field. */ + public long totalTermBytes; + + /** The number of normal (non-floor) blocks in the terms file. */ + public int nonFloorBlockCount; + + /** The number of floor blocks (meta-blocks larger than the + * allowed {@code maxItemsPerBlock}) in the terms file. */ + public int floorBlockCount; + + /** The number of sub-blocks within the floor blocks. */ + public int floorSubBlockCount; + + /** The number of "internal" blocks (that have both + * terms and sub-blocks). */ + public int mixedBlockCount; + + /** The number of "leaf" blocks (blocks that have only + * terms). */ + public int termsOnlyBlockCount; + + /** The number of "internal" blocks that do not contain + * terms (have only sub-blocks). */ + public int subBlocksOnlyBlockCount; + + /** Total number of blocks. */ + public int totalBlockCount; + + /** Number of blocks at each prefix depth. */ + public int[] blockCountByPrefixLen = new int[10]; + private int startBlockCount; + private int endBlockCount; + + /** Total number of bytes used to store term suffixes. */ + public long totalBlockSuffixBytes; + + /** Total number of bytes used to store term stats (not + * including what the {@link PostingsBaseFormat} + * stores. */ + public long totalBlockStatsBytes; + + /** Total bytes stored by the {@link PostingsBaseFormat}, + * plus the other few vInts stored in the frame. */ + public long totalBlockOtherBytes; + + /** Segment name. */ + public final String segment; + + /** Field name. */ + public final String field; + + Stats(String segment, String field) { + this.segment = segment; + this.field = field; + } + + void startBlock(BlockTreeTermsReader.FieldReader.SegmentTermsEnum.Frame frame, boolean isFloor) { + totalBlockCount++; + if (isFloor) { + if (frame.fp == frame.fpOrig) { + floorBlockCount++; + } + floorSubBlockCount++; + } else { + nonFloorBlockCount++; + } + + if (blockCountByPrefixLen.length <= frame.prefix) { + blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1+frame.prefix); + } + blockCountByPrefixLen[frame.prefix]++; + startBlockCount++; + totalBlockSuffixBytes += frame.suffixesReader.length(); + totalBlockStatsBytes += frame.statsReader.length(); + } + + void endBlock(BlockTreeTermsReader.FieldReader.SegmentTermsEnum.Frame frame) { + final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd; + final int subBlockCount = frame.entCount - termCount; + totalTermCount += termCount; + if (termCount != 0 && subBlockCount != 0) { + mixedBlockCount++; + } else if (termCount != 0) { + termsOnlyBlockCount++; + } else if (subBlockCount != 0) { + subBlocksOnlyBlockCount++; + } else { + throw new IllegalStateException(); + } + endBlockCount++; + final long otherBytes = frame.fpEnd - frame.fp - frame.suffixesReader.length() - frame.statsReader.length(); + assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd; + totalBlockOtherBytes += otherBytes; + } + + void term(BytesRef term) { + totalTermBytes += term.length; + } + + void finish() { + assert startBlockCount == endBlockCount: "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount; + assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount: "floorSubBlockCount=" + floorSubBlockCount + " nonFloorBlockCount=" + nonFloorBlockCount + " totalBlockCount=" + totalBlockCount; + assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount: "totalBlockCount=" + totalBlockCount + " mixedBlockCount=" + mixedBlockCount + " subBlocksOnlyBlockCount=" + subBlocksOnlyBlockCount + " termsOnlyBlockCount=" + termsOnlyBlockCount; + } + + @Override + public String toString() { + final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); + PrintStream out; + try { + out = new PrintStream(bos, false, IOUtils.UTF_8); + } catch (UnsupportedEncodingException bogus) { + throw new RuntimeException(bogus); + } + + out.println(" index FST:"); + out.println(" " + indexNodeCount + " nodes"); + out.println(" " + indexArcCount + " arcs"); + out.println(" " + indexNumBytes + " bytes"); + out.println(" terms:"); + out.println(" " + totalTermCount + " terms"); + out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : "")); + out.println(" blocks:"); + out.println(" " + totalBlockCount + " blocks"); + out.println(" " + termsOnlyBlockCount + " terms-only blocks"); + out.println(" " + subBlocksOnlyBlockCount + " sub-block-only blocks"); + out.println(" " + mixedBlockCount + " mixed blocks"); + out.println(" " + floorBlockCount + " floor blocks"); + out.println(" " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks"); + out.println(" " + floorSubBlockCount + " floor sub-blocks"); + out.println(" " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : "")); + out.println(" " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : "")); + out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : "")); + if (totalBlockCount != 0) { + out.println(" by prefix length:"); + int total = 0; + for(int prefix=0;prefix blockTreeStats = null; + public Map blockTreeStats = null; } /** @@ -1116,7 +1118,7 @@ public class CheckIndex { } else { if (fieldTerms instanceof BlockTreeTermsReader.FieldReader) { - final BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats(); + final Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats(); assert stats != null; if (status.blockTreeStats == null) { status.blockTreeStats = new HashMap<>(); @@ -1249,7 +1251,7 @@ public class CheckIndex { } if (verbose && status.blockTreeStats != null && infoStream != null && status.termCount > 0) { - for(Map.Entry ent : status.blockTreeStats.entrySet()) { + for(Map.Entry ent : status.blockTreeStats.entrySet()) { infoStream.println(" field \"" + ent.getKey() + "\":"); infoStream.println(" " + ent.getValue().toString().replace("\n", "\n ")); } Modified: lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff ============================================================================== --- lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java (original) +++ lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java Thu May 15 18:01:53 2014 @@ -19,9 +19,9 @@ package org.apache.lucene.codecs.lucene4 import java.io.IOException; -import org.apache.lucene.codecs.BlockTreeTermsWriter; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.PostingsWriterBase; +import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.LuceneTestCase; Modified: lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff ============================================================================== --- lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java (original) +++ lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java Thu May 15 18:01:53 2014 @@ -20,8 +20,6 @@ package org.apache.lucene.codecs.mockran import java.io.IOException; import java.util.Random; -import org.apache.lucene.codecs.BlockTreeTermsReader; -import org.apache.lucene.codecs.BlockTreeTermsWriter; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; @@ -36,6 +34,8 @@ import org.apache.lucene.codecs.blockter import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase; import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader; import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter; +import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader; +import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter; import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader; import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter; import org.apache.lucene.codecs.memory.FSTOrdTermsReader; Modified: lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff ============================================================================== --- lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java (original) +++ lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java Thu May 15 18:01:53 2014 @@ -19,13 +19,13 @@ package org.apache.lucene.codecs.nestedp import java.io.IOException; -import org.apache.lucene.codecs.BlockTreeTermsReader; -import org.apache.lucene.codecs.BlockTreeTermsWriter; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.PostingsWriterBase; +import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader; +import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter; import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader; import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter; import org.apache.lucene.codecs.pulsing.PulsingPostingsReader;