lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1594991 - in /lucene/dev/branches/lucene5675/lucene: codecs/src/java/org/apache/lucene/codecs/idversion/ codecs/src/java/org/apache/lucene/codecs/pulsing/ codecs/src/resources/META-INF/services/ core/src/java/org/apache/lucene/codecs/ core...
Date Thu, 15 May 2014 18:01:54 GMT
Author: mikemccand
Date: Thu May 15 18:01:53 2014
New Revision: 1594991

URL: http://svn.apache.org/r1594991
Log:
LUCENE-5675: move BlockTree* under its own package

Added:
    lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/
    lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
      - copied, changed from r1594985, lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
    lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
      - copied, changed from r1594970, lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
    lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java
  (with props)
Removed:
    lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
    lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
Modified:
    lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java
    lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java
    lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
    lucene/dev/branches/lucene5675/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
    lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
    lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
    lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java
    lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
    lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java

Modified: lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff
==============================================================================
--- lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java (original)
+++ lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsFormat.java Thu May 15 18:01:53 2014
@@ -19,13 +19,13 @@ package org.apache.lucene.codecs.idversi
 
 import java.io.IOException;
 
-import org.apache.lucene.codecs.BlockTreeTermsReader;
-import org.apache.lucene.codecs.BlockTreeTermsWriter;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.IOUtils;

Modified: lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff
==============================================================================
--- lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java (original)
+++ lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java Thu May 15 18:01:53 2014
@@ -17,7 +17,7 @@ package org.apache.lucene.codecs.pulsing
  * limitations under the License.
  */
 
-import org.apache.lucene.codecs.BlockTreeTermsWriter;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
 import org.apache.lucene.codecs.lucene41.Lucene41PostingsBaseFormat;
 import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
 

Modified: lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff
==============================================================================
--- lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java (original)
+++ lucene/dev/branches/lucene5675/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java Thu May 15 18:01:53 2014
@@ -19,14 +19,14 @@ package org.apache.lucene.codecs.pulsing
 
 import java.io.IOException;
 
-import org.apache.lucene.codecs.BlockTreeTermsReader;
-import org.apache.lucene.codecs.BlockTreeTermsWriter;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsBaseFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.IOUtils;

Modified: lucene/dev/branches/lucene5675/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat?rev=1594991&r1=1594990&r2=1594991&view=diff
==============================================================================
--- lucene/dev/branches/lucene5675/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (original)
+++ lucene/dev/branches/lucene5675/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat Thu May 15 18:01:53 2014
@@ -22,4 +22,5 @@ org.apache.lucene.codecs.memory.FSTPulsi
 org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat
 org.apache.lucene.codecs.memory.FSTPostingsFormat
 org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
-org.apache.lucene.codecs.idversion.IDVersionPostingsFormat
+
+#org.apache.lucene.codecs.idversion.IDVersionPostingsFormat

Copied: lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java (from r1594985, lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java?p2=lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java&p1=lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java&r1=1594985&r2=1594991&rev=1594991&view=diff
==============================================================================
--- lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java Thu May 15 18:01:53 2014
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs;
+package org.apache.lucene.codecs.blocktree;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -17,15 +17,16 @@ package org.apache.lucene.codecs;
  * limitations under the License.
  */
 
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
-import java.io.UnsupportedEncodingException;
 import java.util.Collections;
 import java.util.Iterator;
-import java.util.Locale;
 import java.util.TreeMap;
 
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
@@ -290,177 +291,6 @@ public class BlockTreeTermsReader extend
     }
   }
 
-  /**
-   * BlockTree statistics for a single field 
-   * returned by {@link FieldReader#computeStats()}.
-   */
-  public static class Stats {
-    /** How many nodes in the index FST. */
-    public long indexNodeCount;
-
-    /** How many arcs in the index FST. */
-    public long indexArcCount;
-
-    /** Byte size of the index. */
-    public long indexNumBytes;
-
-    /** Total number of terms in the field. */
-    public long totalTermCount;
-
-    /** Total number of bytes (sum of term lengths) across all terms in the field. */
-    public long totalTermBytes;
-
-    /** The number of normal (non-floor) blocks in the terms file. */
-    public int nonFloorBlockCount;
-
-    /** The number of floor blocks (meta-blocks larger than the
-     *  allowed {@code maxItemsPerBlock}) in the terms file. */
-    public int floorBlockCount;
-    
-    /** The number of sub-blocks within the floor blocks. */
-    public int floorSubBlockCount;
-
-    /** The number of "internal" blocks (that have both
-     *  terms and sub-blocks). */
-    public int mixedBlockCount;
-
-    /** The number of "leaf" blocks (blocks that have only
-     *  terms). */
-    public int termsOnlyBlockCount;
-
-    /** The number of "internal" blocks that do not contain
-     *  terms (have only sub-blocks). */
-    public int subBlocksOnlyBlockCount;
-
-    /** Total number of blocks. */
-    public int totalBlockCount;
-
-    /** Number of blocks at each prefix depth. */
-    public int[] blockCountByPrefixLen = new int[10];
-    private int startBlockCount;
-    private int endBlockCount;
-
-    /** Total number of bytes used to store term suffixes. */
-    public long totalBlockSuffixBytes;
-
-    /** Total number of bytes used to store term stats (not
-     *  including what the {@link PostingsBaseFormat}
-     *  stores. */
-    public long totalBlockStatsBytes;
-
-    /** Total bytes stored by the {@link PostingsBaseFormat},
-     *  plus the other few vInts stored in the frame. */
-    public long totalBlockOtherBytes;
-
-    /** Segment name. */
-    public final String segment;
-
-    /** Field name. */
-    public final String field;
-
-    Stats(String segment, String field) {
-      this.segment = segment;
-      this.field = field;
-    }
-
-    void startBlock(FieldReader.SegmentTermsEnum.Frame frame, boolean isFloor) {
-      totalBlockCount++;
-      if (isFloor) {
-        if (frame.fp == frame.fpOrig) {
-          floorBlockCount++;
-        }
-        floorSubBlockCount++;
-      } else {
-        nonFloorBlockCount++;
-      }
-
-      if (blockCountByPrefixLen.length <= frame.prefix) {
-        blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1+frame.prefix);
-      }
-      blockCountByPrefixLen[frame.prefix]++;
-      startBlockCount++;
-      totalBlockSuffixBytes += frame.suffixesReader.length();
-      totalBlockStatsBytes += frame.statsReader.length();
-    }
-
-    void endBlock(FieldReader.SegmentTermsEnum.Frame frame) {
-      final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd;
-      final int subBlockCount = frame.entCount - termCount;
-      totalTermCount += termCount;
-      if (termCount != 0 && subBlockCount != 0) {
-        mixedBlockCount++;
-      } else if (termCount != 0) {
-        termsOnlyBlockCount++;
-      } else if (subBlockCount != 0) {
-        subBlocksOnlyBlockCount++;
-      } else {
-        throw new IllegalStateException();
-      }
-      endBlockCount++;
-      final long otherBytes = frame.fpEnd - frame.fp - frame.suffixesReader.length() - frame.statsReader.length();
-      assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd;
-      totalBlockOtherBytes += otherBytes;
-    }
-
-    void term(BytesRef term) {
-      totalTermBytes += term.length;
-    }
-
-    void finish() {
-      assert startBlockCount == endBlockCount: "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount;
-      assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount: "floorSubBlockCount=" + floorSubBlockCount + " nonFloorBlockCount=" + nonFloorBlockCount + " totalBlockCount=" + totalBlockCount;
-      assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount: "totalBlockCount=" + totalBlockCount + " mixedBlockCount=" + mixedBlockCount + " subBlocksOnlyBlockCount=" + subBlocksOnlyBlockCount + " termsOnlyBlockCount=" + termsOnlyBlockCount;
-    }
-
-    @Override
-    public String toString() {
-      final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
-      PrintStream out;
-      try {
-        out = new PrintStream(bos, false, IOUtils.UTF_8);
-      } catch (UnsupportedEncodingException bogus) {
-        throw new RuntimeException(bogus);
-      }
-      
-      out.println("  index FST:");
-      out.println("    " + indexNodeCount + " nodes");
-      out.println("    " + indexArcCount + " arcs");
-      out.println("    " + indexNumBytes + " bytes");
-      out.println("  terms:");
-      out.println("    " + totalTermCount + " terms");
-      out.println("    " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
-      out.println("  blocks:");
-      out.println("    " + totalBlockCount + " blocks");
-      out.println("    " + termsOnlyBlockCount + " terms-only blocks");
-      out.println("    " + subBlocksOnlyBlockCount + " sub-block-only blocks");
-      out.println("    " + mixedBlockCount + " mixed blocks");
-      out.println("    " + floorBlockCount + " floor blocks");
-      out.println("    " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks");
-      out.println("    " + floorSubBlockCount + " floor sub-blocks");
-      out.println("    " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
-      out.println("    " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
-      out.println("    " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
-      if (totalBlockCount != 0) {
-        out.println("    by prefix length:");
-        int total = 0;
-        for(int prefix=0;prefix<blockCountByPrefixLen.length;prefix++) {
-          final int blockCount = blockCountByPrefixLen[prefix];
-          total += blockCount;
-          if (blockCount != 0) {
-            out.println("      " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount);
-          }
-        }
-        assert totalBlockCount == total;
-      }
-
-      try {
-        return bos.toString(IOUtils.UTF_8);
-      } catch (UnsupportedEncodingException bogus) {
-        throw new RuntimeException(bogus);
-      }
-    }
-  }
-
   final Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
   final BytesRef NO_OUTPUT = fstOutputs.getNoOutput();
 
@@ -1310,7 +1140,7 @@ public class BlockTreeTermsReader extend
     }
 
     // Iterates through terms in this field
-    private final class SegmentTermsEnum extends TermsEnum {
+    final class SegmentTermsEnum extends TermsEnum {
       private IndexInput in;
 
       private Frame[] stack;
@@ -2308,7 +2138,7 @@ public class BlockTreeTermsReader extend
 
       // Not static -- references term, postingsReader,
       // fieldInfo, in
-      private final class Frame {
+      final class Frame {
         // Our index in stack[]:
         final int ord;
 

Copied: lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java (from r1594970, lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java?p2=lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java&p1=lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java&r1=1594970&r2=1594991&rev=1594991&view=diff
==============================================================================
--- lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java Thu May 15 18:01:53 2014
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs;
+package org.apache.lucene.codecs.blocktree;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -21,6 +21,10 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.PostingsWriterBase;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;

Added: lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java?rev=1594991&view=auto
==============================================================================
--- lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java (added)
+++ lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java Thu May 15 18:01:53 2014
@@ -0,0 +1,198 @@
+package org.apache.lucene.codecs.blocktree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Locale;
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * BlockTree statistics for a single field 
+ * returned by {@link FieldReader#computeStats()}.
+ */
+public class Stats {
+  /** How many nodes in the index FST. */
+  public long indexNodeCount;
+
+  /** How many arcs in the index FST. */
+  public long indexArcCount;
+
+  /** Byte size of the index. */
+  public long indexNumBytes;
+
+  /** Total number of terms in the field. */
+  public long totalTermCount;
+
+  /** Total number of bytes (sum of term lengths) across all terms in the field. */
+  public long totalTermBytes;
+
+  /** The number of normal (non-floor) blocks in the terms file. */
+  public int nonFloorBlockCount;
+
+  /** The number of floor blocks (meta-blocks larger than the
+   *  allowed {@code maxItemsPerBlock}) in the terms file. */
+  public int floorBlockCount;
+    
+  /** The number of sub-blocks within the floor blocks. */
+  public int floorSubBlockCount;
+
+  /** The number of "internal" blocks (that have both
+   *  terms and sub-blocks). */
+  public int mixedBlockCount;
+
+  /** The number of "leaf" blocks (blocks that have only
+   *  terms). */
+  public int termsOnlyBlockCount;
+
+  /** The number of "internal" blocks that do not contain
+   *  terms (have only sub-blocks). */
+  public int subBlocksOnlyBlockCount;
+
+  /** Total number of blocks. */
+  public int totalBlockCount;
+
+  /** Number of blocks at each prefix depth. */
+  public int[] blockCountByPrefixLen = new int[10];
+  private int startBlockCount;
+  private int endBlockCount;
+
+  /** Total number of bytes used to store term suffixes. */
+  public long totalBlockSuffixBytes;
+
+  /** Total number of bytes used to store term stats (not
+   *  including what the {@link PostingsBaseFormat}
+   *  stores. */
+  public long totalBlockStatsBytes;
+
+  /** Total bytes stored by the {@link PostingsBaseFormat},
+   *  plus the other few vInts stored in the frame. */
+  public long totalBlockOtherBytes;
+
+  /** Segment name. */
+  public final String segment;
+
+  /** Field name. */
+  public final String field;
+
+  Stats(String segment, String field) {
+    this.segment = segment;
+    this.field = field;
+  }
+
+  void startBlock(BlockTreeTermsReader.FieldReader.SegmentTermsEnum.Frame frame, boolean isFloor) {
+    totalBlockCount++;
+    if (isFloor) {
+      if (frame.fp == frame.fpOrig) {
+        floorBlockCount++;
+      }
+      floorSubBlockCount++;
+    } else {
+      nonFloorBlockCount++;
+    }
+
+    if (blockCountByPrefixLen.length <= frame.prefix) {
+      blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1+frame.prefix);
+    }
+    blockCountByPrefixLen[frame.prefix]++;
+    startBlockCount++;
+    totalBlockSuffixBytes += frame.suffixesReader.length();
+    totalBlockStatsBytes += frame.statsReader.length();
+  }
+
+  void endBlock(BlockTreeTermsReader.FieldReader.SegmentTermsEnum.Frame frame) {
+    final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd;
+    final int subBlockCount = frame.entCount - termCount;
+    totalTermCount += termCount;
+    if (termCount != 0 && subBlockCount != 0) {
+      mixedBlockCount++;
+    } else if (termCount != 0) {
+      termsOnlyBlockCount++;
+    } else if (subBlockCount != 0) {
+      subBlocksOnlyBlockCount++;
+    } else {
+      throw new IllegalStateException();
+    }
+    endBlockCount++;
+    final long otherBytes = frame.fpEnd - frame.fp - frame.suffixesReader.length() - frame.statsReader.length();
+    assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd;
+    totalBlockOtherBytes += otherBytes;
+  }
+
+  void term(BytesRef term) {
+    totalTermBytes += term.length;
+  }
+
+  void finish() {
+    assert startBlockCount == endBlockCount: "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount;
+    assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount: "floorSubBlockCount=" + floorSubBlockCount + " nonFloorBlockCount=" + nonFloorBlockCount + " totalBlockCount=" + totalBlockCount;
+    assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount: "totalBlockCount=" + totalBlockCount + " mixedBlockCount=" + mixedBlockCount + " subBlocksOnlyBlockCount=" + subBlocksOnlyBlockCount + " termsOnlyBlockCount=" + termsOnlyBlockCount;
+  }
+
+  @Override
+  public String toString() {
+    final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+    PrintStream out;
+    try {
+      out = new PrintStream(bos, false, IOUtils.UTF_8);
+    } catch (UnsupportedEncodingException bogus) {
+      throw new RuntimeException(bogus);
+    }
+      
+    out.println("  index FST:");
+    out.println("    " + indexNodeCount + " nodes");
+    out.println("    " + indexArcCount + " arcs");
+    out.println("    " + indexNumBytes + " bytes");
+    out.println("  terms:");
+    out.println("    " + totalTermCount + " terms");
+    out.println("    " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
+    out.println("  blocks:");
+    out.println("    " + totalBlockCount + " blocks");
+    out.println("    " + termsOnlyBlockCount + " terms-only blocks");
+    out.println("    " + subBlocksOnlyBlockCount + " sub-block-only blocks");
+    out.println("    " + mixedBlockCount + " mixed blocks");
+    out.println("    " + floorBlockCount + " floor blocks");
+    out.println("    " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks");
+    out.println("    " + floorSubBlockCount + " floor sub-blocks");
+    out.println("    " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
+    out.println("    " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
+    out.println("    " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
+    if (totalBlockCount != 0) {
+      out.println("    by prefix length:");
+      int total = 0;
+      for(int prefix=0;prefix<blockCountByPrefixLen.length;prefix++) {
+        final int blockCount = blockCountByPrefixLen[prefix];
+        total += blockCount;
+        if (blockCount != 0) {
+          out.println("      " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount);
+        }
+      }
+      assert totalBlockCount == total;
+    }
+
+    try {
+      return bos.toString(IOUtils.UTF_8);
+    } catch (UnsupportedEncodingException bogus) {
+      throw new RuntimeException(bogus);
+    }
+  }
+}

Modified: lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff
==============================================================================
--- lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java (original)
+++ lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java Thu May 15 18:01:53 2014
@@ -19,14 +19,14 @@ package org.apache.lucene.codecs.lucene4
 
 import java.io.IOException;
 
-import org.apache.lucene.codecs.BlockTreeTermsReader;
-import org.apache.lucene.codecs.BlockTreeTermsWriter;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.PostingsWriterBase; // javadocs
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
 import org.apache.lucene.index.DocsEnum; // javadocs
 import org.apache.lucene.index.FieldInfo.IndexOptions; // javadocs
 import org.apache.lucene.index.FieldInfos; // javadocs

Modified: lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff
==============================================================================
--- lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java (original)
+++ lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java Thu May 15 18:01:53 2014
@@ -20,8 +20,6 @@ package org.apache.lucene.codecs.lucene4
 
 import java.io.IOException;
 
-import org.apache.lucene.codecs.BlockTreeTermsReader;
-import org.apache.lucene.codecs.BlockTreeTermsWriter;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
@@ -29,6 +27,8 @@ import org.apache.lucene.codecs.MultiLev
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.SegmentReadState;

Modified: lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1594991&r1=1594990&r2=1594991&view=diff
==============================================================================
--- lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/lucene5675/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Thu May 15 18:01:53 2014
@@ -28,9 +28,10 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 
-import org.apache.lucene.codecs.BlockTreeTermsReader;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
+import org.apache.lucene.codecs.blocktree.Stats;
 import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -45,6 +46,7 @@ import org.apache.lucene.util.FixedBitSe
 import org.apache.lucene.util.LongBitSet;
 import org.apache.lucene.util.StringHelper;
 
+
 /**
  * Basic tool and API to check the health of an index and
  * write a new segments file that removes reference to
@@ -237,7 +239,7 @@ public class CheckIndex {
        *  tree terms dictionary (this is only set if the
        *  {@link PostingsFormat} for this segment uses block
        *  tree. */
-      public Map<String,BlockTreeTermsReader.Stats> blockTreeStats = null;
+      public Map<String,Stats> blockTreeStats = null;
     }
 
     /**
@@ -1116,7 +1118,7 @@ public class CheckIndex {
         
       } else {
         if (fieldTerms instanceof BlockTreeTermsReader.FieldReader) {
-          final BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats();
+          final Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats();
           assert stats != null;
           if (status.blockTreeStats == null) {
             status.blockTreeStats = new HashMap<>();
@@ -1249,7 +1251,7 @@ public class CheckIndex {
     }
     
     if (verbose && status.blockTreeStats != null && infoStream != null && status.termCount > 0) {
-      for(Map.Entry<String,BlockTreeTermsReader.Stats> ent : status.blockTreeStats.entrySet()) {
+      for(Map.Entry<String,Stats> ent : status.blockTreeStats.entrySet()) {
         infoStream.println("      field \"" + ent.getKey() + "\":");
         infoStream.println("      " + ent.getValue().toString().replace("\n", "\n      "));
       }

Modified: lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff
==============================================================================
--- lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java
(original)
+++ lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java
Thu May 15 18:01:53 2014
@@ -19,9 +19,9 @@ package org.apache.lucene.codecs.lucene4
 
 import java.io.IOException;
 
-import org.apache.lucene.codecs.BlockTreeTermsWriter;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.LuceneTestCase;
 

Modified: lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff
==============================================================================
--- lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
(original)
+++ lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
Thu May 15 18:01:53 2014
@@ -20,8 +20,6 @@ package org.apache.lucene.codecs.mockran
 import java.io.IOException;
 import java.util.Random;
 
-import org.apache.lucene.codecs.BlockTreeTermsReader;
-import org.apache.lucene.codecs.BlockTreeTermsWriter;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
@@ -36,6 +34,8 @@ import org.apache.lucene.codecs.blockter
 import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
 import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader;
 import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
 import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
 import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
 import org.apache.lucene.codecs.memory.FSTOrdTermsReader;

Modified: lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java?rev=1594991&r1=1594990&r2=1594991&view=diff
==============================================================================
--- lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
(original)
+++ lucene/dev/branches/lucene5675/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
Thu May 15 18:01:53 2014
@@ -19,13 +19,13 @@ package org.apache.lucene.codecs.nestedp
 
 import java.io.IOException;
 
-import org.apache.lucene.codecs.BlockTreeTermsReader;
-import org.apache.lucene.codecs.BlockTreeTermsWriter;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
 import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
 import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
 import org.apache.lucene.codecs.pulsing.PulsingPostingsReader;



Mime
View raw message