incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amccu...@apache.org
Subject [12/13] git commit: Removing thread local variables.
Date Tue, 30 Aug 2016 01:57:57 GMT
http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/991fb043/blur-store/src/main/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
----------------------------------------------------------------------
diff --git a/blur-store/src/main/java/org/apache/lucene/codecs/BlockTreeTermsReader.java b/blur-store/src/main/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
new file mode 100644
index 0000000..4ef65ff
--- /dev/null
+++ b/blur-store/src/main/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@@ -0,0 +1,2939 @@
+package org.apache.lucene.codecs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.TreeMap;
+
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.RunAutomaton;
+import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.fst.ByteSequenceOutputs;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.Outputs;
+import org.apache.lucene.util.fst.Util;
+
+/** A block-based terms index and dictionary that assigns
+ *  terms to variable length blocks according to how they
+ *  share prefixes.  The terms index is a prefix trie
+ *  whose leaves are term blocks.  The advantage of this
+ *  approach is that seekExact is often able to
+ *  determine a term cannot exist without doing any IO, and
+ *  intersection with Automata is very fast.  Note that this
+ *  terms dictionary has its own fixed terms index (ie, it
+ *  does not support a pluggable terms index
+ *  implementation).
+ *
+ *  <p><b>NOTE</b>: this terms dictionary does not support
+ *  index divisor when opening an IndexReader.  Instead, you
+ *  can change the min/maxItemsPerBlock during indexing.</p>
+ *
+ *  <p>The data structure used by this implementation is very
+ *  similar to a burst trie
+ *  (http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499),
+ *  but with added logic to break up too-large blocks of all
+ *  terms sharing a given prefix into smaller ones.</p>
+ *
+ *  <p>Use {@link org.apache.lucene.index.CheckIndex} with the <code>-verbose</code>
+ *  option to see summary statistics on the blocks in the
+ *  dictionary.
+ *
+ *  See {@link BlockTreeTermsWriter}.
+ *
+ * @lucene.experimental
+ */
+
+public class BlockTreeTermsReader extends FieldsProducer {
+
+  // Open input to the main terms dict file (_X.tib)
+  private final IndexInput in;
+
+  //private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG;
+
+  // Reads the terms dict entries, to gather state to
+  // produce DocsEnum on demand
+  private final PostingsReaderBase postingsReader;
+
+  // Per-field readers keyed by field name; filled by the constructor
+  // and emptied again by close()
+  private final TreeMap<String,FieldReader> fields = new TreeMap<String,FieldReader>();
+
+  /** File offset where the directory starts in the terms file. */
+  private long dirOffset;
+
+  /** File offset where the directory starts in the index file. */
+  private long indexDirOffset;
+
+  // Name of the segment being read (SegmentInfo.name)
+  private String segment;
+  
+  // Format version returned by readHeader() for the terms file
+  private final int version;
+
+  // Open input to the terms index file, or null when the reader was
+  // created with indexDivisor == -1
+  private final IndexInput indexIn;
+
+  /**
+   * Sole constructor.  Opens the terms dictionary file (and, unless
+   * {@code indexDivisor} is {@code -1}, the terms index file) of the
+   * segment, checks their headers, and reads the per-field summary
+   * records into {@link FieldReader} instances.
+   *
+   * <p>If anything fails after the terms file has been opened, this
+   * reader is closed (releasing every input opened so far and the
+   * postings reader) before the exception propagates.
+   *
+   * @param dir directory holding the segment files
+   * @param fieldInfos resolves the field numbers stored in the terms file
+   * @param info segment whose name selects the files and whose doc count
+   *        bounds each field's docCount
+   * @param postingsReader initialized against the terms file here and
+   *        closed by {@link #close()}
+   * @param ioContext passed through when opening the inputs
+   * @param segmentSuffix suffix used when building the file names
+   * @param indexDivisor {@code -1} skips opening the terms index; any
+   *        other value loads it
+   * @throws CorruptIndexException if the two file versions disagree or a
+   *         per-field record fails validation
+   * @throws IOException on any other read failure
+   */
+  public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
+                              PostingsReaderBase postingsReader, IOContext ioContext,
+                              String segmentSuffix, int indexDivisor)
+    throws IOException {
+
+    this.postingsReader = postingsReader;
+
+    this.segment = info.name;
+    in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_EXTENSION),
+                       ioContext);
+
+    boolean success = false;
+
+    try {
+      version = readHeader(in);
+      if (indexDivisor != -1) {
+        indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION),
+                                ioContext);
+        int indexVersion = readIndexHeader(indexIn);
+        if (indexVersion != version) {
+          throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
+        }
+      } else {
+        indexIn = null;
+      }
+
+      // Have PostingsReader init itself
+      postingsReader.init(in);
+
+      // Read per-field details
+      seekDir(in, dirOffset);
+      if (indexDivisor != -1) {
+        seekDir(indexIn, indexDirOffset);
+      }
+
+      final int numFields = in.readVInt();
+      if (numFields < 0) {
+        throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + in + ")");
+      }
+
+      for(int i=0;i<numFields;i++) {
+        final int field = in.readVInt();
+        final long numTerms = in.readVLong();
+        // Checked explicitly (not with assert) so a corrupt file is
+        // reported even when assertions are disabled; FieldReader
+        // requires at least one term.
+        if (numTerms <= 0) {
+          throw new CorruptIndexException("invalid numTerms: " + numTerms + " for field number: " + field + " (resource=" + in + ")");
+        }
+        final int numBytes = in.readVInt();
+        // A negative length would otherwise surface as a
+        // NegativeArraySizeException from the allocation below.
+        if (numBytes < 0) {
+          throw new CorruptIndexException("invalid rootCode numBytes: " + numBytes + " for field number: " + field + " (resource=" + in + ")");
+        }
+        final BytesRef rootCode = new BytesRef(new byte[numBytes]);
+        in.readBytes(rootCode.bytes, 0, numBytes);
+        rootCode.length = numBytes;
+        final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+        // An unknown field number would otherwise surface as a
+        // NullPointerException on the next line.
+        if (fieldInfo == null) {
+          throw new CorruptIndexException("invalid field number: " + field + " (resource=" + in + ")");
+        }
+        // Fields indexed DOCS_ONLY store no total term frequency; -1 marks that.
+        final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
+        final long sumDocFreq = in.readVLong();
+        final int docCount = in.readVInt();
+        if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
+          throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
+        }
+        if (sumDocFreq < docCount) {  // #postings must be >= #docs with field
+          throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + in + ")");
+        }
+        if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
+          throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
+        }
+        final long indexStartFP = indexDivisor != -1 ? indexIn.readVLong() : 0;
+        FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn));
+        if (previous != null) {
+          throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
+        }
+      }
+
+      success = true;
+    } finally {
+      if (!success) {
+        // this.close() will close in:
+        IOUtils.closeWhileHandlingException(this);
+      }
+    }
+  }
+
+  /** Reads terms file header. */
+  protected int readHeader(IndexInput input) throws IOException {
+    int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_CODEC_NAME,
+                          BlockTreeTermsWriter.TERMS_VERSION_START,
+                          BlockTreeTermsWriter.TERMS_VERSION_CURRENT);
+    if (version < BlockTreeTermsWriter.TERMS_VERSION_APPEND_ONLY) {
+      dirOffset = input.readLong();
+    }
+    return version;
+  }
+
+  /** Reads index file header. */
+  protected int readIndexHeader(IndexInput input) throws IOException {
+    int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
+                          BlockTreeTermsWriter.TERMS_INDEX_VERSION_START,
+                          BlockTreeTermsWriter.TERMS_INDEX_VERSION_CURRENT);
+    if (version < BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
+      indexDirOffset = input.readLong(); 
+    }
+    return version;
+  }
+
+  /** Seek {@code input} to the directory offset. */
+  protected void seekDir(IndexInput input, long dirOffset)
+      throws IOException {
+    if (version >= BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
+      input.seek(input.length() - 8);
+      dirOffset = input.readLong();
+    }
+    input.seek(dirOffset);
+  }
+
+  // for debugging
+  // private static String toHex(int v) {
+  //   return "0x" + Integer.toHexString(v);
+  // }
+
+  /**
+   * Closes the index input, the terms input and the postings reader,
+   * then drops all per-field readers.  The field map is cleared even if
+   * closing throws.
+   */
+  @Override
+  public void close() throws IOException {
+    try {
+      IOUtils.close(indexIn, in, postingsReader);
+    } finally { 
+      // Clear so refs to terms index is GCable even if
+      // app hangs onto us:
+      fields.clear();
+    }
+  }
+
+  /**
+   * Returns a read-only iterator over the names of the fields loaded by
+   * this reader, in the key order of the underlying sorted map.
+   */
+  @Override
+  public Iterator<String> iterator() {
+    return Collections.unmodifiableSet(fields.keySet()).iterator();
+  }
+
+  /**
+   * Returns the {@link FieldReader} for {@code field}, or {@code null}
+   * if this reader holds no entry for that name.
+   *
+   * @param field field name; must not be {@code null}
+   */
+  @Override
+  public Terms terms(String field) throws IOException {
+    assert field != null;
+    return fields.get(field);
+  }
+
+  /** Returns the number of fields currently held by this reader. */
+  @Override
+  public int size() {
+    return fields.size();
+  }
+
+  // for debugging
+  String brToString(BytesRef b) {
+    if (b == null) {
+      return "null";
+    } else {
+      try {
+        return b.utf8ToString() + " " + b;
+      } catch (Throwable t) {
+        // If BytesRef isn't actually UTF8, or it's eg a
+        // prefix of UTF8 that ends mid-unicode-char, we
+        // fallback to hex:
+        return b.toString();
+      }
+    }
+  }
+
+  /**
+   * BlockTree statistics for a single field 
+   * returned by {@link FieldReader#computeStats()}.
+   */
+  public static class Stats {
+    /** How many nodes in the index FST. */
+    public long indexNodeCount;
+
+    /** How many arcs in the index FST. */
+    public long indexArcCount;
+
+    /** Byte size of the index. */
+    public long indexNumBytes;
+
+    /** Total number of terms in the field. */
+    public long totalTermCount;
+
+    /** Total number of bytes (sum of term lengths) across all terms in the field. */
+    public long totalTermBytes;
+
+    /** The number of normal (non-floor) blocks in the terms file. */
+    public int nonFloorBlockCount;
+
+    /** The number of floor blocks (meta-blocks larger than the
+     *  allowed {@code maxItemsPerBlock}) in the terms file. */
+    public int floorBlockCount;
+    
+    /** The number of sub-blocks within the floor blocks. */
+    public int floorSubBlockCount;
+
+    /** The number of "internal" blocks (that have both
+     *  terms and sub-blocks). */
+    public int mixedBlockCount;
+
+    /** The number of "leaf" blocks (blocks that have only
+     *  terms). */
+    public int termsOnlyBlockCount;
+
+    /** The number of "internal" blocks that do not contain
+     *  terms (have only sub-blocks). */
+    public int subBlocksOnlyBlockCount;
+
+    /** Total number of blocks. */
+    public int totalBlockCount;
+
+    /** Number of blocks at each prefix depth. */
+    public int[] blockCountByPrefixLen = new int[10];
+    private int startBlockCount;
+    private int endBlockCount;
+
+    /** Total number of bytes used to store term suffixes. */
+    public long totalBlockSuffixBytes;
+
+    /** Total number of bytes used to store term stats (not
+     *  including what the {@link PostingsBaseFormat}
+     *  stores. */
+    public long totalBlockStatsBytes;
+
+    /** Total bytes stored by the {@link PostingsBaseFormat},
+     *  plus the other few vInts stored in the frame. */
+    public long totalBlockOtherBytes;
+
+    /** Segment name. */
+    public final String segment;
+
+    /** Field name. */
+    public final String field;
+
+    Stats(String segment, String field) {
+      this.segment = segment;
+      this.field = field;
+    }
+
+    void startBlock(FieldReader.SegmentTermsEnum.Frame frame, boolean isFloor) {
+      totalBlockCount++;
+      if (isFloor) {
+        if (frame.fp == frame.fpOrig) {
+          floorBlockCount++;
+        }
+        floorSubBlockCount++;
+      } else {
+        nonFloorBlockCount++;
+      }
+
+      if (blockCountByPrefixLen.length <= frame.prefix) {
+        blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1+frame.prefix);
+      }
+      blockCountByPrefixLen[frame.prefix]++;
+      startBlockCount++;
+      totalBlockSuffixBytes += frame.suffixesReader.length();
+      totalBlockStatsBytes += frame.statsReader.length();
+    }
+
+    void endBlock(FieldReader.SegmentTermsEnum.Frame frame) {
+      final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd;
+      final int subBlockCount = frame.entCount - termCount;
+      totalTermCount += termCount;
+      if (termCount != 0 && subBlockCount != 0) {
+        mixedBlockCount++;
+      } else if (termCount != 0) {
+        termsOnlyBlockCount++;
+      } else if (subBlockCount != 0) {
+        subBlocksOnlyBlockCount++;
+      } else {
+        throw new IllegalStateException();
+      }
+      endBlockCount++;
+      final long otherBytes = frame.fpEnd - frame.fp - frame.suffixesReader.length() - frame.statsReader.length();
+      assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd;
+      totalBlockOtherBytes += otherBytes;
+    }
+
+    void term(BytesRef term) {
+      totalTermBytes += term.length;
+    }
+
+    void finish() {
+      assert startBlockCount == endBlockCount: "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount;
+      assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount: "floorSubBlockCount=" + floorSubBlockCount + " nonFloorBlockCount=" + nonFloorBlockCount + " totalBlockCount=" + totalBlockCount;
+      assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount: "totalBlockCount=" + totalBlockCount + " mixedBlockCount=" + mixedBlockCount + " subBlocksOnlyBlockCount=" + subBlocksOnlyBlockCount + " termsOnlyBlockCount=" + termsOnlyBlockCount;
+    }
+
+    @Override
+    public String toString() {
+      final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+      PrintStream out;
+      try {
+        out = new PrintStream(bos, false, "UTF-8");
+      } catch (UnsupportedEncodingException bogus) {
+        throw new RuntimeException(bogus);
+      }
+      
+      out.println("  index FST:");
+      out.println("    " + indexNodeCount + " nodes");
+      out.println("    " + indexArcCount + " arcs");
+      out.println("    " + indexNumBytes + " bytes");
+      out.println("  terms:");
+      out.println("    " + totalTermCount + " terms");
+      out.println("    " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
+      out.println("  blocks:");
+      out.println("    " + totalBlockCount + " blocks");
+      out.println("    " + termsOnlyBlockCount + " terms-only blocks");
+      out.println("    " + subBlocksOnlyBlockCount + " sub-block-only blocks");
+      out.println("    " + mixedBlockCount + " mixed blocks");
+      out.println("    " + floorBlockCount + " floor blocks");
+      out.println("    " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks");
+      out.println("    " + floorSubBlockCount + " floor sub-blocks");
+      out.println("    " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
+      out.println("    " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
+      out.println("    " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
+      if (totalBlockCount != 0) {
+        out.println("    by prefix length:");
+        int total = 0;
+        for(int prefix=0;prefix<blockCountByPrefixLen.length;prefix++) {
+          final int blockCount = blockCountByPrefixLen[prefix];
+          total += blockCount;
+          if (blockCount != 0) {
+            out.println("      " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount);
+          }
+        }
+        assert totalBlockCount == total;
+      }
+
+      try {
+        return bos.toString("UTF-8");
+      } catch (UnsupportedEncodingException bogus) {
+        throw new RuntimeException(bogus);
+      }
+    }
+  }
+
+  // Shared byte-sequence Outputs instance for the terms index FST
+  final Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
+  // The "no output" value of fstOutputs
+  final BytesRef NO_OUTPUT = fstOutputs.getNoOutput();
+
+  /** BlockTree's implementation of {@link Terms}. */
+  public final class FieldReader extends Terms {
+    final long numTerms;
+    final FieldInfo fieldInfo;
+    final long sumTotalTermFreq;
+    final long sumDocFreq;
+    final int docCount;
+    final long indexStartFP;
+    final long rootBlockFP;
+    final BytesRef rootCode;
+    private final FST<BytesRef> index;
+
+    //private boolean DEBUG;
+
+    /**
+     * Captures the per-field summary values, decodes the root block's
+     * file pointer from {@code rootCode}, and, when {@code indexIn} is
+     * not {@code null}, loads this field's index FST from a clone of it
+     * positioned at {@code indexStartFP}.  With a {@code null}
+     * {@code indexIn} the field has no index ({@code index == null}).
+     */
+    FieldReader(FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, IndexInput indexIn) throws IOException {
+      assert numTerms > 0;
+
+      //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
+      // if (DEBUG) {
+      //   System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
+      // }
+
+      this.fieldInfo = fieldInfo;
+      this.numTerms = numTerms;
+      this.rootCode = rootCode;
+      this.sumTotalTermFreq = sumTotalTermFreq;
+      this.sumDocFreq = sumDocFreq;
+      this.docCount = docCount;
+      this.indexStartFP = indexStartFP;
+
+      // The root code is a vLong whose low OUTPUT_FLAGS_NUM_BITS bits
+      // are dropped to obtain the root block's file pointer:
+      final ByteArrayDataInput rootCodeReader = new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length);
+      final long rootCodeValue = rootCodeReader.readVLong();
+      rootBlockFP = rootCodeValue >>> BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
+
+      if (indexIn == null) {
+        index = null;
+      } else {
+        // Work on a clone so the shared input's position is untouched:
+        final IndexInput fstInput = indexIn.clone();
+        //System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
+        fstInput.seek(indexStartFP);
+        index = new FST<BytesRef>(fstInput, ByteSequenceOutputs.getSingleton());
+
+        // Debugging aid: the loaded FST can be dumped in dot format with
+        // Util.toDot(index, writer, false, false).
+      }
+    }
+
+    /**
+     * For debugging -- used by CheckIndex too.  Creates a fresh
+     * {@code SegmentTermsEnum} over this field and returns the block
+     * statistics it computes.
+     */
+    // TODO: maybe push this into Terms?
+    public Stats computeStats() throws IOException {
+      return new SegmentTermsEnum().computeBlockStats();
+    }
+
+    /** Returns the comparator obtained from
+     *  {@link BytesRef#getUTF8SortedAsUnicodeComparator()}. */
+    @Override
+    public Comparator<BytesRef> getComparator() {
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
+    }
+
+    /** Returns true when the field's index options are at least
+     *  {@code DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}. */
+    @Override
+    public boolean hasOffsets() {
+      final IndexOptions options = fieldInfo.getIndexOptions();
+      final int order = options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+      return order >= 0;
+    }
+
+    /** Returns true when the field's index options are at least
+     *  {@code DOCS_AND_FREQS_AND_POSITIONS}. */
+    @Override
+    public boolean hasPositions() {
+      final IndexOptions options = fieldInfo.getIndexOptions();
+      final int order = options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+      return order >= 0;
+    }
+    
+    /** Delegates to {@link FieldInfo#hasPayloads()} for this field. */
+    @Override
+    public boolean hasPayloads() {
+      return fieldInfo.hasPayloads();
+    }
+
+    /**
+     * Returns a new {@code SegmentTermsEnum} over this field.
+     *
+     * @param reuse ignored; a fresh enum is always created
+     */
+    @Override
+    public TermsEnum iterator(TermsEnum reuse) throws IOException {
+      return new SegmentTermsEnum();
+    }
+
+    /** Returns the number of terms in this field, as passed to the constructor. */
+    @Override
+    public long size() {
+      return numTerms;
+    }
+
+    /** Returns the field's summed total term frequency, as passed to
+     *  the constructor. */
+    @Override
+    public long getSumTotalTermFreq() {
+      return sumTotalTermFreq;
+    }
+
+    /** Returns the field's summed document frequency, as passed to the
+     *  constructor. */
+    @Override
+    public long getSumDocFreq() {
+      return sumDocFreq;
+    }
+
+    /** Returns the field's document count, as passed to the constructor. */
+    @Override
+    public int getDocCount() {
+      return docCount;
+    }
+
+    /**
+     * Returns an enum over the terms of this field that the given
+     * automaton accepts.
+     *
+     * @param compiled automaton to intersect with; its type must be
+     *        {@code NORMAL}
+     * @param startTerm passed through to the created {@code IntersectEnum}
+     * @throws IllegalArgumentException if {@code compiled} is not of
+     *         type {@code NORMAL}
+     */
+    @Override
+    public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
+      final boolean isNormal = compiled.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL;
+      if (isNormal) {
+        return new IntersectEnum(compiled, startTerm);
+      }
+      throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
+    }
+    
+    // NOTE: cannot seek!
+    private final class IntersectEnum extends TermsEnum {
+      private final IndexInput in;
+
+      private Frame[] stack;
+      
+      @SuppressWarnings({"rawtypes","unchecked"}) private FST.Arc<BytesRef>[] arcs = new FST.Arc[5];
+
+      private final RunAutomaton runAutomaton;
+      private final CompiledAutomaton compiledAutomaton;
+
+      private Frame currentFrame;
+
+      private final BytesRef term = new BytesRef();
+
+      private final FST.BytesReader fstReader;
+
+      // TODO: can we share this with the frame in STE?
+      // One slot of the IntersectEnum stack: the decoded state of a
+      // single on-disk block (suffix bytes, stats bytes, floor data)
+      // together with the automaton state and transitions used while
+      // scanning it.
+      private final class Frame {
+        // Index of this frame in the enclosing enum's stack array
+        final int ord;
+        // File pointer of the block; load() seeks here
+        long fp;
+        // Base file pointer that floor-block deltas are added to
+        long fpOrig;
+        // File pointer right after the loaded block's data; load() only
+        // records it when the block is not the last one in its floor
+        long fpEnd;
+        // Absolute file pointer of the sub-block entry most recently
+        // decoded by nextNonLeaf()
+        long lastSubFP;
+
+        // State in automaton
+        int state;
+
+        // How many terms of this block have had their metadata decoded
+        int metaDataUpto;
+
+        // Backing buffers below are grown on demand by load()
+        byte[] suffixBytes = new byte[128];
+        final ByteArrayDataInput suffixesReader = new ByteArrayDataInput();
+
+        byte[] statBytes = new byte[64];
+        final ByteArrayDataInput statsReader = new ByteArrayDataInput();
+
+        byte[] floorData = new byte[32];
+        final ByteArrayDataInput floorDataReader = new ByteArrayDataInput();
+
+        // Length of prefix shared by all terms in this block
+        int prefix;
+
+        // Number of entries (term or sub-block) in this block
+        int entCount;
+
+        // Which term we will next read
+        int nextEnt;
+
+        // True if this block is either not a floor block,
+        // or, it's the last sub-block of a floor block
+        boolean isLastInFloor;
+
+        // True if all entries are terms
+        boolean isLeafBlock;
+
+        // Floor sub-blocks still following the current one, and the
+        // label of the next one (256 once none remain)
+        int numFollowFloorBlocks;
+        int nextFloorLabel;
+        
+        // Sorted transitions leaving `state`, the max of the first
+        // transition (-1 if there are none), and the current index into
+        // `transitions`; all three are (re)initialized by setState()
+        Transition[] transitions;
+        int curTransitionMax;
+        int transitionIndex;
+
+        FST.Arc<BytesRef> arc;
+
+        final BlockTermState termState;
+
+        // Cumulative output so far
+        BytesRef outputPrefix;
+
+        // Start position and length of the current entry's suffix
+        // within suffixBytes; set by nextLeaf() / nextNonLeaf()
+        private int startBytePos;
+        private int suffix;
+
+        // Creates the frame for stack slot `ord` with a fresh term state
+        // from the postings reader; totalTermFreq starts at -1.
+        public Frame(int ord) throws IOException {
+          this.ord = ord;
+          termState = postingsReader.newTermState();
+          termState.totalTermFreq = -1;
+        }
+
+        // Moves fp forward through the floor data, skipping sub-blocks
+        // while more remain and the next sub-block's label is still
+        // <= the current transition's min, then loads the block at fp.
+        void loadNextFloorBlock() throws IOException {
+          assert numFollowFloorBlocks > 0;
+          //if (DEBUG) System.out.println("    loadNextFloorBlock trans=" + transitions[transitionIndex]);
+
+          do {
+            // The low bit of each floor entry is dropped; the rest is a
+            // delta relative to fpOrig:
+            fp = fpOrig + (floorDataReader.readVLong() >>> 1);
+            numFollowFloorBlocks--;
+            // if (DEBUG) System.out.println("    skip floor block2!  nextFloorLabel=" + (char) nextFloorLabel + " vs target=" + (char) transitions[transitionIndex].getMin() + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
+            if (numFollowFloorBlocks != 0) {
+              nextFloorLabel = floorDataReader.readByte() & 0xff;
+            } else {
+              // Larger than any byte label, so no label compares above it
+              nextFloorLabel = 256;
+            }
+            // if (DEBUG) System.out.println("    nextFloorLabel=" + (char) nextFloorLabel);
+          } while (numFollowFloorBlocks != 0 && nextFloorLabel <= transitions[transitionIndex].getMin());
+
+          load(null);
+        }
+
+        // Sets the automaton state for this frame and resets the
+        // transition cursor to that state's first sorted transition.
+        public void setState(int state) {
+          this.state = state;
+          transitionIndex = 0;
+          transitions = compiledAutomaton.sortedTransitions[state];
+          if (transitions.length != 0) {
+            curTransitionMax = transitions[0].getMax();
+          } else {
+            curTransitionMax = -1;
+          }
+        }
+
+        // Reads the block at fp into this frame.  When frameIndexData is
+        // non-null (and the state has transitions) it is first copied
+        // into floorData and, if it is flagged as a floor block, leading
+        // floor sub-blocks that cannot match the first transition are
+        // skipped by advancing fp.  Pass null to load the block at the
+        // current fp without touching the floor data.
+        void load(BytesRef frameIndexData) throws IOException {
+
+          // if (DEBUG) System.out.println("    load fp=" + fp + " fpOrig=" + fpOrig + " frameIndexData=" + frameIndexData + " trans=" + (transitions.length != 0 ? transitions[0] : "n/a" + " state=" + state));
+
+          if (frameIndexData != null && transitions.length != 0) {
+            // Floor frame
+            if (floorData.length < frameIndexData.length) {
+              this.floorData = new byte[ArrayUtil.oversize(frameIndexData.length, 1)];
+            }
+            System.arraycopy(frameIndexData.bytes, frameIndexData.offset, floorData, 0, frameIndexData.length);
+            floorDataReader.reset(floorData, 0, frameIndexData.length);
+            // Skip first long -- has redundant fp, hasTerms
+            // flag, isFloor flag
+            final long code = floorDataReader.readVLong();
+            if ((code & BlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0) {
+              numFollowFloorBlocks = floorDataReader.readVInt();
+              nextFloorLabel = floorDataReader.readByte() & 0xff;
+              // if (DEBUG) System.out.println("    numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + nextFloorLabel);
+
+              // If current state is accept, we must process
+              // first block in case it has empty suffix:
+              if (!runAutomaton.isAccept(state)) {
+                // Maybe skip floor blocks:
+                while (numFollowFloorBlocks != 0 && nextFloorLabel <= transitions[0].getMin()) {
+                  fp = fpOrig + (floorDataReader.readVLong() >>> 1);
+                  numFollowFloorBlocks--;
+                  // if (DEBUG) System.out.println("    skip floor block!  nextFloorLabel=" + (char) nextFloorLabel + " vs target=" + (char) transitions[0].getMin() + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
+                  if (numFollowFloorBlocks != 0) {
+                    nextFloorLabel = floorDataReader.readByte() & 0xff;
+                  } else {
+                    nextFloorLabel = 256;
+                  }
+                }
+              }
+            }
+          }
+
+          in.seek(fp);
+          // Block header vInt: entry count in the high bits, "last in
+          // floor" flag in the low bit
+          int code = in.readVInt();
+          entCount = code >>> 1;
+          assert entCount > 0;
+          isLastInFloor = (code & 1) != 0;
+
+          // term suffixes:
+          // (byte count in the high bits, "leaf block" flag in the low bit)
+          code = in.readVInt();
+          isLeafBlock = (code & 1) != 0;
+          int numBytes = code >>> 1;
+          // if (DEBUG) System.out.println("      entCount=" + entCount + " lastInFloor?=" + isLastInFloor + " leafBlock?=" + isLeafBlock + " numSuffixBytes=" + numBytes);
+          if (suffixBytes.length < numBytes) {
+            suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+          }
+          in.readBytes(suffixBytes, 0, numBytes);
+          suffixesReader.reset(suffixBytes, 0, numBytes);
+
+          // stats
+          numBytes = in.readVInt();
+          if (statBytes.length < numBytes) {
+            statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+          }
+          in.readBytes(statBytes, 0, numBytes);
+          statsReader.reset(statBytes, 0, numBytes);
+          metaDataUpto = 0;
+
+          termState.termBlockOrd = 0;
+          nextEnt = 0;
+          
+          postingsReader.readTermsBlock(in, fieldInfo, termState);
+
+          if (!isLastInFloor) {
+            // Sub-blocks of a single floor block are always
+            // written one after another -- tail recurse:
+            // NOTE(review): no recursion happens here; the code only
+            // records the current file pointer in fpEnd.
+            fpEnd = in.getFilePointer();
+          }
+        }
+
+        // TODO: maybe add scanToLabel; should give perf boost
+
+        // Decodes the next entry of the block; returns true if that
+        // entry is a sub-block, false if it is a term.
+        public boolean next() {
+          return isLeafBlock ? nextLeaf() : nextNonLeaf();
+        }
+
+        // Decodes next entry of a leaf block (terms only), recording its
+        // suffix position and length; always returns false
+        public boolean nextLeaf() {
+          //if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
+          assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
+          nextEnt++;
+          suffix = suffixesReader.readVInt();
+          startBytePos = suffixesReader.getPosition();
+          suffixesReader.skipBytes(suffix);
+          return false;
+        }
+
+        // Decodes next entry of a non-leaf block.  The entry's vInt
+        // holds the suffix length in the high bits and a "sub-block"
+        // flag in the low bit; returns true for a sub-block.
+        public boolean nextNonLeaf() {
+          //if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
+          assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
+          nextEnt++;
+          final int code = suffixesReader.readVInt();
+          suffix = code >>> 1;
+          startBytePos = suffixesReader.getPosition();
+          suffixesReader.skipBytes(suffix);
+          if ((code & 1) == 0) {
+            // A normal term
+            termState.termBlockOrd++;
+            return false;
+          } else {
+            // A sub-block; make sub-FP absolute:
+            lastSubFP = fp - suffixesReader.readVLong();
+            return true;
+          }
+        }
+
+        // Number of terms consumed so far in this block: every entry is
+        // a term in a leaf block, otherwise only terms are counted.
+        public int getTermBlockOrd() {
+          return isLeafBlock ? nextEnt : termState.termBlockOrd;
+        }
+
+        // Decodes term metadata (docFreq, totalTermFreq, postings
+        // metadata) for every term from metaDataUpto up to the current
+        // term, leaving termState describing the current term.
+        public void decodeMetaData() throws IOException {
+
+          // lazily catch up on metadata decode:
+          final int limit = getTermBlockOrd();
+          assert limit > 0;
+
+          // We must set/incr state.termCount because
+          // postings impl can look at this
+          termState.termBlockOrd = metaDataUpto;
+      
+          // TODO: better API would be "jump straight to term=N"???
+          while (metaDataUpto < limit) {
+
+            // TODO: we could make "tiers" of metadata, ie,
+            // decode docFreq/totalTF but don't decode postings
+            // metadata; this way caller could get
+            // docFreq/totalTF w/o paying decode cost for
+            // postings
+
+            // TODO: if docFreq were bulk decoded we could
+            // just skipN here:
+            termState.docFreq = statsReader.readVInt();
+            //if (DEBUG) System.out.println("    dF=" + state.docFreq);
+            if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+              // totalTermFreq is stored as a delta on top of docFreq
+              termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
+              //if (DEBUG) System.out.println("    totTF=" + state.totalTermFreq);
+            }
+
+            postingsReader.nextTerm(fieldInfo, termState);
+            metaDataUpto++;
+            termState.termBlockOrd++;
+          }
+        }
+      }
+
+      private BytesRef savedStartTerm;
+      
+      // TODO: in some cases we can filter by length?  eg
+      // regexp foo*bar must be at least length 6 bytes
+      // Creates an enum over the terms of this field that are accepted by
+      // the given compiled automaton.  When startTerm is non-null the enum is
+      // first positioned via seekToStartTerm(startTerm).
+      //
+      // Throws IllegalStateException if the terms index was not loaded: the
+      // root arc and every pushed frame are resolved through the index, so
+      // this enum cannot work without it.
+      public IntersectEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
+        // if (DEBUG) {
+        //   System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" + brToString(compiled.commonSuffixRef));
+        // }
+
+        // Fail fast with a clear message instead of a NullPointerException
+        // further down when index.getFirstArc(...) is dereferenced:
+        if (index == null) {
+          throw new IllegalStateException("terms index was not loaded");
+        }
+
+        runAutomaton = compiled.runAutomaton;
+        compiledAutomaton = compiled;
+        in = BlockTreeTermsReader.this.in.clone();
+        stack = new Frame[5];
+        for(int idx=0;idx<stack.length;idx++) {
+          stack[idx] = new Frame(idx);
+        }
+        for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
+          arcs[arcIdx] = new FST.Arc<BytesRef>();
+        }
+
+        fstReader = index.getBytesReader();
+
+        // TODO: if the automaton is "smallish" we really
+        // should use the terms index to seek at least to
+        // the initial term and likely to subsequent terms
+        // (or, maybe just fallback to ATE for such cases).
+        // Else the seek cost of loading the frames will be
+        // too costly.
+
+        final FST.Arc<BytesRef> arc = index.getFirstArc(arcs[0]);
+        // Empty string prefix must have an output in the index!
+        assert arc.isFinal();
+
+        // Special pushFrame since it's the first one:
+        final Frame f = stack[0];
+        f.fp = f.fpOrig = rootBlockFP;
+        f.prefix = 0;
+        f.setState(runAutomaton.getInitialState());
+        f.arc = arc;
+        f.outputPrefix = arc.output;
+        f.load(rootCode);
+
+        // for assert:
+        assert setSavedStartTerm(startTerm);
+
+        currentFrame = f;
+        if (startTerm != null) {
+          seekToStartTerm(startTerm);
+        }
+      }
+
+      // only for assert:
+      // Keeps a private copy of the start term (or null).  Always returns
+      // true so it can be invoked from inside an assert statement.
+      private boolean setSavedStartTerm(BytesRef startTerm) {
+        if (startTerm == null) {
+          savedStartTerm = null;
+        } else {
+          savedStartTerm = BytesRef.deepCopyOf(startTerm);
+        }
+        return true;
+      }
+
+      // Returns a clone of the current frame's term state, after making
+      // sure its metadata has been decoded.
+      @Override
+      public TermState termState() throws IOException {
+        final Frame frame = currentFrame;
+        frame.decodeMetaData();
+        return frame.termState.clone();
+      }
+
+      // Returns the frame at stack position ord, growing the stack (and
+      // filling the new slots with fresh frames) when ord is out of range.
+      private Frame getFrame(int ord) throws IOException {
+        final int oldLength = stack.length;
+        if (ord >= oldLength) {
+          final Frame[] grown = new Frame[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+          System.arraycopy(stack, 0, grown, 0, oldLength);
+          for (int slot = oldLength; slot < grown.length; slot++) {
+            grown[slot] = new Frame(slot);
+          }
+          stack = grown;
+        }
+        final Frame frame = stack[ord];
+        assert frame.ord == ord;
+        return frame;
+      }
+
+      // Returns the scratch arc at position ord, growing the arcs array
+      // (and filling the new slots with fresh arcs) when ord is out of range.
+      private FST.Arc<BytesRef> getArc(int ord) {
+        final int oldLength = arcs.length;
+        if (ord >= oldLength) {
+          @SuppressWarnings({"rawtypes","unchecked"}) final FST.Arc<BytesRef>[] grown =
+            new FST.Arc[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+          System.arraycopy(arcs, 0, grown, 0, oldLength);
+          for (int slot = oldLength; slot < grown.length; slot++) {
+            grown[slot] = new FST.Arc<BytesRef>();
+          }
+          arcs = grown;
+        }
+        return arcs[ord];
+      }
+
+      // Prepares and loads the frame for the sub-block the current frame
+      // just stepped onto (currentFrame.lastSubFP), entering it with the
+      // given automaton state.  Does not change currentFrame; the caller
+      // assigns the returned frame.
+      private Frame pushFrame(int state) throws IOException {
+        final Frame parent = currentFrame;
+        final Frame f = getFrame(parent == null ? 0 : 1+parent.ord);
+
+        f.fp = f.fpOrig = parent.lastSubFP;
+        f.prefix = parent.prefix + parent.suffix;
+        // if (DEBUG) System.out.println("    pushFrame state=" + state + " prefix=" + f.prefix);
+        f.setState(state);
+
+        // Walk the arc through the index -- we only
+        // "bother" with this so we can get the floor data
+        // from the index and skip floor blocks when
+        // possible:
+        assert parent.suffix > 0;
+        FST.Arc<BytesRef> arc = parent.arc;
+        BytesRef output = parent.outputPrefix;
+        for (int idx = parent.prefix; idx < f.prefix; idx++) {
+          final int label = term.bytes[idx] & 0xff;
+          // TODO: we could be more efficient for the next()
+          // case by using current arc as starting point,
+          // passed to findTargetArc
+          arc = index.findTargetArc(label, arc, getArc(1+idx), fstReader);
+          assert arc != null;
+          output = fstOutputs.add(output, arc.output);
+        }
+
+        f.arc = arc;
+        f.outputPrefix = output;
+        assert arc.isFinal();
+        f.load(fstOutputs.add(output, arc.nextFinalOutput));
+        return f;
+      }
+
+      // Returns this enum's shared term buffer (not a copy); next() and
+      // seekToStartTerm() overwrite its bytes and length in place.
+      @Override
+      public BytesRef term() {
+        return term;
+      }
+
+      // Returns docFreq of the current term, decoding pending term
+      // metadata of the current frame first.
+      @Override
+      public int docFreq() throws IOException {
+        //if (DEBUG) System.out.println("BTIR.docFreq");
+        final Frame frame = currentFrame;
+        frame.decodeMetaData();
+        //if (DEBUG) System.out.println("  return " + currentFrame.termState.docFreq);
+        final int result = frame.termState.docFreq;
+        return result;
+      }
+
+      // Returns totalTermFreq of the current term, decoding pending term
+      // metadata of the current frame first.
+      @Override
+      public long totalTermFreq() throws IOException {
+        final Frame frame = currentFrame;
+        frame.decodeMetaData();
+        final long result = frame.termState.totalTermFreq;
+        return result;
+      }
+
+      // Decodes the current term's metadata and hands its term state to the
+      // postings reader to obtain a DocsEnum.
+      @Override
+      public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
+        final Frame frame = currentFrame;
+        frame.decodeMetaData();
+        return postingsReader.docs(fieldInfo, frame.termState, skipDocs, reuse, flags);
+      }
+
+      // Returns a positions enum for the current term, or null when the
+      // field's index options are below DOCS_AND_FREQS_AND_POSITIONS.
+      @Override
+      public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+        final boolean hasPositions =
+            fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+        if (!hasPositions) {
+          // Positions were not indexed:
+          return null;
+        }
+
+        final Frame frame = currentFrame;
+        frame.decodeMetaData();
+        return postingsReader.docsAndPositions(fieldInfo, frame.termState, skipDocs, reuse, flags);
+      }
+
+      private int getState() {
+        int state = currentFrame.state;
+        for(int idx=0;idx<currentFrame.suffix;idx++) {
+          state = runAutomaton.step(state,  currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff);
+          assert state != -1;
+        }
+        return state;
+      }
+
+      // NOTE: specialized to only doing the first-time
+      // seek, but we could generalize it to allow
+      // arbitrary seekExact/Ceil.  Note that this is a
+      // seekFloor!
+      // Positions the enum relative to target so that iteration resumes
+      // from there.  Must be called while the root frame (ord 0) is
+      // current.  Entries are scanned in order; whenever a sub-block entry
+      // is a prefix of target the method descends into it, otherwise each
+      // entry is compared with target and the scan stops on an exact match,
+      // on running out of entries, or (after undoing the last step) on the
+      // first entry greater than target.
+      private void seekToStartTerm(BytesRef target) throws IOException {
+        //if (DEBUG) System.out.println("seek to startTerm=" + target.utf8ToString());
+        assert currentFrame.ord == 0;
+        // NOTE(review): this compares term.length rather than
+        // term.bytes.length; it is only a pre-sizing hint because the
+        // buffer capacity is re-checked before every copy below.
+        if (term.length < target.length) {
+          term.bytes = ArrayUtil.grow(term.bytes, target.length);
+        }
+        FST.Arc<BytesRef> arc = arcs[0];
+        assert arc == currentFrame.arc;
+
+        // Each outer iteration handles one frame; the inner loop breaks
+        // out (to the next outer iteration) only after pushing a sub-frame.
+        for(int idx=0;idx<=target.length;idx++) {
+
+          while (true) {
+            // Snapshot the frame's cursor so the step below can be undone
+            // if it overshoots the target:
+            final int savePos = currentFrame.suffixesReader.getPosition();
+            final int saveStartBytePos = currentFrame.startBytePos;
+            final int saveSuffix = currentFrame.suffix;
+            final long saveLastSubFP = currentFrame.lastSubFP;
+            final int saveTermBlockOrd = currentFrame.termState.termBlockOrd;
+
+            final boolean isSubBlock = currentFrame.next();
+
+            //if (DEBUG) System.out.println("    cycle ent=" + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") prefix=" + currentFrame.prefix + " suffix=" + currentFrame.suffix + " isBlock=" + isSubBlock + " firstLabel=" + (currentFrame.suffix == 0 ? "" : (currentFrame.suffixBytes[currentFrame.startBytePos])&0xff));
+            // Materialize the entry (frame prefix + suffix) into term:
+            term.length = currentFrame.prefix + currentFrame.suffix;
+            if (term.bytes.length < term.length) {
+              term.bytes = ArrayUtil.grow(term.bytes, term.length);
+            }
+            System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
+
+            if (isSubBlock && StringHelper.startsWith(target, term)) {
+              // Recurse
+              //if (DEBUG) System.out.println("      recurse!");
+              currentFrame = pushFrame(getState());
+              break;
+            } else {
+              final int cmp = term.compareTo(target);
+              if (cmp < 0) {
+                // Entry sorts before target: keep scanning, moving on to
+                // the next floor block when this one is exhausted.
+                if (currentFrame.nextEnt == currentFrame.entCount) {
+                  if (!currentFrame.isLastInFloor) {
+                    //if (DEBUG) System.out.println("  load floorBlock");
+                    currentFrame.loadNextFloorBlock();
+                    continue;
+                  } else {
+                    //if (DEBUG) System.out.println("  return term=" + brToString(term));
+                    return;
+                  }
+                }
+                continue;
+              } else if (cmp == 0) {
+                //if (DEBUG) System.out.println("  return term=" + brToString(term));
+                return;
+              } else {
+                // Fallback to prior entry: the semantics of
+                // this method is that the first call to
+                // next() will return the term after the
+                // requested term
+                currentFrame.nextEnt--;
+                currentFrame.lastSubFP = saveLastSubFP;
+                currentFrame.startBytePos = saveStartBytePos;
+                currentFrame.suffix = saveSuffix;
+                currentFrame.suffixesReader.setPosition(savePos);
+                currentFrame.termState.termBlockOrd = saveTermBlockOrd;
+                System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
+                term.length = currentFrame.prefix + currentFrame.suffix;
+                // If the last entry was a block we don't
+                // need to bother recursing and pushing to
+                // the last term under it because the first
+                // next() will simply skip the frame anyway
+                return;
+              }
+            }
+          }
+        }
+
+        // Every path through the loops above is expected to return:
+        assert false;
+      }
+
+      // Advances to the next term accepted by the automaton and returns the
+      // shared term buffer, or null once the root frame is exhausted.
+      //
+      // Each pass of the nextTerm loop: (1) pops or floor-advances frames
+      // that have no entries left, (2) reads the next entry of the current
+      // frame, (3) advances the frame's transition cursor to cover the
+      // entry's first byte, abandoning the frame if no transition can,
+      // (4) for terms, applies the optional common-suffix filter,
+      // (5) runs the suffix bytes through the automaton, and (6) either
+      // descends into a matching sub-block or returns an accepted term.
+      @Override
+      public BytesRef next() throws IOException {
+
+        // if (DEBUG) {
+        //   System.out.println("\nintEnum.next seg=" + segment);
+        //   System.out.println("  frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
+        // }
+
+        nextTerm:
+        while(true) {
+          // Pop finished frames
+          while (currentFrame.nextEnt == currentFrame.entCount) {
+            if (!currentFrame.isLastInFloor) {
+              //if (DEBUG) System.out.println("    next-floor-block");
+              currentFrame.loadNextFloorBlock();
+              //if (DEBUG) System.out.println("\n  frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
+            } else {
+              //if (DEBUG) System.out.println("  pop frame");
+              if (currentFrame.ord == 0) {
+                // Root frame exhausted: enumeration is done.
+                return null;
+              }
+              final long lastFP = currentFrame.fpOrig;
+              currentFrame = stack[currentFrame.ord-1];
+              // The parent must still point at the block we just left:
+              assert currentFrame.lastSubFP == lastFP;
+              //if (DEBUG) System.out.println("\n  frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
+            }
+          }
+
+          final boolean isSubBlock = currentFrame.next();
+          // if (DEBUG) {
+          //   final BytesRef suffixRef = new BytesRef();
+          //   suffixRef.bytes = currentFrame.suffixBytes;
+          //   suffixRef.offset = currentFrame.startBytePos;
+          //   suffixRef.length = currentFrame.suffix;
+          //   System.out.println("    " + (isSubBlock ? "sub-block" : "term") + " " + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" + brToString(suffixRef));
+          // }
+
+          if (currentFrame.suffix != 0) {
+            // Advance the transition cursor until its max covers the
+            // entry's first suffix byte:
+            final int label = currentFrame.suffixBytes[currentFrame.startBytePos] & 0xff;
+            while (label > currentFrame.curTransitionMax) {
+              if (currentFrame.transitionIndex >= currentFrame.transitions.length-1) {
+                // Stop processing this frame -- no further
+                // matches are possible because we've moved
+                // beyond what the max transition will allow
+                //if (DEBUG) System.out.println("      break: trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]));
+
+                // sneaky!  forces a pop above
+                currentFrame.isLastInFloor = true;
+                currentFrame.nextEnt = currentFrame.entCount;
+                continue nextTerm;
+              }
+              currentFrame.transitionIndex++;
+              currentFrame.curTransitionMax = currentFrame.transitions[currentFrame.transitionIndex].getMax();
+              //if (DEBUG) System.out.println("      next trans=" + currentFrame.transitions[currentFrame.transitionIndex]);
+            }
+          }
+
+          // First test the common suffix, if set:
+          if (compiledAutomaton.commonSuffixRef != null && !isSubBlock) {
+            final int termLen = currentFrame.prefix + currentFrame.suffix;
+            if (termLen < compiledAutomaton.commonSuffixRef.length) {
+              // No match
+              // if (DEBUG) {
+              //   System.out.println("      skip: common suffix length");
+              // }
+              continue nextTerm;
+            }
+
+            final byte[] suffixBytes = currentFrame.suffixBytes;
+            final byte[] commonSuffixBytes = compiledAutomaton.commonSuffixRef.bytes;
+
+            // Number of common-suffix bytes that fall inside the frame
+            // prefix (held in term.bytes) rather than inside this entry's
+            // suffix; <= 0 means the entry suffix alone covers it.
+            final int lenInPrefix = compiledAutomaton.commonSuffixRef.length - currentFrame.suffix;
+            assert compiledAutomaton.commonSuffixRef.offset == 0;
+            int suffixBytesPos;
+            int commonSuffixBytesPos = 0;
+
+            if (lenInPrefix > 0) {
+              // A prefix of the common suffix overlaps with
+              // the suffix of the block prefix so we first
+              // test whether the prefix part matches:
+              final byte[] termBytes = term.bytes;
+              int termBytesPos = currentFrame.prefix - lenInPrefix;
+              assert termBytesPos >= 0;
+              final int termBytesPosEnd = currentFrame.prefix;
+              while (termBytesPos < termBytesPosEnd) {
+                if (termBytes[termBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
+                  // if (DEBUG) {
+                  //   System.out.println("      skip: common suffix mismatch (in prefix)");
+                  // }
+                  continue nextTerm;
+                }
+              }
+              suffixBytesPos = currentFrame.startBytePos;
+            } else {
+              // Compare only the tail of the entry suffix:
+              suffixBytesPos = currentFrame.startBytePos + currentFrame.suffix - compiledAutomaton.commonSuffixRef.length;
+            }
+
+            // Test overlapping suffix part:
+            final int commonSuffixBytesPosEnd = compiledAutomaton.commonSuffixRef.length;
+            while (commonSuffixBytesPos < commonSuffixBytesPosEnd) {
+              if (suffixBytes[suffixBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
+                // if (DEBUG) {
+                //   System.out.println("      skip: common suffix mismatch");
+                // }
+                continue nextTerm;
+              }
+            }
+          }
+
+          // TODO: maybe we should do the same linear test
+          // that AutomatonTermsEnum does, so that if we
+          // reach a part of the automaton where .* is
+          // "temporarily" accepted, we just blindly .next()
+          // until the limit
+
+          // See if the term prefix matches the automaton:
+          int state = currentFrame.state;
+          for (int idx=0;idx<currentFrame.suffix;idx++) {
+            state = runAutomaton.step(state,  currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff);
+            if (state == -1) {
+              // No match
+              //System.out.println("    no s=" + state);
+              continue nextTerm;
+            } else {
+              //System.out.println("    c s=" + state);
+            }
+          }
+
+          if (isSubBlock) {
+            // Match!  Recurse:
+            //if (DEBUG) System.out.println("      sub-block match to state=" + state + "; recurse fp=" + currentFrame.lastSubFP);
+            // pushFrame reads the new prefix bytes from term, so copy first:
+            copyTerm();
+            currentFrame = pushFrame(state);
+            //if (DEBUG) System.out.println("\n  frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
+          } else if (runAutomaton.isAccept(state)) {
+            copyTerm();
+            //if (DEBUG) System.out.println("      term match to state=" + state + "; return term=" + brToString(term));
+            assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0: "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
+            return term;
+          } else {
+            // The term's bytes were all consumed but the automaton did
+            // not end in an accept state; try the next entry.
+            //System.out.println("    no s=" + state);
+          }
+        }
+      }
+
+      // Writes the current entry's suffix bytes into the term buffer right
+      // after the frame prefix (growing the buffer if needed) and sets the
+      // term length to prefix + suffix.
+      private void copyTerm() {
+        //System.out.println("      copyTerm cur.prefix=" + currentFrame.prefix + " cur.suffix=" + currentFrame.suffix + " first=" + (char) currentFrame.suffixBytes[currentFrame.startBytePos]);
+        final Frame frame = currentFrame;
+        final int prefixLen = frame.prefix;
+        final int suffixLen = frame.suffix;
+        final int totalLen = prefixLen + suffixLen;
+        if (totalLen > term.bytes.length) {
+          term.bytes = ArrayUtil.grow(term.bytes, totalLen);
+        }
+        System.arraycopy(frame.suffixBytes, frame.startBytePos, term.bytes, prefixLen, suffixLen);
+        term.length = totalLen;
+      }
+
+      // Term order of this enum: the comparator returned by
+      // BytesRef.getUTF8SortedAsUnicodeComparator().
+      @Override
+      public Comparator<BytesRef> getComparator() {
+        return BytesRef.getUTF8SortedAsUnicodeComparator();
+      }
+
+      // Not supported by this enum: always throws
+      // UnsupportedOperationException.
+      @Override
+      public boolean seekExact(BytesRef text, boolean useCache) {
+        throw new UnsupportedOperationException();
+      }
+
+      // Seeking by ordinal is not supported by this enum: always throws
+      // UnsupportedOperationException.
+      @Override
+      public void seekExact(long ord) {
+        throw new UnsupportedOperationException();
+      }
+
+      // Term ordinals are not supported by this enum: always throws
+      // UnsupportedOperationException.
+      @Override
+      public long ord() {
+        throw new UnsupportedOperationException();
+      }
+
+      // Not supported by this enum: always throws
+      // UnsupportedOperationException.
+      @Override
+      public SeekStatus seekCeil(BytesRef text, boolean useCache) {
+        throw new UnsupportedOperationException();
+      }
+    }
+
+    // Iterates through terms in this field
+    private final class SegmentTermsEnum extends TermsEnum {
+      private IndexInput in;
+
+      private Frame[] stack;
+      private final Frame staticFrame;
+      private Frame currentFrame;
+      private boolean termExists;
+
+      private int targetBeforeCurrentLength;
+
+      private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
+
+      // What prefix of the current term was present in the index:
+      private int validIndexPrefix;
+
+      // assert only:
+      private boolean eof;
+
+      final BytesRef term = new BytesRef();
+      private final FST.BytesReader fstReader;
+
+      @SuppressWarnings({"rawtypes","unchecked"}) private FST.Arc<BytesRef>[] arcs =
+          new FST.Arc[1];
+
+      // Creates an unpositioned enum: the frame stack starts empty,
+      // currentFrame is the static frame, and no block is loaded here.
+      public SegmentTermsEnum() throws IOException {
+        //if (DEBUG) System.out.println("BTTR.init seg=" + segment);
+        stack = new Frame[0];
+
+        // Used to hold seek by TermState, or cached seek
+        staticFrame = new Frame(-1);
+
+        // The terms index may be absent; only obtain a reader when it is there:
+        fstReader = index == null ? null : index.getBytesReader();
+
+        // Init w/ root block; don't use index since it may
+        // not (and need not) have been loaded
+        for (int slot = 0; slot < arcs.length; slot++) {
+          arcs[slot] = new FST.Arc<BytesRef>();
+        }
+
+        if (index != null) {
+          // Fills arcs[0] with the index's first arc:
+          final FST.Arc<BytesRef> rootArc = index.getFirstArc(arcs[0]);
+          // Empty string prefix must have an output in the index!
+          assert rootArc.isFinal();
+        }
+
+        currentFrame = staticFrame;
+        //currentFrame = pushFrame(arc, rootCode, 0);
+        //currentFrame.loadBlock();
+        validIndexPrefix = 0;
+        // if (DEBUG) {
+        //   System.out.println("init frame state " + currentFrame.ord);
+        //   printSeekState();
+        // }
+
+        //System.out.println();
+        // computeBlockStats().print(System.out);
+      }
+      
+      // Not private to avoid synthetic access$NNN methods
+      // Clones the reader's IndexInput on first use; later calls are no-ops.
+      void initIndexInput() {
+        if (this.in != null) {
+          return;
+        }
+        this.in = BlockTreeTermsReader.this.in.clone();
+      }
+
+      /** Walks every block of this field's terms dict, starting from the
+       *  root block, and reports each block start/end and each term to a
+       *  {@link Stats} instance.  When the walk finishes the enum is reset
+       *  to a freshly loaded root frame with an empty term.
+       *
+       *  @return the aggregate statistics, after {@code finish()} was called
+       *  @throws IOException if reading a block fails */
+      public Stats computeBlockStats() throws IOException {
+
+        Stats stats = new Stats(segment, fieldInfo.name);
+        if (index != null) {
+          stats.indexNodeCount = index.getNodeCount();
+          stats.indexArcCount = index.getArcCount();
+          stats.indexNumBytes = index.sizeInBytes();
+        }
+        
+        // Start from the static frame (ord -1) so the pushFrame below
+        // lands on stack slot 0:
+        currentFrame = staticFrame;
+        FST.Arc<BytesRef> arc;
+        if (index != null) {
+          arc = index.getFirstArc(arcs[0]);
+          // Empty string prefix must have an output in the index!
+          assert arc.isFinal();
+        } else {
+          arc = null;
+        }
+
+        // Empty string prefix must have an output in the
+        // index!
+        currentFrame = pushFrame(arc, rootCode, 0);
+        currentFrame.fpOrig = currentFrame.fp;
+        currentFrame.loadBlock();
+        validIndexPrefix = 0;
+
+        stats.startBlock(currentFrame, !currentFrame.isLastInFloor);
+
+        allTerms:
+        while (true) {
+
+          // Pop finished blocks
+          while (currentFrame.nextEnt == currentFrame.entCount) {
+            stats.endBlock(currentFrame);
+            if (!currentFrame.isLastInFloor) {
+              currentFrame.loadNextFloorBlock();
+              stats.startBlock(currentFrame, true);
+            } else {
+              if (currentFrame.ord == 0) {
+                // Root frame exhausted: the whole dict has been visited.
+                break allTerms;
+              }
+              final long lastFP = currentFrame.fpOrig;
+              currentFrame = stack[currentFrame.ord-1];
+              // The parent must still point at the block we just left:
+              assert lastFP == currentFrame.lastSubFP;
+              // if (DEBUG) {
+              //   System.out.println("  reset validIndexPrefix=" + validIndexPrefix);
+              // }
+            }
+          }
+
+          // Descend through sub-block entries until a term entry is read:
+          while(true) {
+            if (currentFrame.next()) {
+              // Push to new block:
+              currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length);
+              currentFrame.fpOrig = currentFrame.fp;
+              // This is a "next" frame -- even if it's
+              // floor'd we must pretend it isn't so we don't
+              // try to scan to the right floor frame:
+              currentFrame.isFloor = false;
+              //currentFrame.hasTerms = true;
+              currentFrame.loadBlock();
+              stats.startBlock(currentFrame, !currentFrame.isLastInFloor);
+            } else {
+              stats.term(term);
+              break;
+            }
+          }
+        }
+
+        stats.finish();
+
+        // Put root frame back:
+        currentFrame = staticFrame;
+        if (index != null) {
+          arc = index.getFirstArc(arcs[0]);
+          // Empty string prefix must have an output in the index!
+          assert arc.isFinal();
+        } else {
+          arc = null;
+        }
+        currentFrame = pushFrame(arc, rootCode, 0);
+        currentFrame.rewind();
+        currentFrame.loadBlock();
+        validIndexPrefix = 0;
+        term.length = 0;
+
+        return stats;
+      }
+
+      // Returns the frame at stack position ord, growing the stack (and
+      // filling the new slots with fresh frames) when ord is out of range.
+      private Frame getFrame(int ord) throws IOException {
+        final int oldLength = stack.length;
+        if (ord >= oldLength) {
+          final Frame[] grown = new Frame[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+          System.arraycopy(stack, 0, grown, 0, oldLength);
+          for (int slot = oldLength; slot < grown.length; slot++) {
+            grown[slot] = new Frame(slot);
+          }
+          stack = grown;
+        }
+        final Frame frame = stack[ord];
+        assert frame.ord == ord;
+        return frame;
+      }
+
+      // Returns the scratch arc at position ord, growing the arcs array
+      // (and filling the new slots with fresh arcs) when ord is out of range.
+      private FST.Arc<BytesRef> getArc(int ord) {
+        final int oldLength = arcs.length;
+        if (ord >= oldLength) {
+          @SuppressWarnings({"rawtypes","unchecked"}) final FST.Arc<BytesRef>[] grown =
+              new FST.Arc[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+          System.arraycopy(arcs, 0, grown, 0, oldLength);
+          for (int slot = oldLength; slot < grown.length; slot++) {
+            grown[slot] = new FST.Arc<BytesRef>();
+          }
+          arcs = grown;
+        }
+        return arcs[ord];
+      }
+
+      // Term order of this enum: the comparator returned by
+      // BytesRef.getUTF8SortedAsUnicodeComparator().
+      @Override
+      public Comparator<BytesRef> getComparator() {
+        return BytesRef.getUTF8SortedAsUnicodeComparator();
+      }
+
+      // Pushes a frame we seek'd to
+      // Pushes a frame we seek'd to.  frameData starts with a vLong that
+      // packs the block file pointer (high bits) with the has-terms and
+      // is-floor flags (low bits); floor data, when flagged, is read from
+      // the same bytes.
+      Frame pushFrame(FST.Arc<BytesRef> arc, BytesRef frameData, int length) throws IOException {
+        scratchReader.reset(frameData.bytes, frameData.offset, frameData.length);
+        final long header = scratchReader.readVLong();
+
+        final Frame frame = getFrame(1+currentFrame.ord);
+        final boolean hasTerms = (header & BlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
+        frame.hasTerms = hasTerms;
+        frame.hasTermsOrig = hasTerms;
+        frame.isFloor = (header & BlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
+        if (frame.isFloor) {
+          frame.setFloorData(scratchReader, frameData);
+        }
+
+        // Strip the flag bits to get the file pointer, then finish the
+        // push through the fp-based overload (it resolves the same slot):
+        final long blockFP = header >>> BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
+        pushFrame(arc, blockFP, length);
+
+        return frame;
+      }
+
+      // Pushes next'd frame or seek'd frame; we later
+      // lazy-load the frame only when needed
+      // Pushes next'd frame or seek'd frame; we later
+      // lazy-load the frame only when needed.  A frame slot that already
+      // refers to the same block (same fpOrig, nextEnt != -1) is kept and
+      // only rewound when its prefix is longer than
+      // targetBeforeCurrentLength; otherwise the slot is reset for fp.
+      Frame pushFrame(FST.Arc<BytesRef> arc, long fp, int length) throws IOException {
+        final Frame f = getFrame(1+currentFrame.ord);
+        f.arc = arc;
+
+        final boolean sameBlock = f.fpOrig == fp && f.nextEnt != -1;
+        if (!sameBlock) {
+          // Reset the slot for a different block:
+          f.nextEnt = -1;
+          f.prefix = length;
+          f.state.termBlockOrd = 0;
+          f.fpOrig = f.fp = fp;
+          f.lastSubFP = -1;
+          // if (DEBUG) {
+          //   final int sav = term.length;
+          //   term.length = length;
+          //   System.out.println("      push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
+          //   term.length = sav;
+          // }
+          return f;
+        }
+
+        //if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp + " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + term.length + " vs prefix=" + f.prefix);
+        if (f.prefix > targetBeforeCurrentLength) {
+          f.rewind();
+        }
+        // else: skip rewind!
+        assert length == f.prefix;
+        return f;
+      }
+
+      // asserts only
+      // Resets the assert-only eof flag.  Always returns true so it can be
+      // invoked as "assert clearEOF();" and costs nothing when assertions
+      // are disabled.
+      private boolean clearEOF() {
+        eof = false;
+        return true;
+      }
+
+      // asserts only
+      // Sets the assert-only eof flag.  Always returns true so it can be
+      // invoked from inside an assert statement.
+      private boolean setEOF() {
+        eof = true;
+        return true;
+      }
+
+      @Override
+      public boolean seekExact(final BytesRef target, final boolean useCache) throws IOException {
+
+        if (index == null) {
+          throw new IllegalStateException("terms index was not loaded");
+        }
+
+        if (term.bytes.length <= target.length) {
+          term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
+        }
+
+        assert clearEOF();
+
+        // if (DEBUG) {
+        //   System.out.println("\nBTTR.seekExact seg=" + segment + " target=" + fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
+        //   printSeekState();
+        // }
+
+        FST.Arc<BytesRef> arc;
+        int targetUpto;
+        BytesRef output;
+
+        targetBeforeCurrentLength = currentFrame.ord;
+
+        if (currentFrame != staticFrame) {
+
+          // We are already seek'd; find the common
+          // prefix of new seek term vs current term and
+          // re-use the corresponding seek state.  For
+          // example, if app first seeks to foobar, then
+          // seeks to foobaz, we can re-use the seek state
+          // for the first 5 bytes.
+
+          // if (DEBUG) {
+          //   System.out.println("  re-use current seek state validIndexPrefix=" + validIndexPrefix);
+          // }
+
+          arc = arcs[0];
+          assert arc.isFinal();
+          output = arc.output;
+          targetUpto = 0;
+          
+          Frame lastFrame = stack[0];
+          assert validIndexPrefix <= term.length;
+
+          final int targetLimit = Math.min(target.length, validIndexPrefix);
+
+          int cmp = 0;
+
+          // TODO: reverse vLong byte order for better FST
+          // prefix output sharing
+
+          // First compare up to valid seek frames:
+          while (targetUpto < targetLimit) {
+            cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+            // if (DEBUG) {
+            //   System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"   + " arc.output=" + arc.output + " output=" + output);
+            // }
+            if (cmp != 0) {
+              break;
+            }
+            arc = arcs[1+targetUpto];
+            //if (arc.label != (target.bytes[target.offset + targetUpto] & 0xFF)) {
+            //System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF));
+            //}
+            assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
+            if (arc.output != NO_OUTPUT) {
+              output = fstOutputs.add(output, arc.output);
+            }
+            if (arc.isFinal()) {
+              lastFrame = stack[1+lastFrame.ord];
+            }
+            targetUpto++;
+          }
+
+          if (cmp == 0) {
+            final int targetUptoMid = targetUpto;
+
+            // Second compare the rest of the term, but
+            // don't save arc/output/frame; we only do this
+            // to find out if the target term is before,
+            // equal or after the current term
+            final int targetLimit2 = Math.min(target.length, term.length);
+            while (targetUpto < targetLimit2) {
+              cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+              // if (DEBUG) {
+              //   System.out.println("    cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
+              // }
+              if (cmp != 0) {
+                break;
+              }
+              targetUpto++;
+            }
+
+            if (cmp == 0) {
+              cmp = term.length - target.length;
+            }
+            targetUpto = targetUptoMid;
+          }
+
+          if (cmp < 0) {
+            // Common case: target term is after current
+            // term, ie, app is seeking multiple terms
+            // in sorted order
+            // if (DEBUG) {
+            //   System.out.println("  target is after current (shares prefixLen=" + targetUpto + "); frame.ord=" + lastFrame.ord);
+            // }
+            currentFrame = lastFrame;
+
+          } else if (cmp > 0) {
+            // Uncommon case: target term
+            // is before current term; this means we can
+            // keep the currentFrame but we must rewind it
+            // (so we scan from the start)
+            targetBeforeCurrentLength = 0;
+            // if (DEBUG) {
+            //   System.out.println("  target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
+            // }
+            currentFrame = lastFrame;
+            currentFrame.rewind();
+          } else {
+            // Target is exactly the same as current term
+            assert term.length == target.length;
+            if (termExists) {
+              // if (DEBUG) {
+              //   System.out.println("  target is same as current; return true");
+              // }
+              return true;
+            } else {
+              // if (DEBUG) {
+              //   System.out.println("  target is same as current but term doesn't exist");
+              // }
+            }
+            //validIndexPrefix = currentFrame.depth;
+            //term.length = target.length;
+            //return termExists;
+          }
+
+        } else {
+
+          targetBeforeCurrentLength = -1;
+          arc = index.getFirstArc(arcs[0]);
+
+          // Empty string prefix must have an output (block) in the index!
+          assert arc.isFinal();
+          assert arc.output != null;
+
+          // if (DEBUG) {
+          //   System.out.println("    no seek state; push root frame");
+          // }
+
+          output = arc.output;
+
+          currentFrame = staticFrame;
+
+          //term.length = 0;
+          targetUpto = 0;
+          currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
+        }
+
+        // if (DEBUG) {
+        //   System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
+        // }
+
+        while (targetUpto < target.length) {
+
+          final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
+
+          final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
+
+          if (nextArc == null) {
+
+            // Index is exhausted
+            // if (DEBUG) {
+            //   System.out.println("    index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
+            // }
+            
+            validIndexPrefix = currentFrame.prefix;
+            //validIndexPrefix = targetUpto;
+
+            currentFrame.scanToFloorFrame(target);
+
+            if (!currentFrame.hasTerms) {
+              termExists = false;
+              term.bytes[targetUpto] = (byte) targetLabel;
+              term.length = 1+targetUpto;
+              // if (DEBUG) {
+              //   System.out.println("  FAST NOT_FOUND term=" + brToString(term));
+              // }
+              return false;
+            }
+
+            currentFrame.loadBlock();
+
+            final SeekStatus result = currentFrame.scanToTerm(target, true);            
+            if (result == SeekStatus.FOUND) {
+              // if (DEBUG) {
+              //   System.out.println("  return FOUND term=" + term.utf8ToString() + " " + term);
+              // }
+              return true;
+            } else {
+              // if (DEBUG) {
+              //   System.out.println("  got " + result + "; return NOT_FOUND term=" + brToString(term));
+              // }
+              return false;
+            }
+          } else {
+            // Follow this arc
+            arc = nextArc;
+            term.bytes[targetUpto] = (byte) targetLabel;
+            // Aggregate output as we go:
+            assert arc.output != null;
+            if (arc.output != NO_OUTPUT) {
+              output = fstOutputs.add(output, arc.output);
+            }
+
+            // if (DEBUG) {
+            //   System.out.println("    index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
+            // }
+            targetUpto++;
+
+            if (arc.isFinal()) {
+              //if (DEBUG) System.out.println("    arc is final!");
+              currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
+              //if (DEBUG) System.out.println("    curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
+            }
+          }
+        }
+
+        //validIndexPrefix = targetUpto;
+        validIndexPrefix = currentFrame.prefix;
+
+        currentFrame.scanToFloorFrame(target);
+
+        // Target term is entirely contained in the index:
+        if (!currentFrame.hasTerms) {
+          termExists = false;
+          term.length = targetUpto;
+          // if (DEBUG) {
+          //   System.out.println("  FAST NOT_FOUND term=" + brToString(term));
+          // }
+          return false;
+        }
+
+        currentFrame.loadBlock();
+
+        final SeekStatus result = currentFrame.scanToTerm(target, true);            
+        if (result == SeekStatus.FOUND) {
+          // if (DEBUG) {
+          //   System.out.println("  return FOUND term=" + term.utf8ToString() + " " + term);
+          // }
+          return true;
+        } else {
+          // if (DEBUG) {
+          //   System.out.println("  got result " + result + "; return NOT_FOUND term=" + term.utf8ToString());
+          // }
+
+          return false;
+        }
+      }
+
+      /**
+       * Seeks this enum to {@code target}, or to the first term after it when the
+       * target itself is not present.
+       *
+       * <p>If an earlier seek left state behind ({@code currentFrame != staticFrame}), the
+       * cached arcs and stacked frames covering the prefix shared by the current term and
+       * {@code target} are re-used; otherwise the walk starts from the first arc of the
+       * terms index and a root frame is pushed.  The remaining target bytes are then
+       * followed through the index until no matching arc exists or the target is consumed,
+       * after which the current frame's block is loaded and scanned.
+       *
+       * @param target term to seek to
+       * @param useCache not read anywhere in this method body
+       * @return {@code FOUND} when the target equals the current, existing term; otherwise
+       *         the status reported by {@code scanToTerm}, except that an {@code END} from
+       *         the block scan is turned into {@code NOT_FOUND} when {@code next()} yields
+       *         another term and stays {@code END} when it does not
+       * @throws IllegalStateException if the terms index was not loaded ({@code index == null})
+       * @throws IOException declared by the method; presumably raised by the index and
+       *         block reads it triggers (confirm against the helpers)
+       */
+      @Override
+      public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
+        if (index == null) {
+          throw new IllegalStateException("terms index was not loaded");
+        }
+   
+        // Make sure the term buffer can hold one byte more than the target.
+        if (term.bytes.length <= target.length) {
+          term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
+        }
+
+        // Wrapped in an assert, so clearEOF() only runs when assertions are enabled.
+        assert clearEOF();
+
+        //if (DEBUG) {
+        //System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=  " + validIndexPrefix);
+        //printSeekState();
+        //}
+
+        FST.Arc<BytesRef> arc;
+        int targetUpto;
+        BytesRef output;
+
+        // Provisional value: the ord of the frame we start from.  Overwritten below with 0
+        // when the target sorts before the current term, or -1 when there is no seek state.
+        targetBeforeCurrentLength = currentFrame.ord;
+
+        if (currentFrame != staticFrame) {
+
+          // We are already seek'd; find the common
+          // prefix of new seek term vs current term and
+          // re-use the corresponding seek state.  For
+          // example, if app first seeks to foobar, then
+          // seeks to foobaz, we can re-use the seek state
+          // for the first 5 bytes.
+
+          //if (DEBUG) {
+          //System.out.println("  re-use current seek state validIndexPrefix=" + validIndexPrefix);
+          //}
+
+          arc = arcs[0];
+          assert arc.isFinal();
+          output = arc.output;
+          targetUpto = 0;
+          
+          Frame lastFrame = stack[0];
+          assert validIndexPrefix <= term.length;
+
+          // Only the first validIndexPrefix bytes have cached arcs/frames to re-use.
+          final int targetLimit = Math.min(target.length, validIndexPrefix);
+
+          // Sign of (current term - target) once the comparison below has finished.
+          int cmp = 0;
+
+          // TODO: we should write our vLong backwards (MSB
+          // first) to get better sharing from the FST
+
+          // First compare up to valid seek frames:
+          while (targetUpto < targetLimit) {
+            cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+            //if (DEBUG) {
+            //System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"   + " arc.output=" + arc.output + " output=" + output);
+            //}
+            if (cmp != 0) {
+              break;
+            }
+            // arcs[i+1] is the cached arc for byte i (arcs[0] is the starting arc).
+            arc = arcs[1+targetUpto];
+            assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
+            // TODO: we could save the outputs in local
+            // byte[][] instead of making new objs every
+            // seek; but, often the FST doesn't have any
+            // shared bytes (but this could change if we
+            // reverse vLong byte order)
+            if (arc.output != NO_OUTPUT) {
+              output = fstOutputs.add(output, arc.output);
+            }
+            // Each final arc along the shared prefix corresponds to the next stacked frame.
+            if (arc.isFinal()) {
+              lastFrame = stack[1+lastFrame.ord];
+            }
+            targetUpto++;
+          }
+
+
+          if (cmp == 0) {
+            // Remember where the re-usable prefix ended; targetUpto is restored to this
+            // value after the ordering-only comparison below.
+            final int targetUptoMid = targetUpto;
+            // Second compare the rest of the term, but
+            // don't save arc/output/frame:
+            final int targetLimit2 = Math.min(target.length, term.length);
+            while (targetUpto < targetLimit2) {
+              cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+              //if (DEBUG) {
+              //System.out.println("    cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
+              //}
+              if (cmp != 0) {
+                break;
+              }
+              targetUpto++;
+            }
+
+            // All compared bytes are equal: the shorter of the two sorts first.
+            if (cmp == 0) {
+              cmp = term.length - target.length;
+            }
+            targetUpto = targetUptoMid;
+          }
+
+          if (cmp < 0) {
+            // Common case: target term is after current
+            // term, ie, app is seeking multiple terms
+            // in sorted order
+            //if (DEBUG) {
+            //System.out.println("  target is after current (shares prefixLen=" + targetUpto + "); clear frame.scanned ord=" + lastFrame.ord);
+            //}
+            currentFrame = lastFrame;
+
+          } else if (cmp > 0) {
+            // Uncommon case: target term
+            // is before current term; this means we can
+            // keep the currentFrame but we must rewind it
+            // (so we scan from the start)
+            targetBeforeCurrentLength = 0;
+            //if (DEBUG) {
+            //System.out.println("  target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
+            //}
+            currentFrame = lastFrame;
+            currentFrame.rewind();
+          } else {
+            // Target is exactly the same as current term
+            assert term.length == target.length;
+            if (termExists) {
+              //if (DEBUG) {
+              //System.out.println("  target is same as current; return FOUND");
+              //}
+              return SeekStatus.FOUND;
+            } else {
+              // Nothing returned here: control falls through to the index walk below,
+              // starting at the restored targetUpto.
+              //if (DEBUG) {
+              //System.out.println("  target is same as current but term doesn't exist");
+              //}
+            }
+          }
+
+        } else {
+
+          // No prior seek state to re-use.
+          targetBeforeCurrentLength = -1;
+          arc = index.getFirstArc(arcs[0]);
+
+          // Empty string prefix must have an output (block) in the index!
+          assert arc.isFinal();
+          assert arc.output != null;
+
+          //if (DEBUG) {
+          //System.out.println("    no seek state; push root frame");
+          //}
+
+          output = arc.output;
+
+          // currentFrame already equals staticFrame on this branch (see the enclosing if).
+          currentFrame = staticFrame;
+
+          //term.length = 0;
+          targetUpto = 0;
+          currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
+        }
+
+        //if (DEBUG) {
+        //System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
+        //}
+
+        // Follow the remaining target bytes through the index, one arc per byte.
+        while (targetUpto < target.length) {
+
+          final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
+
+          final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
+
+          if (nextArc == null) {
+
+            // Index is exhausted
+            // if (DEBUG) {
+            //   System.out.println("    index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
+            // }
+            
+            validIndexPrefix = currentFrame.prefix;
+            //validIndexPrefix = targetUpto;
+
+            currentFrame.scanToFloorFrame(target);
+
+            currentFrame.loadBlock();
+
+            final SeekStatus result = currentFrame.scanToTerm(target, false);
+            if (result == SeekStatus.END) {
+              // The block scan ran off the end: record the target as the (non-existent)
+              // current term and let next() decide between NOT_FOUND and END.
+              term.copyBytes(target);
+              termExists = false;
+
+              if (next() != null) {
+                //if (DEBUG) {
+                //System.out.println("  return NOT_FOUND term=" + brToString(term) + " " + term);
+                //}
+                return SeekStatus.NOT_FOUND;
+              } else {
+                //if (DEBUG) {
+                //System.out.println("  return END");
+                //}
+                return SeekStatus.END;
+              }
+            } else {
+              //if (DEBUG) {
+              //System.out.println("  return " + result + " term=" + brToString(term) + " " + term);
+              //}
+              return result;
+            }
+          } else {
+            // Follow this arc
+            term.bytes[targetUpto] = (byte) targetLabel;
+            arc = nextArc;
+            // Aggregate output as we go:
+            assert arc.output != null;
+            if (arc.output != NO_OUTPUT) {
+              output = fstOutputs.add(output, arc.output);
+            }
+
+            //if (DEBUG) {
+            //System.out.println("    index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
+            //}
+            targetUpto++;
+
+            // A final arc gets its own frame, pushed with the prefix length consumed so far.
+            if (arc.isFinal()) {
+              //if (DEBUG) System.out.println("    arc is final!");
+              currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
+              //if (DEBUG) System.out.println("    curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
+            }
+          }
+        }
+
+        // Every target byte had an arc in the index; finish with the same
+        // floor-scan / load / term-scan sequence used when the index is exhausted.
+        //validIndexPrefix = targetUpto;
+        validIndexPrefix = currentFrame.prefix;
+
+        currentFrame.scanToFloorFrame(target);
+
+        currentFrame.loadBlock();
+
+        final SeekStatus result = currentFrame.scanToTerm(target, false);
+
+        if (result == SeekStatus.END) {
+          term.copyBytes(target);
+          termExists = false;
+          if (next() != null) {
+            //if (DEBUG) {
+            //System.out.println("  return NOT_FOUND term=" + term.utf8ToString() + " " + term);
+            //}
+            return SeekStatus.NOT_FOUND;
+          } else {
+            //if (DEBUG) {
+            //System.out.println("  return END");
+            //}
+            return SeekStatus.END;
+          }
+        } else {
+          return result;
+        }
+      }
+
+      /**
+       * Debugging aid: prints one line per frame on the seek stack, from ord 0 up to and
+       * including {@code currentFrame}, and, when a terms index is loaded, cross-checks
+       * each frame against it.
+       *
+       * @param out stream that receives the dump
+       * @throws IOException declared by the method; presumably from the index lookup
+       * @throws RuntimeException with message "seek state is broken" when a frame
+       *         disagrees with the index
+       */
+      @SuppressWarnings("unused")
+      private void printSeekState(PrintStream out) throws IOException {
+        if (currentFrame == staticFrame) {
+          out.println("  no prior seek");
+          return;
+        }
+
+        out.println("  prior seek state:");
+        // Stays true until a frame whose prefix equals validIndexPrefix has been printed;
+        // every later frame is labelled "(next)".
+        boolean isSeekFrame = true;
+        for (int ord = 0; ; ord++) {
+          final Frame frame = getFrame(ord);
+          assert frame != null;
+          final BytesRef prefix = new BytesRef(term.bytes, 0, frame.prefix);
+
+          // A frame with nextEnt == -1 is printed without the "loaded"-only fields.
+          final boolean loaded = frame.nextEnt != -1;
+          final String kind;
+          if (loaded) {
+            kind = isSeekFrame ? "(seek, loaded)" : "(next, loaded)";
+          } else {
+            kind = isSeekFrame ? "(seek)" : "(next)";
+          }
+          final String floorPart = frame.isFloor ? (" (fpOrig=" + frame.fpOrig + ")") : "";
+          final String entPart = loaded ? (" nextEnt=" + frame.nextEnt + " (of " + frame.entCount + ")") : "";
+          final String subFpPart = loaded ? (" lastSubFP=" + frame.lastSubFP) : "";
+          final long printedCode = (frame.fp<<BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (frame.hasTerms ? BlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (frame.isFloor ? BlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0);
+
+          out.println("    frame " + kind + " ord=" + ord + " fp=" + frame.fp + floorPart + " prefixLen=" + frame.prefix + " prefix=" + prefix + entPart + " hasTerms=" + frame.hasTerms + " isFloor=" + frame.isFloor + " code=" + printedCode + subFpPart + " isLastInFloor=" + frame.isLastInFloor + " mdUpto=" + frame.metaDataUpto + " tbOrd=" + frame.getTermBlockOrd());
+
+          if (index != null) {
+            assert !isSeekFrame || frame.arc != null: "isSeekFrame=" + isSeekFrame + " f.arc=" + frame.arc;
+            if (frame.prefix > 0 && isSeekFrame) {
+              // The arc that led to a seek frame must carry the last byte of its prefix.
+              final int lastPrefixByte = term.bytes[frame.prefix-1]&0xFF;
+              if (frame.arc.label != lastPrefixByte) {
+                out.println("      broken seek state: arc.label=" + (char) frame.arc.label + " vs term byte=" + (char) lastPrefixByte);
+                throw new RuntimeException("seek state is broken");
+              }
+            }
+            final BytesRef indexOutput = Util.get(index, prefix);
+            if (indexOutput == null) {
+              out.println("      broken seek state: prefix is not final in index");
+              throw new RuntimeException("seek state is broken");
+            }
+            if (isSeekFrame && !frame.isFloor) {
+              // Compare the code stored in the index with the one rebuilt from the frame.
+              final ByteArrayDataInput reader = new ByteArrayDataInput(indexOutput.bytes, indexOutput.offset, indexOutput.length);
+              final long codeOrig = reader.readVLong();
+              final long code = (frame.fp << BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) | (frame.hasTerms ? BlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) | (frame.isFloor ? BlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0);
+              if (codeOrig != code) {
+                out.println("      broken seek state: output code=" + codeOrig + " doesn't match frame code=" + code);
+                throw new RuntimeException("seek state is broken");
+              }
+            }
+          }
+
+          if (frame == currentFrame) {
+            break;
+          }
+          if (frame.prefix == validIndexPrefix) {
+            isSeekFrame = false;
+          }
+        }
+      }
+
+      /**
+       * Advances to the next term, decoding only its term bytes.  If the caller then asks
+       * for metadata (docFreq, totalTermFreq) or pulls a DocsEnum/DocsAndPositionsEnum, all
+       * metadata up to the current term is decoded lazily at that point.
+       *
+       * @return this enum's own {@code term} instance (not a copy), or {@code null} once
+       *         the frame with ord 0 has no entries left
+       * @throws IOException declared by the method; presumably raised by the block loads
+       *         it triggers (confirm against the frame helpers)
+       */
+      @Override
+      public BytesRef next() throws IOException {
+
+        if (in == null) {
+          // Fresh TermsEnum; seek to first term:
+          final FST.Arc<BytesRef> arc;
+          if (index != null) {
+            arc = index.getFirstArc(arcs[0]);
+            // Empty string prefix must have an output in the index!
+            assert arc.isFinal();
+          } else {
+            // No terms index loaded: the root frame is pushed without an arc.
+            arc = null;
+          }
+          currentFrame = pushFrame(arc, rootCode, 0);
+          currentFrame.loadBlock();
+        }
+
+        targetBeforeCurrentLength = currentFrame.ord;
+
+        assert !eof;
+        //if (DEBUG) {
+        //System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + " termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
+        //printSeekState();
+        //}
+
+        if (currentFrame == staticFrame) {
+          // If seek was previously called and the term was
+          // cached, or seek(TermState) was called, usually
+          // caller is just going to pull a D/&PEnum or get
+          // docFreq, etc.  But, if they then call next(),
+          // this method catches up all internal state so next()
+          // works properly:
+          //if (DEBUG) System.out.println("  re-seek to pending term=" + term.utf8ToString() + " " + term);
+          final boolean result = seekExact(term, false);
+          // The outcome is only verified when assertions are enabled.
+          assert result;
+        }
+
+        // Pop finished blocks
+        while (currentFrame.nextEnt == currentFrame.entCount) {
+          if (!currentFrame.isLastInFloor) {
+            // More floor blocks remain for this frame: continue with the next one.
+            currentFrame.loadNextFloorBlock();
+          } else {
+            //if (DEBUG) System.out.println("  pop frame");
+            if (currentFrame.ord == 0) {
+              // The bottom frame is exhausted: reset the enum state and report the end.
+              //if (DEBUG) System.out.println("  return null");
+              // Wrapped in an assert, so setEOF() only runs when assertions are enabled.
+              assert setEOF();
+              term.length = 0;
+              validIndexPrefix = 0;
+              currentFrame.rewind();
+              termExists = false;
+              return null;
+            }
+            // Remember which sub-block we are leaving so the parent can be re-aligned to it.
+            final long lastFP = currentFrame.fpOrig;
+            currentFrame = stack[currentFrame.ord-1];
+
+            if (currentFrame.nextEnt == -1 || currentFrame.lastSubFP != lastFP) {
+              // We popped into a frame that's not loaded
+              // yet or not scan'd to the right entry
+              currentFrame.scanToFloorFrame(term);
+              currentFrame.loadBlock();
+              currentFrame.scanToSubBlock(lastFP);
+            }
+
+            // Note that the seek state (last seek) has been
+            // invalidated beyond this depth
+            validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix);
+            //if (DEBUG) {
+            //System.out.println("  reset validIndexPrefix=" + validIndexPrefix);
+            //}
+          }
+        }
+
+        // Keep descending while currentFrame.next() returns true (a new block must be
+        // pushed); the first false return means term holds the next term.
+        while(true) {
+          if (currentFrame.next()) {
+            // Push to new block:
+            //if (DEBUG) System.out.println("  push frame");
+            currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length);
+            // This is a "next" frame -- even if it's
+            // floor'd we must pretend it isn't so we don't
+            // try to scan to the right floor frame:
+            currentFrame.isFloor = false;
+            //currentFrame.hasTerms = true;
+            currentFrame.loadBlock();
+          } else {
+            //if (DEBUG) System.out.println("  return term=" + term.utf8ToString() + " " + term + " currentFrame.ord=" + currentFrame.ord);
+            return term;
+          }
+        }
+      }
+
+      /**
+       * Returns the term this enum is currently positioned on.  The returned object is
+       * the enum's internal {@code term} field, not a copy.
+       */
+      @Override
+      public BytesRef term() {
+        // Checked only when assertions are enabled: must not be called at EOF.
+        assert !eof;
+        return term;
+      }
+
+      /**
+       * Returns the document frequency held in the current frame's term state, after
+       * asking the frame to decode its metadata.
+       *
+       * @throws IOException declared by the method; presumably from metadata decoding
+       */
+      @Override
+      public int docFreq() throws IOException {
+        assert !eof;
+        // The frame's state is only read after decodeMetaData() has run.
+        currentFrame.decodeMetaData();
+        final int freq = currentFrame.state.docFreq;
+        return freq;
+      }
+
+      /**
+       * Returns the total term frequency held in the current frame's term state, after
+       * asking the frame to decode its metadata.
+       *
+       * @throws IOException declared by the method; presumably from metadata decoding
+       */
+      @Override
+      public long totalTermFreq() throws IOException {
+        assert !eof;
+        // The frame's state is only read after decodeMetaData() has run.
+        currentFrame.decodeMetaData();
+        final long freq = currentFrame.state.totalTermFreq;
+        return freq;
+      }
+
+      @Override
+      public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
+        assert !eof;
+        //if (DEBUG) {
+        //System.out.println("BTTR.docs seg=" + segment);
+      

<TRUNCATED>

Mime
View raw message