lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From h..@apache.org
Subject svn commit: r1513336 [3/11] - in /lucene/dev/branches/lucene3069/lucene: ./ analysis/ analysis/common/ analysis/common/src/java/org/apache/lucene/analysis/charfilter/ analysis/common/src/java/org/apache/lucene/analysis/hunspell/ analysis/common/src/jav...
Date Tue, 13 Aug 2013 04:06:27 GMT
Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -684,7 +684,7 @@ public final class DirectPostingsFormat 
       }
 
       @Override
-      public SeekStatus seekCeil(BytesRef term, boolean useCache) {
+      public SeekStatus seekCeil(BytesRef term) {
         // TODO: we should use the skip pointers; should be
         // faster than bin search; we should also hold
         // & reuse current state so seeking forwards is
@@ -707,7 +707,7 @@ public final class DirectPostingsFormat 
       }
 
       @Override
-      public boolean seekExact(BytesRef term, boolean useCache) {
+      public boolean seekExact(BytesRef term) {
         // TODO: we should use the skip pointers; should be
         // faster than bin search; we should also hold
         // & reuse current state so seeking forwards is
@@ -1413,7 +1413,7 @@ public final class DirectPostingsFormat 
       }
 
       @Override
-      public SeekStatus seekCeil(BytesRef term, boolean useCache) {
+      public SeekStatus seekCeil(BytesRef term) {
         throw new UnsupportedOperationException();
       }
 

Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -335,11 +335,11 @@ public final class MemoryPostingsFormat 
     
     public FSTDocsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
       assert numDocs > 0;
-      if (buffer.length < bufferIn.length - bufferIn.offset) {
-        buffer = ArrayUtil.grow(buffer, bufferIn.length - bufferIn.offset);
+      if (buffer.length < bufferIn.length) {
+        buffer = ArrayUtil.grow(buffer, bufferIn.length);
       }
-      in.reset(buffer, 0, bufferIn.length - bufferIn.offset);
-      System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length - bufferIn.offset);
+      in.reset(buffer, 0, bufferIn.length);
+      System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length);
       this.liveDocs = liveDocs;
       docID = -1;
       accum = 0;
@@ -472,11 +472,11 @@ public final class MemoryPostingsFormat 
       //   System.out.println("  " + Integer.toHexString(bufferIn.bytes[i]&0xFF));
       // }
 
-      if (buffer.length < bufferIn.length - bufferIn.offset) {
-        buffer = ArrayUtil.grow(buffer, bufferIn.length - bufferIn.offset);
+      if (buffer.length < bufferIn.length) {
+        buffer = ArrayUtil.grow(buffer, bufferIn.length);
       }
       in.reset(buffer, 0, bufferIn.length - bufferIn.offset);
-      System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length - bufferIn.offset);
+      System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length);
       this.liveDocs = liveDocs;
       docID = -1;
       accum = 0;
@@ -632,6 +632,7 @@ public final class MemoryPostingsFormat 
     private int docFreq;
     private long totalTermFreq;
     private BytesRefFSTEnum.InputOutput<BytesRef> current;
+    private BytesRef postingsSpare = new BytesRef();
 
     public FSTTermsEnum(FieldInfo field, FST<BytesRef> fst) {
       this.field = field;
@@ -640,21 +641,23 @@ public final class MemoryPostingsFormat 
 
     private void decodeMetaData() {
       if (!didDecode) {
-        buffer.reset(current.output.bytes, 0, current.output.length);
+        buffer.reset(current.output.bytes, current.output.offset, current.output.length);
         docFreq = buffer.readVInt();
         if (field.getIndexOptions() != IndexOptions.DOCS_ONLY) {
           totalTermFreq = docFreq + buffer.readVLong();
         } else {
           totalTermFreq = -1;
         }
-        current.output.offset = buffer.getPosition();
+        postingsSpare.bytes = current.output.bytes;
+        postingsSpare.offset = buffer.getPosition();
+        postingsSpare.length = current.output.length - (buffer.getPosition() - current.output.offset);
         //System.out.println("  df=" + docFreq + " totTF=" + totalTermFreq + " offset=" + buffer.getPosition() + " len=" + current.output.length);
         didDecode = true;
       }
     }
 
     @Override
-    public boolean seekExact(BytesRef text, boolean useCache /* ignored */) throws IOException {
+    public boolean seekExact(BytesRef text) throws IOException {
       //System.out.println("te.seekExact text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
       current = fstEnum.seekExact(text);
       didDecode = false;
@@ -662,7 +665,7 @@ public final class MemoryPostingsFormat 
     }
 
     @Override
-    public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
+    public SeekStatus seekCeil(BytesRef text) throws IOException {
       //System.out.println("te.seek text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
       current = fstEnum.seekCeil(text);
       if (current == null) {
@@ -699,7 +702,7 @@ public final class MemoryPostingsFormat 
           docsEnum = new FSTDocsEnum(field.getIndexOptions(), field.hasPayloads());
         }
       }
-      return docsEnum.reset(current.output, liveDocs, docFreq);
+      return docsEnum.reset(this.postingsSpare, liveDocs, docFreq);
     }
 
     @Override
@@ -720,7 +723,7 @@ public final class MemoryPostingsFormat 
         }
       }
       //System.out.println("D&P reset this=" + this);
-      return docsAndPositionsEnum.reset(current.output, liveDocs, docFreq);
+      return docsAndPositionsEnum.reset(postingsSpare, liveDocs, docFreq);
     }
 
     @Override

Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -103,8 +103,7 @@ public abstract class PulsingPostingsFor
                                                     state.directory, state.fieldInfos, state.segmentInfo,
                                                     pulsingReader,
                                                     state.context,
-                                                    state.segmentSuffix,
-                                                    state.termsIndexDivisor);
+                                                    state.segmentSuffix);
       success = true;
       return ret;
     } finally {

Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Tue Aug 13 04:06:18 2013
@@ -109,7 +109,7 @@ class SimpleTextFieldsReader extends Fie
     }
 
     @Override
-    public boolean seekExact(BytesRef text, boolean useCache /* ignored */) throws IOException {
+    public boolean seekExact(BytesRef text) throws IOException {
 
       final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekExact(text);
       if (result != null) {
@@ -125,7 +125,7 @@ class SimpleTextFieldsReader extends Fie
     }
 
     @Override
-    public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
+    public SeekStatus seekCeil(BytesRef text) throws IOException {
 
       //System.out.println("seek to text=" + text.utf8ToString());
       final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekCeil(text);

Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Tue Aug 13 04:06:18 2013
@@ -331,7 +331,7 @@ public class SimpleTextTermVectorsReader
     }
     
     @Override
-    public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
+    public SeekStatus seekCeil(BytesRef text) throws IOException {
       iterator = terms.tailMap(text).entrySet().iterator();
       if (!iterator.hasNext()) {
         return SeekStatus.END;

Modified: lucene/dev/branches/lucene3069/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -25,10 +25,8 @@ import org.apache.lucene.util._TestUtil;
 /**
  * Basic tests of a PF using FixedGap terms dictionary
  */
-// TODO: we should add an instantiation for VarGap too to TestFramework, and a test in this package
-// TODO: ensure both of these are also in rotation in RandomCodec
 public class TestFixedGapPostingsFormat extends BasePostingsFormatTestCase {
-  private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41WithOrds());
+  private final Codec codec = _TestUtil.alwaysPostingsFormat(new Lucene41WithOrds(_TestUtil.nextInt(random(), 1, 1000)));
 
   @Override
   protected Codec getCodec() {

Modified: lucene/dev/branches/lucene3069/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/common-build.xml?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/common-build.xml (original)
+++ lucene/dev/branches/lucene3069/lucene/common-build.xml Tue Aug 13 04:06:18 2013
@@ -230,9 +230,6 @@
   <property name="svn.exe" value="svn" />
   <property name="perl.exe" value="perl" />
   
-  <property name="hg.exe" value="hg" />
-  <property name="moman.url" value="https://bitbucket.org/jpbarrette/moman" />
-  <property name="moman.rev" value="120" />
   <property name="python.exe" value="python" />
   <property name="python32.exe" value="python3.2" />
 
@@ -445,20 +442,6 @@
     </sequential>
   </macrodef>
 
-  <target name="jflex-uptodate-check">
-    <uptodate property="jflex.files.uptodate">
-      <srcfiles dir="${src.dir}" includes="**/*.jflex" />
-      <mapper type="glob" from="*.jflex" to="*.java"/>
-    </uptodate>
-  </target>
- 
-  <target name="jflex-notice" depends="jflex-uptodate-check" unless="jflex.files.uptodate">
-    <echo>
-      One or more of the JFlex .jflex files is newer than its corresponding
-      .java file.  Run the "jflex" target to regenerate the artifacts.
-    </echo>
-  </target>
-
   <target name="jflex-check">
     <available property="jflex.present" classname="jflex.anttask.JFlexTask">
       <classpath refid="jflex.classpath"/>
@@ -471,7 +454,7 @@
       Please install the jFlex 1.5 version (currently not released)
       from its SVN repository:
 
-       svn co http://jflex.svn.sourceforge.net/svnroot/jflex/trunk jflex
+       svn co -r 623 http://jflex.svn.sourceforge.net/svnroot/jflex/trunk jflex
        cd jflex
        mvn install
 
@@ -2100,6 +2083,8 @@ ${tests-output}/junit4-*.suites     - pe
     <property name="pegdown.loaded" value="true"/>
   </target>
   
+  <target name="regenerate"/>
+	
   <macrodef name="pegdown">
     <attribute name="todir"/>
     <attribute name="flatten" default="false"/>

Modified: lucene/dev/branches/lucene3069/lucene/core/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/build.xml?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/build.xml (original)
+++ lucene/dev/branches/lucene3069/lucene/core/build.xml Tue Aug 13 04:06:18 2013
@@ -24,6 +24,9 @@
 
   <import file="../common-build.xml"/>
 
+  <property name="moman.commit-hash" value="5c5c2a1e4dea" />
+  <property name="moman.url" value="https://bitbucket.org/jpbarrette/moman/get/${moman.commit-hash}.zip" />
+
   <path id="classpath"/>
   
   <path id="test.classpath">
@@ -109,36 +112,24 @@
     <fixcrlf srcdir="src/java/org/apache/lucene/util/packed" includes="BulkOperation*.java,Direct*.java,Packed64SingleBlock.java,Packed*ThreeBlocks.py" encoding="UTF-8"/>
   </target>
 
-  <target name="createLevAutomata" depends="check-moman,clone-moman,pull-moman">
+  <target name="createLevAutomata" depends="check-moman,download-moman">
     <createLevAutomaton n="1"/>
     <createLevAutomaton n="2"/>
   </target>
   
   <target name="check-moman">
-    <condition property="moman.cloned">
-      <available file="${build.dir}/moman"/>
-    </condition>
+    <available file="${build.dir}/moman" property="moman.downloaded"/>
   </target>
 
-  <target name="clone-moman" unless="moman.cloned">
-    <mkdir dir="${build.dir}"/>
-    <exec dir="${build.dir}" 
-          executable="${hg.exe}" failonerror="true">
-      <arg value="clone"/>
-      <arg value="-r"/>
-      <arg value="${moman.rev}"/>
-      <arg value="${moman.url}"/>
-      <arg value="moman"/>
-    </exec>
+  <target name="download-moman" unless="moman.downloaded">
+    <mkdir dir="${build.dir}/moman"/>
+    <get src="${moman.url}" dest="${build.dir}/moman.zip"/>
+    <unzip dest="${build.dir}/moman" src="${build.dir}/moman.zip">
+      <cutdirsmapper dirs="1"/>
+    </unzip>
+    <delete file="${build.dir}/moman.zip"/>
   </target>
 
-  <target name="pull-moman" if="moman.cloned">
-    <exec dir="${build.dir}/moman" 
-          executable="${hg.exe}" failonerror="true">
-      <arg value="pull"/>
-      <arg value="-f"/>
-      <arg value="-r"/>
-      <arg value="${moman.rev}"/>
-    </exec>
-  </target>
+  <target name="regenerate" depends="createLevAutomata,createPackedIntSources"/>
+
 </project>

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java Tue Aug 13 04:06:18 2013
@@ -51,16 +51,35 @@ public abstract class AnalyzerWrapper ex
 
   /**
    * Wraps / alters the given TokenStreamComponents, taken from the wrapped
-   * Analyzer, to form new components.  It is through this method that new
-   * TokenFilters can be added by AnalyzerWrappers.
-   *
-   *
-   * @param fieldName Name of the field which is to be analyzed
-   * @param components TokenStreamComponents taken from the wrapped Analyzer
+   * Analyzer, to form new components. It is through this method that new
+   * TokenFilters can be added by AnalyzerWrappers. By default, the given
+   * components are returned.
+   * 
+   * @param fieldName
+   *          Name of the field which is to be analyzed
+   * @param components
+   *          TokenStreamComponents taken from the wrapped Analyzer
    * @return Wrapped / altered TokenStreamComponents.
    */
-  protected abstract TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components);
+  protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+    return components;
+  }
 
+  /**
+   * Wraps / alters the given Reader. Through this method AnalyzerWrappers can
+   * implement {@link #initReader(String, Reader)}. By default, the given reader
+   * is returned.
+   * 
+   * @param fieldName
+   *          name of the field which is to be analyzed
+   * @param reader
+   *          the reader to wrap
+   * @return the wrapped reader
+   */
+  protected Reader wrapReader(String fieldName, Reader reader) {
+    return reader;
+  }
+  
   @Override
   protected final TokenStreamComponents createComponents(String fieldName, Reader aReader) {
     return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName, aReader));
@@ -78,6 +97,6 @@ public abstract class AnalyzerWrapper ex
 
   @Override
   public final Reader initReader(String fieldName, Reader reader) {
-    return getWrappedAnalyzer(fieldName).initReader(fieldName, reader);
+    return getWrappedAnalyzer(fieldName).initReader(fieldName, wrapReader(fieldName, reader));
   }
 }

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java Tue Aug 13 04:06:18 2013
@@ -32,7 +32,8 @@ import org.apache.lucene.util.automaton.
 // TODO: maybe also toFST?  then we can translate atts into FST outputs/weights
 
 /** Consumes a TokenStream and creates an {@link Automaton}
- *  where the transition labels are UTF8 bytes from the {@link
+ *  where the transition labels are UTF8 bytes (or Unicode 
+ *  code points if unicodeArcs is true) from the {@link
  *  TermToBytesRefAttribute}.  Between tokens we insert
  *  POS_SEP and for holes we insert HOLE.
  *
@@ -40,6 +41,7 @@ import org.apache.lucene.util.automaton.
 public class TokenStreamToAutomaton {
 
   private boolean preservePositionIncrements;
+  private boolean unicodeArcs;
 
   /** Sole constructor. */
   public TokenStreamToAutomaton() {
@@ -51,6 +53,12 @@ public class TokenStreamToAutomaton {
     this.preservePositionIncrements = enablePositionIncrements;
   }
 
+  /** Whether to make transition labels Unicode code points instead of UTF8 bytes, 
+   *  <code>false</code> by default */
+  public void setUnicodeArcs(boolean unicodeArcs) {
+    this.unicodeArcs = unicodeArcs;
+  }
+
   private static class Position implements RollingBuffer.Resettable {
     // Any tokens that ended at our position arrive to this state:
     State arriving;
@@ -80,15 +88,16 @@ public class TokenStreamToAutomaton {
   }
 
   /** We create transition between two adjacent tokens. */
-  public static final int POS_SEP = 256;
+  public static final int POS_SEP = 0x001f;
 
   /** We add this arc to represent a hole. */
-  public static final int HOLE = 257;
+  public static final int HOLE = 0x001e;
 
   /** Pulls the graph (including {@link
    *  PositionLengthAttribute}) from the provided {@link
    *  TokenStream}, and creates the corresponding
-   *  automaton where arcs are bytes from each term. */
+   *  automaton where arcs are bytes (or Unicode code points 
+   *  if unicodeArcs = true) from each term. */
   public Automaton toAutomaton(TokenStream in) throws IOException {
     final Automaton a = new Automaton();
     boolean deterministic = true;
@@ -156,16 +165,34 @@ public class TokenStreamToAutomaton {
       final int endPos = pos + posLengthAtt.getPositionLength();
 
       termBytesAtt.fillBytesRef();
-      final BytesRef term2 = changeToken(term);
+      final BytesRef termUTF8 = changeToken(term);
+      int[] termUnicode = null;
       final Position endPosData = positions.get(endPos);
       if (endPosData.arriving == null) {
         endPosData.arriving = new State();
       }
 
       State state = posData.leaving;
-      for(int byteIDX=0;byteIDX<term2.length;byteIDX++) {
-        final State nextState = byteIDX == term2.length-1 ? endPosData.arriving : new State();
-        state.addTransition(new Transition(term2.bytes[term2.offset + byteIDX] & 0xff, nextState));
+      int termLen;
+      if (unicodeArcs) {
+        final String utf16 = termUTF8.utf8ToString();
+        termUnicode = new int[utf16.codePointCount(0, utf16.length())];
+        termLen = termUnicode.length;
+        for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp))
+          termUnicode[j++] = cp = utf16.codePointAt(i);
+      } else {
+        termLen = termUTF8.length;
+      }
+
+      for(int byteIDX=0;byteIDX<termLen;byteIDX++) {
+        final State nextState = byteIDX == termLen-1 ? endPosData.arriving : new State();
+        int c;
+        if (unicodeArcs) {
+          c = termUnicode[byteIDX];
+        } else {
+          c = termUTF8.bytes[termUTF8.offset + byteIDX] & 0xff;
+        }
+        state.addTransition(new Transition(c, nextState));
         state = nextState;
       }
 

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java Tue Aug 13 04:06:18 2013
@@ -67,9 +67,9 @@ import org.apache.lucene.util.fst.Util;
  *  does not support a pluggable terms index
  *  implementation).
  *
- *  <p><b>NOTE</b>: this terms dictionary does not support
- *  index divisor when opening an IndexReader.  Instead, you
- *  can change the min/maxItemsPerBlock during indexing.</p>
+ *  <p><b>NOTE</b>: this terms dictionary supports
+ *  min/maxItemsPerBlock during indexing to control how
+ *  much memory the terms index uses.</p>
  *
  *  <p>The data structure used by this implementation is very
  *  similar to a burst trie
@@ -112,7 +112,7 @@ public class BlockTreeTermsReader extend
   /** Sole constructor. */
   public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
                               PostingsReaderBase postingsReader, IOContext ioContext,
-                              String segmentSuffix, int indexDivisor)
+                              String segmentSuffix)
     throws IOException {
     
     this.postingsReader = postingsReader;
@@ -126,13 +126,11 @@ public class BlockTreeTermsReader extend
 
     try {
       version = readHeader(in);
-      if (indexDivisor != -1) {
-        indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION),
+      indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION),
                                 ioContext);
-        int indexVersion = readIndexHeader(indexIn);
-        if (indexVersion != version) {
-          throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
-        }
+      int indexVersion = readIndexHeader(indexIn);
+      if (indexVersion != version) {
+        throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
       }
 
       // Have PostingsReader init itself
@@ -140,9 +138,7 @@ public class BlockTreeTermsReader extend
 
       // Read per-field details
       seekDir(in, dirOffset);
-      if (indexDivisor != -1) {
-        seekDir(indexIn, indexDirOffset);
-      }
+      seekDir(indexIn, indexDirOffset);
 
       final int numFields = in.readVInt();
       if (numFields < 0) {
@@ -171,15 +167,13 @@ public class BlockTreeTermsReader extend
         if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
           throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
         }
-        final long indexStartFP = indexDivisor != -1 ? indexIn.readVLong() : 0;
+        final long indexStartFP = indexIn.readVLong();
         FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn));
         if (previous != null) {
           throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
         }
       }
-      if (indexDivisor != -1) {
-        indexIn.close();
-      }
+      indexIn.close();
 
       success = true;
     } finally {
@@ -1222,7 +1216,7 @@ public class BlockTreeTermsReader extend
       }
 
       @Override
-      public boolean seekExact(BytesRef text, boolean useCache) {
+      public boolean seekExact(BytesRef text) {
         throw new UnsupportedOperationException();
       }
 
@@ -1237,7 +1231,7 @@ public class BlockTreeTermsReader extend
       }
 
       @Override
-      public SeekStatus seekCeil(BytesRef text, boolean useCache) {
+      public SeekStatus seekCeil(BytesRef text) {
         throw new UnsupportedOperationException();
       }
     }
@@ -1499,7 +1493,7 @@ public class BlockTreeTermsReader extend
       }
 
       @Override
-      public boolean seekExact(final BytesRef target, final boolean useCache) throws IOException {
+      public boolean seekExact(final BytesRef target) throws IOException {
 
         if (index == null) {
           throw new IllegalStateException("terms index was not loaded");
@@ -1713,7 +1707,6 @@ public class BlockTreeTermsReader extend
             if (arc.output != NO_OUTPUT) {
               output = fstOutputs.add(output, arc.output);
             }
-
             // if (DEBUG) {
             //   System.out.println("    index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
             // }
@@ -1760,7 +1753,7 @@ public class BlockTreeTermsReader extend
       }
 
       @Override
-      public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
+      public SeekStatus seekCeil(final BytesRef target) throws IOException {
         if (index == null) {
           throw new IllegalStateException("terms index was not loaded");
         }
@@ -2096,7 +2089,7 @@ public class BlockTreeTermsReader extend
           // this method catches up all internal state so next()
           // works properly:
           //if (DEBUG) System.out.println("  re-seek to pending term=" + term.utf8ToString() + " " + term);
-          final boolean result = seekExact(term, false);
+          final boolean result = seekExact(term);
           assert result;
         }
 

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java Tue Aug 13 04:06:18 2013
@@ -824,7 +824,7 @@ public final class CompressingTermVector
     }
 
     @Override
-    public SeekStatus seekCeil(BytesRef text, boolean useCache)
+    public SeekStatus seekCeil(BytesRef text)
         throws IOException {
       if (ord < numTerms && ord >= 0) {
         final int cmp = term().compareTo(text);
@@ -851,16 +851,7 @@ public final class CompressingTermVector
 
     @Override
     public void seekExact(long ord) throws IOException {
-      if (ord < -1 || ord >= numTerms) {
-        throw new IOException("ord is out of range: ord=" + ord + ", numTerms=" + numTerms);
-      }
-      if (ord < this.ord) {
-        reset();
-      }
-      for (int i = this.ord; i < ord; ++i) {
-        next();
-      }
-      assert ord == this.ord();
+      throw new UnsupportedOperationException();
     }
 
     @Override
@@ -870,7 +861,7 @@ public final class CompressingTermVector
 
     @Override
     public long ord() throws IOException {
-      return ord;
+      throw new UnsupportedOperationException();
     }
 
     @Override

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -258,8 +258,7 @@ public class Lucene40PostingsFormat exte
                                                     state.segmentInfo,
                                                     postings,
                                                     state.context,
-                                                    state.segmentSuffix,
-                                                    state.termsIndexDivisor);
+                                                    state.segmentSuffix);
       success = true;
       return ret;
     } finally {

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java Tue Aug 13 04:06:18 2013
@@ -433,7 +433,7 @@ public class Lucene40TermVectorsReader e
 
     // NOTE: slow!  (linear scan)
     @Override
-    public SeekStatus seekCeil(BytesRef text, boolean useCache)
+    public SeekStatus seekCeil(BytesRef text)
       throws IOException {
       if (nextTerm != 0) {
         final int cmp = text.compareTo(term);

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -439,8 +439,7 @@ public final class Lucene41PostingsForma
                                                     state.segmentInfo,
                                                     postingsReader,
                                                     state.context,
-                                                    state.segmentSuffix,
-                                                    state.termsIndexDivisor);
+                                                    state.segmentSuffix);
       success = true;
       return ret;
     } finally {

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Tue Aug 13 04:06:18 2013
@@ -490,7 +490,7 @@ class Lucene42DocValuesProducer extends 
     }
 
     @Override
-    public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
+    public SeekStatus seekCeil(BytesRef text) throws IOException {
       if (in.seekCeil(text) == null) {
         return SeekStatus.END;
       } else if (term().equals(text)) {
@@ -503,7 +503,7 @@ class Lucene42DocValuesProducer extends 
     }
 
     @Override
-    public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
+    public boolean seekExact(BytesRef text) throws IOException {
       if (in.seekExact(text) == null) {
         return false;
       } else {

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockPostingsFormat.java Tue Aug 13 04:06:18 2013
@@ -437,8 +437,7 @@ public final class TempBlockPostingsForm
                                                     state.segmentInfo,
                                                     postingsReader,
                                                     state.context,
-                                                    state.segmentSuffix,
-                                                    state.termsIndexDivisor);
+                                                    state.segmentSuffix);
       success = true;
       return ret;
     } finally {

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempBlockTermsReader.java Tue Aug 13 04:06:18 2013
@@ -117,7 +117,7 @@ public class TempBlockTermsReader extend
   /** Sole constructor. */
   public TempBlockTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
                               TempPostingsReaderBase postingsReader, IOContext ioContext,
-                              String segmentSuffix, int indexDivisor)
+                              String segmentSuffix)
     throws IOException {
     
     this.postingsReader = postingsReader;
@@ -131,13 +131,11 @@ public class TempBlockTermsReader extend
 
     try {
       version = readHeader(in);
-      if (indexDivisor != -1) {
-        indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, TempBlockTermsWriter.TERMS_INDEX_EXTENSION),
+      indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, TempBlockTermsWriter.TERMS_INDEX_EXTENSION),
                                 ioContext);
-        int indexVersion = readIndexHeader(indexIn);
-        if (indexVersion != version) {
-          throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
-        }
+      int indexVersion = readIndexHeader(indexIn);
+      if (indexVersion != version) {
+        throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
       }
 
       // Have PostingsReader init itself
@@ -145,9 +143,7 @@ public class TempBlockTermsReader extend
 
       // Read per-field details
       seekDir(in, dirOffset);
-      if (indexDivisor != -1) {
-        seekDir(indexIn, indexDirOffset);
-      }
+      seekDir(indexIn, indexDirOffset);
 
       final int numFields = in.readVInt();
       if (numFields < 0) {
@@ -177,15 +173,13 @@ public class TempBlockTermsReader extend
         if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
           throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
         }
-        final long indexStartFP = indexDivisor != -1 ? indexIn.readVLong() : 0;
+        final long indexStartFP = indexIn.readVLong();
         FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn));
         if (previous != null) {
           throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
         }
       }
-      if (indexDivisor != -1) {
-        indexIn.close();
-      }
+      indexIn.close();
 
       success = true;
     } finally {
@@ -1251,7 +1245,7 @@ public class TempBlockTermsReader extend
       }
 
       @Override
-      public boolean seekExact(BytesRef text, boolean useCache) {
+      public boolean seekExact(BytesRef text) {
         throw new UnsupportedOperationException();
       }
 
@@ -1266,7 +1260,7 @@ public class TempBlockTermsReader extend
       }
 
       @Override
-      public SeekStatus seekCeil(BytesRef text, boolean useCache) {
+      public SeekStatus seekCeil(BytesRef text) {
         throw new UnsupportedOperationException();
       }
     }
@@ -1528,7 +1522,7 @@ public class TempBlockTermsReader extend
       }
 
       @Override
-      public boolean seekExact(final BytesRef target, final boolean useCache) throws IOException {
+      public boolean seekExact(final BytesRef target) throws IOException {
 
         if (index == null) {
           throw new IllegalStateException("terms index was not loaded");
@@ -1789,7 +1783,7 @@ public class TempBlockTermsReader extend
       }
 
       @Override
-      public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
+      public SeekStatus seekCeil(final BytesRef target) throws IOException {
         if (index == null) {
           throw new IllegalStateException("terms index was not loaded");
         }
@@ -2125,7 +2119,7 @@ public class TempBlockTermsReader extend
           // this method catches up all internal state so next()
           // works properly:
           //if (DEBUG) System.out.println("  re-seek to pending term=" + term.utf8ToString() + " " + term);
-          final boolean result = seekExact(term, false);
+          final boolean result = seekExact(term);
           assert result;
         }
 

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java Tue Aug 13 04:06:18 2013
@@ -461,7 +461,7 @@ public class TempFSTOrdTermsReader exten
       public BytesRef next() throws IOException {
         if (seekPending) {  // previously positioned, but termOutputs not fetched
           seekPending = false;
-          SeekStatus status = seekCeil(term, false);
+          SeekStatus status = seekCeil(term);
           assert status == SeekStatus.FOUND;  // must positioned on valid term
         }
         updateEnum(fstEnum.next());
@@ -469,13 +469,13 @@ public class TempFSTOrdTermsReader exten
       }
 
       @Override
-      public boolean seekExact(BytesRef target, boolean useCache) throws IOException {
+      public boolean seekExact(BytesRef target) throws IOException {
         updateEnum(fstEnum.seekExact(target));
         return term != null;
       }
 
       @Override
-      public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {
+      public SeekStatus seekCeil(BytesRef target) throws IOException {
         updateEnum(fstEnum.seekCeil(target));
         if (term == null) {
           return SeekStatus.END;
@@ -587,17 +587,9 @@ public class TempFSTOrdTermsReader exten
         super.decodeStats();
       }
 
-      // nocommit: need testcase for this
       @Override
-      public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {
-        decoded = false;
-        term = doSeekCeil(target);
-        decodeStats();
-        if (term == null) {
-          return SeekStatus.END;
-        } else {
-          return term.equals(target) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
-        }
+      public SeekStatus seekCeil(BytesRef target) throws IOException {
+        throw new UnsupportedOperationException();
       }
 
       @Override

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTTermsReader.java Tue Aug 13 04:06:18 2013
@@ -348,7 +348,7 @@ public class TempFSTTermsReader extends 
       public BytesRef next() throws IOException {
         if (seekPending) {  // previously positioned, but termOutputs not fetched
           seekPending = false;
-          SeekStatus status = seekCeil(term, false);
+          SeekStatus status = seekCeil(term);
           assert status == SeekStatus.FOUND;  // must positioned on valid term
         }
         updateEnum(fstEnum.next());
@@ -356,13 +356,13 @@ public class TempFSTTermsReader extends 
       }
 
       @Override
-      public boolean seekExact(BytesRef target, boolean useCache) throws IOException {
+      public boolean seekExact(BytesRef target) throws IOException {
         updateEnum(fstEnum.seekExact(target));
         return term != null;
       }
 
       @Override
-      public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {
+      public SeekStatus seekCeil(BytesRef target) throws IOException {
         updateEnum(fstEnum.seekCeil(target));
         if (term == null) {
           return SeekStatus.END;
@@ -497,7 +497,7 @@ public class TempFSTTermsReader extends 
       }
 
       @Override
-      public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {
+      public SeekStatus seekCeil(BytesRef target) throws IOException {
         decoded = false;
         term = doSeekCeil(target);
         loadMetaData();

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java Tue Aug 13 04:06:18 2013
@@ -78,7 +78,7 @@ public abstract class AtomicReader exten
       return 0;
     }
     final TermsEnum termsEnum = terms.iterator(null);
-    if (termsEnum.seekExact(term.bytes(), true)) {
+    if (termsEnum.seekExact(term.bytes())) {
       return termsEnum.docFreq();
     } else {
       return 0;
@@ -101,7 +101,7 @@ public abstract class AtomicReader exten
       return 0;
     }
     final TermsEnum termsEnum = terms.iterator(null);
-    if (termsEnum.seekExact(term.bytes(), true)) {
+    if (termsEnum.seekExact(term.bytes())) {
       return termsEnum.totalTermFreq();
     } else {
       return 0;
@@ -156,7 +156,7 @@ public abstract class AtomicReader exten
       final Terms terms = fields.terms(term.field());
       if (terms != null) {
         final TermsEnum termsEnum = terms.iterator(null);
-        if (termsEnum.seekExact(term.bytes(), true)) {
+        if (termsEnum.seekExact(term.bytes())) {
           return termsEnum.docs(getLiveDocs(), null);
         }
       }
@@ -176,7 +176,7 @@ public abstract class AtomicReader exten
       final Terms terms = fields.terms(term.field());
       if (terms != null) {
         final TermsEnum termsEnum = terms.iterator(null);
-        if (termsEnum.seekExact(term.bytes(), true)) {
+        if (termsEnum.seekExact(term.bytes())) {
           return termsEnum.docsAndPositions(getLiveDocs(), null);
         }
       }

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java Tue Aug 13 04:06:18 2013
@@ -26,7 +26,8 @@ import org.apache.lucene.util.ByteBlockP
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
 
 import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
 
@@ -36,14 +37,14 @@ import static org.apache.lucene.util.Byt
 class BinaryDocValuesWriter extends DocValuesWriter {
 
   private final ByteBlockPool pool;
-  private final AppendingLongBuffer lengths;
+  private final AppendingDeltaPackedLongBuffer lengths;
   private final FieldInfo fieldInfo;
   private int addedValues = 0;
 
   public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
     this.fieldInfo = fieldInfo;
     this.pool = new ByteBlockPool(new DirectTrackingAllocator(iwBytesUsed));
-    this.lengths = new AppendingLongBuffer();
+    this.lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
   }
 
   public void addValue(int docID, BytesRef value) {
@@ -90,7 +91,7 @@ class BinaryDocValuesWriter extends DocV
   // iterates over the values we have in ram
   private class BytesIterator implements Iterator<BytesRef> {
     final BytesRef value = new BytesRef();
-    final AppendingLongBuffer.Iterator lengthsIterator = lengths.iterator();
+    final AppendingDeltaPackedLongBuffer.Iterator lengthsIterator = lengths.iterator();
     final int size = (int) lengths.size();
     final int maxDoc;
     int upto;

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java Tue Aug 13 04:06:18 2013
@@ -33,9 +33,9 @@ import org.apache.lucene.util.RamUsageEs
  * deletes are pushed (on flush in DocumentsWriter), these
  * deletes are converted to a FrozenDeletes instance. */
 
-// NOTE: we are sync'd by BufferedDeletes, ie, all access to
-// instances of this class is via sync'd methods on
-// BufferedDeletes
+// NOTE: instances of this class are accessed either via a private
+// instance on DocumentWriterPerThread, or via sync'd code by
+// DocumentsWriterDeleteQueue
 
 class BufferedDeletes {
 
@@ -136,6 +136,9 @@ class BufferedDeletes {
     }
 
     terms.put(term, Integer.valueOf(docIDUpto));
+    // note that if current != null then it means there's already a buffered
+    // delete on that term, therefore we seem to over-count. this over-counting
+    // is done to respect IndexWriterConfig.setMaxBufferedDeleteTerms.
     numTermDeletes.incrementAndGet();
     if (current == null) {
       bytesUsed.addAndGet(BYTES_PER_DEL_TERM + term.bytes.length + (RamUsageEstimator.NUM_BYTES_CHAR * term.field().length()));

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java Tue Aug 13 04:06:18 2013
@@ -390,7 +390,7 @@ class BufferedDeletesStream {
 
       // System.out.println("  term=" + term);
 
-      if (termsEnum.seekExact(term.bytes(), false)) {
+      if (termsEnum.seekExact(term.bytes())) {
         // we don't need term frequencies for this
         DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, DocsEnum.FLAG_NONE);
         //System.out.println("BDS: got docsEnum=" + docsEnum);

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Tue Aug 13 04:06:18 2013
@@ -30,8 +30,8 @@ import java.util.Map;
 
 import org.apache.lucene.codecs.BlockTreeTermsReader;
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.PostingsFormat; // javadocs
-import org.apache.lucene.document.FieldType; // for javadocs
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
@@ -159,19 +159,6 @@ public class CheckIndex {
        *  segment. */
       public double sizeMB;
 
-      /** Doc store offset, if this segment shares the doc
-       *  store files (stored fields and term vectors) with
-       *  other segments.  This is -1 if it does not share. */
-      public int docStoreOffset = -1;
-    
-      /** String of the shared doc store segment, or null if
-       *  this segment does not share the doc store files. */
-      public String docStoreSegment;
-
-      /** True if the shared doc store files are compound file
-       *  format. */
-      public boolean docStoreCompoundFile;
-
       /** True if this segment has pending deletions. */
       public boolean hasDeletions;
 
@@ -297,10 +284,21 @@ public class CheckIndex {
       DocValuesStatus() {
       }
 
-      /** Number of documents tested. */
-      public int docCount;
       /** Total number of docValues tested. */
       public long totalValueFields;
+      
+      /** Total number of numeric fields */
+      public long totalNumericFields;
+      
+      /** Total number of binary fields */
+      public long totalBinaryFields;
+      
+      /** Total number of sorted fields */
+      public long totalSortedFields;
+      
+      /** Total number of sortedset fields */
+      public long totalSortedSetFields;
+      
       /** Exception thrown during doc values test (null on success) */
       public Throwable error = null;
     }
@@ -535,7 +533,7 @@ public class CheckIndex {
         }
         if (infoStream != null)
           infoStream.print("    test: open reader.........");
-        reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT);
+        reader = new SegmentReader(info, IOContext.DEFAULT);
 
         segInfoStat.openReaderPassed = true;
 
@@ -1117,7 +1115,7 @@ public class CheckIndex {
             long totDocCountNoDeletes = 0;
             long totDocFreq = 0;
             for(int i=0;i<seekCount;i++) {
-              if (!termsEnum.seekExact(seekTerms[i], true)) {
+              if (!termsEnum.seekExact(seekTerms[i])) {
                 throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
               }
               
@@ -1272,7 +1270,7 @@ public class CheckIndex {
       for (FieldInfo fieldInfo : reader.getFieldInfos()) {
         if (fieldInfo.hasDocValues()) {
           status.totalValueFields++;
-          checkDocValues(fieldInfo, reader, infoStream);
+          checkDocValues(fieldInfo, reader, infoStream, status);
         } else {
           if (reader.getBinaryDocValues(fieldInfo.name) != null ||
               reader.getNumericDocValues(fieldInfo.name) != null ||
@@ -1283,7 +1281,11 @@ public class CheckIndex {
         }
       }
 
-      msg(infoStream, "OK [" + status.docCount + " total doc count; " + status.totalValueFields + " docvalues fields]");
+      msg(infoStream, "OK [" + status.totalValueFields + " docvalues fields; "
+                             + status.totalBinaryFields + " BINARY; " 
+                             + status.totalNumericFields + " NUMERIC; "
+                             + status.totalSortedFields + " SORTED; "
+                             + status.totalSortedSetFields + " SORTED_SET]");
     } catch (Throwable e) {
       msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
       status.error = e;
@@ -1382,9 +1384,10 @@ public class CheckIndex {
     }
   }
   
-  private static void checkDocValues(FieldInfo fi, AtomicReader reader, PrintStream infoStream) throws Exception {
+  private static void checkDocValues(FieldInfo fi, AtomicReader reader, PrintStream infoStream, DocValuesStatus status) throws Exception {
     switch(fi.getDocValuesType()) {
       case SORTED:
+        status.totalSortedFields++;
         checkSortedDocValues(fi.name, reader, reader.getSortedDocValues(fi.name));
         if (reader.getBinaryDocValues(fi.name) != null ||
             reader.getNumericDocValues(fi.name) != null ||
@@ -1393,6 +1396,7 @@ public class CheckIndex {
         }
         break;
       case SORTED_SET:
+        status.totalSortedSetFields++;
         checkSortedSetDocValues(fi.name, reader, reader.getSortedSetDocValues(fi.name));
         if (reader.getBinaryDocValues(fi.name) != null ||
             reader.getNumericDocValues(fi.name) != null ||
@@ -1401,6 +1405,7 @@ public class CheckIndex {
         }
         break;
       case BINARY:
+        status.totalBinaryFields++;
         checkBinaryDocValues(fi.name, reader, reader.getBinaryDocValues(fi.name));
         if (reader.getNumericDocValues(fi.name) != null ||
             reader.getSortedDocValues(fi.name) != null ||
@@ -1409,6 +1414,7 @@ public class CheckIndex {
         }
         break;
       case NUMERIC:
+        status.totalNumericFields++;
         checkNumericDocValues(fi.name, reader, reader.getNumericDocValues(fi.name));
         if (reader.getBinaryDocValues(fi.name) != null ||
             reader.getSortedDocValues(fi.name) != null ||
@@ -1543,7 +1549,7 @@ public class CheckIndex {
                 }
 
                 final DocsEnum postingsDocs2;
-                if (!postingsTermsEnum.seekExact(term, true)) {
+                if (!postingsTermsEnum.seekExact(term)) {
                   throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
                 }
                 postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings);
@@ -1677,7 +1683,7 @@ public class CheckIndex {
    *
    * <p><b>WARNING</b>: Make sure you only call this when the
    *  index is not opened  by any writer. */
-  public void fixIndex(Status result, Codec codec) throws IOException {
+  public void fixIndex(Status result) throws IOException {
     if (result.partial)
       throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
     result.newSegments.changed();
@@ -1732,7 +1738,6 @@ public class CheckIndex {
 
     boolean doFix = false;
     boolean doCrossCheckTermVectors = false;
-    Codec codec = Codec.getDefault(); // only used when fixing
     boolean verbose = false;
     List<String> onlySegments = new ArrayList<String>();
     String indexPath = null;
@@ -1744,13 +1749,6 @@ public class CheckIndex {
         doFix = true;
       } else if ("-crossCheckTermVectors".equals(arg)) {
         doCrossCheckTermVectors = true;
-      } else if ("-codec".equals(arg)) {
-        if (i == args.length-1) {
-          System.out.println("ERROR: missing name for -codec option");
-          System.exit(1);
-        }
-        i++;
-        codec = Codec.forName(args[i]);
       } else if (arg.equals("-verbose")) {
         verbose = true;
       } else if (arg.equals("-segment")) {
@@ -1851,7 +1849,7 @@ public class CheckIndex {
           System.out.println("  " + (5-s) + "...");
         }
         System.out.println("Writing...");
-        checker.fixIndex(result, codec);
+        checker.fixIndex(result);
         System.out.println("OK");
         System.out.println("Wrote new segments file \"" + result.newSegments.getSegmentsFileName() + "\"");
       }

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java Tue Aug 13 04:06:18 2013
@@ -52,9 +52,6 @@ import org.apache.lucene.store.NoSuchDir
 */
 public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader> {
 
-  /** Default termInfosIndexDivisor. */
-  public static final int DEFAULT_TERMS_INDEX_DIVISOR = 1;
-
   /** The index directory. */
   protected final Directory directory;
   
@@ -64,29 +61,7 @@ public abstract class DirectoryReader ex
    * @throws IOException if there is a low-level IO error
    */
   public static DirectoryReader open(final Directory directory) throws IOException {
-    return StandardDirectoryReader.open(directory, null, DEFAULT_TERMS_INDEX_DIVISOR);
-  }
-  
-  /** Expert: Returns a IndexReader reading the index in the given
-   *  Directory with the given termInfosIndexDivisor.
-   * @param directory the index directory
-   * @param termInfosIndexDivisor Subsamples which indexed
-   *  terms are loaded into RAM. This has the same effect as {@link
-   *  IndexWriterConfig#setTermIndexInterval} except that setting
-   *  must be done at indexing time while this setting can be
-   *  set per reader.  When set to N, then one in every
-   *  N*termIndexInterval terms in the index is loaded into
-   *  memory.  By setting this to a value > 1 you can reduce
-   *  memory usage, at the expense of higher latency when
-   *  loading a TermInfo.  The default value is 1.  Set this
-   *  to -1 to skip loading the terms index entirely.
-   *  <b>NOTE:</b> divisor settings &gt; 1 do not apply to all PostingsFormat
-   *  implementations, including the default one in this release. It only makes
-   *  sense for terms indexes that can efficiently re-sample terms at load time.
-   * @throws IOException if there is a low-level IO error
-   */
-  public static DirectoryReader open(final Directory directory, int termInfosIndexDivisor) throws IOException {
-    return StandardDirectoryReader.open(directory, null, termInfosIndexDivisor);
+    return StandardDirectoryReader.open(directory, null);
   }
   
   /**
@@ -118,29 +93,7 @@ public abstract class DirectoryReader ex
    * @throws IOException if there is a low-level IO error
    */
   public static DirectoryReader open(final IndexCommit commit) throws IOException {
-    return StandardDirectoryReader.open(commit.getDirectory(), commit, DEFAULT_TERMS_INDEX_DIVISOR);
-  }
-
-  /** Expert: returns an IndexReader reading the index in the given
-   *  {@link IndexCommit} and termInfosIndexDivisor.
-   * @param commit the commit point to open
-   * @param termInfosIndexDivisor Subsamples which indexed
-   *  terms are loaded into RAM. This has the same effect as {@link
-   *  IndexWriterConfig#setTermIndexInterval} except that setting
-   *  must be done at indexing time while this setting can be
-   *  set per reader.  When set to N, then one in every
-   *  N*termIndexInterval terms in the index is loaded into
-   *  memory.  By setting this to a value > 1 you can reduce
-   *  memory usage, at the expense of higher latency when
-   *  loading a TermInfo.  The default value is 1.  Set this
-   *  to -1 to skip loading the terms index entirely.
-   *  <b>NOTE:</b> divisor settings &gt; 1 do not apply to all PostingsFormat
-   *  implementations, including the default one in this release. It only makes
-   *  sense for terms indexes that can efficiently re-sample terms at load time.
-   * @throws IOException if there is a low-level IO error
-   */
-  public static DirectoryReader open(final IndexCommit commit, int termInfosIndexDivisor) throws IOException {
-    return StandardDirectoryReader.open(commit.getDirectory(), commit, termInfosIndexDivisor);
+    return StandardDirectoryReader.open(commit.getDirectory(), commit);
   }
 
   /**

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java Tue Aug 13 04:06:18 2013
@@ -659,7 +659,7 @@ public class DocTermOrds {
     }
 
     @Override
-    public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {
+    public SeekStatus seekCeil(BytesRef target) throws IOException {
 
       // already here
       if (term != null && term.equals(target)) {
@@ -729,7 +729,7 @@ public class DocTermOrds {
         //System.out.println("  do seek term=" + base.utf8ToString());
         ord = idx << indexIntervalBits;
         delta = (int) (targetOrd - ord);
-        final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base, true);
+        final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base);
         assert seekStatus == TermsEnum.SeekStatus.FOUND;
       } else {
         //System.out.println("seek w/in block");

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java Tue Aug 13 04:06:18 2013
@@ -403,8 +403,8 @@ class DocumentsWriterPerThread {
     ++numDocsInRAM;
   }
 
-  // Buffer a specific docID for deletion.  Currently only
-  // used when we hit a exception when adding a document
+  // Buffer a specific docID for deletion. Currently only
+  // used when we hit an exception when adding a document
   void deleteDocID(int docIDUpto) {
     pendingDeletes.addDocID(docIDUpto);
     // NOTE: we do not trigger flush here.  This is
@@ -468,7 +468,6 @@ class DocumentsWriterPerThread {
     assert deleteSlice == null : "all deletes must be applied in prepareFlush";
     segmentInfo.setDocCount(numDocsInRAM);
     flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.finish(),
-        writer.getConfig().getTermIndexInterval(),
         pendingDeletes, new IOContext(new FlushInfo(numDocsInRAM, bytesUsed())));
     final double startMBUsed = parent.flushControl.netBytes() / 1024. / 1024.;
 

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java Tue Aug 13 04:06:18 2013
@@ -147,7 +147,10 @@ abstract class DocumentsWriterPerThreadP
   @Override
   public DocumentsWriterPerThreadPool clone() {
     // We should only be cloned before being used:
-    assert numThreadStatesActive == 0;
+    if (numThreadStatesActive != 0) {
+      throw new IllegalStateException("clone this object before it is used!");
+    }
+    
     DocumentsWriterPerThreadPool clone;
     try {
       clone = (DocumentsWriterPerThreadPool) super.clone();

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Tue Aug 13 04:06:18 2013
@@ -157,8 +157,8 @@ public class FilterAtomicReader extends 
     }
 
     @Override
-    public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
-      return in.seekCeil(text, useCache);
+    public SeekStatus seekCeil(BytesRef text) throws IOException {
+      return in.seekCeil(text);
     }
 
     @Override

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java Tue Aug 13 04:06:18 2013
@@ -154,7 +154,7 @@ public abstract class FilteredTermsEnum 
    *         support seeking.
    */
   @Override
-  public boolean seekExact(BytesRef term, boolean useCache) throws IOException {
+  public boolean seekExact(BytesRef term) throws IOException {
     throw new UnsupportedOperationException(getClass().getName()+" does not support seeking");
   }
 
@@ -163,7 +163,7 @@ public abstract class FilteredTermsEnum 
    *         support seeking.
    */
   @Override
-  public SeekStatus seekCeil(BytesRef term, boolean useCache) throws IOException {
+  public SeekStatus seekCeil(BytesRef term) throws IOException {
     throw new UnsupportedOperationException(getClass().getName()+" does not support seeking");
   }
 
@@ -222,7 +222,7 @@ public abstract class FilteredTermsEnum 
         //System.out.println("  seek to t=" + (t == null ? "null" : t.utf8ToString()) + " tenum=" + tenum);
         // Make sure we always seek forward:
         assert actualTerm == null || t == null || getComparator().compare(t, actualTerm) > 0: "curTerm=" + actualTerm + " seekTerm=" + t;
-        if (t == null || tenum.seekCeil(t, false) == SeekStatus.END) {
+        if (t == null || tenum.seekCeil(t) == SeekStatus.END) {
           // no more terms to seek to or enum exhausted
           //System.out.println("  return null");
           return null;

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java Tue Aug 13 04:06:18 2013
@@ -20,23 +20,32 @@ package org.apache.lucene.index;
 import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
 
 /**
- * Default {@link FlushPolicy} implementation that flushes based on RAM used,
- * document count and number of buffered deletes depending on the IndexWriter's
- * {@link IndexWriterConfig}.
+ * Default {@link FlushPolicy} implementation that flushes new segments based on
+ * RAM used and document count depending on the IndexWriter's
+ * {@link IndexWriterConfig}. It also applies pending deletes based on the
+ * number of buffered delete terms.
  * 
  * <ul>
- * <li>{@link #onDelete(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} - flushes
- * based on the global number of buffered delete terms iff
- * {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} is enabled</li>
- * <li>{@link #onInsert(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} - flushes
- * either on the number of documents per {@link DocumentsWriterPerThread} (
+ * <li>
+ * {@link #onDelete(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)}
+ * - applies pending delete operations based on the global number of buffered
+ * delete terms iff {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} is
+ * enabled</li>
+ * <li>
+ * {@link #onInsert(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)}
+ * - flushes either on the number of documents per
+ * {@link DocumentsWriterPerThread} (
  * {@link DocumentsWriterPerThread#getNumDocsInRAM()}) or on the global active
  * memory consumption in the current indexing session iff
  * {@link IndexWriterConfig#getMaxBufferedDocs()} or
  * {@link IndexWriterConfig#getRAMBufferSizeMB()} is enabled respectively</li>
- * <li>{@link #onUpdate(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} - calls
- * {@link #onInsert(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} and
- * {@link #onDelete(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)} in order</li>
+ * <li>
+ * {@link #onUpdate(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)}
+ * - calls
+ * {@link #onInsert(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)}
+ * and
+ * {@link #onDelete(DocumentsWriterFlushControl, DocumentsWriterPerThreadPool.ThreadState)}
+ * in order</li>
  * </ul>
  * All {@link IndexWriterConfig} settings are used to mark
  * {@link DocumentsWriterPerThread} as flush pending during indexing with

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java Tue Aug 13 04:06:18 2013
@@ -32,18 +32,19 @@ import org.apache.lucene.util.SetOnce;
  * {@link IndexWriterConfig#setRAMBufferSizeMB(double)}</li>
  * <li>Number of RAM resident documents - configured via
  * {@link IndexWriterConfig#setMaxBufferedDocs(int)}</li>
- * <li>Number of buffered delete terms/queries - configured via
- * {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)}</li>
  * </ul>
- * 
- * The {@link IndexWriter} consults a provided {@link FlushPolicy} to control the
- * flushing process. The policy is informed for each added or
- * updated document as well as for each delete term. Based on the
- * {@link FlushPolicy}, the information provided via {@link ThreadState} and
+ * The policy also applies pending delete operations (by term and/or query),
+ * given the threshold set in
+ * {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)}.
+ * <p>
+ * {@link IndexWriter} consults the provided {@link FlushPolicy} to control the
+ * flushing process. The policy is informed for each added or updated document
+ * as well as for each delete term. Based on the {@link FlushPolicy}, the
+ * information provided via {@link ThreadState} and
  * {@link DocumentsWriterFlushControl}, the {@link FlushPolicy} decides if a
- * {@link DocumentsWriterPerThread} needs flushing and mark it as
- * flush-pending via
- * {@link DocumentsWriterFlushControl#setFlushPending(DocumentsWriterPerThreadPool.ThreadState)}.
+ * {@link DocumentsWriterPerThread} needs flushing and mark it as flush-pending
+ * via {@link DocumentsWriterFlushControl#setFlushPending}, or if deletes need
+ * to be applied.
  * 
  * @see ThreadState
  * @see DocumentsWriterFlushControl

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Tue Aug 13 04:06:18 2013
@@ -630,15 +630,13 @@ public class IndexWriter implements Clos
 
   /**
    * Constructs a new IndexWriter per the settings given in <code>conf</code>.
-   * Note that the passed in {@link IndexWriterConfig} is
-   * privately cloned, which, in-turn, clones the
-   * {@link IndexWriterConfig#getFlushPolicy() flush policy},
-   * {@link IndexWriterConfig#getIndexDeletionPolicy() deletion policy},
-   * {@link IndexWriterConfig#getMergePolicy() merge policy},
-   * and {@link IndexWriterConfig#getMergeScheduler() merge scheduler}.
-   * If you need to make subsequent "live"
-   * changes to the configuration use {@link #getConfig}.
+   * If you want to make "live" changes to this writer instance, use
+   * {@link #getConfig()}.
+   * 
    * <p>
+   * <b>NOTE:</b> after ths writer is created, the given configuration instance
+   * cannot be passed to another writer. If you intend to do so, you should
+   * {@link IndexWriterConfig#clone() clone} it beforehand.
    * 
    * @param d
    *          the index directory. The index is either created or appended
@@ -653,7 +651,8 @@ public class IndexWriter implements Clos
    *           IO error
    */
   public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException {
-    config = new LiveIndexWriterConfig(conf.clone());
+    conf.setIndexWriter(this); // prevent reuse by other instances
+    config = new LiveIndexWriterConfig(conf);
     directory = d;
     analyzer = config.getAnalyzer();
     infoStream = config.getInfoStream();
@@ -2429,15 +2428,16 @@ public class IndexWriter implements Clos
    * close the writer. See <a href="#OOME">above</a> for details.
    * 
    * <p>
+   * <b>NOTE:</b> empty segments are dropped by this method and not added to this
+   * index.
+   * 
+   * <p>
    * <b>NOTE:</b> this method merges all given {@link IndexReader}s in one
    * merge. If you intend to merge a large number of readers, it may be better
    * to call this method multiple times, each time with a small set of readers.
    * In principle, if you use a merge policy with a {@code mergeFactor} or
    * {@code maxMergeAtOnce} parameter, you should pass that many readers in one
-   * call. Also, if the given readers are {@link DirectoryReader}s, they can be
-   * opened with {@code termIndexInterval=-1} to save RAM, since during merge
-   * the in-memory structure is not used. See
-   * {@link DirectoryReader#open(Directory, int)}.
+   * call.
    * 
    * <p>
    * <b>NOTE</b>: if you call {@link #close(boolean)} with <tt>false</tt>, which
@@ -2462,11 +2462,20 @@ public class IndexWriter implements Clos
       String mergedName = newSegmentName();
       final List<AtomicReader> mergeReaders = new ArrayList<AtomicReader>();
       for (IndexReader indexReader : readers) {
-        numDocs += indexReader.numDocs();
-        for (AtomicReaderContext ctx : indexReader.leaves()) {
-          mergeReaders.add(ctx.reader());
+        if (indexReader.numDocs() > 0) {
+          numDocs += indexReader.numDocs();
+          for (AtomicReaderContext ctx : indexReader.leaves()) {
+            if (ctx.reader().numDocs() > 0) { // drop empty (or all deleted) segments
+              mergeReaders.add(ctx.reader());
+            }
+          }
         }
       }
+      
+      if (mergeReaders.isEmpty()) { // no segments with documents to add
+        return;
+      }
+      
       final IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1));
 
       // TODO: somehow we should fix this merge so it's
@@ -2476,7 +2485,7 @@ public class IndexWriter implements Clos
       SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1,
                                          false, codec, null, null);
 
-      SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, config.getTermIndexInterval(),
+      SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
                                                MergeState.CheckAbort.NONE, globalFieldNumberMap, context);
 
       MergeState mergeState;
@@ -3658,7 +3667,7 @@ public class IndexWriter implements Clos
         // Hold onto the "live" reader; we will use this to
         // commit merged deletes
         final ReadersAndLiveDocs rld = readerPool.get(info, true);
-        SegmentReader reader = rld.getMergeReader(context);
+        SegmentReader reader = rld.getReader(context);
         assert reader != null;
 
         // Carefully pull the most recent live docs:
@@ -3715,7 +3724,7 @@ public class IndexWriter implements Clos
       // we pass merge.getMergeReaders() instead of merge.readers to allow the
       // OneMerge to return a view over the actual segments to merge
       final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(),
-          merge.info.info, infoStream, dirWrapper, config.getTermIndexInterval(),
+          merge.info.info, infoStream, dirWrapper,
           checkAbort, globalFieldNumberMap, context);
 
       merge.checkAborted(directory);

Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1513336&r1=1513335&r2=1513336&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java Tue Aug 13 04:06:18 2013
@@ -26,6 +26,8 @@ import org.apache.lucene.index.IndexWrit
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.PrintStreamInfoStream;
+import org.apache.lucene.util.SetOnce;
+import org.apache.lucene.util.SetOnce.AlreadySetException;
 import org.apache.lucene.util.Version;
 
 /**
@@ -70,9 +72,6 @@ public final class IndexWriterConfig ext
     CREATE_OR_APPEND 
   }
 
-  /** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */
-  public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
-
   /** Denotes a flush trigger is disabled. */
   public final static int DISABLE_AUTO_FLUSH = -1;
 
@@ -98,9 +97,6 @@ public final class IndexWriterConfig ext
   /** Default setting for {@link #setReaderPooling}. */
   public final static boolean DEFAULT_READER_POOLING = false;
 
-  /** Default value is 1. Change using {@link #setReaderTermsIndexDivisor(int)}. */
-  public static final int DEFAULT_READER_TERMS_INDEX_DIVISOR = DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR;
-
   /** Default value is 1945. Change using {@link #setRAMPerThreadHardLimitMB(int)} */
   public static final int DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB = 1945;
   
@@ -132,6 +128,21 @@ public final class IndexWriterConfig ext
     return WRITE_LOCK_TIMEOUT;
   }
 
+  // indicates whether this config instance is already attached to a writer.
+  // not final so that it can be cloned properly.
+  private SetOnce<IndexWriter> writer = new SetOnce<IndexWriter>();
+  
+  /**
+   * Sets the {@link IndexWriter} this config is attached to.
+   * 
+   * @throws AlreadySetException
+   *           if this config is already attached to a writer.
+   */
+  IndexWriterConfig setIndexWriter(IndexWriter writer) {
+    this.writer.set(writer);
+    return this;
+  }
+  
   /**
    * Creates a new config that with defaults that match the specified
    * {@link Version} as well as the default {@link
@@ -152,6 +163,8 @@ public final class IndexWriterConfig ext
     try {
       IndexWriterConfig clone = (IndexWriterConfig) super.clone();
       
+      clone.writer = writer.clone();
+      
       // Mostly shallow clone, but do a deepish clone of
       // certain objects that have state that cannot be shared
       // across IW instances:
@@ -484,16 +497,6 @@ public final class IndexWriterConfig ext
     return super.getRAMBufferSizeMB();
   }
   
-  @Override
-  public int getReaderTermsIndexDivisor() {
-    return super.getReaderTermsIndexDivisor();
-  }
-  
-  @Override
-  public int getTermIndexInterval() {
-    return super.getTermIndexInterval();
-  }
-  
   /** If non-null, information about merges, deletes and a
    * message when maxFieldLength is reached will be printed
    * to this.
@@ -536,17 +539,15 @@ public final class IndexWriterConfig ext
   }
   
   @Override
-  public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
-    return (IndexWriterConfig) super.setReaderTermsIndexDivisor(divisor);
-  }
-  
-  @Override
-  public IndexWriterConfig setTermIndexInterval(int interval) {
-    return (IndexWriterConfig) super.setTermIndexInterval(interval);
-  }
-  
   public IndexWriterConfig setUseCompoundFile(boolean useCompoundFile) {
     return (IndexWriterConfig) super.setUseCompoundFile(useCompoundFile);
   }
 
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder(super.toString());
+    sb.append("writer=").append(writer).append("\n");
+    return sb.toString();
+  }
+  
 }



Mime
View raw message