lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1231794 [2/3] - in /lucene/dev/trunk: lucene/ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/ lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/ lucene/contrib/memory/src/java/org/apache/l...
Date Sun, 15 Jan 2012 23:17:47 GMT
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java Sun Jan 15 23:17:45 2012
@@ -289,6 +289,20 @@ public class CheckIndex {
     infoStream = null;
   }
 
+  private boolean crossCheckTermVectors;
+
+  /** If true, term vectors are compared against postings to
+   *  make sure they are the same.  This will likely
+   *  drastically increase time it takes to run CheckIndex! */
+  public void setCrossCheckTermVectors(boolean v) {
+    crossCheckTermVectors = v;
+  }
+
+  /** See {@link #setCrossCheckTermVectors}. */
+  public boolean getCrossCheckTermVectors() {
+    return crossCheckTermVectors;
+  }
+
   private boolean verbose;
 
   /** Set infoStream where messages should go.  If null, no
@@ -563,7 +577,7 @@ public class CheckIndex {
         segInfoStat.fieldNormStatus = testFieldNorms(fieldInfos, reader);
 
         // Test the Term Index
-        segInfoStat.termIndexStatus = testTermIndex(reader);
+        segInfoStat.termIndexStatus = testPostings(reader);
 
         // Test Stored Fields
         segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);
@@ -678,7 +692,11 @@ public class CheckIndex {
   /**
    * Test the term index.
    */
-  private Status.TermIndexStatus testTermIndex(SegmentReader reader) {
+  private Status.TermIndexStatus testPostings(SegmentReader reader) {
+
+    // TODO: we should go and verify term vectors match, if
+    // crossCheckTermVectors is on...
+
     final Status.TermIndexStatus status = new Status.TermIndexStatus();
 
     final int maxDoc = reader.maxDoc();
@@ -760,7 +778,7 @@ public class CheckIndex {
 
           docs = termsEnum.docs(liveDocs, docs, false);
           docsAndFreqs = termsEnum.docs(liveDocs, docsAndFreqs, true);
-          postings = termsEnum.docsAndPositions(liveDocs, postings);
+          postings = termsEnum.docsAndPositions(liveDocs, postings, false);
 
           if (hasOrd) {
             long ord = -1;
@@ -890,7 +908,7 @@ public class CheckIndex {
           if (hasPositions) {
             for(int idx=0;idx<7;idx++) {
               final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
-              postings = termsEnum.docsAndPositions(liveDocs, postings);
+              postings = termsEnum.docsAndPositions(liveDocs, postings, false);
               final int docID = postings.advance(skipDocID);
               if (docID == DocsEnum.NO_MORE_DOCS) {
                 break;
@@ -1256,7 +1274,10 @@ public class CheckIndex {
   private Status.TermVectorStatus testTermVectors(SegmentInfo info, SegmentReader reader, NumberFormat format) {
     final Status.TermVectorStatus status = new Status.TermVectorStatus();
     
-    TermsEnum termsEnum = null;
+    // TODO: in theory we could test that term vectors have
+    // same terms/pos/offsets as the postings, but it'd be
+    // very slow...
+
     try {
       if (infoStream != null) {
         infoStream.print("    test: term vectors........");
@@ -1264,9 +1285,25 @@ public class CheckIndex {
 
       // TODO: maybe we can factor out testTermIndex and reuse here?
       DocsEnum docs = null;
-      DocsEnum docsAndFreqs = null;
       DocsAndPositionsEnum postings = null;
+
+      // Only used if crossCheckTermVectors is true:
+      DocsEnum postingsDocs = null;
+      DocsAndPositionsEnum postingsPostings = null;
+
       final Bits liveDocs = reader.getLiveDocs();
+
+      final Fields postingsFields;
+      // TODO: testTermsIndex
+      if (crossCheckTermVectors) {
+        postingsFields = reader.fields();
+      } else {
+        postingsFields = null;
+      }
+
+      TermsEnum termsEnum = null;
+      TermsEnum postingsTermsEnum = null;
+
       for (int j = 0; j < info.docCount; ++j) {
         if (liveDocs == null || liveDocs.get(j)) {
           status.docCount++;
@@ -1290,6 +1327,16 @@ public class CheckIndex {
               
               Terms terms = tfv.terms(field);
               termsEnum = terms.iterator(termsEnum);
+
+              if (crossCheckTermVectors) {
+                Terms postingsTerms = postingsFields.terms(field);
+                if (postingsTerms == null) {
+                  throw new RuntimeException("vector field=" + field + " does not exist in postings; doc=" + j);
+                }
+                postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum);
+              } else {
+                postingsTermsEnum = null;
+              }
               
               long tfvComputedTermCountForField = 0;
               long tfvComputedSumTotalTermFreq = 0;
@@ -1308,52 +1355,171 @@ public class CheckIndex {
                   throw new RuntimeException("totalTermFreq: " + totalTermFreq + " is out of bounds");
                 }
 
-                postings = termsEnum.docsAndPositions(null, postings);
+                final boolean hasPositions;
+                final boolean hasOffsets;
+                final boolean hasFreqs;
+
+                // TODO: really we need a reflection/query
+                // API so we can just ask what was indexed
+                // instead of "probing"...
+
+                // Try offsets:
+                postings = termsEnum.docsAndPositions(null, postings, true);
                 if (postings == null) {
-                  docsAndFreqs = termsEnum.docs(null, docsAndFreqs, true);
-                  if (docsAndFreqs == null) {
-                    docs = termsEnum.docs(null, docs, false);
+                  hasOffsets = false;
+                  // Try only positions:
+                  postings = termsEnum.docsAndPositions(null, postings, false);
+                  if (postings == null) {
+                    hasPositions = false;
+                    // Try docIDs & freqs:
+                    docs = termsEnum.docs(null, docs, true);
+                    if (docs == null) {
+                      // OK, only docIDs:
+                      hasFreqs = false;
+                      docs = termsEnum.docs(null, docs, false);
+                    } else {
+                      hasFreqs = true;
+                    }
+                  } else {
+                    hasPositions = true;
+                    hasFreqs = true;
+                  }
+                } else {
+                  hasOffsets = true;
+                  // NOTE: may be a lie... but we accept -1 below
+                  hasPositions = true;
+                  hasFreqs = true;
+                }
+
+                final DocsEnum docs2;
+                if (hasPositions || hasOffsets) {
+                  assert postings != null;
+                  docs2 = postings;
+                } else {
+                  assert docs != null;
+                  docs2 = docs;
+                }
+
+                final DocsEnum postingsDocs2;
+                final boolean postingsHasFreq;
+                if (crossCheckTermVectors) {
+                  if (!postingsTermsEnum.seekExact(term, true)) {
+                    throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
+                  }
+                  postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings, true);
+                  if (postingsPostings == null) {
+                    // Term vectors were indexed w/ offsets but postings were not
+                    postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings, false);
+                    if (postingsPostings == null) {
+                      postingsDocs = postingsTermsEnum.docs(null, postingsDocs, true);
+                      if (postingsDocs == null) {
+                        postingsHasFreq = false;
+                        postingsDocs = postingsTermsEnum.docs(null, postingsDocs, false);
+                        if (postingsDocs == null) {
+                          throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
+                        }
+                      } else {
+                        postingsHasFreq = true;
+                      }
+                    } else {
+                      postingsHasFreq = true;
+                    }
                   } else {
-                    docs = docsAndFreqs;
+                    postingsHasFreq = true;
+                  }
+
+                  if (postingsPostings != null) {
+                    postingsDocs2 = postingsPostings;
+                  } else {
+                    postingsDocs2 = postingsDocs;
+                  }
+                  
+                  final int advanceDoc = postingsDocs2.advance(j);
+                  if (advanceDoc != j) {
+                    throw new RuntimeException("vector term=" + term + " field=" + field + ": doc=" + j + " was not found in postings (got: " + advanceDoc + ")");
                   }
                 } else {
-                  docs = docsAndFreqs = postings;
+                  postingsDocs2 = null;
+                  postingsHasFreq = false;
                 }
 
-                final int doc = docs.nextDoc();
+                final int doc = docs2.nextDoc();
                   
                 if (doc != 0) {
                   throw new RuntimeException("vector for doc " + j + " didn't return docID=0: got docID=" + doc);
                 }
 
-                if (docsAndFreqs != null) {
-                  final int tf = docsAndFreqs.freq();
+                if (hasFreqs) {
+                  final int tf = docs2.freq();
                   if (tf <= 0) {
                     throw new RuntimeException("vector freq " + tf + " is out of bounds");
                   }
                   if (totalTermFreq != -1 && totalTermFreq != tf) {
                     throw new RuntimeException("vector totalTermFreq " + totalTermFreq + " != tf " + tf);
                   }
+                  if (crossCheckTermVectors && postingsHasFreq) {
+                    if (postingsDocs2.freq() != tf) {
+                      throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": freq=" + tf + " differs from postings freq=" + postingsDocs2.freq());
+                    }
+                  }
                   tfvComputedSumTotalTermFreq += tf;
                 
-                  if (postings != null) {
+                  if (hasPositions || hasOffsets) {
                     int lastPosition = -1;
+                    //int lastStartOffset = -1;
                     for (int i = 0; i < tf; i++) {
                       int pos = postings.nextPosition();
-                      if (pos != -1 && pos < 0) {
-                        throw new RuntimeException("vector position " + pos + " is out of bounds");
-                      }
+                      if (hasPositions) {
+                        if (pos != -1 && pos < 0) {
+                          throw new RuntimeException("vector position " + pos + " is out of bounds");
+                        }
+                        if (pos < lastPosition) {
+                          throw new RuntimeException("vector position " + pos + " < lastPos " + lastPosition);
+                        }
                     
-                      if (pos < lastPosition) {
-                        throw new RuntimeException("vector position " + pos + " < lastPos " + lastPosition);
+                        lastPosition = pos;
+                      }
+
+                      if (crossCheckTermVectors && postingsPostings != null) {
+                        int postingsPos = postingsPostings.nextPosition();
+                        if (pos != -1 && postingsPos != -1 && pos != postingsPos) {
+                          throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
+                        }
+                      }
+
+                      if (hasOffsets) {
+                        // Call the methods to at least make
+                        // sure they don't throw exc:
+                        final int startOffset = postings.startOffset();
+                        final int endOffset = postings.endOffset();
+                        // TODO: these are too anal...?
+                        /*
+                        if (endOffset < startOffset) {
+                          throw new RuntimeException("vector startOffset=" + startOffset + " is > endOffset=" + endOffset);
+                        }
+                        if (startOffset < lastStartOffset) {
+                          throw new RuntimeException("vector startOffset=" + startOffset + " is < prior startOffset=" + lastStartOffset);
+                        }
+                        lastStartOffset = startOffset;
+                        */
+
+                        if (crossCheckTermVectors && postingsPostings != null) {
+                          final int postingsStartOffset = postingsPostings.startOffset();
+
+                          final int postingsEndOffset = postingsPostings.endOffset();
+                          if (startOffset != -1 && postingsStartOffset != -1 && startOffset != postingsStartOffset) {
+                            throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": startOffset=" + startOffset + " differs from postings startOffset=" + postingsStartOffset);
+                          }
+                          if (endOffset != -1 && postingsEndOffset != -1 && endOffset != postingsEndOffset) {
+                            throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset);
+                          }
+                        }
                       }
-                    
-                      lastPosition = pos;
                     }
                   }
                 }
                   
-                if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+                if (docs2.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                   throw new RuntimeException("vector for doc " + j + " references multiple documents!");
                 }
               }
@@ -1474,6 +1640,7 @@ public class CheckIndex {
   public static void main(String[] args) throws IOException, InterruptedException {
 
     boolean doFix = false;
+    boolean doCrossCheckTermVectors = false;
     Codec codec = Codec.getDefault(); // only used when fixing
     boolean verbose = false;
     List<String> onlySegments = new ArrayList<String>();
@@ -1484,6 +1651,8 @@ public class CheckIndex {
       String arg = args[i];
       if ("-fix".equals(arg)) {
         doFix = true;
+      } else if ("-crossCheckTermVectors".equals(arg)) {
+        doCrossCheckTermVectors = true;
       } else if ("-codec".equals(arg)) {
         if (i == args.length-1) {
           System.out.println("ERROR: missing name for -codec option");
@@ -1519,9 +1688,10 @@ public class CheckIndex {
 
     if (indexPath == null) {
       System.out.println("\nERROR: index path not specified");
-      System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] [-dir-impl X]\n" +
+      System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" +
                          "\n" +
                          "  -fix: actually write a new segments_N file, removing any problematic segments\n" +
+                         "  -crossCheckTermVectors: verifies that term vectors match postings; THIS IS VERY SLOW!\n" +
                          "  -codec X: when fixing, codec to write the new segments_N file with\n" +
                          "  -verbose: print additional details\n" +
                          "  -segment X: only check the specified segments.  This can be specified multiple\n" + 
@@ -1570,6 +1740,7 @@ public class CheckIndex {
     }
 
     CheckIndex checker = new CheckIndex(dir);
+    checker.setCrossCheckTermVectors(doCrossCheckTermVectors);
     checker.setInfoStream(System.out, verbose);
 
     Status result = checker.checkIndex(onlySegments);

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java Sun Jan 15 23:17:45 2012
@@ -73,8 +73,9 @@ final class DocInverterPerField extends 
       // tokenized.
       if (field.fieldType().indexed() && doInvert) {
 
-        if (i > 0)
+        if (i > 0) {
           fieldState.position += docState.analyzer == null ? 0 : docState.analyzer.getPositionIncrementGap(fieldInfo.name);
+        }
 
         final TokenStream stream = field.tokenStream(docState.analyzer);
         // reset the TokenStream to the first token

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java Sun Jan 15 23:17:45 2012
@@ -655,8 +655,8 @@ public class DocTermOrds {
     }
 
     @Override    
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
-      return termsEnum.docsAndPositions(liveDocs, reuse);
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+      return termsEnum.docsAndPositions(liveDocs, reuse, needsOffsets);
     }
 
     @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java Sun Jan 15 23:17:45 2012
@@ -26,9 +26,20 @@ public abstract class DocsAndPositionsEn
 
   /** Returns the next position.  You should only call this
    *  up to {@link DocsEnum#freq()} times else
-   *  the behavior is not defined. */
+   *  the behavior is not defined.  If positions were not
+   *  indexed this will return -1; this only happens if
+   *  offsets were indexed and you passed needsOffset=true
+   *  when pulling the enum.  */
   public abstract int nextPosition() throws IOException;
 
+  /** Returns start offset for the current position, or -1
+   *  if offsets were not indexed. */
+  public abstract int startOffset() throws IOException;
+
+  /** Returns end offset for the current position, or -1 if
+   *  offsets were not indexed. */
+  public abstract int endOffset() throws IOException;
+
   /** Returns the payload at this position, or null if no
    *  payload was indexed.  Only call this once per
    *  position. */

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java Sun Jan 15 23:17:45 2012
@@ -38,13 +38,18 @@ public final class FieldInfo {
    * @lucene.experimental
    */
   public static enum IndexOptions { 
+    // NOTE: order is important here; FieldInfo uses this
+    // order to merge two conflicting IndexOptions (always
+    // "downgrades" by picking the lowest).
     /** only documents are indexed: term frequencies and positions are omitted */
     // TODO: maybe rename to just DOCS?
     DOCS_ONLY,
     /** only documents and term frequencies are indexed: positions are omitted */  
     DOCS_AND_FREQS,
-    /** full postings: documents, frequencies, and positions */
-    DOCS_AND_FREQS_AND_POSITIONS 
+    /** documents, frequencies and positions */
+    DOCS_AND_FREQS_AND_POSITIONS,
+    /** documents, frequencies, positions and offsets */
+    DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
   };
 
   /**
@@ -67,7 +72,7 @@ public final class FieldInfo {
       this.omitNorms = false;
       this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
     }
-    assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !storePayloads;
+    assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !storePayloads;
   }
   
   @Override
@@ -95,10 +100,13 @@ public final class FieldInfo {
       if (this.indexOptions != indexOptions) {
         // downgrade
         this.indexOptions = this.indexOptions.compareTo(indexOptions) < 0 ? this.indexOptions : indexOptions;
-        this.storePayloads = false;
+        if (this.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+          // cannot store payloads if we don't store positions:
+          this.storePayloads = false;
+        }
       }
     }
-    assert this.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !this.storePayloads;
+    assert this.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !this.storePayloads;
   }
 
   void setDocValuesType(DocValues.Type v) {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java Sun Jan 15 23:17:45 2012
@@ -185,7 +185,7 @@ public final class FieldInfos implements
     }
     // mutable FIs must check!
     for (FieldInfo fi : this) {
-      if (fi.isIndexed && fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+      if (fi.isIndexed && fi.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
         return true;
       }
     }
@@ -430,7 +430,7 @@ public final class FieldInfos implements
       FieldInfo clone = (FieldInfo) (fieldInfo).clone();
       roFis.putInternal(clone);
       roFis.hasVectors |= clone.storeTermVector;
-      roFis.hasProx |= clone.isIndexed && clone.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+      roFis.hasProx |= clone.isIndexed && clone.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
       roFis.hasFreq |= clone.isIndexed && clone.indexOptions != IndexOptions.DOCS_ONLY;
     }
     return roFis;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Sun Jan 15 23:17:45 2012
@@ -176,8 +176,8 @@ public class FilterIndexReader extends I
     }
 
     @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
-      return in.docsAndPositions(liveDocs, reuse);
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+      return in.docsAndPositions(liveDocs, reuse, needsOffsets);
     }
 
     @Override
@@ -259,6 +259,16 @@ public class FilterIndexReader extends I
     }
 
     @Override
+    public int startOffset() throws IOException {
+      return in.startOffset();
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+      return in.endOffset();
+    }
+
+    @Override
     public BytesRef getPayload() throws IOException {
       return in.getPayload();
     }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java Sun Jan 15 23:17:45 2012
@@ -171,8 +171,8 @@ public abstract class FilteredTermsEnum 
   }
     
   @Override
-  public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException {
-    return tenum.docsAndPositions(bits, reuse);
+  public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+    return tenum.docsAndPositions(bits, reuse, needsOffsets);
   }
   
   /** This enum does not support seeking!

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Sun Jan 15 23:17:45 2012
@@ -83,7 +83,7 @@ final class FreqProxTermsWriter extends 
         
         // Aggregate the storePayload as seen by the same
         // field across multiple threads
-        if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+        if (fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
           fieldInfo.storePayloads |= fieldWriter.hasPayloads;
         }
         

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Sun Jan 15 23:17:45 2012
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.Comparator;
 import java.util.Map;
 
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.PostingsConsumer;
@@ -43,7 +44,11 @@ final class FreqProxTermsWriterPerField 
   final DocumentsWriterPerThread.DocState docState;
   final FieldInvertState fieldState;
   IndexOptions indexOptions;
+  private boolean writeFreq;
+  private boolean writeProx;
+  private boolean writeOffsets;
   PayloadAttribute payloadAttribute;
+  OffsetAttribute offsetAttribute;
 
   public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter parent, FieldInfo fieldInfo) {
     this.termsHashPerField = termsHashPerField;
@@ -51,15 +56,16 @@ final class FreqProxTermsWriterPerField 
     this.fieldInfo = fieldInfo;
     docState = termsHashPerField.docState;
     fieldState = termsHashPerField.fieldState;
-    indexOptions = fieldInfo.indexOptions;
+    setIndexOptions(fieldInfo.indexOptions);
   }
 
   @Override
   int getStreamCount() {
-    if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
+    if (!writeProx) {
       return 1;
-    else
+    } else {
       return 2;
+    }
   }
 
   @Override
@@ -74,13 +80,21 @@ final class FreqProxTermsWriterPerField 
     return fieldInfo.name.compareTo(other.fieldInfo.name);
   }
 
+  // Called after flush
   void reset() {
     // Record, up front, whether our in-RAM format will be
     // with or without term freqs:
-    indexOptions = fieldInfo.indexOptions;
+    setIndexOptions(fieldInfo.indexOptions);
     payloadAttribute = null;
   }
 
+  private void setIndexOptions(IndexOptions indexOptions) {
+    this.indexOptions = indexOptions;
+    writeFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+    writeProx = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    writeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+  }
+
   @Override
   boolean start(IndexableField[] fields, int count) {
     for(int i=0;i<count;i++) {
@@ -98,9 +112,16 @@ final class FreqProxTermsWriterPerField 
     } else {
       payloadAttribute = null;
     }
+    if (writeOffsets) {
+      offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
+    } else {
+      offsetAttribute = null;
+    }
   }
 
   void writeProx(final int termID, int proxCode) {
+    //System.out.println("writeProx termID=" + termID + " proxCode=" + proxCode);
+    assert writeProx;
     final Payload payload;
     if (payloadAttribute == null) {
       payload = null;
@@ -113,12 +134,24 @@ final class FreqProxTermsWriterPerField 
       termsHashPerField.writeVInt(1, payload.length);
       termsHashPerField.writeBytes(1, payload.data, payload.offset, payload.length);
       hasPayloads = true;
-    } else
+    } else {
       termsHashPerField.writeVInt(1, proxCode<<1);
+    }
 
     FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
     postings.lastPositions[termID] = fieldState.position;
+  }
+
+  void writeOffsets(final int termID, int prevOffset) {
+    assert writeOffsets;
+    final int startOffset = offsetAttribute.startOffset();
+    final int endOffset = offsetAttribute.endOffset();
+    //System.out.println("writeOffsets termID=" + termID + " prevOffset=" + prevOffset + " startOff=" + startOffset + " endOff=" + endOffset);
+    termsHashPerField.writeVInt(1, startOffset - prevOffset);
+    termsHashPerField.writeVInt(1, endOffset - startOffset);
 
+    FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
+    postings.lastOffsets[termID] = startOffset;
   }
 
   @Override
@@ -129,13 +162,18 @@ final class FreqProxTermsWriterPerField 
 
     FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
     postings.lastDocIDs[termID] = docState.docID;
-    if (indexOptions == IndexOptions.DOCS_ONLY) {
+    if (!writeFreq) {
       postings.lastDocCodes[termID] = docState.docID;
     } else {
       postings.lastDocCodes[termID] = docState.docID << 1;
       postings.docFreqs[termID] = 1;
-      if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+      if (writeProx) {
         writeProx(termID, fieldState.position);
+        if (writeOffsets) {
+          writeOffsets(termID, fieldState.offset);
+        }
+      } else {
+        assert !writeOffsets;
       }
     }
     fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
@@ -149,9 +187,10 @@ final class FreqProxTermsWriterPerField 
 
     FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
 
-    assert indexOptions == IndexOptions.DOCS_ONLY || postings.docFreqs[termID] > 0;
+    assert !writeFreq || postings.docFreqs[termID] > 0;
 
-    if (indexOptions == IndexOptions.DOCS_ONLY) {
+    if (!writeFreq) {
+      assert postings.docFreqs == null;
       if (docState.docID != postings.lastDocIDs[termID]) {
         assert docState.docID > postings.lastDocIDs[termID];
         termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
@@ -159,59 +198,76 @@ final class FreqProxTermsWriterPerField 
         postings.lastDocIDs[termID] = docState.docID;
         fieldState.uniqueTermCount++;
       }
-    } else {
-      if (docState.docID != postings.lastDocIDs[termID]) {
-        assert docState.docID > postings.lastDocIDs[termID]:"id: "+docState.docID + " postings ID: "+ postings.lastDocIDs[termID] + " termID: "+termID;
-        // Term not yet seen in the current doc but previously
-        // seen in other doc(s) since the last flush
-
-        // Now that we know doc freq for previous doc,
-        // write it & lastDocCode
-        if (1 == postings.docFreqs[termID])
-          termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
-        else {
-          termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
-          termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
-        }
-        postings.docFreqs[termID] = 1;
-        fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
-        postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
-        postings.lastDocIDs[termID] = docState.docID;
-        if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
-          writeProx(termID, fieldState.position);
-        }
-        fieldState.uniqueTermCount++;
+    } else if (docState.docID != postings.lastDocIDs[termID]) {
+      assert docState.docID > postings.lastDocIDs[termID]:"id: "+docState.docID + " postings ID: "+ postings.lastDocIDs[termID] + " termID: "+termID;
+      // Term not yet seen in the current doc but previously
+      // seen in other doc(s) since the last flush
+
+      // Now that we know doc freq for previous doc,
+      // write it & lastDocCode
+      if (1 == postings.docFreqs[termID]) {
+        termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
       } else {
-        fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
-        if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
-          writeProx(termID, fieldState.position-postings.lastPositions[termID]);
+        termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
+        termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
+      }
+      postings.docFreqs[termID] = 1;
+      fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
+      postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
+      postings.lastDocIDs[termID] = docState.docID;
+      if (writeProx) {
+        writeProx(termID, fieldState.position);
+        if (writeOffsets) {
+          writeOffsets(termID, fieldState.offset);
         }
+      } else {
+        assert !writeOffsets;
+      }
+      fieldState.uniqueTermCount++;
+    } else {
+      fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
+      if (writeProx) {
+        writeProx(termID, fieldState.position-postings.lastPositions[termID]);
+      }
+      if (writeOffsets) {
+        writeOffsets(termID, postings.lastOffsets[termID]);
       }
     }
   }
 
   @Override
   ParallelPostingsArray createPostingsArray(int size) {
-    return new FreqProxPostingsArray(size);
+    return new FreqProxPostingsArray(size, writeFreq, writeProx, writeOffsets);
   }
 
   static final class FreqProxPostingsArray extends ParallelPostingsArray {
-    public FreqProxPostingsArray(int size) {
+    public FreqProxPostingsArray(int size, boolean writeFreqs, boolean writeProx, boolean writeOffsets) {
       super(size);
-      docFreqs = new int[size];
+      if (writeFreqs) {
+        docFreqs = new int[size];
+      }
       lastDocIDs = new int[size];
       lastDocCodes = new int[size];
-      lastPositions = new int[size];
+      if (writeProx) {
+        lastPositions = new int[size];
+        if (writeOffsets) {
+          lastOffsets = new int[size];
+        }
+      } else {
+        assert !writeOffsets;
+      }
+      //System.out.println("PA init freqs=" + writeFreqs + " pos=" + writeProx + " offs=" + writeOffsets);
     }
 
     int docFreqs[];                                    // # times this term occurs in the current doc
     int lastDocIDs[];                                  // Last docID where this term occurred
     int lastDocCodes[];                                // Code for prior doc
     int lastPositions[];                               // Last position where this term occurred
+    int lastOffsets[];                                 // Last endOffset where this term occurred
 
     @Override
     ParallelPostingsArray newInstance(int size) {
-      return new FreqProxPostingsArray(size);
+      return new FreqProxPostingsArray(size, docFreqs != null, lastPositions != null, lastOffsets != null);
     }
 
     @Override
@@ -221,15 +277,36 @@ final class FreqProxTermsWriterPerField 
 
       super.copyTo(toArray, numToCopy);
 
-      System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
       System.arraycopy(lastDocIDs, 0, to.lastDocIDs, 0, numToCopy);
       System.arraycopy(lastDocCodes, 0, to.lastDocCodes, 0, numToCopy);
-      System.arraycopy(lastPositions, 0, to.lastPositions, 0, numToCopy);
+      if (lastPositions != null) {
+        assert to.lastPositions != null;
+        System.arraycopy(lastPositions, 0, to.lastPositions, 0, numToCopy);
+      }
+      if (lastOffsets != null) {
+        assert to.lastOffsets != null;
+        System.arraycopy(lastOffsets, 0, to.lastOffsets, 0, numToCopy);
+      }
+      if (docFreqs != null) {
+        assert to.docFreqs != null;
+        System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
+      }
     }
 
     @Override
     int bytesPerPosting() {
-      return ParallelPostingsArray.BYTES_PER_POSTING + 4 * RamUsageEstimator.NUM_BYTES_INT;
+      int bytes = ParallelPostingsArray.BYTES_PER_POSTING + 2 * RamUsageEstimator.NUM_BYTES_INT;
+      if (lastPositions != null) {
+        bytes += RamUsageEstimator.NUM_BYTES_INT;
+      }
+      if (lastOffsets != null) {
+        bytes += RamUsageEstimator.NUM_BYTES_INT;
+      }
+      if (docFreqs != null) {
+        bytes += RamUsageEstimator.NUM_BYTES_INT;
+      }
+
+      return bytes;
     }
   }
 
@@ -246,8 +323,33 @@ final class FreqProxTermsWriterPerField 
     final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
     final Comparator<BytesRef> termComp = termsConsumer.getComparator();
 
+    // CONFUSING: this.indexOptions holds the index options
+    // that were current when we first saw this field.  But
+    // it's possible this has changed, eg when other
+    // documents are indexed that cause a "downgrade" of the
+    // IndexOptions.  So we must decode the in-RAM buffer
+    // according to this.indexOptions, but then write the
+    // new segment to the directory according to
+    // currentFieldIndexOptions:
     final IndexOptions currentFieldIndexOptions = fieldInfo.indexOptions;
 
+    final boolean writeTermFreq = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+    final boolean writePositions = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    final boolean writeOffsets = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+
+    final boolean readTermFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+    final boolean readPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    final boolean readOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+
+    //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets);
+
+    // Make sure FieldInfo.update is working correctly!:
+    assert !writeTermFreq || readTermFreq;
+    assert !writePositions || readPositions;
+    assert !writeOffsets || readOffsets;
+
+    assert !writeOffsets || writePositions;
+
     final Map<Term,Integer> segDeletes;
     if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
       segDeletes = state.segDeletes.terms;
@@ -268,12 +370,13 @@ final class FreqProxTermsWriterPerField 
 
     for (int i = 0; i < numTerms; i++) {
       final int termID = termIDs[i];
+      //System.out.println("term=" + termID);
       // Get BytesRef
       final int textStart = postings.textStarts[termID];
       termsHashPerField.bytePool.setBytesRef(text, textStart);
 
       termsHashPerField.initReader(freq, termID, 0);
-      if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+      if (readPositions || readOffsets) {
         termsHashPerField.initReader(prox, termID, 1);
       }
 
@@ -303,15 +406,18 @@ final class FreqProxTermsWriterPerField 
       int numDocs = 0;
       long totTF = 0;
       int docID = 0;
-      int termFreq = 0;
 
       while(true) {
+        //System.out.println("  cycle");
+        final int termDocFreq;
         if (freq.eof()) {
           if (postings.lastDocCodes[termID] != -1) {
             // Return last doc
             docID = postings.lastDocIDs[termID];
-            if (indexOptions != IndexOptions.DOCS_ONLY) {
-              termFreq = postings.docFreqs[termID];
+            if (readTermFreq) {
+              termDocFreq = postings.docFreqs[termID];
+            } else {
+              termDocFreq = 0;
             }
             postings.lastDocCodes[termID] = -1;
           } else {
@@ -320,14 +426,15 @@ final class FreqProxTermsWriterPerField 
           }
         } else {
           final int code = freq.readVInt();
-          if (indexOptions == IndexOptions.DOCS_ONLY) {
+          if (!readTermFreq) {
             docID += code;
+            termDocFreq = 0;
           } else {
             docID += code >>> 1;
             if ((code & 1) != 0) {
-              termFreq = 1;
+              termDocFreq = 1;
             } else {
-              termFreq = freq.readVInt();
+              termDocFreq = freq.readVInt();
             }
           }
 
@@ -336,7 +443,6 @@ final class FreqProxTermsWriterPerField 
 
         numDocs++;
         assert docID < state.numDocs: "doc=" + docID + " maxDoc=" + state.numDocs;
-        final int termDocFreq = termFreq;
 
         // NOTE: we could check here if the docID was
         // deleted, and skip it.  However, this is somewhat
@@ -362,45 +468,54 @@ final class FreqProxTermsWriterPerField 
           state.liveDocs.clear(docID);
         }
 
-        if (currentFieldIndexOptions != IndexOptions.DOCS_ONLY) {
-          totTF += termDocFreq;
-        }
+        totTF += termDocFreq;
         
         // Carefully copy over the prox + payload info,
         // changing the format to match Lucene's segment
         // format.
 
-        if (currentFieldIndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
-          // we do write positions & payload
+        if (readPositions || readOffsets) {
+          // we did record positions (& maybe payload) and/or offsets
           int position = 0;
+          int offset = 0;
           for(int j=0;j<termDocFreq;j++) {
-            final int code = prox.readVInt();
-            position += code >> 1;
-
-            final int payloadLength;
             final BytesRef thisPayload;
 
-            if ((code & 1) != 0) {
-              // This position has a payload
-              payloadLength = prox.readVInt();
+            if (readPositions) {
+              final int code = prox.readVInt();
+              position += code >> 1;
+
+              if ((code & 1) != 0) {
+
+                // This position has a payload
+                final int payloadLength = prox.readVInt();
+
+                if (payload == null) {
+                  payload = new BytesRef();
+                  payload.bytes = new byte[payloadLength];
+                } else if (payload.bytes.length < payloadLength) {
+                  payload.grow(payloadLength);
+                }
+
+                prox.readBytes(payload.bytes, 0, payloadLength);
+                payload.length = payloadLength;
+                thisPayload = payload;
 
-              if (payload == null) {
-                payload = new BytesRef();
-                payload.bytes = new byte[payloadLength];
-              } else if (payload.bytes.length < payloadLength) {
-                payload.grow(payloadLength);
+              } else {
+                thisPayload = null;
               }
 
-              prox.readBytes(payload.bytes, 0, payloadLength);
-              payload.length = payloadLength;
-              thisPayload = payload;
-
-            } else {
-              payloadLength = 0;
-              thisPayload = null;
+              if (readOffsets) {
+                final int startOffset = offset + prox.readVInt();
+                final int endOffset = startOffset + prox.readVInt();
+                offset = startOffset;
+                if (writePositions) {
+                  postingsConsumer.addPosition(position, thisPayload, startOffset, endOffset);
+                }
+              } else if (writePositions) {
+                postingsConsumer.addPosition(position, thisPayload, -1, -1);
+              }
             }
-
-            postingsConsumer.addPosition(position, thisPayload);
           }
 
           postingsConsumer.finishDoc();
@@ -413,6 +528,4 @@ final class FreqProxTermsWriterPerField 
 
     termsConsumer.finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
   }
-
 }
-

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java Sun Jan 15 23:17:45 2012
@@ -788,9 +788,9 @@ public abstract class IndexReader implem
 
   /** Returns {@link DocsAndPositionsEnum} for the specified
    *  field & term.  This may return null, if either the
-   *  field or term does not exist, or, positions were not
-   *  indexed for this field. */
-  public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term) throws IOException {
+   *  field or term does not exist, or needsOffsets is
+   *  true but offsets were not indexed for this field. */
+  public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term, boolean needsOffsets) throws IOException {
     assert field != null;
     assert term != null;
     final Fields fields = fields();
@@ -799,7 +799,7 @@ public abstract class IndexReader implem
       if (terms != null) {
         final TermsEnum termsEnum = terms.iterator(null);
         if (termsEnum.seekExact(term, true)) {
-          return termsEnum.docsAndPositions(liveDocs, null);
+          return termsEnum.docsAndPositions(liveDocs, null, needsOffsets);
         }
       }
     }
@@ -830,8 +830,9 @@ public abstract class IndexReader implem
    * Returns {@link DocsAndPositionsEnum} for the specified field and
    * {@link TermState}. This may return null, if either the field or the term
    * does not exists, the {@link TermState} is invalid for the underlying
-   * implementation, or positions were not indexed for this field. */
-  public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term, TermState state) throws IOException {
+   * implementation, or needsOffsets is true but offsets
+   * were not indexed for this field. */
+  public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term, TermState state, boolean needsOffsets) throws IOException {
     assert state != null;
     assert field != null;
     final Fields fields = fields();
@@ -840,7 +841,7 @@ public abstract class IndexReader implem
       if (terms != null) {
         final TermsEnum termsEnum = terms.iterator(null);
         termsEnum.seekExact(term, state);
-        return termsEnum.docsAndPositions(liveDocs, null);
+        return termsEnum.docsAndPositions(liveDocs, null, needsOffsets);
       }
     }
     return null;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java Sun Jan 15 23:17:45 2012
@@ -126,6 +126,16 @@ public final class MultiDocsAndPositions
   }
 
   @Override
+  public int startOffset() throws IOException {
+    return current.startOffset();
+  }
+
+  @Override
+  public int endOffset() throws IOException {
+    return current.endOffset();
+  }
+
+  @Override
   public boolean hasPayload() {
     return current.hasPayload();
   }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiFields.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiFields.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiFields.java Sun Jan 15 23:17:45 2012
@@ -167,14 +167,14 @@ public final class MultiFields extends F
   /** Returns {@link DocsAndPositionsEnum} for the specified
    *  field & term.  This may return null if the term does
    *  not exist or positions were not indexed. */
-  public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException {
+  public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, boolean needsOffsets) throws IOException {
     assert field != null;
     assert term != null;
     final Terms terms = getTerms(r, field);
     if (terms != null) {
       final TermsEnum termsEnum = terms.iterator(null);
       if (termsEnum.seekExact(term, true)) {
-        return termsEnum.docsAndPositions(liveDocs, null);
+        return termsEnum.docsAndPositions(liveDocs, null, needsOffsets);
       }
     }
     return null;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Sun Jan 15 23:17:45 2012
@@ -418,7 +418,7 @@ public final class MultiTermsEnum extend
   }
 
   @Override
-  public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
+  public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
     MultiDocsAndPositionsEnum docsAndPositionsEnum;
     // Can only reuse if incoming enum is also a MultiDocsAndPositionsEnum
     if (reuse != null && reuse instanceof MultiDocsAndPositionsEnum) {
@@ -469,7 +469,7 @@ public final class MultiTermsEnum extend
       }
 
       assert entry.index < docsAndPositionsEnum.subDocsAndPositionsEnum.length: entry.index + " vs " + docsAndPositionsEnum.subDocsAndPositionsEnum.length + "; " + subs.length;
-      final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index]);
+      final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index], needsOffsets);
 
       if (subPostings != null) {
         docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index] = subPostings;
@@ -479,8 +479,8 @@ public final class MultiTermsEnum extend
       } else {
         if (entry.terms.docs(b, null, false) != null) {
           // At least one of our subs does not store
-          // positions -- we can't correctly produce a
-          // MultiDocsAndPositions enum
+          // offsets or positions -- we can't correctly
+          // produce a MultiDocsAndPositions enum
           return null;
         }
       }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java Sun Jan 15 23:17:45 2012
@@ -44,7 +44,7 @@ public final class Term implements Compa
     field = fld;
     this.bytes = bytes;
   }
-  
+
   /** Constructs a Term with the given field and text.
    * <p>Note that a null field or null text value results in undefined
    * behavior for most Lucene APIs that accept a Term parameter. */
@@ -132,4 +132,8 @@ public final class Term implements Compa
 
   @Override
   public final String toString() { return field + ":" + bytes.utf8ToString(); }
+
+  public Term deepCopyOf() {
+    return new Term(field, BytesRef.deepCopyOf(bytes));
+  }
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java Sun Jan 15 23:17:45 2012
@@ -38,7 +38,7 @@ final class TermVectorsConsumerPerField 
   boolean doVectorOffsets;
 
   int maxNumPostings;
-  OffsetAttribute offsetAttribute = null;
+  OffsetAttribute offsetAttribute;
 
   public TermVectorsConsumerPerField(TermsHashPerField termsHashPerField, TermVectorsConsumer termsWriter, FieldInfo fieldInfo) {
     this.termsHashPerField = termsHashPerField;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java Sun Jan 15 23:17:45 2012
@@ -160,12 +160,13 @@ public abstract class TermsEnum {
 
   /** Get {@link DocsAndPositionsEnum} for the current term.
    *  Do not call this when the enum is unpositioned.
-   *  This method will only return null if positions were
-   *  not indexed into the postings by this codec.
+   *  This method will only return null if needsOffsets is
+   *  true but offsets were not indexed.
    *  @param liveDocs unset bits are documents that should not
    *  be returned
-   *  @param reuse pass a prior DocsAndPositionsEnum for possible reuse */
-  public abstract DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException;
+   *  @param reuse pass a prior DocsAndPositionsEnum for possible reuse
+   *  @param needsOffsets true if offsets are required */
+  public abstract DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException;
 
   /**
    * Expert: Returns the TermsEnums internal state to position the TermsEnum
@@ -238,7 +239,7 @@ public abstract class TermsEnum {
     }
       
     @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) {
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) {
       throw new IllegalStateException("this method should never be called");
     }
       

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java Sun Jan 15 23:17:45 2012
@@ -293,7 +293,7 @@ final class TermsHashPerField extends In
 
     @Override
     public int[] init() {
-      if(perField.postingsArray == null) {
+      if (perField.postingsArray == null) {
         perField.postingsArray = perField.consumer.createPostingsArray(2);
         bytesUsed.addAndGet(perField.postingsArray.size * perField.postingsArray.bytesPerPosting());
       }
@@ -305,8 +305,7 @@ final class TermsHashPerField extends In
       ParallelPostingsArray postingsArray = perField.postingsArray;
       final int oldSize = perField.postingsArray.size;
       postingsArray = perField.postingsArray = postingsArray.grow();
-      bytesUsed
-          .addAndGet((postingsArray.bytesPerPosting() * (postingsArray.size - oldSize)));
+      bytesUsed.addAndGet((postingsArray.bytesPerPosting() * (postingsArray.size - oldSize)));
       return postingsArray.textStarts;
     }
 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java Sun Jan 15 23:17:45 2012
@@ -1043,7 +1043,7 @@ class FieldCacheImpl implements FieldCac
       }
 
       @Override
-      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
+      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
         throw new UnsupportedOperationException();
       }
 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Sun Jan 15 23:17:45 2012
@@ -272,8 +272,8 @@ public final class FuzzyTermsEnum extend
   
   @Override
   public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
-      DocsAndPositionsEnum reuse) throws IOException {
-    return actualEnum.docsAndPositions(liveDocs, reuse);
+                                               DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+    return actualEnum.docsAndPositions(liveDocs, reuse, needsOffsets);
   }
   
   @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Sun Jan 15 23:17:45 2012
@@ -225,7 +225,7 @@ public class MultiPhraseQuery extends Qu
             return null;
           }
           termsEnum.seekExact(term.bytes(), termState);
-          postingsEnum = termsEnum.docsAndPositions(liveDocs, null);
+          postingsEnum = termsEnum.docsAndPositions(liveDocs, null, false);
 
           if (postingsEnum == null) {
             // term does exist, but has no positions
@@ -475,7 +475,7 @@ class UnionDocsAndPositionsEnum extends 
         continue;
       }
       termsEnum.seekExact(term.bytes(), termState);
-      DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null);
+      DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, false);
       if (postings == null) {
         // term does exist, but has no positions
         throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
@@ -528,6 +528,16 @@ class UnionDocsAndPositionsEnum extends 
   }
 
   @Override
+  public int startOffset() {
+    return -1;
+  }
+
+  @Override
+  public int endOffset() {
+    return -1;
+  }
+
+  @Override
   public BytesRef getPayload() {
     throw new UnsupportedOperationException();
   }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java Sun Jan 15 23:17:45 2012
@@ -239,7 +239,7 @@ public class PhraseQuery extends Query {
           return null;
         }
         te.seekExact(t.bytes(), state);
-        DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null);
+        DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, false);
 
         // PhraseQuery on a field that did not index
         // positions.

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java Sun Jan 15 23:17:45 2012
@@ -120,7 +120,7 @@ public class SpanTermQuery extends SpanQ
     final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null);
     termsEnum.seekExact(term.bytes(), state);
     
-    final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null);
+    final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null, false);
 
     if (postings != null) {
       return new TermSpans(postings, term);

Added: lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/analysis/CannedAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/analysis/CannedAnalyzer.java?rev=1231794&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/analysis/CannedAnalyzer.java (added)
+++ lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/analysis/CannedAnalyzer.java Sun Jan 15 23:17:45 2012
@@ -0,0 +1,73 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.IOException;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+public class CannedAnalyzer extends Analyzer {
+  private final Token[] tokens;
+
+  public CannedAnalyzer(Token[] tokens) {
+    this.tokens = tokens;
+  }
+
+  @Override
+  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    return new TokenStreamComponents(new CannedTokenizer(tokens));
+  }
+
+  public static class CannedTokenizer extends Tokenizer {
+    private final Token[] tokens;
+    private int upto = 0;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+    public CannedTokenizer(Token[] tokens) {
+      this.tokens = tokens;
+    }
+
+    @Override
+    public final boolean incrementToken() throws IOException {
+      if (upto < tokens.length) {
+        final Token token = tokens[upto++];     
+        // TODO: can we just capture/restoreState so
+        // we get all attrs...?
+        clearAttributes();      
+        termAtt.setEmpty();
+        termAtt.append(token.toString());
+        posIncrAtt.setPositionIncrement(token.getPositionIncrement());
+        offsetAtt.setOffset(token.startOffset(), token.endOffset());
+        return true;
+      } else {
+        return false;
+      }
+    }
+
+    @Override
+    public void reset() throws IOException {
+      super.reset();
+      this.upto = 0;
+    }
+  }
+}

Modified: lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexFieldsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexFieldsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexFieldsWriter.java Sun Jan 15 23:17:45 2012
@@ -88,6 +88,9 @@ class PreFlexFieldsWriter extends Fields
   @Override
   public TermsConsumer addField(FieldInfo field) throws IOException {
     assert field.number != -1;
+    if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+      throw new IllegalArgumentException("this codec cannot index offsets");
+    }
     //System.out.println("w field=" + field.name + " storePayload=" + field.storePayloads + " number=" + field.number);
     return new PreFlexTermsWriter(field);
   }
@@ -157,8 +160,10 @@ class PreFlexFieldsWriter extends Fields
       }
 
       @Override
-      public void addPosition(int position, BytesRef payload) throws IOException {
+      public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
         assert proxOut != null;
+        assert startOffset == -1;
+        assert endOffset == -1;
 
         //System.out.println("      w pos=" + position + " payl=" + payload);
         final int delta = position - lastPosition;

Modified: lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java Sun Jan 15 23:17:45 2012
@@ -37,6 +37,7 @@ import org.apache.lucene.codecs.TermStat
 import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexFileNames;
@@ -197,6 +198,9 @@ public class RAMOnlyPostingsFormat exten
 
     @Override
     public TermsConsumer addField(FieldInfo field) {
+      if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+        throw new IllegalArgumentException("this codec cannot index offsets");
+      }
       RAMField ramField = new RAMField(field.name);
       postings.fieldToTerms.put(field.name, ramField);
       termsConsumer.reset(ramField);
@@ -265,7 +269,9 @@ public class RAMOnlyPostingsFormat exten
     }
 
     @Override
-    public void addPosition(int position, BytesRef payload) {
+    public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) {
+      assert startOffset == -1;
+      assert endOffset == -1;
       current.positions[posUpto] = position;
       if (payload != null && payload.length > 0) {
         if (current.payloads == null) {
@@ -388,7 +394,10 @@ public class RAMOnlyPostingsFormat exten
     }
 
     @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) {
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) {
+      if (needsOffsets) {
+        return null;
+      }
       return new RAMDocsAndPositionsEnum(ramField.termToDocs.get(current), liveDocs);
     }
   }
@@ -494,6 +503,16 @@ public class RAMOnlyPostingsFormat exten
     }
 
     @Override
+    public int startOffset() {
+      return -1;
+    }
+
+    @Override
+    public int endOffset() {
+      return -1;
+    }
+
+    @Override
     public boolean hasPayload() {
       return current.payloads != null && current.payloads[posUpto-1] != null;
     }

Modified: lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java Sun Jan 15 23:17:45 2012
@@ -122,6 +122,10 @@ public class RandomIndexWriter implement
    * @see IndexWriter#addDocument(Iterable)
    */
   public <T extends IndexableField> void addDocument(final Iterable<T> doc) throws IOException {
+    addDocument(doc, w.getAnalyzer());
+  }
+
+  public <T extends IndexableField> void addDocument(final Iterable<T> doc, Analyzer a) throws IOException {
     if (doDocValues && doc instanceof Document) {
       randomPerDocFieldValues(r, (Document) doc);
     }
@@ -157,9 +161,9 @@ public class RandomIndexWriter implement
             }
           };
         }
-        });
+        }, a);
     } else {
-      w.addDocument(doc);
+      w.addDocument(doc, a);
     }
     
     maybeCommit();

Modified: lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java Sun Jan 15 23:17:45 2012
@@ -1106,6 +1106,10 @@ public abstract class LuceneTestCase ext
       return new Field(name, value, type);
     }
 
+    // TODO: once all core & test codecs can index
+    // offsets, sometimes randomly turn on offsets if we are
+    // already indexing positions...
+
     FieldType newType = new FieldType(type);
     if (!newType.stored() && random.nextBoolean()) {
       newType.setStored(true); // randomly store it

Modified: lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java Sun Jan 15 23:17:45 2012
@@ -157,6 +157,7 @@ public class _TestUtil {
   public static CheckIndex.Status checkIndex(Directory dir) throws IOException {
     ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
     CheckIndex checker = new CheckIndex(dir);
+    checker.setCrossCheckTermVectors(true);
     checker.setInfoStream(new PrintStream(bos), false);
     CheckIndex.Status indexStatus = checker.checkIndex(null);
     if (indexStatus == null || indexStatus.clean == false) {
@@ -567,7 +568,10 @@ public class _TestUtil {
     if (random.nextBoolean()) {
       if (random.nextBoolean()) {
         // TODO: cast re-use to D&PE if we can...?
-        final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null);
+        DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, true);
+        if (docsAndPositions == null) {
+          docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, false);
+        }
         if (docsAndPositions != null) {
           return docsAndPositions;
         }
@@ -586,7 +590,10 @@ public class _TestUtil {
     if (random.nextBoolean()) {
       if (random.nextBoolean()) {
         // TODO: cast re-use to D&PE if we can...?
-        final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null);
+        DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, true);
+        if (docsAndPositions == null) {
+          docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, false);
+        }
         if (docsAndPositions != null) {
           return docsAndPositions;
         }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java Sun Jan 15 23:17:45 2012
@@ -74,7 +74,8 @@ public class TestCachingTokenFilter exte
     DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader,
                                                                           MultiFields.getLiveDocs(reader),
                                                                           "preanalyzed",
-                                                                          new BytesRef("term1"));
+                                                                          new BytesRef("term1"),
+                                                                          false);
     assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
     assertEquals(1, termPositions.freq());
     assertEquals(0, termPositions.nextPosition());
@@ -82,7 +83,8 @@ public class TestCachingTokenFilter exte
     termPositions = MultiFields.getTermPositionsEnum(reader,
                                                      MultiFields.getLiveDocs(reader),
                                                      "preanalyzed",
-                                                     new BytesRef("term2"));
+                                                     new BytesRef("term2"),
+                                                     false);
     assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
     assertEquals(2, termPositions.freq());
     assertEquals(1, termPositions.nextPosition());
@@ -91,7 +93,8 @@ public class TestCachingTokenFilter exte
     termPositions = MultiFields.getTermPositionsEnum(reader,
                                                      MultiFields.getLiveDocs(reader),
                                                      "preanalyzed",
-                                                     new BytesRef("term3"));
+                                                     new BytesRef("term3"),
+                                                     false);
     assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
     assertEquals(1, termPositions.freq());
     assertEquals(2, termPositions.nextPosition());

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java Sun Jan 15 23:17:45 2012
@@ -23,7 +23,6 @@ import java.util.Map;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat;
-import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.TextField;
@@ -70,7 +69,7 @@ public class TestPulsingReuse extends Lu
     DocsAndPositionsEnum posReuse = null;
     te = segment.terms("foo").iterator(null);
     while (te.next() != null) {
-      posReuse = te.docsAndPositions(null, posReuse);
+      posReuse = te.docsAndPositions(null, posReuse, false);
       allEnums.put(posReuse, true);
     }
     
@@ -112,7 +111,7 @@ public class TestPulsingReuse extends Lu
     DocsAndPositionsEnum posReuse = null;
     te = segment.terms("foo").iterator(null);
     while (te.next() != null) {
-      posReuse = te.docsAndPositions(null, posReuse);
+      posReuse = te.docsAndPositions(null, posReuse, false);
       allEnums.put(posReuse, true);
     }
     

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/document/TestDocument.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/document/TestDocument.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/document/TestDocument.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/document/TestDocument.java Sun Jan 15 23:17:45 2012
@@ -347,7 +347,7 @@ public class TestDocument extends Lucene
       assertEquals(2, tvs.getUniqueTermCount());
       TermsEnum tvsEnum = tvs.iterator(null);
       assertEquals(new BytesRef("abc"), tvsEnum.next());
-      final DocsAndPositionsEnum dpEnum = tvsEnum.docsAndPositions(null, null);
+      final DocsAndPositionsEnum dpEnum = tvsEnum.docsAndPositions(null, null, false);
       if (field.equals("tv")) {
         assertNull(dpEnum);
       } else {

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java Sun Jan 15 23:17:45 2012
@@ -166,7 +166,7 @@ public class TestCodecs extends LuceneTe
           totTF += positions[i].length;
           for(int j=0;j<positions[i].length;j++) {
             final PositionData pos = positions[i][j];
-            postingsConsumer.addPosition(pos.pos, pos.payload);
+            postingsConsumer.addPosition(pos.pos, pos.payload, -1, -1);
           }
           postingsConsumer.finishDoc();
         }
@@ -480,7 +480,7 @@ public class TestCodecs extends LuceneTe
         if (field.omitTF) {
           this.verifyDocs(term.docs, term.positions, _TestUtil.docs(random, termsEnum, null, null, false), false);
         } else {
-          this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true);
+          this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null, false), true);
         }
 
         // Test random seek by ord:
@@ -500,7 +500,7 @@ public class TestCodecs extends LuceneTe
           if (field.omitTF) {
             this.verifyDocs(term.docs, term.positions, _TestUtil.docs(random, termsEnum, null, null, false), false);
           } else {
-            this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true);
+            this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null, false), true);
           }
         }
 
@@ -552,7 +552,7 @@ public class TestCodecs extends LuceneTe
             final DocsEnum docsAndFreqs;
             final DocsAndPositionsEnum postings;
             if (!field.omitTF) {
-              postings = termsEnum.docsAndPositions(null, null);
+              postings = termsEnum.docsAndPositions(null, null, false);
               if (postings != null) {
                 docs = docsAndFreqs = postings;
               } else {

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java Sun Jan 15 23:17:45 2012
@@ -234,7 +234,7 @@ public class TestDoc extends LuceneTestC
           out.print("  term=" + field + ":" + tis.term());
           out.println("    DF=" + tis.docFreq());
 
-          DocsAndPositionsEnum positions = tis.docsAndPositions(reader.getLiveDocs(), null);
+          DocsAndPositionsEnum positions = tis.docsAndPositions(reader.getLiveDocs(), null, false);
 
           while (positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
             out.print(" doc=" + positions.docID());

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java Sun Jan 15 23:17:45 2012
@@ -96,7 +96,7 @@ public class TestDocsAndPositions extend
 
   public DocsAndPositionsEnum getDocsAndPositions(IndexReader reader,
       BytesRef bytes, Bits liveDocs) throws IOException {
-      return reader.termPositionsEnum(null, fieldName, bytes);
+    return reader.termPositionsEnum(null, fieldName, bytes, false);
   }
 
   /**
@@ -358,7 +358,7 @@ public class TestDocsAndPositions extend
     writer.addDocument(doc);
     IndexReader reader = writer.getReader();
     IndexReader r = getOnlySegmentReader(reader);
-    DocsAndPositionsEnum disi = r.termPositionsEnum(null, "foo", new BytesRef("bar"));
+    DocsAndPositionsEnum disi = r.termPositionsEnum(null, "foo", new BytesRef("bar"), false);
     int docid = disi.docID();
     assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
     assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
@@ -366,7 +366,7 @@ public class TestDocsAndPositions extend
     // now reuse and check again
     TermsEnum te = r.terms("foo").iterator(null);
     assertTrue(te.seekExact(new BytesRef("bar"), true));
-    disi = te.docsAndPositions(null, disi);
+    disi = te.docsAndPositions(null, disi, false);
     docid = disi.docID();
     assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
     assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);



Mime
View raw message