lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1231794 [1/3] - in /lucene/dev/trunk: lucene/ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/ lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/ lucene/contrib/memory/src/java/org/apache/l...
Date Sun, 15 Jan 2012 23:17:47 GMT
Author: mikemccand
Date: Sun Jan 15 23:17:45 2012
New Revision: 1231794

URL: http://svn.apache.org/viewvc?rev=1231794&view=rev
Log:
LUCENE-3684: add offsets to postings APIs

Added:
    lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/analysis/CannedAnalyzer.java   (with props)
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPostingsOffsets.java   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
    lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
    lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
    lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/dev/trunk/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsConsumer.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiFields.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
    lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexFieldsWriter.java
    lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
    lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java
    lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java
    lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/document/TestDocument.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDoc.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFilterIndexReader.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexableField.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestLongPostings.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestOmitPositions.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloads.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermVectors.java
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
    lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java
    lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Sun Jan 15 23:17:45 2012
@@ -226,6 +226,10 @@ Changes in backwards compatibility polic
 * LUCENE-3640: Removed IndexSearcher.close(), because IndexSearcher no longer
   takes a Directory and no longer "manages" IndexReaders, it is a no-op.
   (Robert Muir)
+
+* LUCENE-3684: Add offsets into DocsAndPositionsEnum, and a new
+  FieldInfo.IndexOptions value: DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS.
+  (Robert Muir, Mike McCandless)
   
 Changes in Runtime Behavior
 

Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Sun Jan 15 23:17:45 2012
@@ -126,7 +126,7 @@ public class TokenSources {
   private static boolean hasPositions(Terms vector) throws IOException {
     final TermsEnum termsEnum = vector.iterator(null);
     if (termsEnum.next() != null) {
-      DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null);
+      DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, false);
       if (dpEnum != null) {
         int pos = dpEnum.nextPosition();
         if (pos >= 0) {
@@ -219,22 +219,21 @@ public class TokenSources {
     DocsAndPositionsEnum dpEnum = null;
     while ((text = termsEnum.next()) != null) {
 
-      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
-      if (dpEnum == null || (!dpEnum.attributes().hasAttribute(OffsetAttribute.class))) {
+      dpEnum = termsEnum.docsAndPositions(null, dpEnum, true);
+      if (dpEnum == null) {
         throw new IllegalArgumentException(
             "Required TermVector Offset information was not found");
       }
 
       final String term = text.utf8ToString();
 
-      final OffsetAttribute offsetAtt = dpEnum.attributes().getAttribute(OffsetAttribute.class);
       dpEnum.nextDoc();
       final int freq = dpEnum.freq();
       for(int posUpto=0;posUpto<freq;posUpto++) {
         final int pos = dpEnum.nextPosition();
         final Token token = new Token(term,
-                                      offsetAtt.startOffset(),
-                                      offsetAtt.endOffset());
+                                      dpEnum.startOffset(),
+                                      dpEnum.endOffset());
         if (tokenPositionsGuaranteedContiguous && pos != -1) {
           // We have positions stored and a guarantee that the token position
           // information is contiguous

Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java Sun Jan 15 23:17:45 2012
@@ -60,22 +60,23 @@ public final class TokenStreamFromTermPo
     BytesRef text;
     DocsAndPositionsEnum dpEnum = null;
     while((text = termsEnum.next()) != null) {
-      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
-      dpEnum.nextDoc();
-      final int freq = dpEnum.freq();
-      final OffsetAttribute offsetAtt;
-      if (dpEnum.attributes().hasAttribute(OffsetAttribute.class)) {
-        offsetAtt = dpEnum.attributes().getAttribute(OffsetAttribute.class);
+      dpEnum = termsEnum.docsAndPositions(null, dpEnum, true);
+      final boolean hasOffsets;
+      if (dpEnum == null) {
+        hasOffsets = false;
+        dpEnum = termsEnum.docsAndPositions(null, dpEnum, false);
       } else {
-        offsetAtt = null;
+        hasOffsets = true;
       }
+      dpEnum.nextDoc();
+      final int freq = dpEnum.freq();
       for (int j = 0; j < freq; j++) {
         int pos = dpEnum.nextPosition();
         Token token;
-        if (offsetAtt != null) {
+        if (hasOffsets) {
           token = new Token(text.utf8ToString(),
-                            offsetAtt.startOffset(),
-                            offsetAtt.endOffset());
+                            dpEnum.startOffset(),
+                            dpEnum.endOffset());
         } else {
           token = new Token();
           token.setEmpty().append(text.utf8ToString());

Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java Sun Jan 15 23:17:45 2012
@@ -21,7 +21,6 @@ import java.util.Collections;
 import java.util.LinkedList;
 import java.util.Set;
 
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
@@ -101,29 +100,19 @@ public class FieldTermStack {
       if (!termSet.contains(term)) {
         continue;
       }
-      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
+      dpEnum = termsEnum.docsAndPositions(null, dpEnum, true);
       if (dpEnum == null) {
         // null snippet
         return;
       }
 
-      if (!dpEnum.attributes().hasAttribute(OffsetAttribute.class)) {
-        // null snippet
-        return;
-      }
       dpEnum.nextDoc();
 
-      final OffsetAttribute offsetAtt = dpEnum.attributes().getAttribute(OffsetAttribute.class);
-
       final int freq = dpEnum.freq();
       
       for(int i = 0;i < freq;i++) {
-        final int pos = dpEnum.nextPosition();
-        if (pos == -1) {
-          // null snippet
-          return;
-        }
-        termList.add(new TermInfo(term, offsetAtt.startOffset(), offsetAtt.endOffset(), pos));
+        int pos = dpEnum.nextPosition();
+        termList.add(new TermInfo(term, dpEnum.startOffset(), dpEnum.endOffset(), pos));
       }
     }
     

Modified: lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Sun Jan 15 23:17:45 2012
@@ -953,7 +953,10 @@ public class MemoryIndex {
       }
 
       @Override
-      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) {
+      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) {
+        if (needsOffsets) {
+          return null;
+        }
         if (reuse == null || !(reuse instanceof MemoryDocsAndPositionsEnum)) {
           reuse = new MemoryDocsAndPositionsEnum();
         }
@@ -1066,6 +1069,16 @@ public class MemoryIndex {
       }
 
       @Override
+      public int startOffset() {
+        return -1;
+      }
+
+      @Override
+      public int endOffset() {
+        return -1;
+      }
+
+      @Override
       public boolean hasPayload() {
         return false;
       }

Modified: lucene/dev/trunk/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (original)
+++ lucene/dev/trunk/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java Sun Jan 15 23:17:45 2012
@@ -206,7 +206,7 @@ public class MemoryIndexTest extends Bas
     MemoryIndex memory = new MemoryIndex();
     memory.addField("foo", "bar", analyzer);
     IndexReader reader = memory.createSearcher().getIndexReader();
-    DocsAndPositionsEnum disi = reader.termPositionsEnum(null, "foo", new BytesRef("bar"));
+    DocsAndPositionsEnum disi = reader.termPositionsEnum(null, "foo", new BytesRef("bar"), false);
     int docid = disi.docID();
     assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
     assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
@@ -214,7 +214,7 @@ public class MemoryIndexTest extends Bas
     // now reuse and check again
     TermsEnum te = reader.terms("foo").iterator(null);
     assertTrue(te.seekExact(new BytesRef("bar"), true));
-    disi = te.docsAndPositions(null, disi);
+    disi = te.docsAndPositions(null, disi, false);
     docid = disi.docID();
     assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
     assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java Sun Jan 15 23:17:45 2012
@@ -697,16 +697,20 @@ public class BlockTermsReader extends Fi
       }
 
       @Override
-      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
-        //System.out.println("BTR.d&p this=" + this);
-        decodeMetaData();
-        if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+        if (fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+          // Positions were not indexed:
           return null;
-        } else {
-          DocsAndPositionsEnum dpe = postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse);
-          //System.out.println("  return d&pe=" + dpe);
-          return dpe;
         }
+
+        if (needsOffsets &&
+            fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
+          // Offsets were not indexed:
+          return null;
+        }
+
+        decodeMetaData();
+        return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, needsOffsets);
       }
 
       @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java Sun Jan 15 23:17:45 2012
@@ -881,13 +881,20 @@ public class BlockTreeTermsReader extend
       }
 
       @Override
-      public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
-        if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+      public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+        if (fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+          // Positions were not indexed:
+          return null;
+        }
+
+        if (needsOffsets &&
+            fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
+          // Offsets were not indexed:
           return null;
-        } else {
-          currentFrame.decodeMetaData();
-          return postingsReader.docsAndPositions(fieldInfo, currentFrame.termState, skipDocs, reuse);
         }
+
+        currentFrame.decodeMetaData();
+        return postingsReader.docsAndPositions(fieldInfo, currentFrame.termState, skipDocs, reuse, needsOffsets);
       }
 
       private int getState() {
@@ -2096,17 +2103,21 @@ public class BlockTreeTermsReader extend
       }
 
       @Override
-      public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
-        assert !eof;
-        //System.out.println("BTR.d&p this=" + this);
-        if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+      public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+        if (fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+          // Positions were not indexed:
+          return null;
+        }
+
+        if (needsOffsets &&
+            fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
+          // Offsets were not indexed:
           return null;
-        } else {
-          currentFrame.decodeMetaData();
-          DocsAndPositionsEnum dpe = postingsReader.docsAndPositions(fieldInfo, currentFrame.state, skipDocs, reuse);
-          //System.out.println("  return d&pe=" + dpe);
-          return dpe;
         }
+
+        assert !eof;
+        currentFrame.decodeMetaData();
+        return postingsReader.docsAndPositions(fieldInfo, currentFrame.state, skipDocs, reuse, needsOffsets);
       }
 
       @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java Sun Jan 15 23:17:45 2012
@@ -102,6 +102,16 @@ public final class MappingMultiDocsAndPo
   public int nextPosition() throws IOException {
     return current.nextPosition();
   }
+
+  @Override
+  public int startOffset() throws IOException {
+    return current.startOffset();
+  }
+  
+  @Override
+  public int endOffset() throws IOException {
+    return current.endOffset();
+  }
   
   @Override
   public BytesRef getPayload() throws IOException {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsConsumer.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsConsumer.java Sun Jan 15 23:17:45 2012
@@ -44,12 +44,12 @@ public abstract class PostingsConsumer {
     int docBase;
   }
 
-  /** Add a new position & payload.  A null payload means no
-   *  payload; a non-null payload with zero length also
-   *  means no payload.  Caller may reuse the {@link
-   *  BytesRef} for the payload between calls (method must
-   *  fully consume the payload). */
-  public abstract void addPosition(int position, BytesRef payload) throws IOException;
+  /** Add a new position & payload, and start/end offset.  A
+   *  null payload means no payload; a non-null payload with
+   *  zero length also means no payload.  Caller may reuse
+   *  the {@link BytesRef} for the payload between calls
+   *  (method must fully consume the payload). */
+  public abstract void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException;
 
   /** Called when we are done adding positions & payloads
    *  for each doc.  Not called  when the field omits term
@@ -88,7 +88,32 @@ public abstract class PostingsConsumer {
         df++;
         totTF += freq;
       }
+    } else if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+      final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings;
+      while(true) {
+        final int doc = postingsEnum.nextDoc();
+        if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+          break;
+        }
+        visitedDocs.set(doc);
+        final int freq = postingsEnum.freq();
+        this.startDoc(doc, freq);
+        totTF += freq;
+        for(int i=0;i<freq;i++) {
+          final int position = postingsEnum.nextPosition();
+          final BytesRef payload;
+          if (postingsEnum.hasPayload()) {
+            payload = postingsEnum.getPayload();
+          } else {
+            payload = null;
+          }
+          this.addPosition(position, payload, -1, -1);
+        }
+        this.finishDoc();
+        df++;
+      }
     } else {
+      assert mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
       final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings;
       while(true) {
         final int doc = postingsEnum.nextDoc();
@@ -107,7 +132,7 @@ public abstract class PostingsConsumer {
           } else {
             payload = null;
           }
-          this.addPosition(position, payload);
+          this.addPosition(position, payload, postingsEnum.startOffset(), postingsEnum.endOffset());
         }
         this.finishDoc();
         df++;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsReaderBase.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsReaderBase.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PostingsReaderBase.java Sun Jan 15 23:17:45 2012
@@ -55,7 +55,8 @@ public abstract class PostingsReaderBase
 
   /** Must fully consume state, since after this call that
    *  TermState may be reused. */
-  public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
+  public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse,
+                                                        boolean needsOffsets) throws IOException;
 
   public abstract void close() throws IOException;
 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermVectorsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermVectorsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermVectorsWriter.java Sun Jan 15 23:17:45 2012
@@ -20,7 +20,6 @@ package org.apache.lucene.codecs;
 import java.io.Closeable;
 import java.io.IOException;
 
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
@@ -185,7 +184,6 @@ public abstract class TermVectorsWriter 
     String lastFieldName = null;
 
     while((fieldName = fieldsEnum.next()) != null) {
-      
       final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
 
       assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0: "lastFieldName=" + lastFieldName + " fieldName=" + fieldName;
@@ -200,79 +198,79 @@ public abstract class TermVectorsWriter 
       if (numTerms == -1) {
         throw new IllegalStateException("vector.getUniqueTermCount() must be implemented (it returned -1)");
       }
-
-      final boolean positions;
-
-      OffsetAttribute offsetAtt;
-
       final TermsEnum termsEnum = terms.iterator(null);
 
       DocsAndPositionsEnum docsAndPositionsEnum = null;
 
-      if (termsEnum.next() != null) {
-        assert numTerms > 0;
-        docsAndPositionsEnum = termsEnum.docsAndPositions(null, null);
-        if (docsAndPositionsEnum != null) {
-          // has positions
-          positions = true;
-          if (docsAndPositionsEnum.attributes().hasAttribute(OffsetAttribute.class)) {
-            offsetAtt = docsAndPositionsEnum.attributes().getAttribute(OffsetAttribute.class);
-          } else {
-            offsetAtt = null;
-          }
-        } else {
-          positions = false;
-          offsetAtt = null;
-        }
-      } else {
-        // no terms in this field (hmm why is field present
-        // then...?)
-        assert numTerms == 0;
-        positions = false;
-        offsetAtt = null;
-      }
-      
-      startField(fieldInfo, numTerms, positions, offsetAtt != null);
+      boolean startedField = false;
 
-      int termCount = 1;
+      // NOTE: this is tricky, because TermVectors allow
+      // indexing offsets but NOT positions.  So we must
+      // lazily init the field by checking whether the first
+      // position we see is -1 or not.
+
+      int termCount = 0;
+      while(termsEnum.next() != null) {
+        termCount++;
 
-      // NOTE: we already .next()'d the TermsEnum above, to
-      // peek @ first term to see if positions/offsets are
-      // present
-      while(true) {
         final int freq = (int) termsEnum.totalTermFreq();
-        startTerm(termsEnum.term(), freq);
 
-        if (positions || offsetAtt != null) {
-          DocsAndPositionsEnum dp = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
-          // TODO: add startOffset()/endOffset() to d&pEnum... this is insanity
-          if (dp != docsAndPositionsEnum) {
-            // producer didnt reuse, must re-pull attributes
-            if (offsetAtt != null) {
-              assert dp.attributes().hasAttribute(OffsetAttribute.class);
-              offsetAtt = dp.attributes().getAttribute(OffsetAttribute.class);
-            }
-          }
-          docsAndPositionsEnum = dp;
+        if (startedField) {
+          startTerm(termsEnum.term(), freq);
+        }
+
+        // TODO: we need a "query" API where we can ask (via
+        // flex API) what this term was indexed with...
+        // Both positions & offsets:
+        docsAndPositionsEnum = termsEnum.docsAndPositions(null, null, true);
+        final boolean hasOffsets;
+        boolean hasPositions = false;
+        if (docsAndPositionsEnum == null) {
+          // Fallback: no offsets
+          docsAndPositionsEnum = termsEnum.docsAndPositions(null, null, false);
+          hasOffsets = false;
+        } else {
+          hasOffsets = true;
+        }
+
+        if (docsAndPositionsEnum != null) {
           final int docID = docsAndPositionsEnum.nextDoc();
           assert docID != DocsEnum.NO_MORE_DOCS;
           assert docsAndPositionsEnum.freq() == freq;
 
           for(int posUpto=0; posUpto<freq; posUpto++) {
             final int pos = docsAndPositionsEnum.nextPosition();
-            final int startOffset = offsetAtt == null ? -1 : offsetAtt.startOffset();
-            final int endOffset = offsetAtt == null ? -1 : offsetAtt.endOffset();
-            
+            if (!startedField) {
+              assert numTerms > 0;
+              hasPositions = pos != -1;
+              startField(fieldInfo, numTerms, hasPositions, hasOffsets);
+              startTerm(termsEnum.term(), freq);
+              startedField = true;
+            }
+            final int startOffset;
+            final int endOffset;
+            if (hasOffsets) {
+              startOffset = docsAndPositionsEnum.startOffset();
+              endOffset = docsAndPositionsEnum.endOffset();
+              assert startOffset != -1;
+              assert endOffset != -1;
+            } else {
+              startOffset = -1;
+              endOffset = -1;
+            }
+            assert !hasPositions || pos >= 0;
             addPosition(pos, startOffset, endOffset);
           }
+        } else {
+          if (!startedField) {
+            assert numTerms > 0;
+            startField(fieldInfo, numTerms, hasPositions, hasOffsets);
+            startTerm(termsEnum.term(), freq);
+            startedField = true;
+          }
         }
-        
-        if (termsEnum.next() == null) {
-          assert termCount == numTerms;
-          break;
-        }
-        termCount++;
       }
+      assert termCount == numTerms;
     }
   }
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java Sun Jan 15 23:17:45 2012
@@ -119,8 +119,41 @@ public abstract class TermsConsumer {
           }
         }
       }
+    } else if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+      if (postingsEnum == null) {
+        postingsEnum = new MappingMultiDocsAndPositionsEnum();
+      }
+      postingsEnum.setMergeState(mergeState);
+      MultiDocsAndPositionsEnum postingsEnumIn = null;
+      while((term = termsEnum.next()) != null) {
+        // We can pass null for liveDocs, because the
+        // mapping enum will skip the non-live docs:
+        postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn, false);
+        assert postingsEnumIn != null;
+        postingsEnum.reset(postingsEnumIn);
+        // set PayloadProcessor
+        if (mergeState.payloadProcessorProvider != null) {
+          for (int i = 0; i < mergeState.readers.size(); i++) {
+            if (mergeState.dirPayloadProcessor[i] != null) {
+              mergeState.currentPayloadProcessor[i] = mergeState.dirPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
+            }
+          }
+        }
+        final PostingsConsumer postingsConsumer = startTerm(term);
+        final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum, visitedDocs);
+        if (stats.docFreq > 0) {
+          finishTerm(term, stats);
+          sumTotalTermFreq += stats.totalTermFreq;
+          sumDFsinceLastAbortCheck += stats.docFreq;
+          sumDocFreq += stats.docFreq;
+          if (sumDFsinceLastAbortCheck > 60000) {
+            mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
+            sumDFsinceLastAbortCheck = 0;
+          }
+        }
+      }
     } else {
-      assert mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+      assert mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
       if (postingsEnum == null) {
         postingsEnum = new MappingMultiDocsAndPositionsEnum();
       }
@@ -129,7 +162,7 @@ public abstract class TermsConsumer {
       while((term = termsEnum.next()) != null) {
         // We can pass null for liveDocs, because the
         // mapping enum will skip the non-live docs:
-        postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn);
+        postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn, true);
         assert postingsEnumIn != null;
         postingsEnum.reset(postingsEnumIn);
         // set PayloadProcessor
@@ -154,7 +187,6 @@ public abstract class TermsConsumer {
         }
       }
     }
-
     finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
   }
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java Sun Jan 15 23:17:45 2012
@@ -966,7 +966,12 @@ public class Lucene3xFields extends Fiel
     }
 
     @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+      if (needsOffsets) {
+        // Pre-4.0 indices never have offsets:
+        return null;
+      }
+
       PreDocsAndPositionsEnum docsPosEnum;
       if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
         return null;
@@ -1082,6 +1087,16 @@ public class Lucene3xFields extends Fiel
     }
 
     @Override
+    public int startOffset() throws IOException {
+      return -1;
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+      return -1;
+    }
+
+    @Override
     public boolean hasPayload() {
       assert docID != NO_MORE_DOCS;
       return pos.isPayloadAvailable();

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReader.java Sun Jan 15 23:17:45 2012
@@ -215,7 +215,9 @@ public final class TermInfosReader {
 
   TermInfo seekEnum(SegmentTermEnum enumerator, Term term, boolean useCache) throws IOException {
     if (useCache) {
-      return seekEnum(enumerator, term, termsCache.get(new CloneableTerm(term)), useCache);
+      return seekEnum(enumerator, term,
+                      termsCache.get(new CloneableTerm(term.deepCopyOf())),
+                      useCache);
     } else {
       return seekEnum(enumerator, term, null, useCache);
     }
@@ -247,7 +249,8 @@ public final class TermInfosReader {
             // of terms in order
             if (tiOrd == null) {
               if (useCache) {
-                termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position));
+                termsCache.put(new CloneableTerm(term.deepCopyOf()),
+                               new TermInfoAndOrd(ti, enumerator.position));
               }
             } else {
               assert sameTermInfo(ti, tiOrd, enumerator);
@@ -279,7 +282,8 @@ public final class TermInfosReader {
       ti = enumerator.termInfo;
       if (tiOrd == null) {
         if (useCache) {
-          termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position));
+          termsCache.put(new CloneableTerm(term.deepCopyOf()),
+                         new TermInfoAndOrd(ti, enumerator.position));
         }
       } else {
         assert sameTermInfo(ti, tiOrd, enumerator);

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java Sun Jan 15 23:17:45 2012
@@ -80,6 +80,8 @@ public class Lucene40FieldInfosReader ex
           } else {
             throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
           }
+        } else if (format <= Lucene40FieldInfosWriter.FORMAT_FLEX && (bits & Lucene40FieldInfosWriter.STORE_OFFSETS_IN_POSTINGS) != 0) {
+          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
         } else {
           indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
         }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java Sun Jan 15 23:17:45 2012
@@ -47,6 +47,7 @@ public class Lucene40FieldInfosWriter ex
   
   static final byte IS_INDEXED = 0x1;
   static final byte STORE_TERMVECTOR = 0x2;
+  static final byte STORE_OFFSETS_IN_POSTINGS = 0x4;
   static final byte OMIT_NORMS = 0x10;
   static final byte STORE_PAYLOADS = 0x20;
   static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
@@ -68,6 +69,8 @@ public class Lucene40FieldInfosWriter ex
         if (fi.storePayloads) bits |= STORE_PAYLOADS;
         if (fi.indexOptions == IndexOptions.DOCS_ONLY) {
           bits |= OMIT_TERM_FREQ_AND_POSITIONS;
+        } else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
+          bits |= STORE_OFFSETS_IN_POSTINGS;
         } else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS) {
           bits |= OMIT_POSITIONS;
         }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java Sun Jan 15 23:17:45 2012
@@ -241,11 +241,15 @@ public class Lucene40PostingsReader exte
   }
 
   @Override
-  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
-    if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs,
+                                               DocsAndPositionsEnum reuse, boolean needsOffsets)
+    throws IOException {
+
+    if (needsOffsets) {
+      // TODO: once we index offsets into postings fix this!
       return null;
     }
-    
+
     // TODO: refactor
     if (fieldInfo.storePayloads) {
       SegmentDocsAndPositionsAndPayloadsEnum docsEnum;
@@ -366,7 +370,7 @@ public class Lucene40PostingsReader exte
       
       start = count; // buffer is consumed
       
-      return doc = skipTo(target, liveDocs);
+      return doc = skipTo(target);
     }
     
     private final int binarySearch(int hi, int low, int target, int[] docs) {
@@ -448,7 +452,7 @@ public class Lucene40PostingsReader exte
      
     }
 
-    private final int skipTo(int target, Bits liveDocs) throws IOException {
+    private final int skipTo(int target) throws IOException {
       if ((target - skipInterval) >= accum && limit >= skipMinimum) {
 
         // There are enough docs in the posting to have
@@ -841,6 +845,16 @@ public class Lucene40PostingsReader exte
       return position;
     }
 
+    @Override
+    public int startOffset() throws IOException {
+      return -1;
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+      return -1;
+    }
+
     /** Returns the payload at this position, or null if no
      *  payload was indexed. */
     @Override
@@ -1074,6 +1088,16 @@ public class Lucene40PostingsReader exte
       return position;
     }
 
+    @Override
+    public int startOffset() throws IOException {
+      return -1;
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+      return -1;
+    }
+
     /** Returns the payload at this position, or null if no
      *  payload was indexed. */
     @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java Sun Jan 15 23:17:45 2012
@@ -155,6 +155,10 @@ public final class Lucene40PostingsWrite
     */
     this.fieldInfo = fieldInfo;
     indexOptions = fieldInfo.indexOptions;
+    if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+      throw new IllegalArgumentException("this codec cannot index offsets");
+    }
+        
     storePayloads = fieldInfo.storePayloads;
     //System.out.println("  set init blockFreqStart=" + freqStart);
     //System.out.println("  set init blockProxStart=" + proxStart);
@@ -197,11 +201,19 @@ public final class Lucene40PostingsWrite
 
   /** Add a new position & payload */
   @Override
-  public void addPosition(int position, BytesRef payload) throws IOException {
+  public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
     //if (DEBUG) System.out.println("SPW:     addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
     assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS: "invalid indexOptions: " + indexOptions;
     assert proxOut != null;
 
+    // TODO: when we add offsets... often
+    // endOffset-startOffset will be constant or near
+    // constant for all docs (e.g. if the term wasn't stemmed
+    // then this will usually be the UTF-16 length of the
+    // term); it would be nice to write that length once up
+    // front and then not encode endOffset for each
+    // position.
+
     final int delta = position - lastPosition;
     
     assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition;            // not quite right (if pos=0 is repeated twice we don't catch it)

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java Sun Jan 15 23:17:45 2012
@@ -24,7 +24,6 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.codecs.TermVectorsReader;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocsAndPositionsEnum;
@@ -518,21 +517,20 @@ public class Lucene40TermVectorsReader e
     }
 
     @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+      if (needsOffsets && !storeOffsets) {
+        return null;
+      }
+
       if (!storePositions && !storeOffsets) {
         return null;
       }
       
       TVDocsAndPositionsEnum docsAndPositionsEnum;
-      if (reuse != null) {
+      if (reuse != null && reuse instanceof TVDocsAndPositionsEnum) {
         docsAndPositionsEnum = (TVDocsAndPositionsEnum) reuse;
-        if (docsAndPositionsEnum.canReuse(storeOffsets)) {
-          docsAndPositionsEnum = (TVDocsAndPositionsEnum) reuse;
-        } else {
-          docsAndPositionsEnum = new TVDocsAndPositionsEnum(storeOffsets);
-        }
       } else {
-        docsAndPositionsEnum = new TVDocsAndPositionsEnum(storeOffsets);
+        docsAndPositionsEnum = new TVDocsAndPositionsEnum();
       }
       docsAndPositionsEnum.reset(liveDocs, positions, startOffsets, endOffsets);
       return docsAndPositionsEnum;
@@ -592,7 +590,6 @@ public class Lucene40TermVectorsReader e
   }
 
   private static class TVDocsAndPositionsEnum extends DocsAndPositionsEnum {
-    private final OffsetAttribute offsetAtt;
     private boolean didNext;
     private int doc = -1;
     private int nextPos;
@@ -601,18 +598,6 @@ public class Lucene40TermVectorsReader e
     private int[] startOffsets;
     private int[] endOffsets;
 
-    public TVDocsAndPositionsEnum(boolean storeOffsets) {
-      if (storeOffsets) {
-        offsetAtt = attributes().addAttribute(OffsetAttribute.class);
-      } else {
-        offsetAtt = null;
-      }
-    }
-
-    public boolean canReuse(boolean storeOffsets) {
-      return storeOffsets == (offsetAtt != null);
-    }
-
     @Override
     public int freq() {
       if (positions != null) {
@@ -651,7 +636,6 @@ public class Lucene40TermVectorsReader e
       this.liveDocs = liveDocs;
       this.positions = positions;
       this.startOffsets = startOffsets;
-      assert (offsetAtt != null) == (startOffsets != null);
       this.endOffsets = endOffsets;
       this.doc = -1;
       didNext = false;
@@ -673,10 +657,6 @@ public class Lucene40TermVectorsReader e
       assert (positions != null && nextPos < positions.length) ||
         startOffsets != null && nextPos < startOffsets.length;
 
-      if (startOffsets != null) {
-        offsetAtt.setOffset(startOffsets[nextPos],
-                            endOffsets[nextPos]);
-      }
       if (positions != null) {
         return positions[nextPos++];
       } else {
@@ -684,6 +664,18 @@ public class Lucene40TermVectorsReader e
         return -1;
       }
     }
+
+    @Override
+    public int startOffset() {
+      assert startOffsets != null;
+      return startOffsets[nextPos-1];
+    }
+
+    @Override
+    public int endOffset() {
+      assert endOffsets != null;
+      return endOffsets[nextPos-1];
+    }
   }
 
   @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Sun Jan 15 23:17:45 2012
@@ -131,7 +131,7 @@ public class MemoryPostingsFormat extend
       }
 
       @Override
-      public void addPosition(int pos, BytesRef payload) throws IOException {
+      public void addPosition(int pos, BytesRef payload, int startOffset, int endOffset) throws IOException {
         assert payload == null || field.storePayloads;
 
         if (VERBOSE) System.out.println("      addPos pos=" + pos + " payload=" + payload);
@@ -249,6 +249,9 @@ public class MemoryPostingsFormat extend
     return new FieldsConsumer() {
       @Override
       public TermsConsumer addField(FieldInfo field) {
+        if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+          throw new IllegalArgumentException("this codec cannot index offsets");
+        }
         if (VERBOSE) System.out.println("\naddField field=" + field.name);
         return new TermsWriter(out, field);
       }
@@ -328,7 +331,7 @@ public class MemoryPostingsFormat extend
             assert freq > 0;
           }
 
-          if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+          if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
             // Skip positions
             for(int posUpto=0;posUpto<freq;posUpto++) {
               if (!storePayloads) {
@@ -501,6 +504,16 @@ public class MemoryPostingsFormat extend
     }
 
     @Override
+    public int startOffset() {
+      return -1;
+    }
+
+    @Override
+    public int endOffset() {
+      return -1;
+    }
+
+    @Override
     public BytesRef getPayload() {
       payloadRetrieved = true;
       return payload;
@@ -618,8 +631,14 @@ public class MemoryPostingsFormat extend
     }
 
     @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
-      if (field.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+
+      if (needsOffsets) {
+        // Not until we can index offsets...
+        return null;
+      }
+      
+      if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
         return null;
       }
       decodeMetaData();

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java Sun Jan 15 23:17:45 2012
@@ -215,10 +215,8 @@ public class PulsingPostingsReader exten
   }
 
   @Override
-  public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
-    if (field.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
-      return null;
-    }
+  public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse,
+                                               boolean needsOffsets) throws IOException {
     //System.out.println("D&P: field=" + field.name);
 
     final PulsingTermState termState = (PulsingTermState) _termState;
@@ -245,11 +243,12 @@ public class PulsingPostingsReader exten
       return postings.reset(liveDocs, termState);
     } else {
       if (reuse instanceof PulsingDocsAndPositionsEnum) {
-        DocsAndPositionsEnum wrapped = wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, (DocsAndPositionsEnum) getOther(reuse));
+        DocsAndPositionsEnum wrapped = wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, (DocsAndPositionsEnum) getOther(reuse),
+                                                                              needsOffsets);
         setOther(wrapped, reuse); // wrapped.other = reuse
         return wrapped;
       } else {
-        return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, reuse);
+        return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, reuse, needsOffsets);
       }
     }
   }
@@ -486,6 +485,16 @@ public class PulsingPostingsReader exten
       return position;
     }
 
+    @Override
+    public int startOffset() {
+      return -1;
+    }
+
+    @Override
+    public int endOffset() {
+      return -1;
+    }
+
     private void skipPositions() throws IOException {
       while(posPending != 0) {
         nextPosition();

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java Sun Jan 15 23:17:45 2012
@@ -115,6 +115,9 @@ public final class PulsingPostingsWriter
   @Override
   public void setField(FieldInfo fieldInfo) {
     this.indexOptions = fieldInfo.indexOptions;
+    if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+      throw new IllegalArgumentException("this codec cannot index offsets: " + indexOptions);
+    }
     if (DEBUG) System.out.println("PW field=" + fieldInfo.name + " indexOptions=" + indexOptions);
     storePayloads = fieldInfo.storePayloads;
     wrappedPostingsWriter.setField(fieldInfo);
@@ -165,7 +168,7 @@ public final class PulsingPostingsWriter
   }
 
   @Override
-  public void addPosition(int position, BytesRef payload) throws IOException {
+  public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
 
     if (DEBUG) System.out.println("PW       pos=" + position + " payload=" + (payload == null ? "null" : payload.length + " bytes"));
     if (pendingCount == pending.length) {
@@ -175,7 +178,7 @@ public final class PulsingPostingsWriter
     if (pendingCount == -1) {
       // We've already seen too many docs for this term --
       // just forward to our fallback writer
-      wrappedPostingsWriter.addPosition(position, payload);
+      wrappedPostingsWriter.addPosition(position, payload, -1, -1);
     } else {
       // buffer up
       final Position pos = pending[pendingCount++];
@@ -360,7 +363,7 @@ public final class PulsingPostingsWriter
     wrappedPostingsWriter.startTerm();
       
     // Flush all buffered docs
-    if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+    if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
       Position doc = null;
       for(Position pos : pending) {
         if (doc == null) {
@@ -376,7 +379,7 @@ public final class PulsingPostingsWriter
           wrappedPostingsWriter.startDoc(doc.docID, doc.termFreq);
         }
         if (DEBUG) System.out.println("PW:   wrapped.addPos pos=" + pos.pos);
-        wrappedPostingsWriter.addPosition(pos.pos, pos.payload);
+        wrappedPostingsWriter.addPosition(pos.pos, pos.payload, -1, -1);
       }
       //wrappedPostingsWriter.finishDoc();
     } else {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java Sun Jan 15 23:17:45 2012
@@ -294,7 +294,18 @@ public class SepPostingsReader extends P
   }
 
   @Override
-  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
+  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs,
+                                               DocsAndPositionsEnum reuse, boolean needsOffsets)
+    throws IOException {
+
+    if (fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+      return null;
+    }
+
+    if (needsOffsets) {
+      return null;
+    }
+
     assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
     final SepTermState termState = (SepTermState) _termState;
     SepDocsAndPositionsEnum postingsEnum;
@@ -713,6 +724,16 @@ public class SepPostingsReader extends P
       return position;
     }
 
+    @Override
+    public int startOffset() {
+      return -1;
+    }
+
+    @Override
+    public int endOffset() {
+      return -1;
+    }
+
     private BytesRef payload;
 
     @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java Sun Jan 15 23:17:45 2012
@@ -188,6 +188,9 @@ public final class SepPostingsWriter ext
   public void setField(FieldInfo fieldInfo) {
     this.fieldInfo = fieldInfo;
     this.indexOptions = fieldInfo.indexOptions;
+    if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+      throw new IllegalArgumentException("this codec cannot index offsets");
+    }
     skipListWriter.setIndexOptions(indexOptions);
     storePayloads = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && fieldInfo.storePayloads;
   }
@@ -222,7 +225,7 @@ public final class SepPostingsWriter ext
 
   /** Add a new position & payload */
   @Override
-  public void addPosition(int position, BytesRef payload) throws IOException {
+  public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
     assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
 
     final int delta = position - lastPosition;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java Sun Jan 15 23:17:45 2012
@@ -103,7 +103,7 @@ public class SimpleTextFieldInfosReader 
         IndexOptions indexOptions = IndexOptions.valueOf(readString(INDEXOPTIONS.length, scratch));
 
         hasVectors |= storeTermVector;
-        hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+        hasProx |= isIndexed && indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
         hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
         
         infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java Sun Jan 15 23:17:45 2012
@@ -62,7 +62,7 @@ public class SimpleTextFieldInfosWriter 
       SimpleTextUtil.writeNewline(out);
       
       for (FieldInfo fi : infos) {
-        assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
+        assert fi.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.storePayloads;
 
         SimpleTextUtil.write(out, NAME);
         SimpleTextUtil.write(out, fi.name, scratch);

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Sun Jan 15 23:17:45 2012
@@ -50,13 +50,15 @@ class SimpleTextFieldsReader extends Fie
   private final IndexInput in;
   private final FieldInfos fieldInfos;
 
-  final static BytesRef END     = SimpleTextFieldsWriter.END;
-  final static BytesRef FIELD   = SimpleTextFieldsWriter.FIELD;
-  final static BytesRef TERM    = SimpleTextFieldsWriter.TERM;
-  final static BytesRef DOC     = SimpleTextFieldsWriter.DOC;
-  final static BytesRef FREQ    = SimpleTextFieldsWriter.FREQ;
-  final static BytesRef POS     = SimpleTextFieldsWriter.POS;
-  final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
+  final static BytesRef END          = SimpleTextFieldsWriter.END;
+  final static BytesRef FIELD        = SimpleTextFieldsWriter.FIELD;
+  final static BytesRef TERM         = SimpleTextFieldsWriter.TERM;
+  final static BytesRef DOC          = SimpleTextFieldsWriter.DOC;
+  final static BytesRef FREQ         = SimpleTextFieldsWriter.FREQ;
+  final static BytesRef POS          = SimpleTextFieldsWriter.POS;
+  final static BytesRef START_OFFSET = SimpleTextFieldsWriter.START_OFFSET;
+  final static BytesRef END_OFFSET   = SimpleTextFieldsWriter.END_OFFSET;
+  final static BytesRef PAYLOAD      = SimpleTextFieldsWriter.PAYLOAD;
 
   public SimpleTextFieldsReader(SegmentReadState state) throws IOException {
     in = state.dir.openInput(SimpleTextPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix), state.context);
@@ -204,8 +206,16 @@ class SimpleTextFieldsReader extends Fie
     }
 
     @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
-      if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
+
+      if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+        // Positions were not indexed
+        return null;
+      }
+
+      if (needsOffsets &&
+          indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
+        // Offsets were not indexed
         return null;
       }
 
@@ -215,7 +225,7 @@ class SimpleTextFieldsReader extends Fie
       } else {
         docsAndPositionsEnum = new SimpleTextDocsAndPositionsEnum();
       } 
-      return docsAndPositionsEnum.reset(docsStart, liveDocs);
+      return docsAndPositionsEnum.reset(docsStart, liveDocs, indexOptions);
     }
     
     @Override
@@ -289,6 +299,10 @@ class SimpleTextFieldsReader extends Fie
           termFreq = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
         } else if (StringHelper.startsWith(scratch, POS)) {
           // skip termFreq++;
+        } else if (StringHelper.startsWith(scratch, START_OFFSET)) {
+          // skip
+        } else if (StringHelper.startsWith(scratch, END_OFFSET)) {
+          // skip
         } else if (StringHelper.startsWith(scratch, PAYLOAD)) {
           // skip
         } else {
@@ -325,6 +339,10 @@ class SimpleTextFieldsReader extends Fie
     private final CharsRef scratchUTF16_2 = new CharsRef(10);
     private BytesRef payload;
     private long nextDocStart;
+    private boolean readOffsets;
+    private boolean readPositions;
+    private int startOffset = -1;
+    private int endOffset = -1;
 
     public SimpleTextDocsAndPositionsEnum() {
       this.inStart = SimpleTextFieldsReader.this.in;
@@ -335,10 +353,12 @@ class SimpleTextFieldsReader extends Fie
       return in == inStart;
     }
 
-    public SimpleTextDocsAndPositionsEnum reset(long fp, Bits liveDocs) {
+    public SimpleTextDocsAndPositionsEnum reset(long fp, Bits liveDocs, IndexOptions indexOptions) {
       this.liveDocs = liveDocs;
       nextDocStart = fp;
       docID = -1;
+      readPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+      readOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
       return this;
     }
 
@@ -360,6 +380,7 @@ class SimpleTextFieldsReader extends Fie
       while(true) {
         final long lineStart = in.getFilePointer();
         SimpleTextUtil.readLine(in, scratch);
+        //System.out.println("NEXT DOC: " + scratch.utf8ToString());
         if (StringHelper.startsWith(scratch, DOC)) {
           if (!first && (liveDocs == null || liveDocs.get(docID))) {
             nextDocStart = lineStart;
@@ -376,6 +397,10 @@ class SimpleTextFieldsReader extends Fie
           posStart = in.getFilePointer();
         } else if (StringHelper.startsWith(scratch, POS)) {
           // skip
+        } else if (StringHelper.startsWith(scratch, START_OFFSET)) {
+          // skip
+        } else if (StringHelper.startsWith(scratch, END_OFFSET)) {
+          // skip
         } else if (StringHelper.startsWith(scratch, PAYLOAD)) {
           // skip
         } else {
@@ -399,10 +424,27 @@ class SimpleTextFieldsReader extends Fie
 
     @Override
     public int nextPosition() throws IOException {
-      SimpleTextUtil.readLine(in, scratch);
-      assert StringHelper.startsWith(scratch, POS): "got line=" + scratch.utf8ToString();
-      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+POS.length, scratch.length-POS.length, scratchUTF16_2);
-      final int pos = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
+      final int pos;
+      if (readPositions) {
+        SimpleTextUtil.readLine(in, scratch);
+        assert StringHelper.startsWith(scratch, POS): "got line=" + scratch.utf8ToString();
+        UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+POS.length, scratch.length-POS.length, scratchUTF16_2);
+        pos = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
+      } else {
+        pos = -1;
+      }
+
+      if (readOffsets) {
+        SimpleTextUtil.readLine(in, scratch);
+        assert StringHelper.startsWith(scratch, START_OFFSET): "got line=" + scratch.utf8ToString();
+        UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+START_OFFSET.length, scratch.length-START_OFFSET.length, scratchUTF16_2);
+        startOffset = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
+        SimpleTextUtil.readLine(in, scratch);
+        assert StringHelper.startsWith(scratch, END_OFFSET): "got line=" + scratch.utf8ToString();
+        UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+END_OFFSET.length, scratch.length-END_OFFSET.length, scratchUTF16_2);
+        endOffset = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
+      }
+
       final long fp = in.getFilePointer();
       SimpleTextUtil.readLine(in, scratch);
       if (StringHelper.startsWith(scratch, PAYLOAD)) {
@@ -421,6 +463,16 @@ class SimpleTextFieldsReader extends Fie
     }
 
     @Override
+    public int startOffset() throws IOException {
+      return startOffset;
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+      return endOffset;
+    }
+
+    @Override
     public BytesRef getPayload() {
       // Some tests rely on only being able to retrieve the
       // payload once

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java Sun Jan 15 23:17:45 2012
@@ -35,13 +35,15 @@ class SimpleTextFieldsWriter extends Fie
   private final IndexOutput out;
   private final BytesRef scratch = new BytesRef(10);
 
-  final static BytesRef END     = new BytesRef("END");
-  final static BytesRef FIELD   = new BytesRef("field ");
-  final static BytesRef TERM    = new BytesRef("  term ");
-  final static BytesRef DOC     = new BytesRef("    doc ");
-  final static BytesRef FREQ    = new BytesRef("      freq ");
-  final static BytesRef POS     = new BytesRef("      pos ");
-  final static BytesRef PAYLOAD = new BytesRef("        payload ");
+  final static BytesRef END          = new BytesRef("END");
+  final static BytesRef FIELD        = new BytesRef("field ");
+  final static BytesRef TERM         = new BytesRef("  term ");
+  final static BytesRef DOC          = new BytesRef("    doc ");
+  final static BytesRef FREQ         = new BytesRef("      freq ");
+  final static BytesRef POS          = new BytesRef("      pos ");
+  final static BytesRef START_OFFSET = new BytesRef("      startOffset ");
+  final static BytesRef END_OFFSET   = new BytesRef("      endOffset ");
+  final static BytesRef PAYLOAD      = new BytesRef("        payload ");
 
   public SimpleTextFieldsWriter(SegmentWriteState state) throws IOException {
     final String fileName = SimpleTextPostingsFormat.getPostingsFileName(state.segmentName, state.segmentSuffix);
@@ -97,10 +99,19 @@ class SimpleTextFieldsWriter extends Fie
   private class SimpleTextPostingsWriter extends PostingsConsumer {
     private BytesRef term;
     private boolean wroteTerm;
-    private IndexOptions indexOptions;
+    private final IndexOptions indexOptions;
+    private final boolean writePositions;
+    private final boolean writeOffsets;
+
+    // for assert:
+    private int lastEndOffset = -1;
 
     public SimpleTextPostingsWriter(FieldInfo field) {
       this.indexOptions = field.indexOptions;
+      writePositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+      writeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+      //System.out.println("writeOffsets=" + writeOffsets);
+      //System.out.println("writePos=" + writePositions);
     }
 
     @Override
@@ -121,10 +132,10 @@ class SimpleTextFieldsWriter extends Fie
         write(Integer.toString(termDocFreq));
         newline();
       }
+
+      lastEndOffset = -1;
     }
     
-    
-
     public PostingsConsumer reset(BytesRef term) {
       this.term = term;
       wroteTerm = false;
@@ -132,10 +143,25 @@ class SimpleTextFieldsWriter extends Fie
     }
 
     @Override
-    public void addPosition(int position, BytesRef payload) throws IOException {
-      write(POS);
-      write(Integer.toString(position));
-      newline();
+    public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
+      if (writePositions) {
+        write(POS);
+        write(Integer.toString(position));
+        newline();
+      }
+
+      if (writeOffsets) {
+        assert endOffset >= startOffset;
+        assert startOffset >= lastEndOffset: "startOffset=" + startOffset + " lastEndOffset=" + lastEndOffset;
+        lastEndOffset = endOffset;
+        write(START_OFFSET);
+        write(Integer.toString(startOffset));
+        newline();
+        write(END_OFFSET);
+        write(Integer.toString(endOffset));
+        newline();
+      }
+
       if (payload != null && payload.length > 0) {
         assert payload.length != 0;
         write(PAYLOAD);

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java Sun Jan 15 23:17:45 2012
@@ -38,7 +38,7 @@ public class SimpleTextTermVectorsFormat
 
   @Override
   public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
-    return new SimpleTextTermVectorsReader(directory, segmentInfo, fieldInfos, context);
+    return new SimpleTextTermVectorsReader(directory, segmentInfo, context);
   }
 
   @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1231794&r1=1231793&r2=1231794&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Sun Jan 15 23:17:45 2012
@@ -26,11 +26,9 @@ import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.codecs.TermVectorsReader;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexFileNames;
@@ -63,7 +61,7 @@ public class SimpleTextTermVectorsReader
   private BytesRef scratch = new BytesRef();
   private CharsRef scratchUTF16 = new CharsRef();
   
-  public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, FieldInfos fieldInfos, IOContext context) throws IOException {
+  public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, IOContext context) throws IOException {
     boolean success = false;
     try {
       in = directory.openInput(IndexFileNames.segmentFileName(si.name, "", VECTORS_EXTENSION), context);
@@ -114,7 +112,8 @@ public class SimpleTextTermVectorsReader
     for (int i = 0; i < numFields; i++) {
       readLine();
       assert StringHelper.startsWith(scratch, FIELD);
-      int fieldNumber = parseIntAt(FIELD.length);
+      // skip fieldNumber:
+      parseIntAt(FIELD.length);
       
       readLine();
       assert StringHelper.startsWith(scratch, FIELDNAME);
@@ -373,13 +372,16 @@ public class SimpleTextTermVectorsReader
     }
 
     @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
       SimpleTVPostings postings = current.getValue();
       if (postings.positions == null && postings.startOffsets == null) {
         return null;
       }
+      if (needsOffsets && (postings.startOffsets == null || postings.endOffsets == null)) {
+        return null;
+      }
       // TODO: reuse
-      SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum(postings.startOffsets != null);
+      SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum();
       e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets);
       return e;
     }
@@ -436,7 +438,6 @@ public class SimpleTextTermVectorsReader
   }
   
   private static class SimpleTVDocsAndPositionsEnum extends DocsAndPositionsEnum {
-    private final OffsetAttribute offsetAtt;
     private boolean didNext;
     private int doc = -1;
     private int nextPos;
@@ -445,18 +446,6 @@ public class SimpleTextTermVectorsReader
     private int[] startOffsets;
     private int[] endOffsets;
 
-    public SimpleTVDocsAndPositionsEnum(boolean storeOffsets) {
-      if (storeOffsets) {
-        offsetAtt = attributes().addAttribute(OffsetAttribute.class);
-      } else {
-        offsetAtt = null;
-      }
-    }
-
-    public boolean canReuse(boolean storeOffsets) {
-      return storeOffsets == (offsetAtt != null);
-    }
-
     @Override
     public int freq() {
       if (positions != null) {
@@ -495,7 +484,6 @@ public class SimpleTextTermVectorsReader
       this.liveDocs = liveDocs;
       this.positions = positions;
       this.startOffsets = startOffsets;
-      assert (offsetAtt != null) == (startOffsets != null);
       this.endOffsets = endOffsets;
       this.doc = -1;
       didNext = false;
@@ -516,11 +504,6 @@ public class SimpleTextTermVectorsReader
     public int nextPosition() {
       assert (positions != null && nextPos < positions.length) ||
         startOffsets != null && nextPos < startOffsets.length;
-
-      if (startOffsets != null) {
-        offsetAtt.setOffset(startOffsets[nextPos],
-                            endOffsets[nextPos]);
-      }
       if (positions != null) {
         return positions[nextPos++];
       } else {
@@ -528,5 +511,15 @@ public class SimpleTextTermVectorsReader
         return -1;
       }
     }
+
+    @Override
+    public int startOffset() {
+      return startOffsets[nextPos-1];
+    }
+
+    @Override
+    public int endOffset() {
+      return endOffsets[nextPos-1];
+    }
   }
 }



Mime
View raw message