lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r682812 [2/2] - in /lucene/java/trunk: ./ docs/ src/java/org/apache/lucene/document/ src/java/org/apache/lucene/index/ src/site/src/documentation/content/xdocs/ src/test/org/apache/lucene/ src/test/org/apache/lucene/index/
Date Tue, 05 Aug 2008 17:17:43 GMT
Modified: lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/Fieldable.java Tue Aug  5 10:17:42 2008
@@ -139,6 +139,15 @@
    */
   void setOmitNorms(boolean omitNorms);
 
+  /** Expert:
+   *
+   * If set, omit term freq, positions and payloads from postings for this field.
+   */
+  void setOmitTf(boolean omitTf);
+  
+  /** True if tf is omitted for this indexed field */
+  boolean getOmitTf();
+
   /**
    * Indicates whether a Field is Lazy or not.  The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
   * its values via {@link #stringValue()} or {@link #binaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/CheckIndex.java Tue Aug  5 10:17:42 2008
@@ -114,13 +114,12 @@
     else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
       sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
     else {
-      // LUCENE-1255: All versions before 2.3.2/2.4 were
-      // able to create position=-1 when the very first
-      // Token has positionIncrement 0
       if (format == SegmentInfos.FORMAT_CHECKSUM)
         sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
       else if (format == SegmentInfos.FORMAT_DEL_COUNT)
-          sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
+        sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
+      else if (format == SegmentInfos.FORMAT_HAS_PROX)
+        sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
       else if (format < SegmentInfos.CURRENT_FORMAT) {
         sFormat = "int=" + format + " [newer version of Lucene than this tool]";
         skip = true;
@@ -161,6 +160,7 @@
 
       try {
         out.println("    compound=" + info.getUseCompoundFile());
+        out.println("    hasProx=" + info.getHasProx());
         out.println("    numFiles=" + info.files().size());
         out.println("    size (MB)=" + nf.format(info.sizeInBytes()/(1024.*1024.)));
         final int docStoreOffset = info.getDocStoreOffset();
@@ -224,7 +224,7 @@
             final int doc = termPositions.doc();
             final int freq = termPositions.freq();
             if (doc <= lastDoc)
-              throw new RuntimeException("term " + term + ": doc " + doc + " < lastDoc " + lastDoc);
+              throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
             lastDoc = doc;
             if (freq <= 0)
               throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DefaultSkipListWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DefaultSkipListWriter.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DefaultSkipListWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DefaultSkipListWriter.java Tue Aug  5 10:17:42 2008
@@ -62,7 +62,8 @@
     this.curStorePayloads = storePayloads;
     this.curPayloadLength = payloadLength;
     this.curFreqPointer = freqOutput.getFilePointer();
-    this.curProxPointer = proxOutput.getFilePointer();
+    if (proxOutput != null)
+      this.curProxPointer = proxOutput.getFilePointer();
   }
   
   protected void resetSkip() {
@@ -70,7 +71,8 @@
     Arrays.fill(lastSkipDoc, 0);
     Arrays.fill(lastSkipPayloadLength, -1);  // we don't have to write the first length in the skip list
     Arrays.fill(lastSkipFreqPointer, freqOutput.getFilePointer());
-    Arrays.fill(lastSkipProxPointer, proxOutput.getFilePointer());
+    if (proxOutput != null)
+      Arrays.fill(lastSkipProxPointer, proxOutput.getFilePointer());
   }
   
   protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java Tue Aug  5 10:17:42 2008
@@ -183,7 +183,7 @@
         // easily add it
         FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(), field.isTermVectorStored(),
                                       field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
-                                      field.getOmitNorms(), false);
+                                      field.getOmitNorms(), false, field.getOmitTf());
 
         fp = new DocFieldProcessorPerField(this, fi);
         fp.next = fieldHash[hashPos];
@@ -195,7 +195,7 @@
       } else
         fp.fieldInfo.update(field.isIndexed(), field.isTermVectorStored(),
                             field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
-                            field.getOmitNorms(), false);
+                            field.getOmitNorms(), false, field.getOmitTf());
 
       if (thisFieldGen != fp.lastGen) {
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java Tue Aug  5 10:17:42 2008
@@ -132,6 +132,8 @@
   boolean bufferIsFull;                   // True when it's time to write segment
   private boolean aborting;               // True if an abort is pending
 
+  private DocFieldProcessor docFieldProcessor;
+
   PrintStream infoStream;
   int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
   Similarity similarity;
@@ -261,7 +263,13 @@
     final DocInverter docInverter = new DocInverter(termsHash, normsWriter);
     final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(this);
     final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter);
-    consumer = new DocFieldProcessor(this, docFieldConsumers);
+    consumer = docFieldProcessor = new DocFieldProcessor(this, docFieldConsumers);
+  }
+
+  /** Returns true if any of the fields in the current
+   *  buffered docs have omitTf==false */
+  boolean hasProx() {
+    return docFieldProcessor.fieldInfos.hasProx();
   }
 
   /** If non-null, various details of indexing are printed

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java Tue Aug  5 10:17:42 2008
@@ -27,13 +27,14 @@
   boolean storeOffsetWithTermVector;
   boolean storePositionWithTermVector;
 
-  boolean omitNorms; // omit norms associated with indexed fields
+  boolean omitNorms; // omit norms associated with indexed fields  
+  boolean omitTf; // omit tf
   
   boolean storePayloads; // whether this field stores payloads together with term positions
 
   FieldInfo(String na, boolean tk, int nu, boolean storeTermVector, 
             boolean storePositionWithTermVector,  boolean storeOffsetWithTermVector, 
-            boolean omitNorms, boolean storePayloads) {
+            boolean omitNorms, boolean storePayloads, boolean omitTf) {
     name = na;
     isIndexed = tk;
     number = nu;
@@ -42,15 +43,16 @@
     this.storePositionWithTermVector = storePositionWithTermVector;
     this.omitNorms = omitNorms;
     this.storePayloads = storePayloads;
+    this.omitTf = omitTf;
   }
 
   public Object clone() {
     return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
-                         storeOffsetWithTermVector, omitNorms, storePayloads);
+                         storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);
   }
 
   void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, 
-              boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads) {
+              boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTf) {
     if (this.isIndexed != isIndexed) {
       this.isIndexed = true;                      // once indexed, always index
     }
@@ -66,6 +68,9 @@
     if (this.omitNorms != omitNorms) {
       this.omitNorms = false;                // once norms are stored, always store
     }
+    if (this.omitTf != omitTf) {
+      this.omitTf = true;                // if omitTf is required at least once, tf remains omitted for life
+    }
     if (this.storePayloads != storePayloads) {
       this.storePayloads = true;
     }
@@ -87,6 +92,9 @@
     if (omitNorms != other.omitNorms) {
       omitNorms = false;                // once norms are stored, always store
     }
+    if (this.omitTf != omitTf) {
+      this.omitTf = true;                // if omitTf is required at least once, tf remains omitted for life
+    }
     if (storePayloads != other.storePayloads) {
       storePayloads = true;
     }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java Tue Aug  5 10:17:42 2008
@@ -40,6 +40,7 @@
   static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
   static final byte OMIT_NORMS = 0x10;
   static final byte STORE_PAYLOADS = 0x20;
+  static final byte OMIT_TF = 0x40;
   
   private ArrayList byNumber = new ArrayList();
   private HashMap byName = new HashMap();
@@ -86,6 +87,15 @@
               field.isStoreOffsetWithTermVector(), field.getOmitNorms());
     }
   }
+
+  /** Returns true if any fields do not omitTf */
+  boolean hasProx() {
+    final int numFields = byNumber.size();
+    for(int i=0;i<numFields;i++)
+      if (!fieldInfo(i).omitTf)
+        return true;
+    return false;
+  }
   
   /**
    * Add fields that are indexed. Whether they have termvectors has to be specified.
@@ -172,7 +182,7 @@
   synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
                   boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
     add(name, isIndexed, storeTermVector, storePositionWithTermVector,
-        storeOffsetWithTermVector, omitNorms, false);
+        storeOffsetWithTermVector, omitNorms, false, false);
   }
   
   /** If the field is not yet known, adds it. If it is known, checks to make
@@ -187,15 +197,16 @@
    * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
    * @param omitNorms true if the norms for the indexed field should be omitted
    * @param storePayloads true if payloads should be stored for this field
+   * @param omitTf true if term freqs should be omitted for this field
    */
   synchronized public FieldInfo add(String name, boolean isIndexed, boolean storeTermVector,
                        boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
-                       boolean omitNorms, boolean storePayloads) {
+                       boolean omitNorms, boolean storePayloads, boolean omitTf) {
     FieldInfo fi = fieldInfo(name);
     if (fi == null) {
-      return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads);
+      return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);
     } else {
-      fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads);
+      fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);
     }
     return fi;
   }
@@ -205,7 +216,7 @@
     if (fi == null) {
       return addInternal(fieldInfo.name, fieldInfo.isIndexed, fieldInfo.storeTermVector,
                          fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector,
-                         fieldInfo.omitNorms, fieldInfo.storePayloads);
+                         fieldInfo.omitNorms, fieldInfo.storePayloads, fieldInfo.omitTf);
     } else {
       fi.update(fieldInfo);
     }
@@ -214,10 +225,10 @@
 
   private FieldInfo addInternal(String name, boolean isIndexed,
                                 boolean storeTermVector, boolean storePositionWithTermVector, 
-                                boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads) {
+                                boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTf) {
     FieldInfo fi =
       new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
-              storeOffsetWithTermVector, omitNorms, storePayloads);
+              storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);
     byNumber.add(fi);
     byName.put(name, fi);
     return fi;
@@ -289,6 +300,8 @@
       if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
       if (fi.omitNorms) bits |= OMIT_NORMS;
       if (fi.storePayloads) bits |= STORE_PAYLOADS;
+      if (fi.omitTf) bits |= OMIT_TF;
+      
       output.writeString(fi.name);
       output.writeByte(bits);
     }
@@ -305,8 +318,9 @@
       boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
       boolean omitNorms = (bits & OMIT_NORMS) != 0;
       boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
+      boolean omitTf = (bits & OMIT_TF) != 0;
       
-      addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads);
+      addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);
     }    
   }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java Tue Aug  5 10:17:42 2008
@@ -63,7 +63,8 @@
     textOffset = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
 
     field.termsHashPerField.initReader(freq, p, 0);
-    field.termsHashPerField.initReader(prox, p, 1);
+    if (!field.fieldInfo.omitTf)
+      field.termsHashPerField.initReader(prox, p, 1);
 
     // Should always be true
     boolean result = nextDoc();
@@ -77,20 +78,27 @@
       if (p.lastDocCode != -1) {
         // Return last doc
         docID = p.lastDocID;
-        termFreq = p.docFreq;
+        if (!field.omitTf)
+          termFreq = p.docFreq;
         p.lastDocCode = -1;
         return true;
-      } else 
+      } else
         // EOF
         return false;
     }
 
     final int code = freq.readVInt();
-    docID += code >>> 1;
-    if ((code & 1) != 0)
-      termFreq = 1;
-    else
-      termFreq = freq.readVInt();
+    if (field.omitTf)
+      docID += code;
+    else {
+      docID += code >>> 1;
+      if ((code & 1) != 0)
+        termFreq = 1;
+      else
+        termFreq = freq.readVInt();
+    }
+
+    assert docID != p.lastDocID;
 
     return true;
   }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Tue Aug  5 10:17:42 2008
@@ -31,10 +31,6 @@
 
 final class FreqProxTermsWriter extends TermsHashConsumer {
 
-  FreqProxTermsWriter() {
-    streamCount = 2;
-  }
-
   public TermsHashConsumerPerThread addThread(TermsHashPerThread perThread) {
     return new FreqProxTermsWriterPerThread(perThread);
   }
@@ -102,7 +98,12 @@
                                                          state.docWriter.writer.getTermIndexInterval());
 
     final IndexOutput freqOut = state.directory.createOutput(state.segmentFileName(IndexFileNames.FREQ_EXTENSION));
-    final IndexOutput proxOut = state.directory.createOutput(state.segmentFileName(IndexFileNames.PROX_EXTENSION));
+    final IndexOutput proxOut;
+
+    if (fieldInfos.hasProx())
+      proxOut = state.directory.createOutput(state.segmentFileName(IndexFileNames.PROX_EXTENSION));
+    else
+      proxOut = null;
 
     final DefaultSkipListWriter skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval,
                                                                            termsOut.maxSkipLevels,
@@ -135,6 +136,7 @@
         int numPostings = perField.numPostings;
         perField.reset();
         perField.shrinkHash(numPostings);
+        fields[i].reset();
       }
 
       start = end;
@@ -148,13 +150,15 @@
     }
 
     freqOut.close();
-    proxOut.close();
+    if (proxOut != null) {
+      state.flushedFiles.add(state.segmentFileName(IndexFileNames.PROX_EXTENSION));
+      proxOut.close();
+    }
     termsOut.close();
     
     // Record all files we have flushed
     state.flushedFiles.add(state.segmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION));
     state.flushedFiles.add(state.segmentFileName(IndexFileNames.FREQ_EXTENSION));
-    state.flushedFiles.add(state.segmentFileName(IndexFileNames.PROX_EXTENSION));
     state.flushedFiles.add(state.segmentFileName(IndexFileNames.TERMS_EXTENSION));
     state.flushedFiles.add(state.segmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION));
   }
@@ -205,8 +209,12 @@
     }
 
     final int skipInterval = termsOut.skipInterval;
-    final boolean currentFieldStorePayloads = fields[0].fieldInfo.storePayloads;
+    final boolean currentFieldOmitTf = fields[0].fieldInfo.omitTf;
 
+    // If current field omits tf then it cannot store
+    // payloads.  We silently drop the payloads in this case:
+    final boolean currentFieldStorePayloads = currentFieldOmitTf ? false : fields[0].fieldInfo.storePayloads;
+  
     FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];
 
     while(numFields > 0) {
@@ -235,8 +243,12 @@
       final char[] text = termStates[0].text;
       final int start = termStates[0].textOffset;
 
-      long freqPointer = freqOut.getFilePointer();
-      long proxPointer = proxOut.getFilePointer();
+      final long freqPointer = freqOut.getFilePointer();
+      final long proxPointer;
+      if (proxOut != null)
+        proxPointer = proxOut.getFilePointer();
+      else
+        proxPointer = 0;
 
       skipListWriter.resetSkip();
 
@@ -261,45 +273,53 @@
         assert doc < flushState.numDocsInRAM;
         assert doc > lastDoc || df == 1;
 
-        final int newDocCode = (doc-lastDoc)<<1;
-
-        lastDoc = doc;
-
         final ByteSliceReader prox = minState.prox;
 
         // Carefully copy over the prox + payload info,
         // changing the format to match Lucene's segment
         // format.
-        for(int j=0;j<termDocFreq;j++) {
-          final int code = prox.readVInt();
-          if (currentFieldStorePayloads) {
-            final int payloadLength;
-            if ((code & 1) != 0) {
-              // This position has a payload
-              payloadLength = prox.readVInt();
-            } else
-              payloadLength = 0;
-            if (payloadLength != lastPayloadLength) {
-              proxOut.writeVInt(code|1);
-              proxOut.writeVInt(payloadLength);
-              lastPayloadLength = payloadLength;
-            } else
-              proxOut.writeVInt(code & (~1));
-            if (payloadLength > 0)
-              copyBytes(prox, proxOut, payloadLength);
-          } else {
-            assert 0 == (code & 1);
-            proxOut.writeVInt(code>>1);
+        if (!currentFieldOmitTf) {
+          // omitTf == false so we do write positions & payload          
+          assert proxOut != null;
+          for(int j=0;j<termDocFreq;j++) {
+            final int code = prox.readVInt();
+            if (currentFieldStorePayloads) {
+              final int payloadLength;
+              if ((code & 1) != 0) {
+                // This position has a payload
+                payloadLength = prox.readVInt();
+              } else
+                payloadLength = 0;
+              if (payloadLength != lastPayloadLength) {
+                proxOut.writeVInt(code|1);
+                proxOut.writeVInt(payloadLength);
+                lastPayloadLength = payloadLength;
+              } else
+                proxOut.writeVInt(code & (~1));
+              if (payloadLength > 0)
+                copyBytes(prox, proxOut, payloadLength);
+            } else {
+              assert 0 == (code & 1);
+              proxOut.writeVInt(code>>1);
+            }
+          } //End for
+          
+          final int newDocCode = (doc-lastDoc)<<1;
+
+          if (1 == termDocFreq) {
+            freqOut.writeVInt(newDocCode|1);
+           } else {
+            freqOut.writeVInt(newDocCode);
+            freqOut.writeVInt(termDocFreq);
           }
-        }
-
-        if (1 == termDocFreq) {
-          freqOut.writeVInt(newDocCode|1);
         } else {
-          freqOut.writeVInt(newDocCode);
-          freqOut.writeVInt(termDocFreq);
+          // omitTf==true: we store only the docs, without
+          // term freq, positions, payloads
+          freqOut.writeVInt(doc-lastDoc);
         }
 
+        lastDoc = doc;
+
         if (!minState.nextDoc()) {
 
           // Remove from termStates

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Tue Aug  5 10:17:42 2008
@@ -31,6 +31,7 @@
   final FieldInfo fieldInfo;
   final DocumentsWriter.DocState docState;
   final DocInverter.FieldInvertState fieldState;
+  boolean omitTf;
 
   public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo) {
     this.termsHashPerField = termsHashPerField;
@@ -38,11 +39,18 @@
     this.fieldInfo = fieldInfo;
     docState = termsHashPerField.docState;
     fieldState = termsHashPerField.fieldState;
+    omitTf = fieldInfo.omitTf;
+  }
+
+  int getStreamCount() {
+    if (fieldInfo.omitTf)
+      return 1;
+    else
+      return 2;
   }
 
   void finish() {}
 
-  //boolean doNext;
   boolean hasPayloads;
 
   void skippingLongTerm(Token t) throws IOException {}
@@ -52,6 +60,12 @@
     return fieldInfo.name.compareTo(other.fieldInfo.name);
   }
 
+  void reset() {
+    // Record, up front, whether our in-RAM format will be
+    // with or without term freqs:
+    omitTf = fieldInfo.omitTf;
+  }
+
   boolean start(Fieldable[] fields, int count) {
     for(int i=0;i<count;i++)
       if (fields[i].isIndexed())
@@ -76,10 +90,14 @@
     // flush
     assert docState.testPoint("FreqProxTermsWriterPerField.newTerm start");
     FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0;
-    p.lastDocCode = docState.docID << 1;
     p.lastDocID = docState.docID;
-    p.docFreq = 1;
-    writeProx(t, p, fieldState.position);
+    if (omitTf) {
+      p.lastDocCode = docState.docID;
+    } else {
+      p.lastDocCode = docState.docID << 1;
+      p.docFreq = 1;
+      writeProx(t, p, fieldState.position);
+    }
   }
 
   final void addTerm(Token t, RawPostingList p0) {
@@ -88,27 +106,37 @@
 
     FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0;
 
-    assert p.docFreq > 0;
+    assert omitTf || p.docFreq > 0;
 
-    if (docState.docID != p.lastDocID) {
-      // Term not yet seen in the current doc but previously
-      // seen in other doc(s) since the last flush
-
-      // Now that we know doc freq for previous doc,
-      // write it & lastDocCode
-      if (1 == p.docFreq)
-        termsHashPerField.writeVInt(0, p.lastDocCode|1);
-      else {
+    if (omitTf) {
+      if (docState.docID != p.lastDocID) {
+        assert docState.docID > p.lastDocID;
         termsHashPerField.writeVInt(0, p.lastDocCode);
-        termsHashPerField.writeVInt(0, p.docFreq);
+        p.lastDocCode = docState.docID - p.lastDocID;
+        p.lastDocID = docState.docID;
       }
-      p.docFreq = 1;
-      p.lastDocCode = (docState.docID - p.lastDocID) << 1;
-      p.lastDocID = docState.docID;
-      writeProx(t, p, fieldState.position);
     } else {
-      p.docFreq++;
-      writeProx(t, p, fieldState.position-p.lastPosition);
+      if (docState.docID != p.lastDocID) {
+        assert docState.docID > p.lastDocID;
+        // Term not yet seen in the current doc but previously
+        // seen in other doc(s) since the last flush
+
+        // Now that we know doc freq for previous doc,
+        // write it & lastDocCode
+        if (1 == p.docFreq)
+          termsHashPerField.writeVInt(0, p.lastDocCode|1);
+        else {
+          termsHashPerField.writeVInt(0, p.lastDocCode);
+          termsHashPerField.writeVInt(0, p.docFreq);
+        }
+        p.docFreq = 1;
+        p.lastDocCode = (docState.docID - p.lastDocID) << 1;
+        p.lastDocID = docState.docID;
+        writeProx(t, p, fieldState.position);
+      } else {
+        p.docFreq++;
+        writeProx(t, p, fieldState.position-p.lastPosition);
+      }
     }
   }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java Tue Aug  5 10:17:42 2008
@@ -75,6 +75,8 @@
     public static final FieldOption INDEXED = new FieldOption ("INDEXED");
     /** All fields that store payloads */
     public static final FieldOption STORES_PAYLOADS = new FieldOption ("STORES_PAYLOADS");
+    /** All fields that omit tf */
+    public static final FieldOption OMIT_TF = new FieldOption ("OMIT_TF");
     /** All fields which are not indexed */
     public static final FieldOption UNINDEXED = new FieldOption ("UNINDEXED");
     /** All fields which are indexed with termvectors enabled */

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Tue Aug  5 10:17:42 2008
@@ -3069,7 +3069,7 @@
           synchronized(this) {
             segmentInfos.setSize(0);                      // pop old infos & add new
             info = new SegmentInfo(mergedName, docCount, directory, false, true,
-                                   -1, null, false);
+                                   -1, null, false, merger.hasProx());
             segmentInfos.addElement(info);
           }
 
@@ -3377,7 +3377,8 @@
                                      flushedDocCount,
                                      directory, false, true,
                                      docStoreOffset, docStoreSegment,
-                                     docStoreIsCompoundFile);
+                                     docStoreIsCompoundFile,    
+                                     docWriter.hasProx());
       }
 
       docWriter.pushDeletes();
@@ -3615,6 +3616,8 @@
       }
     }
 
+    merge.info.setHasProx(merger.hasProx());
+
     segmentInfos.subList(start, start + merge.segments.size()).clear();
     segmentInfos.add(start, merge.info);
 
@@ -3905,7 +3908,8 @@
                                  directory, false, true,
                                  docStoreOffset,
                                  docStoreSegment,
-                                 docStoreIsCompoundFile);
+                                 docStoreIsCompoundFile,
+                                 false);
 
     // Also enroll the merged segment into mergingSegments;
     // this prevents it from getting selected for a merge

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfo.java Tue Aug  5 10:17:42 2008
@@ -77,6 +77,8 @@
   private int delCount;                           // How many deleted docs in this segment, or -1 if not yet known
                                                   // (if it's an older index)
 
+  private boolean hasProx;                        // True if this segment has any fields with omitTf==false
+
   public SegmentInfo(String name, int docCount, Directory dir) {
     this.name = name;
     this.docCount = docCount;
@@ -89,14 +91,15 @@
     docStoreSegment = name;
     docStoreIsCompoundFile = false;
     delCount = 0;
+    hasProx = true;
   }
 
   public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) { 
-    this(name, docCount, dir, isCompoundFile, hasSingleNormFile, -1, null, false);
+    this(name, docCount, dir, isCompoundFile, hasSingleNormFile, -1, null, false, true);
   }
 
   public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile,
-                     int docStoreOffset, String docStoreSegment, boolean docStoreIsCompoundFile) { 
+                     int docStoreOffset, String docStoreSegment, boolean docStoreIsCompoundFile, boolean hasProx) { 
     this(name, docCount, dir);
     this.isCompoundFile = (byte) (isCompoundFile ? YES : NO);
     this.hasSingleNormFile = hasSingleNormFile;
@@ -104,6 +107,7 @@
     this.docStoreOffset = docStoreOffset;
     this.docStoreSegment = docStoreSegment;
     this.docStoreIsCompoundFile = docStoreIsCompoundFile;
+    this.hasProx = hasProx;
     delCount = 0;
     assert docStoreOffset == -1 || docStoreSegment != null;
   }
@@ -180,6 +184,10 @@
         assert delCount <= docCount;
       } else
         delCount = -1;
+      if (format <= SegmentInfos.FORMAT_HAS_PROX)
+        hasProx = input.readByte() == 1;
+      else
+        hasProx = true;
     } else {
       delGen = CHECK_DIR;
       normGen = null;
@@ -190,6 +198,7 @@
       docStoreIsCompoundFile = false;
       docStoreSegment = null;
       delCount = -1;
+      hasProx = true;
     }
   }
   
@@ -507,6 +516,16 @@
     }
     output.writeByte(isCompoundFile);
     output.writeInt(delCount);
+    output.writeByte((byte) (hasProx ? 1:0));
+  }
+
+  void setHasProx(boolean hasProx) {
+    this.hasProx = hasProx;
+    clearFiles();
+  }
+
+  boolean getHasProx() {
+    return hasProx;
   }
 
   private void addIfExists(List files, String fileName) throws IOException {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentInfos.java Tue Aug  5 10:17:42 2008
@@ -65,8 +65,13 @@
    *  This way IndexWriter can efficiently report numDocs(). */
   public static final int FORMAT_DEL_COUNT = -6;
 
+  /** This format adds the boolean hasProx to record if any
+   *  fields in the segment store prox information (ie, have
+   *  omitTf==false) */
+  public static final int FORMAT_HAS_PROX = -7;
+
   /* This must always point to the most recent file format. */
-  static final int CURRENT_FORMAT = FORMAT_DEL_COUNT;
+  static final int CURRENT_FORMAT = FORMAT_HAS_PROX;
   
   public int counter = 0;    // used to name new segments
   /**

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java Tue Aug  5 10:17:42 2008
@@ -83,6 +83,10 @@
       checkAbort = new CheckAbort(merge, directory);
     termIndexInterval = writer.getTermIndexInterval();
   }
+  
+  boolean hasProx() {
+    return fieldInfos.hasProx();
+  }
 
   /**
    * Add an IndexReader to the collection of readers that are to be merged
@@ -164,6 +168,10 @@
     // Basic files
     for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.length; i++) {
       String ext = IndexFileNames.COMPOUND_EXTENSIONS[i];
+
+      if (ext.equals(IndexFileNames.PROX_EXTENSION) && !hasProx())
+        continue;
+
       if (mergeDocStores || (!ext.equals(IndexFileNames.FIELDS_EXTENSION) &&
                             !ext.equals(IndexFileNames.FIELDS_INDEX_EXTENSION)))
         files.add(segment + "." + ext);
@@ -198,11 +206,11 @@
   }
 
   private void addIndexed(IndexReader reader, FieldInfos fieldInfos, Collection names, boolean storeTermVectors, boolean storePositionWithTermVector,
-                         boolean storeOffsetWithTermVector, boolean storePayloads) throws IOException {
+                         boolean storeOffsetWithTermVector, boolean storePayloads, boolean omitTf) throws IOException {
     Iterator i = names.iterator();
     while (i.hasNext()) {
       String field = (String)i.next();
-      fieldInfos.add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.hasNorms(field), storePayloads);
+      fieldInfos.add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.hasNorms(field), storePayloads, omitTf);
     }
   }
 
@@ -265,15 +273,16 @@
         SegmentReader segmentReader = (SegmentReader) reader;
         for (int j = 0; j < segmentReader.getFieldInfos().size(); j++) {
           FieldInfo fi = segmentReader.getFieldInfos().fieldInfo(j);
-          fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.hasNorms(fi.name), fi.storePayloads);
+          fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.hasNorms(fi.name), fi.storePayloads, fi.omitTf);
         }
       } else {
-        addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false);
-        addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false);
-        addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false);
-        addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false);
-        addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true);
-        addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false);
+        addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
+        addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
+        addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
+        addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false);
+        addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true);
+        addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false);
+        addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false);
         fieldInfos.add(reader.getFieldNames(IndexReader.FieldOption.UNINDEXED), false);
       }
     }
@@ -477,7 +486,8 @@
   private final void mergeTerms() throws CorruptIndexException, IOException {
     try {
       freqOutput = directory.createOutput(segment + ".frq");
-      proxOutput = directory.createOutput(segment + ".prx");
+      if (hasProx())
+        proxOutput = directory.createOutput(segment + ".prx");
       termInfosWriter =
               new TermInfosWriter(directory, segment, fieldInfos,
                                   termIndexInterval);
@@ -561,11 +571,20 @@
    */
   private final int mergeTermInfo(SegmentMergeInfo[] smis, int n)
           throws CorruptIndexException, IOException {
-    long freqPointer = freqOutput.getFilePointer();
-    long proxPointer = proxOutput.getFilePointer();
-
-    int df = appendPostings(smis, n);		  // append posting data
-
+    final long freqPointer = freqOutput.getFilePointer();
+    final long proxPointer;
+    if (proxOutput != null)
+      proxPointer = proxOutput.getFilePointer();
+    else
+      proxPointer = 0;
+
+    int df;
+    if (fieldInfos.fieldInfo(smis[0].term.field).omitTf) { // append posting data
+      df = appendPostingsNoTf(smis, n);     
+    } else{
+      df = appendPostings(smis, n);      
+    }
+    
     long skipPointer = skipListWriter.writeSkip(freqOutput);
 
     if (df > 0) {
@@ -672,6 +691,53 @@
     return df;
   }
 
+  /** Process postings from multiple segments without tf, all positioned on the
+   *  same term. Writes out merged entries only into freqOutput; proxOutput is not written.
+   *
+   * @param smis array of segments
+   * @param n number of cells in the array actually occupied
+   * @return number of documents across all segments where this term was found
+   * @throws CorruptIndexException if the index is corrupt
+   * @throws IOException if there is a low-level IO error
+   */
+  private final int appendPostingsNoTf(SegmentMergeInfo[] smis, int n)
+          throws CorruptIndexException, IOException {
+    int lastDoc = 0;
+    int df = 0;           // number of docs w/ term
+    skipListWriter.resetSkip();
+    int lastPayloadLength = -1;   // ensures that we write the first length
+    for (int i = 0; i < n; i++) {
+      SegmentMergeInfo smi = smis[i];
+      TermPositions postings = smi.getPositions();
+      assert postings != null;
+      int base = smi.base;
+      int[] docMap = smi.getDocMap();
+      postings.seek(smi.termEnum);
+      while (postings.next()) {
+        int doc = postings.doc();
+        if (docMap != null)
+          doc = docMap[doc];                      // map around deletions
+        doc += base;                              // convert to merged space
+
+        if (doc < 0 || (df > 0 && doc <= lastDoc))
+          throw new CorruptIndexException("docs out of order (" + doc +
+              " <= " + lastDoc + " )");
+
+        df++;
+
+        if ((df % skipInterval) == 0) {
+          skipListWriter.setSkipData(lastDoc, false, lastPayloadLength);
+          skipListWriter.bufferSkip(df);
+        }
+
+        int docCode = (doc - lastDoc);   
+        lastDoc = doc;
+        freqOutput.writeVInt(docCode);    // write doc & freq=1
+      }
+    }
+    return df;
+  }
+  
   private void mergeNorms() throws IOException {
     byte[] normBuffer = null;
     IndexOutput output = null;

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java Tue Aug  5 10:17:42 2008
@@ -298,6 +298,12 @@
 
       fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
 
+      boolean anyProx = false;
+      final int numFields = fieldInfos.size();
+      for(int i=0;!anyProx && i<numFields;i++)
+        if (!fieldInfos.fieldInfo(i).omitTf)
+          anyProx = true;
+
       final String fieldsSegment;
 
       if (si.getDocStoreOffset() != -1)
@@ -322,7 +328,8 @@
       // make sure that all index files have been read or are kept open
       // so that if an index update removes them we'll still have them
       freqStream = cfsDir.openInput(segment + ".frq", readBufferSize);
-      proxStream = cfsDir.openInput(segment + ".prx", readBufferSize);
+      if (anyProx)
+        proxStream = cfsDir.openInput(segment + ".prx", readBufferSize);
       openNorms(cfsDir, readBufferSize);
 
       if (doOpenStores && fieldInfos.hasVectors()) { // open term vector files only as needed
@@ -728,6 +735,9 @@
       else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
         fieldSet.add(fi.name);
       }
+      else if (fi.omitTf && fieldOption == IndexReader.FieldOption.OMIT_TF) {
+        fieldSet.add(fi.name);
+      }
       else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
         fieldSet.add(fi.name);
       }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentTermDocs.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentTermDocs.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentTermDocs.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentTermDocs.java Tue Aug  5 10:17:42 2008
@@ -41,7 +41,8 @@
   private boolean haveSkipped;
   
   protected boolean currentFieldStoresPayloads;
-
+  protected boolean currentFieldOmitTf;
+  
   protected SegmentTermDocs(SegmentReader parent) {
     this.parent = parent;
     this.freqStream = (IndexInput) parent.freqStream.clone();
@@ -75,6 +76,7 @@
   void seek(TermInfo ti, Term term) throws IOException {
     count = 0;
     FieldInfo fi = parent.fieldInfos.fieldInfo(term.field);
+    currentFieldOmitTf = (fi != null) ? fi.omitTf : false;
     currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
     if (ti == null) {
       df = 0;
@@ -105,14 +107,19 @@
     while (true) {
       if (count == df)
         return false;
-
-      int docCode = freqStream.readVInt();
-      doc += docCode >>> 1;       // shift off low bit
-      if ((docCode & 1) != 0)       // if low bit is set
-        freq = 1;         // freq is one
-      else
-        freq = freqStream.readVInt();     // else read freq
-
+      final int docCode = freqStream.readVInt();
+      
+      if (currentFieldOmitTf) {
+        doc += docCode;
+        freq = 1;
+      } else {
+        doc += docCode >>> 1;       // shift off low bit
+        if ((docCode & 1) != 0)       // if low bit is set
+          freq = 1;         // freq is one
+        else
+          freq = freqStream.readVInt();     // else read freq
+      }
+      
       count++;
 
       if (deletedDocs == null || !deletedDocs.get(doc))
@@ -126,27 +133,49 @@
   public int read(final int[] docs, final int[] freqs)
           throws IOException {
     final int length = docs.length;
+    if (currentFieldOmitTf) {
+      return readNoTf(docs, freqs, length);
+    } else {
+      int i = 0;
+      while (i < length && count < df) {
+        // manually inlined call to next() for speed
+        final int docCode = freqStream.readVInt();
+        doc += docCode >>> 1;       // shift off low bit
+        if ((docCode & 1) != 0)       // if low bit is set
+          freq = 1;         // freq is one
+        else
+          freq = freqStream.readVInt();     // else read freq
+        count++;
+
+        if (deletedDocs == null || !deletedDocs.get(doc)) {
+          docs[i] = doc;
+          freqs[i] = freq;
+          ++i;
+        }
+      }
+      return i;
+    }
+  }
+
+  private final int readNoTf(final int[] docs, final int[] freqs, final int length) throws IOException {
     int i = 0;
     while (i < length && count < df) {
-
       // manually inlined call to next() for speed
-      final int docCode = freqStream.readVInt();
-      doc += docCode >>> 1;       // shift off low bit
-      if ((docCode & 1) != 0)       // if low bit is set
-        freq = 1;         // freq is one
-      else
-        freq = freqStream.readVInt();     // else read freq
+      doc += freqStream.readVInt();       
       count++;
 
       if (deletedDocs == null || !deletedDocs.get(doc)) {
         docs[i] = doc;
-        freqs[i] = freq;
+        // Hardwire freq to 1 when term freqs were not
+        // stored in the index
+        freqs[i] = 1;
         ++i;
       }
     }
     return i;
   }
-
+ 
+  
   /** Overridden by SegmentTermPositions to skip in prox stream. */
   protected void skipProx(long proxPointer, int payloadLength) throws IOException {}
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentTermPositions.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentTermPositions.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentTermPositions.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentTermPositions.java Tue Aug  5 10:17:42 2008
@@ -60,6 +60,9 @@
   }
 
   public final int nextPosition() throws IOException {
+    if (currentFieldOmitTf)
+      // This field does not store term freq, positions, payloads
+      return 0;
     // perform lazy skips if neccessary
     lazySkip();
     proxCount--;
@@ -116,6 +119,7 @@
   }
 
   private void skipPositions(int n) throws IOException {
+    assert !currentFieldOmitTf;
     for (int f = n; f > 0; f--) {        // skip unread positions
       readDeltaPosition();
       skipPayload();

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java Tue Aug  5 10:17:42 2008
@@ -39,7 +39,6 @@
 
   public TermVectorsTermsWriter(DocumentsWriter docWriter) {
     this.docWriter = docWriter;
-    streamCount = 2;
   }
 
   public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java Tue Aug  5 10:17:42 2008
@@ -47,6 +47,10 @@
     fieldState = termsHashPerField.fieldState;
   }
 
+  int getStreamCount() {
+    return 2;
+  }
+
   boolean start(Fieldable[] fields, int count) {
     doVectors = false;
     doVectorPositions = false;

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermsHash.java Tue Aug  5 10:17:42 2008
@@ -42,7 +42,6 @@
   final TermsHash nextTermsHash;
   final int bytesPerPosting;
   final int postingsFreeChunk;
-  final int streamCount;
   final DocumentsWriter docWriter;
   
   TermsHash primaryTermsHash;
@@ -55,7 +54,6 @@
   public TermsHash(final DocumentsWriter docWriter, boolean trackAllocations, final TermsHashConsumer consumer, final TermsHash nextTermsHash) {
     this.docWriter = docWriter;
     this.consumer = consumer;
-    this.streamCount = consumer.streamCount;
     this.nextTermsHash = nextTermsHash;
     this.trackAllocations = trackAllocations;
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumer.java Tue Aug  5 10:17:42 2008
@@ -28,8 +28,6 @@
   abstract void abort();
   abstract void closeDocStore(DocumentsWriter.FlushState state) throws IOException;
 
-  int streamCount;
-
   FieldInfos fieldInfos;
 
   void setFieldInfos(FieldInfos fieldInfos) {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerField.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerField.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashConsumerPerField.java Tue Aug  5 10:17:42 2008
@@ -32,4 +32,5 @@
   abstract void skippingLongTerm(Token t) throws IOException;
   abstract void newTerm(Token t, RawPostingList p) throws IOException;
   abstract void addTerm(Token t, RawPostingList p) throws IOException;
+  abstract int getStreamCount();
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerField.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerField.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermsHashPerField.java Tue Aug  5 10:17:42 2008
@@ -57,9 +57,9 @@
     bytePool = perThread.bytePool;
     docState = perThread.docState;
     fieldState = docInverterPerField.fieldState;
-    streamCount = perThread.termsHash.streamCount;
-    numPostingInt = 2*streamCount;
     this.consumer = perThread.consumer.addField(this, fieldInfo);
+    streamCount = consumer.getStreamCount();
+    numPostingInt = 2*streamCount;
     this.fieldInfo = fieldInfo;
     if (nextPerThread != null)
       nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo);
@@ -488,6 +488,7 @@
   }
 
   void writeVInt(int stream, int i) {
+    assert stream < streamCount;
     while ((i & ~0x7F) != 0) {
       writeByte(stream, (byte)((i & 0x7f) | 0x80));
       i >>>= 7;

Modified: lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml (original)
+++ lucene/java/trunk/src/site/src/documentation/content/xdocs/fileformats.xml Tue Aug  5 10:17:42 2008
@@ -246,14 +246,16 @@
                     <p>Term Frequency
                         data. For each term in the dictionary, the numbers of all the
                         documents that contain that term, and the frequency of the term in
-                        that document.
+                        that document if omitTf is false.
                     </p>
                 </li>
 
                 <li>
                     <p>Term Proximity
                         data. For each term in the dictionary, the positions that the term
-                        occurs in each document.
+                        occurs in each document.  Note that this will
+                        not exist if all fields in all documents set
+                        omitTf to true.
                     </p>
                 </li>
 
@@ -826,11 +828,12 @@
                     <b>2.4 and above:</b>
                     Segments --&gt; Format, Version, NameCounter, SegCount, &lt;SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
                     NormGen<sup>NumField</sup>,
-                    IsCompoundFile&gt;<sup>SegCount</sup>, Checksum
+                    IsCompoundFile, DeletionCount, HasProx&gt;<sup>SegCount</sup>, Checksum
                 </p>
 
                 <p>
-                    Format, NameCounter, SegCount, SegSize, NumField, DocStoreOffset --&gt; Int32
+                    Format, NameCounter, SegCount, SegSize, NumField,
+                    DocStoreOffset, DeletionCount --&gt; Int32
                 </p>
 
                 <p>
@@ -842,7 +845,8 @@
                 </p>
 
                 <p>
-                    IsCompoundFile, HasSingleNormFile, DocStoreIsCompoundFile --&gt; Int8
+                    IsCompoundFile, HasSingleNormFile,
+                    DocStoreIsCompoundFile, HasProx --&gt; Int8
                 </p>
 
                 <p>
@@ -936,7 +940,16 @@
 		    This is used to verify integrity of the file on
 		    opening the index.
 		</p>
-		
+
+		<p>
+		    DeletionCount records the number of deleted
+		    documents in this segment.
+		</p>
+
+		<p>
+		    HasProx is 1 if any fields in this segment have
+		    omitTf set to false; else, it's 0.
+		</p>
 
             </section>
 
@@ -1264,7 +1277,9 @@
                             determines the position of this term's TermPositions within the .prx
                             file. In particular, it is the difference between the position of
                             this term's data in that file and the position of the previous
-                            term's data (or zero, for the first term in the file.
+                            term's data (or zero, for the first term in the file.  For fields
+			    with omitTf true, this will be 0 since
+                            prox information is not stored.
                         </p>
                         <p>SkipDelta determines the position of this
                             term's SkipData within the .frq file. In
@@ -1338,7 +1353,7 @@
                 <p>
                     The .frq file contains the lists of documents
                     which contain each term, along with the frequency of the term in that
-                    document.
+                    document (if omitTf is false).
                 </p>
                 <p>FreqFile (.frq) --&gt;
                     &lt;TermFreqs, SkipData&gt;
@@ -1349,7 +1364,7 @@
                     <sup>DocFreq</sup>
                 </p>
                 <p>TermFreq --&gt;
-                    DocDelta, Freq?
+                    DocDelta[, Freq?]
                 </p>
                 <p>SkipData --&gt;
                     &lt;&lt;SkipLevelLength, SkipLevel&gt;
@@ -1375,21 +1390,31 @@
                 <p>TermFreq
                     entries are ordered by increasing document number.
                 </p>
-                <p>DocDelta
-                    determines both the document number and the frequency. In
-                    particular, DocDelta/2 is the difference between this document number
-                    and the previous document number (or zero when this is the first
-                    document in a TermFreqs). When DocDelta is odd, the frequency is
-                    one. When DocDelta is even, the frequency is read as another VInt.
-                </p>
-                <p>For
-                    example, the TermFreqs for a term which occurs once in document seven
-                    and three times in document eleven would be the following sequence of
-                    VInts:
-                </p>
-                <p>15,
-                    8, 3
-                </p>
+                <p>DocDelta: if omitTf is false, this determines both
+                    the document number and the frequency. In
+                    particular, DocDelta/2 is the difference between
+                    this document number and the previous document
+                    number (or zero when this is the first document in
+                    a TermFreqs). When DocDelta is odd, the frequency
+                    is one. When DocDelta is even, the frequency is
+                    read as another VInt.  If omitTf is true, DocDelta
+                    contains the gap (not multiplied by 2) between
+                    document numbers and no frequency information is
+                    stored.
+                </p>
+                <p>For example, the TermFreqs for a term which occurs
+                    once in document seven and three times in document
+                    eleven, with omitTf false, would be the following
+                    sequence of VInts:
+                </p>
+                <p>15, 8, 3
+                </p>
+		<p> If omitTf were true it would be this sequence
+		of VInts instead:
+		  </p>
+		 <p>
+		   7,4
+                 </p>
                 <p>DocSkip records the document number before every
                     SkipInterval
                     <sup>th</sup>
@@ -1454,7 +1479,11 @@
 
                 <p>
                     The .prx file contains the lists of positions that
-                    each term occurs at within documents.
+                    each term occurs at within documents.  Note that
+                    fields with omitTf true do not store
+                    anything into this file, and if all fields in the
+                    index have omitTf true then the .prx file will not
+                    exist.
                 </p>
                 <p>ProxFile (.prx) --&gt;
                     &lt;TermPositions&gt;

Modified: lucene/java/trunk/src/test/org/apache/lucene/TestHitIterator.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/TestHitIterator.java?rev=682812&r1=682811&r2=682812&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/TestHitIterator.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/TestHitIterator.java Tue Aug  5 10:17:42 2008
@@ -18,6 +18,7 @@
  */
 
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
@@ -54,6 +55,8 @@
 
     writer.close();
 
+    _TestUtil.checkIndex(directory);
+
     IndexSearcher searcher = new IndexSearcher(directory);
     Hits hits = searcher.search(new TermQuery(new Term("field", "iterator")));
 

Added: lucene/java/trunk/src/test/org/apache/lucene/index/TestOmitTf.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestOmitTf.java?rev=682812&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestOmitTf.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestOmitTf.java Tue Aug  5 10:17:42 2008
@@ -0,0 +1,363 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Collection;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.HitCollector;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MockRAMDirectory;
+
+
+public class TestOmitTf extends LuceneTestCase {
+    
+  public static class SimpleSimilarity extends Similarity {
+    public float lengthNorm(String field, int numTerms) { return 1.0f; }
+    public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
+    
+    public float tf(float freq) { return freq; }
+    
+    public float sloppyFreq(int distance) { return 2.0f; }
+    public float idf(Collection terms, Searcher searcher) { return 1.0f; }
+    public float idf(int docFreq, int numDocs) { return 1.0f; }
+    public float coord(int overlap, int maxOverlap) { return 1.0f; }
+  }
+
+
+  // Tests whether the DocumentWriter correctly enables the
+  // omitTf bit in the FieldInfo
+  public void testOmitTf() throws Exception {
+    Directory ram = new MockRAMDirectory();
+    Analyzer analyzer = new StandardAnalyzer();
+    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+    Document d = new Document();
+        
+    // this field will have Tf
+    Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.TOKENIZED);
+    d.add(f1);
+       
+    // this field will NOT have Tf
+    Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.TOKENIZED);
+    f2.setOmitTf(true);
+    d.add(f2);
+        
+    writer.addDocument(d);
+    writer.optimize();
+    // now we add another document which has term freq for field f2 and not for f1 and verify that the SegmentMerger
+    // keeps things consistent
+    d = new Document();
+        
+    // Reverse
+    f1.setOmitTf(true);
+    d.add(f1);
+        
+    f2.setOmitTf(false);        
+    d.add(f2);
+        
+    writer.addDocument(d);
+    // force merge
+    writer.optimize();
+    // flush
+    writer.close();
+    _TestUtil.checkIndex(ram);
+
+    // only one segment in the index, so we can cast to SegmentReader
+    SegmentReader reader = (SegmentReader) IndexReader.open(ram);
+    FieldInfos fi = reader.fieldInfos();
+    assertTrue("OmitTf field bit should be set.", fi.fieldInfo("f1").omitTf);
+    assertTrue("OmitTf field bit should be set.", fi.fieldInfo("f2").omitTf);
+        
+    reader.close();
+    ram.close();
+  }
+ 
+  // Tests whether merging of docs that have different
+  // omitTf for the same field works
+  public void testMixedMerge() throws Exception {
+    Directory ram = new MockRAMDirectory();
+    Analyzer analyzer = new StandardAnalyzer();
+    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMaxBufferedDocs(3);
+    writer.setMergeFactor(2);
+    Document d = new Document();
+        
+    // this field will have Tf
+    Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.TOKENIZED);
+    d.add(f1);
+       
+    // this field will NOT have Tf
+    Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.TOKENIZED);
+    f2.setOmitTf(true);
+    d.add(f2);
+
+    for(int i=0;i<30;i++)
+      writer.addDocument(d);
+        
+    // now we add another document which has term freq for field f2 and not for f1 and verify that the SegmentMerger
+    // keeps things consistent
+    d = new Document();
+        
+    // Reverse
+    f1.setOmitTf(true);
+    d.add(f1);
+        
+    f2.setOmitTf(false);        
+    d.add(f2);
+        
+    for(int i=0;i<30;i++)
+      writer.addDocument(d);
+        
+    // force merge
+    writer.optimize();
+    // flush
+    writer.close();
+
+    _TestUtil.checkIndex(ram);
+
+    // only one segment in the index, so we can cast to SegmentReader
+    SegmentReader reader = (SegmentReader) IndexReader.open(ram);
+    FieldInfos fi = reader.fieldInfos();
+    assertTrue("OmitTf field bit should be set.", fi.fieldInfo("f1").omitTf);
+    assertTrue("OmitTf field bit should be set.", fi.fieldInfo("f2").omitTf);
+        
+    reader.close();
+    ram.close();
+  }
+
+  // Make sure first adding docs that do not omitTf for
+  // field X, then adding docs that do omitTf for that same
+  // field, works correctly.
+  public void testMixedRAM() throws Exception {
+    Directory ram = new MockRAMDirectory();
+    Analyzer analyzer = new StandardAnalyzer();
+    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMaxBufferedDocs(10);
+    writer.setMergeFactor(2);
+    Document d = new Document();
+        
+    // this field will have Tf
+    Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.TOKENIZED);
+    d.add(f1);
+       
+    // this field will NOT have Tf
+    Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.TOKENIZED);
+    d.add(f2);
+
+    for(int i=0;i<5;i++)
+      writer.addDocument(d);
+
+    f2.setOmitTf(true);
+        
+    for(int i=0;i<20;i++)
+      writer.addDocument(d);
+
+    // force merge
+    writer.optimize();
+
+    // flush
+    writer.close();
+
+    _TestUtil.checkIndex(ram);
+
+    // only one segment in the index, so we can cast to SegmentReader
+    SegmentReader reader = (SegmentReader) IndexReader.open(ram);
+    FieldInfos fi = reader.fieldInfos();
+    assertTrue("OmitTf field bit should not be set.", !fi.fieldInfo("f1").omitTf);
+    assertTrue("OmitTf field bit should be set.", fi.fieldInfo("f2").omitTf);
+        
+    reader.close();
+    ram.close();
+  }
+
+  private void assertNoPrx(Directory dir) throws Throwable {
+    final String[] files = dir.list();
+    for(int i=0;i<files.length;i++)
+      assertFalse(files[i].endsWith(".prx"));
+  }
+
+  // Verifies no *.prx exists when all fields omit term freq:
+  public void testNoPrxFile() throws Throwable {
+    Directory ram = new MockRAMDirectory();
+    Analyzer analyzer = new StandardAnalyzer();
+    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMaxBufferedDocs(3);
+    writer.setMergeFactor(2);
+    writer.setUseCompoundFile(false);
+    Document d = new Document();
+        
+    Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.TOKENIZED);
+    f1.setOmitTf(true);
+    d.add(f1);
+
+    for(int i=0;i<30;i++)
+      writer.addDocument(d);
+
+    writer.commit();
+
+    assertNoPrx(ram);
+        
+    // force merge
+    writer.optimize();
+    // flush
+    writer.close();
+
+    assertNoPrx(ram);
+    _TestUtil.checkIndex(ram);
+    ram.close();
+  }
+ 
+  // Test scores with one field with Term Freqs and one without, otherwise with equal content 
+  public void testBasic() throws Exception {
+    Directory dir = new MockRAMDirectory();  
+    Analyzer analyzer = new StandardAnalyzer();
+    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMergeFactor(2);
+    writer.setMaxBufferedDocs(2);
+    writer.setSimilarity(new SimpleSimilarity());
+        
+        
+    StringBuffer sb = new StringBuffer(265);
+    String term = "term";
+    for(int i = 0; i<30; i++){
+      Document d = new Document();
+      sb.append(term).append(" ");
+      String content  = sb.toString();
+      Field noTf = new Field("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.TOKENIZED);
+      noTf.setOmitTf(true);
+      d.add(noTf);
+          
+      Field tf = new Field("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.TOKENIZED);
+      d.add(tf);
+          
+      writer.addDocument(d);
+      //System.out.println(d);
+    }
+        
+    writer.optimize();
+    // flush
+    writer.close();
+    _TestUtil.checkIndex(dir);
+
+    /*
+     * Verify the index
+     */         
+    Searcher searcher = new IndexSearcher(dir);
+    searcher.setSimilarity(new SimpleSimilarity());
+        
+    Term a = new Term("noTf", term);
+    Term b = new Term("tf", term);
+    Term c = new Term("noTf", "notf");
+    Term d = new Term("tf", "tf");
+    TermQuery q1 = new TermQuery(a);
+    TermQuery q2 = new TermQuery(b);
+    TermQuery q3 = new TermQuery(c);
+    TermQuery q4 = new TermQuery(d);
+
+        
+    searcher.search(q1,
+                    new CountingHitCollector() {
+                      public final void collect(int doc, float score) {
+                        //System.out.println("Q1: Doc=" + doc + " score=" + score);
+                        assertTrue(score==1.0f);
+                        super.collect(doc, score);
+                      }
+                    });
+    //System.out.println(CountingHitCollector.getCount());
+        
+        
+    searcher.search(q2,
+                    new CountingHitCollector() {
+                      public final void collect(int doc, float score) {
+                        //System.out.println("Q2: Doc=" + doc + " score=" + score);  
+                        assertTrue(score==1.0f+doc);
+                        super.collect(doc, score);
+                      }
+                    });
+    //System.out.println(CountingHitCollector.getCount());
+         
+        
+        
+        
+        
+    searcher.search(q3,
+                    new CountingHitCollector() {
+                      public final void collect(int doc, float score) {
+                        //System.out.println("Q1: Doc=" + doc + " score=" + score);
+                        assertTrue(score==1.0f);
+                        assertFalse(doc%2==0);
+                        super.collect(doc, score);
+                      }
+                    });
+    //System.out.println(CountingHitCollector.getCount());
+        
+        
+    searcher.search(q4,
+                    new CountingHitCollector() {
+                      public final void collect(int doc, float score) {
+                        //System.out.println("Q1: Doc=" + doc + " score=" + score);
+                        assertTrue(score==1.0f);
+                        assertTrue(doc%2==0);
+                        super.collect(doc, score);
+                      }
+                    });
+    //System.out.println(CountingHitCollector.getCount());
+        
+        
+        
+    BooleanQuery bq = new BooleanQuery();
+    bq.add(q1,Occur.MUST);
+    bq.add(q4,Occur.MUST);
+        
+    searcher.search(bq,
+                    new CountingHitCollector() {
+                      public final void collect(int doc, float score) {
+                        //System.out.println("BQ: Doc=" + doc + " score=" + score);
+                        super.collect(doc, score);
+                      }
+                    });
+    assertTrue(15 == CountingHitCollector.getCount());
+        
+    searcher.close();     
+    dir.close();
+  }
+     
+  public static class CountingHitCollector extends HitCollector {
+    static int count=0;
+    static int sum=0;
+    CountingHitCollector(){count=0;sum=0;}
+    public void collect(int doc, float score) {
+      count++;
+      sum += doc;  // use it to avoid any possibility of being optimized away
+    }
+
+    public static int getCount() { return count; }
+    public static int getSum() { return sum; }
+  }
+}

Propchange: lucene/java/trunk/src/test/org/apache/lucene/index/TestOmitTf.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message