lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yo...@apache.org
Subject svn commit: r329366 - in /lucene/java/trunk: ./ docs/ src/java/org/apache/lucene/document/ src/java/org/apache/lucene/index/ xdocs/
Date Sat, 29 Oct 2005 03:53:05 GMT
Author: yonik
Date: Fri Oct 28 20:52:48 2005
New Revision: 329366

URL: http://svn.apache.org/viewcvs?rev=329366&view=rev
Log:
make field norms optional : LUCENE-448

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/docs/fileformats.html
    lucene/java/trunk/src/java/org/apache/lucene/document/Field.java
    lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FilterIndexReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/MultiReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
    lucene/java/trunk/xdocs/fileformats.xml

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?rev=329366&r1=329365&r2=329366&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Fri Oct 28 20:52:48 2005
@@ -145,6 +145,10 @@
 20. Added a new class MatchAllDocsQuery that matches all documents.
     (John Wang via Daniel Naber, bug #34946)
 
+21. Added ability to omit norms on a per field basis to decrease
+    index size and memory consumption when there are many indexed fields.
+    See Field.setOmitNorms()
+    (Yonik Seeley, LUCENE-448)
 
 API Changes
 

Modified: lucene/java/trunk/docs/fileformats.html
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/docs/fileformats.html?rev=329366&r1=329365&r2=329366&view=diff
==============================================================================
--- lucene/java/trunk/docs/fileformats.html (original)
+++ lucene/java/trunk/docs/fileformats.html Fri Oct 28 20:52:48 2005
@@ -1245,10 +1245,20 @@
                     FieldBits    --> Byte
                 </p>
                                                 <p>
+	          <ul>
+                    <li>
                     The low-order bit is one for
-                    indexed fields, and zero for non-indexed fields.  The second lowest-order
+		    indexed fields, and zero for non-indexed fields.
+                    </li>
+		    <li>
+		    The second lowest-order
                     bit is one for fields that have term vectors stored, and zero for fields
-                    without term vectors.
+                    without term vectors.  
+	            </li>
+		    <li> If the third lowest-order bit is set (0x04), term positions are stored with the term vectors. </li>
+		    <li> If the fourth lowest-order bit is set (0x08), term offsets are stored with the term vectors. </li>
+		    <li> If the fifth lowest-order bit is set (0x10), norms are omitted for the indexed field. </li>
+		  </ul>
                 </p>
                                                 <p>
                     Fields are numbered by their order in this file.  Thus field zero is

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/document/Field.java?rev=329366&r1=329365&r2=329366&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/Field.java Fri Oct 28 20:52:48 2005
@@ -42,6 +42,7 @@
   private boolean storeTermVector = false;
   private boolean storeOffsetWithTermVector = false; 
   private boolean storePositionWithTermVector = false;
+  private boolean omitNorms = false;
   private boolean isStored = false;
   private boolean isIndexed = true;
   private boolean isTokenized = true;
@@ -90,10 +91,19 @@
     public static final Index TOKENIZED = new Index("TOKENIZED");
     
     /** Index the field's value without using an Analyzer, so it can be searched.
-     * As no analyzer is used the value will be stored as a single term. This is 
+     * As no analyzer is used the value will be stored as a single term. This is
      * useful for unique Ids like product numbers.
      */
     public static final Index UN_TOKENIZED = new Index("UN_TOKENIZED");
+
+    /** Index the field's value without an Analyzer, and disable
+     * the storing of norms.  No norms means that index-time boosting
+     * and field length normalization will be disabled.  The benefit is
+     * less memory usage as norms take up one byte per indexed field
+     * for every document in the index.
+     */
+    public static final Index NO_NORMS = new Index("NO_NORMS");
+
   }
 
   public static final class TermVector  extends Parameter implements Serializable {
@@ -338,6 +348,10 @@
     } else if (index == Index.UN_TOKENIZED) {
       this.isIndexed = true;
       this.isTokenized = false;
+    } else if (index == Index.NO_NORMS) {
+      this.isIndexed = true;
+      this.isTokenized = false;
+      this.omitNorms = true;
     } else {
       throw new IllegalArgumentException("unknown index parameter " + index);
     }
@@ -540,6 +554,16 @@
   /** True iff the value of the filed is stored as binary */
   public final boolean  isBinary()      { return isBinary; }
   
+  /** True if norms are omitted for this indexed field */
+  public boolean getOmitNorms() { return omitNorms; }
+
+  /** Expert:
+   *
+   * If set, omit normalization factors associated with this indexed field.
+   * This effectively disables indexing boosts and length normalization for this field.
+   */
+  public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }
+  
   /** Prints a Field for human consumption. */
   public final String toString() {
     StringBuffer result = new StringBuffer();
@@ -580,7 +604,9 @@
         result.append(",");
       result.append("binary");
     }
-    
+    if (omitNorms) {
+      result.append(",omitNorms");
+    }
     result.append('<');
     result.append(name);
     result.append(':');

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java?rev=329366&r1=329365&r2=329366&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java Fri Oct 28 20:52:48 2005
@@ -371,7 +371,7 @@
   private final void writeNorms(String segment) throws IOException { 
     for(int n = 0; n < fieldInfos.size(); n++){
       FieldInfo fi = fieldInfos.fieldInfo(n);
-      if(fi.isIndexed){
+      if(fi.isIndexed && !fi.omitNorms){
         float norm = fieldBoosts[n] * similarity.lengthNorm(fi.name, fieldLengths[n]);
         IndexOutput norms = directory.createOutput(segment + ".f" + n);
         try {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java?rev=329366&r1=329365&r2=329366&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java Fri Oct 28 20:52:48 2005
@@ -26,13 +26,16 @@
   boolean storeOffsetWithTermVector;
   boolean storePositionWithTermVector;
 
+  boolean omitNorms; // omit norms associated with indexed fields
+
   FieldInfo(String na, boolean tk, int nu, boolean storeTermVector, 
-            boolean storePositionWithTermVector,  boolean storeOffsetWithTermVector) {
+            boolean storePositionWithTermVector,  boolean storeOffsetWithTermVector, boolean omitNorms) {
     name = na;
     isIndexed = tk;
     number = nu;
     this.storeTermVector = storeTermVector;
     this.storeOffsetWithTermVector = storeOffsetWithTermVector;
     this.storePositionWithTermVector = storePositionWithTermVector;
+    this.omitNorms = omitNorms;
   }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java?rev=329366&r1=329365&r2=329366&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java Fri Oct 28 20:52:48 2005
@@ -38,6 +38,7 @@
   static final byte STORE_TERMVECTOR = 0x2;
   static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
   static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
+  static final byte OMIT_NORMS = 0x10;
   
   private ArrayList byNumber = new ArrayList();
   private HashMap byName = new HashMap();
@@ -66,7 +67,7 @@
     while (fields.hasMoreElements()) {
       Field field = (Field) fields.nextElement();
       add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
-              field.isStoreOffsetWithTermVector());
+              field.isStoreOffsetWithTermVector(), field.getOmitNorms());
     }
   }
   
@@ -109,7 +110,7 @@
    * @see #add(String, boolean, boolean, boolean, boolean)
    */
   public void add(String name, boolean isIndexed) {
-    add(name, isIndexed, false, false, false);
+    add(name, isIndexed, false, false, false, false);
   }
 
   /**
@@ -120,7 +121,7 @@
    * @param storeTermVector true if the term vector should be stored
    */
   public void add(String name, boolean isIndexed, boolean storeTermVector){
-    add(name, isIndexed, storeTermVector, false, false);
+    add(name, isIndexed, storeTermVector, false, false, false);
   }
   
   /** If the field is not yet known, adds it. If it is known, checks to make
@@ -136,9 +137,27 @@
    */
   public void add(String name, boolean isIndexed, boolean storeTermVector,
                   boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
+
+    add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+  }
+
+    /** If the field is not yet known, adds it. If it is known, checks to make
+   *  sure that the isIndexed flag is the same as was given previously for this
+   *  field. If not - marks it as being indexed.  Same goes for the TermVector
+   * parameters.
+   *
+   * @param name The name of the field
+   * @param isIndexed true if the field is indexed
+   * @param storeTermVector true if the term vector should be stored
+   * @param storePositionWithTermVector true if the term vector with positions should be stored
+   * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
+   * @param omitNorms true if the norms for the indexed field should be omitted
+   */
+  public void add(String name, boolean isIndexed, boolean storeTermVector,
+                  boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
     FieldInfo fi = fieldInfo(name);
     if (fi == null) {
-      addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector);
+      addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms);
     } else {
       if (fi.isIndexed != isIndexed) {
         fi.isIndexed = true;                      // once indexed, always index
@@ -152,15 +171,20 @@
       if (fi.storeOffsetWithTermVector != storeOffsetWithTermVector) {
         fi.storeOffsetWithTermVector = true;                // once vector, always vector
       }
+      if (fi.omitNorms != omitNorms) {
+        fi.omitNorms = false;                // once norms are stored, always store
+      }
+
     }
   }
 
+
   private void addInternal(String name, boolean isIndexed,
                            boolean storeTermVector, boolean storePositionWithTermVector,

-                           boolean storeOffsetWithTermVector) {
+                           boolean storeOffsetWithTermVector, boolean omitNorms) {
     FieldInfo fi =
       new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
-              storeOffsetWithTermVector);
+              storeOffsetWithTermVector, omitNorms);
     byNumber.add(fi);
     byName.put(name, fi);
   }
@@ -245,6 +269,7 @@
       if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
       if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
       if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
+      if (fi.omitNorms) bits |= OMIT_NORMS;
       output.writeString(fi.name);
       output.writeByte(bits);
     }
@@ -259,7 +284,9 @@
       boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
       boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
       boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
-      addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector);
+      boolean omitNorms = (bits & OMIT_NORMS) != 0;
+
+      addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms);
     }    
   }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=329366&r1=329365&r2=329366&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FilterIndexReader.java Fri Oct 28 20:52:48 2005
@@ -107,6 +107,10 @@
   public boolean hasDeletions() { return in.hasDeletions(); }
   protected void doUndeleteAll() throws IOException { in.undeleteAll(); }
 
+  public boolean hasNorms(String field) throws IOException {
+    return in.hasNorms(field);
+  }
+
   public byte[] norms(String f) throws IOException { return in.norms(f); }
   public void norms(String f, byte[] bytes, int offset) throws IOException {
     in.norms(f, bytes, offset);

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java?rev=329366&r1=329365&r2=329366&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java Fri Oct 28 20:52:48 2005
@@ -338,6 +338,13 @@
   /** Returns true if any documents have been deleted */
   public abstract boolean hasDeletions();
   
+  /** Returns true if there are norms stored for this field. */
+  public boolean hasNorms(String field) throws IOException {
+    // backward compatible implementation.
+    // SegmentReader has an efficient implementation.
+    return norms(field) != null;
+  }
+
   /** Returns the byte-encoded normalization factor for the named field of
    * every document.  This is used by the search code to score documents.
    *

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/MultiReader.java?rev=329366&r1=329365&r2=329366&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/MultiReader.java Fri Oct 28 20:52:48 2005
@@ -145,10 +145,25 @@
     return hi;
   }
 
+  public boolean hasNorms(String field) throws IOException {
+    for (int i = 0; i < subReaders.length; i++) {
+      if (subReaders[i].hasNorms(field)) return true;
+    }
+    return false;
+  }
+
+  private byte[] ones;
+  private synchronized byte[] fakeNorms() {
+    if (ones==null) ones=SegmentReader.createFakeNorms(maxDoc());
+    return ones;
+  }
+
   public synchronized byte[] norms(String field) throws IOException {
     byte[] bytes = (byte[])normsCache.get(field);
     if (bytes != null)
       return bytes;          // cache hit
+    if (!hasNorms(field))
+      return fakeNorms();
 
     bytes = new byte[maxDoc()];
     for (int i = 0; i < subReaders.length; i++)
@@ -160,6 +175,7 @@
   public synchronized void norms(String field, byte[] result, int offset)
     throws IOException {
     byte[] bytes = (byte[])normsCache.get(field);
+    if (bytes==null && !hasNorms(field)) bytes=fakeNorms();
     if (bytes != null)                            // cache hit
       System.arraycopy(bytes, 0, result, offset, maxDoc());
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java?rev=329366&r1=329365&r2=329366&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java Fri Oct 28 20:52:48 2005
@@ -165,6 +165,10 @@
     return ((IndexReader)fieldToReader.get(field)).getTermFreqVector(n, field);
   }
 
+  public boolean hasNorms(String field) throws IOException {
+    return ((IndexReader)fieldToReader.get(field)).hasNorms(field);
+  }
+
   public byte[] norms(String field) throws IOException {
     return ((IndexReader)fieldToReader.get(field)).norms(field);
   }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java?rev=329366&r1=329365&r2=329366&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java Fri Oct 28 20:52:48 2005
@@ -18,6 +18,7 @@
 
 import java.util.Vector;
 import java.util.Iterator;
+import java.util.Collection;
 import java.io.IOException;
 
 import org.apache.lucene.store.Directory;
@@ -122,7 +123,7 @@
     // Field norm files
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed) {
+      if (fi.isIndexed && !fi.omitNorms) {
         files.add(segment + ".f" + i);
       }
     }
@@ -146,6 +147,15 @@
     return files;
   }
 
+  private void addIndexed(IndexReader reader, FieldInfos fieldInfos, Collection names, boolean storeTermVectors, boolean storePositionWithTermVector,
+                         boolean storeOffsetWithTermVector) throws IOException {
+    Iterator i = names.iterator();
+    while (i.hasNext()) {
+      String field = (String)i.next();
+      fieldInfos.add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.hasNorms(field));
+    }
+  }
+
   /**
    * 
    * @return The number of documents in all of the readers
@@ -156,11 +166,11 @@
     int docCount = 0;
     for (int i = 0; i < readers.size(); i++) {
       IndexReader reader = (IndexReader) readers.elementAt(i);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
       fieldInfos.add(reader.getFieldNames(IndexReader.FieldOption.UNINDEXED), false);
     }
     fieldInfos.write(directory, segment + ".fnm");
@@ -386,7 +396,7 @@
   private void mergeNorms() throws IOException {
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed) {
+      if (fi.isIndexed && !fi.omitNorms) {
         IndexOutput output = directory.createOutput(segment + ".f" + i);
         try {
           for (int j = 0; j < readers.size(); j++) {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java?rev=329366&r1=329365&r2=329366&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java Fri Oct 28 20:52:48 2005
@@ -17,12 +17,7 @@
  */
 
 import java.io.IOException;
-import java.util.Collection;
-import java.util.Enumeration;
-import java.util.HashSet;
-import java.util.Hashtable;
-import java.util.Set;
-import java.util.Vector;
+import java.util.*;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -30,6 +25,7 @@
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitVector;
+import org.apache.lucene.search.DefaultSimilarity;
 
 /**
  * @version $Id$
@@ -56,9 +52,9 @@
   CompoundFileReader cfsReader = null;
 
   private class Norm {
-    public Norm(IndexInput in, int number) 
-    { 
-      this.in = in; 
+    public Norm(IndexInput in, int number)
+    {
+      this.in = in;
       this.number = number;
     }
 
@@ -78,7 +74,7 @@
       String fileName;
       if(cfsReader == null)
           fileName = segment + ".f" + number;
-      else{ 
+      else{
           // use a different file name if we have compound format
           fileName = segment + ".s" + number;
       }
@@ -133,7 +129,7 @@
     instance.initialize(si);
     return instance;
   }
-          
+
    private void initialize(SegmentInfo si) throws IOException {
     segment = si.name;
 
@@ -164,7 +160,7 @@
       termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
     }
   }
-   
+
    protected void finalize() {
      // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
      termVectorsLocal.set(null);
@@ -172,14 +168,14 @@
    }
 
   protected void doCommit() throws IOException {
-    if (deletedDocsDirty) {               // re-write deleted 
+    if (deletedDocsDirty) {               // re-write deleted
       deletedDocs.write(directory(), segment + ".tmp");
       directory().renameFile(segment + ".tmp", segment + ".del");
     }
     if(undeleteAll && directory().fileExists(segment + ".del")){
       directory().deleteFile(segment + ".del");
     }
-    if (normsDirty) {               // re-write norms 
+    if (normsDirty) {               // re-write norms
       Enumeration values = norms.elements();
       while (values.hasMoreElements()) {
         Norm norm = (Norm) values.nextElement();
@@ -192,7 +188,7 @@
     normsDirty = false;
     undeleteAll = false;
   }
-  
+
   protected void doClose() throws IOException {
     fieldsReader.close();
     tis.close();
@@ -203,8 +199,8 @@
       proxStream.close();
 
     closeNorms();
-    
-    if (termVectorsReaderOrig != null) 
+
+    if (termVectorsReaderOrig != null)
       termVectorsReaderOrig.close();
 
     if (cfsReader != null)
@@ -223,7 +219,7 @@
   static boolean usesCompoundFile(SegmentInfo si) throws IOException {
     return si.dir.fileExists(si.name + ".cfs");
   }
-  
+
   static boolean hasSeparateNorms(SegmentInfo si) throws IOException {
     String[] result = si.dir.list();
     String pattern = si.name + ".s";
@@ -260,7 +256,7 @@
 
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed){
+      if (fi.isIndexed  && !fi.omitNorms){
         String name;
         if(cfsReader == null)
             name = segment + ".f" + i;
@@ -347,7 +343,7 @@
     }
     return fieldSet;
   }
-  
+
   /**
    * @see IndexReader#getIndexedFieldNames(Field.TermVector tvSpec)
    * @deprecated  Replaced by {@link #getFieldNames (IndexReader.FieldOption fldOption)}
@@ -356,7 +352,7 @@
     boolean storedTermVector;
     boolean storePositionWithTermVector;
     boolean storeOffsetWithTermVector;
-    
+
     if(tvSpec == Field.TermVector.NO){
       storedTermVector = false;
       storePositionWithTermVector = false;
@@ -385,25 +381,25 @@
     else{
       throw new IllegalArgumentException("unknown termVector parameter " + tvSpec);
     }
-    
+
     // maintain a unique set of field names
     Set fieldSet = new HashSet();
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed && fi.storeTermVector == storedTermVector && 
-          fi.storePositionWithTermVector == storePositionWithTermVector && 
+      if (fi.isIndexed && fi.storeTermVector == storedTermVector &&
+          fi.storePositionWithTermVector == storePositionWithTermVector &&
           fi.storeOffsetWithTermVector == storeOffsetWithTermVector){
         fieldSet.add(fi.name);
       }
     }
-    return fieldSet;    
+    return fieldSet;
   }
 
   /**
    * @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption)
    */
   public Collection getFieldNames(IndexReader.FieldOption fieldOption) {
-    
+
     Set fieldSet = new HashSet();
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
@@ -441,11 +437,29 @@
     }
     return fieldSet;
   }
-  
-  public synchronized byte[] norms(String field) throws IOException {
+
+
+  public synchronized boolean hasNorms(String field) {
+    return norms.containsKey(field);
+  }
+
+  static byte[] createFakeNorms(int size) {
+    byte[] ones = new byte[size];
+    Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f));
+    return ones;
+  }
+
+  private byte[] ones;
+  private synchronized byte[] fakeNorms() {
+    if (ones==null) ones=createFakeNorms(maxDoc());
+    return ones;
+  }
+
+  // can return null if norms aren't stored
+  protected synchronized byte[] getNorms(String field) throws IOException {
     Norm norm = (Norm) norms.get(field);
-    if (norm == null)                             // not an indexed field
-      return null;
+    if (norm == null) return null;  // not indexed, or norms not stored
+
     if (norm.bytes == null) {                     // value not yet read
       byte[] bytes = new byte[maxDoc()];
       norms(field, bytes, 0);
@@ -454,6 +468,13 @@
     return norm.bytes;
   }
 
+  // returns fake norms if norms aren't available
+  public synchronized byte[] norms(String field) throws IOException {
+    byte[] bytes = getNorms(field);
+    if (bytes==null) bytes=fakeNorms();
+    return bytes;
+  }
+
   protected void doSetNorm(int doc, String field, byte value)
           throws IOException {
     Norm norm = (Norm) norms.get(field);
@@ -470,8 +491,10 @@
     throws IOException {
 
     Norm norm = (Norm) norms.get(field);
-    if (norm == null)
-      return;					  // use zeros in array
+    if (norm == null) {
+      System.arraycopy(fakeNorms(), 0, bytes, offset, maxDoc());
+      return;
+    }
 
     if (norm.bytes != null) {                     // can copy from cache
       System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc());
@@ -487,10 +510,11 @@
     }
   }
 
+
   private void openNorms(Directory cfsDir) throws IOException {
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed) {
+      if (fi.isIndexed && !fi.omitNorms) {
         // look first if there are separate norms in compound format
         String fileName = segment + ".s" + fi.number;
         Directory d = directory();

Modified: lucene/java/trunk/xdocs/fileformats.xml
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/xdocs/fileformats.xml?rev=329366&r1=329365&r2=329366&view=diff
==============================================================================
--- lucene/java/trunk/xdocs/fileformats.xml (original)
+++ lucene/java/trunk/xdocs/fileformats.xml Fri Oct 28 20:52:48 2005
@@ -848,10 +848,20 @@
                 </p>
 
                 <p>
+	          <ul>
+                    <li>
                     The low-order bit is one for
-                    indexed fields, and zero for non-indexed fields.  The second lowest-order
+		    indexed fields, and zero for non-indexed fields.
+                    </li>
+		    <li>
+		    The second lowest-order
                     bit is one for fields that have term vectors stored, and zero for fields
-                    without term vectors.
+                    without term vectors.  
+	            </li>
+		    <li> If the third lowest-order bit is set (0x04), term positions are stored with the term vectors. </li>
+		    <li> If the fourth lowest-order bit is set (0x08), term offsets are stored with the term vectors. </li>
+		    <li> If the fifth lowest-order bit is set (0x10), norms are omitted for the indexed field. </li>
+		  </ul>
                 </p>
 
                 <p>



Mime
View raw message