lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r763591 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/document/ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/
Date Thu, 09 Apr 2009 10:37:50 GMT
Author: mikemccand
Date: Thu Apr  9 10:37:50 2009
New Revision: 763591

URL: http://svn.apache.org/viewvc?rev=763591&view=rev
Log:
LUCENE-1590: don't allow stored-only fields to mess up the index-only attrs like omitNorms
and omitTermFreqAndPositions

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/document/Field.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
    lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=763591&r1=763590&r2=763591&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu Apr  9 10:37:50 2009
@@ -107,6 +107,12 @@
    public APIs to throw InterruptedException.  (Jeremy Volkman via
    Mike McCandless)
 
+9. LUCENE-1590: Fixed stored-only Field instances to not change the
+   value of omitNorms, omitTermFreqAndPositions in FieldInfo; when you
+   retrieve such fields they will now have omitNorms=true and
+   omitTermFreqAndPositions=false (though these values are unused).
+   (Uwe Schindler via Mike McCandless)
+
 New features
 
  1. LUCENE-1411: Added expert API to open an IndexWriter on a prior

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/Field.java?rev=763591&r1=763590&r2=763591&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/Field.java Thu Apr  9 10:37:50 2009
@@ -329,6 +329,8 @@
     if (index == Index.NO) {
       this.isIndexed = false;
       this.isTokenized = false;
+      this.omitTermFreqAndPositions = false;
+      this.omitNorms = true;
     } else if (index == Index.ANALYZED) {
       this.isIndexed = true;
       this.isTokenized = true;
@@ -492,6 +494,8 @@
     
     isIndexed   = false;
     isTokenized = false;
+    omitTermFreqAndPositions = false;
+    omitNorms = true;
     
     isBinary    = true;
     binaryLength = length;

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java?rev=763591&r1=763590&r2=763591&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfo.java Thu Apr  9 10:37:50 2009
@@ -38,12 +38,21 @@
     name = na;
     isIndexed = tk;
     number = nu;
-    this.storeTermVector = storeTermVector;
-    this.storeOffsetWithTermVector = storeOffsetWithTermVector;
-    this.storePositionWithTermVector = storePositionWithTermVector;
-    this.omitNorms = omitNorms;
-    this.storePayloads = storePayloads;
-    this.omitTermFreqAndPositions = omitTermFreqAndPositions;
+    if (isIndexed) {
+      this.storeTermVector = storeTermVector;
+      this.storeOffsetWithTermVector = storeOffsetWithTermVector;
+      this.storePositionWithTermVector = storePositionWithTermVector;
+      this.storePayloads = storePayloads;
+      this.omitNorms = omitNorms;
+      this.omitTermFreqAndPositions = omitTermFreqAndPositions;
+    } else { // for non-indexed fields, leave defaults
+      this.storeTermVector = false;
+      this.storeOffsetWithTermVector = false;
+      this.storePositionWithTermVector = false;
+      this.storePayloads = false;
+      this.omitNorms = true;
+      this.omitTermFreqAndPositions = false;
+    }
   }
 
   public Object clone() {
@@ -56,23 +65,25 @@
     if (this.isIndexed != isIndexed) {
       this.isIndexed = true;                      // once indexed, always index
     }
-    if (this.storeTermVector != storeTermVector) {
-      this.storeTermVector = true;                // once vector, always vector
-    }
-    if (this.storePositionWithTermVector != storePositionWithTermVector) {
-      this.storePositionWithTermVector = true;                // once vector, always vector
-    }
-    if (this.storeOffsetWithTermVector != storeOffsetWithTermVector) {
-      this.storeOffsetWithTermVector = true;                // once vector, always vector
-    }
-    if (this.omitNorms != omitNorms) {
-      this.omitNorms = false;                // once norms are stored, always store
-    }
-    if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) {
-      this.omitTermFreqAndPositions = true;                // if one require omitTermFreqAndPositions at least once, it remains off for life
-    }
-    if (this.storePayloads != storePayloads) {
-      this.storePayloads = true;
+    if (isIndexed) { // if updated field data is not for indexing, leave the updates out
+      if (this.storeTermVector != storeTermVector) {
+        this.storeTermVector = true;                // once vector, always vector
+      }
+      if (this.storePositionWithTermVector != storePositionWithTermVector) {
+        this.storePositionWithTermVector = true;                // once vector, always vector
+      }
+      if (this.storeOffsetWithTermVector != storeOffsetWithTermVector) {
+        this.storeOffsetWithTermVector = true;                // once vector, always vector
+      }
+      if (this.storePayloads != storePayloads) {
+        this.storePayloads = true;
+      }
+      if (this.omitNorms != omitNorms) {
+        this.omitNorms = false;                // once norms are stored, always store
+      }
+      if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) {
+        this.omitTermFreqAndPositions = true;                // if one require omitTermFreqAndPositions at least once, it remains off for life
+      }
     }
   }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java?rev=763591&r1=763590&r2=763591&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java Thu Apr  9 10:37:50 2009
@@ -84,7 +84,7 @@
     while (fieldIterator.hasNext()) {
       Fieldable field = (Fieldable) fieldIterator.next();
       add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
-              field.isStoreOffsetWithTermVector(), field.getOmitNorms());
+              field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getOmitTf());
     }
   }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java?rev=763591&r1=763590&r2=763591&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java Thu Apr  9 10:37:50 2009
@@ -322,6 +322,7 @@
         //skip over the part that we aren't loading
         fieldsStream.seek(pointer + toRead);
         f.setOmitNorms(fi.omitNorms);
+        f.setOmitTf(fi.omitTermFreqAndPositions);
       } else {
         int length = fieldsStream.readVInt();
         long pointer = fieldsStream.getFilePointer();
@@ -332,6 +333,7 @@
           fieldsStream.skipChars(length);
         f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
         f.setOmitNorms(fi.omitNorms);
+        f.setOmitTf(fi.omitTermFreqAndPositions);
       }
       doc.add(f);
     }
@@ -365,7 +367,6 @@
         doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
       else
         doc.add(new Field(fi.name, b, Field.Store.YES));
-
     } else {
       Field.Store store = Field.Store.YES;
       Field.Index index = getIndexType(fi, tokenize);
@@ -383,6 +384,7 @@
                 store,
                 index,
                 termVector);
+        f.setOmitTf(fi.omitTermFreqAndPositions);
         f.setOmitNorms(fi.omitNorms);
       } else {
         f = new Field(fi.name,     // name
@@ -390,6 +392,7 @@
                 store,
                 index,
                 termVector);
+        f.setOmitTf(fi.omitTermFreqAndPositions);
         f.setOmitNorms(fi.omitNorms);
       }
       doc.add(f);
@@ -641,6 +644,7 @@
       this.name = fi.name.intern();
       this.isIndexed = fi.isIndexed;
       this.omitNorms = fi.omitNorms;          
+      this.omitTermFreqAndPositions = fi.omitTermFreqAndPositions;
       this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
       this.storePositionWithTermVector = fi.storePositionWithTermVector;
       this.storeTermVector = fi.storeTermVector;            

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java?rev=763591&r1=763590&r2=763591&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java Thu Apr  9 10:37:50 2009
@@ -64,6 +64,14 @@
   public static Field noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT,
       Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
 
+  public static final String NO_TF_TEXT = "analyzed with no tf and positions";
+  public static final String NO_TF_KEY = "omitTermFreqAndPositions";
+  public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT,
+      Field.Store.YES, Field.Index.ANALYZED);
+  static {
+    noTFField.setOmitTermFreqAndPositions(true);
+  }
+
   public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
   public static final String UNINDEXED_FIELD_KEY = "unIndField";
   public static Field unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT,
@@ -119,6 +127,7 @@
     compressedTextField2,
     keyField,
     noNormsField,
+    noTFField,
     unIndField,
     unStoredField1,
     unStoredField2,
@@ -139,6 +148,7 @@
   public static Map notermvector=new HashMap();
   public static Map lazy= new HashMap();
   public static Map noNorms=new HashMap();
+  public static Map noTf=new HashMap();
 
   static {
     //Initialize the large Lazy Field
@@ -167,6 +177,7 @@
       if (f.isStored()) add(stored,f);
       else add(unstored,f);
       if (f.getOmitNorms()) add(noNorms,f);
+      if (f.getOmitTf()) add(noTf,f);
       if (f.isLazy()) add(lazy, f);
     }
   }
@@ -186,6 +197,7 @@
     nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
     nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
     nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);
+    nameValues.put(NO_TF_KEY, NO_TF_TEXT);
     nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
     nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
     nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java?rev=763591&r1=763590&r2=763591&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java Thu Apr  9 10:37:50 2009
@@ -259,14 +259,13 @@
     doc.add(new Field("f2", "v1", Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
     doc.add(new Field("f2", "v2", Store.YES, Index.NOT_ANALYZED, TermVector.NO));
 
-    RAMDirectory ram = new RAMDirectory();
-    IndexWriter writer = new IndexWriter(ram, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
+    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
     writer.addDocument(doc);
     writer.close();
 
-    _TestUtil.checkIndex(ram);
+    _TestUtil.checkIndex(dir);
 
-    IndexReader reader = IndexReader.open(ram);
+    IndexReader reader = IndexReader.open(dir);
     // f1
     TermFreqVector tfv1 = reader.getTermFreqVector(0, "f1");
     assertNotNull(tfv1);
@@ -276,4 +275,37 @@
     assertNotNull(tfv2);
     assertEquals("the 'with_tv' setting should rule!",2,tfv2.getTerms().length);
   }
+
+  /**
+   * Test adding two fields with the same name, one indexed
+   * the other stored only. The omitNorms and omitTermFreqAndPositions setting
+   * of the stored field should not affect the indexed one (LUCENE-1590)
+   */
+  public void testLUCENE_1590() throws Exception {
+    Document doc = new Document();
+    // f1 has no norms
+    doc.add(new Field("f1", "v1", Store.NO, Index.ANALYZED_NO_NORMS));
+    doc.add(new Field("f1", "v2", Store.YES, Index.NO));
+    // f2 has no TF
+    Field f = new Field("f2", "v1", Store.NO, Index.ANALYZED);
+    f.setOmitTermFreqAndPositions(true);
+    doc.add(f);
+    doc.add(new Field("f2", "v2", Store.YES, Index.NO));
+
+    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
+    writer.addDocument(doc);
+    writer.optimize(); // be sure to have a single segment
+    writer.close();
+
+    _TestUtil.checkIndex(dir);
+
+    SegmentReader reader = (SegmentReader) IndexReader.open(dir);
+    FieldInfos fi = reader.fieldInfos();
+    // f1
+    assertFalse("f1 should have no norms", reader.hasNorms("f1"));
+    assertFalse("omitTermFreqAndPositions field bit should not be set for f1", fi.fieldInfo("f1").omitTermFreqAndPositions);
+    // f2
+    assertTrue("f2 should have norms", reader.hasNorms("f2"));
+    assertTrue("omitTermFreqAndPositions field bit should be set for f2", fi.fieldInfo("f2").omitTermFreqAndPositions);
+  }
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java?rev=763591&r1=763590&r2=763591&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java Thu Apr  9 10:37:50 2009
@@ -72,6 +72,7 @@
     assertTrue(field.isStoreOffsetWithTermVector() == true);
     assertTrue(field.isStorePositionWithTermVector() == true);
     assertTrue(field.getOmitNorms() == false);
+    assertTrue(field.getOmitTf() == false);
 
     field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
     assertTrue(field != null);
@@ -79,8 +80,15 @@
     assertTrue(field.isStoreOffsetWithTermVector() == false);
     assertTrue(field.isStorePositionWithTermVector() == false);
     assertTrue(field.getOmitNorms() == true);
+    assertTrue(field.getOmitTf() == false);
 
-
+    field = doc.getField(DocHelper.NO_TF_KEY);
+    assertTrue(field != null);
+    assertTrue(field.isTermVectorStored() == false);
+    assertTrue(field.isStoreOffsetWithTermVector() == false);
+    assertTrue(field.isStorePositionWithTermVector() == false);
+    assertTrue(field.getOmitNorms() == false);
+    assertTrue(field.getOmitTf() == true);
     reader.close();
   }
 



Mime
View raw message