lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r591620 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/index/TermVectorMapper.java src/java/org/apache/lucene/index/TermVectorsReader.java src/test/org/apache/lucene/index/TestTermVectorsReader.java
Date Sat, 03 Nov 2007 13:41:47 GMT
Author: gsingers
Date: Sat Nov  3 06:41:46 2007
New Revision: 591620

URL: http://svn.apache.org/viewvc?rev=591620&view=rev
Log:
LUCENE-1038: Sets the document number of the term vector being mapped.

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorMapper.java
    lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestTermVectorsReader.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=591620&r1=591619&r2=591620&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sat Nov  3 06:41:46 2007
@@ -142,6 +142,7 @@
  3. LUCENE-868: Added new Term Vector access features.  New callback mechanism allows application to define how and where to read Term Vectors from disk.
     This implementation contains several extensions of the new abstract TermVectorMapper class.  The new API should be back-compatible.  No changes in the
      actual storage of Term Vectors has taken place.
+ 3.1 LUCENE-1038: Added setDocumentNumber() method to TermVectorMapper to provide information about what document is being accessed. (Karl Wetting via Grant Ingersoll)
 
  4. LUCENE-975: Added PositionBasedTermVectorMapper that allows for position based lookup of term vector information.  See item #3 above (LUCENE-868).
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorMapper.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorMapper.java?rev=591620&r1=591619&r2=591620&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorMapper.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorMapper.java Sat Nov  3 06:41:46 2007
@@ -85,4 +85,17 @@
     return ignoringOffsets;
   }
 
+  /**
+   * Passes down the index of the document whose term vector is currently being mapped,
+   * once for each top level call to a term vector reader.
+   *<p/>
+   * Default implementation IGNORES the document number.  Override if your implementation needs the document number.
+   * <p/> 
+   * NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations.
+   *
+   * @param documentNumber index of document currently being mapped
+   */
+  public void setDocumentNumber(int documentNumber) {
+  }
+
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java?rev=591620&r1=591619&r2=591620&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java Sat Nov  3 06:41:46 2007
@@ -163,6 +163,7 @@
         for (int i = 0; i <= found; i++)
           position += tvd.readVLong();
 
+        mapper.setDocumentNumber(docNum);
         readTermVector(field, position, mapper);
       } else {
         //System.out.println("Fieldable not found");
@@ -228,7 +229,7 @@
           tvfPointers[i] = position;
         }
 
-        result = readTermVectors(fields, tvfPointers);
+        result = readTermVectors(docNum, fields, tvfPointers);
       }
     } else {
       //System.out.println("No tvx file");
@@ -268,6 +269,7 @@
           tvfPointers[i] = position;
         }
 
+        mapper.setDocumentNumber(docNumber);
         readTermVectors(fields, tvfPointers, mapper);
       }
     } else {
@@ -276,12 +278,13 @@
   }
 
 
-  private SegmentTermVector[] readTermVectors(String fields[], long tvfPointers[])
+  private SegmentTermVector[] readTermVectors(int docNum, String fields[], long tvfPointers[])
           throws IOException {
     SegmentTermVector res[] = new SegmentTermVector[fields.length];
     for (int i = 0; i < fields.length; i++) {
       ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper();
-       readTermVector(fields[i], tvfPointers[i], mapper);
+      mapper.setDocumentNumber(docNum);
+      readTermVector(fields[i], tvfPointers[i], mapper);
       res[i] = (SegmentTermVector) mapper.materializeVector();
     }
     return res;

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestTermVectorsReader.java?rev=591620&r1=591619&r2=591620&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestTermVectorsReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestTermVectorsReader.java Sat Nov  3 06:41:46 2007
@@ -17,13 +17,13 @@
  * limitations under the License.
  */
 
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.store.MockRAMDirectory;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.store.MockRAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
 
 import java.io.IOException;
 import java.io.Reader;
@@ -329,6 +329,32 @@
       }
     }
 
+    // test setDocumentNumber()
+    IndexReader ir = IndexReader.open(dir);
+    DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();
+    assertEquals(-1, docNumAwareMapper.getDocumentNumber());
+
+    ir.getTermFreqVector(0, docNumAwareMapper);
+    assertEquals(0, docNumAwareMapper.getDocumentNumber());
+    docNumAwareMapper.setDocumentNumber(-1);
+
+    ir.getTermFreqVector(1, docNumAwareMapper);
+    assertEquals(1, docNumAwareMapper.getDocumentNumber());
+    docNumAwareMapper.setDocumentNumber(-1);
+
+    ir.getTermFreqVector(0, "f1", docNumAwareMapper);
+    assertEquals(0, docNumAwareMapper.getDocumentNumber());
+    docNumAwareMapper.setDocumentNumber(-1);
+
+    ir.getTermFreqVector(1, "f2", docNumAwareMapper);
+    assertEquals(1, docNumAwareMapper.getDocumentNumber());
+    docNumAwareMapper.setDocumentNumber(-1);
+
+    ir.getTermFreqVector(0, "f1", docNumAwareMapper);
+    assertEquals(0, docNumAwareMapper.getDocumentNumber());
+
+    ir.close();
+
   }
 
 
@@ -362,6 +388,35 @@
       assertTrue(vector == null);
     } catch (IOException e) {
       fail();
+    }
+  }
+
+
+  public static class DocNumAwareMapper extends TermVectorMapper {
+
+    public DocNumAwareMapper() {
+    }
+
+    private int documentNumber = -1;
+
+    public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
+      if (documentNumber == -1) {
+        throw new RuntimeException("Documentnumber should be set at this point!");
+      }
+    }
+
+    public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+      if (documentNumber == -1) {
+        throw new RuntimeException("Documentnumber should be set at this point!");
+      }
+    }
+
+    public int getDocumentNumber() {
+      return documentNumber;
+    }
+
+    public void setDocumentNumber(int documentNumber) {
+      this.documentNumber = documentNumber;
     }
   }
 }



Mime
View raw message