lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1544570 - in /lucene/dev/trunk/lucene: ./ suggest/ suggest/src/java/org/apache/lucene/search/suggest/ suggest/src/test/org/apache/lucene/search/suggest/
Date Fri, 22 Nov 2013 15:21:31 GMT
Author: mikemccand
Date: Fri Nov 22 15:21:31 2013
New Revision: 1544570

URL: http://svn.apache.org/r1544570
Log:
LUCENE-5329: Document/ExpressionDictionary are now lenient if a doc is missing term/payload/weight

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/suggest/build.xml
    lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
    lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java
    lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
    lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1544570&r1=1544569&r2=1544570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Nov 22 15:21:31 2013
@@ -68,6 +68,11 @@ New Features
 * LUCENE-5336: Add SimpleQueryParser: parser for human-entered queries.
   (Jack Conradson via Robert Muir)
 
+* LUCENE-5329: suggest: DocumentDictionary and
+  DocumentExpressionDictionary are now lenient for dirty documents
+  (missing the term, weight or payload).  (Areek Zillur via
+  Mike McCandless)
+
 Build
 
 * LUCENE-5217: Maven config: get dependencies from Ant+Ivy config; disable

Modified: lucene/dev/trunk/lucene/suggest/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/build.xml?rev=1544570&r1=1544569&r2=1544570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/build.xml (original)
+++ lucene/dev/trunk/lucene/suggest/build.xml Fri Nov 22 15:21:31 2013
@@ -42,6 +42,7 @@
     <invoke-module-javadoc>
       <links>
         <link href="../analyzers-common"/>
+      	<link href="../queries"/>
       </links>
     </invoke-module-javadoc>
   </target>

Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java?rev=1544570&r1=1544569&r2=1544570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
(original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
Fri Nov 22 15:21:31 2013
@@ -17,13 +17,13 @@ package org.apache.lucene.search.suggest
  * limitations under the License.
  */
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Set;
 
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.StorableField;
 import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.search.spell.Dictionary;
@@ -32,14 +32,24 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
 
 /**
+ * <p>
  * Dictionary with terms, weights and optionally payload information 
- * taken from stored fields in a Lucene index.
- * 
- * <b>NOTE: </b> 
+ * taken from stored/indexed fields in a Lucene index.
+ * </p>
+ * <b>NOTE:</b> 
  *  <ul>
  *    <li>
- *      The term, weight and (optionally) payload fields supplied
- *      are required for ALL documents and has to be stored
+ *      The term and (optionally) payload fields have to be
+ *      stored
+ *    </li>
+ *    <li>
+ *      The weight field can be stored or can be a {@link NumericDocValues}.
+ *      If the weight field is not defined, the value of the weight is <code>0</code>
+ *    </li>
+ *    <li>
+ *      if any of the term or (optionally) payload fields supplied
+ *      do not have a value for a document, then the document is 
+ *      skipped by the dictionary
  *    </li>
  *  </ul>
  */
@@ -59,10 +69,7 @@ public class DocumentDictionary implemen
    * the corresponding terms.
    */
   public DocumentDictionary(IndexReader reader, String field, String weightField) {
-    this.reader = reader;
-    this.field = field;
-    this.weightField = weightField;
-    this.payloadField = null;
+    this(reader, field, weightField, null);
   }
   
   /**
@@ -85,14 +92,16 @@ public class DocumentDictionary implemen
 
   /** Implements {@link InputIterator} from stored fields. */
   protected class DocumentInputIterator implements InputIterator {
+
     private final int docCount;
     private final Set<String> relevantFields;
     private final boolean hasPayloads;
     private final Bits liveDocs;
     private int currentDocId = -1;
-    private long currentWeight;
-    private BytesRef currentPayload;
-    private StoredDocument doc;
+    private long currentWeight = 0;
+    private BytesRef currentPayload = null;
+    private final NumericDocValues weightValues;
+    
     
     /**
      * Creates an iterator over term, weight and payload fields from the lucene
@@ -100,11 +109,11 @@ public class DocumentDictionary implemen
      * over only term and weight.
      */
     public DocumentInputIterator(boolean hasPayloads) throws IOException {
-      docCount = reader.maxDoc() - 1;
       this.hasPayloads = hasPayloads;
-      currentPayload = null;
-      liveDocs = MultiFields.getLiveDocs(reader);
-      this.relevantFields = getRelevantFields(new String [] {field, weightField, payloadField});
+      docCount = reader.maxDoc() - 1;
+      weightValues = (weightField != null) ? MultiDocValues.getNumericValues(reader, weightField)
: null;
+      liveDocs = (reader.leaves().size() > 0) ? MultiFields.getLiveDocs(reader) : null;
+      relevantFields = getRelevantFields(new String [] {field, weightField, payloadField});
     }
 
     @Override
@@ -120,28 +129,29 @@ public class DocumentDictionary implemen
           continue;
         }
 
-        doc = reader.document(currentDocId, relevantFields);
+        StoredDocument doc = reader.document(currentDocId, relevantFields);
+        
+        BytesRef tempPayload = null;
+        BytesRef tempTerm = null;
         
         if (hasPayloads) {
           StorableField payload = doc.getField(payloadField);
-          if (payload == null) {
-            throw new IllegalArgumentException(payloadField + " does not exist");
-          } else if (payload.binaryValue() == null) {
-            throw new IllegalArgumentException(payloadField + " does not have binary value");
+          if (payload == null || (payload.binaryValue() == null && payload.stringValue()
== null)) {
+            continue;
           }
-          currentPayload = payload.binaryValue();
+          tempPayload = (payload.binaryValue() != null) ? payload.binaryValue() : new BytesRef(payload.stringValue());
         }
         
-        currentWeight = getWeight(currentDocId);
-        
         StorableField fieldVal = doc.getField(field);
-        if (fieldVal == null) {
-          throw new IllegalArgumentException(field + " does not exist");
-        } else if(fieldVal.stringValue() == null) {
-          throw new IllegalArgumentException(field + " does not have string value");
+        if (fieldVal == null || (fieldVal.binaryValue() == null && fieldVal.stringValue()
== null)) {
+          continue;
         }
+        tempTerm = (fieldVal.stringValue() != null) ? new BytesRef(fieldVal.stringValue())
: fieldVal.binaryValue();
         
-        return new BytesRef(fieldVal.stringValue());
+        currentPayload = tempPayload;
+        currentWeight = getWeight(doc, currentDocId);
+
+        return tempTerm;
       }
       return null;
     }
@@ -156,15 +166,21 @@ public class DocumentDictionary implemen
       return hasPayloads;
     }
 
-    /** Return the suggestion weight for this document */
-    protected long getWeight(int docId) {
+    /** 
+     * Returns the value of the <code>weightField</code> for the current document.
+     * Retrieves the value for the <code>weightField</code> if it is stored (using
<code>doc</code>)
+     * or if it is indexed as {@link NumericDocValues} (using <code>docId</code>)
for the document.
+     * If no value is found, then the weight is 0.
+     */
+    protected long getWeight(StoredDocument doc, int docId) {
       StorableField weight = doc.getField(weightField);
-      if (weight == null) {
-        throw new IllegalArgumentException(weightField + " does not exist");
-      } else if (weight.numericValue() == null) {
-        throw new IllegalArgumentException(weightField + " does not have numeric value");
+      if (weight != null) { // found weight as stored
+        return (weight.numericValue() != null) ? weight.numericValue().longValue() : 0;
+      } else if (weightValues != null) {  // found weight as NumericDocValue
+        return weightValues.get(docId);
+      } else { // fall back
+        return 0;
       }
-      return weight.numericValue().longValue();
     }
     
     private Set<String> getRelevantFields(String... fields) {

Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java?rev=1544570&r1=1544569&r2=1544570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java
(original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java
Fri Nov 22 15:21:31 2013
@@ -30,6 +30,7 @@ import org.apache.lucene.expressions.js.
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.ReaderUtil;
+import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.SortField;
@@ -37,23 +38,34 @@ import org.apache.lucene.util.BytesRefIt
 
 
 /**
+ * <p>
  * Dictionary with terms and optionally payload information 
  * taken from stored fields in a Lucene index. Similar to 
  * {@link DocumentDictionary}, except it computes the weight
  * of the terms in a document based on a user-defined expression
  * having one or more {@link NumericDocValuesField} in the document.
- * 
+ * </p>
  * <b>NOTE:</b> 
  *  <ul>
  *    <li>
- *      The term and (optionally) payload fields supplied
- *      are required for ALL documents and has to be stored
+ *      The term and (optionally) payload fields have to be
+ *      stored
+ *    </li>
+ *    <li>
+ *      if the term or (optionally) payload fields supplied
+ *      do not have a value for a document, then the document is 
+ *      rejected by the dictionary
+ *    </li>
+ *    <li>
+ *      All the fields used in <code>weightExpression</code> should
+ *      have values for all documents; if any of the fields do not 
+ *      have a value for a document, it will default to 0
  *    </li>
  *  </ul>
  */
 public class DocumentExpressionDictionary extends DocumentDictionary {
   
-  private ValueSource weightsValueSource;
+  private final ValueSource weightsValueSource;
   
   /**
    * Creates a new dictionary with the contents of the fields named <code>field</code>
@@ -86,8 +98,31 @@ public class DocumentExpressionDictionar
     for (SortField sortField: sortFields) {
       bindings.add(sortField);
     }
-    weightsValueSource = expression.getValueSource(bindings);
     
+    weightsValueSource = expression.getValueSource(bindings);
+  }
+  
+  /** 
+   * Creates a new dictionary with the contents of the fields named <code>field</code>
+   * for the terms, <code>payloadField</code> for the corresponding payloads
+   * and uses the <code>weightsValueSource</code> supplied to determine the 
+   * score.
+   */
+  public DocumentExpressionDictionary(IndexReader reader, String field,
+      ValueSource weightsValueSource, String payload) {
+    super(reader, field, null, payload);
+    this.weightsValueSource = weightsValueSource;  
+  }
+  
+  /** 
+   * Creates a new dictionary with the contents of the fields named <code>field</code>
+   * for the terms and uses the <code>weightsValueSource</code> supplied to determine
the 
+   * score.
+   */
+  public DocumentExpressionDictionary(IndexReader reader, String field,
+      ValueSource weightsValueSource) {
+    super(reader, field, null, null);
+    this.weightsValueSource = weightsValueSource;  
   }
   
   @Override
@@ -98,30 +133,36 @@ public class DocumentExpressionDictionar
   final class DocumentExpressionInputIterator extends DocumentDictionary.DocumentInputIterator
{
     
     private FunctionValues currentWeightValues;
-    private int currentLeafIndex = 0;
+    /** leaves of the reader */
     private final List<AtomicReaderContext> leaves;
-    
+    /** starting docIds of all the leaves */
     private final int[] starts;
+    /** current leaf index */
+    private int currentLeafIndex = 0;
     
     public DocumentExpressionInputIterator(boolean hasPayloads)
         throws IOException {
       super(hasPayloads);
       leaves = reader.leaves();
-      if (leaves.size() == 0) {
-        throw new IllegalArgumentException("Reader has to have at least one leaf");
-      }
       starts = new int[leaves.size() + 1];
       for (int i = 0; i < leaves.size(); i++) {
         starts[i] = leaves.get(i).docBase;
       }
       starts[leaves.size()] = reader.maxDoc();
-      
-      currentLeafIndex = 0;
-      currentWeightValues = weightsValueSource.getValues(new HashMap<String, Object>(),
leaves.get(currentLeafIndex));
+      currentWeightValues = (leaves.size() > 0) 
+          ? weightsValueSource.getValues(new HashMap<String, Object>(), leaves.get(currentLeafIndex))
+          : null;
     }
     
+    /** 
+     * Returns the weight for the current <code>docId</code> as computed 
+     * by the <code>weightsValueSource</code>
+     * */
     @Override
-    protected long getWeight(int docId) {
+    protected long getWeight(StoredDocument doc, int docId) {    
+      if (currentWeightValues == null) {
+        return 0;
+      }
       int subIndex = ReaderUtil.subIndex(docId, starts);
       if (subIndex != currentLeafIndex) {
         currentLeafIndex = subIndex;

Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java?rev=1544570&r1=1544569&r2=1544570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
(original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
Fri Nov 22 15:21:31 2013
@@ -1,22 +1,24 @@
 package org.apache.lucene.search.suggest;
 
 import java.io.IOException;
+import java.util.AbstractMap.SimpleEntry;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.StorableField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.search.suggest.DocumentDictionary;
@@ -48,19 +50,73 @@ public class DocumentDictionaryTest exte
   static final String WEIGHT_FIELD_NAME = "w1";
   static final String PAYLOAD_FIELD_NAME = "p1";
   
-  private Map<String, Document> generateIndexDocuments(int ndocs) {
+  /** Returns Pair(list of invalid document terms, Map of document term -> document) */
+  private Map.Entry<List<String>, Map<String, Document>> generateIndexDocuments(int
ndocs, boolean requiresPayload) {
     Map<String, Document> docs = new HashMap<>();
+    List<String> invalidDocTerms = new ArrayList<>();
     for(int i = 0; i < ndocs ; i++) {
-      Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
-      Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
-      Field weight = new StoredField(WEIGHT_FIELD_NAME, 100d + i);
       Document doc = new Document();
-      doc.add(field);
-      doc.add(payload);
-      doc.add(weight);
-      docs.put(field.stringValue(), doc);
+      boolean invalidDoc = false;
+      Field field = null;
+      // usually have valid term field in document
+      if (usually()) {
+        field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
+        doc.add(field);
+      } else {
+        invalidDoc = true;
+      }
+      
+      // even if payload is not required usually have it
+      if (requiresPayload || usually()) {
+        // usually have valid payload field in document
+        if (usually()) {
+          Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
+          doc.add(payload);
+        } else if (requiresPayload) {
+          invalidDoc = true;
+        }
+      }
+      
+      // usually have valid weight field in document
+      if (usually()) {
+        Field weight = (rarely()) ? 
+            new StoredField(WEIGHT_FIELD_NAME, 100d + i) : 
+            new NumericDocValuesField(WEIGHT_FIELD_NAME, 100 + i);
+        doc.add(weight);
+      }
+      
+      String term = null;
+      if (invalidDoc) {
+        term = (field!=null) ? field.stringValue() : "invalid_" + i;
+        invalidDocTerms.add(term);
+      } else {
+        term = field.stringValue();
+      }
+      
+      docs.put(term, doc);
     }
-    return docs;
+    return new SimpleEntry<List<String>, Map<String, Document>>(invalidDocTerms,
docs);
+  }
+  
+  @Test
+  public void testEmptyReader() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    // Make sure the index is created?
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+    writer.commit();
+    writer.close();
+    IndexReader ir = DirectoryReader.open(dir);
+    Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+
+    assertNull(inputIterator.next());
+    assertEquals(inputIterator.weight(), 0);
+    assertNull(inputIterator.payload());
+    
+    ir.close();
+    dir.close();
   }
   
   @Test
@@ -69,7 +125,9 @@ public class DocumentDictionaryTest exte
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(10);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000),
true);
+    Map<String, Document> docs = res.getValue();
+    List<String> invalidDocTerms = res.getKey();
     for(Document doc: docs.values()) {
       writer.addDocument(doc);
     }
@@ -77,15 +135,21 @@ public class DocumentDictionaryTest exte
     writer.close();
     IndexReader ir = DirectoryReader.open(dir);
     Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
-    InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
     BytesRef f;
-    while((f = tfp.next())!=null) {
+    while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(tfp.weight(), doc.getField(WEIGHT_FIELD_NAME).numericValue().longValue());
-      assertTrue(tfp.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      Field weightField = doc.getField(WEIGHT_FIELD_NAME);
+      assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue()
: 0);
+      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+    }
+    
+    for (String invalidTerm : invalidDocTerms) {
+      assertNotNull(docs.remove(invalidTerm));
     }
     assertTrue(docs.isEmpty());
+    
     ir.close();
     dir.close();
   }
@@ -96,7 +160,9 @@ public class DocumentDictionaryTest exte
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(10);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000),
false);
+    Map<String, Document> docs = res.getValue();
+    List<String> invalidDocTerms = res.getKey();
     for(Document doc: docs.values()) {
       writer.addDocument(doc);
     }
@@ -104,15 +170,22 @@ public class DocumentDictionaryTest exte
     writer.close();
     IndexReader ir = DirectoryReader.open(dir);
     Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
-    InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
     BytesRef f;
-    while((f = tfp.next())!=null) {
+    while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(tfp.weight(), doc.getField(WEIGHT_FIELD_NAME).numericValue().longValue());
-      assertEquals(tfp.payload(), null);
+      Field weightField = doc.getField(WEIGHT_FIELD_NAME);
+      assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue()
: 0);
+      assertEquals(inputIterator.payload(), null);
     }
+    
+    for (String invalidTerm : invalidDocTerms) {
+      assertNotNull(docs.remove(invalidTerm));
+    }
+    
     assertTrue(docs.isEmpty());
+    
     ir.close();
     dir.close();
   }
@@ -123,11 +196,14 @@ public class DocumentDictionaryTest exte
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(10);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000),
false);
+    Map<String, Document> docs = res.getValue();
+    List<String> invalidDocTerms = res.getKey();
     Random rand = random();
     List<String> termsToDel = new ArrayList<>();
     for(Document doc : docs.values()) {
-      if(rand.nextBoolean()) {
+      StorableField f = doc.getField(FIELD_NAME);
+      if(rand.nextBoolean() && f != null && !invalidDocTerms.contains(f.stringValue()))
{
         termsToDel.add(doc.get(FIELD_NAME));
       }
       writer.addDocument(doc);
@@ -152,15 +228,21 @@ public class DocumentDictionaryTest exte
     IndexReader ir = DirectoryReader.open(dir);
     assertEquals(ir.numDocs(), docs.size());
     Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
-    InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
     BytesRef f;
-    while((f = tfp.next())!=null) {
+    while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(tfp.weight(), doc.getField(WEIGHT_FIELD_NAME).numericValue().longValue());
-      assertEquals(tfp.payload(), null);
+      Field weightField = doc.getField(WEIGHT_FIELD_NAME);
+      assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue()
: 0);
+      assertEquals(inputIterator.payload(), null);
+    }
+    
+    for (String invalidTerm : invalidDocTerms) {
+      assertNotNull(docs.remove(invalidTerm));
     }
     assertTrue(docs.isEmpty());
+    
     ir.close();
     dir.close();
   }

Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java?rev=1544570&r1=1544569&r2=1544570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java
(original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java
Fri Nov 22 15:21:31 2013
@@ -37,6 +37,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.store.Directory;
@@ -72,12 +73,37 @@ public class DocumentExpressionDictionar
   }
   
   @Test
+  public void testEmptyReader() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    // Make sure the index is created?
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+    writer.commit();
+    writer.close();
+    IndexReader ir = DirectoryReader.open(dir);
+    Set<SortField> sortFields = new HashSet<SortField>(); 
+    sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
+    sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
+    sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
+    Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2)
- w3)", sortFields, PAYLOAD_FIELD_NAME);
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+
+    assertNull(inputIterator.next());
+    assertEquals(inputIterator.weight(), 0);
+    assertNull(inputIterator.payload());
+
+    ir.close();
+    dir.close();
+  }
+  
+  @Test
   public void testBasic() throws IOException {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(atLeast(10));
+    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
     for(Document doc: docs.values()) {
       writer.addDocument(doc);
     }
@@ -90,16 +116,16 @@ public class DocumentExpressionDictionar
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
     Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2)
- w3)", sortFields, PAYLOAD_FIELD_NAME);
-    InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
     BytesRef f;
-    while((f = tfp.next())!=null) {
+    while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
       long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
       long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
       long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(tfp.weight(), (w1 + w2) - w3);
-      assertTrue(tfp.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      assertEquals(inputIterator.weight(), (w1 + w2) - w3);
+      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
     }
     assertTrue(docs.isEmpty());
     ir.close();
@@ -112,7 +138,7 @@ public class DocumentExpressionDictionar
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(atLeast(10));
+    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
     for(Document doc: docs.values()) {
       writer.addDocument(doc);
     }
@@ -125,16 +151,16 @@ public class DocumentExpressionDictionar
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
     Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w1 + (0.2 *
w2) - (w3 - w1)/2", sortFields);
-    InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
     BytesRef f;
-    while((f = tfp.next())!=null) {
+    while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
       long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
       long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
       long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(tfp.weight(), (long)(w1 + (0.2 * w2) - (w3 - w1)/2));
-      assertEquals(tfp.payload(), null);
+      assertEquals(inputIterator.weight(), (long)(w1 + (0.2 * w2) - (w3 - w1)/2));
+      assertEquals(inputIterator.payload(), null);
     }
     assertTrue(docs.isEmpty());
     ir.close();
@@ -147,7 +173,7 @@ public class DocumentExpressionDictionar
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(atLeast(10));
+    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
     Random rand = random();
     List<String> termsToDel = new ArrayList<>();
     for(Document doc : docs.values()) {
@@ -180,15 +206,44 @@ public class DocumentExpressionDictionar
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
     Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w2-w1", sortFields,
PAYLOAD_FIELD_NAME);
-    InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
     BytesRef f;
-    while((f = tfp.next())!=null) {
+    while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
       long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
       long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(tfp.weight(), w2-w1);
-      assertTrue(tfp.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      assertEquals(inputIterator.weight(), w2-w1);
+      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+    }
+    assertTrue(docs.isEmpty());
+    ir.close();
+    dir.close();
+  }
+  
+  @Test
+  public void testWithValueSource() throws IOException {
+    
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
+    for(Document doc: docs.values()) {
+      writer.addDocument(doc);
+    }
+    writer.commit();
+    writer.close();
+
+    IndexReader ir = DirectoryReader.open(dir);
+    Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10),
PAYLOAD_FIELD_NAME);
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+    BytesRef f;
+    while((f = inputIterator.next())!=null) {
+      Document doc = docs.remove(f.utf8ToString());
+      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
+      assertEquals(inputIterator.weight(), 10);
+      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
     }
     assertTrue(docs.isEmpty());
     ir.close();



Mime
View raw message