Subject: svn commit: r1544570 - in /lucene/dev/trunk/lucene: ./ suggest/ suggest/src/java/org/apache/lucene/search/suggest/ suggest/src/test/org/apache/lucene/search/suggest/
Date: Fri, 22 Nov 2013 15:21:31 -0000
To: commits@lucene.apache.org
From: mikemccand@apache.org
Message-Id: <20131122152131.B9E9B23889CB@eris.apache.org>

Author: mikemccand
Date: Fri Nov 22 15:21:31 2013
New Revision: 1544570

URL: http://svn.apache.org/r1544570
Log:
LUCENE-5329: Document/ExpressionDictionary are now lenient if a doc is missing term/payload/weight

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/suggest/build.xml
    lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
    lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java
    lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
    lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1544570&r1=1544569&r2=1544570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Nov 22 15:21:31 2013
@@ -68,6 +68,11 @@ New Features
 * LUCENE-5336: Add SimpleQueryParser: parser for human-entered queries.
   (Jack Conradson via Robert Muir)
 
+* LUCENE-5329: suggest: DocumentDictionary and
+  DocumentExpressionDictionary are now lenient for dirty documents
+  (missing the term, weight or payload).  (Areek Zillur via
+  Mike McCandless)
+
 Build
 
 * LUCENE-5217: Maven config: get dependencies from Ant+Ivy config; disable
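For context, a minimal, hypothetical sketch (not part of this commit) of what the new leniency means for a caller: documents missing the suggest field are skipped, and a missing weight falls back to 0 instead of throwing. The field names, analyzer and in-memory index below are assumptions, written against the 4.x-style API.

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.suggest.DocumentDictionary;
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

public class LenientDictionaryExample {
  public static void main(String[] args) throws IOException {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_46, new StandardAnalyzer(Version.LUCENE_46)));

    Document complete = new Document();              // term + weight + payload
    complete.add(new TextField("title", "lucene in action", Field.Store.YES));
    complete.add(new NumericDocValuesField("weight", 10));
    complete.add(new StoredField("payload", new BytesRef("meta-1")));
    writer.addDocument(complete);

    Document noWeight = new Document();              // weight missing -> reported as 0
    noWeight.add(new TextField("title", "managing gigabytes", Field.Store.YES));
    noWeight.add(new StoredField("payload", new BytesRef("meta-2")));
    writer.addDocument(noWeight);

    Document noTerm = new Document();                // suggest field missing -> document skipped
    noTerm.add(new NumericDocValuesField("weight", 42));
    writer.addDocument(noTerm);
    writer.close();

    IndexReader reader = DirectoryReader.open(dir);
    InputIterator it = (InputIterator)
        new DocumentDictionary(reader, "title", "weight", "payload").getWordsIterator();
    for (BytesRef term = it.next(); term != null; term = it.next()) {
      System.out.println(term.utf8ToString() + " weight=" + it.weight()
          + " payload=" + it.payload().utf8ToString());
    }
    reader.close();
    dir.close();
  }
}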
Modified: lucene/dev/trunk/lucene/suggest/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/build.xml?rev=1544570&r1=1544569&r2=1544570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/build.xml (original)
+++ lucene/dev/trunk/lucene/suggest/build.xml Fri Nov 22 15:21:31 2013
@@ -42,6 +42,7 @@
+

Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java?rev=1544570&r1=1544569&r2=1544570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java Fri Nov 22 15:21:31 2013
@@ -17,13 +17,13 @@ package org.apache.lucene.search.suggest
  * limitations under the License.
  */
 
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Set;
 
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.StorableField;
 import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.search.spell.Dictionary;
@@ -32,14 +32,24 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
 
 /**
+ * <p>
  * Dictionary with terms, weights and optionally payload information
- * taken from stored fields in a Lucene index.
- * 
- * <b>NOTE:</b> 
+ * taken from stored/indexed fields in a Lucene index.
+ * </p>
+ * <b>NOTE:</b> 
  *  <ul>
 *    <li>
- *      The term, weight and (optionally) payload fields supplied
- *      are required for ALL documents and has to be stored
+ *      The term and (optionally) payload fields have to be
+ *      stored
+ *    </li>
+ *    <li>
+ *      The weight field can be stored or can be a {@link NumericDocValues}.
+ *      If the weight field is not defined, the value of the weight is 0
+ *    </li>
+ *    <li>
+ *      if any of the term or (optionally) payload fields supplied
+ *      do not have a value for a document, then the document is 
+ *      skipped by the dictionary 
+ *    </li>
 *  </ul>
 */
@@ -59,10 +69,7 @@ public class DocumentDictionary implemen
    * the corresponding terms.
    */
   public DocumentDictionary(IndexReader reader, String field, String weightField) {
-    this.reader = reader;
-    this.field = field;
-    this.weightField = weightField;
-    this.payloadField = null;
+    this(reader, field, weightField, null);
   }
   
   /**
@@ -85,14 +92,16 @@ public class DocumentDictionary implemen
   /** Implements {@link InputIterator} from stored fields. */
   protected class DocumentInputIterator implements InputIterator {
 
+    private final int docCount;
     private final Set<String> relevantFields;
     private final boolean hasPayloads;
     private final Bits liveDocs;
     private int currentDocId = -1;
-    private long currentWeight;
-    private BytesRef currentPayload;
-    private StoredDocument doc;
+    private long currentWeight = 0;
+    private BytesRef currentPayload = null;
+    private final NumericDocValues weightValues;
+    
     /**
      * Creates an iterator over term, weight and payload fields from the lucene
      * index. setting to false, implicitly excludes
      * over only term and weight.
      */
     public DocumentInputIterator(boolean hasPayloads) throws IOException {
-      docCount = reader.maxDoc() - 1;
       this.hasPayloads = hasPayloads;
-      currentPayload = null;
-      liveDocs = MultiFields.getLiveDocs(reader);
-      this.relevantFields = getRelevantFields(new String [] {field, weightField, payloadField});
+      docCount = reader.maxDoc() - 1;
+      weightValues = (weightField != null) ? MultiDocValues.getNumericValues(reader, weightField) : null;
+      liveDocs = (reader.leaves().size() > 0) ? MultiFields.getLiveDocs(reader) : null;
+      relevantFields = getRelevantFields(new String [] {field, weightField, payloadField});
     }
 
     @Override
@@ -120,28 +129,29 @@
           continue;
         }
 
-        doc = reader.document(currentDocId, relevantFields);
+        StoredDocument doc = reader.document(currentDocId, relevantFields);
+
+        BytesRef tempPayload = null;
+        BytesRef tempTerm = null;
 
         if (hasPayloads) {
           StorableField payload = doc.getField(payloadField);
-          if (payload == null) {
-            throw new IllegalArgumentException(payloadField + " does not exist");
-          } else if (payload.binaryValue() == null) {
-            throw new IllegalArgumentException(payloadField + " does not have binary value");
+          if (payload == null || (payload.binaryValue() == null && payload.stringValue() == null)) {
+            continue;
           }
-          currentPayload = payload.binaryValue();
+          tempPayload = (payload.binaryValue() != null) ? payload.binaryValue() : new BytesRef(payload.stringValue());
         }
 
-        currentWeight = getWeight(currentDocId);
-
         StorableField fieldVal = doc.getField(field);
-        if (fieldVal == null) {
-          throw new IllegalArgumentException(field + " does not exist");
-        } else if(fieldVal.stringValue() == null) {
-          throw new IllegalArgumentException(field + " does not have string value");
+        if (fieldVal == null || (fieldVal.binaryValue() == null && fieldVal.stringValue() == null)) {
+          continue;
         }
+        tempTerm = (fieldVal.stringValue() != null) ? new BytesRef(fieldVal.stringValue()) : fieldVal.binaryValue();
 
-        return new BytesRef(fieldVal.stringValue());
+        currentPayload = tempPayload;
+        currentWeight = getWeight(doc, currentDocId);
+
+        return tempTerm;
       }
 
       return null;
     }
@@ -156,15 +166,21 @@
       return hasPayloads;
     }
 
-    /** Return the suggestion weight for this document */
-    protected long getWeight(int docId) {
+    /** 
+     * Returns the value of the weightField for the current document.
+     * Retrieves the value for the weightField if its stored (using doc)
+     * or if its indexed as {@link NumericDocValues} (using docId) for the document.
+     * If no value is found, then the weight is 0.
+     */
+    protected long getWeight(StoredDocument doc, int docId) {
       StorableField weight = doc.getField(weightField);
-      if (weight == null) {
-        throw new IllegalArgumentException(weightField + " does not exist");
-      } else if (weight.numericValue() == null) {
-        throw new IllegalArgumentException(weightField + " does not have numeric value");
+      if (weight != null) { // found weight as stored
+        return (weight.numericValue() != null) ? weight.numericValue().longValue() : 0;
+      } else if (weightValues != null) { // found weight as NumericDocValue
+        return weightValues.get(docId);
+      } else { // fall back
+        return 0;
       }
-      return weight.numericValue().longValue();
     }
     
     private Set<String> getRelevantFields(String... fields) {
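As a usage note (not from the patch): a DocumentDictionary like the one above is typically fed straight into a suggester, and with this change a build over a "dirty" index no longer aborts on the first incomplete document. A hypothetical sketch against the 4.x suggest API; the index path and the "title"/"weight" field names are assumptions.

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.suggest.DocumentDictionary;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class SuggesterFromIndex {
  public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.open(new File(args[0]));
    IndexReader reader = DirectoryReader.open(dir);

    // weight may be a stored field or a NumericDocValues field; absent weights become 0
    Dictionary dictionary = new DocumentDictionary(reader, "title", "weight");

    AnalyzingSuggester suggester = new AnalyzingSuggester(new StandardAnalyzer(Version.LUCENE_46));
    suggester.build(dictionary);   // documents missing "title" are silently skipped

    List<LookupResult> results = suggester.lookup("luc", false, 5);
    for (LookupResult result : results) {
      System.out.println(result.key + " => " + result.value);
    }
    reader.close();
    dir.close();
  }
}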
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java?rev=1544570&r1=1544569&r2=1544570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java Fri Nov 22 15:21:31 2013
@@ -30,6 +30,7 @@ import org.apache.lucene.expressions.js.
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.ReaderUtil;
+import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.SortField;
@@ -37,23 +38,34 @@ import org.apache.lucene.util.BytesRefIt
 
 /**
+ * <p>
 * Dictionary with terms and optionally payload information
 * taken from stored fields in a Lucene index. Similar to
 * {@link DocumentDictionary}, except it computes the weight
 * of the terms in a document based on a user-defined expression
 * having one or more {@link NumericDocValuesField} in the document.
- * 
+ * </p>
+ * <b>NOTE:</b> 
 *  <ul>
 *    <li>
- *      The term and (optionally) payload fields supplied
- *      are required for ALL documents and has to be stored
+ *      The term and (optionally) payload fields have to be
+ *      stored
+ *    </li>
+ *    <li>
+ *      if the term or (optionally) payload fields supplied
+ *      do not have a value for a document, then the document is
+ *      rejected by the dictionary
+ *    </li>
+ *    <li>
+ *      All the fields used in weightExpression should
+ *      have values for all documents, if any of the fields do not
+ *      have a value for a document, it will default to 0
+ *    </li>
 *  </ul>
 */
 public class DocumentExpressionDictionary extends DocumentDictionary {
   
-  private ValueSource weightsValueSource;
+  private final ValueSource weightsValueSource;
   
   /**
    * Creates a new dictionary with the contents of the fields named field
@@ -86,8 +98,31 @@
     for (SortField sortField: sortFields) {
       bindings.add(sortField);
     }
-    weightsValueSource = expression.getValueSource(bindings);
+    weightsValueSource = expression.getValueSource(bindings);
+  }
+  
+  /**
+   * Creates a new dictionary with the contents of the fields named field
+   * for the terms, payloadField for the corresponding payloads
+   * and uses the weightsValueSource supplied to determine the
+   * score.
+   */
+  public DocumentExpressionDictionary(IndexReader reader, String field,
+      ValueSource weightsValueSource, String payload) {
+    super(reader, field, null, payload);
+    this.weightsValueSource = weightsValueSource;
+  }
+  
+  /**
+   * Creates a new dictionary with the contents of the fields named field
+   * for the terms and uses the weightsValueSource supplied to determine the
+   * score.
+   */
+  public DocumentExpressionDictionary(IndexReader reader, String field,
+      ValueSource weightsValueSource) {
+    super(reader, field, null, null);
+    this.weightsValueSource = weightsValueSource;
   }
   
   @Override
@@ -98,30 +133,36 @@
   final class DocumentExpressionInputIterator extends DocumentDictionary.DocumentInputIterator {
     
     private FunctionValues currentWeightValues;
-    private int currentLeafIndex = 0;
+    /** leaves of the reader */
     private final List<AtomicReaderContext> leaves;
-    
+    /** starting docIds of all the leaves */
     private final int[] starts;
+    /** current leave index */
+    private int currentLeafIndex = 0;
     
     public DocumentExpressionInputIterator(boolean hasPayloads)
         throws IOException {
       super(hasPayloads);
       leaves = reader.leaves();
-      if (leaves.size() == 0) {
-        throw new IllegalArgumentException("Reader has to have at least one leaf");
-      }
       starts = new int[leaves.size() + 1];
       for (int i = 0; i < leaves.size(); i++) {
         starts[i] = leaves.get(i).docBase;
       }
       starts[leaves.size()] = reader.maxDoc();
-      
-      currentLeafIndex = 0;
-      currentWeightValues = weightsValueSource.getValues(new HashMap<String, Object>(), leaves.get(currentLeafIndex));
+      currentWeightValues = (leaves.size() > 0) 
+          ? weightsValueSource.getValues(new HashMap<String, Object>(), leaves.get(currentLeafIndex))
+          : null;
     }
     
+    /**
+     * Returns the weight for the current docId as computed 
+     * by the weightsValueSource
+     * */
     @Override
-    protected long getWeight(int docId) {
+    protected long getWeight(StoredDocument doc, int docId) {
+      if (currentWeightValues == null) {
+        return 0;
+      }
       int subIndex = ReaderUtil.subIndex(docId, starts);
       if (subIndex != currentLeafIndex) {
        currentLeafIndex = subIndex;
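For orientation (not part of the patch): the expression-based dictionary is constructed with a JavaScript-like weight expression plus SortField bindings for the NumericDocValues fields it reads. A hypothetical sketch; the index path, field names and expression are assumptions.

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.suggest.DocumentExpressionDictionary;
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class ExpressionDictionaryExample {
  public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.open(new File(args[0]));
    IndexReader reader = DirectoryReader.open(dir);

    // bind the NumericDocValues fields referenced by the expression
    Set<SortField> bindings = new HashSet<SortField>();
    bindings.add(new SortField("sales", SortField.Type.LONG));
    bindings.add(new SortField("returns", SortField.Type.LONG));

    // weight of each suggestion = sales - returns; payload read from the stored "payload" field
    Dictionary dict = new DocumentExpressionDictionary(reader, "product_name",
        "sales - returns", bindings, "payload");

    InputIterator it = (InputIterator) dict.getWordsIterator();
    for (BytesRef term = it.next(); term != null; term = it.next()) {
      System.out.println(term.utf8ToString() + " weight=" + it.weight());
    }
    reader.close();
    dir.close();
  }
}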
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java?rev=1544570&r1=1544569&r2=1544570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java Fri Nov 22 15:21:31 2013
@@ -1,22 +1,24 @@
 package org.apache.lucene.search.suggest;
 
 import java.io.IOException;
+import java.util.AbstractMap.SimpleEntry;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.StorableField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.search.suggest.DocumentDictionary;
@@ -48,19 +50,73 @@ public class DocumentDictionaryTest exte
   static final String WEIGHT_FIELD_NAME = "w1";
   static final String PAYLOAD_FIELD_NAME = "p1";
   
-  private Map<String, Document> generateIndexDocuments(int ndocs) {
+  /** Returns Pair(list of invalid document terms, Map of document term -> document) */
+  private Map.Entry<List<String>, Map<String, Document>> generateIndexDocuments(int ndocs, boolean requiresPayload) {
     Map<String, Document> docs = new HashMap<>();
+    List<String> invalidDocTerms = new ArrayList<>();
     for(int i = 0; i < ndocs ; i++) {
-      Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
-      Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
-      Field weight = new StoredField(WEIGHT_FIELD_NAME, 100d + i);
       Document doc = new Document();
-      doc.add(field);
-      doc.add(payload);
-      doc.add(weight);
-      docs.put(field.stringValue(), doc);
+      boolean invalidDoc = false;
+      Field field = null;
+      // usually have valid term field in document
+      if (usually()) {
+        field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
+        doc.add(field);
+      } else {
+        invalidDoc = true;
+      }
+      
+      // even if payload is not required usually have it
+      if (requiresPayload || usually()) {
+        // usually have valid payload field in document
+        if (usually()) {
+          Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
+          doc.add(payload);
+        } else if (requiresPayload) {
+          invalidDoc = true;
+        }
+      }
+      
+      // usually have valid weight field in document
+      if (usually()) {
+        Field weight = (rarely()) ? 
+            new StoredField(WEIGHT_FIELD_NAME, 100d + i) : 
+            new NumericDocValuesField(WEIGHT_FIELD_NAME, 100 + i);
+        doc.add(weight);
+      }
+      
+      String term = null;
+      if (invalidDoc) {
+        term = (field!=null) ? field.stringValue() : "invalid_" + i;
+        invalidDocTerms.add(term);
+      } else {
+        term = field.stringValue();
+      }
+      
+      docs.put(term, doc);
     }
-    return docs;
+    return new SimpleEntry<List<String>, Map<String, Document>>(invalidDocTerms, docs);
+  }
+  
+  @Test
+  public void testEmptyReader() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    // Make sure the index is created?
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+    writer.commit();
+    writer.close();
+    IndexReader ir = DirectoryReader.open(dir);
+    Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+    
+    assertNull(inputIterator.next());
+    assertEquals(inputIterator.weight(), 0);
+    assertNull(inputIterator.payload());
+    
+    ir.close();
+    dir.close();
   }
   
   @Test
@@ -69,7 +125,9 @@ public class DocumentDictionaryTest exte
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(10);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true);
+    Map<String, Document> docs = res.getValue();
+    List<String> invalidDocTerms = res.getKey();
     for(Document doc: docs.values()) {
       writer.addDocument(doc);
     }
@@ -77,15 +135,21 @@
     writer.close();
     IndexReader ir = DirectoryReader.open(dir);
     Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
-    InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
     BytesRef f;
-    while((f = tfp.next())!=null) {
+    while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(tfp.weight(), doc.getField(WEIGHT_FIELD_NAME).numericValue().longValue());
-      assertTrue(tfp.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      Field weightField = doc.getField(WEIGHT_FIELD_NAME);
+      assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
+      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+    }
+    
+    for (String invalidTerm : invalidDocTerms) {
+      assertNotNull(docs.remove(invalidTerm));
     }
     assertTrue(docs.isEmpty());
+    
     ir.close();
     dir.close();
   }
@@ -96,7 +160,9 @@ public class DocumentDictionaryTest exte
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(10);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
+    Map<String, Document> docs = res.getValue();
+    List<String> invalidDocTerms = res.getKey();
     for(Document doc: docs.values()) {
       writer.addDocument(doc);
     }
@@ -104,15 +170,22 @@
     writer.close();
     IndexReader ir = DirectoryReader.open(dir);
     Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
-    InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
     BytesRef f;
-    while((f = tfp.next())!=null) {
+    while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(tfp.weight(), doc.getField(WEIGHT_FIELD_NAME).numericValue().longValue());
-      assertEquals(tfp.payload(), null);
+      Field weightField = doc.getField(WEIGHT_FIELD_NAME);
+      assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
+      assertEquals(inputIterator.payload(), null);
     }
+    
+    for (String invalidTerm : invalidDocTerms) {
+      assertNotNull(docs.remove(invalidTerm));
+    }
+    
+    assertTrue(docs.isEmpty());
+    
     ir.close();
     dir.close();
   }
@@ -123,11 +196,14 @@ public class DocumentDictionaryTest exte
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(10);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
+    Map<String, Document> docs = res.getValue();
+    List<String> invalidDocTerms = res.getKey();
     Random rand = random();
     List<String> termsToDel = new ArrayList<>();
     for(Document doc : docs.values()) {
-      if(rand.nextBoolean()) {
+      StorableField f = doc.getField(FIELD_NAME);
+      if(rand.nextBoolean() && f != null && !invalidDocTerms.contains(f.stringValue())) {
        termsToDel.add(doc.get(FIELD_NAME));
      }
      writer.addDocument(doc);
    }
@@ -152,15 +228,21 @@
     IndexReader ir = DirectoryReader.open(dir);
     assertEquals(ir.numDocs(), docs.size());
     Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
-    InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
     BytesRef f;
-    while((f = tfp.next())!=null) {
+    while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(tfp.weight(), doc.getField(WEIGHT_FIELD_NAME).numericValue().longValue());
-      assertEquals(tfp.payload(), null);
+      Field weightField = doc.getField(WEIGHT_FIELD_NAME);
+      assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
+      assertEquals(inputIterator.payload(), null);
+    }
+    
+    for (String invalidTerm : invalidDocTerms) {
+      assertNotNull(docs.remove(invalidTerm));
     }
     assertTrue(docs.isEmpty());
+    
     ir.close();
     dir.close();
   }

Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java?rev=1544570&r1=1544569&r2=1544570&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java Fri Nov 22 15:21:31 2013
@@ -37,6 +37,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.store.Directory;
@@ -72,12 +73,37 @@ public class DocumentExpressionDictionar
   }
   
   @Test
+  public void testEmptyReader() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    // Make sure the index is created?
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+    writer.commit();
+    writer.close();
+    IndexReader ir = DirectoryReader.open(dir);
+    Set<SortField> sortFields = new HashSet<SortField>();
+    sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
+    sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
+    sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
+    Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME);
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+    
+    assertNull(inputIterator.next());
+    assertEquals(inputIterator.weight(), 0);
+    assertNull(inputIterator.payload());
+    
+    ir.close();
+    dir.close();
+  }
+  
+  @Test
   public void testBasic() throws IOException {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(atLeast(10));
+    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
     for(Document doc: docs.values()) {
       writer.addDocument(doc);
     }
@@ -90,16 +116,16 @@
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
     Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME);
-    InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
     BytesRef f;
-    while((f = tfp.next())!=null) {
+    while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
       long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
       long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
       long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(tfp.weight(), (w1 + w2) - w3);
-      assertTrue(tfp.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      assertEquals(inputIterator.weight(), (w1 + w2) - w3);
+      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
     }
     assertTrue(docs.isEmpty());
     ir.close();
@@ -112,7 +138,7 @@
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(atLeast(10));
+    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
     for(Document doc: docs.values()) {
       writer.addDocument(doc);
     }
@@ -125,16 +151,16 @@
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG));
     Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w1 + (0.2 * w2) - (w3 - w1)/2", sortFields);
-    InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
     BytesRef f;
-    while((f = tfp.next())!=null) {
+    while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
       long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
       long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
       long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(tfp.weight(), (long)(w1 + (0.2 * w2) - (w3 - w1)/2));
-      assertEquals(tfp.payload(), null);
+      assertEquals(inputIterator.weight(), (long)(w1 + (0.2 * w2) - (w3 - w1)/2));
+      assertEquals(inputIterator.payload(), null);
     }
     assertTrue(docs.isEmpty());
     ir.close();
@@ -147,7 +173,7 @@
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map<String, Document> docs = generateIndexDocuments(atLeast(10));
+    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
     Random rand = random();
     List<String> termsToDel = new ArrayList<>();
     for(Document doc : docs.values()) {
@@ -180,15 +206,44 @@
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG));
     sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG));
     Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w2-w1", sortFields, PAYLOAD_FIELD_NAME);
-    InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
     BytesRef f;
-    while((f = tfp.next())!=null) {
+    while((f = inputIterator.next())!=null) {
       Document doc = docs.remove(f.utf8ToString());
       long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
       long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
-      assertEquals(tfp.weight(), w2-w1);
-      assertTrue(tfp.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      assertEquals(inputIterator.weight(), w2-w1);
+      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+    }
+    assertTrue(docs.isEmpty());
+    ir.close();
+    dir.close();
+  }
+  
+  @Test
+  public void testWithValueSource() throws IOException {
+    
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
+    for(Document doc: docs.values()) {
+      writer.addDocument(doc);
+    }
+    writer.commit();
+    writer.close();
+    
+    IndexReader ir = DirectoryReader.open(dir);
+    Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
+    InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator();
+    BytesRef f;
+    while((f = inputIterator.next())!=null) {
+      Document doc = docs.remove(f.utf8ToString());
+      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
+      assertEquals(inputIterator.weight(), 10);
+      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
     }
     assertTrue(docs.isEmpty());
     ir.close();
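The testWithValueSource case above exercises the new ValueSource-based constructors; a corresponding hypothetical usage sketch follows (the constant weight of 10, the field names and the index path are assumptions, not part of the commit).

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.suggest.DocumentExpressionDictionary;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ConstantWeightDictionaryExample {
  public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.open(new File(args[0]));
    IndexReader reader = DirectoryReader.open(dir);

    // every suggestion that survives the term/payload checks gets a constant weight of 10
    ValueSource constWeight = new DoubleConstValueSource(10);
    Dictionary dictionary = new DocumentExpressionDictionary(reader, "title", constWeight, "payload");

    // ... hand "dictionary" to a Lookup implementation, then:
    reader.close();
    dir.close();
  }
}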