Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 648FC11834 for ; Tue, 3 Jun 2014 07:52:18 +0000 (UTC) Received: (qmail 49622 invoked by uid 500); 3 Jun 2014 07:52:18 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 49615 invoked by uid 99); 3 Jun 2014 07:52:18 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 03 Jun 2014 07:52:18 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 03 Jun 2014 07:52:16 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 6616B238890B; Tue, 3 Jun 2014 07:51:56 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1599442 - in /lucene/dev/trunk: lucene/ lucene/classification/src/java/org/apache/lucene/classification/ lucene/queries/src/java/org/apache/lucene/queries/mlt/ lucene/queries/src/test/org/apache/lucene/queries/mlt/ solr/core/src/java/org/a... 
Date: Tue, 03 Jun 2014 07:51:56 -0000 To: commits@lucene.apache.org From: simonw@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140603075156.6616B238890B@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: simonw Date: Tue Jun 3 07:51:55 2014 New Revision: 1599442 URL: http://svn.apache.org/r1599442 Log: LUCENE-5725: MoreLikeThis#like now accepts multiple values per field Modified: lucene/dev/trunk/lucene/CHANGES.txt lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java Modified: lucene/dev/trunk/lucene/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1599442&r1=1599441&r2=1599442&view=diff ============================================================================== --- lucene/dev/trunk/lucene/CHANGES.txt (original) +++ lucene/dev/trunk/lucene/CHANGES.txt Tue Jun 3 07:51:55 2014 @@ -138,6 +138,10 @@ Changes in Backwards Compatibility Polic API Changes +* LUCENE-5725: MoreLikeThis#like now accepts multiple values per field. + The pre-existing method has been deprecated in favor of a variable arguments + for the like text. (Alex Ksikes via Simon Willnauer) + * LUCENE-5711: MergePolicy accepts an IndexWriter instance on each method rather than holding state against a single IndexWriter instance. 
(Simon Willnauer) Modified: lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java?rev=1599442&r1=1599441&r2=1599442&view=diff ============================================================================== --- lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java (original) +++ lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java Tue Jun 3 07:51:55 2014 @@ -84,7 +84,7 @@ public class KNearestNeighborClassifier } BooleanQuery mltQuery = new BooleanQuery(); for (String textFieldName : textFieldNames) { - mltQuery.add(new BooleanClause(mlt.like(new StringReader(text), textFieldName), BooleanClause.Occur.SHOULD)); + mltQuery.add(new BooleanClause(mlt.like(textFieldName, new StringReader(text)), BooleanClause.Occur.SHOULD)); } Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*")); mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST)); Modified: lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java?rev=1599442&r1=1599441&r2=1599442&view=diff ============================================================================== --- lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java (original) +++ lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java Tue Jun 3 07:51:55 2014 @@ -15,23 +15,21 @@ */ package org.apache.lucene.queries.mlt; -import java.io.*; -import java.util.*; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import 
org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.document.Document; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.StorableField; import org.apache.lucene.index.StoredDocument; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.*; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.TFIDFSimilarity; import org.apache.lucene.util.BytesRef; @@ -39,6 +37,15 @@ import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.UnicodeUtil; +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + /** * Generate "more like this" similarity queries. @@ -581,12 +588,17 @@ public final class MoreLikeThis { } /** - * Return a query that will return docs like the passed Reader. + * Return a query that will return docs like the passed Readers. + * This was added in order to treat multi-value fields. * - * @return a query that will return docs like the passed Reader. + * @return a query that will return docs like the passed Readers. */ - public Query like(Reader r, String fieldName) throws IOException { - return createQuery(retrieveTerms(r, fieldName)); + public Query like(String fieldName, Reader... 
readers) throws IOException { + Map words = new HashMap<>(); + for (Reader r : readers) { + addTermFrequencies(r, words, fieldName); + } + return createQuery(createQueue(words)); } /** Modified: lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java?rev=1599442&r1=1599441&r2=1599442&view=diff ============================================================================== --- lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java (original) +++ lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java Tue Jun 3 07:51:55 2014 @@ -26,7 +26,6 @@ import org.apache.lucene.search.BooleanC import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; -import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.StringReader; import java.util.Arrays; @@ -71,7 +70,7 @@ public class MoreLikeThisQuery extends Q } mlt.setMaxQueryTerms(maxQueryTerms); mlt.setStopWords(stopWords); - BooleanQuery bq = (BooleanQuery) mlt.like(new StringReader(likeText), fieldName); + BooleanQuery bq = (BooleanQuery) mlt.like(fieldName, new StringReader(likeText)); BooleanClause[] clauses = bq.getClauses(); //make at least half the terms match bq.setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch)); Modified: lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java?rev=1599442&r1=1599441&r2=1599442&view=diff ============================================================================== --- lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java (original) +++ 
lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java Tue Jun 3 07:51:55 2014 @@ -19,17 +19,18 @@ package org.apache.lucene.queries.mlt; import java.io.IOException; import java.io.StringReader; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; -import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; @@ -53,6 +54,8 @@ public class TestMoreLikeThis extends Lu // Add series of docs with specific information for MoreLikeThis addDoc(writer, "lucene"); addDoc(writer, "lucene release"); + addDoc(writer, "apache"); + addDoc(writer, "apache lucene"); reader = writer.getReader(); writer.shutdown(); @@ -88,8 +91,8 @@ public class TestMoreLikeThis extends Lu float boostFactor = 5; mlt.setBoostFactor(boostFactor); - BooleanQuery query = (BooleanQuery) mlt.like(new StringReader( - "lucene release"), "text"); + BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader( + "lucene release")); List clauses = query.clauses(); assertEquals("Expected " + originalValues.size() + " clauses.", @@ -116,8 +119,8 @@ public class TestMoreLikeThis extends Lu mlt.setMinWordLen(1); mlt.setFieldNames(new String[] {"text"}); mlt.setBoost(true); - BooleanQuery query = (BooleanQuery) mlt.like(new StringReader( - "lucene release"), "text"); + BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader( + "lucene release")); List clauses = query.clauses(); for (BooleanClause clause : clauses) { @@ -135,9 +138,29 @@ public class TestMoreLikeThis extends Lu 
mlt.setMinTermFreq(1); mlt.setMinWordLen(1); mlt.setFieldNames(new String[] {"text", "foobar"}); - mlt.like(new StringReader("this is a test"), "foobar"); + mlt.like("foobar", new StringReader("this is a test")); } - + + // LUCENE-5725 + public void testMultiValues() throws Exception { + MoreLikeThis mlt = new MoreLikeThis(reader); + mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); + mlt.setMinDocFreq(1); + mlt.setMinTermFreq(1); + mlt.setMinWordLen(1); + mlt.setFieldNames(new String[] {"text"}); + + BooleanQuery query = (BooleanQuery) mlt.like("text", + new StringReader("lucene"), new StringReader("lucene release"), + new StringReader("apache"), new StringReader("apache lucene")); + List clauses = query.clauses(); + assertEquals("Expected 2 clauses only!", 2, clauses.size()); + for (BooleanClause clause : clauses) { + Term term = ((TermQuery) clause.getQuery()).getTerm(); + assertTrue(Arrays.asList(new Term("text", "lucene"), new Term("text", "apache")).contains(term)); + } + } + // just basic equals/hashcode etc public void testMoreLikeThisQuery() throws Exception { Query query = new MoreLikeThisQuery("this is a test", new String[] { "text" }, new MockAnalyzer(random()), "text"); Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java?rev=1599442&r1=1599441&r2=1599442&view=diff ============================================================================== --- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java (original) +++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java Tue Jun 3 07:51:55 2014 @@ -370,7 +370,7 @@ public class MoreLikeThisHandler extends public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List filters, List terms, int flags ) throws IOException { // analyzing with the first 
field: previous (stupid) behavior - rawMLTQuery = mlt.like(reader, mlt.getFieldNames()[0]); + rawMLTQuery = mlt.like(mlt.getFieldNames()[0], reader); boostedMLTQuery = getBoostedQuery( rawMLTQuery ); if( terms != null ) { fillInterestingTermsFromMLTQuery( boostedMLTQuery, terms );