lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ans...@apache.org
Subject lucene-solr:branch_6x: SOLR-9644: Fixed SimpleMLTQParser and CloudMLTQParser to handle boosts properly and CloudMLTQParser to only extract actual values from IndexableField type fields to the filtered document.
Date Mon, 09 Jan 2017 22:48:44 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x af2ac8376 -> dcb836500


SOLR-9644: Fixed SimpleMLTQParser and CloudMLTQParser to handle boosts properly and CloudMLTQParser
to only extract actual values from IndexableField type fields to the filtered document.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/dcb83650
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/dcb83650
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/dcb83650

Branch: refs/heads/branch_6x
Commit: dcb836500a8d5f8dd0d59264ad0061e5a2926c20
Parents: af2ac83
Author: anshum <anshum@apache.org>
Authored: Mon Jan 9 13:05:21 2017 -0800
Committer: anshum <anshum@apache.org>
Committed: Mon Jan 9 14:47:32 2017 -0800

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  4 ++
 .../apache/solr/search/mlt/CloudMLTQParser.java | 49 ++++++++++++--------
 .../solr/search/mlt/SimpleMLTQParser.java       | 30 ++++++------
 .../solr/search/mlt/CloudMLTQParserTest.java    | 23 ++++++++-
 .../solr/search/mlt/SimpleMLTQParserTest.java   | 34 ++++++++++++--
 5 files changed, 103 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dcb83650/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 66ddc85..33083b49 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -287,6 +287,10 @@ Bug Fixes
 * SOLR-9883: Example schemaless solr config files can lead to invalid tlog replays: when
updates are buffered,
   update processors ordered before DistributedUpdateProcessor, e.g. field normalization,
are never run. (Steve Rowe)
 
+* SOLR-9644: SimpleMLTQParser and CloudMLTQParser did not handle field boosts properly
+  and CloudMLTQParser included extra strings from the field definitions in the query.
+  (Ere Maijala via Anshum Gupta)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dcb83650/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
index 42b5d20..074b275 100644
--- a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
@@ -22,6 +22,7 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.regex.Pattern;
 
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.mlt.MoreLikeThis;
 import org.apache.lucene.search.BooleanClause;
@@ -64,73 +65,83 @@ public class CloudMLTQParser extends QParser {
           SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch
" +
           "document with id [" + id + "]");
     }
-    
+
     String[] qf = localParams.getParams("qf");
     Map<String,Float> boostFields = new HashMap<>();
     MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
-    
-    mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
 
+    mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
     mlt.setMinDocFreq(localParams.getInt("mindf", 0));
-
     mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
-
     mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
-
     mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
-
     mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
-    
     mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
 
-    if(localParams.get("boost") != null) {
-      mlt.setBoost(localParams.getBool("boost"));
-      boostFields = SolrPluginUtils.parseFieldBoosts(qf);
-    }
+    Boolean boost = localParams.getBool("boost", MoreLikeThis.DEFAULT_BOOST);
+    mlt.setBoost(boost);
 
     mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
 
     Map<String, Collection<Object>> filteredDocument = new HashMap<>();
-    ArrayList<String> fieldNames = new ArrayList<>();
+    String[] fieldNames;
 
     if (qf != null) {
+      ArrayList<String> fields = new ArrayList();
       for (String fieldName : qf) {
         if (!StringUtils.isEmpty(fieldName))  {
           String[] strings = splitList.split(fieldName);
           for (String string : strings) {
             if (!StringUtils.isEmpty(string)) {
-              fieldNames.add(string);
+              fields.add(string);
             }
           }
         }
       }
+      // Parse field names and boosts from the fields
+      boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
+      fieldNames = boostFields.keySet().toArray(new String[0]);
     } else {
+      ArrayList<String> fields = new ArrayList();
       for (String field : doc.getFieldNames()) {
         // Only use fields that are stored and have an explicit analyzer.
         // This makes sense as the query uses tf/idf/.. for query construction.
         // We might want to relook and change this in the future though.
         SchemaField f = req.getSchema().getFieldOrNull(field);
         if (f != null && f.stored() && f.getType().isExplicitAnalyzer())
{
-          fieldNames.add(field);
+          fields.add(field);
         }
       }
+      fieldNames = fields.toArray(new String[0]);
     }
 
-    if( fieldNames.size() < 1 ) {
+    if (fieldNames.length < 1) {
       throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
           "MoreLikeThis requires at least one similarity field: qf" );
     }
 
-    mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()]));
+    mlt.setFieldNames(fieldNames);
     for (String field : fieldNames) {
-      filteredDocument.put(field, doc.getFieldValues(field));
+      Collection<Object> fieldValues = doc.getFieldValues(field);
+      if (fieldValues != null) {
+        Collection<Object> values = new ArrayList();
+        for (Object val : fieldValues) {
+          if (val instanceof IndexableField) {
+            values.add(((IndexableField)val).stringValue());
+          }
+          else {
+            values.add(val);
+          }
+        }
+        filteredDocument.put(field, values);
+      }
     }
 
     try {
       Query rawMLTQuery = mlt.like(filteredDocument);
       BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
 
-      if (boostFields.size() > 0) {
+      if (boost && boostFields.size() > 0) {
         BooleanQuery.Builder newQ = new BooleanQuery.Builder();
         newQ.setDisableCoord(boostedMLTQuery.isCoordDisabled());
         newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dcb83650/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
index d4403ad..d31fcc1 100644
--- a/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
@@ -76,16 +76,13 @@ public class SimpleMLTQParser extends QParser {
       mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
       mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
       mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
+      Boolean boost = localParams.getBool("boost", false);
+      mlt.setBoost(boost);
 
-      // what happens if value is explicitly set to false?
-      if(localParams.get("boost") != null) {
-        mlt.setBoost(localParams.getBool("boost", false));
-        boostFields = SolrPluginUtils.parseFieldBoosts(qf);
-      }
+      String[] fieldNames;
       
-      ArrayList<String> fields = new ArrayList<>();
-
       if (qf != null) {
+        ArrayList<String> fields = new ArrayList<>();
         for (String fieldName : qf) {
           if (!StringUtils.isEmpty(fieldName))  {
             String[] strings = splitList.split(fieldName);
@@ -96,26 +93,31 @@ public class SimpleMLTQParser extends QParser {
             }
           }
         }
+        // Parse field names and boosts from the fields
+        boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
+        fieldNames = boostFields.keySet().toArray(new String[0]);
       } else {
-        Map<String, SchemaField> fieldNames = req.getSearcher().getSchema().getFields();
-        for (String fieldName : fieldNames.keySet()) {
-          if (fieldNames.get(fieldName).indexed() && fieldNames.get(fieldName).stored())
-            if (fieldNames.get(fieldName).getType().getNumericType() == null)
+        Map<String, SchemaField> fieldDefinitions = req.getSearcher().getSchema().getFields();
+        ArrayList<String> fields = new ArrayList();
+        for (String fieldName : fieldDefinitions.keySet()) {
+          if (fieldDefinitions.get(fieldName).indexed() && fieldDefinitions.get(fieldName).stored())
+            if (fieldDefinitions.get(fieldName).getType().getNumericType() == null)
               fields.add(fieldName);
         }
+        fieldNames = fields.toArray(new String[0]);
       }
-      if( fields.size() < 1 ) {
+      if (fieldNames.length < 1) {
         throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
             "MoreLikeThis requires at least one similarity field: qf" );
       }
 
-      mlt.setFieldNames(fields.toArray(new String[fields.size()]));
+      mlt.setFieldNames(fieldNames);
       mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
 
       Query rawMLTQuery = mlt.like(scoreDocs[0].doc);
       BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
 
-      if (boostFields.size() > 0) {
+      if (boost && boostFields.size() > 0) {
         BooleanQuery.Builder newQ = new BooleanQuery.Builder();
         newQ.setDisableCoord(boostedMLTQuery.isCoordDisabled());
         newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dcb83650/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
index e3925d1..c00cc71 100644
--- a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
+++ b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
@@ -121,6 +121,27 @@ public class CloudMLTQParserTest extends SolrCloudTestCase {
     }
     assertArrayEquals(expectedIds, actualIds);
 
+    queryResponse = cluster.getSolrClient().query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u^10,lowerfilt1_u^1000
boost=false mintf=0 mindf=0}30"));
+    solrDocuments = queryResponse.getResults();
+    expectedIds = new int[]{13, 14, 20, 22, 31, 32, 18, 23, 15, 16};
+    actualIds = new int[solrDocuments.size()];
+    i = 0;
+    for (SolrDocument solrDocument : solrDocuments) {
+      actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
+    }
+    System.out.println("Actual Array1::::: " + Arrays.toString(actualIds));
+    assertArrayEquals(expectedIds, actualIds);
+
+    queryResponse = cluster.getSolrClient().query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u^10,lowerfilt1_u^1000
boost=true mintf=0 mindf=0}30"));
+    solrDocuments = queryResponse.getResults();
+    expectedIds = new int[]{32, 31, 29, 13, 14, 20, 22, 15, 16, 24};
+    actualIds = new int[solrDocuments.size()];
+    i = 0;
+    for (SolrDocument solrDocument : solrDocuments) {
+      actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
+    }
+    System.out.println("Actual Array2::::: " + Arrays.toString(actualIds));
+    assertArrayEquals(expectedIds, actualIds);
   }
 
   @Test
@@ -220,7 +241,7 @@ public class CloudMLTQParserTest extends SolrCloudTestCase {
     }
     assertArrayEquals(expectedIds, actualIds);
   }
-  
+
   public void testInvalidSourceDocument() throws IOException {
     SolrException e = expectThrows(SolrException.class, () -> {
       cluster.getSolrClient().query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u}999999"));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dcb83650/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java b/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
index 6f3570f..073e96d 100644
--- a/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
+++ b/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
@@ -108,8 +108,38 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
     );
 
     params = new ModifiableSolrParams();
+    params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1^1000 boost=false mintf=0 mindf=0}30");
+    assertQ(req(params),
+        "//result/doc[1]/int[@name='id'][.='31']",
+        "//result/doc[2]/int[@name='id'][.='13']",
+        "//result/doc[3]/int[@name='id'][.='14']",
+        "//result/doc[4]/int[@name='id'][.='18']",
+        "//result/doc[5]/int[@name='id'][.='20']",
+        "//result/doc[6]/int[@name='id'][.='22']",
+        "//result/doc[7]/int[@name='id'][.='23']",
+        "//result/doc[8]/int[@name='id'][.='32']",
+        "//result/doc[9]/int[@name='id'][.='15']",
+        "//result/doc[10]/int[@name='id'][.='16']"
+    );
+
+    params = new ModifiableSolrParams();
+    params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1^1000 boost=true mintf=0 mindf=0}30");
+    params.set(CommonParams.FL, "id");
+    assertQ(req(params),
+        "//result/doc[1]/int[@name='id'][.='31']",
+        "//result/doc[2]/int[@name='id'][.='32']",
+        "//result/doc[3]/int[@name='id'][.='29']",
+        "//result/doc[4]/int[@name='id'][.='13']",
+        "//result/doc[5]/int[@name='id'][.='14']",
+        "//result/doc[6]/int[@name='id'][.='18']",
+        "//result/doc[7]/int[@name='id'][.='20']",
+        "//result/doc[8]/int[@name='id'][.='22']",
+        "//result/doc[9]/int[@name='id'][.='23']",
+        "//result/doc[10]/int[@name='id'][.='15']"
+    );
+
+    params = new ModifiableSolrParams();
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=0 mintf=1}26");
-    params.set(CommonParams.DEBUG, "true");
     assertQ(req(params),
         "//result/doc[1]/int[@name='id'][.='29']",
         "//result/doc[2]/int[@name='id'][.='27']",
@@ -118,14 +148,12 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
 
     params = new ModifiableSolrParams();
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=10 mintf=1}26");
-    params.set(CommonParams.DEBUG, "true");
     assertQ(req(params),
         "//result[@numFound='0']"
     );
 
     params = new ModifiableSolrParams();
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=3 mintf=1 mindf=1}26");
-    params.set(CommonParams.DEBUG, "true");
     assertQ(req(params),
         "//result[@numFound='3']"
     );


Mime
View raw message