lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From m..@apache.org
Subject svn commit: r1171970 [3/3] - in /lucene/dev/trunk: lucene/src/java/org/apache/lucene/search/ modules/grouping/src/java/org/apache/lucene/search/grouping/ solr/ solr/core/src/java/org/apache/solr/handler/component/ solr/core/src/java/org/apache/solr/sea...
Date Sat, 17 Sep 2011 12:48:28 GMT
Copied: lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java (from r1167477, lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedSearch.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java?p2=lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java&p1=lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedSearch.java&r1=1167477&r2=1171970&rev=1171970&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedSearch.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java Sat Sep 17 12:48:27 2011
@@ -1,4 +1,6 @@
-/**
+package org.apache.solr;
+
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -15,45 +17,47 @@
  * limitations under the License.
  */
 
-package org.apache.solr;
-
+import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 
 /**
  * TODO? perhaps use:
  *  http://docs.codehaus.org/display/JETTY/ServletTester
  * rather then open a real connection?
  *
- *
- * @since solr 1.3
+ * @since solr 4.0
  */
-public class TestDistributedSearch extends BaseDistributedSearchTestCase {
+public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
 
   String t1="a_t";
   String i1="a_si";
-  String nint = "n_i";
-  String tint = "n_ti";
-  String nfloat = "n_f";
-  String tfloat = "n_tf";
-  String ndouble = "n_d";
-  String tdouble = "n_td";
-  String nlong = "n_l";
+  String s1="a_s";
   String tlong = "other_tl1";
-  String ndate = "n_dt";
   String tdate_a = "a_n_tdt";
   String tdate_b = "b_n_tdt";
-  
   String oddField="oddField_s";
-  String missingField="ignore_exception__missing_but_valid_field_t";
-  String invalidField="ignore_exception__invalid_field_not_in_schema";
 
-  @Override
   public void doTest() throws Exception {
-    int backupStress = stress; // make a copy so we can restore
+    del("*:*");
+    commit();
 
+    handle.clear();
+    handle.put("QTime", SKIPVAL);
+    handle.put("timestamp", SKIPVAL);
+    // Test distributed grouping with empty indices
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.limit", 10, "sort", i1 + " asc, id asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.limit", 10, "sort", i1 + " asc, id asc", "hl","true","hl.fl",t1);
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1);
+    query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build",
"true", "qt", "spellCheckCompRH");
 
-    del("*:*");
-    indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men", 
+    indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
            tdate_a, "2010-04-20T11:00:00Z",
            tdate_b, "2009-08-20T11:00:00Z",
            "foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
@@ -97,188 +101,52 @@ public class TestDistributedSearch exten
       indexr(id, i);      
     }
 
-    commit();
-
-    handle.clear();
-    handle.put("QTime", SKIPVAL);
-    handle.put("timestamp", SKIPVAL);
-
-    // random value sort
-    for (String f : fieldNames) {
-      query("q","*:*", "sort",f+" desc");
-      query("q","*:*", "sort",f+" asc");
+    int[] values = new int[]{9999, 99999, 999999, 9999999};
+    for (int shard = 0; shard < clients.size(); shard++) {
+      int groupValue = values[shard];
+      for (int i = 500; i < 600; i++) {
+        index_specific(shard, i1, groupValue, s1, "a", id, i * (shard + 1));
+      }
     }
 
-    // these queries should be exactly ordered and scores should exactly match
-    query("q","*:*", "sort",i1+" desc");
-    query("q","*:*", "sort","{!func}add("+i1+",5)"+" desc");
-    query("q","*:*", "sort",i1+" asc");
-    query("q","*:*", "sort",i1+" desc", "fl","*,score");
-    query("q","*:*", "sort","n_tl1 asc", "fl","score");  // test legacy behavior - "score"=="*,score"
-    query("q","*:*", "sort","n_tl1 desc");
-    handle.put("maxScore", SKIPVAL);
-    query("q","{!func}"+i1);// does not expect maxScore. So if it comes ,ignore it. JavaBinCodec.writeSolrDocumentList()
-    //is agnostic of request params.
-    handle.remove("maxScore");
-    query("q","{!func}"+i1, "fl","*,score");  // even scores should match exactly here
-
-    handle.put("highlighting", UNORDERED);
-    handle.put("response", UNORDERED);
-
-    handle.put("maxScore", SKIPVAL);
-    query("q","quick");
-    query("q","all","fl","id","start","0");
-    query("q","all","fl","foofoofoo","start","0");  // no fields in returned docs
-    query("q","all","fl","id","start","100");
-
-    handle.put("score", SKIPVAL);
-    query("q","quick","fl","*,score");
-    query("q","all","fl","*,score","start","1");
-    query("q","all","fl","*,score","start","100");
-
-    query("q","now their fox sat had put","fl","*,score",
-            "hl","true","hl.fl",t1);
-
-    query("q","now their fox sat had put","fl","foofoofoo",
-            "hl","true","hl.fl",t1);
-
-    query("q","matchesnothing","fl","*,score");  
-
-    // test that a single NOW value is propagated to all shards... if that is true
-    // then the primary sort should always be a tie and then the secondary should always decide
-    query("q","{!func}ms(NOW)", "sort","score desc,"+i1+" desc","fl","id");    
-
-    query("q","*:*", "rows",0, "facet","true", "facet.field",t1);
-    query("q","*:*", "rows",0, "facet","true", "facet.field",t1,"facet.limit",1);
-    query("q","*:*", "rows",0, "facet","true", "facet.query","quick", "facet.query","all",
"facet.query","*:*");
-    query("q","*:*", "rows",0, "facet","true", "facet.field",t1, "facet.mincount",2);
-
-    // simple date facet on one field
-    query("q","*:*", "rows",100, "facet","true", 
-          "facet.date",tdate_a, 
-          "facet.date.other", "all", 
-          "facet.date.start","2010-05-01T11:00:00Z", 
-          "facet.date.gap","+1DAY", 
-          "facet.date.end","2010-05-20T11:00:00Z");
-
-    // date facet on multiple fields
-    query("q","*:*", "rows",100, "facet","true", 
-          "facet.date",tdate_a, 
-          "facet.date",tdate_b, 
-          "facet.date.other", "all", 
-          "f."+tdate_b+".facet.date.start","2009-05-01T11:00:00Z", 
-          "f."+tdate_b+".facet.date.gap","+3MONTHS", 
-          "facet.date.start","2010-05-01T11:00:00Z", 
-          "facet.date.gap","+1DAY", 
-          "facet.date.end","2010-05-20T11:00:00Z");
-
-    // simple range facet on one field
-    query("q","*:*", "rows",100, "facet","true", 
-          "facet.range",tlong, 
-          "facet.range.start",200, 
-          "facet.range.gap",100, 
-          "facet.range.end",900);
-
-    // range facet on multiple fields
-    query("q","*:*", "rows",100, "facet","true", 
-          "facet.range",tlong, 
-          "facet.range",i1, 
-          "f."+i1+".facet.range.start",300, 
-          "f."+i1+".facet.range.gap",87, 
-          "facet.range.end",900,
-          "facet.range.start",200, 
-          "facet.range.gap",100, 
-          "f."+tlong+".facet.range.end",900);
-
-    stress=0;  // turn off stress... we want to tex max combos in min time
-    for (int i=0; i<25*RANDOM_MULTIPLIER; i++) {
-      String f = fieldNames[random.nextInt(fieldNames.length)];
-      if (random.nextBoolean()) f = t1;  // the text field is a really interesting one to
facet on (and it's multi-valued too)
-
-      // we want a random query and not just *:* so we'll get zero counts in facets also
-      // TODO: do a better random query
-      String q = random.nextBoolean() ? "*:*" : "id:(1 3 5 7 9 11 13) OR id:[100 TO " + random.nextInt(50)
+ "]";
-
-      int nolimit = random.nextBoolean() ? -1 : 10000;  // these should be equivalent
-
-      // if limit==-1, we should always get exact matches
-      query("q",q, "rows",0, "facet","true", "facet.field",f, "facet.limit",nolimit, "facet.sort","count",
"facet.mincount",random.nextInt(5), "facet.offset",random.nextInt(10));
-      query("q",q, "rows",0, "facet","true", "facet.field",f, "facet.limit",nolimit, "facet.sort","index",
"facet.mincount",random.nextInt(5), "facet.offset",random.nextInt(10));
-      // for index sort, we should get exact results for mincount <= 1
-      query("q",q, "rows",0, "facet","true", "facet.field",f, "facet.sort","index", "facet.mincount",random.nextInt(2),
"facet.offset",random.nextInt(10), "facet.limit",random.nextInt(11)-1);
-    }
-    stress = backupStress;  // restore stress
+    commit();
 
-    // test faceting multiple things at once
-    query("q","*:*", "rows",0, "facet","true", "facet.query","quick", "facet.query","all",
"facet.query","*:*"
-    ,"facet.field",t1);
-
-    // test filter tagging, facet exclusion, and naming (multi-select facet support)
-    query("q","*:*", "rows",0, "facet","true", "facet.query","{!key=myquick}quick", "facet.query","{!key=myall
ex=a}all", "facet.query","*:*"
-    ,"facet.field","{!key=mykey ex=a}"+t1
-    ,"facet.field","{!key=other ex=b}"+t1
-    ,"facet.field","{!key=again ex=a,b}"+t1
-    ,"facet.field",t1
-    ,"fq","{!tag=a}id:[1 TO 7]", "fq","{!tag=b}id:[3 TO 9]"
-    );
-    query("q", "*:*", "facet", "true", "facet.field", "{!ex=t1}SubjectTerms_mfacet", "fq",
"{!tag=t1}SubjectTerms_mfacet:(test 1)", "facet.limit", "10", "facet.mincount", "1");
-
-    // test field that is valid in schema but missing in all shards
-    query("q","*:*", "rows",100, "facet","true", "facet.field",missingField, "facet.mincount",2);
-    // test field that is valid in schema and missing in some shards
-    query("q","*:*", "rows",100, "facet","true", "facet.field",oddField, "facet.mincount",2);
-
-    query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", i1);
-
-    /*** TODO: the failure may come back in "exception"
-    try {
-      // test error produced for field that is invalid for schema
-      query("q","*:*", "rows",100, "facet","true", "facet.field",invalidField, "facet.mincount",2);
-      TestCase.fail("SolrServerException expected for invalid field that is not in schema");
-    } catch (SolrServerException ex) {
-      // expected
-    }
-    ***/
+	  // test grouping
+    // The second sort = id asc . The sorting behaviour is different in dist mode. See TopDocs#merge
+    // The shard the result came from matters in the order if both document sortvalues are equal
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.limit", 10, "sort", i1 + " asc, id asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.limit", 10, "sort", "id asc, _docid_ asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.limit", 10, "sort", "{!func}add(" + i1 + ",5) asc, id asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1);
+    query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build",
"true", "qt", "spellCheckCompRH");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "hl","true","hl.fl",t1);
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.limit", 10, "sort", i1 + " asc, id asc", "group.sort", "id desc");
+
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"offset", 5, "rows", 5, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"offset", 5, "rows", 5, "sort", i1 + " asc, id asc", "group.format", "simple");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"offset", 5, "rows", 5, "sort", i1 + " asc, id asc", "group.main", "true");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc", "group.format", "simple",
"offset", 5, "rows", 5);
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc", "group.main", "true", "offset",
5, "rows", 5);
+
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.query", t1 +
":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", i1 + " asc, id asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1,
"group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", i1 + " asc, id
asc");
+
+    // In order to validate this we need to make sure that during indexing that all documents of one group only occur on the same shard
+    query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field",
i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.ngroups", "true");
+
+    // We cannot validate distributed grouping with scoring as first sort. since there is no global idf. We can check if no errors occur
+    simpleQuery("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field",
i1, "group.limit", 10, "sort", "score desc, _docid_ asc, id asc");
+    simpleQuery("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field",
i1, "group.limit", 10);
+  }
 
-    // Try to get better coverage for refinement queries by turning off over requesting.
-    // This makes it much more likely that we may not get the top facet values and hence
-    // we turn of that checking.
-    handle.put("facet_fields", SKIPVAL);    
-    query("q","*:*", "rows",0, "facet","true", "facet.field",t1,"facet.limit",5, "facet.shard.limit",5);
-    // check a complex key name
-    query("q","*:*", "rows",0, "facet","true", "facet.field","{!key='$a b/c \\' \\} foo'}"+t1,"facet.limit",5,
"facet.shard.limit",5);
-    query("q","*:*", "rows",0, "facet","true", "facet.field","{!key='$a'}"+t1,"facet.limit",5,
"facet.shard.limit",5);
-    handle.remove("facet_fields");
-
-
-    // index the same document to two servers and make sure things
-    // don't blow up.
-    if (clients.size()>=2) {
-      index(id,100, i1, 107 ,t1,"oh no, a duplicate!");
-      for (int i=0; i<clients.size(); i++) {
-        index_specific(i, id,100, i1, 107 ,t1,"oh no, a duplicate!");
-      }
-      commit();
-      query("q","duplicate", "hl","true", "hl.fl", t1);
-      query("q","fox duplicate horses", "hl","true", "hl.fl", t1);
-      query("q","*:*", "rows",100);
+  private void simpleQuery(Object... queryParams) throws SolrServerException {
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    for (int i = 0; i < queryParams.length; i += 2) {
+      params.add(queryParams[i].toString(), queryParams[i + 1].toString());
     }
-
-    // test debugging
-    handle.put("explain", UNORDERED);
-    handle.put("debug", UNORDERED);
-    handle.put("time", SKIPVAL);
-    query("q","now their fox sat had put","fl","*,score",CommonParams.DEBUG_QUERY, "true");
-    query("q", "id:[1 TO 5]", CommonParams.DEBUG_QUERY, "true");
-    query("q", "id:[1 TO 5]", CommonParams.DEBUG, CommonParams.TIMING);
-    query("q", "id:[1 TO 5]", CommonParams.DEBUG, CommonParams.RESULTS);
-    query("q", "id:[1 TO 5]", CommonParams.DEBUG, CommonParams.QUERY);
-
-    // TODO: This test currently fails because debug info is obtained only
-    // on shards with matches.
-    // query("q","matchesnothing","fl","*,score", "debugQuery", "true");
-
-    // Thread.sleep(10000000000L);
+    queryServer(params);
   }
 
 }



Mime
View raw message