lucene-commits mailing list archives

From m..@apache.org
Subject svn commit: r1137067 - in /lucene/dev/branches/branch_3x: lucene/src/java/org/apache/lucene/search/ lucene/src/test/org/apache/lucene/search/ solr/ solr/src/common/org/apache/solr/common/params/ solr/src/java/org/apache/solr/handler/component/ solr/src...
Date Fri, 17 Jun 2011 22:59:36 GMT
Author: mvg
Date: Fri Jun 17 22:59:36 2011
New Revision: 1137067

URL: http://svn.apache.org/viewvc?rev=1137067&view=rev
Log:
SOLR-2524: Adding grouping to Solr 3x

Added:
    lucene/dev/branches/branch_3x/solr/src/common/org/apache/solr/common/params/GroupParams.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrFieldSource.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/Grouping.java
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/TestGroupingSearch.java
Modified:
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java
    lucene/dev/branches/branch_3x/solr/CHANGES.txt
    lucene/dev/branches/branch_3x/solr/common-build.xml
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/QueryComponent.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrField.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java Fri Jun 17 22:59:36 2011
@@ -110,7 +110,16 @@ public abstract class CachingCollector e
       curScores = new float[128];
       cachedScores.add(curScores);
     }
-    
+
+    ScoreCachingCollector(Collector other, int maxDocsToCache) {
+      super(other, maxDocsToCache);
+
+      cachedScorer = new CachedScorer();
+      cachedScores = new ArrayList<float[]>();
+      curScores = new float[INITIAL_ARRAY_SIZE];
+      cachedScores.add(curScores);
+    }
+
     @Override
     public void collect(int doc) throws IOException {
 
@@ -212,6 +221,10 @@ public abstract class CachingCollector e
     NoScoreCachingCollector(Collector other, double maxRAMMB) {
      super(other, maxRAMMB, false);
     }
+
+    NoScoreCachingCollector(Collector other, int maxDocsToCache) {
+     super(other, maxDocsToCache);
+    }
     
     @Override
     public void collect(int doc) throws IOException {
@@ -356,6 +369,24 @@ public abstract class CachingCollector e
   public static CachingCollector create(Collector other, boolean cacheScores, double maxRAMMB) {
     return cacheScores ? new ScoreCachingCollector(other, maxRAMMB) : new NoScoreCachingCollector(other, maxRAMMB);
   }
+
+  /**
+   * Create a new {@link CachingCollector} that wraps the given collector and
+   * caches documents and scores up to the specified max docs threshold.
+   *
+   * @param other
+   *          the Collector to wrap and delegate calls to.
+   * @param cacheScores
+   *          whether to cache scores in addition to document IDs. Note that
+   *          this increases the RAM consumed per doc
+   * @param maxDocsToCache
+   *          the maximum number of documents for which to cache the documents and
+   *          possibly the scores. If the collector exceeds the threshold,
+   *          no documents or scores are cached.
+   */
+  public static CachingCollector create(Collector other, boolean cacheScores, int maxDocsToCache) {
+    return cacheScores ? new ScoreCachingCollector(other, maxDocsToCache) : new NoScoreCachingCollector(other, maxDocsToCache);
+  }
   
   // Prevent extension from non-internal classes
   private CachingCollector(Collector other, double maxRAMMB, boolean cacheScores) {
@@ -372,6 +403,15 @@ public abstract class CachingCollector e
     maxDocsToCache = (int) ((maxRAMMB * 1024 * 1024) / bytesPerDoc);
   }
 
+  private CachingCollector(Collector other, int maxDocsToCache) {
+    this.other = other;
+
+    cachedDocs = new ArrayList<int[]>();
+    curDocs = new int[INITIAL_ARRAY_SIZE];
+    cachedDocs.add(curDocs);
+    this.maxDocsToCache = maxDocsToCache;
+  }
+
   @Override
   public boolean acceptsDocsOutOfOrder() {
     return other.acceptsDocsOutOfOrder();
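
For context, a minimal usage sketch of the new doc-count bound factory method (the searcher, query and the two collectors are illustrative names; isCached() and replay() are the existing CachingCollector API, used the same way by Grouping.java below):

    static void searchWithCache(IndexSearcher searcher, Query query,
                                Collector firstPass, Collector secondPass) throws IOException {
      // wrap the first pass collector and cache up to 10,000 docs and their scores
      CachingCollector cache = CachingCollector.create(firstPass, true, 10000);
      searcher.search(query, cache);

      // if the 10,000 doc threshold was never exceeded, replay the cached hits ...
      if (cache.isCached()) {
        cache.replay(secondPass);
      } else {
        searcher.search(query, secondPass);  // ... otherwise fall back to a fresh search
      }
    }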

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java Fri Jun 17 22:59:36 2011
@@ -17,15 +17,11 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import java.io.IOException;
-
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.CachingCollector;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.LuceneTestCase;
 
+import java.io.IOException;
+
 public class TestCachingCollector extends LuceneTestCase {
 
   private static final double ONE_BYTE = 1.0 / (1024 * 1024); // 1 byte out of MB
@@ -76,7 +72,7 @@ public class TestCachingCollector extend
 
   public void testBasic() throws Exception {
     for (boolean cacheScores : new boolean[] { false, true }) {
-      CachingCollector cc = CachingCollector.create(new NoOpCollector(false), cacheScores, 1);
+      CachingCollector cc = CachingCollector.create(new NoOpCollector(false), cacheScores, 1.0);
       cc.setScorer(new MockScorer());
       
       // collect 1000 docs
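
Note on the literal change above: now that create(...) is overloaded on both int and double, the bare literal 1 would resolve to the new create(Collector, boolean, int) variant (maxDocsToCache = 1), so the test spells it 1.0 to keep exercising the RAM-bounded variant:

    CachingCollector.create(c, cacheScores, 1);    // int literal: binds to maxDocsToCache = 1
    CachingCollector.create(c, cacheScores, 1.0);  // double literal: binds to maxRAMMB = 1.0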

Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Fri Jun 17 22:59:36 2011
@@ -110,7 +110,15 @@ New Features
 
 * SOLR-1915: DebugComponent now supports using a NamedList to model
  Explanation objects in its responses instead of
-  Explanation.toString  (hossman) 
+  Explanation.toString  (hossman)
+
+* SOLR-2524: (SOLR-236, SOLR-237, SOLR-1773, SOLR-1311) Grouping / Field collapsing
+using the Lucene grouping contrib. The search result can be grouped by field and query. 
+(Martijn van Groningen, Emmanuel Keller, Shalin Shekhar Mangar, Koji Sekiguchi, 
+   Iván de Prado, Ryan McKinley, Marc Sturlese, Peter Karich, Bojan Smid, 
+   Charles Hornberger, Dieter Grad, Dmitry Lihachev, Doug Steigerwald,
+   Karsten Sperling, Michael Gundlach, Oleg Gnatovskiy, Thomas Traeger,
+   Harish Agarwal, yonik, Michael McCandless, Bill Bell)
 
 
 Optimizations

Modified: lucene/dev/branches/branch_3x/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/common-build.xml?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/common-build.xml (original)
+++ lucene/dev/branches/branch_3x/solr/common-build.xml Fri Jun 17 22:59:36 2011
@@ -201,7 +201,8 @@
     <pathelement location="${common-solr.dir}/../lucene/build/contrib/queries/classes/java" />
     <pathelement location="${common-solr.dir}/../lucene/build/contrib/spatial/classes/java" />
     <pathelement location="${common-solr.dir}/../lucene/build/contrib/spellchecker/classes/java" />
-  </path>   
+    <pathelement location="${common-solr.dir}/../lucene/build/contrib/grouping/classes/java" />
+  </path>
 
   <target name="prep-lucene-jars">
     <sequential>
@@ -216,6 +217,7 @@
         <fileset dir="../lucene/contrib/queries" includes="build.xml" />
         <fileset dir="../lucene/contrib/spatial" includes="build.xml" />
         <fileset dir="../lucene/contrib/spellchecker" includes="build.xml" />
+        <fileset dir="../lucene/contrib/grouping" includes="build.xml" />
       </subant>
     </sequential>
   </target>
@@ -247,6 +249,9 @@
       <fileset dir="../lucene/build/contrib/spellchecker">
         <include name="lucene-spellchecker-${version}.jar" />
       </fileset>
+      <fileset dir="../lucene/build/contrib/grouping">
+        <include name="lucene-grouping-${version}.jar" />
+      </fileset>
       </copy>
   </target>  
   
@@ -260,6 +265,7 @@
     <ant antfile="build.xml" target="compile" dir="../lucene/contrib/spatial"/>
     <ant antfile="build.xml" target="compile" dir="../lucene/contrib/spellchecker"/>
     <ant antfile="build.xml" target="compile" dir="../lucene/contrib/xml-query-parser"/>
+    <ant antfile="build.xml" target="compile" dir="../lucene/contrib/grouping"/>
   </target>
    
    

Added: lucene/dev/branches/branch_3x/solr/src/common/org/apache/solr/common/params/GroupParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/common/org/apache/solr/common/params/GroupParams.java?rev=1137067&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/common/org/apache/solr/common/params/GroupParams.java (added)
+++ lucene/dev/branches/branch_3x/solr/src/common/org/apache/solr/common/params/GroupParams.java Fri Jun 17 22:59:36 2011
@@ -0,0 +1,57 @@
+package org.apache.solr.common.params;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Group parameters.
+ */
+public interface GroupParams {
+
+  public static final String GROUP = "group";
+
+  public static final String GROUP_QUERY = GROUP + ".query";
+  public static final String GROUP_FIELD = GROUP + ".field";
+  public static final String GROUP_SORT = GROUP + ".sort";
+
+  /** the limit for the number of documents in each group */
+  public static final String GROUP_LIMIT = GROUP + ".limit";
+  /** the offset for the doclist of each group */
+  public static final String GROUP_OFFSET = GROUP + ".offset";
+
+  /** treat the first group result as the main result.  true/false */
+  public static final String GROUP_MAIN = GROUP + ".main";
+
+  /** the format of the grouped response: grouped (the default) or simple (one flat list) */
+  public static final String GROUP_FORMAT = GROUP + ".format";
+
+  /**
+   * Whether to cache the first pass search (doc ids and scores) for the second pass search.
+   * The value defines the maximum size of the group cache as a percentage of maxdoc.
+   * Values are integers from 0 to 100. A value of 0 disables the group cache.
+   * The default is 0.
+   */
+  public static final String GROUP_CACHE_PERCENTAGE = GROUP + ".cache.percent";
+
+  // Note: since multiple fields can be supplied to group on, but facets are computed only once for the
+  // whole result, it only makes sense to support these parameters for the first group.
+  /** Whether the docSet (for example for faceting) should be based on plain documents (UNGROUPED) or on the groups (GROUPED). */
+  public static final String GROUP_COLLAPSE = GROUP + ".collapse";
+
+  /** Whether the group count should be included in the response. */
+  public static final String GROUP_TOTAL_COUNT = GROUP + ".ngroups";
+
+}
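
A request exercising these parameters might look as follows (the field names are illustrative; see TestGroupingSearch below for concrete uses of group and group.field):

    /select?q=title:title&group=true&group.field=name
           &group.limit=2&group.ngroups=true&group.cache.percent=20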

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/QueryComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/QueryComponent.java?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/QueryComponent.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/QueryComponent.java Fri Jun 17 22:59:36 2011
@@ -27,10 +27,7 @@ import org.apache.lucene.search.SortFiel
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrException;
-import org.apache.solr.common.params.CommonParams;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.params.ShardParams;
-import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.params.*;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.request.SolrQueryRequest;
@@ -179,6 +176,85 @@ public class QueryComponent extends Sear
     SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
     cmd.setTimeAllowed(timeAllowed);
     SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();
+      
+    //
+    // grouping / field collapsing
+    //
+    boolean doGroup = params.getBool(GroupParams.GROUP, false);
+    if (doGroup) {
+      try {
+        int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
+        boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
+        String[] fields = params.getParams(GroupParams.GROUP_FIELD);
+        String[] queries = params.getParams(GroupParams.GROUP_QUERY);
+        String groupSortStr = params.get(GroupParams.GROUP_SORT);
+        boolean main = params.getBool(GroupParams.GROUP_MAIN, false);
+
+        String formatStr = params.get(GroupParams.GROUP_FORMAT, Grouping.Format.grouped.name());
+        Grouping.Format defaultFormat;
+        try {
+          defaultFormat = Grouping.Format.valueOf(formatStr);
+        } catch (IllegalArgumentException e) {
+          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, String.format("Illegal %s parameter", GroupParams.GROUP_FORMAT));
+        }
+
+        boolean includeTotalGroupCount = params.getBool(GroupParams.GROUP_TOTAL_COUNT, false);
+        Grouping.TotalCount defaultTotalCount = includeTotalGroupCount ? Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
+        Sort sort = cmd.getSort();
+        // groupSort defaults to sort
+        Sort groupSort = groupSortStr == null ? cmd.getSort() : QueryParsing.parseSort(groupSortStr, req);
+
+        int limitDefault = cmd.getLen(); // this is normally from "rows"
+        int groupOffsetDefault = params.getInt(GroupParams.GROUP_OFFSET, 0);
+        int docsPerGroupDefault = params.getInt(GroupParams.GROUP_LIMIT, 1);
+
+        Grouping grouping = new Grouping(searcher, result, cmd, cacheSecondPassSearch, maxDocsPercentageToCache, main);
+        grouping.setSort(sort)
+            .setGroupSort(groupSort)
+            .setDefaultFormat(defaultFormat)
+            .setLimitDefault(limitDefault)
+            .setDefaultTotalCount(defaultTotalCount)
+            .setDocsPerGroupDefault(docsPerGroupDefault)
+            .setGroupOffsetDefault(groupOffsetDefault);
+
+        if (fields != null) {
+          for (String field : fields) {
+            grouping.addFieldCommand(field, rb.req);
+          }
+        }
+
+        if (queries != null) {
+          for (String groupByStr : queries) {
+            grouping.addQueryCommand(groupByStr, rb.req);
+          }
+        }
+
+        if (rb.doHighlights || rb.isDebug()) {
+          // we need a single list of the returned docs
+          cmd.setFlags(SolrIndexSearcher.GET_DOCLIST);
+        }
+
+        grouping.execute();
+        rb.setResult( result );
+        if (grouping.isSignalCacheWarning()) {
+          rsp.add(
+              "cacheWarning",
+              String.format("Cache limit of %d percent relative to maxdoc has been exceeded. Please increase the cache size or disable caching.", maxDocsPercentageToCache)
+          );
+        }
+        rsp.add("grouped", result.groupedResults);
+        if (grouping.mainResult != null) {
+          rsp.add("response", grouping.mainResult);
+          rsp.getToLog().add("hits", grouping.mainResult.matches());
+        } else {
+          rsp.getToLog().add("hits", grouping.getCommands().get(0).getMatches());
+        }
+        return;
+      } catch (ParseException e) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
+      }
+    }
+
     searcher.search(result,cmd);
     rb.setResult( result );
 
@@ -309,6 +385,7 @@ public class QueryComponent extends Sear
   public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
     if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
       mergeIds(rb, sreq);
+      mergeGroupCounts(rb, sreq);
     }
 
     if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
@@ -525,6 +602,9 @@ public class QueryComponent extends Sear
 
       // we already have the field sort values
       sreq.params.remove(ResponseBuilder.FIELD_SORT_VALUES);
+
+      // disable grouping
+      sreq.params.remove("group");
 
       // make sure that the id is returned for correlation.
       String fl = sreq.params.get(CommonParams.FL);
@@ -578,6 +658,42 @@ public class QueryComponent extends Sear
     }
   }
 
+  /**
+   * Merges the group counts from the shard responses into one distributed group count response.
+   *
+   * @param rb   The response builder
+   * @param sreq The shard request
+   */
+  private void mergeGroupCounts(ResponseBuilder rb, ShardRequest sreq) {
+    NamedList combinedGroupCounts = new NamedList<Object>();
+
+    for (ShardResponse srsp : sreq.responses) {
+      //check if the namelist is null or not (if a shard crashed)
+      if (srsp.getSolrResponse().getResponse() == null) {
+        continue;
+      }
+
+      NamedList groupCounts = (NamedList<Object>) srsp.getSolrResponse().getResponse().get("groupCount");
+      /*for (Object o : rb.resultIds.keySet()) {
+        String id = (String) o;
+      }*/
+
+      if (groupCounts != null) {
+        for (int i = 0; i < groupCounts.size(); i++) {
+          String groupGroupId = groupCounts.getName(i);
+          ShardDoc sdoc = rb.resultIds.get(groupGroupId);
+          if (sdoc != null) {
+            combinedGroupCounts.add(groupGroupId, groupCounts.getVal(i));
+          }
+        }
+      }
+    }
+
+    if (combinedGroupCounts.size() > 0) {
+      rb.rsp.add("groupCount", combinedGroupCounts);
+    }
+  }
+
   /////////////////////////////////////////////
   ///  SolrInfoMBean
   ////////////////////////////////////////////
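
When group=true is set, the component adds a "grouped" section to the response in place of the usual flat result. Mirroring the XPath assertions in TestGroupingSearch below, the default grouped format looks roughly like:

    <lst name="grouped">
      <lst name="name">
        <int name="matches">5</int>
        <arr name="groups">
          <lst>
            <str name="groupValue">author2</str>
            <result name="doclist" numFound="2" start="0"> ... </result>
          </lst>
          ...
        </arr>
      </lst>
    </lst>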

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrField.java?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrField.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrField.java Fri Jun 17 22:59:36 2011
@@ -17,19 +17,15 @@
 
 package org.apache.solr.schema;
 
-import org.apache.lucene.search.SortField;
 import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.SortField;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.response.XMLWriter;
-import org.apache.solr.search.function.ValueSource;
-import org.apache.solr.search.function.FieldCacheSource;
-import org.apache.solr.search.function.DocValues;
-import org.apache.solr.search.function.StringIndexDocValues;
 import org.apache.solr.search.QParser;
+import org.apache.solr.search.function.ValueSource;
 
-import java.util.Map;
 import java.io.IOException;
+import java.util.Map;
 /**
  * @version $Id$
  */
@@ -59,71 +55,4 @@ public class StrField extends FieldType 
     field.checkFieldCacheSource(parser);
     return new StrFieldSource(field.getName());
   }
-}
-
-
-class StrFieldSource extends FieldCacheSource {
-
-  public StrFieldSource(String field) {
-    super(field);
-  }
-
-  @Override
-  public String description() {
-    return "str(" + field + ')';
-  }
-
-  @Override
-  public DocValues getValues(Map context, IndexReader reader) throws IOException {
-    return new StringIndexDocValues(this, reader, field) {
-      @Override
-      protected String toTerm(String readableValue) {
-        return readableValue;
-      }
-
-      @Override
-      public float floatVal(int doc) {
-        return (float)intVal(doc);
-      }
-
-      @Override
-      public int intVal(int doc) {
-        int ord=order[doc];
-        return ord;
-      }
-
-      @Override
-      public long longVal(int doc) {
-        return (long)intVal(doc);
-      }
-
-      @Override
-      public double doubleVal(int doc) {
-        return (double)intVal(doc);
-      }
-
-      @Override
-      public String strVal(int doc) {
-        int ord=order[doc];
-        return lookup[ord];
-      }
-
-      @Override
-      public String toString(int doc) {
-        return description() + '=' + strVal(doc);
-      }
-    };
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    return o instanceof StrFieldSource
-            && super.equals(o);
-  }
-
-  private static int hcode = SortableFloatFieldSource.class.hashCode();
-  @Override
-  public int hashCode() {
-    return hcode + super.hashCode();
-  };
 }
\ No newline at end of file

Added: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrFieldSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrFieldSource.java?rev=1137067&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrFieldSource.java (added)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrFieldSource.java Fri Jun 17 22:59:36 2011
@@ -0,0 +1,92 @@
+package org.apache.solr.schema;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.solr.search.function.DocValues;
+import org.apache.solr.search.function.FieldCacheSource;
+import org.apache.solr.search.function.StringIndexDocValues;
+
+import java.io.IOException;
+import java.util.Map;
+
+public class StrFieldSource extends FieldCacheSource {
+
+  public StrFieldSource(String field) {
+    super(field);
+  }
+
+  @Override
+  public String description() {
+    return "str(" + field + ')';
+  }
+
+  @Override
+  public DocValues getValues(Map context, IndexReader reader) throws IOException {
+    return new StringIndexDocValues(this, reader, field) {
+      @Override
+      protected String toTerm(String readableValue) {
+        return readableValue;
+      }
+
+      @Override
+      public float floatVal(int doc) {
+        return (float)intVal(doc);
+      }
+
+      @Override
+      public int intVal(int doc) {
+        int ord=order[doc];
+        return ord;
+      }
+
+      @Override
+      public long longVal(int doc) {
+        return (long)intVal(doc);
+      }
+
+      @Override
+      public double doubleVal(int doc) {
+        return (double)intVal(doc);
+      }
+
+      @Override
+      public String strVal(int doc) {
+        int ord=order[doc];
+        return lookup[ord];
+      }
+
+      @Override
+      public String toString(int doc) {
+        return description() + '=' + strVal(doc);
+      }
+    };
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    return o instanceof StrFieldSource
+            && super.equals(o);
+  }
+
+  private static int hcode = SortableFloatFieldSource.class.hashCode();
+  @Override
+  public int hashCode() {
+    return hcode + super.hashCode();
+  };
+}

Added: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/Grouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/Grouping.java?rev=1137067&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/Grouping.java (added)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/Grouping.java Fri Jun 17 22:59:36 2011
@@ -0,0 +1,789 @@
+package org.apache.solr.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.grouping.*;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.StrFieldSource;
+import org.apache.solr.search.function.OrdFieldSource;
+import org.apache.solr.search.function.ReverseOrdFieldSource;
+import org.apache.solr.search.function.ValueSource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Basic Solr Grouping infrastructure.
+ * Warning: NOT thread safe!
+ *
+ * @lucene.experimental
+ */
+public class Grouping {
+
+  private final static Logger logger = LoggerFactory.getLogger(Grouping.class);
+
+  private final SolrIndexSearcher searcher;
+  private final SolrIndexSearcher.QueryResult qr;
+  private final SolrIndexSearcher.QueryCommand cmd;
+  private final List<Command> commands = new ArrayList<Command>();
+  private final boolean main;
+  private final boolean cacheSecondPassSearch;
+  private final int maxDocsPercentageToCache;
+
+  private Sort sort;
+  private Sort groupSort;
+  private int limitDefault;
+  private int docsPerGroupDefault;
+  private int groupOffsetDefault;
+  private Format defaultFormat;
+  private TotalCount defaultTotalCount;
+
+  private int maxDoc;
+  private boolean needScores;
+  private boolean getDocSet;
+  private boolean getDocList; // doclist needed for debugging or highlighting
+  private Query query;
+  private DocSet filter;
+  private Filter luceneFilter;
+  private NamedList grouped = new SimpleOrderedMap();
+  private Set<Integer> idSet = new LinkedHashSet<Integer>();  // used for tracking unique docs when we need a doclist
+  private int maxMatches;  // max number of matches from any grouping command
+  private float maxScore = Float.NEGATIVE_INFINITY;  // max score seen in any doclist
+  private boolean signalCacheWarning = false;
+
+
+  public DocList mainResult;  // output if one of the grouping commands should be used as the main result.
+
+  /**
+   * @param searcher
+   * @param qr
+   * @param cmd
+   * @param cacheSecondPassSearch Whether to cache the documents and scores from the first pass search for the second
+   *                              pass search.
+   * @param maxDocsPercentageToCache The maximum number of documents, as a percentage of maxdoc,
+   *                                 that is allowed in the cache. When this threshold is exceeded,
+   *                                 the cache is not used in the second pass search.
+   */
+  public Grouping(SolrIndexSearcher searcher,
+                  SolrIndexSearcher.QueryResult qr,
+                  SolrIndexSearcher.QueryCommand cmd,
+                  boolean cacheSecondPassSearch,
+                  int maxDocsPercentageToCache,
+                  boolean main) {
+    this.searcher = searcher;
+    this.qr = qr;
+    this.cmd = cmd;
+    this.cacheSecondPassSearch = cacheSecondPassSearch;
+    this.maxDocsPercentageToCache = maxDocsPercentageToCache;
+    this.main = main;
+  }
+
+  public void add(Grouping.Command groupingCommand) {
+    commands.add(groupingCommand);
+  }
+
+  /**
+   * Adds a field command based on the specified field.
+   *
+   * @param field The fieldname to group by.
+   */
+  public void addFieldCommand(String field, SolrQueryRequest request) throws ParseException {
+    SchemaField schemaField = searcher.getSchema().getField(field); // Throws an exception when field doesn't exist. Bad request.
+    FieldType fieldType = schemaField.getType();
+    ValueSource valueSource = fieldType.getValueSource(schemaField, null);
+    if (!(valueSource instanceof OrdFieldSource
+        || valueSource instanceof ReverseOrdFieldSource
+        || valueSource instanceof StrFieldSource)) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Cannot group on a non string-like field.");
+    }
+
+    Grouping.CommandField gc = new CommandField();
+    gc.groupSort = groupSort;
+    gc.groupBy = field;
+    gc.key = field;
+    gc.numGroups = limitDefault;
+    gc.docsPerGroup = docsPerGroupDefault;
+    gc.groupOffset = groupOffsetDefault;
+    gc.offset = cmd.getOffset();
+    gc.sort = sort;
+    gc.format = defaultFormat;
+    gc.totalCount = defaultTotalCount;
+
+    if (main) {
+      gc.main = true;
+      gc.format = Grouping.Format.simple;
+    }
+
+    if (gc.format == Grouping.Format.simple) {
+      gc.groupOffset = 0;  // doesn't make sense
+    }
+    commands.add(gc);
+  }
+
+  public void addQueryCommand(String groupByStr, SolrQueryRequest request) throws ParseException {
+    QParser parser = QParser.getParser(groupByStr, null, request);
+    Query gq = parser.getQuery();
+    Grouping.CommandQuery gc = new CommandQuery();
+    gc.query = gq;
+    gc.groupSort = groupSort;
+    gc.key = groupByStr;
+    gc.numGroups = limitDefault;
+    gc.docsPerGroup = docsPerGroupDefault;
+    gc.groupOffset = groupOffsetDefault;
+
+    // these two params will only be used if this is for the main result set
+    gc.offset = cmd.getOffset();
+    gc.numGroups = limitDefault;
+    gc.format = defaultFormat;
+
+    if (main) {
+      gc.main = true;
+      gc.format = Grouping.Format.simple;
+    }
+    if (gc.format == Grouping.Format.simple) {
+      gc.docsPerGroup = gc.numGroups;  // doesn't make sense to limit to one
+      gc.groupOffset = gc.offset;
+    }
+
+    commands.add(gc);
+  }
+
+  public Grouping setSort(Sort sort) {
+    this.sort = sort;
+    return this;
+  }
+
+  public Grouping setGroupSort(Sort groupSort) {
+    this.groupSort = groupSort;
+    return this;
+  }
+
+  public Grouping setLimitDefault(int limitDefault) {
+    this.limitDefault = limitDefault;
+    return this;
+  }
+
+  public Grouping setDocsPerGroupDefault(int docsPerGroupDefault) {
+    this.docsPerGroupDefault = docsPerGroupDefault;
+    return this;
+  }
+
+  public Grouping setGroupOffsetDefault(int groupOffsetDefault) {
+    this.groupOffsetDefault = groupOffsetDefault;
+    return this;
+  }
+
+  public Grouping setDefaultFormat(Format defaultFormat) {
+    this.defaultFormat = defaultFormat;
+    return this;
+  }
+
+  public Grouping setDefaultTotalCount(TotalCount defaultTotalCount) {
+    this.defaultTotalCount = defaultTotalCount;
+    return this;
+  }
+
+  public List<Command> getCommands() {
+    return commands;
+  }
+
+  public void execute() throws IOException {
+    if (commands.isEmpty()) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specify at least one field, function or query to group by.");
+    }
+
+    DocListAndSet out = new DocListAndSet();
+    qr.setDocListAndSet(out);
+
+    filter = cmd.getFilter() != null ? cmd.getFilter() : searcher.getDocSet(cmd.getFilterList());
+    luceneFilter = filter == null ? null : filter.getTopFilter();
+    maxDoc = searcher.maxDoc();
+
+    needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
+    boolean cacheScores = false;
+    // NOTE: Change this when groupSort can be specified per group
+    if (cacheSecondPassSearch && !needScores && !commands.isEmpty()) {
+      if (commands.get(0).groupSort == null) {
+        cacheScores = true;
+      } else {
+        for (SortField field : commands.get(0).groupSort.getSort()) {
+          if (field.getType() == SortField.SCORE) {
+            cacheScores = true;
+            break;
+          }
+        }
+      }
+    } else if (needScores) {
+      cacheScores = needScores;
+    }
+    getDocSet = (cmd.getFlags() & SolrIndexSearcher.GET_DOCSET) != 0;
+    getDocList = (cmd.getFlags() & SolrIndexSearcher.GET_DOCLIST) != 0;
+    query = QueryUtils.makeQueryable(cmd.getQuery());
+
+    for (Command cmd : commands) {
+      cmd.prepare();
+    }
+
+    List<Collector> collectors = new ArrayList<Collector>(commands.size());
+    for (Command cmd : commands) {
+      Collector collector = cmd.createFirstPassCollector();
+      if (collector != null)
+        collectors.add(collector);
+    }
+
+    Collector allCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
+    DocSetCollector setCollector = null;
+    if (getDocSet) {
+      setCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, allCollectors);
+      allCollectors = setCollector;
+    }
+
+    CachingCollector cachedCollector = null;
+    if (cacheSecondPassSearch && allCollectors != null) {
+      int maxDocsToCache = (int) Math.round(maxDoc * (maxDocsPercentageToCache / 100.0d));
+      // Only makes sense to cache if we can cache more than zero documents.
+      // Maybe we should have a minimum and a maximum that define the window within which we would like to cache.
+      if (maxDocsToCache > 0) {
+        allCollectors = cachedCollector = CachingCollector.create(allCollectors, cacheScores, maxDocsToCache);
+      }
+    }
+
+    if (allCollectors != null) {
+      searcher.search(query, luceneFilter, allCollectors);
+    }
+
+    if (getDocSet) {
+      qr.setDocSet(setCollector.getDocSet());
+    }
+
+    collectors.clear();
+    for (Command cmd : commands) {
+      Collector collector = cmd.createSecondPassCollector();
+      if (collector != null)
+        collectors.add(collector);
+    }
+
+    if (!collectors.isEmpty()) {
+      Collector secondPhaseCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
+      if (collectors.size() > 0) {
+        if (cachedCollector != null) {
+          if (cachedCollector.isCached()) {
+            cachedCollector.replay(secondPhaseCollectors);
+          } else {
+            signalCacheWarning = true;
+            logger.warn(String.format("The grouping cache is active, but not used because it exceeded the max cache limit of %d percent", maxDocsPercentageToCache));
+            logger.warn("Please increase cache size or disable group caching.");
+            searcher.search(query, luceneFilter, secondPhaseCollectors);
+          }
+        } else {
+          searcher.search(query, luceneFilter, secondPhaseCollectors);
+        }
+      }
+    }
+
+    for (Command cmd : commands) {
+      cmd.finish();
+    }
+
+    qr.groupedResults = grouped;
+
+    if (getDocList) {
+      int sz = idSet.size();
+      int[] ids = new int[sz];
+      int idx = 0;
+      for (int val : idSet) {
+        ids[idx++] = val;
+      }
+      qr.setDocList(new DocSlice(0, sz, ids, null, maxMatches, maxScore));
+    }
+  }
+
+  /**
+   * Returns offset + len if len is zero or higher and offset + len falls within [0, max]. Otherwise returns max.
+   *
+   * @param offset The offset
+   * @param len The number of documents to return
+   * @param max The value to return if len < 0 or if offset + len falls outside [0, max]
+   * @return offset + len if len is zero or higher and within bounds, otherwise max
+   */
+  int getMax(int offset, int len, int max) {
+    int v = len < 0 ? max : offset + len;
+    if (v < 0 || v > max) v = max;
+    return v;
+  }
+
+  /**
+   * Returns whether a cache warning should be sent to the client.
+   * The value <code>true</code> is returned when the cache was not used because the caching limit was exceeded, otherwise
+   * <code>false</code> is returned.
+   *
+   * @return whether a cache warning should be sent to the client
+   */
+  public boolean isSignalCacheWarning() {
+    return signalCacheWarning;
+  }
+
+  //======================================   Inner classes =============================================================
+
+  public static enum Format {
+
+    /**
+     * Grouped result. Each group has its own result set.
+     */
+    grouped,
+
+    /**
+     * Flat result. All documents of all groups are put in one list.
+     */
+    simple
+  }
+
+  public static enum TotalCount {
+    /**
+     * Computations should be based on groups.
+     */
+    grouped,
+
+    /**
+     * Computations should be based on plain documents, so not taking grouping into account.
+     */
+    ungrouped
+  }
+
+  /**
+   * General group command. A group command is responsible for creating the first and second pass collectors.
+   * A group command is also responsible for creating the response structure.
+   * <p/>
+   * Note: maybe creating the response structure should be done in something like a ResponseBuilder?
+   * Warning: NOT thread safe!
+   */
+  public abstract class Command<GROUP_VALUE_TYPE> {
+
+    public String key;       // the name to use for this group in the response
+    public Sort groupSort;   // the sort of the documents *within* a single group.
+    public Sort sort;        // the sort between groups
+    public int docsPerGroup; // how many docs in each group - from "group.limit" param, default=1
+    public int groupOffset;  // the offset within each group (for paging within each group)
+    public int numGroups;    // how many groups - defaults to the "rows" parameter
+    int actualGroupsToFind;  // How many groups should actually be found. Based on groupOffset and numGroups.
+    public int offset;       // offset into the list of groups
+    public Format format;
+    public boolean main;     // use as the main result in simple format (grouped.main=true param)
+    public TotalCount totalCount = TotalCount.ungrouped;
+
+    TopGroups<GROUP_VALUE_TYPE> result;
+
+
+    /**
+     * Prepare this <code>Command</code> for execution.
+     *
+     * @throws IOException If I/O related errors occur
+     */
+    protected abstract void prepare() throws IOException;
+
+    /**
+     * Returns one or more {@link Collector} instances that are needed to perform the first pass search.
+     * If multiple Collectors are returned then these are wrapped in a {@link org.apache.lucene.search.MultiCollector}.
+     *
+     * @return one or more {@link Collector} instances that are needed to perform the first pass search
+     * @throws IOException If I/O related errors occur
+     */
+    protected abstract Collector createFirstPassCollector() throws IOException;
+
+    /**
+     * Returns zero or more {@link Collector} instances that are needed to perform the second pass search.
+     * If no {@link Collector} instances are created, <code>null</code> is returned.
+     * If multiple Collectors are returned then these are wrapped in a {@link org.apache.lucene.search.MultiCollector}.
+     *
+     * @return zero or more {@link Collector} instances that are needed to perform the second pass search
+     * @throws IOException If I/O related errors occur
+     */
+    protected Collector createSecondPassCollector() throws IOException {
+      return null;
+    }
+
+    /**
+     * Performs any necessary post actions to prepare the response.
+     *
+     * @throws IOException If I/O related errors occur
+     */
+    protected abstract void finish() throws IOException;
+
+    /**
+     * Returns the number of matches for this <code>Command</code>.
+     *
+     * @return the number of matches for this <code>Command</code>
+     */
+    public abstract int getMatches();
+
+    /**
+     * Returns the number of groups found for this <code>Command</code>.
+     * If the command doesn't support counting the groups <code>null</code> is returned.
+     *
+     * @return the number of groups found for this <code>Command</code>
+     */
+    protected Integer getNumberOfGroups() {
+      return null;
+    }
+
+    protected NamedList commonResponse() {
+      NamedList groupResult = new SimpleOrderedMap();
+      grouped.add(key, groupResult);  // grouped={ key={
+
+      int matches = getMatches();
+      groupResult.add("matches", matches);
+      if (totalCount == TotalCount.grouped) {
+        Integer totalNrOfGroups = getNumberOfGroups();
+        groupResult.add("ngroups", totalNrOfGroups == null ? 0 : totalNrOfGroups);
+      }
+      maxMatches = Math.max(maxMatches, matches);
+      return groupResult;
+    }
+
+    protected DocList getDocList(GroupDocs groups) {
+      int max = groups.totalHits;
+      int off = groupOffset;
+      int len = docsPerGroup;
+      if (format == Format.simple) {
+        off = offset;
+        len = numGroups;
+      }
+      int docsToCollect = getMax(off, len, max);
+
+      // TODO: implement a DocList impl that doesn't need to start at offset=0
+      int docsCollected = Math.min(docsToCollect, groups.scoreDocs.length);
+
+      int ids[] = new int[docsCollected];
+      float[] scores = needScores ? new float[docsCollected] : null;
+      for (int i = 0; i < ids.length; i++) {
+        ids[i] = groups.scoreDocs[i].doc;
+        if (scores != null)
+          scores[i] = groups.scoreDocs[i].score;
+      }
+
+      float score = groups.maxScore;
+      maxScore = Math.max(maxScore, score);
+      DocSlice docs = new DocSlice(off, Math.max(0, ids.length - off), ids, scores, groups.totalHits, score);
+
+      if (getDocList) {
+        DocIterator iter = docs.iterator();
+        while (iter.hasNext())
+          idSet.add(iter.nextDoc());
+      }
+      return docs;
+    }
+
+    protected void addDocList(NamedList rsp, GroupDocs groups) {
+      rsp.add("doclist", getDocList(groups));
+    }
+
+    // Flatten the groups and gather up to offset + rows documents
+    protected DocList createSimpleResponse() {
+      GroupDocs[] groups = result != null ? result.groups : new GroupDocs[0];
+
+      List<Integer> ids = new ArrayList<Integer>();
+      List<Float> scores = new ArrayList<Float>();
+      int docsToGather = getMax(offset, numGroups, maxDoc);
+      int docsGathered = 0;
+      float maxScore = Float.NEGATIVE_INFINITY;
+
+      outer:
+      for (GroupDocs group : groups) {
+        if (group.maxScore > maxScore) {
+          maxScore = group.maxScore;
+        }
+
+        for (ScoreDoc scoreDoc : group.scoreDocs) {
+          if (docsGathered >= docsToGather) {
+            break outer;
+          }
+
+          ids.add(scoreDoc.doc);
+          scores.add(scoreDoc.score);
+          docsGathered++;
+        }
+      }
+
+      int len = Math.min(numGroups, docsGathered);
+      if (offset > len) {
+        len = 0;
+      }
+
+      int[] docs = ArrayUtils.toPrimitive(ids.toArray(new Integer[ids.size()]));
+      float[] docScores = ArrayUtils.toPrimitive(scores.toArray(new Float[scores.size()]));
+      DocSlice docSlice = new DocSlice(offset, len, docs, docScores, getMatches(), maxScore);
+
+      if (getDocList) {
+        for (int i = offset; i < docs.length; i++) {
+          idSet.add(docs[i]);
+        }
+      }
+
+      return docSlice;
+    }
+
+  }
+
+  /**
+   * A group command for grouping on a field.
+   */
+  public class CommandField extends Command<String> {
+
+    public String groupBy;
+    TermFirstPassGroupingCollector firstPass;
+    TermSecondPassGroupingCollector secondPass;
+
+    TermAllGroupsCollector allGroupsCollector;
+
+    // If offset falls outside the number of documents a group can provide, use this collector instead of secondPass
+    TotalHitCountCollector fallBackCollector;
+    Collection<SearchGroup<String>> topGroups;
+
+    /**
+     * {@inheritDoc}
+     */
+    protected void prepare() throws IOException {
+      actualGroupsToFind = getMax(offset, numGroups, maxDoc);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected Collector createFirstPassCollector() throws IOException {
+      // Ok we don't want groups, but do want a total count
+      if (actualGroupsToFind <= 0) {
+        fallBackCollector = new TotalHitCountCollector();
+        return fallBackCollector;
+      }
+
+      sort = sort == null ? Sort.RELEVANCE : sort;
+      firstPass = new TermFirstPassGroupingCollector(groupBy, sort, actualGroupsToFind);
+      return firstPass;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected Collector createSecondPassCollector() throws IOException {
+      if (actualGroupsToFind <= 0) {
+        allGroupsCollector = new TermAllGroupsCollector(groupBy);
+        return totalCount == TotalCount.grouped ? allGroupsCollector : null;
+      }
+
+      topGroups = format == Format.grouped ? firstPass.getTopGroups(offset, false) : firstPass.getTopGroups(0, false);
+      if (topGroups == null) {
+        if (totalCount == TotalCount.grouped) {
+          allGroupsCollector = new TermAllGroupsCollector(groupBy);
+          fallBackCollector = new TotalHitCountCollector();
+          return MultiCollector.wrap(allGroupsCollector, fallBackCollector);
+        } else {
+          fallBackCollector = new TotalHitCountCollector();
+          return fallBackCollector;
+        }
+      }
+
+      int groupedDocsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
+      groupedDocsToCollect = Math.max(groupedDocsToCollect, 1);
+      secondPass = new TermSecondPassGroupingCollector(
+          groupBy, topGroups, sort, groupSort, groupedDocsToCollect, needScores, needScores, false
+      );
+
+      if (totalCount == TotalCount.grouped) {
+        allGroupsCollector = new TermAllGroupsCollector(groupBy);
+        return MultiCollector.wrap(secondPass, allGroupsCollector);
+      } else {
+        return secondPass;
+      }
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected void finish() throws IOException {
+      result = secondPass != null ? secondPass.getTopGroups(0) : null;
+      if (main) {
+        mainResult = createSimpleResponse();
+        return;
+      }
+
+      NamedList groupResult = commonResponse();
+
+      if (format == Format.simple) {
+        groupResult.add("doclist", createSimpleResponse());
+        return;
+      }
+
+      List groupList = new ArrayList();
+      groupResult.add("groups", groupList);        // grouped={ key={ groups=[
+
+      if (result == null) {
+        return;
+      }
+
+      // handle case of rows=0
+      if (numGroups == 0) return;
+
+      for (GroupDocs<String> group : result.groups) {
+        NamedList nl = new SimpleOrderedMap();
+        groupList.add(nl);                         // grouped={ key={ groups=[ {
+
+
+        // To keep the response format compatible with trunk.
+        // In trunk MutableValue can convert an indexed value to its native type, e.g. string to int.
+        // The only option I currently see is to use the FieldType for this
+        if (group.groupValue != null) {
+          SchemaField schemaField = searcher.getSchema().getField(groupBy);
+          FieldType fieldType = schemaField.getType();
+          String readableValue = fieldType.indexedToReadable(group.groupValue);
+          Fieldable field = schemaField.createField(readableValue, 0.0f);
+          nl.add("groupValue", fieldType.toObject(field));
+        } else {
+          nl.add("groupValue", null);
+        }
+
+        addDocList(nl, group);
+      }
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public int getMatches() {
+      if (result == null && fallBackCollector == null) {
+        return 0;
+      }
+
+      return result != null ? result.totalHitCount : fallBackCollector.getTotalHits();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected Integer getNumberOfGroups() {
+      return allGroupsCollector == null ? null : allGroupsCollector.getGroupCount();
+    }
+  }
+
+  /**
+   * A group command for grouping on a query.
+   */
+  //NOTE: doesn't need to be generic. Maybe Command interface --> First / Second pass abstract impl.
+  public class CommandQuery extends Command {
+
+    public Query query;
+    TopDocsCollector topCollector;
+    FilterCollector collector;
+
+    /**
+     * {@inheritDoc}
+     */
+    protected void prepare() throws IOException {
+      actualGroupsToFind = getMax(offset, numGroups, maxDoc);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected Collector createFirstPassCollector() throws IOException {
+      DocSet groupFilt = searcher.getDocSet(query);
+      topCollector = newCollector(groupSort, needScores);
+      collector = new FilterCollector(groupFilt, topCollector);
+      return collector;
+    }
+
+    TopDocsCollector newCollector(Sort sort, boolean needScores) throws IOException {
+      int groupDocsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
+      if (sort == null || sort == Sort.RELEVANCE) {
+        return TopScoreDocCollector.create(groupDocsToCollect, true);
+      } else {
+        return TopFieldCollector.create(searcher.weightSort(sort), groupDocsToCollect, false, needScores, needScores, true);
+      }
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected void finish() throws IOException {
+      TopDocsCollector topDocsCollector = (TopDocsCollector) collector.collector;
+      TopDocs topDocs = topDocsCollector.topDocs();
+      GroupDocs<String> groupDocs = new GroupDocs<String>(topDocs.getMaxScore(), topDocs.totalHits, topDocs.scoreDocs, query.toString(), null);
+      if (main) {
+        mainResult = getDocList(groupDocs);
+      } else {
+        NamedList rsp = commonResponse();
+        addDocList(rsp, groupDocs);
+      }
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public int getMatches() {
+      return collector.matches;
+    }
+  }
+
+  /**
+   * A collector that filters out incoming doc ids that are not in the filter
+   */
+  static class FilterCollector extends Collector {
+
+    final DocSet filter;
+    final Collector collector;
+    int docBase;
+    int matches;
+
+    public FilterCollector(DocSet filter, Collector collector) throws IOException {
+      this.filter = filter;
+      this.collector = collector;
+    }
+
+    public void setScorer(Scorer scorer) throws IOException {
+      collector.setScorer(scorer);
+    }
+
+    public void collect(int doc) throws IOException {
+      matches++;
+      if (filter.exists(doc + docBase)) {
+        collector.collect(doc);
+      }
+    }
+
+    public void setNextReader(IndexReader reader, int docBase) throws IOException {
+      this.docBase = docBase;
+      collector.setNextReader(reader, docBase);
+    }
+
+    public boolean acceptsDocsOutOfOrder() {
+      return collector.acceptsDocsOutOfOrder();
+    }
+  }
+
+}
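
To make the cache threshold in execute() concrete: with maxDoc = 1,000,000 and group.cache.percent=20, maxDocsToCache = round(1,000,000 * 20 / 100.0) = 200,000, so the first pass is cached only as long as it collects at most 200,000 documents; a percentage of 0 yields maxDocsToCache = 0 and no CachingCollector is created at all.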

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java Fri Jun 17 22:59:36 2011
@@ -947,9 +947,10 @@ public class SolrIndexSearcher extends I
   }
 
   private static final int NO_CHECK_QCACHE       = 0x80000000;
-  private static final int GET_DOCSET            = 0x40000000;
+  public static final int GET_DOCSET            = 0x40000000;
   private static final int NO_CHECK_FILTERCACHE  = 0x20000000;
 
+  public static final int GET_DOCLIST           =        0x02;
   public static final int GET_SCORES             =       0x01;
 
   /**
@@ -1872,6 +1873,7 @@ public class SolrIndexSearcher extends I
   public static class QueryResult {
     private boolean partialResults;
     private DocListAndSet docListAndSet;
+    public Object groupedResults; // Todo: Refactor. At least getter setter and different type.
     
     public DocList getDocList() { return docListAndSet.docList; }
     public void setDocList(DocList list) {
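
The GET_* values above are distinct single bits, so callers test for them with a bitwise and, exactly as Grouping.execute() does:

    boolean getDocSet  = (cmd.getFlags() & SolrIndexSearcher.GET_DOCSET)  != 0;
    boolean getDocList = (cmd.getFlags() & SolrIndexSearcher.GET_DOCLIST) != 0;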

Added: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/TestGroupingSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/TestGroupingSearch.java?rev=1137067&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/TestGroupingSearch.java (added)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/TestGroupingSearch.java Fri Jun 17 22:59:36 2011
@@ -0,0 +1,713 @@
+package org.apache.solr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.FieldCache;
+import org.apache.noggit.JSONUtil;
+import org.apache.noggit.ObjectBuilder;
+import org.apache.solr.common.params.GroupParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.IndexSchema;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.*;
+
+/**
+ * Tests for the grouping functionality added in SOLR-2524: grouping by field,
+ * group.query, group.main / group.format=simple, paging across and within
+ * groups, group.ngroups, the grouping cache, and a randomized model-based
+ * comparison of Solr's results against an in-memory model.
+ */
+public class TestGroupingSearch extends SolrTestCaseJ4 {
+
+  public static final String FOO_STRING_FIELD = "foo_s1";
+  public static final String SMALL_STRING_FIELD = "small_s1";
+  public static final String SMALL_INT_FIELD = "small_i";
+
+  @BeforeClass
+  public static void beforeTests() throws Exception {
+    initCore("solrconfig.xml","schema12.xml");
+  }
+
+  @Before
+  public void cleanIndex() {
+    assertU(delQ("*:*"));
+    assertU(commit());
+  }
+
+  @Test
+  public void testGroupingGroupSortingScore_basic() {
+    assertU(add(doc("id", "1","name", "author1", "title", "a book title", "group_sI", "1")));
+    assertU(add(doc("id", "2","name", "author1", "title", "the title", "group_sI", "2")));
+    assertU(add(doc("id", "3","name", "author2", "title", "a book title", "group_sI", "1")));
+    assertU(add(doc("id", "4","name", "author2", "title", "title", "group_sI", "2")));
+    assertU(add(doc("id", "5","name", "author3", "title", "the title of a title", "group_sI", "1")));
+    assertU(commit());
+
+    assertQ(req("q","title:title", "group", "true", "group.field","name")
+            ,"//lst[@name='grouped']/lst[@name='name']"
+            ,"*[count(//arr[@name='groups']/lst) = 3]"
+
+            ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
+    //        ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
+            ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
+
+            ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
+    //       ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
+            ,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"
+
+            ,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
+    //        ,"//arr[@name='groups']/lst[3]/int[@name='matches'][.='1']"
+            ,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
+            ,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
+            );
+
+    assertQ(req("q","title:title", "group", "true", "group.field","group_sI")
+            ,"//lst[@name='grouped']/lst[@name='group_sI']"
+            ,"*[count(//arr[@name='groups']/lst) = 2]"
+
+            ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='2']"
+            ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
+
+            ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='1']"
+            ,"//arr[@name='groups']/lst[2]/result[@numFound='3']"
+            ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
+            );
+  }
+
+  @Test
+  public void testGroupingGroupSortingScore_basicWithGroupSortEqualToSort() {
+    assertU(add(doc("id", "1","name", "author1", "title", "a book title")));
+    assertU(add(doc("id", "2","name", "author1", "title", "the title")));
+    assertU(add(doc("id", "3","name", "author2", "title", "a book title")));
+    assertU(add(doc("id", "4","name", "author2", "title", "title")));
+    assertU(add(doc("id", "5","name", "author3", "title", "the title of a title")));
+    assertU(commit());
+
+    assertQ(req("q","title:title", "group", "true", "group.field","name", "sort", "score desc", "group.sort", "score desc")
+            ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
+    //        ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
+            ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
+
+            ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
+    //        ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
+            ,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"
+
+            ,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
+    //        ,"//arr[@name='groups']/lst[3]/int[@name='matches'][.='1']"
+            ,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
+            ,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
+            );
+  }
+
+  @Test
+  public void testGroupingGroupSortingScore_withTotalGroupCount() {
+    assertU(add(doc("id", "1","name", "author1", "title", "a book title", "group_sI", "1")));
+    assertU(add(doc("id", "2","name", "author1", "title", "the title", "group_sI", "2")));
+    assertU(add(doc("id", "3","name", "author2", "title", "a book title", "group_sI", "1")));
+    assertU(add(doc("id", "4","name", "author2", "title", "title", "group_sI", "2")));
+    assertU(add(doc("id", "5","name", "author3", "title", "the title of a title", "group_sI", "1")));
+    assertU(commit());
+
+    assertQ(req("q","title:title", "group", "true", "group.field","name", "group.ngroups", "true")
+            ,"//lst[@name='grouped']/lst[@name='name']"
+            ,"//lst[@name='grouped']/lst[@name='name']/int[@name='matches'][.='5']"
+            ,"//lst[@name='grouped']/lst[@name='name']/int[@name='ngroups'][.='3']"
+            ,"*[count(//arr[@name='groups']/lst) = 3]"
+
+            ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
+            ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
+
+            ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
+            ,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"
+
+            ,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
+            ,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
+            ,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
+            );
+
+    assertQ(req("q","title:title", "group", "true", "group.field","group_sI", "group.ngroups", "true")
+            ,"//lst[@name='grouped']/lst[@name='group_sI']/int[@name='matches'][.='5']"
+            ,"//lst[@name='grouped']/lst[@name='group_sI']/int[@name='ngroups'][.='2']"
+            ,"*[count(//arr[@name='groups']/lst) = 2]"
+
+            ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='2']"
+            ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
+
+            ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='1']"
+            ,"//arr[@name='groups']/lst[2]/result[@numFound='3']"
+            ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
+            );
+  }
+
+  @Test
+  public void testGroupingGroupSortingScore_basicWithSortFooIDescAndScoreAscWithCaching() {
+    assertU(add(doc("id", "1","name", "author1", "title", "a book title", "score_f", "20", "foo_i", "5")));
+    assertU(add(doc("id", "2","name", "author1", "title", "the title", "score_f", "10", "foo_i", "5")));
+    assertU(add(doc("id", "3","name", "author2", "title", "a book title", "score_f", "30", "foo_i", "3")));
+    assertU(commit());
+    assertU(add(doc("id", "4","name", "author2", "title", "title", "score_f", "40", "foo_i", "2")));
+    assertU(add(doc("id", "5","name", "author3", "title", "the titttle of a title blehh", "score_f", "50", "foo_i", "1")));
+    assertU(commit());
+
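+    // GROUP_CACHE_PERCENTAGE=100 allows up to 100% of the result set to be cached
+    // for the grouping second pass, exercising the caching code path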
+    assertQ(req("q","{!func} score_f", "group", "true", "group.field","name", "sort", "foo_i desc, score asc", GroupParams.GROUP_CACHE_PERCENTAGE, "100")
+            ,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
+            ,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
+            ,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
+
+            ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author1']"
+            ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='2']"
+
+            ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author2']"
+            ,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='3']"
+            );
+  }
+
+
+  @Test
+  public void testGroupingGroupSortingWeight() {
+    assertU(add(doc("id", "1","name", "author1", "weight", "12.1")));
+    assertU(add(doc("id", "2","name", "author1", "weight", "2.1")));
+    assertU(add(doc("id", "3","name", "author2", "weight", "0.1")));
+    assertU(add(doc("id", "4","name", "author2", "weight", "0.11")));
+    assertU(commit());
+
+    assertQ(req("q","*:*", "group", "true", "group.field","name", "sort", "id asc", "group.sort", "weight desc")
+            ,"*[count(//arr[@name='groups']/lst) = 2]"
+            ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author1']"
+    //        ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
+            ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='1']"
+
+            ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author2']"
+    //        ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
+            ,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='4']"
+            );
+  }
+
+
+
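+  // f is the grouping field and f2 drives the score via the {!func} queries below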
+  static String f = "foo_s1";
+  static String f2 = "foo2_i";
+
+  public static void createIndex() {
+    assertU(adoc("id","1", f,"5",  f2,"4"));
+    assertU(adoc("id","2", f,"4",  f2,"2"));
+    assertU(adoc("id","3", f,"3",  f2,"7"));
+    assertU(commit());
+    assertU(adoc("id","4", f,"2",  f2,"6"));
+    assertU(adoc("id","5", f,"1",  f2,"2"));
+    assertU(adoc("id","6", f,"3",  f2,"2"));
+    assertU(adoc("id","7", f,"2",  f2,"3"));
+    assertU(commit());
+    assertU(adoc("id","8", f,"1",  f2,"10"));
+    assertU(adoc("id","9", f,"2",  f2,"1"));
+    assertU(commit());
+    assertU(adoc("id","10", f,"1", f2,"3"));
+    assertU(commit());
+  }
+
+  @Test
+  public void testGroupedCount() throws Exception {
+    createIndex();
+    String filt = f + ":[* TO *]";
+
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "sort", f + " asc", "fl","id", "group.ngroups", "true")
+      ,"/responseHeader/status==0"                         // exact match
+      ,"/responseHeader=={'_SKIP_':'QTime', 'status':0}"   // partial match by skipping some elements
+      ,"/responseHeader=={'_MATCH_':'status', 'status':0}" // partial match by only including some elements
+      ,"/grouped=={'"+f+"':{'matches':10,'ngroups': 5,'groups':[\n" +
+              "{'groupValue':'1','doclist':{'numFound':3,'start':0,'docs':[{'id':'5'}]}}," +
+              "{'groupValue':'2','doclist':{'numFound':3,'start':0,'docs':[{'id':'4'}]}}," +
+              "{'groupValue':'3','doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}," +
+              "{'groupValue':'4','doclist':{'numFound':1,'start':0,'docs':[{'id':'2'}]}}," +
+              "{'groupValue':'5','doclist':{'numFound':1,'start':0,'docs':[{'id':'1'}]}}" +
+            "]}}"
+    );
+  }
+
+  @Test
+  public void testGroupAPI() throws Exception {
+    createIndex();
+    String filt = f + ":[* TO *]";
+
+    assertQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f)
+        ,"/response/lst[@name='grouped']/lst[@name='"+f+"']/arr[@name='groups']"
+    );
+
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "sort", f + " asc", "group.sort", "score desc")
+      ,"/responseHeader/status==0"                         // exact match
+      ,"/responseHeader=={'_SKIP_':'QTime', 'status':0}"   // partial match by skipping some elements
+      ,"/responseHeader=={'_MATCH_':'status', 'status':0}" // partial match by only including some elements
+      ,"/grouped=={'"+f+"':{'matches':10,'groups':[\n" +
+              "{'groupValue':'1','doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," +
+              "{'groupValue':'2','doclist':{'numFound':3,'start':0,'docs':[{'id':'4'}]}}," +
+              "{'groupValue':'3','doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}," +
+              "{'groupValue':'4','doclist':{'numFound':1,'start':0,'docs':[{'id':'2'}]}}," +
+              "{'groupValue':'5','doclist':{'numFound':1,'start':0,'docs':[{'id':'1'}]}}" +
+            "]}}"
+    );
+
+    // test that filtering cuts down the result set
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "fq",f+":2")
+      ,"/grouped=={'"+f+"':{'matches':3,'groups':[" +
+            "{'groupValue':'2','doclist':{'numFound':3,'start':0,'docs':[{'id':'4'}]}}" +
+            "]}}"
+    );
+
+    // test limiting the number of groups returned
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2")
+      ,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
+              "{'groupValue':'1','doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," +
+              "{'groupValue':'3','doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}" +
+            "]}}"
+    );
+
+    // test offset into group list
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","1", "start","1")
+      ,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
+              "{'groupValue':'3','doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}" +
+            "]}}"
+    );
+
+    // test big offset into group list
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","1", "start","100")
+      ,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
+            "]}}"
+    );
+
+    // test increasing the docs per group returned
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "group.limit","3")
+      ,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
+            "{'groupValue':'1','doclist':{'numFound':3,'start':0,'docs':[{'id':'8'},{'id':'10'},{'id':'5'}]}}," +
+            "{'groupValue':'3','doclist':{'numFound':2,'start':0,'docs':[{'id':'3'},{'id':'6'}]}}" +
+          "]}}"
+    );
+
+    // test offset into each group
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "group.limit","3", "group.offset","1")
+      ,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
+            "{'groupValue':'1','doclist':{'numFound':3,'start':1,'docs':[{'id':'10'},{'id':'5'}]}}," +
+            "{'groupValue':'3','doclist':{'numFound':2,'start':1,'docs':[{'id':'6'}]}}" +
+          "]}}"
+    );
+
+    // test big offset into each group
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "group.limit","3", "group.offset","10")
+      ,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
+            "{'groupValue':'1','doclist':{'numFound':3,'start':10,'docs':[]}}," +
+            "{'groupValue':'3','doclist':{'numFound':2,'start':10,'docs':[]}}" +
+          "]}}"
+    );
+
+    // test adding in scores
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id,score", "rows","2", "group.limit","2", "indent","off")
+      ,"/grouped/"+f+"/groups==" +
+            "[" +
+              "{'groupValue':'1','doclist':{'numFound':3,'start':0,'maxScore':10.0,'docs':[{'id':'8','score':10.0},{'id':'10','score':3.0}]}}," +
+              "{'groupValue':'3','doclist':{'numFound':2,'start':0,'maxScore':7.0,'docs':[{'id':'3','score':7.0},{'id':'6','score':2.0}]}}" +
+            "]"
+
+    );
+
+    /* Not supported yet!
+    // test function (functions are currently all float - this may change)
+    String func = "add("+f+","+f+")";
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.func", func  , "fl","id", "rows","2")
+      ,"/grouped=={'"+func+"':{'matches':10,'groups':[" +
+              "{'groupValue':2.0,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," +
+              "{'groupValue':6.0,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}" +
+            "]}}"
+    );
+    */
+
+    // test that faceting works with grouping
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id"
+                 ,"facet","true", "facet.field",f)
+      ,"/grouped/"+f+"/matches==10"
+      ,"/facet_counts/facet_fields/"+f+"==['1',3, '2',3, '3',2, '4',1, '5',1]"
+    );
+    purgeFieldCache(FieldCache.DEFAULT);   // avoid FC insanity
+
+    // test that grouping works with highlighting
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id"
+                 ,"hl","true", "hl.fl",f)
+      ,"/grouped/"+f+"/matches==10"
+      ,"/highlighting=={'_ORDERED_':'', '8':{},'3':{},'4':{},'1':{},'2':{}}"
+    );
+
+    // test that grouping works with debugging
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id"
+                 ,"debugQuery","true")
+      ,"/grouped/"+f+"/matches==10"
+      ,"/debug/explain/8=="
+      ,"/debug/explain/2=="
+    );
+
+    ///////////////////////// group.query
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "group.limit","3")
+       ,"/grouped=={'id:[2 TO 5]':{'matches':10," +
+           "'doclist':{'numFound':4,'start':0,'docs':[{'id':'3'},{'id':'4'},{'id':'2'}]}}}"
+    );
+
+    // group.query and offset
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "group.limit","3", "group.offset","2")
+       ,"/grouped=={'id:[2 TO 5]':{'matches':10," +
+           "'doclist':{'numFound':4,'start':2,'docs':[{'id':'2'},{'id':'5'}]}}}"
+    );
+
+    // group.query and big offset
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "group.limit","3", "group.offset","10")
+       ,"/grouped=={'id:[2 TO 5]':{'matches':10," +
+           "'doclist':{'numFound':4,'start':10,'docs':[]}}}"
+    );
+
+    ///////////////////////// group.query as main result
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "rows","3", "group.main","true")
+       ,"/response=={'numFound':4,'start':0,'docs':[{'id':'3'},{'id':'4'},{'id':'2'}]}"
+    );
+
+    // group.query and offset
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "rows","3", "start","2", "group.main","true")
+       ,"/response=={'numFound':4,'start':2,'docs':[{'id':'2'},{'id':'5'}]}"
+    );
+
+    // group.query and big offset
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "rows","3", "start","10", "group.main","true")
+       ,"/response=={'numFound':4,'start':10,'docs':[]}"
+    );
+
+
+    // multiple at once
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true",
+        "group.query","id:[2 TO 5]",
+        "group.query","id:[5 TO 5]",
+        "group.field",f,
+        "rows","1",
+        "fl","id", "group.limit","2")
+       ,"/grouped/id:[2 TO 5]=={'matches':10,'doclist':{'numFound':4,'start':0,'docs':[{'id':'3'},{'id':'4'}]}}"
+       ,"/grouped/id:[5 TO 5]=={'matches':10,'doclist':{'numFound':1,'start':0,'docs':[{'id':'5'}]}}"
+       ,"/grouped/"+f+"=={'matches':10,'groups':[{'groupValue':'1','doclist':{'numFound':3,'start':0,'docs':[{'id':'8'},{'id':'10'}]}}]}"
+    );
+
+    ///////////////////////// group.field as main result
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "group.main","true")
+        ,"/response=={'numFound':10,'start':0,'docs':[{'id':'8'},{'id':'3'},{'id':'4'},{'id':'1'},{'id':'2'}]}"
+    );
+    // test that rows limits #docs
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "group.main","true")
+        ,"/response=={'numFound':10,'start':0,'docs':[{'id':'8'},{'id':'3'},{'id':'4'}]}"
+    );
+    // small offset
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "start","1", "group.main","true")
+        ,"/response=={'numFound':10,'start':1,'docs':[{'id':'3'},{'id':'4'}]}"
+    );
+    // large offset
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "start","20", "group.main","true")
+        ,"/response=={'numFound':10,'start':20,'docs':[]}"
+    );
+    // group.limit>1
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "group.limit","2", "group.main","true")
+        ,"/response=={'numFound':10,'start':0,'docs':[{'id':'8'},{'id':'10'},{'id':'3'}]}"
+    );
+    // group.limit>1 with start>0
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "start","1", "group.limit","2", "group.main","true")
+        ,"/response=={'numFound':10,'start':1,'docs':[{'id':'10'},{'id':'3'},{'id':'6'}]}"
+    );
+
+    ///////////////////////// group.format == simple
+    assertJQ(req("fq",filt,  "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "start","1", "group.limit","2", "group.format","simple")
+    , "/grouped/foo_s1=={'matches':10,'doclist':"
+        +"{'numFound':10,'start':1,'docs':[{'id':'10'},{'id':'3'},{'id':'6'}]}}"
+    );
+  }
+
+
+  @Test
+  public void testRandomGrouping() throws Exception {
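+    // Strategy: index a random set of docs, mirror them in an in-memory model,
+    // group and sort the model in plain Java, then run the equivalent grouping
+    // request against Solr and check that the two responses match.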
+    /**
+     updateJ("{\"add\":{\"doc\":{\"id\":\"77\"}}}", params("commit","true"));
+     assertJQ(req("q","id:77"), "/response/numFound==1");
+
+     Doc doc = createDocObj(types);
+     updateJ(toJSON(doc), params("commit","true"));
+
+     assertJQ(req("q","id:"+doc.id), "/response/numFound==1");
+    **/
+
+    int indexIter=50 * RANDOM_MULTIPLIER;  // set to 0 to disable this test
+    int queryIter=100 * RANDOM_MULTIPLIER;
+
+    while (--indexIter >= 0) {
+
+      int indexSize = random.nextInt(25 * RANDOM_MULTIPLIER);
+//indexSize=2;  // uncomment to pin a tiny index when debugging
+      List<FldType> types = new ArrayList<FldType>();
+      types.add(new FldType("id",ONE_ONE, new SVal('A','Z',4,4)));
+      types.add(new FldType("score_s1",ONE_ONE, new SVal('a','c',1,1)));  // field used to score
+      types.add(new FldType("bar_s1",ONE_ONE, new SVal('a','z',3,5)));
+      types.add(new FldType(FOO_STRING_FIELD,ZERO_ONE, new SVal('a','z',1,2)));
+      types.add(new FldType(SMALL_STRING_FIELD,ZERO_ONE, new SVal('a',(char)('c'+indexSize/10),1,1)));
+//      types.add(new FldType(SMALL_INT_FIELD,ZERO_ONE, new IRange(0,5+indexSize/10)));
+
+      clearIndex();
+      Map<Comparable, Doc> model = indexDocs(types, null, indexSize);
+      //System.out.println("############### model=" + model);
+
+      // test with specific docs (flip to true when debugging a failure)
+      if (false) {
+        clearIndex();
+        model.clear();
+        Doc d1 = createDoc(types);
+        d1.getValues(SMALL_STRING_FIELD).set(0,"c");
+        d1.getValues(SMALL_INT_FIELD).set(0,5);
+        d1.order = 0;
+        updateJ(toJSON(d1), params("commit","true"));
+        model.put(d1.id, d1);
+
+        d1 = createDoc(types);
+        d1.getValues(SMALL_STRING_FIELD).set(0,"b");
+        d1.getValues(SMALL_INT_FIELD).set(0,5);
+        d1.order = 1;
+        updateJ(toJSON(d1), params("commit","false"));
+        model.put(d1.id, d1);
+
+        d1 = createDoc(types);
+        d1.getValues(SMALL_STRING_FIELD).set(0,"c");
+        d1.getValues(SMALL_INT_FIELD).set(0,5);
+        d1.order = 2;
+        updateJ(toJSON(d1), params("commit","false"));
+        model.put(d1.id, d1);
+
+        d1 = createDoc(types);
+        d1.getValues(SMALL_STRING_FIELD).set(0,"c");
+        d1.getValues(SMALL_INT_FIELD).set(0,5);
+        d1.order = 3;
+        updateJ(toJSON(d1), params("commit","false"));
+        model.put(d1.id, d1);
+
+        d1 = createDoc(types);
+        d1.getValues(SMALL_STRING_FIELD).set(0,"b");
+        d1.getValues(SMALL_INT_FIELD).set(0,2);
+        d1.order = 4;
+        updateJ(toJSON(d1), params("commit","true"));
+        model.put(d1.id, d1);
+      }
+
+
+      for (int qiter=0; qiter<queryIter; qiter++) {
+        String groupField = types.get(random.nextInt(types.size())).fname;
+
+        int rows = random.nextInt(10)==0 ? random.nextInt(model.size()+2) : random.nextInt(11)-1;
+        int start = random.nextInt(5)==0 ? random.nextInt(model.size()+2) : random.nextInt(5); // pick a small start normally for better coverage
+        int group_limit = random.nextInt(10)==0 ? random.nextInt(model.size()+2) : random.nextInt(11)-1;
+        int group_offset = random.nextInt(10)==0 ? random.nextInt(model.size()+2) : random.nextInt(2); // pick a small start normally for better coverage
+
+        String[] stringSortA = new String[1];
+        Comparator<Doc> sortComparator = createSort(h.getCore().getSchema(), types, stringSortA);
+        String sortStr = stringSortA[0];
+        Comparator<Doc> groupComparator = random.nextBoolean() ? sortComparator : createSort(h.getCore().getSchema(), types, stringSortA);
+        String groupSortStr = stringSortA[0];
+
+        // group.sort defaults to sort on the Solr side, so when the model's group
+        // sort is the implicit "score desc" (i.e. null) while sortStr is set, make it explicit.
+        if (groupSortStr == null && groupSortStr != sortStr) {
+          groupSortStr = "score desc";
+        }
+
+        // test a specific case (flip to true when debugging a failure)
+        if (false) {
+          groupField=SMALL_INT_FIELD;
+          sortComparator=createComparator(Arrays.asList(createComparator(SMALL_STRING_FIELD, true, true, false, true)));
+          sortStr = SMALL_STRING_FIELD + " asc";
+          groupComparator = createComparator(Arrays.asList(createComparator(SMALL_STRING_FIELD, true, true, false, false)));
+          groupSortStr = SMALL_STRING_FIELD + " asc";
+          rows=1; start=0; group_offset=1; group_limit=1;
+        }
+
+        Map<Comparable, Grp> groups = groupBy(model.values(), groupField);
+
+        // first sort the docs in each group
+        for (Grp grp : groups.values()) {
+          Collections.sort(grp.docs, groupComparator);
+        }
+
+        // now sort the groups
+
+        // if sort != group.sort, we need to find the max doc by "sort"
+        if (groupComparator != sortComparator) {
+          for (Grp grp : groups.values()) grp.setMaxDoc(sortComparator);
+        }
+
+        List<Grp> sortedGroups = new ArrayList<Grp>(groups.values());
+        Collections.sort(sortedGroups,  groupComparator==sortComparator ? createFirstDocComparator(sortComparator) : createMaxDocComparator(sortComparator));
+
+        boolean includeNGroups = random.nextBoolean();
+        Object modelResponse = buildGroupedResult(h.getCore().getSchema(), sortedGroups, start, rows, group_offset, group_limit, includeNGroups);
+
+        int randomPercentage = random.nextInt(101);
+        // TODO: create a random filter too
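+        // null params are sent under a dummy name ("nosort"/"noGroupsort") so they
+        // are effectively omitted from the request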
+        SolrQueryRequest req = req("group","true","wt","json","indent","true", "echoParams","all", "q","{!func}score_f", "group.field",groupField
+            ,sortStr==null ? "nosort":"sort", sortStr ==null ? "": sortStr
+            ,(groupSortStr==null || groupSortStr==sortStr) ? "noGroupsort":"group.sort", groupSortStr==null ? "": groupSortStr
+            ,"rows",""+rows, "start",""+start, "group.offset",""+group_offset, "group.limit",""+group_limit,
+            GroupParams.GROUP_CACHE_PERCENTAGE, Integer.toString(randomPercentage), GroupParams.GROUP_TOTAL_COUNT, includeNGroups ? "true" : "false"
+        );
+
+        String strResponse = h.query(req);
+
+        Object realResponse = ObjectBuilder.fromJSON(strResponse);
+        String err = JSONTestUtil.matchObj("/grouped/"+groupField, realResponse, modelResponse);
+        if (err != null) {
+          log.error("GROUPING MISMATCH: " + err
+           + "\n\trequest="+req
+           + "\n\tresult="+strResponse
+           + "\n\texpected="+ JSONUtil.toJSON(modelResponse)
+           + "\n\tsorted_model="+ sortedGroups
+          );
+
+          // re-execute the request... good for putting a breakpoint here for debugging
+          String rsp = h.query(req);
+
+          fail(err);
+        }
+      } // end query iter
+    } // end index iter
+
+  }
+
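+  /**
+   * Builds the expected "grouped" response section from the sorted model groups,
+   * applying start/rows across groups and group.offset/group.limit within each
+   * group; -1 means no limit.
+   */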
+  public static Object buildGroupedResult(IndexSchema schema, List<Grp> sortedGroups, int start, int rows, int group_offset, int group_limit, boolean includeNGroups) {
+    Map<String,Object> result = new LinkedHashMap<String,Object>();
+
+    long matches = 0;
+    for (Grp grp : sortedGroups) {
+      matches += grp.docs.size();
+    }
+    result.put("matches", matches);
+    if (includeNGroups) {
+      result.put("ngroups", sortedGroups.size());
+    }
+    List<Object> groupList = new ArrayList<Object>();
+    result.put("groups", groupList);
+
+    for (int i=start; i<sortedGroups.size(); i++) {
+      if (rows != -1 && groupList.size() >= rows) break;  // directly test rather than calculating, so we can catch any calc errors in the real code
+      Map<String,Object> group = new LinkedHashMap<String,Object>();
+      groupList.add(group);
+
+      Grp grp = sortedGroups.get(i);
+      group.put("groupValue", grp.groupValue);
+
+      Map<String,Object> resultSet = new LinkedHashMap<String,Object>();
+      group.put("doclist", resultSet);
+      resultSet.put("numFound", grp.docs.size());
+      resultSet.put("start", group_offset);
+      List<Object> docs = new ArrayList<Object>();
+      resultSet.put("docs", docs);
+      for (int j=group_offset; j<grp.docs.size(); j++) {
+        if (group_limit != -1 && docs.size() >= group_limit) break;
+        docs.add( grp.docs.get(j).toObject(schema) );
+      }
+    }
+
+    return result;
+  }
+
+
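+  /** Orders groups by their max doc, i.e. the top document under the main sort (see Grp.setMaxDoc). */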
+  public static Comparator<Grp> createMaxDocComparator(final Comparator<Doc> docComparator) {
+    return new Comparator<Grp>() {
+      public int compare(Grp o1, Grp o2) {
+        // all groups should have at least one doc
+        Doc d1 = o1.maxDoc;
+        Doc d2 = o2.maxDoc;
+        return docComparator.compare(d1, d2);
+      }
+    };
+  }
+
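+  /** Orders groups by their first doc; used when sort and group.sort are the same comparator. */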
+  public static Comparator<Grp> createFirstDocComparator(final Comparator<Doc> docComparator) {
+    return new Comparator<Grp>() {
+      public int compare(Grp o1, Grp o2) {
+        // all groups should have at least one doc
+        Doc d1 = o1.docs.get(0);
+        Doc d2 = o2.docs.get(0);
+        return docComparator.compare(d1, d2);
+      }
+    };
+  }
+
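+  /**
+   * Groups the model docs by the given field. Docs without a value for the field
+   * collapse into a single null group; multi-valued docs land in one group per value.
+   */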
+  public static Map<Comparable, Grp> groupBy(Collection<Doc> docs, String field) {
+    Map<Comparable, Grp> groups = new HashMap<Comparable, Grp>();
+    for (Doc doc : docs) {
+      List<Comparable> vals = doc.getValues(field);
+      if (vals == null) {
+        Grp grp = groups.get(null);
+        if (grp == null) {
+          grp = new Grp();
+          grp.groupValue = null;
+          grp.docs = new ArrayList<Doc>();
+          groups.put(null, grp);
+        }
+        grp.docs.add(doc);
+      } else {
+        for (Comparable val : vals) {
+
+          Grp grp = groups.get(val);
+          if (grp == null) {
+            grp = new Grp();
+            grp.groupValue = val;
+            grp.docs = new ArrayList<Doc>();
+            groups.put(grp.groupValue, grp);
+          }
+          grp.docs.add(doc);
+        }
+      }
+    }
+    return groups;
+  }
+
+
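+  /** A model group: its value, its docs (sorted by group.sort) and, when needed, its top doc under the main sort. */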
+  public static class Grp {
+    public Comparable groupValue;
+    public List<Doc> docs;
+    public Doc maxDoc;  // the document highest according to the "sort" param
+
+
+    public void setMaxDoc(Comparator<Doc> comparator) {
+      Doc[] arr = docs.toArray(new Doc[docs.size()]);
+      Arrays.sort(arr, comparator);
+      maxDoc = arr.length > 0 ? arr[0] : null;
+    }
+
+    @Override
+    public String toString() {
+      return "{groupValue="+groupValue+",docs="+docs+"}";
+    }
+  }
+
+}


