lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sar...@apache.org
Subject svn commit: r1137126 [2/2] - in /lucene/dev/branches/solr2452: ./ dev-tools/idea/lucene/contrib/ dev-tools/idea/solr/ dev-tools/maven/solr/ dev-tools/maven/solr/contrib/analysis-extras/ dev-tools/maven/solr/contrib/clustering/ dev-tools/maven/solr/cont...
Date Sat, 18 Jun 2011 05:16:59 GMT
Modified: lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/search/Grouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/search/Grouping.java?rev=1137126&r1=1137125&r2=1137126&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/search/Grouping.java (original)
+++ lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/search/Grouping.java Sat Jun 18 05:16:57 2011
@@ -17,355 +17,287 @@
 
 package org.apache.solr.search;
 
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.search.*;
+import org.apache.lucene.search.grouping.*;
 import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.schema.StrFieldSource;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.*;
 import org.apache.solr.search.function.DocValues;
-import org.apache.solr.search.function.StringIndexDocValues;
+import org.apache.solr.search.function.FunctionQuery;
+import org.apache.solr.search.function.QueryValueSource;
 import org.apache.solr.search.function.ValueSource;
-import org.apache.solr.util.SentinelIntSet;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.util.*;
 
+/**
+ * Basic Solr Grouping infrastructure.
+ * Warning: NOT thread safe!
+ *
+ * @lucene.experimental
+ */
 public class Grouping {
 
-  public enum Format {Grouped, Simple}
+  private final static Logger logger = LoggerFactory.getLogger(Grouping.class);
 
-  public abstract class Command {
-    public String key;       // the name to use for this group in the response
-    public Sort groupSort;   // the sort of the documents *within* a single group.
-    public Sort sort;        // the sort between groups
-    public int docsPerGroup; // how many docs in each group - from "group.limit" param, default=1
-    public int groupOffset;  // the offset within each group (for paging within each group)
-    public int numGroups;    // how many groups - defaults to the "rows" parameter
-    public int offset;       // offset into the list of groups
-    public Format format;
-    public boolean main;     // use as the main result in simple format (grouped.main=true param)
-
-
-    abstract void prepare() throws IOException;
-    abstract Collector createCollector() throws IOException;
-    Collector createNextCollector() throws IOException {
-      return null;
-    }
-    abstract void finish() throws IOException;
-
-    abstract int getMatches();
-
-    NamedList commonResponse() {
-      NamedList groupResult = new SimpleOrderedMap();
-      grouped.add(key, groupResult);  // grouped={ key={
-
-      int this_matches = getMatches();
-      groupResult.add("matches", this_matches);
-      maxMatches = Math.max(maxMatches, this_matches);
-      return groupResult;
-    }
-
-    DocList getDocList(TopDocsCollector collector) {
-      int max = collector.getTotalHits();
-      int off = groupOffset;
-      int len = docsPerGroup;
-      if (format == Format.Simple) {
-        off = offset;
-        len = numGroups;
-      }
-      int docsToCollect = getMax(off, len, max);
-
-      // TODO: implement a DocList impl that doesn't need to start at offset=0
-      TopDocs topDocs = collector.topDocs(0, Math.max(docsToCollect,1));  // 0 isn't supported as a valid value
-      int docsCollected = Math.min(docsToCollect, topDocs.scoreDocs.length);
+  private final SolrIndexSearcher searcher;
+  private final SolrIndexSearcher.QueryResult qr;
+  private final SolrIndexSearcher.QueryCommand cmd;
+  private final List<Command> commands = new ArrayList<Command>();
+  private final boolean main;
+  private final boolean cacheSecondPassSearch;
+  private final int maxDocsPercentageToCache;
+
+  private Sort sort;
+  private Sort groupSort;
+  private int limitDefault;
+  private int docsPerGroupDefault;
+  private int groupOffsetDefault;
+  private Format defaultFormat;
+  private TotalCount defaultTotalCount;
+
+  private int maxDoc;
+  private boolean needScores;
+  private boolean getDocSet;
+  private boolean getDocList; // doclist needed for debugging or highlighting
+  private Query query;
+  private DocSet filter;
+  private Filter luceneFilter;
+  private NamedList grouped = new SimpleOrderedMap();
+  private Set<Integer> idSet = new LinkedHashSet<Integer>();  // used for tracking unique docs when we need a doclist
+  private int maxMatches;  // max number of matches from any grouping command
+  private float maxScore = Float.NEGATIVE_INFINITY;  // max score seen in any doclist
+  private boolean signalCacheWarning = false;
 
-      int ids[] = new int[docsCollected];
-      float[] scores = needScores ? new float[docsCollected] : null;
-      for (int i=0; i<ids.length; i++) {
-        ids[i] = topDocs.scoreDocs[i].doc;
-        if (scores != null)
-          scores[i] = topDocs.scoreDocs[i].score;
-      }
-
-      float score = topDocs.getMaxScore();
-      maxScore = Math.max(maxScore, score);
-      DocSlice docs = new DocSlice(off, Math.max(0, ids.length - off), ids, scores, topDocs.totalHits, score);
 
-      if (getDocList) {
-        DocIterator iter = docs.iterator();
-        while (iter.hasNext())
-          idSet.add(iter.nextDoc());
-      }
-      return docs;
-    }
+  public DocList mainResult;  // output if one of the grouping commands should be used as the main result.
 
-    void addDocList(NamedList rsp, TopDocsCollector collector) {
-      rsp.add("doclist", getDocList(collector));
-    }
+  /**
+   * @param searcher
+   * @param qr
+   * @param cmd
+   * @param cacheSecondPassSearch Whether to cache the documents and scores from the first pass search for the second
+   *                              pass search.
+   * @param maxDocsPercentageToCache The maximum number of documents, expressed as a percentage of maxDoc,
+   *                                 that is allowed in the cache. When this threshold is exceeded,
+   *                                 the cache is not used in the second pass search.
+   */
+  public Grouping(SolrIndexSearcher searcher,
+                  SolrIndexSearcher.QueryResult qr,
+                  SolrIndexSearcher.QueryCommand cmd,
+                  boolean cacheSecondPassSearch,
+                  int maxDocsPercentageToCache,
+                  boolean main) {
+    this.searcher = searcher;
+    this.qr = qr;
+    this.cmd = cmd;
+    this.cacheSecondPassSearch = cacheSecondPassSearch;
+    this.maxDocsPercentageToCache = maxDocsPercentageToCache;
+    this.main = main;
   }
 
-  public class CommandQuery extends Command {
-    public Query query;
-
-    TopDocsCollector topCollector;
-    FilterCollector collector;
-
-    @Override
-    void prepare() throws IOException {
-    }
-
-    @Override
-    Collector createCollector() throws IOException {
-      int docsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
-      DocSet groupFilt = searcher.getDocSet(query);
-      topCollector = newCollector(groupSort, docsToCollect, false, needScores);
-      collector = new FilterCollector(groupFilt, topCollector);
-      return collector;
-    }
-
-    @Override
-    void finish() throws IOException {
-      if (main) {
-        mainResult = getDocList((TopDocsCollector)collector.getCollector());
-      } else {
-        NamedList rsp = commonResponse();
-        addDocList(rsp, (TopDocsCollector)collector.getCollector());
-      }
-    }
-
-    @Override
-    int getMatches() {
-      return collector.getMatches();
-    }
+  public void add(Grouping.Command groupingCommand) {
+    commands.add(groupingCommand);
   }
 
-  
-  public class CommandFunc extends Command {
-    public ValueSource groupBy;
-
-
-    int maxGroupToFind;
-    Map context;
-    TopGroupCollector collector = null;
-    Phase2GroupCollector collector2;
-    
-    @Override
-    void prepare() throws IOException {
-        Map context = ValueSource.newContext(searcher);
-        groupBy.createWeight(context, searcher);
+  /**
+   * Adds a field command based on the specified field.
+   * If the field is not compatible with {@link CommandField} it invokes the
+   * {@link #addFunctionCommand(String, org.apache.solr.request.SolrQueryRequest)} method.
+   *
+   * @param field The fieldname to group by.
+   */
+  public void addFieldCommand(String field, SolrQueryRequest request) throws ParseException {
+    SchemaField schemaField = searcher.getSchema().getField(field); // Throws an exception when field doesn't exist. Bad request.
+    FieldType fieldType = schemaField.getType();
+    ValueSource valueSource = fieldType.getValueSource(schemaField, null);
+    if (!(valueSource instanceof StrFieldSource)) {
+      addFunctionCommand(field, request);
+      return;
     }
 
-    @Override
-    Collector createCollector() throws IOException {
-      maxGroupToFind = getMax(offset, numGroups, maxDoc);
-
-      // if we aren't going to return any groups, disregard the offset 
-      if (numGroups == 0) maxGroupToFind = 0;
-
-      collector = new TopGroupCollector(groupBy, context, searcher.weightSort(normalizeSort(sort)), maxGroupToFind);
-
-      /*** if we need a different algorithm when sort != group.sort
-      if (compareSorts(sort, groupSort)) {
-        collector = new TopGroupCollector(groupBy, context, normalizeSort(sort), maxGroupToFind);
+    Grouping.CommandField gc = new CommandField();
+    gc.groupSort = groupSort;
+    gc.groupBy = field;
+    gc.key = field;
+    gc.numGroups = limitDefault;
+    gc.docsPerGroup = docsPerGroupDefault;
+    gc.groupOffset = groupOffsetDefault;
+    gc.offset = cmd.getOffset();
+    gc.sort = sort;
+    gc.format = defaultFormat;
+    gc.totalCount = defaultTotalCount;
+
+    if (main) {
+      gc.main = true;
+      gc.format = Grouping.Format.simple;
+    }
+
+    if (gc.format == Grouping.Format.simple) {
+      gc.groupOffset = 0;  // doesn't make sense
+    }
+    commands.add(gc);
+  }
+
+  public void addFunctionCommand(String groupByStr, SolrQueryRequest request) throws ParseException {
+    QParser parser = QParser.getParser(groupByStr, "func", request);
+    Query q = parser.getQuery();
+    final Grouping.Command gc;
+    if (q instanceof FunctionQuery) {
+      ValueSource valueSource = ((FunctionQuery)q).getValueSource();
+      if (valueSource instanceof StrFieldSource) {
+        String field = ((StrFieldSource) valueSource).getField();
+        CommandField commandField = new CommandField();
+        commandField.groupBy = field;
+        gc = commandField;
       } else {
-        collector = new TopGroupSortCollector(groupBy, context, normalizeSort(sort), normalizeSort(groupSort), maxGroupToFind);
+        CommandFunc commandFunc = new CommandFunc();
+        commandFunc.groupBy = valueSource;
+        gc = commandFunc;
       }
-      ***/
-      return collector;
+    } else {
+      CommandFunc commandFunc = new CommandFunc();
+      commandFunc.groupBy = new QueryValueSource(q, 0.0f);
+      gc = commandFunc;
     }
+    gc.groupSort = groupSort;
+    gc.key = groupByStr;
+    gc.numGroups = limitDefault;
+    gc.docsPerGroup = docsPerGroupDefault;
+    gc.groupOffset = groupOffsetDefault;
+    gc.offset = cmd.getOffset();
+    gc.sort = sort;
+    gc.format = defaultFormat;
+    gc.totalCount = defaultTotalCount;
 
-    @Override
-    Collector createNextCollector() throws IOException {
-      if (numGroups == 0) return null;
-
-      int docsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
-      docsToCollect = Math.max(docsToCollect, 1);
-
-      // if the format is simple, don't skip groups (since we are counting docs, not groups)
-      int collectorOffset = format==Format.Simple ? 0 : offset;
-
-      if (groupBy instanceof StrFieldSource) {
-        collector2 = new Phase2StringGroupCollector(collector, groupBy, context, searcher.weightSort(groupSort), docsToCollect, needScores, collectorOffset);
-      } else {
-        collector2 = new Phase2GroupCollector(collector, groupBy, context, searcher.weightSort(groupSort), docsToCollect, needScores, collectorOffset);
-      }
-      return collector2;
+    if (main) {
+      gc.main = true;
+      gc.format = Grouping.Format.simple;
     }
 
-    @Override
-    void finish() throws IOException {
-      if (main) {
-        mainResult = createSimpleResponse();
-        return;
-      }
-
-      NamedList groupResult = commonResponse();
-
-      if (format == Format.Simple) {
-        groupResult.add("doclist", createSimpleResponse());
-        return;
-      }
-
-      List groupList = new ArrayList();
-      groupResult.add("groups", groupList);        // grouped={ key={ groups=[
-
-      // handle case of rows=0
-      if (numGroups == 0) return;
-
-      if (collector.orderedGroups == null) collector.buildSet();
-
-      int skipCount = offset;
-      for (SearchGroup group : collector.orderedGroups) {
-        if (skipCount > 0) {
-          skipCount--;
-          continue;
-        }
-        NamedList nl = new SimpleOrderedMap();
-        groupList.add(nl);                         // grouped={ key={ groups=[ {
-
-        nl.add("groupValue", group.groupValue.toObject());
-
-        SearchGroupDocs groupDocs = collector2.groupMap.get(group.groupValue);
-        addDocList(nl, groupDocs.collector);
-      }
+    if (gc.format == Grouping.Format.simple) {
+      gc.groupOffset = 0;  // doesn't make sense
     }
 
-    private DocList createSimpleResponse() {
-      int docCount = numGroups;
-      int docOffset = offset;    
-      int docsToGather = getMax(docOffset, docCount, maxDoc);
-
-      float maxScore = Float.NEGATIVE_INFINITY; 
-      List<TopDocs> topDocsList = new ArrayList<TopDocs>();
-      int numDocs = 0;
-      for (SearchGroup group : collector.orderedGroups) {
-        SearchGroupDocs groupDocs = collector2.groupMap.get(group.groupValue);
-        
-        TopDocsCollector collector = groupDocs.collector;
-        int hits = collector.getTotalHits();
-
-        int num = Math.min(docsPerGroup, hits - groupOffset); // how many docs are in this group
-        if (num <= 0) continue;
-
-        TopDocs topDocs = collector.topDocs(groupOffset, Math.min(docsPerGroup,docsToGather-numDocs));
-        topDocsList.add(topDocs);
-        numDocs += topDocs.scoreDocs.length;
-
-        float score = topDocs.getMaxScore();
-        maxScore = Math.max(maxScore, score);
-
-        if (numDocs >= docsToGather) break;
-      }
-      assert numDocs <= docsToGather; // make sure we didn't gather too many
-      
-      int[] ids = new int[numDocs];
-      float[] scores = needScores ? new float[numDocs] : null;
-      int pos = 0;
-
-      for (TopDocs topDocs : topDocsList) {
-        for (ScoreDoc sd : topDocs.scoreDocs) {
-          ids[pos] = sd.doc;
-          if (scores != null) scores[pos] = sd.score;
-          pos++;
-        }
-      }
+    commands.add(gc);
+  }
 
-      DocSlice docs = new DocSlice(docOffset, Math.max(0, ids.length - docOffset), ids, scores, getMatches(), maxScore);
+  public void addQueryCommand(String groupByStr, SolrQueryRequest request) throws ParseException {
+    QParser parser = QParser.getParser(groupByStr, null, request);
+    Query gq = parser.getQuery();
+    Grouping.CommandQuery gc = new CommandQuery();
+    gc.query = gq;
+    gc.groupSort = groupSort;
+    gc.key = groupByStr;
+    gc.numGroups = limitDefault;
+    gc.docsPerGroup = docsPerGroupDefault;
+    gc.groupOffset = groupOffsetDefault;
 
-      if (getDocList) {
-        DocIterator iter = docs.iterator();
-        while (iter.hasNext())
-          idSet.add(iter.nextDoc());
-      }
+    // these two params will only be used if this is for the main result set
+    gc.offset = cmd.getOffset();
+    gc.numGroups = limitDefault;
+    gc.format = defaultFormat;
 
-      return docs;
+    if (main) {
+      gc.main = true;
+      gc.format = Grouping.Format.simple;
     }
-
-    @Override
-    int getMatches() {
-      return collector.getMatches();
+    if (gc.format == Grouping.Format.simple) {
+      gc.docsPerGroup = gc.numGroups;  // doesn't make sense to limit to one
+      gc.groupOffset = gc.offset;
     }
-  }
-
-
 
-  static Sort byScoreDesc = new Sort();
-
-  static boolean compareSorts(Sort sort1, Sort sort2) {
-    return sort1 == sort2 || normalizeSort(sort1).equals(normalizeSort(sort2)); 
+    commands.add(gc);
   }
 
-  /** returns a sort by score desc if null */
-  static Sort normalizeSort(Sort sort) {
-    return sort==null ? byScoreDesc : sort;
-  } 
+  public Grouping setSort(Sort sort) {
+    this.sort = sort;
+    return this;
+  }
 
-  static int getMax(int offset, int len, int max) {
-    int v = len<0 ? max : offset + len;
-    if (v < 0 || v > max) v = max;
-    return v;
+  public Grouping setGroupSort(Sort groupSort) {
+    this.groupSort = groupSort;
+    return this;
   }
 
-  TopDocsCollector newCollector(Sort sort, int numHits, boolean fillFields, boolean needScores) throws IOException {
-    if (sort==null || sort==byScoreDesc) {
-      return TopScoreDocCollector.create(numHits, true);
-    } else {
-      return TopFieldCollector.create(searcher.weightSort(sort), numHits, false, needScores, needScores, true);
-    }
+  public Grouping setLimitDefault(int limitDefault) {
+    this.limitDefault = limitDefault;
+    return this;
   }
 
+  public Grouping setDocsPerGroupDefault(int docsPerGroupDefault) {
+    this.docsPerGroupDefault = docsPerGroupDefault;
+    return this;
+  }
 
-  final SolrIndexSearcher searcher;
-  final SolrIndexSearcher.QueryResult qr;
-  final SolrIndexSearcher.QueryCommand cmd;
-  final List<Command> commands = new ArrayList<Command>();
+  public Grouping setGroupOffsetDefault(int groupOffsetDefault) {
+    this.groupOffsetDefault = groupOffsetDefault;
+    return this;
+  }
 
-  public DocList mainResult;  // output if one of the grouping commands should be used as the main result.
+  public Grouping setDefaultFormat(Format defaultFormat) {
+    this.defaultFormat = defaultFormat;
+    return this;
+  }
 
-  public Grouping(SolrIndexSearcher searcher, SolrIndexSearcher.QueryResult qr, SolrIndexSearcher.QueryCommand cmd) {
-    this.searcher = searcher;
-    this.qr = qr;
-    this.cmd = cmd;
+  public Grouping setDefaultTotalCount(TotalCount defaultTotalCount) {
+    this.defaultTotalCount = defaultTotalCount;
+    return this;
   }
 
-  public void add(Grouping.Command groupingCommand) {
-    commands.add(groupingCommand);
+  public List<Command> getCommands() {
+    return commands;
   }
 
-  int maxDoc;
-  boolean needScores;
-  boolean getDocSet;
-  boolean getDocList; // doclist needed for debugging or highlighting
-  Query query;
-  DocSet filter;
-  Filter luceneFilter;
-  NamedList grouped = new SimpleOrderedMap();
-  Set<Integer> idSet = new LinkedHashSet<Integer>();  // used for tracking unique docs when we need a doclist
-  int maxMatches;  // max number of matches from any grouping command  
-  float maxScore = Float.NEGATIVE_INFINITY;  // max score seen in any doclist
-  
   public void execute() throws IOException {
+    if (commands.isEmpty()) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specify at least on field, function or query to group by.");
+    }
+
     DocListAndSet out = new DocListAndSet();
     qr.setDocListAndSet(out);
 
-    filter = cmd.getFilter()!=null ? cmd.getFilter() : searcher.getDocSet(cmd.getFilterList());
+    filter = cmd.getFilter() != null ? cmd.getFilter() : searcher.getDocSet(cmd.getFilterList());
     luceneFilter = filter == null ? null : filter.getTopFilter();
-
     maxDoc = searcher.maxDoc();
 
     needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
+    boolean cacheScores = false;
+    // NOTE: Change this when groupSort can be specified per group
+    if (!needScores && !commands.isEmpty()) {
+      if (commands.get(0).groupSort == null) {
+        cacheScores = true;
+      } else {
+        for (SortField field : commands.get(0).groupSort.getSort()) {
+          if (field.getType() == SortField.SCORE) {
+            cacheScores = true;
+            break;
+          }
+        }
+      }
+    } else if (needScores) {
+      cacheScores = needScores;
+    }
     getDocSet = (cmd.getFlags() & SolrIndexSearcher.GET_DOCSET) != 0;
-    getDocList = (cmd.getFlags() & SolrIndexSearcher.GET_DOCLIST) != 0; // doclist needed for debugging or highlighting
+    getDocList = (cmd.getFlags() & SolrIndexSearcher.GET_DOCLIST) != 0;
     query = QueryUtils.makeQueryable(cmd.getQuery());
 
     for (Command cmd : commands) {
       cmd.prepare();
     }
-    
+
     List<Collector> collectors = new ArrayList<Collector>(commands.size());
     for (Command cmd : commands) {
-      Collector collector = cmd.createCollector();
+      Collector collector = cmd.createFirstPassCollector();
       if (collector != null)
         collectors.add(collector);
     }
@@ -373,11 +305,23 @@ public class Grouping {
     Collector allCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
     DocSetCollector setCollector = null;
     if (getDocSet) {
-      setCollector = new DocSetDelegateCollector(maxDoc>>6, maxDoc, allCollectors);
+      setCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, allCollectors);
       allCollectors = setCollector;
     }
 
-    searcher.search(query, luceneFilter, allCollectors);
+    CachingCollector cachedCollector = null;
+    if (cacheSecondPassSearch && allCollectors != null) {
+      int maxDocsToCache = (int) Math.round(maxDoc * (maxDocsPercentageToCache / 100.0d));
+      // Only makes sense to cache if we cache more than zero.
+      // Maybe we should have a minimum and a maximum, that defines the window we would like caching for.
+      if (maxDocsToCache > 0) {
+        allCollectors = cachedCollector = CachingCollector.create(allCollectors, cacheScores, maxDocsToCache);
+      }
+    }
+
+    if (allCollectors != null) {
+      searcher.search(query, luceneFilter, allCollectors);
+    }
 
     if (getDocSet) {
       qr.setDocSet(setCollector.getDocSet());
@@ -385,13 +329,27 @@ public class Grouping {
 
     collectors.clear();
     for (Command cmd : commands) {
-      Collector collector = cmd.createNextCollector();
+      Collector collector = cmd.createSecondPassCollector();
       if (collector != null)
         collectors.add(collector);
     }
 
-    if (collectors.size() > 0) {
-      searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()])));
+    if (!collectors.isEmpty()) {
+      Collector secondPhaseCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
+      if (collectors.size() > 0) {
+        if (cachedCollector != null) {
+          if (cachedCollector.isCached()) {
+            cachedCollector.replay(secondPhaseCollectors);
+          } else {
+            signalCacheWarning = true;
+            logger.warn(String.format("The grouping cache is active, but not used because it exceeded the max cache limit of %d percent", maxDocsPercentageToCache));
+            logger.warn("Please increase cache size or disable group caching.");
+            searcher.search(query, luceneFilter, secondPhaseCollectors);
+          }
+        } else {
+          searcher.search(query, luceneFilter, secondPhaseCollectors);
+        }
+      }
     }
 
     for (Command cmd : commands) {
@@ -411,429 +369,705 @@ public class Grouping {
     }
   }
 
-}
-
+  /**
+   * Returns offset + len if len is zero or greater and the sum lies between 0 and max (inclusive). Otherwise returns max.
+   *
+   * @param offset The offset
+   * @param len The number of documents to return
+   * @param max The number of documents to return if len < 0, or if offset + len is negative or greater than max
+   * @return offset + len if len is zero or greater and the sum lies between 0 and max (inclusive). Otherwise returns max
+   */
+  int getMax(int offset, int len, int max) {
+    int v = len < 0 ? max : offset + len;
+    if (v < 0 || v > max) v = max;
+    return v;
+  }
 
-class SearchGroup {
-  public MutableValue groupValue;
-  int matches;
-  int topDoc;
-  // float topDocScore;  // currently unused
-  int comparatorSlot;
+  /**
+   * Returns whether a cache warning should be sent to the client.
+   * The value <code>true</code> is returned when the cache is emptied because the caching limits were exceeded, otherwise
+   * <code>false</code> is returned.
+   *
+   * @return whether a cache warning should be sent to the client
+   */
+  public boolean isSignalCacheWarning() {
+    return signalCacheWarning;
+  }
+
+  //======================================   Inner classes =============================================================
+
+  public static enum Format {
+
+    /**
+     * Grouped result. Each group has its own result set.
+     */
+    grouped,
+
+    /**
+     * Flat result. All documents of all groups are put in one list.
+     */
+    simple
+  }
+
+  public static enum TotalCount {
+    /**
+     * Computations should be based on groups.
+     */
+    grouped,
+
+    /**
+     * Computations should be based on plain documents, so not taking grouping into account.
+     */
+    ungrouped
+  }
+
+  /**
+   * General group command. A group command is responsible for creating the first and second pass collectors.
+   * A group command is also responsible for creating the response structure.
+   * <p/>
+   * Note: Maybe creating the response structure should be done in something like a ResponseBuilder?
+   * Warning: NOT thread safe!
+   */
+  public abstract class Command<GROUP_VALUE_TYPE> {
 
-  /***
-  @Override
-  public int hashCode() {
-    return super.hashCode();
-  }
+    public String key;       // the name to use for this group in the response
+    public Sort groupSort;   // the sort of the documents *within* a single group.
+    public Sort sort;        // the sort between groups
+    public int docsPerGroup; // how many docs in each group - from "group.limit" param, default=1
+    public int groupOffset;  // the offset within each group (for paging within each group)
+    public int numGroups;    // how many groups - defaults to the "rows" parameter
+    int actualGroupsToFind;  // How many groups should actually be found. Based on groupOffset and numGroups.
+    public int offset;       // offset into the list of groups
+    public Format format;
+    public boolean main;     // use as the main result in simple format (grouped.main=true param)
+    public TotalCount totalCount = TotalCount.ungrouped;
 
-  @Override
-  public boolean equals(Object obj) {
-    return groupValue.equalsSameType(((SearchGroup)obj).groupValue);
-  }
-  ***/
-}
+    TopGroups<GROUP_VALUE_TYPE> result;
 
-abstract class GroupCollector extends Collector {
-  /** get the number of matches before grouping or limiting have been applied */
-  public abstract int getMatches();
-}
 
-class FilterCollector extends GroupCollector {
-  private final DocSet filter;
-  private final Collector collector;
-  private int docBase;
-  private int matches;
+    /**
+     * Prepare this <code>Command</code> for execution.
+     *
+     * @throws IOException If I/O related errors occur
+     */
+    protected abstract void prepare() throws IOException;
+
+    /**
+     * Returns one or more {@link Collector} instances that are needed to perform the first pass search.
+     * If multiple Collectors are returned then these are wrapped in a {@link org.apache.lucene.search.MultiCollector}.
+     *
+     * @return one or more {@link Collector} instances that are needed to perform the first pass search
+     * @throws IOException If I/O related errors occur
+     */
+    protected abstract Collector createFirstPassCollector() throws IOException;
+
+    /**
+     * Returns zero or more {@link Collector} instances that are needed to perform the second pass search.
+     * In the case when no {@link Collector} instances are created <code>null</code> is returned.
+     * If multiple Collectors are returned then these are wrapped in a {@link org.apache.lucene.search.MultiCollector}.
+     *
+     * @return zero or more {@link Collector} instances that are needed to perform the second pass search
+     * @throws IOException If I/O related errors occur
+     */
+    protected Collector createSecondPassCollector() throws IOException {
+      return null;
+    }
 
-  public FilterCollector(DocSet filter, Collector collector) throws IOException {
-    this.filter = filter;
-    this.collector = collector;
-  }
+    /**
+     * Performs any necessary post actions to prepare the response.
+     *
+     * @throws IOException If I/O related errors occur
+     */
+    protected abstract void finish() throws IOException;
+
+    /**
+     * Returns the number of matches for this <code>Command</code>.
+     *
+     * @return the number of matches for this <code>Command</code>
+     */
+    public abstract int getMatches();
+
+    /**
+     * Returns the number of groups found for this <code>Command</code>.
+     * If the command doesn't support counting the groups <code>null</code> is returned.
+     *
+     * @return the number of groups found for this <code>Command</code>
+     */
+    protected Integer getNumberOfGroups() {
+      return null;
+    }
 
-  @Override
-  public void setScorer(Scorer scorer) throws IOException {
-    collector.setScorer(scorer);
-  }
+    protected NamedList commonResponse() {
+      NamedList groupResult = new SimpleOrderedMap();
+      grouped.add(key, groupResult);  // grouped={ key={
 
-  @Override
-  public void collect(int doc) throws IOException {
-    matches++;
-    if (filter.exists(doc + docBase)) {
-      collector.collect(doc);
+      int matches = getMatches();
+      groupResult.add("matches", matches);
+      if (totalCount == TotalCount.grouped) {
+        Integer totalNrOfGroups = getNumberOfGroups();
+        groupResult.add("ngroups", totalNrOfGroups == null ? 0 : totalNrOfGroups);
+      }
+      maxMatches = Math.max(maxMatches, matches);
+      return groupResult;
     }
-  }
 
-  @Override
-  public void setNextReader(AtomicReaderContext context) throws IOException {
-    docBase = context.docBase;
-    collector.setNextReader(context);
-  }
+    protected DocList getDocList(GroupDocs groups) {
+      int max = groups.totalHits;
+      int off = groupOffset;
+      int len = docsPerGroup;
+      if (format == Format.simple) {
+        off = offset;
+        len = numGroups;
+      }
+      int docsToCollect = getMax(off, len, max);
 
-  @Override
-  public boolean acceptsDocsOutOfOrder() {
-    return collector.acceptsDocsOutOfOrder();
-  }
+      // TODO: implement a DocList impl that doesn't need to start at offset=0
+      int docsCollected = Math.min(docsToCollect, groups.scoreDocs.length);
 
-  @Override
-  public int getMatches() {
-    return matches;
-  }
+      int ids[] = new int[docsCollected];
+      float[] scores = needScores ? new float[docsCollected] : null;
+      for (int i = 0; i < ids.length; i++) {
+        ids[i] = groups.scoreDocs[i].doc;
+        if (scores != null)
+          scores[i] = groups.scoreDocs[i].score;
+      }
 
-  Collector getCollector() {
-    return collector;
-  }
-}
+      float score = groups.maxScore;
+      maxScore = Math.max(maxScore, score);
+      DocSlice docs = new DocSlice(off, Math.max(0, ids.length - off), ids, scores, groups.totalHits, score);
 
+      if (getDocList) {
+        DocIterator iter = docs.iterator();
+        while (iter.hasNext())
+          idSet.add(iter.nextDoc());
+      }
+      return docs;
+    }
 
+    protected void addDocList(NamedList rsp, GroupDocs groups) {
+      rsp.add("doclist", getDocList(groups));
+    }
 
+    // Flatten the groups and gather up to offset + rows documents
+    protected DocList createSimpleResponse() {
+      GroupDocs[] groups = result != null ? result.groups : new GroupDocs[0];
+
+      List<Integer> ids = new ArrayList<Integer>();
+      List<Float> scores = new ArrayList<Float>();
+      int docsToGather = getMax(offset, numGroups, maxDoc);
+      int docsGathered = 0;
+      float maxScore = Float.NEGATIVE_INFINITY;
+
+      outer:
+      for (GroupDocs group : groups) {
+        if (group.maxScore > maxScore) {
+          maxScore = group.maxScore;
+        }
+
+        for (ScoreDoc scoreDoc : group.scoreDocs) {
+          if (docsGathered >= docsToGather) {
+            break outer;
+          }
 
-/** Finds the top set of groups, grouped by groupByVS when sort == group.sort */
-class TopGroupCollector extends GroupCollector {
-  final int nGroups;
-  final HashMap<MutableValue, SearchGroup> groupMap;
-  TreeSet<SearchGroup> orderedGroups;
-  final ValueSource vs;
-  final Map context;
-  final FieldComparator[] comparators;
-  final int[] reversed;
+          ids.add(scoreDoc.doc);
+          scores.add(scoreDoc.score);
+          docsGathered++;
+        }
+      }
 
-  DocValues docValues;
-  DocValues.ValueFiller filler;
-  MutableValue mval;
-  Scorer scorer;
-  int docBase;
-  int spareSlot;
+      // docs/docScores start at document 0, so the slice may only expose the
+      // entries from offset onwards; len must never run past the arrays
+      // (same rule getDocList applies with Math.max(0, ids.length - off)).
+      int len = Math.max(0, docsGathered - offset);
 
-  int matches;
+      int[] docs = ArrayUtils.toPrimitive(ids.toArray(new Integer[ids.size()]));
+      float[] docScores = ArrayUtils.toPrimitive(scores.toArray(new Float[scores.size()]));
+      DocSlice docSlice = new DocSlice(offset, len, docs, docScores, getMatches(), maxScore);
 
-  public TopGroupCollector(ValueSource groupByVS, Map vsContext, Sort weightedSort, int nGroups) throws IOException {
-    this.vs = groupByVS;
-    this.context = vsContext;
-    this.nGroups = nGroups = Math.max(1,nGroups);  // we need a minimum of 1 for this collector
+      if (getDocList) {
+        for (int i = offset; i < docs.length; i++) {
+          idSet.add(docs[i]);
+        }
+      }
 
-    SortField[] sortFields = weightedSort.getSort();
-    this.comparators = new FieldComparator[sortFields.length];
-    this.reversed = new int[sortFields.length];
-    for (int i = 0; i < sortFields.length; i++) {
-      SortField sortField = sortFields[i];
-      reversed[i] = sortField.getReverse() ? -1 : 1;
-      // use nGroups + 1 so we have a spare slot to use for comparing (tracked by this.spareSlot)
-      comparators[i] = sortField.getComparator(nGroups + 1, i);
+      return docSlice;
     }
-    this.spareSlot = nGroups;
 
-    this.groupMap = new HashMap<MutableValue, SearchGroup>(nGroups);
   }
 
-  @Override
-  public void setScorer(Scorer scorer) throws IOException {
-    this.scorer = scorer;
-    for (FieldComparator fc : comparators)
-      fc.setScorer(scorer);
-  }
+  /**
+   * A group command for grouping on a field.
+   */
+  public class CommandField extends Command<BytesRef> {
 
-  @Override
-  public void collect(int doc) throws IOException {
-    matches++;
+    public String groupBy;
+    TermFirstPassGroupingCollector firstPass;
+    TermSecondPassGroupingCollector secondPass;
 
-    // if orderedGroups != null, then we already have collected N groups and
-    // can short circuit by comparing this document to the smallest group
-    // without having to even find what group this document belongs to.
-    // Even if this document belongs to a group in the top N, we know that
-    // we don't have to update that group.
-    //
-    // Downside: if the number of unique groups is very low, this is
-    // wasted effort as we will most likely be updating an existing group.
-    if (orderedGroups != null) {
-      for (int i = 0;; i++) {
-        final int c = reversed[i] * comparators[i].compareBottom(doc);
-        if (c < 0) {
-          // Definitely not competitive. So don't even bother to continue
-          return;
-        } else if (c > 0) {
-          // Definitely competitive.
-          break;
-        } else if (i == comparators.length - 1) {
-          // Here c=0. If we're at the last comparator, this doc is not
-          // competitive, since docs are visited in doc Id order, which means
-          // this doc cannot compete with any other document in the queue.
-          return;
-        }
+    TermAllGroupsCollector allGroupsCollector;
+
+    // If offset falls outside the number of documents a group can provide use this collector instead of secondPass
+    TotalHitCountCollector fallBackCollector;
+    Collection<SearchGroup<BytesRef>> topGroups;
+
+    /**
+     * {@inheritDoc}
+     */
+    protected void prepare() throws IOException {
+      actualGroupsToFind = getMax(offset, numGroups, maxDoc);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected Collector createFirstPassCollector() throws IOException {
+      // Ok we don't want groups, but do want a total count
+      if (actualGroupsToFind <= 0) {
+        fallBackCollector = new TotalHitCountCollector();
+        return fallBackCollector;
       }
+
+      sort = sort == null ? Sort.RELEVANCE : sort;
+      firstPass = new TermFirstPassGroupingCollector(groupBy, sort, actualGroupsToFind);
+      return firstPass;
     }
 
-    filler.fillValue(doc);
-    SearchGroup group = groupMap.get(mval);
-    if (group == null) {
-      int num = groupMap.size();
-      if (groupMap.size() < nGroups) {
-        SearchGroup sg = new SearchGroup();
-        sg.groupValue = mval.duplicate();
-        sg.comparatorSlot = num++;
-        sg.matches = 1;
-        sg.topDoc = docBase + doc;
-        // sg.topDocScore = scorer.score();
-        for (FieldComparator fc : comparators)
-          fc.copy(sg.comparatorSlot, doc);
-        groupMap.put(sg.groupValue, sg);
-        if (groupMap.size() == nGroups) {
-          buildSet();
+    /**
+     * {@inheritDoc}
+     */
+    protected Collector createSecondPassCollector() throws IOException {
+      if (actualGroupsToFind <= 0) {
+        allGroupsCollector = new TermAllGroupsCollector(groupBy);
+        return totalCount == TotalCount.grouped ? allGroupsCollector : null;
+      }
+
+      topGroups = format == Format.grouped ? firstPass.getTopGroups(offset, false) : firstPass.getTopGroups(0, false);
+      if (topGroups == null) {
+        if (totalCount == TotalCount.grouped) {
+          allGroupsCollector = new TermAllGroupsCollector(groupBy);
+          fallBackCollector = new TotalHitCountCollector();
+          return MultiCollector.wrap(allGroupsCollector, fallBackCollector);
+        } else {
+          fallBackCollector = new TotalHitCountCollector();
+          return fallBackCollector;
         }
+      }
+
+      int groupedDocsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
+      groupedDocsToCollect = Math.max(groupedDocsToCollect, 1);
+      secondPass = new TermSecondPassGroupingCollector(
+          groupBy, topGroups, sort, groupSort, groupedDocsToCollect, needScores, needScores, false
+      );
+
+      if (totalCount == TotalCount.grouped) {
+        allGroupsCollector = new TermAllGroupsCollector(groupBy);
+        return MultiCollector.wrap(secondPass, allGroupsCollector);
+      } else {
+        return secondPass;
+      }
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected void finish() throws IOException {
+      result = secondPass != null ? secondPass.getTopGroups(0) : null;
+      if (main) {
+        mainResult = createSimpleResponse();
         return;
       }
 
-      // we already tested that the document is competitive, so replace
-      // the smallest group with this new group.
+      NamedList groupResult = commonResponse();
 
-      // remove current smallest group
-      SearchGroup smallest = orderedGroups.pollLast();
-      assert orderedGroups.size() == nGroups -1;
-
-      groupMap.remove(smallest.groupValue);
-
-      // reuse the removed SearchGroup
-      smallest.groupValue.copy(mval);
-      smallest.matches = 1;
-      smallest.topDoc = docBase + doc;
-      // smallest.topDocScore = scorer.score();
-      for (FieldComparator fc : comparators)
-        fc.copy(smallest.comparatorSlot, doc);
-
-      groupMap.put(smallest.groupValue, smallest);
-      orderedGroups.add(smallest);
-      assert orderedGroups.size() == nGroups;
+      if (format == Format.simple) {
+        groupResult.add("doclist", createSimpleResponse());
+        return;
+      }
 
-      for (FieldComparator fc : comparators)
-        fc.setBottom(orderedGroups.last().comparatorSlot);
+      List groupList = new ArrayList();
+      groupResult.add("groups", groupList);        // grouped={ key={ groups=[
 
-      return;
+      if (result == null) {
+        return;
+      }
+
+      // handle case of rows=0
+      if (numGroups == 0) return;
+
+      for (GroupDocs<BytesRef> group : result.groups) {
+        NamedList nl = new SimpleOrderedMap();
+        groupList.add(nl);                         // grouped={ key={ groups=[ {
+
+
+        // To keep the response format compatible with trunk.
+        // In trunk MutableValue can convert an indexed value to its native type. E.g. string to int
+        // The only option I currently see is to use the FieldType for this
+        if (group.groupValue != null) {
+          SchemaField schemaField = searcher.getSchema().getField(groupBy);
+          FieldType fieldType = schemaField.getType();
+          String readableValue = fieldType.indexedToReadable(group.groupValue.utf8ToString());
+          Fieldable field = schemaField.createField(readableValue, 0.0f);
+          nl.add("groupValue", fieldType.toObject(field));
+        } else {
+          nl.add("groupValue", null);
+        }
+
+        addDocList(nl, group);
+      }
     }
 
-    //
-    // update existing group
-    //
-
-    group.matches++; // TODO: these aren't valid if the group is every discarded then re-added.  keep track if there have been discards?
-
-    for (int i = 0;; i++) {
-      FieldComparator fc = comparators[i];
-      fc.copy(spareSlot, doc);
-
-      final int c = reversed[i] * fc.compare(group.comparatorSlot, spareSlot);
-      if (c < 0) {
-        // Definitely not competitive.
-        return;
-      } else if (c > 0) {
-        // Definitely competitive.
-        // Set remaining comparators
-        for (int j=i+1; j<comparators.length; j++)
-          comparators[j].copy(spareSlot, doc);
-        break;
-      } else if (i == comparators.length - 1) {
-        // Here c=0. If we're at the last comparator, this doc is not
-        // competitive, since docs are visited in doc Id order, which means
-        // this doc cannot compete with any other document in the queue.
-        return;
+    /**
+     * {@inheritDoc}
+     */
+    public int getMatches() {
+      if (result == null && fallBackCollector == null) {
+        return 0;
       }
+
+      return result != null ? result.totalHitCount : fallBackCollector.getTotalHits();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected Integer getNumberOfGroups() {
+      return allGroupsCollector == null ? null : allGroupsCollector.getGroupCount();
+    }
+  }
+
+  /**
+   * A group command for grouping on a query.
+   */
+  //NOTE: doesn't need to be generic. Maybe Command interface --> First / Second pass abstract impl.
+  public class CommandQuery extends Command {
+
+    public Query query;
+    TopDocsCollector topCollector;
+    FilterCollector collector;
+
+    /**
+     * {@inheritDoc}
+     */
+    protected void prepare() throws IOException {
+      actualGroupsToFind = getMax(offset, numGroups, maxDoc);
     }
 
-    // remove before updating the group since lookup is done via comparators
-    // TODO: optimize this
+    /**
+     * {@inheritDoc}
+     */
+    protected Collector createFirstPassCollector() throws IOException {
+      DocSet groupFilt = searcher.getDocSet(query);
+      topCollector = newCollector(groupSort, needScores);
+      collector = new FilterCollector(groupFilt, topCollector);
+      return collector;
+    }
 
-    SearchGroup prevLast = null;
-    if (orderedGroups != null) {
-      prevLast = orderedGroups.last();
-      orderedGroups.remove(group);
-      assert orderedGroups.size() == nGroups-1;
-    }
-
-    group.topDoc = docBase + doc;
-    // group.topDocScore = scorer.score();
-    int tmp = spareSlot; spareSlot = group.comparatorSlot; group.comparatorSlot=tmp;  // swap slots
-
-    // re-add the changed group
-    if (orderedGroups != null) {
-      orderedGroups.add(group);
-      assert orderedGroups.size() == nGroups;
-      SearchGroup newLast = orderedGroups.last();
-      // if we changed the value of the last group, or changed which group was last, then update bottom
-      if (group == newLast || prevLast != newLast) {
-        for (FieldComparator fc : comparators)
-          fc.setBottom(newLast.comparatorSlot);
+    TopDocsCollector newCollector(Sort sort, boolean needScores) throws IOException {
+      int groupDocsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
+      if (sort == null || sort == Sort.RELEVANCE) {
+        return TopScoreDocCollector.create(groupDocsToCollect, true);
+      } else {
+        return TopFieldCollector.create(searcher.weightSort(sort), groupDocsToCollect, false, needScores, needScores, true);
       }
     }
-  }
 
-  void buildSet() {
-    Comparator<SearchGroup> comparator = new Comparator<SearchGroup>() {
-      public int compare(SearchGroup o1, SearchGroup o2) {
-        for (int i = 0;; i++) {
-          FieldComparator fc = comparators[i];
-          int c = reversed[i] * fc.compare(o1.comparatorSlot, o2.comparatorSlot);
-          if (c != 0) {
-            return c;
-          } else if (i == comparators.length - 1) {
-            return o1.topDoc - o2.topDoc;
-          }
-        }
+    /**
+     * {@inheritDoc}
+     */
+    protected void finish() throws IOException {
+      TopDocsCollector topDocsCollector = (TopDocsCollector) collector.collector;
+      TopDocs topDocs = topDocsCollector.topDocs();
+      GroupDocs<String> groupDocs = new GroupDocs<String>(topDocs.getMaxScore(), topDocs.totalHits, topDocs.scoreDocs, query.toString(), null);
+      if (main) {
+        mainResult = getDocList(groupDocs);
+      } else {
+        NamedList rsp = commonResponse();
+        addDocList(rsp, groupDocs);
       }
-    };
+    }
 
-    orderedGroups = new TreeSet<SearchGroup>(comparator);
-    orderedGroups.addAll(groupMap.values());
-    if (orderedGroups.size() == 0) return;
-    for (FieldComparator fc : comparators)
-      fc.setBottom(orderedGroups.last().comparatorSlot);
+    /**
+     * {@inheritDoc}
+     */
+    public int getMatches() {
+      return collector.matches;
+    }
   }
 
-  @Override
-  public void setNextReader(AtomicReaderContext readerContext) throws IOException {
-    this.docBase = readerContext.docBase;
-    docValues = vs.getValues(context, readerContext);
-    filler = docValues.getValueFiller();
-    mval = filler.getValue();
-    for (int i=0; i<comparators.length; i++)
-      comparators[i] = comparators[i].setNextReader(readerContext);
-  }
+  /**
+   * A command for grouping on a function.
+   */
+  public class CommandFunc extends Command<MutableValue> {
 
-  @Override
-  public boolean acceptsDocsOutOfOrder() {
-    return false;
-  }
+    public ValueSource groupBy;
+    Map context;
 
-  @Override
-  public int getMatches() {
-    return matches;
-  }
-}
+    FunctionFirstPassGroupingCollector firstPass;
+    FunctionSecondPassGroupingCollector secondPass;
+    // If offset falls outside the number of documents a group can provide use this collector instead of secondPass
+    TotalHitCountCollector fallBackCollector;
+    FunctionAllGroupsCollector allGroupsCollector;
+    Collection<SearchGroup<MutableValue>> topGroups;
+
+    /**
+     * {@inheritDoc} Stores the new function context in the <code>context</code> field read by the collectors.
+     */
+    protected void prepare() throws IOException {
+      context = ValueSource.newContext(searcher);  // field, not a shadowing local
+      groupBy.createWeight(context, searcher);
+      actualGroupsToFind = getMax(offset, numGroups, maxDoc);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected Collector createFirstPassCollector() throws IOException {
+      // Ok we don't want groups, but do want a total count
+      if (actualGroupsToFind <= 0) {
+        fallBackCollector = new TotalHitCountCollector();
+        return fallBackCollector;
+      }
+
+      sort = sort == null ? Sort.RELEVANCE : sort;
+      firstPass = new FunctionFirstPassGroupingCollector(groupBy, context, searcher.weightSort(sort), actualGroupsToFind);
+      return firstPass;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected Collector createSecondPassCollector() throws IOException {
+      if (actualGroupsToFind <= 0) {
+        allGroupsCollector = new FunctionAllGroupsCollector(groupBy, context);
+        return totalCount == TotalCount.grouped ? allGroupsCollector : null;
+      }
+
+      topGroups = format == Format.grouped ? firstPass.getTopGroups(offset, false) : firstPass.getTopGroups(0, false);
+      if (topGroups == null) {
+        if (totalCount == TotalCount.grouped) {
+          allGroupsCollector = new FunctionAllGroupsCollector(groupBy, context);
+          fallBackCollector = new TotalHitCountCollector();
+          return MultiCollector.wrap(allGroupsCollector, fallBackCollector);
+        } else {
+          fallBackCollector = new TotalHitCountCollector();
+          return fallBackCollector;
+        }
+      }
 
+      int groupdDocsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
+      groupdDocsToCollect = Math.max(groupdDocsToCollect, 1);
+      secondPass = new FunctionSecondPassGroupingCollector(
+          topGroups, sort, groupSort, groupdDocsToCollect, needScores, needScores, false, groupBy, context
+      );
+
+      if (totalCount == TotalCount.grouped) {
+        allGroupsCollector = new FunctionAllGroupsCollector(groupBy, context);
+        return MultiCollector.wrap(secondPass, allGroupsCollector);
+      } else {
+        return secondPass;
+      }
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected void finish() throws IOException {
+      result = secondPass != null ? secondPass.getTopGroups(0) : null;
+      if (main) {
+        mainResult = createSimpleResponse();
+        return;
+      }
 
-class Phase2GroupCollector extends Collector {
-  final HashMap<MutableValue, SearchGroupDocs> groupMap;
-  final ValueSource vs;
-  final Map context;
+      NamedList groupResult = commonResponse();
+
+      if (format == Format.simple) {
+        groupResult.add("doclist", createSimpleResponse());
+        return;
+      }
 
-  DocValues docValues;
-  DocValues.ValueFiller filler;
-  MutableValue mval;
-  Scorer scorer;
-  int docBase;
+      List groupList = new ArrayList();
+      groupResult.add("groups", groupList);        // grouped={ key={ groups=[
 
-  // TODO: may want to decouple from the phase1 collector
-  public Phase2GroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort weightedSort, int docsPerGroup, boolean getScores, int offset) throws IOException {
-    boolean getSortFields = false;
+      if (result == null) {
+        return;
+      }
 
-    if (topGroups.orderedGroups == null)
-      topGroups.buildSet();
+      // handle case of rows=0
+      if (numGroups == 0) return;
 
-    groupMap = new HashMap<MutableValue, SearchGroupDocs>(topGroups.groupMap.size());
-    for (SearchGroup group : topGroups.orderedGroups) {
-      if (offset > 0) {
-        offset--;
-        continue;
+      for (GroupDocs<MutableValue> group : result.groups) {
+        NamedList nl = new SimpleOrderedMap();
+        groupList.add(nl);                         // grouped={ key={ groups=[ {
+        nl.add("groupValue", group.groupValue.toObject());
+        addDocList(nl, group);
       }
-      SearchGroupDocs groupDocs = new SearchGroupDocs();
-      groupDocs.groupValue = group.groupValue;
-      if (weightedSort==null)
-        groupDocs.collector = TopScoreDocCollector.create(docsPerGroup, true);        
-      else
-        groupDocs.collector = TopFieldCollector.create(weightedSort, docsPerGroup, getSortFields, getScores, getScores, true);
-      groupMap.put(groupDocs.groupValue, groupDocs);
     }
 
-    this.vs = groupByVS;
-    this.context = vsContext;
-  }
+    /**
+     * {@inheritDoc}
+     */
+    public int getMatches() {
+      if (result == null && fallBackCollector == null) {
+        return 0;
+      }
 
-  @Override
-  public void setScorer(Scorer scorer) throws IOException {
-    this.scorer = scorer;
-    for (SearchGroupDocs group : groupMap.values())
-      group.collector.setScorer(scorer);
-  }
+      return result != null ? result.totalHitCount : fallBackCollector.getTotalHits();
+    }
 
-  @Override
-  public void collect(int doc) throws IOException {
-    filler.fillValue(doc);
-    SearchGroupDocs group = groupMap.get(mval);
-    if (group == null) return;
-    group.collector.collect(doc);
-  }
+    /**
+     * {@inheritDoc}
+     */
+    protected Integer getNumberOfGroups() {
+      return allGroupsCollector == null ? null : allGroupsCollector.getGroupCount();
+    }
 
-  @Override
-  public void setNextReader(AtomicReaderContext readerContext) throws IOException {
-    this.docBase = readerContext.docBase;
-    docValues = vs.getValues(context, readerContext);
-    filler = docValues.getValueFiller();
-    mval = filler.getValue();
-    for (SearchGroupDocs group : groupMap.values())
-      group.collector.setNextReader(readerContext);
   }
 
-  @Override
-  public boolean acceptsDocsOutOfOrder() {
-    return false;
-  }
-}
+  /**
+   * A collector that filters incoming doc ids that are not in the filter
+   */
+  static class FilterCollector extends Collector {
+
+    final DocSet filter;
+    final Collector collector;
+    int docBase;
+    int matches;
 
-// TODO: merge with SearchGroup or not?
-// ad: don't need to build a new hashmap
-// disad: blows up the size of SearchGroup if we need many of them, and couples implementations
-class SearchGroupDocs {
-  public MutableValue groupValue;
-  TopDocsCollector collector;
-}
+    public FilterCollector(DocSet filter, Collector collector) throws IOException {
+      this.filter = filter;
+      this.collector = collector;
+    }
 
+    public void setScorer(Scorer scorer) throws IOException {
+      collector.setScorer(scorer);
+    }
 
+    public void collect(int doc) throws IOException {
+      matches++;
+      if (filter.exists(doc + docBase)) {
+        collector.collect(doc);
+      }
+    }
 
-class Phase2StringGroupCollector extends Phase2GroupCollector {
-  FieldCache.DocTermsIndex index;
-  final SentinelIntSet ordSet;
-  final SearchGroupDocs[] groups;
-  final BytesRef spare = new BytesRef();
+    public void setNextReader(AtomicReaderContext context) throws IOException {
+      this.docBase = context.docBase;
+      collector.setNextReader(context);
+    }
 
-  public Phase2StringGroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort weightedSort, int docsPerGroup, boolean getScores, int offset) throws IOException {
-    super(topGroups, groupByVS, vsContext,weightedSort,docsPerGroup,getScores,offset);
-    ordSet = new SentinelIntSet(groupMap.size(), -1);
-    groups = new SearchGroupDocs[ordSet.keys.length];
+    public boolean acceptsDocsOutOfOrder() {
+      return collector.acceptsDocsOutOfOrder();
+    }
   }
 
-  @Override
-  public void setScorer(Scorer scorer) throws IOException {
-    this.scorer = scorer;
-    for (SearchGroupDocs group : groupMap.values())
-      group.collector.setScorer(scorer);
+  static class FunctionFirstPassGroupingCollector extends AbstractFirstPassGroupingCollector<MutableValue> {
+
+    private final ValueSource groupByVS;
+    private final Map vsContext;
+
+    private DocValues docValues;
+    private DocValues.ValueFiller filler;
+    private MutableValue mval;
+
+    FunctionFirstPassGroupingCollector(ValueSource groupByVS, Map vsContext, Sort groupSort, int topNGroups) throws IOException {
+      super(groupSort, topNGroups);
+      this.groupByVS = groupByVS;
+      this.vsContext = vsContext;
+    }
+
+    @Override
+    protected MutableValue getDocGroupValue(int doc) {
+      filler.fillValue(doc);
+      return mval;
+    }
+
+    @Override
+    protected MutableValue copyDocGroupValue(MutableValue groupValue, MutableValue reuse) {
+      if (reuse != null) {
+        reuse.copy(groupValue);
+        return reuse;
+      }
+      return groupValue.duplicate();
+    }
+
+    @Override
+    public void setNextReader(AtomicReaderContext readerContext) throws IOException {
+      super.setNextReader(readerContext);
+      docValues = groupByVS.getValues(vsContext, readerContext);
+      filler = docValues.getValueFiller();
+      mval = filler.getValue();
+    }
   }
 
-  @Override
-  public void collect(int doc) throws IOException {
-    int slot = ordSet.find(index.getOrd(doc));
-    if (slot >= 0) {
-      groups[slot].collector.collect(doc);
+  static class FunctionSecondPassGroupingCollector extends AbstractSecondPassGroupingCollector<MutableValue> {
+
+    private final ValueSource groupByVS;
+    private final Map vsContext;
+
+    private DocValues docValues;
+    private DocValues.ValueFiller filler;
+    private MutableValue mval;
+
+    FunctionSecondPassGroupingCollector(Collection<SearchGroup<MutableValue>> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields, ValueSource groupByVS, Map vsContext) throws IOException {
+      super(searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+      this.groupByVS = groupByVS;
+      this.vsContext = vsContext;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected SearchGroupDocs<MutableValue> retrieveGroup(int doc) throws IOException {
+      filler.fillValue(doc);
+      return groupMap.get(mval);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public void setNextReader(AtomicReaderContext readerContext) throws IOException {
+      super.setNextReader(readerContext);
+      docValues = groupByVS.getValues(vsContext, readerContext);
+      filler = docValues.getValueFiller();
+      mval = filler.getValue();
     }
   }
 
-  @Override
-  public void setNextReader(AtomicReaderContext context) throws IOException {
-    super.setNextReader(context);
-    index = ((StringIndexDocValues)docValues).getDocTermsIndex();
 
-    ordSet.clear();
-    for (SearchGroupDocs group : groupMap.values()) {
-      MutableValueStr gv = (MutableValueStr)group.groupValue;
-      int ord = 0;
-      if (gv.exists) {
-        ord = index.binarySearchLookup(((MutableValueStr)group.groupValue).value, spare);
-      }
-      if (ord >= 0) {
-        int slot = ordSet.put(ord);
-        groups[slot] = group;
+  static class FunctionAllGroupsCollector extends AbstractAllGroupsCollector<MutableValue> {
+
+    private final Map vsContext;
+    private final ValueSource groupBy;
+    private final SortedSet<MutableValue> groups = new TreeSet<MutableValue>();
+
+    private DocValues docValues;
+    private DocValues.ValueFiller filler;
+    private MutableValue mval;
+
+    FunctionAllGroupsCollector(ValueSource groupBy, Map vsContext) {
+      this.vsContext = vsContext;
+      this.groupBy = groupBy;
+    }
+
+    public Collection<MutableValue> getGroups() {
+      return groups;
+    }
+
+    public void collect(int doc) throws IOException {
+      filler.fillValue(doc);
+      if (!groups.contains(mval)) {
+        groups.add(mval.duplicate());
       }
     }
-  }
 
-  @Override
-  public boolean acceptsDocsOutOfOrder() {
-    return false;
+    /**
+     * {@inheritDoc}
+     */
+    public void setNextReader(AtomicReaderContext context) throws IOException {
+      docValues = groupBy.getValues(vsContext, context);
+      filler = docValues.getValueFiller();
+      mval = filler.getValue();
+    }
+
   }
+
 }
\ No newline at end of file

Modified: lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/search/function/FieldCacheSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/search/function/FieldCacheSource.java?rev=1137126&r1=1137125&r2=1137126&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/search/function/FieldCacheSource.java (original)
+++ lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/search/function/FieldCacheSource.java Sat Jun 18 05:16:57 2011
@@ -37,6 +37,10 @@ public abstract class FieldCacheSource e
     return cache;
   }
 
+  public String getField() {
+    return field;
+  }
+
   @Override
   public String description() {
     return field;

Modified: lucene/dev/branches/solr2452/solr/src/test/org/apache/solr/TestGroupingSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/test/org/apache/solr/TestGroupingSearch.java?rev=1137126&r1=1137125&r2=1137126&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/test/org/apache/solr/TestGroupingSearch.java (original)
+++ lucene/dev/branches/solr2452/solr/src/test/org/apache/solr/TestGroupingSearch.java Sat Jun 18 05:16:57 2011
@@ -20,6 +20,7 @@ package org.apache.solr;
 import org.apache.lucene.search.FieldCache;
 import org.apache.noggit.JSONUtil;
 import org.apache.noggit.ObjectBuilder;
+import org.apache.solr.common.params.GroupParams;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.schema.IndexSchema;
 import org.junit.Before;
@@ -89,6 +90,49 @@ public class TestGroupingSearch extends 
   }
 
   @Test
+  public void testGroupingGroupSortingScore_withTotalGroupCount() {
+    assertU(add(doc("id", "1","name", "author1", "title", "a book title", "group_si", "1")));
+    assertU(add(doc("id", "2","name", "author1", "title", "the title", "group_si", "2")));
+    assertU(add(doc("id", "3","name", "author2", "title", "a book title", "group_si", "1")));
+    assertU(add(doc("id", "4","name", "author2", "title", "title", "group_si", "2")));
+    assertU(add(doc("id", "5","name", "author3", "title", "the title of a title", "group_si", "1")));
+    assertU(commit());
+
+    assertQ(req("q","title:title", "group", "true", "group.field","name", "group.ngroups", "true")
+            ,"//lst[@name='grouped']/lst[@name='name']"
+            ,"//lst[@name='grouped']/lst[@name='name']/int[@name='matches'][.='5']"
+            ,"//lst[@name='grouped']/lst[@name='name']/int[@name='ngroups'][.='3']"
+            ,"*[count(//arr[@name='groups']/lst) = 3]"
+
+            ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
+            ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
+
+            ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
+            ,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"
+
+            ,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
+            ,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
+            ,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
+            );
+
+    assertQ(req("q","title:title", "group", "true", "group.field","group_si", "group.ngroups", "true")
+            ,"//lst[@name='grouped']/lst[@name='group_si']/int[@name='matches'][.='5']"
+            ,"//lst[@name='grouped']/lst[@name='group_si']/int[@name='ngroups'][.='2']"
+            ,"*[count(//arr[@name='groups']/lst) = 2]"
+
+            ,"//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']"
+            ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+            ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
+
+            ,"//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']"
+            ,"//arr[@name='groups']/lst[2]/result[@numFound='3']"
+            ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
+            );
+  }
+
+  @Test
   public void testGroupingGroupSortingScore_basicWithGroupSortEqualToSort() {
     assertU(add(doc("id", "1","name", "author1", "title", "a book title")));
     assertU(add(doc("id", "2","name", "author1", "title", "the title")));
@@ -353,7 +397,7 @@ public class TestGroupingSearch extends 
     , "/grouped/foo_i=={'matches':10,'doclist':"
         +"{'numFound':10,'start':1,'docs':[{'id':'10'},{'id':'3'},{'id':'6'}]}}"
     );
-  };
+  }
 
 
 
@@ -476,14 +520,16 @@ public class TestGroupingSearch extends 
         List<Grp> sortedGroups = new ArrayList(groups.values());
         Collections.sort(sortedGroups,  groupComparator==sortComparator ? createFirstDocComparator(sortComparator) : createMaxDocComparator(sortComparator));
 
-        Object modelResponse = buildGroupedResult(h.getCore().getSchema(), sortedGroups, start, rows, group_offset, group_limit);
+        boolean includeNGroups = random.nextBoolean();
+        Object modelResponse = buildGroupedResult(h.getCore().getSchema(), sortedGroups, start, rows, group_offset, group_limit, includeNGroups);
 
+        int randomPercentage = random.nextInt(101);
         // TODO: create a random filter too
-
         SolrQueryRequest req = req("group","true","wt","json","indent","true", "echoParams","all", "q","{!func}score_f", "group.field",groupField
             ,sortStr==null ? "nosort":"sort", sortStr ==null ? "": sortStr
-            ,(groupSortStr==null || groupSortStr==sortStr) ? "nosort":"group.sort", groupSortStr==null ? "": groupSortStr
-            ,"rows",""+rows, "start",""+start, "group.offset",""+group_offset, "group.limit",""+group_limit
+            ,(groupSortStr==null || groupSortStr==sortStr) ? "noGroupsort":"group.sort", groupSortStr==null ? "": groupSortStr
+            ,"rows",""+rows, "start",""+start, "group.offset",""+group_offset, "group.limit",""+group_limit,
+            GroupParams.GROUP_CACHE_PERCENTAGE, Integer.toString(randomPercentage), GroupParams.GROUP_TOTAL_COUNT, includeNGroups ? "true" : "false"
         );
 
         String strResponse = h.query(req);
@@ -508,7 +554,7 @@ public class TestGroupingSearch extends 
 
   }
 
-  public static Object buildGroupedResult(IndexSchema schema, List<Grp> sortedGroups, int start, int rows, int group_offset, int group_limit) {
+  public static Object buildGroupedResult(IndexSchema schema, List<Grp> sortedGroups, int start, int rows, int group_offset, int group_limit, boolean includeNGroups) {
     Map<String,Object> result = new LinkedHashMap<String,Object>();
 
     long matches = 0;
@@ -516,6 +562,9 @@ public class TestGroupingSearch extends 
       matches += grp.docs.size();
     }
     result.put("matches", matches);
+    if (includeNGroups) {
+      result.put("ngroups", sortedGroups.size());
+    }
     List groupList = new ArrayList();
     result.put("groups", groupList);
 



Mime
View raw message