Author: mvg
Date: Fri Jun 17 22:59:36 2011
New Revision: 1137067
URL: http://svn.apache.org/viewvc?rev=1137067&view=rev
Log:
SOLR-2524: Adding grouping to Solr 3x
Added:
lucene/dev/branches/branch_3x/solr/src/common/org/apache/solr/common/params/GroupParams.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrFieldSource.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/Grouping.java
lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/TestGroupingSearch.java
Modified:
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java
lucene/dev/branches/branch_3x/solr/CHANGES.txt
lucene/dev/branches/branch_3x/solr/common-build.xml
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/QueryComponent.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrField.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java Fri Jun 17 22:59:36 2011
@@ -110,7 +110,16 @@ public abstract class CachingCollector e
curScores = new float[128];
cachedScores.add(curScores);
}
-
+
+ ScoreCachingCollector(Collector other, int maxDocsToCache) {
+ super(other, maxDocsToCache);
+
+ cachedScorer = new CachedScorer();
+ cachedScores = new ArrayList<float[]>();
+ curScores = new float[INITIAL_ARRAY_SIZE];
+ cachedScores.add(curScores);
+ }
+
@Override
public void collect(int doc) throws IOException {
@@ -212,6 +221,10 @@ public abstract class CachingCollector e
NoScoreCachingCollector(Collector other, double maxRAMMB) {
super(other, maxRAMMB, false);
}
+
+ NoScoreCachingCollector(Collector other, int maxDocsToCache) {
+ super(other, maxDocsToCache);
+ }
@Override
public void collect(int doc) throws IOException {
@@ -356,6 +369,24 @@ public abstract class CachingCollector e
public static CachingCollector create(Collector other, boolean cacheScores, double maxRAMMB) {
return cacheScores ? new ScoreCachingCollector(other, maxRAMMB) : new NoScoreCachingCollector(other, maxRAMMB);
}
+
+ /**
+ * Create a new {@link CachingCollector} that wraps the given collector and
+ * caches documents and scores up to the specified max docs threshold.
+ *
+ * @param other
+ * the Collector to wrap and delegate calls to.
+ * @param cacheScores
+ * whether to cache scores in addition to document IDs. Note that
+ * this increases the RAM consumed per doc
+ * @param maxDocsToCache
+ * the maximum number of documents for caching the documents and
+ * possibly the scores. If the collector exceeds the threshold,
+ * no documents and scores are cached.
+ */
+ public static CachingCollector create(Collector other, boolean cacheScores, int maxDocsToCache) {
+ return cacheScores ? new ScoreCachingCollector(other, maxDocsToCache) : new NoScoreCachingCollector(other, maxDocsToCache);
+ }
// Prevent extension from non-internal classes
private CachingCollector(Collector other, double maxRAMMB, boolean cacheScores) {
@@ -372,6 +403,15 @@ public abstract class CachingCollector e
maxDocsToCache = (int) ((maxRAMMB * 1024 * 1024) / bytesPerDoc);
}
+ private CachingCollector(Collector other, int maxDocsToCache) {
+ this.other = other;
+
+ cachedDocs = new ArrayList<int[]>();
+ curDocs = new int[INITIAL_ARRAY_SIZE];
+ cachedDocs.add(curDocs);
+ this.maxDocsToCache = maxDocsToCache;
+ }
+
@Override
public boolean acceptsDocsOutOfOrder() {
return other.acceptsDocsOutOfOrder();
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java Fri Jun 17 22:59:36 2011
@@ -17,15 +17,11 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.CachingCollector;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Weight;
import org.apache.lucene.util.LuceneTestCase;
+import java.io.IOException;
+
public class TestCachingCollector extends LuceneTestCase {
private static final double ONE_BYTE = 1.0 / (1024 * 1024); // 1 byte out of MB
@@ -76,7 +72,7 @@ public class TestCachingCollector extend
public void testBasic() throws Exception {
for (boolean cacheScores : new boolean[] { false, true }) {
- CachingCollector cc = CachingCollector.create(new NoOpCollector(false), cacheScores, 1);
+ CachingCollector cc = CachingCollector.create(new NoOpCollector(false), cacheScores, 1.0);
cc.setScorer(new MockScorer());
// collect 1000 docs
Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Fri Jun 17 22:59:36 2011
@@ -110,7 +110,15 @@ New Features
* SOLR-1915: DebugComponent now supports using a NamedList to model
Explanation objects in it's responses instead of
- Explanation.toString (hossman)
+ Explanation.toString (hossman)
+
+* SOLR-2524: (SOLR-236, SOLR-237, SOLR-1773, SOLR-1311) Grouping / Field collapsing
+using the Lucene grouping contrib. The search result can be grouped by field and query.
+(Martijn van Groningen, Emmanuel Keller, Shalin Shekhar Mangar, Koji Sekiguchi,
+ Iván de Prado, Ryan McKinley, Marc Sturlese, Peter Karich, Bojan Smid,
+ Charles Hornberger, Dieter Grad, Dmitry Lihachev, Doug Steigerwald,
+ Karsten Sperling, Michael Gundlach, Oleg Gnatovskiy, Thomas Traeger,
+ Harish Agarwal, yonik, Michael McCandless, Bill Bell)
Optimizations
Modified: lucene/dev/branches/branch_3x/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/common-build.xml?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/common-build.xml (original)
+++ lucene/dev/branches/branch_3x/solr/common-build.xml Fri Jun 17 22:59:36 2011
@@ -201,7 +201,8 @@
<pathelement location="${common-solr.dir}/../lucene/build/contrib/queries/classes/java" />
<pathelement location="${common-solr.dir}/../lucene/build/contrib/spatial/classes/java" />
<pathelement location="${common-solr.dir}/../lucene/build/contrib/spellchecker/classes/java" />
- </path>
+ <pathelement location="${common-solr.dir}/../lucene/build/contrib/grouping/classes/java" />
+ </path>
<target name="prep-lucene-jars">
<sequential>
@@ -216,6 +217,7 @@
<fileset dir="../lucene/contrib/queries" includes="build.xml" />
<fileset dir="../lucene/contrib/spatial" includes="build.xml" />
<fileset dir="../lucene/contrib/spellchecker" includes="build.xml" />
+ <fileset dir="../lucene/contrib/grouping" includes="build.xml" />
</subant>
</sequential>
</target>
@@ -247,6 +249,9 @@
<fileset dir="../lucene/build/contrib/spellchecker">
<include name="lucene-spellchecker-${version}.jar" />
</fileset>
+ <fileset dir="../lucene/build/contrib/grouping">
+ <include name="lucene-grouping-${version}.jar" />
+ </fileset>
</copy>
</target>
@@ -260,6 +265,7 @@
<ant antfile="build.xml" target="compile" dir="../lucene/contrib/spatial"/>
<ant antfile="build.xml" target="compile" dir="../lucene/contrib/spellchecker"/>
<ant antfile="build.xml" target="compile" dir="../lucene/contrib/xml-query-parser"/>
+ <ant antfile="build.xml" target="compile" dir="../lucene/contrib/grouping"/>
</target>
Added: lucene/dev/branches/branch_3x/solr/src/common/org/apache/solr/common/params/GroupParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/common/org/apache/solr/common/params/GroupParams.java?rev=1137067&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/common/org/apache/solr/common/params/GroupParams.java (added)
+++ lucene/dev/branches/branch_3x/solr/src/common/org/apache/solr/common/params/GroupParams.java Fri Jun 17 22:59:36 2011
@@ -0,0 +1,57 @@
+package org.apache.solr.common.params;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Group parameters.
+ */
+public interface GroupParams {
+
+ public static final String GROUP = "group";
+
+ public static final String GROUP_QUERY = GROUP + ".query";
+ public static final String GROUP_FIELD = GROUP + ".field";
+ public static final String GROUP_SORT = GROUP + ".sort";
+
+ /** the limit for the number of documents in each group */
+ public static final String GROUP_LIMIT = GROUP + ".limit";
+ /** the offset for the doclist of each group */
+ public static final String GROUP_OFFSET = GROUP + ".offset";
+
+ /** treat the first group result as the main result. true/false */
+ public static final String GROUP_MAIN = GROUP + ".main";
+
+ /** the format of the grouped response, e.g. grouped (the default) or simple */
+ public static final String GROUP_FORMAT = GROUP + ".format";
+
+ /**
+ * Whether to cache the first pass search (doc ids and scores) for the second pass search.
+ * Also defines the maximum size of the group cache as a percentage of maxdoc.
+ * Valid values are integers from 0 to 100. A value of 0 disables the group cache.
+ * The default is 0. */
+ public static final String GROUP_CACHE_PERCENTAGE = GROUP + ".cache.percent";
+
+ // Note: since you can supply multiple fields to group on but only have facets for the whole result, it only makes
+ // sense to support these parameters for the first group.
+ /** Whether the docSet (for example for faceting) should be based on plain documents (a.k.a UNGROUPED) or on the groups (a.k.a GROUPED). */
+ public static final String GROUP_COLLAPSE = GROUP + ".collapse";
+
+ /** Whether the group count should be included in the response. */
+ public static final String GROUP_TOTAL_COUNT = GROUP + ".ngroups";
+
+}
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/QueryComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/QueryComponent.java?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/QueryComponent.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/component/QueryComponent.java Fri Jun 17 22:59:36 2011
@@ -27,10 +27,7 @@ import org.apache.lucene.search.SortFiel
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
-import org.apache.solr.common.params.CommonParams;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.params.ShardParams;
-import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.params.*;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.request.SolrQueryRequest;
@@ -179,6 +176,85 @@ public class QueryComponent extends Sear
SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
cmd.setTimeAllowed(timeAllowed);
SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();
+
+ //
+ // grouping / field collapsing
+ //
+ boolean doGroup = params.getBool(GroupParams.GROUP, false);
+ if (doGroup) {
+ try {
+ int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
+ boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
+ String[] fields = params.getParams(GroupParams.GROUP_FIELD);
+ String[] queries = params.getParams(GroupParams.GROUP_QUERY);
+ String groupSortStr = params.get(GroupParams.GROUP_SORT);
+ boolean main = params.getBool(GroupParams.GROUP_MAIN, false);
+
+ String formatStr = params.get(GroupParams.GROUP_FORMAT, Grouping.Format.grouped.name());
+ Grouping.Format defaultFormat;
+ try {
+ defaultFormat = Grouping.Format.valueOf(formatStr);
+ } catch (IllegalArgumentException e) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, String.format("Illegal %s parameter", GroupParams.GROUP_FORMAT));
+ }
+
+ boolean includeTotalGroupCount = params.getBool(GroupParams.GROUP_TOTAL_COUNT, false);
+ Grouping.TotalCount defaultTotalCount = includeTotalGroupCount ? Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
+ Sort sort = cmd.getSort();
+ // groupSort defaults to sort
+ Sort groupSort = groupSortStr == null ? cmd.getSort() : QueryParsing.parseSort(groupSortStr, req);
+
+ int limitDefault = cmd.getLen(); // this is normally from "rows"
+ int groupOffsetDefault = params.getInt(GroupParams.GROUP_OFFSET, 0);
+ int docsPerGroupDefault = params.getInt(GroupParams.GROUP_LIMIT, 1);
+
+ Grouping grouping = new Grouping(searcher, result, cmd, cacheSecondPassSearch, maxDocsPercentageToCache, main);
+ grouping.setSort(sort)
+ .setGroupSort(groupSort)
+ .setDefaultFormat(defaultFormat)
+ .setLimitDefault(limitDefault)
+ .setDefaultTotalCount(defaultTotalCount)
+ .setDocsPerGroupDefault(docsPerGroupDefault)
+ .setGroupOffsetDefault(groupOffsetDefault);
+
+ if (fields != null) {
+ for (String field : fields) {
+ grouping.addFieldCommand(field, rb.req);
+ }
+ }
+
+ if (queries != null) {
+ for (String groupByStr : queries) {
+ grouping.addQueryCommand(groupByStr, rb.req);
+ }
+ }
+
+ if (rb.doHighlights || rb.isDebug()) {
+ // we need a single list of the returned docs
+ cmd.setFlags(SolrIndexSearcher.GET_DOCLIST);
+ }
+
+ grouping.execute();
+ rb.setResult( result );
+ if (grouping.isSignalCacheWarning()) {
+ rsp.add(
+ "cacheWarning",
+ String.format("Cache limit of %d percent relative to maxdoc has exceeded. Please increase cache size or disable caching.", maxDocsPercentageToCache)
+ );
+ }
+ rsp.add("grouped", result.groupedResults);
+ if (grouping.mainResult != null) {
+ rsp.add("response", grouping.mainResult);
+ rsp.getToLog().add("hits", grouping.mainResult.matches());
+ } else {
+ rsp.getToLog().add("hits", grouping.getCommands().get(0).getMatches());
+ }
+ return;
+ } catch (ParseException e) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
+ }
+ }
+
searcher.search(result,cmd);
rb.setResult( result );
@@ -309,6 +385,7 @@ public class QueryComponent extends Sear
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
mergeIds(rb, sreq);
+ mergeGroupCounts(rb, sreq);
}
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
@@ -525,6 +602,9 @@ public class QueryComponent extends Sear
// we already have the field sort values
sreq.params.remove(ResponseBuilder.FIELD_SORT_VALUES);
+
+ // disable grouping
+ sreq.params.remove("group");
// make sure that the id is returned for correlation.
String fl = sreq.params.get(CommonParams.FL);
@@ -578,6 +658,42 @@ public class QueryComponent extends Sear
}
}
+ /**
+ * Merges the collapse responses from the shards into one distributed collapse response.
+ *
+ * @param rb The response builder
+ * @param sreq The shard request
+ */
+ private void mergeGroupCounts(ResponseBuilder rb, ShardRequest sreq) {
+ NamedList combinedGroupCounts = new NamedList<Object>();
+
+ for (ShardResponse srsp : sreq.responses) {
+ //check if the namelist is null or not (if a shard crashed)
+ if (srsp.getSolrResponse().getResponse() == null) {
+ continue;
+ }
+
+ NamedList groupCounts = (NamedList<Object>) srsp.getSolrResponse().getResponse().get("groupCount");
+ /*for (Object o : rb.resultIds.keySet()) {
+ String id = (String) o;
+ }*/
+
+ if (groupCounts != null) {
+ for (int i = 0; i < groupCounts.size(); i++) {
+ String groupGroupId = groupCounts.getName(i);
+ ShardDoc sdoc = rb.resultIds.get(groupGroupId);
+ if (sdoc != null) {
+ combinedGroupCounts.add(groupGroupId, groupCounts.getVal(i));
+ }
+ }
+ }
+ }
+
+ if (combinedGroupCounts.size() > 0) {
+ rb.rsp.add("groupCount", combinedGroupCounts);
+ }
+ }
+
/////////////////////////////////////////////
/// SolrInfoMBean
////////////////////////////////////////////
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrField.java?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrField.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrField.java Fri Jun 17 22:59:36 2011
@@ -17,19 +17,15 @@
package org.apache.solr.schema;
-import org.apache.lucene.search.SortField;
import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.SortField;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.response.XMLWriter;
-import org.apache.solr.search.function.ValueSource;
-import org.apache.solr.search.function.FieldCacheSource;
-import org.apache.solr.search.function.DocValues;
-import org.apache.solr.search.function.StringIndexDocValues;
import org.apache.solr.search.QParser;
+import org.apache.solr.search.function.ValueSource;
-import java.util.Map;
import java.io.IOException;
+import java.util.Map;
/**
* @version $Id$
*/
@@ -59,71 +55,4 @@ public class StrField extends FieldType
field.checkFieldCacheSource(parser);
return new StrFieldSource(field.getName());
}
-}
-
-
-class StrFieldSource extends FieldCacheSource {
-
- public StrFieldSource(String field) {
- super(field);
- }
-
- @Override
- public String description() {
- return "str(" + field + ')';
- }
-
- @Override
- public DocValues getValues(Map context, IndexReader reader) throws IOException {
- return new StringIndexDocValues(this, reader, field) {
- @Override
- protected String toTerm(String readableValue) {
- return readableValue;
- }
-
- @Override
- public float floatVal(int doc) {
- return (float)intVal(doc);
- }
-
- @Override
- public int intVal(int doc) {
- int ord=order[doc];
- return ord;
- }
-
- @Override
- public long longVal(int doc) {
- return (long)intVal(doc);
- }
-
- @Override
- public double doubleVal(int doc) {
- return (double)intVal(doc);
- }
-
- @Override
- public String strVal(int doc) {
- int ord=order[doc];
- return lookup[ord];
- }
-
- @Override
- public String toString(int doc) {
- return description() + '=' + strVal(doc);
- }
- };
- }
-
- @Override
- public boolean equals(Object o) {
- return o instanceof StrFieldSource
- && super.equals(o);
- }
-
- private static int hcode = SortableFloatFieldSource.class.hashCode();
- @Override
- public int hashCode() {
- return hcode + super.hashCode();
- };
}
\ No newline at end of file
Added: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrFieldSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrFieldSource.java?rev=1137067&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrFieldSource.java (added)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/schema/StrFieldSource.java Fri Jun 17 22:59:36 2011
@@ -0,0 +1,92 @@
+package org.apache.solr.schema;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.solr.search.function.DocValues;
+import org.apache.solr.search.function.FieldCacheSource;
+import org.apache.solr.search.function.StringIndexDocValues;
+
+import java.io.IOException;
+import java.util.Map;
+
+public class StrFieldSource extends FieldCacheSource {
+
+ public StrFieldSource(String field) {
+ super(field);
+ }
+
+ @Override
+ public String description() {
+ return "str(" + field + ')';
+ }
+
+ @Override
+ public DocValues getValues(Map context, IndexReader reader) throws IOException {
+ return new StringIndexDocValues(this, reader, field) {
+ @Override
+ protected String toTerm(String readableValue) {
+ return readableValue;
+ }
+
+ @Override
+ public float floatVal(int doc) {
+ return (float)intVal(doc);
+ }
+
+ @Override
+ public int intVal(int doc) {
+ int ord=order[doc];
+ return ord;
+ }
+
+ @Override
+ public long longVal(int doc) {
+ return (long)intVal(doc);
+ }
+
+ @Override
+ public double doubleVal(int doc) {
+ return (double)intVal(doc);
+ }
+
+ @Override
+ public String strVal(int doc) {
+ int ord=order[doc];
+ return lookup[ord];
+ }
+
+ @Override
+ public String toString(int doc) {
+ return description() + '=' + strVal(doc);
+ }
+ };
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ return o instanceof StrFieldSource
+ && super.equals(o);
+ }
+
+ private static int hcode = SortableFloatFieldSource.class.hashCode();
+ @Override
+ public int hashCode() {
+ return hcode + super.hashCode();
+ };
+}
Added: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/Grouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/Grouping.java?rev=1137067&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/Grouping.java (added)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/Grouping.java Fri Jun 17 22:59:36 2011
@@ -0,0 +1,789 @@
+package org.apache.solr.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.grouping.*;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.StrFieldSource;
+import org.apache.solr.search.function.OrdFieldSource;
+import org.apache.solr.search.function.ReverseOrdFieldSource;
+import org.apache.solr.search.function.ValueSource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Basic Solr Grouping infrastructure.
+ * Warning: NOT thread safe!
+ *
+ * @lucene.experimental
+ */
+public class Grouping {
+
+ private final static Logger logger = LoggerFactory.getLogger(Grouping.class);
+
+ private final SolrIndexSearcher searcher;
+ private final SolrIndexSearcher.QueryResult qr;
+ private final SolrIndexSearcher.QueryCommand cmd;
+ private final List<Command> commands = new ArrayList<Command>();
+ private final boolean main;
+ private final boolean cacheSecondPassSearch;
+ private final int maxDocsPercentageToCache;
+
+ private Sort sort;
+ private Sort groupSort;
+ private int limitDefault;
+ private int docsPerGroupDefault;
+ private int groupOffsetDefault;
+ private Format defaultFormat;
+ private TotalCount defaultTotalCount;
+
+ private int maxDoc;
+ private boolean needScores;
+ private boolean getDocSet;
+ private boolean getDocList; // doclist needed for debugging or highlighting
+ private Query query;
+ private DocSet filter;
+ private Filter luceneFilter;
+ private NamedList grouped = new SimpleOrderedMap();
+ private Set<Integer> idSet = new LinkedHashSet<Integer>(); // used for tracking unique docs when we need a doclist
+ private int maxMatches; // max number of matches from any grouping command
+ private float maxScore = Float.NEGATIVE_INFINITY; // max score seen in any doclist
+ private boolean signalCacheWarning = false;
+
+
+ public DocList mainResult; // output if one of the grouping commands should be used as the main result.
+
+ /**
+ * @param searcher
+ * @param qr
+ * @param cmd
+ * @param cacheSecondPassSearch Whether to cache the documents and scores from the first pass search for the second
+ * pass search.
+ * @param maxDocsPercentageToCache The maximum number of documents allowed in the cache, expressed as a
+ * percentage of maxdoc. When this threshold is exceeded,
+ * the cache is not used in the second pass search.
+ */
+ public Grouping(SolrIndexSearcher searcher,
+ SolrIndexSearcher.QueryResult qr,
+ SolrIndexSearcher.QueryCommand cmd,
+ boolean cacheSecondPassSearch,
+ int maxDocsPercentageToCache,
+ boolean main) {
+ this.searcher = searcher;
+ this.qr = qr;
+ this.cmd = cmd;
+ this.cacheSecondPassSearch = cacheSecondPassSearch;
+ this.maxDocsPercentageToCache = maxDocsPercentageToCache;
+ this.main = main;
+ }
+
+ public void add(Grouping.Command groupingCommand) {
+ commands.add(groupingCommand);
+ }
+
+ /**
+ * Adds a field command based on the specified field.
+ *
+ * @param field The fieldname to group by.
+ */
+ public void addFieldCommand(String field, SolrQueryRequest request) throws ParseException {
+ SchemaField schemaField = searcher.getSchema().getField(field); // Throws an exception when field doesn't exist. Bad request.
+ FieldType fieldType = schemaField.getType();
+ ValueSource valueSource = fieldType.getValueSource(schemaField, null);
+ if (!(valueSource instanceof OrdFieldSource
+ || valueSource instanceof ReverseOrdFieldSource
+ || valueSource instanceof StrFieldSource)) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Cannot group on non string like field.");
+ }
+
+ Grouping.CommandField gc = new CommandField();
+ gc.groupSort = groupSort;
+ gc.groupBy = field;
+ gc.key = field;
+ gc.numGroups = limitDefault;
+ gc.docsPerGroup = docsPerGroupDefault;
+ gc.groupOffset = groupOffsetDefault;
+ gc.offset = cmd.getOffset();
+ gc.sort = sort;
+ gc.format = defaultFormat;
+ gc.totalCount = defaultTotalCount;
+
+ if (main) {
+ gc.main = true;
+ gc.format = Grouping.Format.simple;
+ }
+
+ if (gc.format == Grouping.Format.simple) {
+ gc.groupOffset = 0; // doesn't make sense
+ }
+ commands.add(gc);
+ }
+
+ public void addQueryCommand(String groupByStr, SolrQueryRequest request) throws ParseException {
+ QParser parser = QParser.getParser(groupByStr, null, request);
+ Query gq = parser.getQuery();
+ Grouping.CommandQuery gc = new CommandQuery();
+ gc.query = gq;
+ gc.groupSort = groupSort;
+ gc.key = groupByStr;
+ gc.numGroups = limitDefault;
+ gc.docsPerGroup = docsPerGroupDefault;
+ gc.groupOffset = groupOffsetDefault;
+
+ // these two params will only be used if this is for the main result set
+ gc.offset = cmd.getOffset();
+ gc.numGroups = limitDefault;
+ gc.format = defaultFormat;
+
+ if (main) {
+ gc.main = true;
+ gc.format = Grouping.Format.simple;
+ }
+ if (gc.format == Grouping.Format.simple) {
+ gc.docsPerGroup = gc.numGroups; // doesn't make sense to limit to one
+ gc.groupOffset = gc.offset;
+ }
+
+ commands.add(gc);
+ }
+
+ public Grouping setSort(Sort sort) {
+ this.sort = sort;
+ return this;
+ }
+
+ public Grouping setGroupSort(Sort groupSort) {
+ this.groupSort = groupSort;
+ return this;
+ }
+
+ public Grouping setLimitDefault(int limitDefault) {
+ this.limitDefault = limitDefault;
+ return this;
+ }
+
+ public Grouping setDocsPerGroupDefault(int docsPerGroupDefault) {
+ this.docsPerGroupDefault = docsPerGroupDefault;
+ return this;
+ }
+
+ public Grouping setGroupOffsetDefault(int groupOffsetDefault) {
+ this.groupOffsetDefault = groupOffsetDefault;
+ return this;
+ }
+
+ public Grouping setDefaultFormat(Format defaultFormat) {
+ this.defaultFormat = defaultFormat;
+ return this;
+ }
+
+ public Grouping setDefaultTotalCount(TotalCount defaultTotalCount) {
+ this.defaultTotalCount = defaultTotalCount;
+ return this;
+ }
+
+ public List<Command> getCommands() {
+ return commands;
+ }
+
+ public void execute() throws IOException {
+ if (commands.isEmpty()) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specify at least one field, function or query to group by.");
+ }
+ // Two-pass grouped search: each command supplies a first and (optionally) a second pass collector.
+ DocListAndSet out = new DocListAndSet();
+ qr.setDocListAndSet(out);
+
+ filter = cmd.getFilter() != null ? cmd.getFilter() : searcher.getDocSet(cmd.getFilterList());
+ luceneFilter = filter == null ? null : filter.getTopFilter();
+ maxDoc = searcher.maxDoc();
+
+ needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
+ boolean cacheScores = false;
+ // NOTE: Change this when groupSort can be specified per group
+ if (cacheSecondPassSearch && !needScores && !commands.isEmpty()) {
+ if (commands.get(0).groupSort == null) {
+ cacheScores = true;
+ } else {
+ for (SortField field : commands.get(0).groupSort.getSort()) {
+ if (field.getType() == SortField.SCORE) {
+ cacheScores = true;
+ break;
+ }
+ }
+ }
+ } else if (needScores) {
+ cacheScores = needScores;
+ }
+ getDocSet = (cmd.getFlags() & SolrIndexSearcher.GET_DOCSET) != 0;
+ getDocList = (cmd.getFlags() & SolrIndexSearcher.GET_DOCLIST) != 0;
+ query = QueryUtils.makeQueryable(cmd.getQuery());
+
+ for (Command cmd : commands) {
+ cmd.prepare();
+ }
+
+ List<Collector> collectors = new ArrayList<Collector>(commands.size());
+ for (Command cmd : commands) {
+ Collector collector = cmd.createFirstPassCollector();
+ if (collector != null)
+ collectors.add(collector);
+ }
+
+ Collector allCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
+ DocSetCollector setCollector = null;
+ if (getDocSet) {
+ setCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, allCollectors);
+ allCollectors = setCollector;
+ }
+
+ CachingCollector cachedCollector = null;
+ if (cacheSecondPassSearch && allCollectors != null) {
+ int maxDocsToCache = (int) Math.round(maxDoc * (maxDocsPercentageToCache / 100.0d));
+ // Only makes sense to cache if we cache more than zero.
+ // Maybe we should have a minimum and a maximum, that defines the window we would like caching for.
+ if (maxDocsToCache > 0) {
+ allCollectors = cachedCollector = CachingCollector.create(allCollectors, cacheScores, maxDocsToCache);
+ }
+ }
+
+ if (allCollectors != null) {
+ searcher.search(query, luceneFilter, allCollectors);
+ }
+
+ if (getDocSet) {
+ qr.setDocSet(setCollector.getDocSet());
+ }
+
+ collectors.clear(); // the list is reused for the second pass collectors
+ for (Command cmd : commands) {
+ Collector collector = cmd.createSecondPassCollector();
+ if (collector != null)
+ collectors.add(collector);
+ }
+
+ if (!collectors.isEmpty()) {
+ Collector secondPhaseCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
+ if (collectors.size() > 0) {
+ if (cachedCollector != null) {
+ if (cachedCollector.isCached()) {
+ cachedCollector.replay(secondPhaseCollectors); // replay the cached first pass hits instead of searching again
+ } else {
+ signalCacheWarning = true;
+ logger.warn(String.format("The grouping cache is active, but not used because it exceeded the max cache limit of %d percent", maxDocsPercentageToCache));
+ logger.warn("Please increase cache size or disable group caching.");
+ searcher.search(query, luceneFilter, secondPhaseCollectors); // cache unusable: run the query a second time
+ }
+ } else {
+ searcher.search(query, luceneFilter, secondPhaseCollectors);
+ }
+ }
+ }
+
+ for (Command cmd : commands) {
+ cmd.finish();
+ }
+
+ qr.groupedResults = grouped;
+
+ if (getDocList) {
+ int sz = idSet.size();
+ int[] ids = new int[sz];
+ int idx = 0;
+ for (int val : idSet) {
+ ids[idx++] = val;
+ }
+ qr.setDocList(new DocSlice(0, sz, ids, null, maxMatches, maxScore));
+ }
+ }
+
+ /**
+ * Computes the end position of a window of <code>len</code> entries that starts at <code>offset</code>, capped at max.
+ * A negative len, a negative sum (e.g. after int overflow) or a sum greater than max all yield max.
+ * @param offset The offset
+ * @param len The number of documents to return
+ * @param max The value to return if len < 0, if offset + len < 0 or if offset + len > max
+ * @return offset + len when len is zero or higher and the sum lies within [0, max]. Otherwise returns max
+ */
+ int getMax(int offset, int len, int max) {
+ if (len < 0) return max;
+ int wanted = offset + len;
+ return (wanted < 0 || wanted > max) ? max : wanted;
+ }
+
+ /**
+ * Returns whether a cache warning should be sent to the client.
+ * The value <code>true</code> is returned when execute() could not replay the first pass cache because the caching
+ * limit was exceeded, otherwise <code>false</code> is returned.
+ *
+ * @return whether a cache warning should be sent to the client
+ */
+ public boolean isSignalCacheWarning() {
+ return signalCacheWarning;
+ }
+
+ //====================================== Inner classes =============================================================
+
+ public static enum Format {
+
+ /**
+ * Grouped result. Each group has its own result set.
+ */
+ grouped,
+
+ /**
+ * Flat result. All documents of all groups are put in one list.
+ */
+ simple
+ }
+
+ public static enum TotalCount {
+ /**
+ * Computations should be based on groups.
+ */
+ grouped,
+
+ /**
+ * Computations should be based on plain documents, so not taking grouping into account.
+ */
+ ungrouped
+ }
+
+ /**
+ * General group command. A group command is responsible for creating the first and second pass collectors.
+ * A group command is also responsible for creating the response structure.
+ * <p/>
+ * Note: Maybe creating the response structure should be done in something like a ResponseBuilder?
+ * Warning: NOT thread safe!
+ */
+ public abstract class Command<GROUP_VALUE_TYPE> {
+
+ public String key; // the name to use for this group in the response
+ public Sort groupSort; // the sort of the documents *within* a single group.
+ public Sort sort; // the sort between groups
+ public int docsPerGroup; // how many docs in each group - from "group.limit" param, default=1
+ public int groupOffset; // the offset within each group (for paging within each group)
+ public int numGroups; // how many groups - defaults to the "rows" parameter
+ int actualGroupsToFind; // How many groups should actually be found. Based on groupOffset and numGroups.
+ public int offset; // offset into the list of groups
+ public Format format;
+ public boolean main; // use as the main result in simple format (grouped.main=true param)
+ public TotalCount totalCount = TotalCount.ungrouped;
+
+ TopGroups<GROUP_VALUE_TYPE> result;
+
+
+ /**
+ * Prepare this <code>Command</code> for execution.
+ *
+ * @throws IOException If I/O related errors occur
+ */
+ protected abstract void prepare() throws IOException;
+
+ /**
+ * Returns one or more {@link Collector} instances that are needed to perform the first pass search.
+ * If multiple Collectors are returned then these are wrapped in a {@link org.apache.lucene.search.MultiCollector}.
+ *
+ * @return one or more {@link Collector} instances that are needed to perform the first pass search
+ * @throws IOException If I/O related errors occur
+ */
+ protected abstract Collector createFirstPassCollector() throws IOException;
+
+ /**
+ * Returns zero or more {@link Collector} instances that are needed to perform the second pass search.
+ * In the case when no {@link Collector} instances are created <code>null</code> is returned.
+ * If multiple Collectors are returned then these are wrapped in a {@link org.apache.lucene.search.MultiCollector}.
+ *
+ * @return zero or more {@link Collector} instances that are needed to perform the second pass search
+ * @throws IOException If I/O related errors occur
+ */
+ protected Collector createSecondPassCollector() throws IOException {
+ return null; // default: no second pass; subclasses override this when they need one
+ }
+
+ /**
+ * Performs any necessary post actions to prepare the response.
+ *
+ * @throws IOException If I/O related errors occur
+ */
+ protected abstract void finish() throws IOException;
+
+ /**
+ * Returns the number of matches for this <code>Command</code>.
+ *
+ * @return the number of matches for this <code>Command</code>
+ */
+ public abstract int getMatches();
+
+ /**
+ * Returns the number of groups found for this <code>Command</code>.
+ * If the command doesn't support counting the groups <code>null</code> is returned.
+ *
+ * @return the number of groups found for this <code>Command</code>
+ */
+ protected Integer getNumberOfGroups() {
+ return null;
+ }
+
+ protected NamedList commonResponse() {
+ NamedList groupResult = new SimpleOrderedMap();
+ grouped.add(key, groupResult); // grouped={ key={
+
+ int matches = getMatches();
+ groupResult.add("matches", matches);
+ if (totalCount == TotalCount.grouped) {
+ Integer totalNrOfGroups = getNumberOfGroups();
+ groupResult.add("ngroups", totalNrOfGroups == null ? 0 : totalNrOfGroups);
+ }
+ maxMatches = Math.max(maxMatches, matches);
+ return groupResult;
+ }
+
+ protected DocList getDocList(GroupDocs groups) {
+ int max = groups.totalHits;
+ int off = groupOffset;
+ int len = docsPerGroup;
+ if (format == Format.simple) {
+ off = offset;
+ len = numGroups;
+ }
+ int docsToCollect = getMax(off, len, max);
+
+ // TODO: implement a DocList impl that doesn't need to start at offset=0
+ int docsCollected = Math.min(docsToCollect, groups.scoreDocs.length);
+
+ int ids[] = new int[docsCollected];
+ float[] scores = needScores ? new float[docsCollected] : null;
+ for (int i = 0; i < ids.length; i++) {
+ ids[i] = groups.scoreDocs[i].doc;
+ if (scores != null)
+ scores[i] = groups.scoreDocs[i].score;
+ }
+
+ float score = groups.maxScore;
+ maxScore = Math.max(maxScore, score);
+ DocSlice docs = new DocSlice(off, Math.max(0, ids.length - off), ids, scores, groups.totalHits, score);
+
+ if (getDocList) {
+ DocIterator iter = docs.iterator();
+ while (iter.hasNext())
+ idSet.add(iter.nextDoc());
+ }
+ return docs;
+ }
+
+ protected void addDocList(NamedList rsp, GroupDocs groups) {
+ rsp.add("doclist", getDocList(groups));
+ }
+
+ // Flattens the docs of all groups into one list and returns the page that starts at offset as a DocSlice.
+ protected DocList createSimpleResponse() {
+ GroupDocs[] groups = result != null ? result.groups : new GroupDocs[0];
+
+ List<Integer> ids = new ArrayList<Integer>();
+ List<Float> scores = new ArrayList<Float>();
+ int docsToGather = getMax(offset, numGroups, maxDoc); // offset + rows, or maxDoc when rows is negative
+ int docsGathered = 0;
+ float maxScore = Float.NEGATIVE_INFINITY;
+
+ outer:
+ for (GroupDocs group : groups) {
+ if (group.maxScore > maxScore) {
+ maxScore = group.maxScore;
+ }
+
+ for (ScoreDoc scoreDoc : group.scoreDocs) {
+ if (docsGathered >= docsToGather) {
+ break outer;
+ }
+
+ ids.add(scoreDoc.doc);
+ scores.add(scoreDoc.score);
+ docsGathered++;
+ }
+ }
+ // The arrays start at flattened position 0, so the page consists of whatever was gathered beyond offset.
+ int len = docsGathered - offset;
+ if (len < 0) {
+ len = 0;
+ }
+
+ int[] docs = ArrayUtils.toPrimitive(ids.toArray(new Integer[ids.size()]));
+ float[] docScores = ArrayUtils.toPrimitive(scores.toArray(new Float[scores.size()]));
+ DocSlice docSlice = new DocSlice(offset, len, docs, docScores, getMatches(), maxScore);
+
+ if (getDocList) {
+ for (int i = offset; i < docs.length; i++) {
+ idSet.add(docs[i]);
+ }
+ }
+
+ return docSlice;
+ }
+
+ }
+
+ /**
+ * A group command for grouping on a field.
+ */
+ public class CommandField extends Command<String> {
+
+ public String groupBy;
+ TermFirstPassGroupingCollector firstPass;
+ TermSecondPassGroupingCollector secondPass;
+
+ TermAllGroupsCollector allGroupsCollector;
+
+ // If offset falls outside the number of documents a group can provide use this collector instead of secondPass
+ TotalHitCountCollector fallBackCollector;
+ Collection<SearchGroup<String>> topGroups;
+
+ /**
+ * {@inheritDoc}
+ */
+ protected void prepare() throws IOException {
+ actualGroupsToFind = getMax(offset, numGroups, maxDoc);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ protected Collector createFirstPassCollector() throws IOException {
+ // Ok we don't want groups, but do want a total count
+ if (actualGroupsToFind <= 0) {
+ fallBackCollector = new TotalHitCountCollector();
+ return fallBackCollector;
+ }
+
+ sort = sort == null ? Sort.RELEVANCE : sort;
+ firstPass = new TermFirstPassGroupingCollector(groupBy, sort, actualGroupsToFind);
+ return firstPass;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ protected Collector createSecondPassCollector() throws IOException {
+ if (actualGroupsToFind <= 0) {
+ allGroupsCollector = new TermAllGroupsCollector(groupBy);
+ return totalCount == TotalCount.grouped ? allGroupsCollector : null;
+ }
+
+ topGroups = format == Format.grouped ? firstPass.getTopGroups(offset, false) : firstPass.getTopGroups(0, false);
+ if (topGroups == null) {
+ if (totalCount == TotalCount.grouped) {
+ allGroupsCollector = new TermAllGroupsCollector(groupBy);
+ fallBackCollector = new TotalHitCountCollector();
+ return MultiCollector.wrap(allGroupsCollector, fallBackCollector);
+ } else {
+ fallBackCollector = new TotalHitCountCollector();
+ return fallBackCollector;
+ }
+ }
+
+ int groupedDocsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
+ groupedDocsToCollect = Math.max(groupedDocsToCollect, 1);
+ secondPass = new TermSecondPassGroupingCollector(
+ groupBy, topGroups, sort, groupSort, groupedDocsToCollect, needScores, needScores, false
+ );
+
+ if (totalCount == TotalCount.grouped) {
+ allGroupsCollector = new TermAllGroupsCollector(groupBy);
+ return MultiCollector.wrap(secondPass, allGroupsCollector);
+ } else {
+ return secondPass;
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ protected void finish() throws IOException {
+ result = secondPass != null ? secondPass.getTopGroups(0) : null;
+ if (main) {
+ mainResult = createSimpleResponse();
+ return;
+ }
+
+ NamedList groupResult = commonResponse();
+
+ if (format == Format.simple) {
+ groupResult.add("doclist", createSimpleResponse());
+ return;
+ }
+
+ List groupList = new ArrayList();
+ groupResult.add("groups", groupList); // grouped={ key={ groups=[
+
+ if (result == null) {
+ return;
+ }
+
+ // handle case of rows=0
+ if (numGroups == 0) return;
+
+ for (GroupDocs<String> group : result.groups) {
+ NamedList nl = new SimpleOrderedMap();
+ groupList.add(nl); // grouped={ key={ groups=[ {
+
+
+ // To keep the response format compatible with trunk.
+ // In trunk MutableValue can convert an indexed value to its native type. E.g. string to int
+ // The only option I currently see is to use the FieldType for this
+ if (group.groupValue != null) {
+ SchemaField schemaField = searcher.getSchema().getField(groupBy);
+ FieldType fieldType = schemaField.getType();
+ String readableValue = fieldType.indexedToReadable(group.groupValue);
+ Fieldable field = schemaField.createField(readableValue, 0.0f);
+ nl.add("groupValue", fieldType.toObject(field));
+ } else {
+ nl.add("groupValue", null);
+ }
+
+ addDocList(nl, group);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public int getMatches() {
+ if (result == null && fallBackCollector == null) {
+ return 0;
+ }
+
+ return result != null ? result.totalHitCount : fallBackCollector.getTotalHits();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ protected Integer getNumberOfGroups() {
+ return allGroupsCollector == null ? null : allGroupsCollector.getGroupCount();
+ }
+ }
+
+ /**
+ * A group command for grouping on a query.
+ */
+ //NOTE: doesn't need to be generic. Maybe Command interface --> First / Second pass abstract impl.
+ public class CommandQuery extends Command {
+
+ public Query query;
+ TopDocsCollector topCollector;
+ FilterCollector collector;
+
+ /**
+ * {@inheritDoc}
+ */
+ protected void prepare() throws IOException {
+ actualGroupsToFind = getMax(offset, numGroups, maxDoc);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ protected Collector createFirstPassCollector() throws IOException {
+ DocSet groupFilt = searcher.getDocSet(query);
+ topCollector = newCollector(groupSort, needScores);
+ collector = new FilterCollector(groupFilt, topCollector);
+ return collector;
+ }
+
+ TopDocsCollector newCollector(Sort sort, boolean needScores) throws IOException {
+ int groupDocsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
+ if (sort == null || sort == Sort.RELEVANCE) {
+ return TopScoreDocCollector.create(groupDocsToCollect, true);
+ } else {
+ return TopFieldCollector.create(searcher.weightSort(sort), groupDocsToCollect, false, needScores, needScores, true);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ protected void finish() throws IOException {
+ TopDocsCollector topDocsCollector = (TopDocsCollector) collector.collector;
+ TopDocs topDocs = topDocsCollector.topDocs();
+ GroupDocs<String> groupDocs = new GroupDocs<String>(topDocs.getMaxScore(), topDocs.totalHits, topDocs.scoreDocs, query.toString(), null);
+ if (main) {
+ mainResult = getDocList(groupDocs);
+ } else {
+ NamedList rsp = commonResponse();
+ addDocList(rsp, groupDocs);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public int getMatches() {
+ return collector.matches;
+ }
+ }
+
+ /**
+ * A collector that forwards only the docs contained in the filter to the wrapped collector, while counting every doc it is offered
+ */
+ static class FilterCollector extends Collector {
+
+ final DocSet filter;
+ final Collector collector;
+ int docBase; // doc id base of the current reader, as passed to setNextReader
+ int matches; // number of collect() calls, including docs that the filter rejects
+
+ public FilterCollector(DocSet filter, Collector collector) throws IOException {
+ this.filter = filter;
+ this.collector = collector;
+ }
+
+ public void setScorer(Scorer scorer) throws IOException {
+ collector.setScorer(scorer);
+ }
+
+ public void collect(int doc) throws IOException {
+ matches++;
+ if (filter.exists(doc + docBase)) {
+ collector.collect(doc);
+ }
+ }
+
+ public void setNextReader(IndexReader reader, int docBase) throws IOException {
+ this.docBase = docBase;
+ collector.setNextReader(reader, docBase);
+ }
+
+ public boolean acceptsDocsOutOfOrder() {
+ return collector.acceptsDocsOutOfOrder();
+ }
+ }
+
+}
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java?rev=1137067&r1=1137066&r2=1137067&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java Fri Jun 17 22:59:36 2011
@@ -947,9 +947,10 @@ public class SolrIndexSearcher extends I
}
private static final int NO_CHECK_QCACHE = 0x80000000;
- private static final int GET_DOCSET = 0x40000000;
+ public static final int GET_DOCSET = 0x40000000;
private static final int NO_CHECK_FILTERCACHE = 0x20000000;
+ public static final int GET_DOCLIST = 0x02;
public static final int GET_SCORES = 0x01;
/**
@@ -1872,6 +1873,7 @@ public class SolrIndexSearcher extends I
public static class QueryResult {
private boolean partialResults;
private DocListAndSet docListAndSet;
+ public Object groupedResults; // Todo: Refactor. At least getter setter and different type.
public DocList getDocList() { return docListAndSet.docList; }
public void setDocList(DocList list) {
Added: lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/TestGroupingSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/TestGroupingSearch.java?rev=1137067&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/TestGroupingSearch.java (added)
+++ lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/TestGroupingSearch.java Fri Jun 17 22:59:36 2011
@@ -0,0 +1,713 @@
+package org.apache.solr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.FieldCache;
+import org.apache.noggit.JSONUtil;
+import org.apache.noggit.ObjectBuilder;
+import org.apache.solr.common.params.GroupParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.IndexSchema;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.*;
+
+/**
+ * Tests for the result grouping (group=true) search feature.
+ */
+public class TestGroupingSearch extends SolrTestCaseJ4 {
+
+ public static final String FOO_STRING_FIELD = "foo_s1";
+ public static final String SMALL_STRING_FIELD = "small_s1";
+ public static final String SMALL_INT_FIELD = "small_i";
+
+ @BeforeClass
+ public static void beforeTests() throws Exception {
+ initCore("solrconfig.xml","schema12.xml");
+ }
+
+ @Before
+ public void cleanIndex() {
+ assertU(delQ("*:*"));
+ assertU(commit());
+ }
+
+ @Test
+ public void testGroupingGroupSortingScore_basic() {
+ assertU(add(doc("id", "1","name", "author1", "title", "a book title", "group_sI", "1")));
+ assertU(add(doc("id", "2","name", "author1", "title", "the title", "group_sI", "2")));
+ assertU(add(doc("id", "3","name", "author2", "title", "a book title", "group_sI", "1")));
+ assertU(add(doc("id", "4","name", "author2", "title", "title", "group_sI", "2")));
+ assertU(add(doc("id", "5","name", "author3", "title", "the title of a title", "group_sI", "1")));
+ assertU(commit());
+
+ assertQ(req("q","title:title", "group", "true", "group.field","name")
+ ,"//lst[@name='grouped']/lst[@name='name']"
+ ,"*[count(//arr[@name='groups']/lst) = 3]"
+
+ ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
+ // ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
+ ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+ ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
+
+ ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
+ // ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
+ ,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
+ ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"
+
+ ,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
+ // ,"//arr[@name='groups']/lst[3]/int[@name='matches'][.='1']"
+ ,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
+ ,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
+ );
+
+ assertQ(req("q","title:title", "group", "true", "group.field","group_sI")
+ ,"//lst[@name='grouped']/lst[@name='group_sI']"
+ ,"*[count(//arr[@name='groups']/lst) = 2]"
+
+ ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='2']"
+ ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+ ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
+
+ ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='1']"
+ ,"//arr[@name='groups']/lst[2]/result[@numFound='3']"
+ ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
+ );
+ }
+
+ @Test
+ public void testGroupingGroupSortingScore_basicWithGroupSortEqualToSort() {
+ assertU(add(doc("id", "1","name", "author1", "title", "a book title")));
+ assertU(add(doc("id", "2","name", "author1", "title", "the title")));
+ assertU(add(doc("id", "3","name", "author2", "title", "a book title")));
+ assertU(add(doc("id", "4","name", "author2", "title", "title")));
+ assertU(add(doc("id", "5","name", "author3", "title", "the title of a title")));
+ assertU(commit());
+
+ assertQ(req("q","title:title", "group", "true", "group.field","name", "sort", "score desc", "group.sort", "score desc")
+ ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
+ // ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
+ ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+ ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
+
+ ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
+ // ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
+ ,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
+ ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"
+
+ ,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
+ // ,"//arr[@name='groups']/lst[3]/int[@name='matches'][.='1']"
+ ,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
+ ,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
+ );
+ }
+
+ @Test
+ public void testGroupingGroupSortingScore_withTotalGroupCount() {
+ assertU(add(doc("id", "1","name", "author1", "title", "a book title", "group_sI", "1")));
+ assertU(add(doc("id", "2","name", "author1", "title", "the title", "group_sI", "2")));
+ assertU(add(doc("id", "3","name", "author2", "title", "a book title", "group_sI", "1")));
+ assertU(add(doc("id", "4","name", "author2", "title", "title", "group_sI", "2")));
+ assertU(add(doc("id", "5","name", "author3", "title", "the title of a title", "group_sI", "1")));
+ assertU(commit());
+
+ assertQ(req("q","title:title", "group", "true", "group.field","name", "group.ngroups", "true")
+ ,"//lst[@name='grouped']/lst[@name='name']"
+ ,"//lst[@name='grouped']/lst[@name='name']/int[@name='matches'][.='5']"
+ ,"//lst[@name='grouped']/lst[@name='name']/int[@name='ngroups'][.='3']"
+ ,"*[count(//arr[@name='groups']/lst) = 3]"
+
+ ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
+ ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+ ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
+
+ ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
+ ,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
+ ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"
+
+ ,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
+ ,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
+ ,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
+ );
+
+ assertQ(req("q","title:title", "group", "true", "group.field","group_sI", "group.ngroups", "true")
+ ,"//lst[@name='grouped']/lst[@name='group_sI']/int[@name='matches'][.='5']"
+ ,"//lst[@name='grouped']/lst[@name='group_sI']/int[@name='ngroups'][.='2']"
+ ,"*[count(//arr[@name='groups']/lst) = 2]"
+
+ ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='2']"
+ ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+ ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
+
+ ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='1']"
+ ,"//arr[@name='groups']/lst[2]/result[@numFound='3']"
+ ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
+ );
+ }
+
+ @Test
+ public void testGroupingGroupSortingScore_basicWithSortFooIDescAndScoreAscWithCaching() {
+ assertU(add(doc("id", "1","name", "author1", "title", "a book title", "score_f", "20", "foo_i", "5")));
+ assertU(add(doc("id", "2","name", "author1", "title", "the title", "score_f", "10", "foo_i", "5")));
+ assertU(add(doc("id", "3","name", "author2", "title", "a book title", "score_f", "30", "foo_i", "3")));
+ assertU(commit());
+ assertU(add(doc("id", "4","name", "author2", "title", "title", "score_f", "40", "foo_i", "2")));
+ assertU(add(doc("id", "5","name", "author3", "title", "the titttle of a title blehh", "score_f", "50", "foo_i", "1")));
+ assertU(commit());
+
+ assertQ(req("q","{!func} score_f", "group", "true", "group.field","name", "sort", "foo_i desc, score asc", GroupParams.GROUP_CACHE_PERCENTAGE, "100")
+ ,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
+ ,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
+ ,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
+
+ ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author1']"
+ ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+ ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='2']"
+
+ ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author2']"
+ ,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
+ ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='3']"
+ );
+ }
+
+
+ @Test
+ public void testGroupingGroupSortingWeight() {
+ assertU(add(doc("id", "1","name", "author1", "weight", "12.1")));
+ assertU(add(doc("id", "2","name", "author1", "weight", "2.1")));
+ assertU(add(doc("id", "3","name", "author2", "weight", "0.1")));
+ assertU(add(doc("id", "4","name", "author2", "weight", "0.11")));
+ assertU(commit());
+
+ assertQ(req("q","*:*", "group", "true", "group.field","name", "sort", "id asc", "group.sort", "weight desc")
+ ,"*[count(//arr[@name='groups']/lst) = 2]"
+ ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author1']"
+ // ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
+ ,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
+ ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='1']"
+
+ ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author2']"
+ // ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
+ ,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
+ ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='4']"
+ );
+ }
+
+
+
+ static String f = "foo_s1";
+ static String f2 = "foo2_i";
+
+ public static void createIndex() {
+ assertU(adoc("id","1", f,"5", f2,"4"));
+ assertU(adoc("id","2", f,"4", f2,"2"));
+ assertU(adoc("id","3", f,"3", f2,"7"));
+ assertU(commit());
+ assertU(adoc("id","4", f,"2", f2,"6"));
+ assertU(adoc("id","5", f,"1", f2,"2"));
+ assertU(adoc("id","6", f,"3", f2,"2"));
+ assertU(adoc("id","7", f,"2", f2,"3"));
+ assertU(commit());
+ assertU(adoc("id","8", f,"1", f2,"10"));
+ assertU(adoc("id","9", f,"2", f2,"1"));
+ assertU(commit());
+ assertU(adoc("id","10", f,"1", f2,"3"));
+ assertU(commit());
+ }
+
+ @Test
+ public void testGroupedCount() throws Exception {
+ createIndex();
+ String filt = f + ":[* TO *]";
+
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "sort", f + " asc", "fl","id", "group.ngroups", "true")
+ ,"/responseHeader/status==0" // exact match
+ ,"/responseHeader=={'_SKIP_':'QTime', 'status':0}" // partial match by skipping some elements
+ ,"/responseHeader=={'_MATCH_':'status', 'status':0}" // partial match by only including some elements
+ ,"/grouped=={'"+f+"':{'matches':10,'ngroups': 5,'groups':[\n" +
+ "{'groupValue':'1','doclist':{'numFound':3,'start':0,'docs':[{'id':'5'}]}}," +
+ "{'groupValue':'2','doclist':{'numFound':3,'start':0,'docs':[{'id':'4'}]}}," +
+ "{'groupValue':'3','doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}," +
+ "{'groupValue':'4','doclist':{'numFound':1,'start':0,'docs':[{'id':'2'}]}}," +
+ "{'groupValue':'5','doclist':{'numFound':1,'start':0,'docs':[{'id':'1'}]}}" +
+ "]}}"
+ );
+ }
+
+ @Test
+ public void testGroupAPI() throws Exception {
+ createIndex();
+ String filt = f + ":[* TO *]";
+
+ assertQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f)
+ ,"/response/lst[@name='grouped']/lst[@name='"+f+"']/arr[@name='groups']"
+ );
+
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "sort", f + " asc", "group.sort", "score desc")
+ ,"/responseHeader/status==0" // exact match
+ ,"/responseHeader=={'_SKIP_':'QTime', 'status':0}" // partial match by skipping some elements
+ ,"/responseHeader=={'_MATCH_':'status', 'status':0}" // partial match by only including some elements
+ ,"/grouped=={'"+f+"':{'matches':10,'groups':[\n" +
+ "{'groupValue':'1','doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," +
+ "{'groupValue':'2','doclist':{'numFound':3,'start':0,'docs':[{'id':'4'}]}}," +
+ "{'groupValue':'3','doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}," +
+ "{'groupValue':'4','doclist':{'numFound':1,'start':0,'docs':[{'id':'2'}]}}," +
+ "{'groupValue':'5','doclist':{'numFound':1,'start':0,'docs':[{'id':'1'}]}}" +
+ "]}}"
+ );
+
+ // test that filtering cuts down the result set
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "fq",f+":2")
+ ,"/grouped=={'"+f+"':{'matches':3,'groups':[" +
+ "{'groupValue':'2','doclist':{'numFound':3,'start':0,'docs':[{'id':'4'}]}}" +
+ "]}}"
+ );
+
+ // test limiting the number of groups returned
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2")
+ ,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
+ "{'groupValue':'1','doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," +
+ "{'groupValue':'3','doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}" +
+ "]}}"
+ );
+
+ // test offset into group list
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","1", "start","1")
+ ,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
+ "{'groupValue':'3','doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}" +
+ "]}}"
+ );
+
+ // test big offset into group list
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","1", "start","100")
+ ,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
+ "]}}"
+ );
+
+ // test increasing the docs per group returned
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "group.limit","3")
+ ,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
+ "{'groupValue':'1','doclist':{'numFound':3,'start':0,'docs':[{'id':'8'},{'id':'10'},{'id':'5'}]}}," +
+ "{'groupValue':'3','doclist':{'numFound':2,'start':0,'docs':[{'id':'3'},{'id':'6'}]}}" +
+ "]}}"
+ );
+
+ // test offset into each group
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "group.limit","3", "group.offset","1")
+ ,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
+ "{'groupValue':'1','doclist':{'numFound':3,'start':1,'docs':[{'id':'10'},{'id':'5'}]}}," +
+ "{'groupValue':'3','doclist':{'numFound':2,'start':1,'docs':[{'id':'6'}]}}" +
+ "]}}"
+ );
+
+ // test big offset into each group
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "group.limit","3", "group.offset","10")
+ ,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
+ "{'groupValue':'1','doclist':{'numFound':3,'start':10,'docs':[]}}," +
+ "{'groupValue':'3','doclist':{'numFound':2,'start':10,'docs':[]}}" +
+ "]}}"
+ );
+
+ // test adding in scores
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id,score", "rows","2", "group.limit","2", "indent","off")
+ ,"/grouped/"+f+"/groups==" +
+ "[" +
+ "{'groupValue':'1','doclist':{'numFound':3,'start':0,'maxScore':10.0,'docs':[{'id':'8','score':10.0},{'id':'10','score':3.0}]}}," +
+ "{'groupValue':'3','doclist':{'numFound':2,'start':0,'maxScore':7.0,'docs':[{'id':'3','score':7.0},{'id':'6','score':2.0}]}}" +
+ "]"
+
+ );
+
+ /* Not supported yet!
+ // test function (functions are currently all float - this may change)
+ String func = "add("+f+","+f+")";
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.func", func , "fl","id", "rows","2")
+ ,"/grouped=={'"+func+"':{'matches':10,'groups':[" +
+ "{'groupValue':2.0,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," +
+ "{'groupValue':6.0,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}" +
+ "]}}"
+ );
+ */
+
+ // test that faceting works with grouping
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id"
+ ,"facet","true", "facet.field",f)
+ ,"/grouped/"+f+"/matches==10"
+ ,"/facet_counts/facet_fields/"+f+"==['1',3, '2',3, '3',2, '4',1, '5',1]"
+ );
+ purgeFieldCache(FieldCache.DEFAULT); // avoid FC insanity
+
+ // test that grouping works with highlighting
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id"
+ ,"hl","true", "hl.fl",f)
+ ,"/grouped/"+f+"/matches==10"
+ ,"/highlighting=={'_ORDERED_':'', '8':{},'3':{},'4':{},'1':{},'2':{}}"
+ );
+
+ // test that grouping works with debugging
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id"
+ ,"debugQuery","true")
+ ,"/grouped/"+f+"/matches==10"
+ ,"/debug/explain/8=="
+ ,"/debug/explain/2=="
+ );
+
+ ///////////////////////// group.query
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "group.limit","3")
+ ,"/grouped=={'id:[2 TO 5]':{'matches':10," +
+ "'doclist':{'numFound':4,'start':0,'docs':[{'id':'3'},{'id':'4'},{'id':'2'}]}}}"
+ );
+
+ // group.query and offset
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "group.limit","3", "group.offset","2")
+ ,"/grouped=={'id:[2 TO 5]':{'matches':10," +
+ "'doclist':{'numFound':4,'start':2,'docs':[{'id':'2'},{'id':'5'}]}}}"
+ );
+
+ // group.query and big offset
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "group.limit","3", "group.offset","10")
+ ,"/grouped=={'id:[2 TO 5]':{'matches':10," +
+ "'doclist':{'numFound':4,'start':10,'docs':[]}}}"
+ );
+
+ ///////////////////////// group.query as main result
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "rows","3", "group.main","true")
+ ,"/response=={'numFound':4,'start':0,'docs':[{'id':'3'},{'id':'4'},{'id':'2'}]}"
+ );
+
+ // group.query and offset
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "rows","3", "start","2", "group.main","true")
+ ,"/response=={'numFound':4,'start':2,'docs':[{'id':'2'},{'id':'5'}]}"
+ );
+
+ // group.query and big offset
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "rows","3", "start","10", "group.main","true")
+ ,"/response=={'numFound':4,'start':10,'docs':[]}"
+ );
+
+
+ // multiple at once
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true",
+ "group.query","id:[2 TO 5]",
+ "group.query","id:[5 TO 5]",
+ "group.field",f,
+ "rows","1",
+ "fl","id", "group.limit","2")
+ ,"/grouped/id:[2 TO 5]=={'matches':10,'doclist':{'numFound':4,'start':0,'docs':[{'id':'3'},{'id':'4'}]}}"
+ ,"/grouped/id:[5 TO 5]=={'matches':10,'doclist':{'numFound':1,'start':0,'docs':[{'id':'5'}]}}"
+ ,"/grouped/"+f+"=={'matches':10,'groups':[{'groupValue':'1','doclist':{'numFound':3,'start':0,'docs':[{'id':'8'},{'id':'10'}]}}]}"
+ );
+
+ ///////////////////////// group.field as main result
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "group.main","true")
+ ,"/response=={'numFound':10,'start':0,'docs':[{'id':'8'},{'id':'3'},{'id':'4'},{'id':'1'},{'id':'2'}]}"
+ );
+ // test that rows limits #docs
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "group.main","true")
+ ,"/response=={'numFound':10,'start':0,'docs':[{'id':'8'},{'id':'3'},{'id':'4'}]}"
+ );
+ // small offset
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "start","1", "group.main","true")
+ ,"/response=={'numFound':10,'start':1,'docs':[{'id':'3'},{'id':'4'}]}"
+ );
+ // large offset
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "start","20", "group.main","true")
+ ,"/response=={'numFound':10,'start':20,'docs':[]}"
+ );
+ // group.limit>1
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "group.limit","2", "group.main","true")
+ ,"/response=={'numFound':10,'start':0,'docs':[{'id':'8'},{'id':'10'},{'id':'3'}]}"
+ );
+ // group.limit>1 with start>0
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "start","1", "group.limit","2", "group.main","true")
+ ,"/response=={'numFound':10,'start':1,'docs':[{'id':'10'},{'id':'3'},{'id':'6'}]}"
+ );
+
+ ///////////////////////// group.format == simple
+ assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "start","1", "group.limit","2", "group.format","simple")
+ , "/grouped/foo_s1=={'matches':10,'doclist':"
+ +"{'numFound':10,'start':1,'docs':[{'id':'10'},{'id':'3'},{'id':'6'}]}}"
+ );
+ }
+
+
+ @Test
+ public void testRandomGrouping() throws Exception {
+ /**
+ Randomized grouping test. For every index iteration a random set of documents is indexed
+ (indexDocs) and kept in an in-memory model; for every query iteration random paging and
+ sort parameters are chosen, the expected grouped response is computed from the model
+ (groupBy + buildGroupedResult) and compared against the response returned by h.query().
+
+ Disabled sanity-check snippet, kept for reference:
+
+ updateJ("{\"add\":{\"doc\":{\"id\":\"77\"}}}", params("commit","true"));
+ assertJQ(req("q","id:77"), "/response/numFound==1");
+
+ Doc doc = createDocObj(types);
+ updateJ(toJSON(doc), params("commit","true"));
+
+ assertJQ(req("q","id:"+doc.id), "/response/numFound==1");
+ **/
+
+ int indexIter=50 * RANDOM_MULTIPLIER; // number of random indexes to build; make >0 to enable test
+ int queryIter=100 * RANDOM_MULTIPLIER; // number of random queries executed against each index
+
+ while (--indexIter >= 0) {
+
+ int indexSize = random.nextInt(25 * RANDOM_MULTIPLIER);
+//indexSize=2;
+ List<FldType> types = new ArrayList<FldType>();
+ types.add(new FldType("id",ONE_ONE, new SVal('A','Z',4,4)));
+ types.add(new FldType("score_s1",ONE_ONE, new SVal('a','c',1,1))); // field used to score -- NOTE(review): the query below uses {!func}score_f; confirm how the schema relates the two
+ types.add(new FldType("bar_s1",ONE_ONE, new SVal('a','z',3,5)));
+ types.add(new FldType(FOO_STRING_FIELD,ZERO_ONE, new SVal('a','z',1,2)));
+ types.add(new FldType(SMALL_STRING_FIELD,ZERO_ONE, new SVal('a',(char)('c'+indexSize/10),1,1))); // upper bound of the value range grows with indexSize
+// types.add(new FldType(SMALL_INT_FIELD,ZERO_ONE, new IRange(0,5+indexSize/10)));
+
+ clearIndex();
+ Map<Comparable, Doc> model = indexDocs(types, null, indexSize); // in-memory model of what was indexed (presumably keyed by doc id, cf. model.put(d1.id, d1) below)
+ //System.out.println("############### model=" + model);
+
+ // test with specific docs (disabled debugging aid: replaces the random index with five hand-built docs)
+ if (false) {
+ clearIndex();
+ model.clear();
+ Doc d1 = createDoc(types);
+ d1.getValues(SMALL_STRING_FIELD).set(0,"c");
+ d1.getValues(SMALL_INT_FIELD).set(0,5);
+ d1.order = 0;
+ updateJ(toJSON(d1), params("commit","true"));
+ model.put(d1.id, d1);
+
+ d1 = createDoc(types);
+ d1.getValues(SMALL_STRING_FIELD).set(0,"b");
+ d1.getValues(SMALL_INT_FIELD).set(0,5);
+ d1.order = 1;
+ updateJ(toJSON(d1), params("commit","false"));
+ model.put(d1.id, d1);
+
+ d1 = createDoc(types);
+ d1.getValues(SMALL_STRING_FIELD).set(0,"c");
+ d1.getValues(SMALL_INT_FIELD).set(0,5);
+ d1.order = 2;
+ updateJ(toJSON(d1), params("commit","false"));
+ model.put(d1.id, d1);
+
+ d1 = createDoc(types);
+ d1.getValues(SMALL_STRING_FIELD).set(0,"c");
+ d1.getValues(SMALL_INT_FIELD).set(0,5);
+ d1.order = 3;
+ updateJ(toJSON(d1), params("commit","false"));
+ model.put(d1.id, d1);
+
+ d1 = createDoc(types);
+ d1.getValues(SMALL_STRING_FIELD).set(0,"b");
+ d1.getValues(SMALL_INT_FIELD).set(0,2);
+ d1.order = 4;
+ updateJ(toJSON(d1), params("commit","true"));
+ model.put(d1.id, d1);
+ }
+
+
+ for (int qiter=0; qiter<queryIter; qiter++) {
+ String groupField = types.get(random.nextInt(types.size())).fname; // group on a randomly chosen field
+
+ int rows = random.nextInt(10)==0 ? random.nextInt(model.size()+2) : random.nextInt(11)-1; // usually -1..9; buildGroupedResult treats -1 as "no limit"
+ int start = random.nextInt(5)==0 ? random.nextInt(model.size()+2) : random.nextInt(5); // pick a small start normally for better coverage
+ int group_limit = random.nextInt(10)==0 ? random.nextInt(model.size()+2) : random.nextInt(11)-1; // usually -1..9; buildGroupedResult treats -1 as "no limit"
+ int group_offset = random.nextInt(10)==0 ? random.nextInt(model.size()+2) : random.nextInt(2); // pick a small offset normally for better coverage
+
+ String[] stringSortA = new String[1]; // element 0 is read back after each createSort call (apparently an out-parameter for the sort string)
+ Comparator<Doc> sortComparator = createSort(h.getCore().getSchema(), types, stringSortA);
+ String sortStr = stringSortA[0];
+ Comparator<Doc> groupComparator = random.nextBoolean() ? sortComparator : createSort(h.getCore().getSchema(), types, stringSortA); // half of the time group.sort is the same as sort
+ String groupSortStr = stringSortA[0];
+
+ // since groupSortStr defaults to sortStr, we need to normalize null to "score desc" if
+ // sortStr != null.
+ if (groupSortStr == null && groupSortStr != sortStr) { // reference comparison: with groupSortStr null this is true only when sortStr is non-null
+ groupSortStr = "score desc";
+ }
+
+ // Test specific case (disabled debugging aid: pins the parameters chosen above to fixed values)
+ if (false) {
+ groupField=SMALL_INT_FIELD;
+ sortComparator=createComparator(Arrays.asList(createComparator(SMALL_STRING_FIELD, true, true, false, true)));
+ sortStr = SMALL_STRING_FIELD + " asc";
+ groupComparator = createComparator(Arrays.asList(createComparator(SMALL_STRING_FIELD, true, true, false, false)));
+ groupSortStr = SMALL_STRING_FIELD + " asc";
+ rows=1; start=0; group_offset=1; group_limit=1;
+ }
+
+ Map<Comparable, Grp> groups = groupBy(model.values(), groupField);
+
+ // first sort the docs in each group
+ for (Grp grp : groups.values()) {
+ Collections.sort(grp.docs, groupComparator);
+ }
+
+ // now sort the groups
+
+ // if sort != group.sort, we need to find the max doc by "sort"
+ if (groupComparator != sortComparator) {
+ for (Grp grp : groups.values()) grp.setMaxDoc(sortComparator);
+ }
+
+ List<Grp> sortedGroups = new ArrayList(groups.values()); // NOTE(review): raw ArrayList; could be ArrayList<Grp>
+ Collections.sort(sortedGroups, groupComparator==sortComparator ? createFirstDocComparator(sortComparator) : createMaxDocComparator(sortComparator));
+
+ boolean includeNGroups = random.nextBoolean();
+ Object modelResponse = buildGroupedResult(h.getCore().getSchema(), sortedGroups, start, rows, group_offset, group_limit, includeNGroups);
+
+ int randomPercentage = random.nextInt(101); // 0..100, sent as GroupParams.GROUP_CACHE_PERCENTAGE
+ // TODO: create a random filter too
+ SolrQueryRequest req = req("group","true","wt","json","indent","true", "echoParams","all", "q","{!func}score_f", "group.field",groupField
+ ,sortStr==null ? "nosort":"sort", sortStr ==null ? "": sortStr // with no sort string, the placeholder name "nosort" is sent instead of "sort"
+ ,(groupSortStr==null || groupSortStr==sortStr) ? "noGroupsort":"group.sort", groupSortStr==null ? "": groupSortStr // "group.sort" is only sent when it is a different String instance than sort
+ ,"rows",""+rows, "start",""+start, "group.offset",""+group_offset, "group.limit",""+group_limit,
+ GroupParams.GROUP_CACHE_PERCENTAGE, Integer.toString(randomPercentage), GroupParams.GROUP_TOTAL_COUNT, includeNGroups ? "true" : "false"
+ );
+
+ String strResponse = h.query(req);
+
+ Object realResponse = ObjectBuilder.fromJSON(strResponse); // parsed JSON response, compared structurally against the model
+ String err = JSONTestUtil.matchObj("/grouped/"+groupField, realResponse, modelResponse); // a non-null result is treated as a mismatch description
+ if (err != null) {
+ log.error("GROUPING MISMATCH: " + err
+ + "\n\trequest="+req
+ + "\n\tresult="+strResponse
+ + "\n\texpected="+ JSONUtil.toJSON(modelResponse)
+ + "\n\tsorted_model="+ sortedGroups
+ );
+
+ // re-execute the request... good for putting a breakpoint here for debugging
+ String rsp = h.query(req); // result intentionally unused
+
+ fail(err);
+ }
+ } // end query iter
+ } // end index iter
+
+ }
+
+ /**
+  * Builds the expected response structure for one grouping command from groups that are
+  * already in their final order.
+  *
+  * The returned map holds, in this order: "matches" (the sum of the sizes of all groups),
+  * optionally "ngroups" (the number of groups) and "groups" (one entry per returned group,
+  * each with a "groupValue" and a "doclist" containing "numFound", "start" and "docs").
+  *
+  * @param schema         passed through to {@code Doc.toObject} when rendering each document
+  * @param sortedGroups   the groups in result order; each group's docs are in within-group order
+  * @param start          index of the first group to return
+  * @param rows           maximum number of groups to return; -1 means no limit
+  * @param group_offset   index of the first document to return within each group
+  * @param group_limit    maximum number of documents per group; -1 means no limit
+  * @param includeNGroups whether to add the "ngroups" entry
+  * @return the expected response as nested maps and lists
+  */
+ public static Object buildGroupedResult(IndexSchema schema, List<Grp> sortedGroups, int start, int rows, int group_offset, int group_limit, boolean includeNGroups) {
+   Map<String,Object> result = new LinkedHashMap<String,Object>();
+
+   long matches = 0;
+   for (Grp grp : sortedGroups) {
+     matches += grp.docs.size();
+   }
+   result.put("matches", matches);
+   if (includeNGroups) {
+     result.put("ngroups", sortedGroups.size());
+   }
+   List<Map<String,Object>> groupList = new ArrayList<Map<String,Object>>();
+   result.put("groups", groupList);
+
+   for (int i=start; i<sortedGroups.size(); i++) {
+     // directly test rather than calculating, so we can catch any calc errors in the real code
+     if (rows != -1 && groupList.size() >= rows) break;
+
+     Grp grp = sortedGroups.get(i);
+     Map<String,Object> group = new LinkedHashMap<String,Object>();
+     groupList.add(group);
+     group.put("groupValue", grp.groupValue);
+
+     Map<String,Object> resultSet = new LinkedHashMap<String,Object>();
+     group.put("doclist", resultSet);
+     resultSet.put("numFound", grp.docs.size());
+     resultSet.put("start", group_offset);
+     List<Object> docs = new ArrayList<Object>();
+     resultSet.put("docs", docs);
+     for (int j=group_offset; j<grp.docs.size(); j++) {
+       // same direct test as for rows above
+       if (group_limit != -1 && docs.size() >= group_limit) break;
+       docs.add( grp.docs.get(j).toObject(schema) );
+     }
+   }
+
+   return result;
+ }
+
+
+ /**
+  * Creates a comparator that orders groups by applying {@code docComparator} to each
+  * group's {@code maxDoc} field.
+  *
+  * @param docComparator the document ordering to apply to the two {@code maxDoc} values
+  * @return a group comparator delegating to {@code docComparator}
+  */
+ public static Comparator<Grp> createMaxDocComparator(final Comparator<Doc> docComparator) {
+   Comparator<Grp> byMaxDoc = new Comparator<Grp>() {
+     public int compare(Grp left, Grp right) {
+       // all groups should have at least one doc
+       return docComparator.compare(left.maxDoc, right.maxDoc);
+     }
+   };
+   return byMaxDoc;
+ }
+
+ /**
+  * Creates a comparator that orders groups by applying {@code docComparator} to the
+  * first document (index 0) of each group's {@code docs} list.
+  *
+  * @param docComparator the document ordering to apply to the two first documents
+  * @return a group comparator delegating to {@code docComparator}
+  */
+ public static Comparator<Grp> createFirstDocComparator(final Comparator<Doc> docComparator) {
+   Comparator<Grp> byFirstDoc = new Comparator<Grp>() {
+     public int compare(Grp left, Grp right) {
+       // all groups should have at least one doc
+       return docComparator.compare(left.docs.get(0), right.docs.get(0));
+     }
+   };
+   return byFirstDoc;
+ }
+
+ /**
+  * Partitions documents into groups keyed by their values for {@code field}.
+  *
+  * A document whose {@code getValues(field)} is null is placed in the group keyed by null;
+  * otherwise it is added to the group of every value in the returned list.
+  *
+  * @param docs  the documents to group
+  * @param field the name of the field to group on
+  * @return the groups, keyed by group value
+  */
+ public static Map<Comparable, Grp> groupBy(Collection<Doc> docs, String field) {
+   Map<Comparable, Grp> byValue = new HashMap<Comparable, Grp>();
+   for (Doc doc : docs) {
+     List<Comparable> fieldValues = doc.getValues(field);
+     // a doc without the field contributes exactly once, under the null key
+     List<Comparable> keys = (fieldValues != null) ? fieldValues : Collections.<Comparable>singletonList(null);
+     for (Comparable key : keys) {
+       Grp bucket = byValue.get(key);
+       if (bucket == null) {
+         // first doc seen for this value: start a new group
+         bucket = new Grp();
+         bucket.groupValue = key;
+         bucket.docs = new ArrayList<Doc>();
+         byValue.put(key, bucket);
+       }
+       bucket.docs.add(doc);
+     }
+   }
+   return byValue;
+ }
+
+
+ /**
+  * In-memory model of one result group: a group value plus the documents that share it.
+  */
+ public static class Grp {
+   /** The value the documents of this group have in common; groupBy uses null for docs without the field. */
+   public Comparable groupValue;
+   /** The documents belonging to this group. */
+   public List<Doc> docs;
+   /** The document that sorts first under the comparator last given to {@link #setMaxDoc}. */
+   public Doc maxDoc;
+
+   /**
+    * Stores in {@link #maxDoc} the document of this group that sorts first under
+    * {@code comparator}, or null when the group has no documents. {@link #docs} is not modified.
+    *
+    * @param comparator the ordering used to pick the representative document
+    */
+   public void setMaxDoc(Comparator<Doc> comparator) {
+     // A linear minimum scan replaces copying and fully sorting the list just to read element 0.
+     // Like the stable sort it replaces, it keeps the first of several equal candidates.
+     maxDoc = docs.isEmpty() ? null : Collections.min(docs, comparator);
+   }
+
+   @Override
+   public String toString() {
+     return "{groupValue="+groupValue+",docs="+docs+"}";
+   }
+ }
+
+}
|