lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sh...@apache.org
Subject svn commit: r1124379 - in /lucene/dev/trunk: lucene/ lucene/src/java/org/apache/lucene/search/ lucene/src/test/org/apache/lucene/search/ modules/grouping/src/test/org/apache/lucene/search/grouping/
Date Wed, 18 May 2011 18:51:39 GMT
Author: shaie
Date: Wed May 18 18:51:39 2011
New Revision: 1124379

URL: http://svn.apache.org/viewvc?rev=1124379&view=rev
Log:
LUCENE-3102: add no-wrap ability to CachingCollector

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/CachingCollector.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java
    lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1124379&r1=1124378&r2=1124379&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed May 18 18:51:39 2011
@@ -412,7 +412,7 @@ New features
   bytes in RAM. (Mike McCandless)
 
 * LUCENE-1421, LUCENE-3102: added CachingCollector which allow you to cache 
-  document IDs and scores encountered during the search, and "reply" them to 
+  document IDs and scores encountered during the search, and "replay" them to 
   another Collector. (Mike McCandless, Shai Erera)
 
 Optimizations

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/CachingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/CachingCollector.java?rev=1124379&r1=1124378&r2=1124379&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/CachingCollector.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/CachingCollector.java Wed May 18 18:51:39 2011
@@ -308,6 +308,48 @@ public abstract class CachingCollector e
   protected int base;
   protected int lastDocBase;
   
+  /**
+   * Creates a {@link CachingCollector} which does not wrap another collector.
+   * The cached documents and scores can later be {@link #replay(Collector)
+   * replayed}.
+   * 
+   * @param acceptDocsOutOfOrder
+   *          whether documents are allowed to be collected out-of-order
+   */
+  public static CachingCollector create(final boolean acceptDocsOutOfOrder, boolean cacheScores, double maxRAMMB) {
+    Collector other = new Collector() {
+      @Override
+      public boolean acceptsDocsOutOfOrder() {
+        return acceptDocsOutOfOrder;
+      }
+      
+      @Override
+      public void setScorer(Scorer scorer) throws IOException {}
+
+      @Override
+      public void collect(int doc) throws IOException {}
+
+      @Override
+      public void setNextReader(AtomicReaderContext context) throws IOException {}
+
+    };
+    return create(other, cacheScores, maxRAMMB);
+  }
+
+  /**
+   * Create a new {@link CachingCollector} that wraps the given collector and
+   * caches documents and scores up to the specified RAM threshold.
+   * 
+   * @param other
+   *          the Collector to wrap and delegate calls to.
+   * @param cacheScores
+   *          whether to cache scores in addition to document IDs. Note that
+   *          this increases the RAM consumed per doc
+   * @param maxRAMMB
+   *          the maximum RAM in MB to consume for caching the documents and
+   *          scores. If the collector exceeds the threshold, no documents and
+   *          scores are cached.
+   */
   public static CachingCollector create(Collector other, boolean cacheScores, double maxRAMMB) {
     return cacheScores ? new ScoreCachingCollector(other, maxRAMMB) : new NoScoreCachingCollector(other, maxRAMMB);
     }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java?rev=1124379&r1=1124378&r2=1124379&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java Wed May 18 18:51:39 2011
@@ -171,5 +171,18 @@ public class TestCachingCollector extend
       assertFalse(cc.isCached());
     }
   }
+
+  public void testNoWrappedCollector() throws Exception {
+    for (boolean cacheScores : new boolean[] { false, true }) {
+      // create w/ null wrapped collector, and test that the methods work
+      CachingCollector cc = CachingCollector.create(true, cacheScores, 50 * ONE_BYTE);
+      cc.setNextReader(null);
+      cc.setScorer(new MockScorer());
+      cc.collect(0);
+      
+      assertTrue(cc.isCached());
+      cc.replay(new NoOpCollector(true));
+    }
+  }
   
 }

Modified: lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java?rev=1124379&r1=1124378&r2=1124379&view=diff
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (original)
+++ lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java Wed May 18 18:51:39 2011
@@ -445,27 +445,54 @@ public class TestGrouping extends Lucene
         final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
         final CachingCollector cCache;
         final Collector c;
+        
+        final boolean useWrappingCollector = random.nextBoolean();
+        
         if (doCache) {
           final double maxCacheMB = random.nextDouble();
           if (VERBOSE) {
             System.out.println("TEST: maxCacheMB=" + maxCacheMB);
           }
 
-          if (doAllGroups) {
-            cCache = CachingCollector.create(c1, true, maxCacheMB);
-            c = MultiCollector.wrap(cCache, allGroupsCollector);
+          if (useWrappingCollector) {
+            if (doAllGroups) {
+              cCache = CachingCollector.create(c1, true, maxCacheMB);              
+              c = MultiCollector.wrap(cCache, allGroupsCollector);
+            } else {
+              c = cCache = CachingCollector.create(c1, true, maxCacheMB);              
+            }
           } else {
-            c = cCache = CachingCollector.create(c1, true, maxCacheMB);
+            // Collect only into cache, then replay multiple times:
+            c = cCache = CachingCollector.create(false, true, maxCacheMB);
           }
-        } else if (doAllGroups) {
-          c = MultiCollector.wrap(c1, allGroupsCollector);
-          cCache = null;
         } else {
-          c = c1;
           cCache = null;
+          if (doAllGroups) {
+            c = MultiCollector.wrap(c1, allGroupsCollector);
+          } else {
+            c = c1;
+          }
         }
+        
         s.search(new TermQuery(new Term("content", searchTerm)), c);
 
+        if (doCache && !useWrappingCollector) {
+          if (cCache.isCached()) {
+            // Replay for first-pass grouping
+            cCache.replay(c1);
+            if (doAllGroups) {
+              // Replay for all groups:
+              cCache.replay(allGroupsCollector);
+            }
+          } else {
+            // Replay by re-running search:
+            s.search(new TermQuery(new Term("content", searchTerm)), c1);
+            if (doAllGroups) {
+              s.search(new TermQuery(new Term("content", searchTerm)), allGroupsCollector);
+            }
+          }
+        }
+
         final Collection<SearchGroup> topGroups = c1.getTopGroups(groupOffset, fillFields);
         final TopGroups groupsResult;
 



Mime
View raw message