lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From markrmil...@apache.org
Subject svn commit: r1182187 - in /lucene/dev/branches/solrcloud: ./ dev-tools/idea/lucene/contrib/ lucene/ lucene/contrib/ lucene/contrib/misc/src/java/org/apache/lucene/search/ lucene/contrib/misc/src/test/org/apache/lucene/index/ lucene/contrib/misc/src/tes...
Date Wed, 12 Oct 2011 03:16:01 GMT
Author: markrmiller
Date: Wed Oct 12 03:15:59 2011
New Revision: 1182187

URL: http://svn.apache.org/viewvc?rev=1182187&view=rev
Log:
merge to trunk

Added:
    lucene/dev/branches/solrcloud/lucene/contrib/misc/src/java/org/apache/lucene/search/SearcherLifetimeManager.java
      - copied unchanged from r1182182, lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/search/SearcherLifetimeManager.java
    lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/search/sampling/RandomSampler.java
      - copied unchanged from r1182182, lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/search/sampling/RandomSampler.java
    lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java
      - copied unchanged from r1182182, lucene/dev/trunk/modules/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java
Removed:
    lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/util/RandomSample.java
Modified:
    lucene/dev/branches/solrcloud/   (props changed)
    lucene/dev/branches/solrcloud/dev-tools/idea/lucene/contrib/   (props changed)
    lucene/dev/branches/solrcloud/lucene/   (props changed)
    lucene/dev/branches/solrcloud/lucene/CHANGES.txt
    lucene/dev/branches/solrcloud/lucene/contrib/CHANGES.txt
    lucene/dev/branches/solrcloud/lucene/contrib/misc/src/test/org/apache/lucene/index/TestLazyBug.java
    lucene/dev/branches/solrcloud/lucene/contrib/misc/src/test/org/apache/lucene/search/TestSearcherManager.java
    lucene/dev/branches/solrcloud/lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/regex/TestSpanRegexQuery.java   (props changed)
    lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java
    lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
    lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java
    lucene/dev/branches/solrcloud/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java
    lucene/dev/branches/solrcloud/lucene/src/test-framework/org/apache/lucene/search/RandomSimilarityProvider.java
    lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java
    lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
    lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java
    lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java
    lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java
    lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/FacetTestBase.java
    lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java
    lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java   (props changed)
    lucene/dev/branches/solrcloud/solr/   (props changed)
    lucene/dev/branches/solrcloud/solr/CHANGES.txt   (props changed)
    lucene/dev/branches/solrcloud/solr/LICENSE.txt   (props changed)
    lucene/dev/branches/solrcloud/solr/NOTICE.txt   (props changed)
    lucene/dev/branches/solrcloud/solr/README.txt   (props changed)
    lucene/dev/branches/solrcloud/solr/build.xml   (props changed)
    lucene/dev/branches/solrcloud/solr/client/   (props changed)
    lucene/dev/branches/solrcloud/solr/common-build.xml   (props changed)
    lucene/dev/branches/solrcloud/solr/contrib/   (props changed)
    lucene/dev/branches/solrcloud/solr/contrib/clustering/src/test-files/   (props changed)
    lucene/dev/branches/solrcloud/solr/contrib/dataimporthandler-extras/src/java/   (props changed)
    lucene/dev/branches/solrcloud/solr/contrib/dataimporthandler/src/java/   (props changed)
    lucene/dev/branches/solrcloud/solr/contrib/dataimporthandler/src/test-files/   (props changed)
    lucene/dev/branches/solrcloud/solr/contrib/dataimporthandler/src/test/org/   (props changed)
    lucene/dev/branches/solrcloud/solr/contrib/uima/src/java/   (props changed)
    lucene/dev/branches/solrcloud/solr/contrib/uima/src/test-files/   (props changed)
    lucene/dev/branches/solrcloud/solr/core/   (props changed)
    lucene/dev/branches/solrcloud/solr/core/src/java/   (props changed)
    lucene/dev/branches/solrcloud/solr/core/src/test/   (props changed)
    lucene/dev/branches/solrcloud/solr/dev-tools/   (props changed)
    lucene/dev/branches/solrcloud/solr/example/   (props changed)
    lucene/dev/branches/solrcloud/solr/lib/   (props changed)
    lucene/dev/branches/solrcloud/solr/scripts/   (props changed)
    lucene/dev/branches/solrcloud/solr/site/   (props changed)
    lucene/dev/branches/solrcloud/solr/site-src/   (props changed)
    lucene/dev/branches/solrcloud/solr/solrj/   (props changed)
    lucene/dev/branches/solrcloud/solr/solrj/src/java/   (props changed)
    lucene/dev/branches/solrcloud/solr/solrj/src/test/org/apache/solr/client/   (props changed)
    lucene/dev/branches/solrcloud/solr/solrj/src/test/org/apache/solr/client/solrj/   (props changed)
    lucene/dev/branches/solrcloud/solr/solrj/src/test/org/apache/solr/common/   (props changed)
    lucene/dev/branches/solrcloud/solr/test-framework/   (props changed)
    lucene/dev/branches/solrcloud/solr/testlogging.properties   (props changed)
    lucene/dev/branches/solrcloud/solr/webapp/   (props changed)

Modified: lucene/dev/branches/solrcloud/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/CHANGES.txt?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/solrcloud/lucene/CHANGES.txt Wed Oct 12 03:15:59 2011
@@ -646,6 +646,10 @@ Bug fixes
 * LUCENE-3215: SloppyPhraseScorer sometimes computed Infinite freq
   (Robert Muir, Doron Cohen)  
 
+* LUCENE-3503: DisjunctionSumScorer would give slightly different scores
+  for a document depending if you used nextDoc() versus advance().
+  (Mike McCandless, Robert Muir)
+
 New Features
 
 * LUCENE-3448: Added FixedBitSet.and(other/DISI), andNot(other/DISI).

Modified: lucene/dev/branches/solrcloud/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/contrib/CHANGES.txt?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/solrcloud/lucene/contrib/CHANGES.txt Wed Oct 12 03:15:59 2011
@@ -100,6 +100,11 @@ New Features
    refCount is used to safely close the reader only once all threads are done
    using it.  (Michael McCandless)
 
+ * LUCENE-3486: Add SearcherLifetimeManager, to manage retrieving the
+   same searcher used in a previous search to ensure follow-on actions
+   (next page, drill down, etc.) use the same searcher as before (Mike
+   McCandless)
+
 Bug Fixes
 
  * LUCENE-3417: DictionaryCompoundWordFilter did not properly add tokens from the

Modified: lucene/dev/branches/solrcloud/lucene/contrib/misc/src/test/org/apache/lucene/index/TestLazyBug.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/contrib/misc/src/test/org/apache/lucene/index/TestLazyBug.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/contrib/misc/src/test/org/apache/lucene/index/TestLazyBug.java (original)
+++ lucene/dev/branches/solrcloud/lucene/contrib/misc/src/test/org/apache/lucene/index/TestLazyBug.java Wed Oct 12 03:15:59 2011
@@ -35,7 +35,7 @@ import org.junit.BeforeClass;
 
 /**
  * Test demonstrating EOF bug on the last field of the last doc
- * if other docs have allready been accessed.
+ * if other docs have already been accessed.
  */
 public class TestLazyBug extends LuceneTestCase {
 

Modified: lucene/dev/branches/solrcloud/lucene/contrib/misc/src/test/org/apache/lucene/search/TestSearcherManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/contrib/misc/src/test/org/apache/lucene/search/TestSearcherManager.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/contrib/misc/src/test/org/apache/lucene/search/TestSearcherManager.java (original)
+++ lucene/dev/branches/solrcloud/lucene/contrib/misc/src/test/org/apache/lucene/search/TestSearcherManager.java Wed Oct 12 03:15:59 2011
@@ -20,6 +20,8 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorService;
+import java.util.List;
+import java.util.ArrayList;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -38,7 +40,10 @@ public class TestSearcherManager extends
 
   boolean warmCalled;
 
+  private SearcherLifetimeManager.Pruner pruner;
+
   public void testSearcherManager() throws Exception {
+    pruner = new SearcherLifetimeManager.PruneByAge(TEST_NIGHTLY ? _TestUtil.nextInt(random, 1, 20) : 1);
     runTest("TestSearcherManager");
   }
 
@@ -52,6 +57,8 @@ public class TestSearcherManager extends
   }
 
   private SearcherManager mgr;
+  private SearcherLifetimeManager lifetimeMGR;
+  private final List<Long> pastSearchers = new ArrayList<Long>();
   private boolean isNRT;
 
   @Override
@@ -73,6 +80,8 @@ public class TestSearcherManager extends
       isNRT = false;
     }
     
+
+    lifetimeMGR = new SearcherLifetimeManager();
   }
 
   @Override
@@ -86,7 +95,9 @@ public class TestSearcherManager extends
             Thread.sleep(_TestUtil.nextInt(random, 1, 100));
             writer.commit();
             Thread.sleep(_TestUtil.nextInt(random, 1, 5));
-            mgr.maybeReopen();
+            if (mgr.maybeReopen()) {
+              lifetimeMGR.prune(pruner);
+            }
           }
         } catch (Throwable t) {
           System.out.println("TEST: reopen thread hit exc");
@@ -111,15 +122,48 @@ public class TestSearcherManager extends
       // synchronous to your search threads, but still we
       // test as apps will presumably do this for
       // simplicity:
-      mgr.maybeReopen();
+      if (mgr.maybeReopen()) {
+        lifetimeMGR.prune(pruner);
+      }
     }
 
-    return mgr.acquire();
+    IndexSearcher s = null;
+
+    synchronized(pastSearchers) {
+      while (pastSearchers.size() != 0 && random.nextDouble() < 0.25) {
+        // 1/4 of the time pull an old searcher, ie, simulate
+        // a user doing a follow-on action on a previous
+        // search (drilling down/up, clicking next/prev page,
+        // etc.)
+        final Long token = pastSearchers.get(random.nextInt(pastSearchers.size()));
+        s = lifetimeMGR.acquire(token);
+        if (s == null) {
+          // Searcher was pruned
+          pastSearchers.remove(token);
+        } else {
+          break;
+        }
+      }
+    }
+
+    if (s == null) {
+      s = mgr.acquire();
+      if (s.getIndexReader().numDocs() != 0) {
+        Long token = lifetimeMGR.record(s);
+        synchronized(pastSearchers) {
+          if (!pastSearchers.contains(token)) {
+            pastSearchers.add(token);
+          }
+        }
+      }
+    }
+
+    return s;
   }
 
   @Override
   protected void releaseSearcher(IndexSearcher s) throws Exception {
-    mgr.release(s);
+    s.getIndexReader().decRef();
   }
 
   @Override
@@ -129,6 +173,7 @@ public class TestSearcherManager extends
       System.out.println("TEST: now close SearcherManager");
     }
     mgr.close();
+    lifetimeMGR.close();
   }
   
   public void testIntermediateClose() throws IOException, InterruptedException {

Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java Wed Oct 12 03:15:59 2011
@@ -93,7 +93,7 @@ public abstract class DocValuesConsumer 
    * @param mergeState
    *          the state to merge
    * @param docValues docValues array containing one instance per reader (
-   *          {@link MergeState#readers}) or <code>null</code> if the reader has
+   *          {@link org.apache.lucene.index.codecs.MergeState#readers}) or <code>null</code> if the reader has
    *          no {@link IndexDocValues} instance.
    * @throws IOException
    *           if an {@link IOException} occurs

Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java Wed Oct 12 03:15:59 2011
@@ -55,7 +55,7 @@ class DisjunctionSumScorer extends Score
   /** The number of subscorers that provide the current match. */
   protected int nrMatchers = -1;
 
-  private float currentScore = Float.NaN;
+  private double currentScore = Float.NaN;
   
   /** Construct a <code>DisjunctionScorer</code>.
    * @param weight The weight to be used.
@@ -195,7 +195,7 @@ class DisjunctionSumScorer extends Score
    * Initially invalid, until {@link #nextDoc()} is called the first time.
    */
   @Override
-  public float score() throws IOException { return currentScore; }
+  public float score() throws IOException { return (float)currentScore; }
    
   @Override
   public int docID() {

Modified: lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java Wed Oct 12 03:15:59 2011
@@ -24,6 +24,10 @@ import static org.apache.lucene.search.s
  * slightly from the one in the original paper: {@code F} is increased by {@code tfn+1}
  * and {@code N} is increased by {@code F} 
  * @lucene.experimental
+ * NOTE: in some corner cases this model may give poor performance with Normalizations that
+ * return large values for {@code tfn} such as NormalizationH3. Consider using the 
+ * geometric approximation ({@link BasicModelG}) instead, which provides the same relevance
+ * but with less practical problems. 
  */
 public class BasicModelBE extends BasicModel {
   @Override

Modified: lucene/dev/branches/solrcloud/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java Wed Oct 12 03:15:59 2011
@@ -313,7 +313,9 @@ public class CheckHits {
 
   }
 
-  private static float explainToleranceDelta(float f1, float f2) {
+  /** returns a reasonable epsilon for comparing two floats,
+   *  where minor differences are acceptable such as score vs. explain */
+  public static float explainToleranceDelta(float f1, float f2) {
     return Math.max(EXPLAIN_SCORE_TOLERANCE_MINIMUM, Math.max(Math.abs(f1), Math.abs(f2)) * EXPLAIN_SCORE_TOLERANCE_DELTA);
   }
 

Modified: lucene/dev/branches/solrcloud/lucene/src/test-framework/org/apache/lucene/search/RandomSimilarityProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/test-framework/org/apache/lucene/search/RandomSimilarityProvider.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/test-framework/org/apache/lucene/search/RandomSimilarityProvider.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/test-framework/org/apache/lucene/search/RandomSimilarityProvider.java Wed Oct 12 03:15:59 2011
@@ -102,7 +102,7 @@ public class RandomSimilarityProvider ex
   // all the similarities that we rotate through
   /** The DFR basic models to test. */
   static BasicModel[] BASIC_MODELS = {
-    new BasicModelBE(), /* TODO: enable new BasicModelD(), */ new BasicModelG(),
+    /* TODO: enable new BasicModelBE(), */ /* TODO: enable new BasicModelD(), */ new BasicModelG(),
     new BasicModelIF(), new BasicModelIn(), new BasicModelIne(),
     /* TODO: enable new BasicModelP() */
   };

Modified: lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java Wed Oct 12 03:15:59 2011
@@ -158,6 +158,7 @@ public class TestTypePromotion extends L
         break;
       case Int:
         assertEquals(msg, values[id], directSource.getInt(i));
+        break;
       default:
         break;
       }

Modified: lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java Wed Oct 12 03:15:59 2011
@@ -355,12 +355,11 @@ public class TestBooleanMinShouldMatch e
               found=true;
               float otherScore = top1.scoreDocs[other].score;
               // check if scores match
-              if (Math.abs(otherScore-score)>1.0e-6f) {
-                        fail("Doc " + id + " scores don't match\n"
-                + CheckHits.topdocsString(top1,0,0)
-                + CheckHits.topdocsString(top2,0,0)
-                + "for query:" + q2.toString());
-              }
+              assertEquals("Doc " + id + " scores don't match\n"
+                  + CheckHits.topdocsString(top1,0,0)
+                  + CheckHits.topdocsString(top2,0,0)
+                  + "for query:" + q2.toString(),
+                  score, otherScore, CheckHits.explainToleranceDelta(score, otherScore));
             }
           }
 

Modified: lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java (original)
+++ lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/search/TestBooleanQuery.java Wed Oct 12 03:15:59 2011
@@ -17,6 +17,9 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
@@ -33,6 +36,7 @@ import org.apache.lucene.search.similari
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.NamedThreadFactory;
+import org.apache.lucene.util._TestUtil;
 
 public class TestBooleanQuery extends LuceneTestCase {
   
@@ -183,6 +187,113 @@ public class TestBooleanQuery extends Lu
     dir1.close();
     dir2.close();
   }
-}
- 
 
+  public void testBS2DisjunctionNextVsAdvance() throws Exception {
+    final Directory d = newDirectory();
+    final RandomIndexWriter w = new RandomIndexWriter(random, d);
+    final int numDocs = atLeast(300);
+    for(int docUpto=0;docUpto<numDocs;docUpto++) {
+      String contents = "a";
+      if (random.nextInt(20) <= 16) {
+        contents += " b";
+      }
+      if (random.nextInt(20) <= 8) {
+        contents += " c";
+      }
+      if (random.nextInt(20) <= 4) {
+        contents += " d";
+      }
+      if (random.nextInt(20) <= 2) {
+        contents += " e";
+      }
+      if (random.nextInt(20) <= 1) {
+        contents += " f";
+      }
+      Document doc = new Document();
+      doc.add(new TextField("field", contents));
+      w.addDocument(doc);
+    }
+    w.optimize();
+    final IndexReader r = w.getReader();
+    final IndexSearcher s = newSearcher(r);
+    w.close();
+
+    for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
+      if (VERBOSE) {
+        System.out.println("iter=" + iter);
+      }
+      final List<String> terms = new ArrayList<String>(Arrays.asList("a", "b", "c", "d", "e", "f"));
+      final int numTerms = _TestUtil.nextInt(random, 1, terms.size());
+      while(terms.size() > numTerms) {
+        terms.remove(random.nextInt(terms.size()));
+      }
+
+      if (VERBOSE) {
+        System.out.println("  terms=" + terms);
+      }
+
+      final BooleanQuery q = new BooleanQuery();
+      for(String term : terms) {
+        q.add(new BooleanClause(new TermQuery(new Term("field", term)), BooleanClause.Occur.SHOULD));
+      }
+
+      Weight weight = s.createNormalizedWeight(q);
+
+      Scorer scorer = weight.scorer(s.leafContexts[0],
+                                          true, false, null);
+
+      // First pass: just use .nextDoc() to gather all hits
+      final List<ScoreDoc> hits = new ArrayList<ScoreDoc>();
+      while(scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+        hits.add(new ScoreDoc(scorer.docID(), scorer.score()));
+      }
+
+      if (VERBOSE) {
+        System.out.println("  " + hits.size() + " hits");
+      }
+
+      // Now, randomly next/advance through the list and
+      // verify exact match:
+      for(int iter2=0;iter2<10;iter2++) {
+
+        weight = s.createNormalizedWeight(q);
+        scorer = weight.scorer(s.leafContexts[0],
+                               true, false, null);
+
+        if (VERBOSE) {
+          System.out.println("  iter2=" + iter2);
+        }
+
+        int upto = -1;
+        while(upto < hits.size()) {
+          final int nextUpto;
+          final int nextDoc;
+          final int left = hits.size() - upto;
+          if (left == 1 || random.nextBoolean()) {
+            // next
+            nextUpto = 1+upto;
+            nextDoc = scorer.nextDoc();
+          } else {
+            // advance
+            int inc = _TestUtil.nextInt(random, 1, left-1);
+            nextUpto = inc + upto;
+            nextDoc = scorer.advance(hits.get(nextUpto).doc);
+          }
+
+          if (nextUpto == hits.size()) {
+            assertEquals(DocIdSetIterator.NO_MORE_DOCS, nextDoc);
+          } else {
+            final ScoreDoc hit = hits.get(nextUpto);
+            assertEquals(hit.doc, nextDoc);
+            // Test for precise float equality:
+            assertTrue("doc " + hit.doc + " has wrong score: expected=" + hit.score + " actual=" + scorer.score(), hit.score == scorer.score());
+          }
+          upto = nextUpto;
+        }
+      }
+    }
+    
+    r.close();
+    d.close();
+  }
+}

Modified: lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java (original)
+++ lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java Wed Oct 12 03:15:59 2011
@@ -2,12 +2,15 @@ package org.apache.lucene.facet.search;
 
 import java.io.IOException;
 import java.util.List;
+import java.util.Random;
 
 import org.apache.lucene.index.IndexReader;
 
 import org.apache.lucene.facet.search.params.FacetSearchParams;
 import org.apache.lucene.facet.search.results.FacetResult;
 import org.apache.lucene.facet.search.results.FacetResultNode;
+import org.apache.lucene.facet.search.sampling.RandomSampler;
+import org.apache.lucene.facet.search.sampling.RepeatableSampler;
 import org.apache.lucene.facet.search.sampling.Sampler;
 import org.apache.lucene.facet.search.sampling.SamplingAccumulator;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
@@ -44,7 +47,7 @@ import org.apache.lucene.facet.taxonomy.
  */
 public final class AdaptiveFacetsAccumulator extends StandardFacetsAccumulator {
   
-  private Sampler sampler = new Sampler();
+  private Sampler sampler = new RandomSampler();
 
   /**
    * Create an {@link AdaptiveFacetsAccumulator} 

Modified: lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java (original)
+++ lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java Wed Oct 12 03:15:59 2011
@@ -1,8 +1,6 @@
 package org.apache.lucene.facet.search.sampling;
 
 import java.io.IOException;
-import java.util.logging.Level;
-import java.util.logging.Logger;
 
 import org.apache.lucene.index.IndexReader;
 
@@ -15,8 +13,6 @@ import org.apache.lucene.facet.search.re
 import org.apache.lucene.facet.search.results.FacetResultNode;
 import org.apache.lucene.facet.search.results.MutableFacetResultNode;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.facet.util.RandomSample;
-import org.apache.lucene.facet.util.ScoredDocIdsUtils;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -48,11 +44,9 @@ import org.apache.lucene.facet.util.Scor
  * 
  * @lucene.experimental
  */
-public class Sampler {
+public abstract class Sampler {
 
-  private static final Logger logger = Logger.getLogger(Sampler.class.getName());
-
-  private final SamplingParams samplingParams;
+  protected final SamplingParams samplingParams;
   
   /**
    * Construct with {@link SamplingParams}
@@ -103,26 +97,20 @@ public class Sampler {
     sampleSetSize = Math.max(sampleSetSize, samplingParams.getMinSampleSize());
     sampleSetSize = Math.min(sampleSetSize, samplingParams.getMaxSampleSize());
 
-    int[] sampleSet = null;
-    try {
-      sampleSet = RandomSample.repeatableSample(docids, actualSize,
-          sampleSetSize);
-    } catch (IOException e) {
-      if (logger.isLoggable(Level.WARNING)) {
-        logger.log(Level.WARNING, "sampling failed: "+e.getMessage()+" - falling back to no sampling!", e);
-      }
-      return new SampleResult(docids, 1d);
-    }
-
-    ScoredDocIDs sampled = ScoredDocIdsUtils.createScoredDocIDsSubset(docids,
-        sampleSet);
-    if (logger.isLoggable(Level.FINEST)) {
-      logger.finest("******************** " + sampled.size());
-    }
-    return new SampleResult(sampled, sampled.size()/(double)docids.size());
+    return createSample(docids, actualSize, sampleSetSize);
   }
 
   /**
+   * Create and return a sample of the input set
+   * @param docids input set out of which a sample is to be created 
+   * @param actualSize original size of set, prior to sampling
+   * @param sampleSetSize required size of sample set
+   * @return sample of the input set in the required size
+   */
+  protected abstract SampleResult createSample(ScoredDocIDs docids, int actualSize,
+      int sampleSetSize) throws IOException;
+
+  /**
    * Get a fixer of sample facet accumulation results. Default implementation
    * returns a <code>TakmiSampleFixer</code> which is adequate only for
    * counting. For any other accumulator, provide a different fixer.

Modified: lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/FacetTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/FacetTestBase.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/FacetTestBase.java (original)
+++ lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/FacetTestBase.java Wed Oct 12 03:15:59 2011
@@ -313,7 +313,7 @@ public abstract class FacetTestBase exte
       System.err.println("Results are not the same!");
       System.err.println("Expected:\n" + expectedResults);
       System.err.println("Actual" + actualResults);
-      fail("Results are not the same!");
+      throw new NotSameResultError();
     }
   }
   
@@ -325,4 +325,12 @@ public abstract class FacetTestBase exte
     }
     return sb.toString().replaceAll("Residue:.*.0", "").replaceAll("Num valid Descendants.*", "");
   }
+  
+  /** Special Error class for ability to ignore only this error and retry... */ 
+  public static class NotSameResultError extends Error {
+    public NotSameResultError() {
+      super("Results are not the same!");
+    }
+  }
+  
 }

Modified: lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java?rev=1182187&r1=1182186&r2=1182187&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java (original)
+++ lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java Wed Oct 12 03:15:59 2011
@@ -2,6 +2,7 @@ package org.apache.lucene.facet.search.s
 
 import java.io.IOException;
 import java.util.List;
+import java.util.Random;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
@@ -41,7 +42,7 @@ public abstract class BaseSampleTestTopK
   protected static final int K = 2; 
   
   /** since there is a chance that this test would fail even if the code is correct, retry
the sampling */
-  protected static final int RETRIES = 4; 
+  protected static final int RETRIES = 10;
   
   protected abstract FacetsAccumulator getSamplingAccumulator(Sampler sampler,
       TaxonomyReader taxoReader, IndexReader indexReader,
@@ -53,53 +54,56 @@ public abstract class BaseSampleTestTopK
    * is performed. The results are compared to non-sampled ones.
    */
   public void testCountUsingSamping() throws Exception, IOException {
+    boolean useRandomSampler = random.nextBoolean();
     for (int partitionSize : partitionSizes) {
-      initIndex(partitionSize);
-      
-      // Get all of the documents and run the query, then do different
-      // facet counts and compare to control
-      Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 90% of the docs
-      ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(searcher.maxDoc(),
false);
-      
-      FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, partitionSize);

-      FacetsCollector fc = new FacetsCollector(expectedSearchParams, indexReader, taxoReader);
-      
-      searcher.search(q, MultiCollector.wrap(docCollector, fc));
-      
-      List<FacetResult> expectedResults = fc.getFacetResults();
-      
-      // complement with sampling!
-      final Sampler sampler = createSampler(docCollector.getScoredDocIDs());
-      
-      FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, partitionSize);

-
-      assertSampling(expectedResults, q, sampler, samplingSearchParams, false);
-      assertSampling(expectedResults, q, sampler, samplingSearchParams, true);
-
-      closeAll();
-    }
-  }
-  
-  private void assertSampling(List<FacetResult> expected, Query q, Sampler sampler,
FacetSearchParams params, boolean complement) throws Exception {
-    // try several times in case of failure, because the test has a chance to fail 
-    // if the top K facets are not sufficiently common with the sample set
-    for (int n=RETRIES; n>0; n--) {
-      FacetsCollector samplingFC = samplingCollector(false, sampler, params);
-      
-      searcher.search(q, samplingFC);
-      List<FacetResult> sampledResults = samplingFC.getFacetResults();
-      
       try {
-        assertSameResults(expected, sampledResults);
-        break; // succeeded
-      } catch (Exception e) {
-        if (n<=1) { // otherwise try again
-          throw e; 
+        initIndex(partitionSize);
+        // Get all of the documents and run the query, then do different
+        // facet counts and compare to control
+        Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 90% of the docs
+        ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(searcher.maxDoc(),
false);
+        
+        FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, partitionSize);

+        FacetsCollector fc = new FacetsCollector(expectedSearchParams, indexReader, taxoReader);
+        
+        searcher.search(q, MultiCollector.wrap(docCollector, fc));
+        
+        List<FacetResult> expectedResults = fc.getFacetResults();
+        
+        FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, partitionSize);

+        
+        // try several times in case of failure, because the test has a chance to fail 
+        // if the top K facets are not sufficiently common with the sample set
+        for (int nTrial=0; nTrial<RETRIES; nTrial++) {
+          try {
+            // complement with sampling!
+            final Sampler sampler = createSampler(nTrial, docCollector.getScoredDocIDs(),
useRandomSampler);
+            
+            assertSampling(expectedResults, q, sampler, samplingSearchParams, false);
+            assertSampling(expectedResults, q, sampler, samplingSearchParams, true);
+            
+            break; // succeeded
+          } catch (NotSameResultError e) {
+            if (nTrial>=RETRIES-1) {
+              throw e; // no more retries allowed, must fail
+            }
+          }
         }
+      } finally { 
+        closeAll();
       }
     }
   }
   
+  private void assertSampling(List<FacetResult> expected, Query q, Sampler sampler,
FacetSearchParams params, boolean complement) throws Exception {
+    FacetsCollector samplingFC = samplingCollector(complement, sampler, params);
+    
+    searcher.search(q, samplingFC);
+    List<FacetResult> sampledResults = samplingFC.getFacetResults();
+    
+    assertSameResults(expected, sampledResults);
+  }
+  
   private FacetsCollector samplingCollector(
       final boolean complement,
       final Sampler sampler,
@@ -117,14 +121,19 @@ public abstract class BaseSampleTestTopK
     return samplingFC;
   }
   
-  private Sampler createSampler(ScoredDocIDs scoredDocIDs) {
+  private Sampler createSampler(int nTrial, ScoredDocIDs scoredDocIDs, boolean useRandomSampler)
{
     SamplingParams samplingParams = new SamplingParams();
-    samplingParams.setSampleRatio(0.8);
-    samplingParams.setMinSampleSize(100);
-    samplingParams.setMaxSampleSize(10000);
+    
+    final double retryFactor = Math.pow(1.01, nTrial);
+    samplingParams.setSampleRatio(0.8 * retryFactor);
+    samplingParams.setMinSampleSize((int) (100 * retryFactor));
+    samplingParams.setMaxSampleSize((int) (10000 * retryFactor));
+    samplingParams.setOversampleFactor(5.0 * retryFactor);
+
     samplingParams.setSampingThreshold(11000); //force sampling 
-    samplingParams.setOversampleFactor(5.0);
-    Sampler sampler = new Sampler(samplingParams);
+    Sampler sampler = useRandomSampler ? 
+        new RandomSampler(samplingParams, new Random(random.nextLong())) :
+          new RepeatableSampler(samplingParams);
     assertTrue("must enable sampling for this test!",sampler.shouldSample(scoredDocIDs));
     return sampler;
   }



Mime
View raw message