lucene-commits mailing list archives

From sh...@apache.org
Subject svn commit: r1141060 [17/21] - in /lucene/dev/branches/branch_3x: dev-tools/eclipse/ dev-tools/maven/lucene/contrib/facet/ lucene/contrib/ lucene/contrib/facet/ lucene/contrib/facet/src/ lucene/contrib/facet/src/examples/ lucene/contrib/facet/src/examp...
Date Wed, 29 Jun 2011 11:53:19 GMT
Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCountsCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCountsCache.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCountsCache.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCountsCache.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,515 @@
+package org.apache.lucene.facet.search;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.facet.FacetTestUtils;
+import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyReaderPair;
+import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyWriterPair;
+import org.apache.lucene.facet.example.ExampleResult;
+import org.apache.lucene.facet.example.TestMultiCLExample;
+import org.apache.lucene.facet.example.multiCL.MultiCLIndexer;
+import org.apache.lucene.facet.example.multiCL.MultiCLSearcher;
+import org.apache.lucene.facet.index.CategoryDocumentBuilder;
+import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.search.TotalFacetCounts.CreationType;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.search.results.FacetResultNode;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
+import org.apache.lucene.util.SlowRAMDirectory;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestTotalFacetCountsCache extends LuceneTestCase {
+
+  static final TotalFacetCountsCache TFC = TotalFacetCountsCache.getSingleton();
+
+  /**
+   * Thread class used by the tests below. Each thread obtains a TFC from the
+   * cache and keeps a reference to it for later inspection.
+   */
+  private static class TFCThread extends Thread {
+    private final IndexReader r;
+    private final LuceneTaxonomyReader tr;
+    private final FacetIndexingParams iParams;
+    
+    TotalFacetCounts tfc;
+
+    public TFCThread(IndexReader r, LuceneTaxonomyReader tr, FacetIndexingParams iParams) {
+      this.r = r;
+      this.tr = tr;
+      this.iParams = iParams;
+    }
+    @Override
+    public void run() {
+      try {
+        tfc = TFC.getTotalCounts(r, tr, iParams, null);
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+  }
+
+  /** Utility method to add a document and facets to an index/taxonomy. */
+  static void addFacets(FacetIndexingParams iParams, IndexWriter iw,
+                        TaxonomyWriter tw, String... strings) throws IOException {
+    ArrayList<CategoryPath> cps = new ArrayList<CategoryPath>();
+    cps.add(new CategoryPath(strings));
+    CategoryDocumentBuilder builder = new CategoryDocumentBuilder(tw, iParams);
+    iw.addDocument(builder.setCategoryPaths(cps).build(new Document()));
+  }
+
+  /** Clears the cache and sets its size to one. */
+  static void initCache() {
+    TFC.clear();
+    TFC.setCacheSize(1); // Set to keep one in memory
+  }
+
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    initCache();
+  }
+
+  /**
+   * Run many instances of {@link MultiCLSearcher} in parallel; results should
+   * be sane. Each instance has a random delay for reading bytes, to ensure
+   * that threads finish in a different order than they started.
+   */
+  @Test
+  public void testGeneralSynchronization() throws Exception {
+    int[] numThreads = new int[] { 2, 3, 5, 8 };
+    int[] sleepMillis = new int[] { -1, 1, 20, 33 };
+    int[] cacheSize = new int[] { 0,1,2,3,5 };
+    for (int size : cacheSize) {
+      for (int sleep : sleepMillis) {
+        for (int nThreads : numThreads) {
+          doTestGeneralSynchronization(nThreads, sleep, size);
+        }
+      }
+    }
+  }
+
+  private void doTestGeneralSynchronization(int numThreads, int sleepMillis,
+      int cacheSize) throws Exception, CorruptIndexException, IOException,
+      InterruptedException {
+    TFC.setCacheSize(cacheSize);
+    SlowRAMDirectory slowIndexDir = new SlowRAMDirectory(-1, random);
+    SlowRAMDirectory slowTaxoDir = new SlowRAMDirectory(-1, random);
+
+    // Index documents without the "slowness"
+    MultiCLIndexer.index(slowIndexDir, slowTaxoDir);
+
+    slowIndexDir.setSleepMillis(sleepMillis);
+    slowTaxoDir.setSleepMillis(sleepMillis);
+    
+    // Open the slow readers
+    IndexReader slowIndexReader = IndexReader.open(slowIndexDir);
+    TaxonomyReader slowTaxoReader = new LuceneTaxonomyReader(slowTaxoDir);
+
+    // Class to perform search and return results as threads
+    class Multi extends Thread {
+      private List<FacetResult> results;
+      private FacetIndexingParams iParams;
+      private IndexReader indexReader;
+      private TaxonomyReader taxoReader;
+
+      public Multi(IndexReader indexReader, TaxonomyReader taxoReader,
+                    FacetIndexingParams iParams) {
+        this.indexReader = indexReader;
+        this.taxoReader = taxoReader;
+        this.iParams = iParams;
+      }
+
+      public ExampleResult getResults() {
+        ExampleResult exampleRes = new ExampleResult();
+        exampleRes.setFacetResults(results);
+        return exampleRes;
+      }
+
+      @Override
+      public void run() {
+        try {
+          results = MultiCLSearcher.searchWithFacets(indexReader, taxoReader, iParams);
+        } catch (Exception e) {
+          throw new RuntimeException(e);
+        }
+      }
+    }
+
+    // Instantiate threads, but do not start them
+    Multi[] multis = new Multi[numThreads];
+    for (int i = 0; i < numThreads - 1; i++) {
+      multis[i] = new Multi(slowIndexReader, slowTaxoReader, MultiCLIndexer.MULTI_IPARAMS);
+    }
+    // The last thread uses ONLY the DefaultFacetIndexingParams so that
+    // it references a different TFC cache. This will still result
+    // in valid results, but will only search one of the category lists
+    // instead of all of them.
+    multis[numThreads - 1] = new Multi(slowIndexReader, slowTaxoReader, new DefaultFacetIndexingParams());
+
+    // Gentlemen, start your engines
+    for (Multi m : multis) {
+      m.start();
+    }
+
+    // Wait for threads and get results
+    ExampleResult[] multiResults = new ExampleResult[numThreads];
+    for (int i = 0; i < numThreads; i++) {
+      multis[i].join();
+      multiResults[i] = multis[i].getResults();
+    }
+
+    // Each of the first (numThreads-1) threads should have the same
+    // predictable results, which we test for here.
+    for (int i = 0; i < numThreads - 1; i++) {
+      ExampleResult eResults = multiResults[i];
+      TestMultiCLExample.assertCorrectMultiResults(eResults);
+    }
+
+    // The last thread, which searched using only the
+    // DefaultFacetIndexingParams, has its own results.
+    ExampleResult eResults = multiResults[numThreads - 1];
+    List<FacetResult> results = eResults.getFacetResults();
+    assertEquals(3, results.size());
+    String[] expLabels = new String[] { "5", "5/5", "6/2" };
+    double[] expValues = new double[] { 0.0, 0.0, 1.0 };
+    for (int i = 0; i < 3; i++) {
+      FacetResult result = results.get(i);
+      assertNotNull("Result should not be null", result);
+      FacetResultNode resNode = result.getFacetResultNode();
+      assertEquals("Invalid label", expLabels[i], resNode.getLabel().toString());
+      assertEquals("Invalid value", expValues[i], resNode.getValue(), 0.0);
+      assertEquals("Invalid number of subresults", 0, resNode.getNumSubResults());
+    }
+    // we're done, close the index reader and the taxonomy.
+    slowIndexReader.close();
+    slowTaxoReader.close();
+  }
+
+  /**
+   * Simple test to make sure the TotalFacetCountsManager updates the
+   * TotalFacetCounts array only when it is supposed to, and correctly
+   * reports whether each result was recomputed or read from disk.
+   */
+  @Test
+  public void testGenerationalConsistency() throws Exception {
+    // Create temporary RAMDirectories
+    Directory[][] dirs = FacetTestUtils.createIndexTaxonomyDirs(1);
+
+    // Create our index/taxonomy writers
+    IndexTaxonomyWriterPair[] writers = FacetTestUtils.createIndexTaxonomyWriterPair(dirs);
+    DefaultFacetIndexingParams iParams = new DefaultFacetIndexingParams();
+
+    // Add a facet to the index
+    addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "a", "b");
+
+    // Commit Changes
+    writers[0].indexWriter.commit();
+    writers[0].taxWriter.commit();
+
+    // Open readers
+    IndexTaxonomyReaderPair[] readers = FacetTestUtils.createIndexTaxonomyReaderPair(dirs);
+
+    // As this is the first time we have invoked the TotalFacetCountsManager, 
+    // we should expect to compute and not read from disk.
+    TotalFacetCounts totalCounts = 
+      TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null);
+    int prevGen = assertRecomputed(totalCounts, 0, "after first attempt to get it!");
+
+    // Repeating same operation should pull from the cache - not recomputed. 
+    assertTrue("Should be obtained from cache at 2nd attempt",totalCounts == 
+      TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null));
+
+    // Repeat the same operation as above, but clear the cache first - now it should recompute again
+    initCache();
+    totalCounts = TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null);
+    prevGen = assertRecomputed(totalCounts, prevGen, "after cache clear, 3rd attempt to get it!");
+    
+    // Store to file
+    File outputFile = _TestUtil.createTempFile("test", "tmp", TEMP_DIR);
+    initCache();
+    TFC.store(outputFile, readers[0].indexReader, readers[0].taxReader, iParams, null);
+    totalCounts = TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null);
+    prevGen = assertRecomputed(totalCounts, prevGen, "after cache clear, 4th attempt to get it!");
+
+    // Clear and load
+    initCache();
+    TFC.load(outputFile, readers[0].indexReader, readers[0].taxReader, iParams);
+    totalCounts = TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null);
+    prevGen = assertReadFromDisc(totalCounts, prevGen, "after 5th attempt to get it!");
+
+    // Add a new facet to the index, commit and refresh readers
+    addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "c", "d");
+    writers[0].indexWriter.close();
+    writers[0].taxWriter.close();
+
+    readers[0].taxReader.refresh();
+    IndexReader r2 = readers[0].indexReader.reopen();
+    // Hold on to the 'original' reader so we can do some checks with it
+    IndexReader origReader = null;
+
+    assertTrue("Reader must be updated!", readers[0].indexReader != r2);
+    
+    // Set the 'original' reader
+    origReader = readers[0].indexReader;
+    // Set the new master index Reader
+    readers[0].indexReader = r2;
+
+    // Try to get total-counts with the original reader AGAIN, just for sanity. Should pull from the cache - not recomputed.
+    assertTrue("Should be obtained from cache at 6th attempt",totalCounts == 
+      TFC.getTotalCounts(origReader, readers[0].taxReader, iParams, null));
+
+    // now use the new reader - should recompute
+    totalCounts = TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null);
+    prevGen = assertRecomputed(totalCounts, prevGen, "after updating the index - 7th attempt!");
+
+    // try again - should not recompute
+    assertTrue("Should be obtained from cache at 8th attempt",totalCounts == 
+      TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null));
+    
+    // delete a doc from the reader and commit - should recompute
+    origReader.close();
+    origReader = readers[0].indexReader;
+    readers[0].indexReader = IndexReader.open(origReader.directory(),false);
+    initCache();
+    totalCounts = TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null);
+    prevGen = assertRecomputed(totalCounts, prevGen, "after opening a writable reader - 9th attempt!");
+    // now do the delete
+    readers[0].indexReader.deleteDocument(1);
+    readers[0].indexReader.commit(null);
+    totalCounts = TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null);
+    prevGen = assertRecomputed(totalCounts, prevGen, "after deleting docs from the index - 10th attempt!");
+    
+    origReader.close();
+    readers[0].close();
+    r2.close();
+    outputFile.delete();
+  }
+
+  private int assertReadFromDisc(TotalFacetCounts totalCounts, int prevGen, String errMsg) {
+    assertEquals("should read from disk "+errMsg, CreationType.Loaded, totalCounts.createType4test);
+    int gen4test = totalCounts.gen4test;
+    assertTrue("should read from disk "+errMsg, gen4test > prevGen);
+    return gen4test;
+  }
+  
+  private int assertRecomputed(TotalFacetCounts totalCounts, int prevGen, String errMsg) {
+    assertEquals("should recompute "+errMsg, CreationType.Computed, totalCounts.createType4test);
+    int gen4test = totalCounts.gen4test;
+    assertTrue("should recompute "+errMsg, gen4test > prevGen);
+    return gen4test;
+  }
+
+  /**
+   * This test addresses a bug (Tracker #146354) in a previous version. If a TFC cache is
+   * written to disk, and then the taxonomy grows (but the index does not change),
+   * and then the TFC cache is re-read from disk, an exception is thrown, as the
+   * integers are read off the disk according to the taxonomy size, which has
+   * changed.
+   */
+  @Test
+  public void testGrowingTaxonomy() throws Exception {
+    // Create temporary RAMDirectories
+    Directory[][] dirs = FacetTestUtils.createIndexTaxonomyDirs(1);
+    // Create our index/taxonomy writers
+    IndexTaxonomyWriterPair[] writers = FacetTestUtils
+        .createIndexTaxonomyWriterPair(dirs);
+    DefaultFacetIndexingParams iParams = new DefaultFacetIndexingParams() {
+      @Override
+      protected int fixedPartitionSize() {
+        return 2;
+      }
+    };
+    // Add a facet to the index
+    addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "a", "b");
+    // Commit Changes
+    writers[0].indexWriter.commit();
+    writers[0].taxWriter.commit();
+
+    IndexTaxonomyReaderPair[] readers = FacetTestUtils.createIndexTaxonomyReaderPair(dirs);
+
+    // Create TFC and write cache to disk
+    File outputFile = _TestUtil.createTempFile("test", "tmp", TEMP_DIR);
+    TFC.store(outputFile, readers[0].indexReader, readers[0].taxReader, iParams, null);
+    
+    // Make the taxonomy grow without touching the index
+    for (int i = 0; i < 10; i++) {
+      writers[0].taxWriter.addCategory(new CategoryPath("foo", Integer.toString(i)));
+    }
+    writers[0].taxWriter.commit();
+    readers[0].taxReader.refresh();
+
+    initCache();
+
+    // With the bug, this next call should result in an exception
+    TFC.load(outputFile, readers[0].indexReader, readers[0].taxReader, iParams);
+    TotalFacetCounts totalCounts = TFC.getTotalCounts(
+        readers[0].indexReader, readers[0].taxReader, iParams, null);
+    assertReadFromDisc(totalCounts, 0, "after reading from disk.");
+    outputFile.delete();
+  }
+
+  /**
+   * Test that a new TFC is calculated and placed in memory (by two
+   * threads that want it at the same time) only once.
+   */
+  @Test
+  public void testMemoryCacheSynchronization() throws Exception {
+    SlowRAMDirectory indexDir = new SlowRAMDirectory(-1, null);
+    SlowRAMDirectory taxoDir = new SlowRAMDirectory(-1, null);
+
+    // Write index using 'normal' directories
+    IndexWriter w = new IndexWriter(indexDir, new IndexWriterConfig(
+        TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
+    LuceneTaxonomyWriter tw = new LuceneTaxonomyWriter(taxoDir);
+    DefaultFacetIndexingParams iParams = new DefaultFacetIndexingParams();
+    // Add documents and facets
+    for (int i = 0; i < 1000; i++) {
+      addFacets(iParams, w, tw, "facet", Integer.toString(i));
+    }
+    w.close();
+    tw.close();
+
+    indexDir.setSleepMillis(1);
+    taxoDir.setSleepMillis(1);
+
+    IndexReader r = IndexReader.open(indexDir);
+    LuceneTaxonomyReader tr = new LuceneTaxonomyReader(taxoDir);
+
+    // Create and start threads. Thread1 should lock the cache and calculate
+    // the TFC array. The second thread should block until the first is
+    // done, then successfully retrieve from the cache without recalculating
+    // or reading from disk.
+    TFCThread tfcCalc1 = new TFCThread(r, tr, iParams);
+    TFCThread tfcCalc2 = new TFCThread(r, tr, iParams);
+    tfcCalc1.start();
+    // Give thread 1 a head start to ensure correct sequencing for testing
+    Thread.sleep(5);
+    tfcCalc2.start();
+
+    tfcCalc1.join();
+    tfcCalc2.join();
+
+    // Both threads share the one TFC object that was computed; verify its
+    // creation type and that the two references are identical.
+    assertRecomputed(tfcCalc1.tfc, 0, "thread 1 should recompute");
+    assertRecomputed(tfcCalc2.tfc, 0, "thread 2 should recompute");
+    assertTrue("Both results should be the same (as their inputs are the same objects)",
+        tfcCalc1.tfc == tfcCalc2.tfc);
+
+    r.close();
+    tr.close();
+  }
+
+  /**
+   * Simple test to make sure the TotalFacetCountsManager updates the
+   * TotalFacetCounts array only when it is supposed to, and whether it
+   * is recomputed or read from disk, but this time with TWO different
+   * TotalFacetCounts
+   */
+  @Test
+  public void testMultipleIndices() throws IOException {
+    // Create temporary RAMDirectories
+    Directory[][] dirs = FacetTestUtils.createIndexTaxonomyDirs(2);
+    // Create our index/taxonomy writers
+    IndexTaxonomyWriterPair[] writers = FacetTestUtils.createIndexTaxonomyWriterPair(dirs);
+    DefaultFacetIndexingParams iParams = new DefaultFacetIndexingParams();
+
+    // Add a facet to the index
+    addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "a", "b");
+    addFacets(iParams, writers[1].indexWriter, writers[1].taxWriter, "d", "e");
+    // Commit Changes
+    writers[0].indexWriter.commit();
+    writers[0].taxWriter.commit();
+    writers[1].indexWriter.commit();
+    writers[1].taxWriter.commit();
+
+    // Open two readers
+    IndexTaxonomyReaderPair[] readers = FacetTestUtils.createIndexTaxonomyReaderPair(dirs);
+
+    // As this is the first time we have invoked the TotalFacetCountsManager, we
+    // should expect to compute.
+    TotalFacetCounts totalCounts0 = 
+      TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null);
+    int prevGen = -1;
+    prevGen = assertRecomputed(totalCounts0, prevGen, "after attempt 1");
+    assertTrue("attempt 1b for same input [0] shout find it in cache",
+        totalCounts0 == TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null));
+    
+    // 2nd Reader - As this is the first time we have invoked the
+    // TotalFacetCountsManager for this reader, we should expect it to be computed.
+    TotalFacetCounts totalCounts1 = 
+      TFC.getTotalCounts(readers[1].indexReader, readers[1].taxReader, iParams, null);
+    prevGen = assertRecomputed(totalCounts1, prevGen, "after attempt 2");
+    assertTrue("attempt 2b for same input [1] shout find it in cache",
+        totalCounts1 == TFC.getTotalCounts(readers[1].indexReader, readers[1].taxReader, iParams, null));
+
+    // Right now the cache size is one, so the first TFC is gone and should be recomputed
+    totalCounts0 = 
+      TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null);
+    prevGen = assertRecomputed(totalCounts0, prevGen, "after attempt 3");
+    
+    // Similarly, the second result will be recomputed
+    totalCounts1 = 
+      TFC.getTotalCounts(readers[1].indexReader, readers[1].taxReader, iParams, null);
+    prevGen = assertRecomputed(totalCounts1, prevGen, "after attempt 4");
+
+    // Now we set the cache size to two, meaning both should exist in the
+    // cache simultaneously
+    TFC.setCacheSize(2);
+
+    // Re-compute totalCounts0 (was evicted from the cache when the cache was smaller)
+    totalCounts0 = 
+      TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null);
+    prevGen = assertRecomputed(totalCounts0, prevGen, "after attempt 5");
+
+    // now both are in the larger cache and should not be recomputed 
+    totalCounts1 = TFC.getTotalCounts(readers[1].indexReader,
+        readers[1].taxReader, iParams, null);
+    assertTrue("with cache of size 2 res no. 0 should come from cache",
+        totalCounts0 == TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null));
+    assertTrue("with cache of size 2 res no. 1 should come from cache",
+        totalCounts1 == TFC.getTotalCounts(readers[1].indexReader, readers[1].taxReader, iParams, null));
+    
+    readers[0].close();
+    readers[1].close();
+  }
+
+}
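
For readers skimming the test above, the caching contract it exercises boils down to
the following sketch. This is a minimal sketch only, assuming the branch_3x
contrib/facet API as it appears in this commit; the reader, taxonomy reader, and
indexing params are placeholders supplied by the caller.

    import java.io.File;
    import org.apache.lucene.facet.index.params.FacetIndexingParams;
    import org.apache.lucene.facet.search.TotalFacetCounts;
    import org.apache.lucene.facet.search.TotalFacetCountsCache;
    import org.apache.lucene.facet.taxonomy.TaxonomyReader;
    import org.apache.lucene.index.IndexReader;

    class TfcCacheSketch {
      static TotalFacetCounts countsFor(IndexReader reader, TaxonomyReader taxo,
          FacetIndexingParams iParams, File file) throws Exception {
        TotalFacetCountsCache tfc = TotalFacetCountsCache.getSingleton();
        tfc.setCacheSize(2); // keep up to two TFC arrays in memory

        // The first call for a given (reader, taxonomy, params) key computes the
        // counts; repeated calls with the same key return the cached object.
        TotalFacetCounts counts = tfc.getTotalCounts(reader, taxo, iParams, null);

        // Counts can be persisted and re-loaded instead of being recomputed.
        tfc.store(file, reader, taxo, iParams, null);
        tfc.clear();
        tfc.load(file, reader, taxo, iParams);
        return tfc.getTotalCounts(reader, taxo, iParams, null); // Loaded, not Computed
      }
    }

Note that reopening the index produces a new cache key, which is why the test above
sees a recompute after facets are added and the reader is reopened.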

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/association/AssociationsFacetRequestTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/association/AssociationsFacetRequestTest.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/association/AssociationsFacetRequestTest.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/association/AssociationsFacetRequestTest.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,180 @@
+package org.apache.lucene.facet.search.association;
+
+import java.util.List;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.facet.enhancements.EnhancementsDocumentBuilder;
+import org.apache.lucene.facet.enhancements.association.AssociationEnhancement;
+import org.apache.lucene.facet.enhancements.association.AssociationFloatProperty;
+import org.apache.lucene.facet.enhancements.association.AssociationIntProperty;
+import org.apache.lucene.facet.enhancements.params.DefaultEnhancementsIndexingParams;
+import org.apache.lucene.facet.index.CategoryContainer;
+import org.apache.lucene.facet.search.FacetsCollector;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.search.params.association.AssociationFloatSumFacetRequest;
+import org.apache.lucene.facet.search.params.association.AssociationIntSumFacetRequest;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Test for associations */
+public class AssociationsFacetRequestTest extends LuceneTestCase {
+
+  private static Directory dir = new RAMDirectory();
+  private static Directory taxoDir = new RAMDirectory();
+  
+  private static final CategoryPath aint = new CategoryPath("int", "a");
+  private static final CategoryPath bint = new CategoryPath("int", "b");
+  private static final CategoryPath afloat = new CategoryPath("float", "a");
+  private static final CategoryPath bfloat = new CategoryPath("float", "b");
+  
+  @BeforeClass
+  public static void beforeClassAssociationsFacetRequestTest() throws Exception {
+    // preparations - index, taxonomy, content
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new KeywordAnalyzer()));
+    
+    TaxonomyWriter taxoWriter = new LuceneTaxonomyWriter(taxoDir);
+    
+    EnhancementsDocumentBuilder builder = new EnhancementsDocumentBuilder(
+        taxoWriter, new DefaultEnhancementsIndexingParams(
+            new AssociationEnhancement()));
+    
+    // index documents: all have 'a', and 50% also have 'b'
+    for (int i = 0; i < 100; i++) {
+      Document doc = new Document();
+      CategoryContainer container = new CategoryContainer();
+      container.addCategory(aint, new AssociationIntProperty(2));
+      container.addCategory(afloat, new AssociationFloatProperty(0.5f));
+      if (i % 2 == 0) { // 50 of the 100 docs
+        container.addCategory(bint, new AssociationIntProperty(3));
+        container.addCategory(bfloat, new AssociationFloatProperty(0.2f));
+      }
+      builder.setCategories(container).build(doc);
+      writer.addDocument(doc);
+    }
+    
+    taxoWriter.close();
+    writer.close();
+  }
+  
+  @AfterClass
+  public static void afterClassAssociationsFacetRequestTest() throws Exception {
+    dir.close();
+    taxoDir.close();
+  }
+  
+  @Test
+  public void testIntSumAssociation() throws Exception {
+    IndexReader reader = IndexReader.open(dir, true);
+    LuceneTaxonomyReader taxo = new LuceneTaxonomyReader(taxoDir);
+
+    // facet requests for two facets
+    FacetSearchParams fsp = new FacetSearchParams();
+    fsp.addFacetRequest(new AssociationIntSumFacetRequest(aint, 10));
+    fsp.addFacetRequest(new AssociationIntSumFacetRequest(bint, 10));
+    
+    Query q = new MatchAllDocsQuery();
+
+    FacetsCollector fc = new FacetsCollector(fsp, reader, taxo);
+    
+    new IndexSearcher(reader).search(q, fc);
+    List<FacetResult> res = fc.getFacetResults();
+    
+    assertNotNull("No results!",res);
+    assertEquals("Wrong number of results!",2, res.size());
+    assertEquals("Wrong count for category 'a'!",200, (int) res.get(0).getFacetResultNode().getValue());
+    assertEquals("Wrong count for category 'b'!",150, (int) res.get(1).getFacetResultNode().getValue());
+    
+    taxo.close();
+    reader.close();
+  }
+  
+  @Test
+  public void testFloatSumAssociation() throws Exception {
+    
+    IndexReader reader = IndexReader.open(dir, true);
+    LuceneTaxonomyReader taxo = new LuceneTaxonomyReader(taxoDir);
+
+    // facet requests for two facets
+    FacetSearchParams fsp = new FacetSearchParams();
+    fsp.addFacetRequest(new AssociationFloatSumFacetRequest(afloat, 10));
+    fsp.addFacetRequest(new AssociationFloatSumFacetRequest(bfloat, 10));
+    
+    Query q = new MatchAllDocsQuery();
+
+    FacetsCollector fc = new FacetsCollector(fsp, reader, taxo);
+    
+    new IndexSearcher(reader).search(q, fc);
+    List<FacetResult> res = fc.getFacetResults();
+    
+    assertNotNull("No results!",res);
+    assertEquals("Wrong number of results!",2, res.size());
+    assertEquals("Wrong count for category 'a'!",50f, (float) res.get(0).getFacetResultNode().getValue(), 0.00001);
+    assertEquals("Wrong count for category 'b'!",10f, (float) res.get(1).getFacetResultNode().getValue(), 0.00001);
+    
+    taxo.close();
+    reader.close();
+  }  
+    
+  @Test
+  public void testDifferentAggregatorsSameCategoryList() throws Exception {
+    // The same category list cannot be aggregated by two different aggregators. If
+    // you want to do that, you need to separate the categories into two
+    // category lists (you'll still have one association list).
+    IndexReader reader = IndexReader.open(dir, true);
+    LuceneTaxonomyReader taxo = new LuceneTaxonomyReader(taxoDir);
+
+    // facet requests for two facets
+    FacetSearchParams fsp = new FacetSearchParams();
+    fsp.addFacetRequest(new AssociationIntSumFacetRequest(aint, 10));
+    fsp.addFacetRequest(new AssociationIntSumFacetRequest(bint, 10));
+    fsp.addFacetRequest(new AssociationFloatSumFacetRequest(afloat, 10));
+    fsp.addFacetRequest(new AssociationFloatSumFacetRequest(bfloat, 10));
+    
+    Query q = new MatchAllDocsQuery();
+
+    FacetsCollector fc = new FacetsCollector(fsp, reader, taxo);
+    
+    new IndexSearcher(reader).search(q, fc);
+    try {
+      fc.getFacetResults();
+      fail("different aggregators for same category list should not be supported");
+    } catch (RuntimeException e) {
+      // ok - expected
+    }
+  }  
+
+}
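
As a gloss on the expected values in testIntSumAssociation above: all 100 documents
carry the category ("int","a") with an int association of 2, and every second
document also carries ("int","b") with 3, so the summed associations are 100*2 = 200
and 50*3 = 150. A minimal search-side sketch using only the API already exercised by
the test (index and taxonomy assumed to be built as in beforeClass):

    import java.util.List;
    import org.apache.lucene.facet.search.FacetsCollector;
    import org.apache.lucene.facet.search.params.FacetSearchParams;
    import org.apache.lucene.facet.search.params.association.AssociationIntSumFacetRequest;
    import org.apache.lucene.facet.search.results.FacetResult;
    import org.apache.lucene.facet.taxonomy.CategoryPath;
    import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;

    class AssociationSumSketch {
      static List<FacetResult> sumInts(IndexReader reader, LuceneTaxonomyReader taxo)
          throws Exception {
        FacetSearchParams fsp = new FacetSearchParams();
        // Each request sums the int association values under its category.
        fsp.addFacetRequest(new AssociationIntSumFacetRequest(new CategoryPath("int", "a"), 10));
        fsp.addFacetRequest(new AssociationIntSumFacetRequest(new CategoryPath("int", "b"), 10));
        FacetsCollector fc = new FacetsCollector(fsp, reader, taxo);
        new IndexSearcher(reader).search(new MatchAllDocsQuery(), fc);
        return fc.getFacetResults(); // 200 for "int"/"a", 150 for "int"/"b"
      }
    }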

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/FacetRequestTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/FacetRequestTest.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/FacetRequestTest.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/FacetRequestTest.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,90 @@
+package org.apache.lucene.facet.search.params;
+
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.RAMDirectory;
+import org.junit.Test;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.facet.search.FacetResultsHandler;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class FacetRequestTest extends LuceneTestCase {
+
+  @Test(expected=IllegalArgumentException.class)
+  public void testIllegalNumResults() throws Exception {
+    new CountFacetRequest(new CategoryPath("a", "b"), 0);
+  }
+  
+  @Test(expected=IllegalArgumentException.class)
+  public void testIllegalCategoryPath() throws Exception {
+    new CountFacetRequest(null, 1);
+  }
+
+  @Test
+  public void testHashAndEquals() {
+    CountFacetRequest fr1 = new CountFacetRequest(new CategoryPath("a"), 8);
+    CountFacetRequest fr2 = new CountFacetRequest(new CategoryPath("a"), 8);
+    assertEquals("hashCode() should agree on both objects", fr1.hashCode(), fr2.hashCode());
+    assertTrue("equals() should return true", fr1.equals(fr2));
+    fr1.setDepth(10);
+    assertFalse("equals() should return false as fr1.depth != fr2.depth", fr1.equals(fr2));
+  }
+  
+  @Test
+  public void testGetFacetResultHandlerDifferentTaxonomy() throws Exception {
+    FacetRequest fr = new CountFacetRequest(new CategoryPath("a"), 10);
+    RAMDirectory dir1 = new RAMDirectory();
+    RAMDirectory dir2 = new RAMDirectory();
+    // create empty indexes, so that LTR ctor won't complain about a missing index.
+    new IndexWriter(dir1, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close();
+    new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close();
+    TaxonomyReader tr1 = new LuceneTaxonomyReader(dir1);
+    TaxonomyReader tr2 = new LuceneTaxonomyReader(dir2);
+    FacetResultsHandler frh1 = fr.createFacetResultsHandler(tr1);
+    FacetResultsHandler frh2 = fr.createFacetResultsHandler(tr2);
+    assertTrue("should not return the same FacetResultHandler instance for different TaxonomyReader instances", frh1 != frh2);
+  }
+  
+  @Test
+  public void testImmutability() throws Exception {
+    // Tests that after a FRH is created by FR, changes to FR are not reflected
+    // in the FRH.
+    FacetRequest fr = new CountFacetRequest(new CategoryPath("a"), 10);
+    RAMDirectory dir = new RAMDirectory();
+    // create empty indexes, so that LTR ctor won't complain about a missing index.
+    new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close();
+    TaxonomyReader tr = new LuceneTaxonomyReader(dir);
+    FacetResultsHandler frh = fr.createFacetResultsHandler(tr);
+    fr.setDepth(10);
+    assertEquals(FacetRequest.DEFAULT_DEPTH, frh.getFacetRequest().getDepth());
+  }
+  
+  @Test
+  public void testClone() throws Exception {
+    FacetRequest fr = new CountFacetRequest(new CategoryPath("a"), 10);
+    FacetRequest clone = (FacetRequest) fr.clone();
+    fr.setDepth(10);
+    assertEquals("depth should not have been affected in the clone", FacetRequest.DEFAULT_DEPTH, clone.getDepth());
+  }
+  
+}
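
The immutability and clone tests above pin down a contract worth stating explicitly:
a FacetResultsHandler (or a clone) snapshots the request's state at creation time, so
later setters on the original request do not leak through. A short sketch under that
assumption, reusing only calls that appear in the test:

    import org.apache.lucene.facet.search.params.CountFacetRequest;
    import org.apache.lucene.facet.search.params.FacetRequest;
    import org.apache.lucene.facet.taxonomy.CategoryPath;

    class FacetRequestSnapshotSketch {
      static void demo() throws Exception {
        FacetRequest fr = new CountFacetRequest(new CategoryPath("a"), 10);
        FacetRequest snapshot = (FacetRequest) fr.clone();
        fr.setDepth(10); // mutate the original after cloning
        // The clone keeps the depth it was created with (DEFAULT_DEPTH here),
        // just as a FacetResultsHandler created earlier would.
        assert snapshot.getDepth() == FacetRequest.DEFAULT_DEPTH;
      }
    }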

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/FacetSearchParamsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/FacetSearchParamsTest.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/FacetSearchParamsTest.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/FacetSearchParamsTest.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,75 @@
+package org.apache.lucene.facet.search.params;
+
+import org.apache.lucene.store.RAMDirectory;
+import org.junit.Test;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
+import org.apache.lucene.facet.util.PartitionsUtils;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class FacetSearchParamsTest extends LuceneTestCase {
+
+  @Test
+  public void testDefaultSettings() throws Exception {
+    FacetSearchParams fsp = new FacetSearchParams();
+    assertEquals("unexpected default facet indexing params class", DefaultFacetIndexingParams.class.getName(), fsp.getFacetIndexingParams().getClass().getName());
+    assertEquals("no facet requests should be added by default", 0, fsp.getFacetRequests().size());
+    RAMDirectory dir = new RAMDirectory();
+    new LuceneTaxonomyWriter(dir).close();
+    TaxonomyReader tr = new LuceneTaxonomyReader(dir);
+    assertEquals("unexpected partition offset for 0 categories", 1, PartitionsUtils.partitionOffset(fsp, 1, tr));
+    assertEquals("unexpected partition size for 0 categories", 1, PartitionsUtils.partitionSize(fsp,tr));
+  }
+  
+  @Test
+  public void testAddFacetRequest() throws Exception {
+    FacetSearchParams fsp = new FacetSearchParams();
+    fsp.addFacetRequest(new CountFacetRequest(new CategoryPath("a", "b"), 1));
+    assertEquals("expected 1 facet request", 1, fsp.getFacetRequests().size());
+  }
+  
+  @Test
+  public void testPartitionSizeWithCategories() throws Exception {
+    FacetSearchParams fsp = new FacetSearchParams();
+    RAMDirectory dir = new RAMDirectory();
+    TaxonomyWriter tw = new LuceneTaxonomyWriter(dir);
+    tw.addCategory(new CategoryPath("a"));
+    tw.commit();
+    tw.close();
+    TaxonomyReader tr = new LuceneTaxonomyReader(dir);
+    assertEquals("unexpected partition offset for 1 categories", 2, PartitionsUtils.partitionOffset(fsp, 1, tr));
+    assertEquals("unexpected partition size for 1 categories", 2, PartitionsUtils.partitionSize(fsp,tr));
+  }
+  
+  @Test
+  public void testSearchParamsWithNullRequest() throws Exception {
+    FacetSearchParams fsp = new FacetSearchParams();
+    try {
+      fsp.addFacetRequest(null);
+      fail("FacetSearchParams should throw IllegalArgumentException when trying to add a null FacetRequest");
+    } catch (IllegalArgumentException e) {
+    }
+  }
+}
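
The partition assertions above encode a simple relationship: the partition size, and
hence the offset of partition n, is derived from the taxonomy size, so adding a
category grows both. A sketch of the same probe, assuming an already-populated
taxonomy directory:

    import org.apache.lucene.facet.search.params.FacetSearchParams;
    import org.apache.lucene.facet.taxonomy.TaxonomyReader;
    import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
    import org.apache.lucene.facet.util.PartitionsUtils;
    import org.apache.lucene.store.Directory;

    class PartitionProbeSketch {
      static void probe(Directory taxoDir) throws Exception {
        TaxonomyReader tr = new LuceneTaxonomyReader(taxoDir);
        FacetSearchParams fsp = new FacetSearchParams();
        int size = PartitionsUtils.partitionSize(fsp, tr);         // grows with the taxonomy
        int offset1 = PartitionsUtils.partitionOffset(fsp, 1, tr); // offset of partition no. 1
        System.out.println("partition size=" + size + ", offset(1)=" + offset1);
        tr.close();
      }
    }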

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,263 @@
+package org.apache.lucene.facet.search.params;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.junit.Test;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.facet.index.CategoryDocumentBuilder;
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.search.CategoryListIterator;
+import org.apache.lucene.facet.search.FacetArrays;
+import org.apache.lucene.facet.search.FacetResultsHandler;
+import org.apache.lucene.facet.search.FacetsAccumulator;
+import org.apache.lucene.facet.search.ScoredDocIDs;
+import org.apache.lucene.facet.search.StandardFacetsAccumulator;
+import org.apache.lucene.facet.search.TopKFacetResultsHandler;
+import org.apache.lucene.facet.search.cache.CategoryListCache;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.search.results.FacetResultNode;
+import org.apache.lucene.facet.search.results.IntermediateFacetResult;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
+import org.apache.lucene.facet.util.ScoredDocIdsUtils;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Test faceted search with creation of multiple category list iterators by the
+ * same CLP, depending on the provided facet request.
+ */
+public class MultiIteratorsPerCLParamsTest extends LuceneTestCase {
+
+  CategoryPath[][] perDocCategories = new CategoryPath[][] {
+      { new CategoryPath("author", "Mark Twain"),
+          new CategoryPath("date", "2010") },
+      { new CategoryPath("author", "Robert Frost"),
+          new CategoryPath("date", "2009") },
+      { new CategoryPath("author", "Artur Miller"),
+          new CategoryPath("date", "2010") },
+      { new CategoryPath("author", "Edgar Allan Poe"),
+          new CategoryPath("date", "2009") },
+      { new CategoryPath("author", "Henry James"),
+          new CategoryPath("date", "2010") } };
+  
+  String countForbiddenDimension;
+
+  @Test
+  public void testCLParamMultiIteratorsByRequest() throws Exception {
+    doTestCLParamMultiIteratorsByRequest(false);
+  }
+
+  @Test
+  public void testCLParamMultiIteratorsByRequestCacheCLI() throws Exception {
+    doTestCLParamMultiIteratorsByRequest(true);
+  }
+
+  private void doTestCLParamMultiIteratorsByRequest(boolean cacheCLI) throws Exception,
+      CorruptIndexException, IOException {
+    // Create a CLP which generates different CLIs according to the
+    // FacetRequest's dimension
+    CategoryListParams clp = new CategoryListParams();
+    FacetIndexingParams iParams = new DefaultFacetIndexingParams(clp);
+    Directory indexDir = new RAMDirectory();
+    Directory taxoDir = new RAMDirectory();
+    populateIndex(iParams, indexDir, taxoDir);
+
+    TaxonomyReader taxo = new LuceneTaxonomyReader(taxoDir);
+    IndexReader reader = IndexReader.open(indexDir);
+
+    CategoryListCache clCache = null;
+    if (cacheCLI) {
+      // caching the iterator, so:
+      // 1: create the cached iterator, using original params
+      clCache = new CategoryListCache();
+      clCache.loadAndRegister(clp, reader, taxo, iParams);
+    }
+    
+    ScoredDocIDs allDocs = ScoredDocIdsUtils
+        .createAllDocsScoredDocIDs(reader);
+
+    // Search index with 'author' should filter ONLY ordinals whose parent
+    // is 'author'
+    countForbiddenDimension = "date";
+    validateFacetedSearch(iParams, taxo, reader, clCache, allDocs, "author", 5, 5);
+
+    // Search index with 'date' should filter ONLY ordinals whose parent is
+    // 'date'
+    countForbiddenDimension = "author";
+    validateFacetedSearch(iParams, taxo, reader, clCache, allDocs, "date", 5, 2);
+
+    // Search index with both 'date' and 'author'
+    countForbiddenDimension = null;
+    validateFacetedSearch(iParams, taxo, reader, clCache, allDocs, new String[] {
+            "author", "date" }, new int[] { 5, 5 }, new int[] { 5, 2 });
+  }
+
+  private void validateFacetedSearch(FacetIndexingParams iParams,
+      TaxonomyReader taxo, IndexReader reader, CategoryListCache clCache,
+      ScoredDocIDs allDocs, String dimension, int expectedValue, int expectedNumDescendants) throws IOException {
+    validateFacetedSearch(iParams, taxo, reader, clCache, allDocs,
+        new String[] { dimension }, new int[] { expectedValue },
+        new int[] { expectedNumDescendants });
+  }
+
+  private void validateFacetedSearch(FacetIndexingParams iParams,
+      TaxonomyReader taxo, IndexReader reader,  CategoryListCache clCache, ScoredDocIDs allDocs,
+      String[] dimension, int[] expectedValue,
+      int[] expectedNumDescendants)
+      throws IOException {
+    FacetSearchParams sParams = new FacetSearchParams(iParams);
+    sParams.setClCache(clCache);
+    for (String dim : dimension) {
+      sParams.addFacetRequest(new PerDimCountFacetRequest(
+          new CategoryPath(dim), 10));
+    }
+    FacetsAccumulator acc = new StandardFacetsAccumulator(sParams, reader, taxo);
+    
+    // no use to test this with complement since at that mode all facets are taken
+    acc.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT);
+
+    List<FacetResult> results = acc.accumulate(allDocs);
+    assertEquals("Wrong #results", dimension.length, results.size());
+
+    for (int i = 0; i < results.size(); i++) {
+      FacetResult res = results.get(i);
+      assertEquals("wrong num-descendants for dimension " + dimension[i],
+          expectedNumDescendants[i], res.getNumValidDescendants());
+      FacetResultNode resNode = res.getFacetResultNode();
+      assertEquals("wrong value for dimension " + dimension[i],
+          expectedValue[i], (int) resNode.getValue());
+    }
+  }
+
+  private void populateIndex(FacetIndexingParams iParams, Directory indexDir,
+      Directory taxoDir) throws Exception {
+    IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new KeywordAnalyzer()));
+    TaxonomyWriter taxoWriter = new LuceneTaxonomyWriter(taxoDir);
+
+    for (CategoryPath[] categories : perDocCategories) {
+      writer.addDocument(new CategoryDocumentBuilder(taxoWriter, iParams)
+          .setCategoryPaths(Arrays.asList(categories)).build(
+              new Document()));
+
+    }
+    taxoWriter.commit();
+    writer.commit();
+    taxoWriter.close();
+    writer.close();
+  }
+
+  private class PerDimCountFacetRequest extends CountFacetRequest {
+    
+    public PerDimCountFacetRequest(CategoryPath path, int num) {
+      super(path, num);
+    }
+
+    @Override
+    public CategoryListIterator createCategoryListIterator(IndexReader reader, 
+        TaxonomyReader taxo, FacetSearchParams sParams, int partition) throws IOException {
+      // categories of certain dimension only
+      return new PerDimensionCLI(taxo, super.createCategoryListIterator(
+          reader, taxo, sParams, partition), getCategoryPath());
+    }
+    
+    /** Overridden just to verify that only the specified facets are iterated. */
+    @Override
+    public FacetResultsHandler createFacetResultsHandler(
+        TaxonomyReader taxonomyReader) {
+      return new TopKFacetResultsHandler(taxonomyReader, this) {
+        @Override
+        public IntermediateFacetResult fetchPartitionResult(
+            FacetArrays facetArrays, int offset) throws IOException {
+          final IntermediateFacetResult res = super.fetchPartitionResult(facetArrays, offset);
+          if (countForbiddenDimension!=null) {
+            int ord = taxonomyReader.getOrdinal(new CategoryPath(countForbiddenDimension));
+            assertEquals("Should not have accumulated for dimension '"+countForbiddenDimension+"'!",0,facetArrays.getIntArray()[ord]);
+          }
+          return res;
+        }
+      };
+    }
+  }
+
+  /**
+   * a CLI which filters another CLI for the dimension of the provided
+   * category-path
+   */
+  private static class PerDimensionCLI implements CategoryListIterator {
+    private final CategoryListIterator superCLI;
+    private final int[] parentArray;
+    private final int parentOrdinal;
+
+    PerDimensionCLI(TaxonomyReader taxo, CategoryListIterator superCLI,
+        CategoryPath requestedPath) throws IOException {
+      this.superCLI = superCLI;
+      if (requestedPath == null) {
+        parentOrdinal = 0;
+      } else {
+        CategoryPath cp = new CategoryPath(requestedPath.getComponent(0));
+        parentOrdinal = taxo.getOrdinal(cp);
+      }
+      parentArray = taxo.getParentArray();
+    }
+
+    public boolean init() throws IOException {
+      return superCLI.init();
+    }
+
+    public long nextCategory() throws IOException {
+      long next;
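+      // Skip categories outside the requested dimension; values above
+      // Integer.MAX_VALUE signal that the underlying iterator is exhausted.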
+      while ((next = superCLI.nextCategory()) <= Integer.MAX_VALUE
+          && !isInDimension((int) next)) {
+      }
+
+      return next;
+    }
+
+    /** Walk up the parent chain looking for the requested parent ordinal, i.e. the same dimension. */
+    private boolean isInDimension(int ordinal) {
+      while (ordinal > 0) {
+        if (ordinal == parentOrdinal) {
+          return true;
+        }
+        ordinal = parentArray[ordinal];
+      }
+      return false;
+    }
+
+    public boolean skipTo(int docId) throws IOException {
+      return superCLI.skipTo(docId);
+    }
+  }
+}
\ No newline at end of file
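
The cacheCLI branch of the test above is the reusable part: pre-loading a category
list into a CategoryListCache and handing it to the search params so that iterators
are served from memory. A minimal sketch of that wiring (reader and taxonomy assumed
open, as in the test):

    import org.apache.lucene.facet.index.params.CategoryListParams;
    import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
    import org.apache.lucene.facet.index.params.FacetIndexingParams;
    import org.apache.lucene.facet.search.cache.CategoryListCache;
    import org.apache.lucene.facet.search.params.FacetSearchParams;
    import org.apache.lucene.facet.taxonomy.TaxonomyReader;
    import org.apache.lucene.index.IndexReader;

    class ClCacheSketch {
      static FacetSearchParams cachedParams(IndexReader reader, TaxonomyReader taxo)
          throws Exception {
        CategoryListParams clp = new CategoryListParams();
        FacetIndexingParams iParams = new DefaultFacetIndexingParams(clp);
        // Load the category list once and register it for reuse by searches.
        CategoryListCache clCache = new CategoryListCache();
        clCache.loadAndRegister(clp, reader, taxo, iParams);
        FacetSearchParams sParams = new FacetSearchParams(iParams);
        sParams.setClCache(clCache);
        return sParams;
      }
    }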

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,135 @@
+package org.apache.lucene.facet.search.sampling;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.junit.Test;
+
+import org.apache.lucene.search.MultiCollector;
+import org.apache.lucene.facet.search.BaseTestTopK;
+import org.apache.lucene.facet.search.FacetsAccumulator;
+import org.apache.lucene.facet.search.FacetsCollector;
+import org.apache.lucene.facet.search.ScoredDocIDs;
+import org.apache.lucene.facet.search.ScoredDocIdCollector;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public abstract class BaseSampleTestTopK extends BaseTestTopK {
+  
+  /** Number of top results */
+  protected static final int K = 2; 
+  
+  /** Since there is a chance that this test fails even when the code is correct, the sampling is retried. */
+  protected static final int RETRIES = 4; 
+  
+  protected abstract FacetsAccumulator getSamplingAccumulator(Sampler sampler,
+      TaxonomyReader taxoReader, IndexReader indexReader,
+      FacetSearchParams searchParams);
+  
+  @Test
+  public void testCountUsingComplementSampling() throws Exception {
+    doTestWithSampling(true);
+  }
+  
+  @Test
+  public void testCountUsingSampling() throws Exception {
+    doTestWithSampling(false);
+  }
+  
+  /**
+   * Try out faceted search with sampling enabled and complements either disabled or enforced.
+   * Lots of randomly generated data is indexed, and later on a "90% docs" faceted search
+   * is performed. The results are compared to non-sampled ones.
+   */
+  private void doTestWithSampling(boolean complement) throws Exception, IOException {
+    for (int partitionSize : partitionSizes) {
+      initIndex(partitionSize);
+      
+      // Get all of the documents and run the query, then do different
+      // facet counts and compare to control
+      Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 90% of the docs
+      ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(searcher.maxDoc(), false);
+      
+      FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, partitionSize); 
+      FacetsCollector fc = new FacetsCollector(expectedSearchParams, indexReader, taxoReader);
+      
+      searcher.search(q, MultiCollector.wrap(docCollector, fc));
+      
+      List<FacetResult> expectedResults = fc.getFacetResults();
+      
+      // now run the same search with sampling (and complement as requested)
+      final Sampler sampler = createSampler(docCollector.getScoredDocIDs());
+      
+      FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, partitionSize); 
+
+      // try several times in case of failure, because the test has a chance to fail
+      // if the top K facets are not sufficiently common in the sample set
+      for (int n=RETRIES; n>0; n--) {
+        FacetsCollector samplingFC = samplingCollector(complement, sampler,  samplingSearchParams);
+        
+        searcher.search(q, samplingFC);
+        List<FacetResult> sampledResults = samplingFC.getFacetResults();
+        
+        try {
+          assertSameResults(expectedResults, sampledResults);
+          break; // succeeded
+        } catch (Exception e) {
+          if (n<=1) { // otherwise try again
+            throw e; 
+          }
+        }
+      }
+    }
+  }
+  
+  private FacetsCollector samplingCollector(
+      final boolean complement,
+      final Sampler sampler,
+      FacetSearchParams samplingSearchParams) {
+    FacetsCollector samplingFC = new FacetsCollector(samplingSearchParams, indexReader, taxoReader) {
+      @Override
+      protected FacetsAccumulator initFacetsAccumulator(
+          FacetSearchParams facetSearchParams, IndexReader indexReader,
+          TaxonomyReader taxonomyReader) {
+        FacetsAccumulator acc = getSamplingAccumulator(sampler, taxonomyReader, indexReader, facetSearchParams);
+        acc.setComplementThreshold(complement ? FacetsAccumulator.FORCE_COMPLEMENT : FacetsAccumulator.DISABLE_COMPLEMENT);
+        return acc;
+      }
+    };
+    return samplingFC;
+  }
+  
+  private Sampler createSampler(ScoredDocIDs scoredDocIDs) {
+    SamplingParams samplingParams = new SamplingParams();
+    samplingParams.setSampleRatio(0.8);
+    samplingParams.setMinSampleSize(100);
+    samplingParams.setMaxSampleSize(10000);
+    samplingParams.setSampingThreshold(11000); //force sampling 
+    samplingParams.setOversampleFactor(5.0);
+    Sampler sampler = new Sampler(samplingParams);
+    assertTrue("must enable sampling for this test!",sampler.shouldSample(scoredDocIDs));
+    return sampler;
+  }
+}
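
For reference, the sampler configuration built by createSampler above is the whole
knob surface this test relies on. A standalone sketch with the same values (the
threshold is chosen by the test so that shouldSample() returns true on its corpus;
note the method name setSampingThreshold is spelled this way in this code base):

    import org.apache.lucene.facet.search.sampling.Sampler;
    import org.apache.lucene.facet.search.sampling.SamplingParams;

    class SamplerSketch {
      static Sampler build() {
        SamplingParams p = new SamplingParams();
        p.setSampleRatio(0.8);        // sample roughly 80% of the matching docs
        p.setMinSampleSize(100);      // lower bound on the sample size
        p.setMaxSampleSize(10000);    // upper bound on the sample size
        p.setSampingThreshold(11000); // controls when sampling kicks in (forced here)
        p.setOversampleFactor(5.0);   // over-sample, then trim after accumulation
        return new Sampler(p);
      }
    }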

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/sampling/SamplingAccumulatorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/sampling/SamplingAccumulatorTest.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/sampling/SamplingAccumulatorTest.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/sampling/SamplingAccumulatorTest.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,35 @@
+package org.apache.lucene.facet.search.sampling;
+
+import org.apache.lucene.index.IndexReader;
+
+import org.apache.lucene.facet.search.FacetsAccumulator;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class SamplingAccumulatorTest extends BaseSampleTestTopK {
+
+  @Override
+  protected FacetsAccumulator getSamplingAccumulator(Sampler sampler,
+      TaxonomyReader taxoReader, IndexReader indexReader,
+      FacetSearchParams searchParams) {
+    return new SamplingAccumulator(sampler, searchParams, indexReader,
+        taxoReader);
+  }
+}


