Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id B9097421D for ; Wed, 29 Jun 2011 11:54:34 +0000 (UTC) Received: (qmail 45189 invoked by uid 500); 29 Jun 2011 11:54:34 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 45182 invoked by uid 99); 29 Jun 2011 11:54:34 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 29 Jun 2011 11:54:34 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 29 Jun 2011 11:54:30 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 478D42388C06; Wed, 29 Jun 2011 11:53:23 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1141060 [16/21] - in /lucene/dev/branches/branch_3x: dev-tools/eclipse/ dev-tools/maven/lucene/contrib/facet/ lucene/contrib/ lucene/contrib/facet/ lucene/contrib/facet/src/ lucene/contrib/facet/src/examples/ lucene/contrib/facet/src/examp... 
Date: Wed, 29 Jun 2011 11:53:19 -0000 To: commits@lucene.apache.org From: shaie@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110629115323.478D42388C06@eris.apache.org> Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestFacetsAccumulatorWithComplement.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestFacetsAccumulatorWithComplement.java?rev=1141060&view=auto ============================================================================== --- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestFacetsAccumulatorWithComplement.java (added) +++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestFacetsAccumulatorWithComplement.java Wed Jun 29 11:53:10 2011 @@ -0,0 +1,161 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.ParallelReader; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.apache.lucene.facet.FacetTestBase; +import org.apache.lucene.facet.search.FacetsAccumulator; +import org.apache.lucene.facet.search.ScoredDocIDs; +import org.apache.lucene.facet.search.ScoredDocIdCollector; +import org.apache.lucene.facet.search.StandardFacetsAccumulator; +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.CategoryPath; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor 
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Test that complements works as expected. + * We place this test under *.facet.search rather than *.search + * because the test actually does faceted search. + */ +public class TestFacetsAccumulatorWithComplement extends FacetTestBase { + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + initIndex(); + } + + @Override + @After + public void tearDown() throws Exception { + closeAll(); + super.tearDown(); + } + + /** + * Test that complements does not cause a failure when using a parallel reader + */ + @Test + public void testComplementsWithParallerReader() throws Exception { + IndexReader origReader = indexReader; + ParallelReader pr = new ParallelReader(true); + pr.add(origReader); + indexReader = pr; + try { + doTestComplements(); + } finally { + indexReader = origReader; + } + } + + /** + * Test that complements works with MultiReader + */ + @Test + public void testComplementsWithMultiReader() throws Exception { + final IndexReader origReader = indexReader; + indexReader = new MultiReader(origReader); + try { + doTestComplements(); + } finally { + indexReader = origReader; + } + } + + /** + * Test that complements works with the original (unwrapped) index reader + */ + @Test + public void testComplements() throws Exception 
{ + doTestComplements(); + } + + private void doTestComplements() throws Exception { + assertTrue("Would like to test this with deletions!",indexReader.hasDeletions()); + assertTrue("Would like to test this with deletions!",indexReader.numDeletedDocs()>0); + Query q = new MatchAllDocsQuery(); //new TermQuery(new Term(TEXT,"white")); + if (VERBOSE) { + System.out.println("Query: "+q); + } + ScoredDocIdCollector dCollector = + ScoredDocIdCollector.create(indexReader.maxDoc(),false); // scoring is disabled + searcher.search(q, dCollector); + + // verify by facet values + List countResWithComplement = findFacets(dCollector.getScoredDocIDs(), true); + List countResNoComplement = findFacets(dCollector.getScoredDocIDs(), false); + + assertEquals("Wrong number of facet count results with complement!",1,countResWithComplement.size()); + assertEquals("Wrong number of facet count results no complement!",1,countResNoComplement.size()); + + FacetResultNode parentResWithComp = countResWithComplement.get(0).getFacetResultNode(); + FacetResultNode parentResNoComp = countResWithComplement.get(0).getFacetResultNode(); + + assertEquals("Wrong number of top count aggregated categories with complement!",3,parentResWithComp.getNumSubResults()); + assertEquals("Wrong number of top count aggregated categories no complement!",3,parentResNoComp.getNumSubResults()); + + } + + @Override + protected FacetSearchParams getFacetedSearchParams() { + FacetSearchParams res = super.getFacetedSearchParams(); + res.addFacetRequest(new CountFacetRequest(new CategoryPath("root","a"), 10)); + return res; + } + + /** compute facets with certain facet requests and docs */ + private List findFacets(ScoredDocIDs sDocids, boolean withComplement) throws IOException { + + FacetsAccumulator fAccumulator = + new StandardFacetsAccumulator(getFacetedSearchParams(), indexReader, taxoReader); + + fAccumulator.setComplementThreshold( + withComplement ? 
+ FacetsAccumulator.FORCE_COMPLEMENT: + FacetsAccumulator.DISABLE_COMPLEMENT); + + List res = fAccumulator.accumulate(sDocids); + + // Results are ready, printing them... + int i = 0; + for (FacetResult facetResult : res) { + if (VERBOSE) { + System.out.println("Res "+(i++)+": "+facetResult); + } + } + + assertEquals(withComplement, ((StandardFacetsAccumulator) fAccumulator).isUsingComplements); + + return res; + } + +} \ No newline at end of file Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java?rev=1141060&view=auto ============================================================================== --- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java (added) +++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java Wed Jun 29 11:53:10 2011 @@ -0,0 +1,383 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.store.Directory; +import org.junit.Test; + +import org.apache.lucene.util.LuceneTestCase; +import 
org.apache.lucene.search.MultiCollector; +import org.apache.lucene.facet.FacetTestUtils; +import org.apache.lucene.facet.index.params.CategoryListParams; +import org.apache.lucene.facet.index.params.FacetIndexingParams; +import org.apache.lucene.facet.index.params.PerDimensionIndexingParams; +import org.apache.lucene.facet.search.FacetsCollector; +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; +import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader; +import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class TestMultipleCategoryLists extends LuceneTestCase { + + @Test + public void testDefault() throws Exception { + Directory[][] dirs = getDirs(); + // create and open an index writer + IndexWriter iw = new IndexWriter(dirs[0][0], new IndexWriterConfig( + TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + // create and open a taxonomy writer + TaxonomyWriter tw = new LuceneTaxonomyWriter(dirs[0][1], OpenMode.CREATE); + + /** + * Configure with no custom counting lists + */ + PerDimensionIndexingParams iParams = new PerDimensionIndexingParams(); + + seedIndex(iw, tw, iParams); + + iw.commit(); + tw.commit(); + + // prepare index reader and taxonomy. + TaxonomyReader tr = new LuceneTaxonomyReader(dirs[0][1]); + IndexReader ir = IndexReader.open(dirs[0][0]); + + // prepare searcher to search against + IndexSearcher searcher = new IndexSearcher(ir); + + FacetsCollector facetsCollector = performSearch(iParams, tr, ir, + searcher); + + // Obtain facets results and hand-test them + assertCorrectResults(facetsCollector); + + TermDocs td = ir.termDocs(new Term("$facets", "$fulltree$")); + assertTrue(td.next()); + + tr.close(); + ir.close(); + searcher.close(); + iw.close(); + tw.close(); + } + + @Test + public void testCustom() throws Exception { + Directory[][] dirs = getDirs(); + // create and open an index writer + IndexWriter iw = new IndexWriter(dirs[0][0], new IndexWriterConfig( + TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + // create and open a taxonomy writer + TaxonomyWriter tw = new LuceneTaxonomyWriter(dirs[0][1], + OpenMode.CREATE); + + PerDimensionIndexingParams iParams = new PerDimensionIndexingParams(); + iParams.addCategoryListParams(new CategoryPath("Author"), + new CategoryListParams(new Term("$author", "Authors"))); + seedIndex(iw, tw, iParams); + + iw.commit(); + tw.commit(); + + // prepare index reader and taxonomy. 
+ TaxonomyReader tr = new LuceneTaxonomyReader(dirs[0][1]); + IndexReader ir = IndexReader.open(dirs[0][0]); + + // prepare searcher to search against + IndexSearcher searcher = new IndexSearcher(ir); + + FacetsCollector facetsCollector = performSearch(iParams, tr, ir, + searcher); + + // Obtain facets results and hand-test them + assertCorrectResults(facetsCollector); + + assertPostingListExists("$facets", "$fulltree$", ir); + assertPostingListExists("$author", "Authors", ir); + + tr.close(); + ir.close(); + searcher.close(); + iw.close(); + tw.close(); + } + + @Test + public void testTwoCustomsSameField() throws Exception { + Directory[][] dirs = getDirs(); + // create and open an index writer + IndexWriter iw = new IndexWriter(dirs[0][0], new IndexWriterConfig( + TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + // create and open a taxonomy writer + TaxonomyWriter tw = new LuceneTaxonomyWriter(dirs[0][1], + OpenMode.CREATE); + + PerDimensionIndexingParams iParams = new PerDimensionIndexingParams(); + iParams.addCategoryListParams(new CategoryPath("Band"), + new CategoryListParams(new Term("$music", "Bands"))); + iParams.addCategoryListParams(new CategoryPath("Composer"), + new CategoryListParams(new Term("$music", "Composers"))); + seedIndex(iw, tw, iParams); + + iw.commit(); + tw.commit(); + + // prepare index reader and taxonomy. 
+ TaxonomyReader tr = new LuceneTaxonomyReader(dirs[0][1]); + IndexReader ir = IndexReader.open(dirs[0][0]); + + // prepare searcher to search against + IndexSearcher searcher = new IndexSearcher(ir); + + FacetsCollector facetsCollector = performSearch(iParams, tr, ir, + searcher); + + // Obtain facets results and hand-test them + assertCorrectResults(facetsCollector); + + assertPostingListExists("$facets", "$fulltree$", ir); + assertPostingListExists("$music", "Bands", ir); + assertPostingListExists("$music", "Composers", ir); + + tr.close(); + ir.close(); + searcher.close(); + iw.close(); + tw.close(); + } + + private void assertPostingListExists(String field, String text, IndexReader ir) throws IOException { + TermDocs td; + Term term = new Term(field, text); + td = ir.termDocs(term); + assertTrue(td.next()); + } + + @Test + public void testDifferentFieldsAndText() throws Exception { + Directory[][] dirs = getDirs(); + // create and open an index writer + IndexWriter iw = new IndexWriter(dirs[0][0], new IndexWriterConfig( + TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + // create and open a taxonomy writer + TaxonomyWriter tw = new LuceneTaxonomyWriter(dirs[0][1], OpenMode.CREATE); + + PerDimensionIndexingParams iParams = new PerDimensionIndexingParams(); + iParams.addCategoryListParams(new CategoryPath("Band"), + new CategoryListParams(new Term("$bands", "Bands"))); + iParams.addCategoryListParams(new CategoryPath("Composer"), + new CategoryListParams(new Term("$composers", "Composers"))); + seedIndex(iw, tw, iParams); + + iw.commit(); + tw.commit(); + + // prepare index reader and taxonomy. 
+ TaxonomyReader tr = new LuceneTaxonomyReader(dirs[0][1]); + IndexReader ir = IndexReader.open(dirs[0][0]); + + // prepare searcher to search against + IndexSearcher searcher = new IndexSearcher(ir); + + FacetsCollector facetsCollector = performSearch(iParams, tr, ir, + searcher); + + // Obtain facets results and hand-test them + assertCorrectResults(facetsCollector); + assertPostingListExists("$facets", "$fulltree$", ir); + assertPostingListExists("$bands", "Bands", ir); + assertPostingListExists("$composers", "Composers", ir); + tr.close(); + ir.close(); + searcher.close(); + iw.close(); + tw.close(); + } + + @Test + public void testSomeSameSomeDifferent() throws Exception { + Directory[][] dirs = getDirs(); + // create and open an index writer + IndexWriter iw = new IndexWriter(dirs[0][0], new IndexWriterConfig( + TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + // create and open a taxonomy writer + TaxonomyWriter tw = new LuceneTaxonomyWriter(dirs[0][1], + OpenMode.CREATE); + + PerDimensionIndexingParams iParams = new PerDimensionIndexingParams(); + iParams.addCategoryListParams(new CategoryPath("Band"), + new CategoryListParams(new Term("$music", "music"))); + iParams.addCategoryListParams(new CategoryPath("Composer"), + new CategoryListParams(new Term("$music", "music"))); + iParams.addCategoryListParams(new CategoryPath("Author"), + new CategoryListParams(new Term("$literature", "Authors"))); + + seedIndex(iw, tw, iParams); + + iw.commit(); + tw.commit(); + + // prepare index reader and taxonomy. 
+ TaxonomyReader tr = new LuceneTaxonomyReader(dirs[0][1]); + IndexReader ir = IndexReader.open(dirs[0][0]); + + // prepare searcher to search against + IndexSearcher searcher = new IndexSearcher(ir); + + FacetsCollector facetsCollector = performSearch(iParams, tr, ir, + searcher); + + // Obtain facets results and hand-test them + assertCorrectResults(facetsCollector); + assertPostingListExists("$music", "music", ir); + assertPostingListExists("$literature", "Authors", ir); + + tr.close(); + ir.close(); + searcher.close(); + iw.close(); + tw.close(); + } + + private Directory[][] getDirs() throws IOException { + return FacetTestUtils.createIndexTaxonomyDirs(1); + } + + private void assertCorrectResults(FacetsCollector facetsCollector) + throws IOException, IllegalAccessException, InstantiationException { + List res = facetsCollector.getFacetResults(); + + FacetResult results = res.get(0); + FacetResultNode resNode = results.getFacetResultNode(); + Iterable subResults = resNode + .getSubResults(); + Iterator subIter = subResults.iterator(); + + checkResult(resNode, "Band", 5.0); + checkResult(subIter.next(), "Band/Rock & Pop", 4.0); + checkResult(subIter.next(), "Band/Punk", 1.0); + + results = res.get(1); + resNode = results.getFacetResultNode(); + subResults = resNode.getSubResults(); + subIter = subResults.iterator(); + + checkResult(resNode, "Band", 5.0); + checkResult(subIter.next(), "Band/Rock & Pop", 4.0); + checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0); + checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0); + checkResult(subIter.next(), "Band/Rock & Pop/U2", 1.0); + checkResult(subIter.next(), "Band/Punk/The Ramones", 1.0); + checkResult(subIter.next(), "Band/Punk", 1.0); + checkResult(subIter.next(), "Band/Rock & Pop/The Beatles", 1.0); + + results = res.get(2); + resNode = results.getFacetResultNode(); + subResults = resNode.getSubResults(); + subIter = subResults.iterator(); + + checkResult(resNode, "Author", 3.0); + 
checkResult(subIter.next(), "Author/Kurt Vonnegut", 1.0); + checkResult(subIter.next(), "Author/Stephen King", 1.0); + checkResult(subIter.next(), "Author/Mark Twain", 1.0); + + results = res.get(3); + resNode = results.getFacetResultNode(); + subResults = resNode.getSubResults(); + subIter = subResults.iterator(); + + checkResult(resNode, "Band/Rock & Pop", 4.0); + checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0); + checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0); + checkResult(subIter.next(), "Band/Rock & Pop/U2", 1.0); + checkResult(subIter.next(), "Band/Rock & Pop/The Beatles", 1.0); + } + + private FacetsCollector performSearch(FacetIndexingParams iParams, + TaxonomyReader tr, IndexReader ir, + IndexSearcher searcher) throws IOException { + // step 1: collect matching documents into a collector + Query q = new MatchAllDocsQuery(); + TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(10, + true); + + // Faceted search parameters indicate which facets are we interested in + FacetSearchParams facetSearchParams = new FacetSearchParams(iParams); + + facetSearchParams.addFacetRequest(new CountFacetRequest( + new CategoryPath("Band"), 10)); + CountFacetRequest bandDepth = new CountFacetRequest(new CategoryPath( + "Band"), 10); + bandDepth.setDepth(2); + facetSearchParams.addFacetRequest(bandDepth); + facetSearchParams.addFacetRequest(new CountFacetRequest( + new CategoryPath("Author"), 10)); + facetSearchParams.addFacetRequest(new CountFacetRequest( + new CategoryPath("Band", "Rock & Pop"), 10)); + + // perform documents search and facets accumulation + FacetsCollector facetsCollector = new FacetsCollector(facetSearchParams, ir, tr); + searcher.search(q, MultiCollector.wrap(topDocsCollector, facetsCollector)); + return facetsCollector; + } + + private void seedIndex(IndexWriter iw, TaxonomyWriter tw, + FacetIndexingParams iParams) throws IOException, CorruptIndexException { + FacetTestUtils.add(iParams, iw, tw, "Author", 
"Mark Twain"); + FacetTestUtils.add(iParams, iw, tw, "Author", "Stephen King"); + FacetTestUtils.add(iParams, iw, tw, "Author", "Kurt Vonnegut"); + FacetTestUtils.add(iParams, iw, tw, "Band", "Rock & Pop", + "The Beatles"); + FacetTestUtils.add(iParams, iw, tw, "Band", "Punk", "The Ramones"); + FacetTestUtils.add(iParams, iw, tw, "Band", "Rock & Pop", "U2"); + FacetTestUtils.add(iParams, iw, tw, "Band", "Rock & Pop", "REM"); + FacetTestUtils.add(iParams, iw, tw, "Band", "Rock & Pop", + "Dave Matthews Band"); + FacetTestUtils.add(iParams, iw, tw, "Composer", "Bach"); + } + + private static void checkResult(FacetResultNode sub, String label, double value) { + assertEquals("Label of subresult " + sub.getLabel() + " was incorrect", + label, sub.getLabel().toString()); + assertEquals( + "Value for " + sub.getLabel() + " subresult was incorrect", + value, sub.getValue(), 0.0); + } + +} \ No newline at end of file Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestScoredDocIdCollector.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestScoredDocIdCollector.java?rev=1141060&view=auto ============================================================================== --- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestScoredDocIdCollector.java (added) +++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestScoredDocIdCollector.java Wed Jun 29 11:53:10 2011 @@ -0,0 +1,177 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.junit.Before; +import org.junit.Test; + +import org.apache.lucene.facet.FacetTestBase; +import 
org.apache.lucene.facet.search.FacetsAccumulator; +import org.apache.lucene.facet.search.ScoredDocIDs; +import org.apache.lucene.facet.search.ScoredDocIDsIterator; +import org.apache.lucene.facet.search.ScoredDocIdCollector; +import org.apache.lucene.facet.search.StandardFacetsAccumulator; +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.params.ScoreFacetRequest; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.CategoryPath; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Test ScoredDocIdCollector. 
*/ +public class TestScoredDocIdCollector extends FacetTestBase { + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + initIndex(); + } + + @Override + public void tearDown() throws Exception { + closeAll(); + super.tearDown(); + } + + @Test + public void testConstantScore() throws Exception { + // test that constant score works well + assertTrue("Would like to test this with deletions!",indexReader.hasDeletions()); + assertTrue("Would like to test this with deletions!",indexReader.numDeletedDocs()>0); + + Query q = new TermQuery(new Term(CONTENT_FIELD, "white")); + if (VERBOSE) { + System.out.println("Query: " + q); + } + float constScore = 17.0f; + ScoredDocIdCollector dCollector = ScoredDocIdCollector.create(indexReader + .maxDoc(), false); // scoring is disabled + dCollector.setDefaultScore(constScore); + searcher.search(q, dCollector); + + // verify by doc scores at the level of doc-id-iterator + ScoredDocIDs scoredDocIDs = dCollector.getScoredDocIDs(); + assertEquals("Wrong number of matching documents!", 2, scoredDocIDs.size()); + ScoredDocIDsIterator docItr = scoredDocIDs.iterator(); + while (docItr.next()) { + assertEquals("Wrong score for doc " + docItr.getDocID(), constScore, + docItr.getScore(), Double.MIN_VALUE); + } + + // verify by facet values + List countRes = findFacets(scoredDocIDs, getFacetedSearchParams()); + List scoreRes = findFacets(scoredDocIDs, sumScoreSearchParams()); + + assertEquals("Wrong number of facet count results!", 1, countRes.size()); + assertEquals("Wrong number of facet score results!", 1, scoreRes.size()); + + FacetResultNode parentCountRes = countRes.get(0).getFacetResultNode(); + FacetResultNode parentScoreRes = scoreRes.get(0).getFacetResultNode(); + + assertEquals("Wrong number of top count aggregated categories!", 3, + parentCountRes.getNumSubResults()); + assertEquals("Wrong number of top score aggregated categories!", 3, + parentScoreRes.getNumSubResults()); + + // rely on that facet 
value is computed as doc-score, and + // accordingly compare values of the two top-category results. + + FacetResultNode[] countResNodes = resultNodesAsArray(parentCountRes); + FacetResultNode[] scoreResNodes = resultNodesAsArray(parentScoreRes); + + for (int i = 0; i < scoreResNodes.length; i++) { + assertEquals("Ordinals differ!", + countResNodes[i].getOrdinal(), scoreResNodes[i].getOrdinal()); + assertEquals("Wrong scores!", + constScore * countResNodes[i].getValue(), + scoreResNodes[i].getValue(), + Double.MIN_VALUE); + } + } + + // compute facets with certain facet requests and docs + private List findFacets(ScoredDocIDs sDocids, + FacetSearchParams facetSearchParams) throws IOException { + FacetsAccumulator fAccumulator = new StandardFacetsAccumulator( + facetSearchParams, indexReader, taxoReader); + List res = fAccumulator.accumulate(sDocids); + + // Results are ready, printing them... + int i = 0; + for (FacetResult facetResult : res) { + if (VERBOSE) { + System.out.println("Res " + (i++) + ": " + facetResult); + } + } + + return res; + } + + @Test + public void testOutOfOrderCollectionScoringEnabled() throws Exception { + assertFalse( + "when scoring enabled, out-of-order collection should not be supported", + ScoredDocIdCollector.create(1, true).acceptsDocsOutOfOrder()); + } + + @Test + public void testOutOfOrderCollectionScoringDisabled() throws Exception { + // This used to fail, because ScoredDocIdCollector.acceptDocsOutOfOrder + // returned true, even when scoring was enabled. 
+ final int[] docs = new int[] { 1, 0, 2 }; // out of order on purpose + + ScoredDocIdCollector sdic = ScoredDocIdCollector.create(docs.length, false); + assertTrue( + "when scoring disabled, out-of-order collection should be supported", + sdic.acceptsDocsOutOfOrder()); + for (int i = 0; i < docs.length; i++) { + sdic.collect(docs[i]); + } + + assertEquals("expected 3 documents but got " + sdic.getScoredDocIDs().size(), 3, sdic.getScoredDocIDs().size()); + ScoredDocIDsIterator iter = sdic.getScoredDocIDs().iterator(); + Arrays.sort(docs); + for (int i = 0; iter.next(); i++) { + assertEquals("expected doc " + docs[i], docs[i], iter.getDocID()); + } + } + + /* use a scoring aggregator */ + private FacetSearchParams sumScoreSearchParams() { + // this will use default faceted indexing params, not altering anything about indexing + FacetSearchParams res = super.getFacetedSearchParams(); + res.addFacetRequest(new ScoreFacetRequest(new CategoryPath("root", "a"), 10)); + return res; + } + + @Override + protected FacetSearchParams getFacetedSearchParams() { + FacetSearchParams res = super.getFacetedSearchParams(); + res.addFacetRequest(new CountFacetRequest(new CategoryPath("root","a"), 10)); + return res; + } + +} \ No newline at end of file Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java?rev=1141060&view=auto ============================================================================== --- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java (added) +++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java Wed Jun 29 11:53:10 2011 @@ -0,0 +1,339 @@ +package 
org.apache.lucene.facet.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.junit.Test; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.facet.index.CategoryDocumentBuilder; +import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams; +import org.apache.lucene.facet.search.FacetsAccumulator; +import org.apache.lucene.facet.search.FloatArrayAllocator; +import org.apache.lucene.facet.search.IntArrayAllocator; +import org.apache.lucene.facet.search.ScoredDocIdCollector; +import org.apache.lucene.facet.search.StandardFacetsAccumulator; +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.params.FacetRequest.ResultMode; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; +import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader; +import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter; +import 
org.apache.lucene.facet.util.PartitionsUtils; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class TestTopKInEachNodeResultHandler extends LuceneTestCase { + + //TODO (Facet): Move to extend BaseTestTopK and separate to several smaller test cases (methods) - see TestTopKResultsHandler + + @Test + public void testSimple() throws Exception { + + int[] partitionSizes = new int[] { + 2,3,4, 5, 6, 7, 10, 1000, + Integer.MAX_VALUE }; + + for (int partitionSize : partitionSizes) { + Directory iDir = new RAMDirectory(); + Directory tDir = new RAMDirectory(); + + if (VERBOSE) { + System.out.println("Partition Size: " + partitionSize); + } + + final int pSize = partitionSize; + DefaultFacetIndexingParams iParams = new DefaultFacetIndexingParams() { + @Override + protected int fixedPartitionSize() { + return pSize; + } + }; + + IndexWriter iw = new IndexWriter(iDir, + new IndexWriterConfig(TEST_VERSION_CURRENT, + new StandardAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE)); + TaxonomyWriter tw = new LuceneTaxonomyWriter(tDir); + prvt_add(iParams, iw, tw, "a", "b"); + prvt_add(iParams, iw, tw, "a", "b", "1"); + prvt_add(iParams, iw, tw, "a", "b", "1"); + prvt_add(iParams, iw, tw, "a", "b", "2"); + 
prvt_add(iParams, iw, tw, "a", "b", "2"); + prvt_add(iParams, iw, tw, "a", "b", "2"); + prvt_add(iParams, iw, tw, "a", "b", "3"); + prvt_add(iParams, iw, tw, "a", "b", "4"); + prvt_add(iParams, iw, tw, "a", "c"); + prvt_add(iParams, iw, tw, "a", "c"); + prvt_add(iParams, iw, tw, "a", "c"); + prvt_add(iParams, iw, tw, "a", "c"); + prvt_add(iParams, iw, tw, "a", "c"); + prvt_add(iParams, iw, tw, "a", "c", "1"); + prvt_add(iParams, iw, tw, "a", "d"); + prvt_add(iParams, iw, tw, "a", "e"); + + iw.commit(); + iw.close(); + tw.commit(); + tw.close(); + + IndexSearcher is = new IndexSearcher(iDir); + LuceneTaxonomyReader tr = new LuceneTaxonomyReader(tDir); + + // Get all of the documents and run the query, then do different + // facet counts and compare to control + Query q = new TermQuery(new Term("content", "alpha")); + ScoredDocIdCollector scoredDoc = ScoredDocIdCollector.create(is.maxDoc(), true); + + // Collector collector = new MultiCollector(scoredDoc); + is.search(q, scoredDoc); + + CountFacetRequest cfra23 = new CountFacetRequest( + new CategoryPath("a"), 2); + cfra23.setDepth(3); + cfra23.setResultMode(ResultMode.PER_NODE_IN_TREE); + + CountFacetRequest cfra22 = new CountFacetRequest( + new CategoryPath("a"), 2); + cfra22.setDepth(2); + cfra22.setResultMode(ResultMode.PER_NODE_IN_TREE); + + CountFacetRequest cfra21 = new CountFacetRequest( + new CategoryPath("a"), 2); + cfra21.setDepth(1); + cfra21.setResultMode(ResultMode.PER_NODE_IN_TREE); + + CountFacetRequest cfrb22 = new CountFacetRequest( + new CategoryPath("a", "b"), 2); + cfrb22.setDepth(2); + cfrb22.setResultMode(ResultMode.PER_NODE_IN_TREE); + + CountFacetRequest cfrb23 = new CountFacetRequest( + new CategoryPath("a", "b"), 2); + cfrb23.setDepth(3); + cfrb23.setResultMode(ResultMode.PER_NODE_IN_TREE); + + CountFacetRequest cfrb21 = new CountFacetRequest( + new CategoryPath("a", "b"), 2); + cfrb21.setDepth(1); + cfrb21.setResultMode(ResultMode.PER_NODE_IN_TREE); + + CountFacetRequest doctor = new 
CountFacetRequest( + new CategoryPath("Doctor"), 2); + doctor.setDepth(1); + doctor.setResultMode(ResultMode.PER_NODE_IN_TREE); + + CountFacetRequest cfrb20 = new CountFacetRequest( + new CategoryPath("a", "b"), 2); + cfrb20.setDepth(0); + cfrb20.setResultMode(ResultMode.PER_NODE_IN_TREE); + + FacetSearchParams facetSearchParams = new FacetSearchParams(iParams); + facetSearchParams.addFacetRequest(cfra23); + facetSearchParams.addFacetRequest(cfra22); + facetSearchParams.addFacetRequest(cfra21); + facetSearchParams.addFacetRequest(cfrb23); + facetSearchParams.addFacetRequest(cfrb22); + facetSearchParams.addFacetRequest(cfrb21); + facetSearchParams.addFacetRequest(doctor); + facetSearchParams.addFacetRequest(cfrb20); + + IntArrayAllocator iaa = new IntArrayAllocator(PartitionsUtils.partitionSize(facetSearchParams,tr), 1); + FloatArrayAllocator faa = new FloatArrayAllocator(PartitionsUtils.partitionSize(facetSearchParams,tr), 1); + FacetsAccumulator fctExtrctr = new StandardFacetsAccumulator(facetSearchParams, is.getIndexReader(), tr, iaa, faa); + fctExtrctr.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT); + long start = System.currentTimeMillis(); + + List facetResults = fctExtrctr.accumulate(scoredDoc.getScoredDocIDs()); + + long end = System.currentTimeMillis(); + if (VERBOSE) { + System.out.println("Time: " + (end - start)); + } + + FacetResult fr = facetResults.get(0); // a, depth=3, K=2 + boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + assertEquals(9, fr.getNumValidDescendants()); + FacetResultNode parentRes = fr.getFacetResultNode(); + assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE); + assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE); + assertEquals(2, parentRes.getNumSubResults()); + // two nodes sorted by descending values: a/b with 8 and a/c with 6 + // a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2. 
+ // a/c has residue 0, and one child a/c/1 with value 1. + double [] expectedValues0 = { 8.0, 2.0, 3.0, 0.0, 2.0, 0.0, 6.0, 0.0, 1.0, 0.0 }; + int i = 0; + for (FacetResultNode node : parentRes.getSubResults()) { + assertEquals(expectedValues0[i++], node.getValue(), Double.MIN_VALUE); + assertEquals(expectedValues0[i++], node.getResidue(), Double.MIN_VALUE); + for (FacetResultNode node2 : node.getSubResults()) { + assertEquals(expectedValues0[i++], node2.getValue(), Double.MIN_VALUE); + assertEquals(expectedValues0[i++], node2.getResidue(), Double.MIN_VALUE); + } + } + + // now just change the value of the first child of the root to 5, and then rearrange + // expected are: first a/c of value 6 and residue 0, and one child a/c/1 with value 1 + // then a/b with value 5 and residue 2, and both children: a/b/2 with value 3, and a/b/1 with value 2. + for (FacetResultNode node : parentRes.getSubResults()) { + node.setValue(5.0); + break; + } + // now rearrange + double [] expectedValues00 = { 6.0, 0.0, 1.0, 0.0, 5.0, 2.0, 3.0, 0.0, 2.0, 0.0 }; + fr = cfra23.createFacetResultsHandler(tr).rearrangeFacetResult(fr); + i = 0; + for (FacetResultNode node : parentRes.getSubResults()) { + assertEquals(expectedValues00[i++], node.getValue(), Double.MIN_VALUE); + assertEquals(expectedValues00[i++], node.getResidue(), Double.MIN_VALUE); + for (FacetResultNode node2 : node.getSubResults()) { + assertEquals(expectedValues00[i++], node2.getValue(), Double.MIN_VALUE); + assertEquals(expectedValues00[i++], node2.getResidue(), Double.MIN_VALUE); + } + } + + fr = facetResults.get(1); // a, depth=2, K=2. 
same result as before + hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + assertEquals(9, fr.getNumValidDescendants()); + parentRes = fr.getFacetResultNode(); + assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE); + assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE); + assertEquals(2, parentRes.getNumSubResults()); + // two nodes sorted by descending values: a/b with 8 and a/c with 6 + // a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2. + // a/c has residue 0, and one child a/c/1 with value 1. + i = 0; + for (FacetResultNode node : parentRes.getSubResults()) { + assertEquals(expectedValues0[i++], node.getValue(), Double.MIN_VALUE); + assertEquals(expectedValues0[i++], node.getResidue(), Double.MIN_VALUE); + for (FacetResultNode node2 : node.getSubResults()) { + assertEquals(expectedValues0[i++], node2.getValue(), Double.MIN_VALUE); + assertEquals(expectedValues0[i++], node2.getResidue(), Double.MIN_VALUE); + } + } + + fr = facetResults.get(2); // a, depth=1, K=2 + hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + assertEquals(4, fr.getNumValidDescendants(), 4); + parentRes = fr.getFacetResultNode(); + assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE); + assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE); + assertEquals(2, parentRes.getNumSubResults()); + // two nodes sorted by descending values: + // a/b with value 8 and residue 0 (because no children considered), + // and a/c with value 6 and residue 0 (because no children considered) + double [] expectedValues2 = { 8.0, 0.0, 6.0, 0.0 }; + i = 0; + for (FacetResultNode node : parentRes.getSubResults()) { + assertEquals(expectedValues2[i++], node.getValue(), Double.MIN_VALUE); + assertEquals(expectedValues2[i++], node.getResidue(), Double.MIN_VALUE); + assertEquals(node.getNumSubResults(), 0); + } + + fr = facetResults.get(3); // a/b, depth=3, K=2 + hasDoctor |= 
"Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + assertEquals(4, fr.getNumValidDescendants()); + parentRes = fr.getFacetResultNode(); + assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE); + assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE); + assertEquals(2, parentRes.getNumSubResults()); + double [] expectedValues3 = { 3.0, 2.0 }; + i = 0; + for (FacetResultNode node : parentRes.getSubResults()) { + assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE); + assertEquals(0.0, node.getResidue(), Double.MIN_VALUE); + assertEquals(0, node.getNumSubResults()); + } + + fr = facetResults.get(4); // a/b, depth=2, K=2 + hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + assertEquals(4, fr.getNumValidDescendants()); + parentRes = fr.getFacetResultNode(); + assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE); + assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE); + assertEquals(2, parentRes.getNumSubResults()); + i = 0; + for (FacetResultNode node : parentRes.getSubResults()) { + assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE); + assertEquals(0.0, node.getResidue(), Double.MIN_VALUE); + assertEquals(0, node.getNumSubResults()); + } + + fr = facetResults.get(5); // a/b, depth=1, K=2 + hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + assertEquals(4, fr.getNumValidDescendants()); + parentRes = fr.getFacetResultNode(); + assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE); + assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE); + assertEquals(2, parentRes.getNumSubResults()); + i = 0; + for (FacetResultNode node : parentRes.getSubResults()) { + assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE); + assertEquals(0.0, node.getResidue(), Double.MIN_VALUE); + assertEquals(0, node.getNumSubResults()); + } + + fr = facetResults.get(6); // a/b, depth=0, K=2 + hasDoctor |= 
"Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + assertEquals(0, fr.getNumValidDescendants()); // 0 descendants but rootnode + parentRes = fr.getFacetResultNode(); + assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE); + assertEquals(0.0, parentRes.getResidue(), Double.MIN_VALUE); + assertEquals(0, parentRes.getNumSubResults()); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + + // doctor, depth=1, K=2 + assertFalse("Shouldn't have found anything for a FacetRequest " + + "of a facet that doesn't exist in the index.", hasDoctor); + assertEquals("Shouldn't have found more than seven request.", 7, facetResults.size()); + } + + } + + private void prvt_add(DefaultFacetIndexingParams iParams, IndexWriter iw, + TaxonomyWriter tw, String... strings) throws IOException, + CorruptIndexException { + ArrayList cps = new ArrayList(); + CategoryPath cp = new CategoryPath(strings); + cps.add(cp); + Document d = new Document(); + new CategoryDocumentBuilder(tw, iParams).setCategoryPaths(cps).build(d); + d.add(new Field("content", "alpha", Store.YES, Index.ANALYZED, TermVector.NO)); + iw.addDocument(d); + } + +} Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java?rev=1141060&view=auto ============================================================================== --- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java (added) +++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java Wed Jun 29 11:53:10 2011 @@ -0,0 +1,239 @@ +package org.apache.lucene.facet.search; + +import java.util.Arrays; +import java.util.List; + +import 
org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.junit.Test; + +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class TestTopKResultsHandler extends BaseTestTopK { + + private static final CategoryPath[] CATEGORIES = { + new CategoryPath( "a", "b"), + new CategoryPath( "a", "b", "1"), + new CategoryPath( "a", "b", "1"), + new CategoryPath( "a", "b", "2"), + new CategoryPath( "a", "b", "2"), + new CategoryPath( "a", "b", "3"), + new CategoryPath( "a", "b", "4"), + new CategoryPath( "a", "c"), + new CategoryPath( "a", "c"), + new CategoryPath( "a", "c"), + new CategoryPath( "a", "c"), + new CategoryPath( "a", "c"), + new CategoryPath( "a", "c", "1"), + }; + + @Override + protected String getContent(int doc) { + return ALPHA; + } + + @Override + protected int numDocsToIndex() { + return CATEGORIES.length; + } + + @Override + protected List getCategories(int doc) { + return Arrays.asList(CATEGORIES[doc]); + } + + /** + * Strait forward test: Adding specific documents with specific facets and + * counting them in the most basic form. + */ + @Test + public void testSimple() throws Exception { + for (int partitionSize : partitionSizes) { + initIndex(partitionSize); + + // do different facet counts and compare to control + FacetSearchParams sParams = getFacetedSearchParams(partitionSize); + + sParams.addFacetRequest(new CountFacetRequest(new CategoryPath("a"), 100)); + CountFacetRequest cfra = new CountFacetRequest(new CategoryPath("a"), 100); + cfra.setDepth(3); + sParams.addFacetRequest(cfra); + sParams.addFacetRequest(new CountFacetRequest(new CategoryPath("a", "b"), 100)); + sParams.addFacetRequest(new CountFacetRequest(new CategoryPath("a", "b", "1"), 100)); + sParams.addFacetRequest(new CountFacetRequest(new CategoryPath("a", "c"), 100)); + + FacetsCollector fc = new FacetsCollector(sParams, indexReader, taxoReader) { + @Override + protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { + FacetsAccumulator fa = new StandardFacetsAccumulator(facetSearchParams, indexReader, 
taxonomyReader); + fa.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT); + return fa; + } + }; + + searcher.search(new MatchAllDocsQuery(), fc); + long start = System.currentTimeMillis(); + List facetResults = fc.getFacetResults(); + long end = System.currentTimeMillis(); + + if (VERBOSE) { + System.out.println("Time: " + (end - start)); + } + + FacetResult fr = facetResults.get(0); + FacetResultNode parentRes = fr.getFacetResultNode(); + assertEquals(13.0, parentRes.getValue(), Double.MIN_VALUE); + FacetResultNode[] frn = resultNodesAsArray(parentRes); + assertEquals(7.0, frn[0].getValue(), Double.MIN_VALUE); + assertEquals(6.0, frn[1].getValue(), Double.MIN_VALUE); + + fr = facetResults.get(1); + parentRes = fr.getFacetResultNode(); + assertEquals(13.0, parentRes.getValue(), Double.MIN_VALUE); + frn = resultNodesAsArray(parentRes); + assertEquals(7.0, frn[0].getValue(), Double.MIN_VALUE); + assertEquals(6.0, frn[1].getValue(), Double.MIN_VALUE); + assertEquals(2.0, frn[2].getValue(), Double.MIN_VALUE); + assertEquals(2.0, frn[3].getValue(), Double.MIN_VALUE); + assertEquals(1.0, frn[4].getValue(), Double.MIN_VALUE); + assertEquals(1.0, frn[5].getValue(), Double.MIN_VALUE); + + fr = facetResults.get(2); + parentRes = fr.getFacetResultNode(); + assertEquals(7.0, parentRes.getValue(), Double.MIN_VALUE); + frn = resultNodesAsArray(parentRes); + assertEquals(2.0, frn[0].getValue(), Double.MIN_VALUE); + assertEquals(2.0, frn[1].getValue(), Double.MIN_VALUE); + assertEquals(1.0, frn[2].getValue(), Double.MIN_VALUE); + assertEquals(1.0, frn[3].getValue(), Double.MIN_VALUE); + + fr = facetResults.get(3); + parentRes = fr.getFacetResultNode(); + assertEquals(2.0, parentRes.getValue(), Double.MIN_VALUE); + frn = resultNodesAsArray(parentRes); + assertEquals(0, frn.length); + + fr = facetResults.get(4); + parentRes = fr.getFacetResultNode(); + assertEquals(6.0, parentRes.getValue(), Double.MIN_VALUE); + frn = resultNodesAsArray(parentRes); + assertEquals(1.0, 
frn[0].getValue(), Double.MIN_VALUE); + } + } + + /** + * Creating an index, matching the results of an top K = Integer.MAX_VALUE and top-1000 requests + */ + @Test + public void testGetMaxIntFacets() throws Exception { + for (int partitionSize : partitionSizes) { + initIndex(partitionSize); + + // do different facet counts and compare to control + CategoryPath path = new CategoryPath("a", "b"); + FacetSearchParams sParams = getFacetedSearchParams(partitionSize); + sParams.addFacetRequest(new CountFacetRequest(path, Integer.MAX_VALUE)); + + FacetsCollector fc = new FacetsCollector(sParams, indexReader, taxoReader) { + @Override + protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { + FacetsAccumulator fa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader); + fa.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT); + return fa; + } + }; + + searcher.search(new MatchAllDocsQuery(), fc); + long start = System.currentTimeMillis(); + List results = fc.getFacetResults(); + long end = System.currentTimeMillis(); + + if (VERBOSE) { + System.out.println("Time: " + (end - start)); + } + + assertEquals("Should only be one result as there's only one request", 1, results.size()); + FacetResult res = results.get(0); + assertEquals(path + " should only have 4 desendants", 4, res.getNumValidDescendants()); + + // As a control base results, ask for top-1000 results + FacetSearchParams sParams2 = getFacetedSearchParams(partitionSize); + sParams2.addFacetRequest(new CountFacetRequest(path, Integer.MAX_VALUE)); + + FacetsCollector fc2 = new FacetsCollector(sParams2, indexReader, taxoReader) { + @Override + protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { + FacetsAccumulator fa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader); + 
fa.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT); + return fa; + } + }; + + searcher.search(new MatchAllDocsQuery(), fc2); + List baseResults = fc2.getFacetResults(); + FacetResult baseRes = baseResults.get(0); + + // Removing the first line which holds the REQUEST and this is surly different between the two + String baseResultString = baseRes.toString(); + baseResultString = baseResultString.substring(baseResultString.indexOf('\n')); + + // Removing the first line + String resultString = res.toString(); + resultString = resultString.substring(resultString.indexOf('\n')); + + assertTrue("Results for k=MAX_VALUE do not match the regular results for k=1000!!", + baseResultString.equals(resultString)); + + closeAll(); + } + } + + @Test + public void testSimpleSearchForNonexistentFacet() throws Exception { + for (int partitionSize : partitionSizes) { + initIndex(partitionSize); + + CategoryPath path = new CategoryPath("Miau Hattulla"); + FacetSearchParams sParams = getFacetedSearchParams(partitionSize); + sParams.addFacetRequest(new CountFacetRequest(path, 10)); + + FacetsCollector fc = new FacetsCollector(sParams, indexReader, taxoReader); + + searcher.search(new MatchAllDocsQuery(), fc); + + long start = System.currentTimeMillis(); + List facetResults = fc.getFacetResults(); + long end = System.currentTimeMillis(); + + if (VERBOSE) { + System.out.println("Time: " + (end - start)); + } + + assertEquals("Shouldn't have found anything for a FacetRequest " + + "of a facet that doesn't exist in the index.", 0, facetResults.size()); + + } + } +} Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java?rev=1141060&view=auto ============================================================================== --- 
lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java (added) +++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java Wed Jun 29 11:53:10 2011 @@ -0,0 +1,151 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.junit.Test; + +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class TestTopKResultsHandlerRandom extends BaseTestTopK { + + /** + * Try out faceted search in it's most basic form (no sampling nor complement + * that is). In this test lots (and lots..) of randomly generated data is + * being indexed, and later on an "over-all" faceted search is performed. 
The + * results are checked against the DF of each facet by itself + */ + @Test + public void testCountsComplementDisabled() throws Exception { + doTestCounts(false); + } + + private void doTestCounts(boolean doComplement) throws Exception, + IOException, IllegalAccessException, InstantiationException { + for (int partitionSize : partitionSizes) { + initIndex(partitionSize); + + List facetResults = countFacets(partitionSize, 100000, doComplement); + assertCountsAndCardinality(facetCountsTruth(), facetResults); + + closeAll(); + } + } + + /** + * Try out faceted search with complements. In this test lots (and lots..) of + * randomly generated data is being indexed, and later on, a "beta" faceted + * search is performed - retrieving ~90% of the documents so complements takes + * place in here. The results are checked against the a regular (a.k.a + * no-complement, no-sampling) faceted search with the same parameters. + */ + @Test + public void testCountsComplementEnforced() throws Exception { + doTestCounts(true); + } + + private List countFacets(int partitionSize, int numResults, final boolean doComplement) + throws IOException, IllegalAccessException, InstantiationException { + Query q = new MatchAllDocsQuery(); + FacetSearchParams facetSearchParams = searchParamsWithRequests(numResults, partitionSize); + FacetsCollector fc = new FacetsCollector(facetSearchParams, indexReader, taxoReader) { + @Override + protected FacetsAccumulator initFacetsAccumulator( + FacetSearchParams facetSearchParams, IndexReader indexReader, + TaxonomyReader taxonomyReader) { + FacetsAccumulator accumulator = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader); + double complement = doComplement ? 
FacetsAccumulator.FORCE_COMPLEMENT : FacetsAccumulator.DISABLE_COMPLEMENT; + accumulator.setComplementThreshold(complement); + return accumulator; + } + }; + searcher.search(q, fc); + List facetResults = fc.getFacetResults(); + return facetResults; + } + + /** + * Test that indeed top results are returned, ordered same as all results + * also when some facets have the same counts. + */ + @Test + public void testTopCountsOrder() throws Exception { + for (int partitionSize : partitionSizes) { + initIndex(partitionSize); + + List allFacetResults = countFacets(partitionSize, 100000, false); + + HashMap all = new HashMap(); + int maxNumNodes = 0; + int k = 0; + for (FacetResult fr : allFacetResults) { + FacetResultNode topResNode = fr.getFacetResultNode(); + maxNumNodes = Math.max(maxNumNodes, topResNode.getNumSubResults()); + int prevCount = Integer.MAX_VALUE; + int pos = 0; + for (FacetResultNode frn: topResNode.getSubResults()) { + assertTrue("wrong counts order: prev="+prevCount+" curr="+frn.getValue(), prevCount>=frn.getValue()); + prevCount = (int) frn.getValue(); + String key = k+"--"+frn.getLabel()+"=="+frn.getValue(); + if (VERBOSE) { + System.out.println(frn.getLabel() + " - " + frn.getValue() + " "+key+" "+pos); + } + all.put(key, pos++); // will use this later to verify order of sub-results + } + k++; + } + + // verify that when asking for less results, they are always of highest counts + // also verify that the order is stable + for (int n=1; n someResults = countFacets(partitionSize, n, false); + k = 0; + for (FacetResult fr : someResults) { + FacetResultNode topResNode = fr.getFacetResultNode(); + assertTrue("too many results: n="+n+" but got "+topResNode.getNumSubResults(), n>=topResNode.getNumSubResults()); + int pos = 0; + for (FacetResultNode frn: topResNode.getSubResults()) { + String key = k+"--"+frn.getLabel()+"=="+frn.getValue(); + if (VERBOSE) { + System.out.println(frn.getLabel() + " - " + frn.getValue() + " "+key+" "+pos); + } + Integer origPos 
= all.get(key); + assertNotNull("missing in all results: "+frn,origPos); + assertEquals("wrong order of sub-results!",pos++, origPos.intValue()); // verify order of sub-results + } + k++; + } + } + + closeAll(); // done with this partition + } + } + +} Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCounts.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCounts.java?rev=1141060&view=auto ============================================================================== --- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCounts.java (added) +++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCounts.java Wed Jun 29 11:53:10 2011 @@ -0,0 +1,114 @@ +package org.apache.lucene.facet.search; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.util._TestUtil; +import org.junit.Test; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.facet.FacetTestUtils; +import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyReaderPair; +import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyWriterPair; +import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class TestTotalFacetCounts extends LuceneTestCase { + + private static void initCache(int numEntries) { + TotalFacetCountsCache.getSingleton().clear(); + TotalFacetCountsCache.getSingleton().setCacheSize(numEntries); // Set to keep one in mem + } + + @Test + public void testWriteRead() throws IOException { + doTestWriteRead(14); + doTestWriteRead(100); + doTestWriteRead(7); + doTestWriteRead(3); + doTestWriteRead(1); + } + + private void doTestWriteRead(final int partitionSize) throws IOException { + initCache(1); + + // Create temporary RAMDirectories + Directory[][] dirs = FacetTestUtils.createIndexTaxonomyDirs(1); + // Create our index/taxonomy writers + IndexTaxonomyWriterPair[] writers = FacetTestUtils + .createIndexTaxonomyWriterPair(dirs); + DefaultFacetIndexingParams iParams = new DefaultFacetIndexingParams() { + @Override + protected int fixedPartitionSize() { + return partitionSize; + } + }; + // The counts that the TotalFacetCountsArray should have after adding + // the below facets to the index. 
+ int[] expectedCounts = new int[] { 0, 3, 1, 3, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1 }; + + // Add a facet to the index + TestTotalFacetCountsCache.addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "a", "b"); + TestTotalFacetCountsCache.addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "c", "d"); + TestTotalFacetCountsCache.addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "a", "e"); + TestTotalFacetCountsCache.addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "a", "d"); + TestTotalFacetCountsCache.addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "c", "g"); + TestTotalFacetCountsCache.addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "c", "z"); + TestTotalFacetCountsCache.addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "b", "a"); + TestTotalFacetCountsCache.addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "1", "2"); + TestTotalFacetCountsCache.addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "b", "c"); + + // Commit Changes + writers[0].commit(); + writers[0].close(); + + IndexTaxonomyReaderPair[] readers = + FacetTestUtils.createIndexTaxonomyReaderPair(dirs); + + int[] intArray = new int[iParams.getPartitionSize()]; + + TotalFacetCountsCache tfcc = TotalFacetCountsCache.getSingleton(); + File tmpFile = _TestUtil.createTempFile("test", "tmp", TEMP_DIR); + tfcc.store(tmpFile, readers[0].indexReader, readers[0].taxReader, iParams, null); + tfcc.clear(); // not really required because TFCC overrides on load(), but in the test we need not rely on this. + tfcc.load(tmpFile, readers[0].indexReader, readers[0].taxReader, iParams); + + // now retrieve the one just loaded + TotalFacetCounts totalCounts = + tfcc.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null); + + int partition = 0; + for (int i=0; i