Return-Path: X-Original-To: apmail-lucenenet-commits-archive@www.apache.org Delivered-To: apmail-lucenenet-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 069D3C8D7 for ; Fri, 14 Nov 2014 11:59:18 +0000 (UTC) Received: (qmail 58271 invoked by uid 500); 14 Nov 2014 11:59:17 -0000 Delivered-To: apmail-lucenenet-commits-archive@lucenenet.apache.org Received: (qmail 58133 invoked by uid 500); 14 Nov 2014 11:59:17 -0000 Mailing-List: contact commits-help@lucenenet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@lucenenet.apache.org Delivered-To: mailing list commits@lucenenet.apache.org Received: (qmail 57618 invoked by uid 99); 14 Nov 2014 11:59:17 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 14 Nov 2014 11:59:17 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 4FDF7940664; Fri, 14 Nov 2014 11:59:17 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: synhershko@apache.org To: commits@lucenenet.apache.org Date: Fri, 14 Nov 2014 11:59:26 -0000 Message-Id: <324ad28447714700a68309c13151e2c0@git.apache.org> In-Reply-To: <25d3e5ad3026426c84d9af894c5dece8@git.apache.org> References: <25d3e5ad3026426c84d9af894c5dece8@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [11/26] lucenenet git commit: first commit of facet porting, failing tests will be fixed in next commits. 
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/src/Lucene.Net.Tests/core/Facet/Range/TestRangeFacetCounts.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/core/Facet/Range/TestRangeFacetCounts.cs b/src/Lucene.Net.Tests/core/Facet/Range/TestRangeFacetCounts.cs new file mode 100644 index 0000000..b9e4ef2 --- /dev/null +++ b/src/Lucene.Net.Tests/core/Facet/Range/TestRangeFacetCounts.cs @@ -0,0 +1,1174 @@ +using System; +using System.Diagnostics; +using System.Collections; +using System.Collections.Generic; +using Lucene.Net.Facet.Range; +using Lucene.Net.Randomized.Generators; +using Lucene.Net.Support; +using NUnit.Framework; + +namespace Lucene.Net.Facet.Range +{ + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + + using Document = Lucene.Net.Documents.Document; + using DoubleDocValuesField = Lucene.Net.Documents.DoubleDocValuesField; + using DoubleField = Lucene.Net.Documents.DoubleField; + using Field = Lucene.Net.Documents.Field; + using FloatDocValuesField = Lucene.Net.Documents.FloatDocValuesField; + using FloatField = Lucene.Net.Documents.FloatField; + using LongField = Lucene.Net.Documents.LongField; + using NumericDocValuesField = Lucene.Net.Documents.NumericDocValuesField; + using DrillSidewaysResult = Lucene.Net.Facet.DrillSideways.DrillSidewaysResult; + using TaxonomyReader = Lucene.Net.Facet.Taxonomy.TaxonomyReader; + using DirectoryTaxonomyReader = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyReader; + using DirectoryTaxonomyWriter = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter; + using AtomicReader = Lucene.Net.Index.AtomicReader; + using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext; + using IndexReader = Lucene.Net.Index.IndexReader; + using IndexWriterConfig = Lucene.Net.Index.IndexWriterConfig; + using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter; + using FunctionValues = Lucene.Net.Queries.Function.FunctionValues; + using ValueSource = Lucene.Net.Queries.Function.ValueSource; + using DoubleDocValues = Lucene.Net.Queries.Function.DocValues.DoubleDocValues; + using DoubleFieldSource = Lucene.Net.Queries.Function.ValueSources.DoubleFieldSource; + using FloatFieldSource = Lucene.Net.Queries.Function.ValueSources.FloatFieldSource; + using LongFieldSource = Lucene.Net.Queries.Function.ValueSources.LongFieldSource; + using CachingWrapperFilter = Lucene.Net.Search.CachingWrapperFilter; + using DocIdSet = Lucene.Net.Search.DocIdSet; + using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; + using Filter = Lucene.Net.Search.Filter; + using IndexSearcher = Lucene.Net.Search.IndexSearcher; + using MatchAllDocsQuery = Lucene.Net.Search.MatchAllDocsQuery; + using Lucene.Net.Search; + using QueryWrapperFilter 
= Lucene.Net.Search.QueryWrapperFilter; + using Directory = Lucene.Net.Store.Directory; + using FixedBitSet = Lucene.Net.Util.FixedBitSet; + using IOUtils = Lucene.Net.Util.IOUtils; + using TestUtil = Lucene.Net.Util.TestUtil; + + [TestFixture] + public class TestRangeFacetCounts : FacetTestCase + { + + [Test] + public virtual void TestBasicLong() + { + Directory d = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), d); + Document doc = new Document(); + NumericDocValuesField field = new NumericDocValuesField("field", 0L); + doc.Add(field); + for (long l = 0; l < 100; l++) + { + field.LongValue = l; + w.AddDocument(doc); + } + + // Also add Long.MAX_VALUE + field.LongValue = long.MaxValue; + w.AddDocument(doc); + + IndexReader r = w.Reader; + w.Dispose(); + + FacetsCollector fc = new FacetsCollector(); + IndexSearcher s = NewSearcher(r); + s.Search(new MatchAllDocsQuery(), fc); + + Facets facets = new LongRangeFacetCounts("field", fc, new LongRange("less than 10", 0L, true, 10L, false), new LongRange("less than or equal to 10", 0L, true, 10L, true), new LongRange("over 90", 90L, false, 100L, false), new LongRange("90 or above", 90L, true, 100L, false), new LongRange("over 1000", 1000L, false, long.MaxValue, true)); + + FacetResult result = facets.GetTopChildren(10, "field"); + + Assert.AreEqual("dim=field path=[] value=22 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (1)\n", result.ToString()); + + r.Dispose(); + d.Dispose(); + } + + [Test] + public virtual void TestUselessRange() + { + try + { + new LongRange("useless", 7, true, 6, true); + Fail("did not hit expected exception"); + } + catch (System.ArgumentException) + { + // expected + } + try + { + new LongRange("useless", 7, true, 7, false); + Fail("did not hit expected exception"); + } + catch (System.ArgumentException) + { + // expected + } + try + { + new DoubleRange("useless", 7.0, true, 6.0, true); + Fail("did not hit 
expected exception"); + } + catch (System.ArgumentException) + { + // expected + } + try + { + new DoubleRange("useless", 7.0, true, 7.0, false); + Fail("did not hit expected exception"); + } + catch (System.ArgumentException) + { + // expected + } + } + + [Test] + public virtual void TestLongMinMax() + { + + Directory d = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), d); + Document doc = new Document(); + NumericDocValuesField field = new NumericDocValuesField("field", 0L); + doc.Add(field); + field.LongValue = long.MinValue; + w.AddDocument(doc); + field.LongValue = 0; + w.AddDocument(doc); + field.LongValue = long.MaxValue; + w.AddDocument(doc); + + IndexReader r = w.Reader; + w.Dispose(); + + FacetsCollector fc = new FacetsCollector(); + IndexSearcher s = NewSearcher(r); + s.Search(new MatchAllDocsQuery(), fc); + + Facets facets = new LongRangeFacetCounts("field", fc, new LongRange("min", long.MinValue, true, long.MinValue, true), new LongRange("max", long.MaxValue, true, long.MaxValue, true), new LongRange("all0", long.MinValue, true, long.MaxValue, true), new LongRange("all1", long.MinValue, false, long.MaxValue, true), new LongRange("all2", long.MinValue, true, long.MaxValue, false), new LongRange("all3", long.MinValue, false, long.MaxValue, false)); + + FacetResult result = facets.GetTopChildren(10, "field"); + Assert.AreEqual("dim=field path=[] value=3 childCount=6\n min (1)\n max (1)\n all0 (3)\n all1 (2)\n all2 (2)\n all3 (1)\n", result.ToString()); + + r.Dispose(); + d.Dispose(); + } + + [Test] + public virtual void TestOverlappedEndStart() + { + Directory d = NewDirectory(); + var w = new RandomIndexWriter(Random(), d); + Document doc = new Document(); + NumericDocValuesField field = new NumericDocValuesField("field", 0L); + doc.Add(field); + for (long l = 0; l < 100; l++) + { + field.LongValue = l; + w.AddDocument(doc); + } + field.LongValue = long.MaxValue; + w.AddDocument(doc); + + IndexReader r = w.Reader; + w.Dispose(); + 
+ FacetsCollector fc = new FacetsCollector(); + IndexSearcher s = NewSearcher(r); + s.Search(new MatchAllDocsQuery(), fc); + + Facets facets = new LongRangeFacetCounts("field", fc, new LongRange("0-10", 0L, true, 10L, true), new LongRange("10-20", 10L, true, 20L, true), new LongRange("20-30", 20L, true, 30L, true), new LongRange("30-40", 30L, true, 40L, true)); + + FacetResult result = facets.GetTopChildren(10, "field"); + Assert.AreEqual("dim=field path=[] value=41 childCount=4\n 0-10 (11)\n 10-20 (11)\n 20-30 (11)\n 30-40 (11)\n", result.ToString()); + + r.Dispose(); + d.Dispose(); + } + + /// + /// Tests single request that mixes Range and non-Range + /// faceting, with DrillSideways and taxonomy. + /// + [Test] + public virtual void TestMixedRangeAndNonRangeTaxonomy() + { + Directory d = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), d); + Directory td = NewDirectory(); + DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode_e.CREATE); + + FacetsConfig config = new FacetsConfig(); + + for (long l = 0; l < 100; l++) + { + Document doc = new Document(); + // For computing range facet counts: + doc.Add(new NumericDocValuesField("field", l)); + // For drill down by numeric range: + doc.Add(new LongField("field", l, Field.Store.NO)); + + if ((l & 3) == 0) + { + doc.Add(new FacetField("dim", "a")); + } + else + { + doc.Add(new FacetField("dim", "b")); + } + w.AddDocument(config.Build(tw, doc)); + } + + IndexReader r = w.Reader; + + var tr = new DirectoryTaxonomyReader(tw); + + IndexSearcher s = NewSearcher(r); + + if (VERBOSE) + { + Console.WriteLine("TEST: searcher=" + s); + } + + DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper(this, s, config, tr); + + // First search, no drill downs: + DrillDownQuery ddq = new DrillDownQuery(config); + DrillSidewaysResult dsr = ds.Search(null, ddq, 10); + + Assert.AreEqual(100, dsr.Hits.TotalHits); + Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b 
(75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); + Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); + + // Second search, drill down on dim=b: + ddq = new DrillDownQuery(config); + ddq.Add("dim", "b"); + dsr = ds.Search(null, ddq, 10); + + Assert.AreEqual(75, dsr.Hits.TotalHits); + Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n b (75)\n a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); + Assert.AreEqual("dim=field path=[] value=16 childCount=5\n less than 10 (7)\n less than or equal to 10 (8)\n over 90 (7)\n 90 or above (8)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); + + // Third search, drill down on "less than or equal to 10": + ddq = new DrillDownQuery(config); + ddq.Add("field", NumericRangeQuery.NewLongRange("field", 0L, 10L, true, true)); + dsr = ds.Search(null, ddq, 10); + + Assert.AreEqual(11, dsr.Hits.TotalHits); + Assert.AreEqual("dim=dim path=[] value=11 childCount=2\n b (8)\n a (3)\n", dsr.Facets.GetTopChildren(10, "dim").ToString()); + Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); + IOUtils.Close(tw, tr, td, w, r, d); + } + + private class DrillSidewaysAnonymousInnerClassHelper : DrillSideways + { + private readonly TestRangeFacetCounts outerInstance; + + private new FacetsConfig config; + + public DrillSidewaysAnonymousInnerClassHelper(TestRangeFacetCounts outerInstance, IndexSearcher s, FacetsConfig config, TaxonomyReader tr) + : base(s, config, tr) + { + this.outerInstance = outerInstance; + this.config = config; + } + + protected override Facets BuildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, string[] drillSidewaysDims) + { 
+ FacetsCollector dimFC = drillDowns; + FacetsCollector fieldFC = drillDowns; + if (drillSideways != null) + { + for (int i = 0; i < drillSideways.Length; i++) + { + string dim = drillSidewaysDims[i]; + if (dim.Equals("field")) + { + fieldFC = drillSideways[i]; + } + else + { + dimFC = drillSideways[i]; + } + } + } + + IDictionary byDim = new Dictionary(); + byDim["field"] = new LongRangeFacetCounts("field", fieldFC, new LongRange("less than 10", 0L, true, 10L, false), new LongRange("less than or equal to 10", 0L, true, 10L, true), new LongRange("over 90", 90L, false, 100L, false), new LongRange("90 or above", 90L, true, 100L, false), new LongRange("over 1000", 1000L, false, long.MaxValue, false)); + byDim["dim"] = outerInstance.GetTaxonomyFacetCounts(taxoReader, config, dimFC); + return new MultiFacets(byDim, null); + } + + protected override bool ScoreSubDocsAtOnce() + { + return Random().NextBoolean(); + } + } + + [Test] + public virtual void TestBasicDouble() + { + Directory d = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), d); + Document doc = new Document(); + DoubleDocValuesField field = new DoubleDocValuesField("field", 0.0); + doc.Add(field); + for (long l = 0; l < 100; l++) + { + field.DoubleValue = l; + w.AddDocument(doc); + } + + IndexReader r = w.Reader; + + FacetsCollector fc = new FacetsCollector(); + + IndexSearcher s = NewSearcher(r); + s.Search(new MatchAllDocsQuery(), fc); + Facets facets = new DoubleRangeFacetCounts("field", fc, new DoubleRange("less than 10", 0.0, true, 10.0, false), new DoubleRange("less than or equal to 10", 0.0, true, 10.0, true), new DoubleRange("over 90", 90.0, false, 100.0, false), new DoubleRange("90 or above", 90.0, true, 100.0, false), new DoubleRange("over 1000", 1000.0, false, double.PositiveInfinity, false)); + + Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", 
facets.GetTopChildren(10, "field").ToString()); + + IOUtils.Close(w, r, d); + } + + [Test] + public virtual void TestBasicFloat() + { + Directory d = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), d); + Document doc = new Document(); + FloatDocValuesField field = new FloatDocValuesField("field", 0.0f); + doc.Add(field); + for (long l = 0; l < 100; l++) + { + field.FloatValue = l; + w.AddDocument(doc); + } + + IndexReader r = w.Reader; + + FacetsCollector fc = new FacetsCollector(); + + IndexSearcher s = NewSearcher(r); + s.Search(new MatchAllDocsQuery(), fc); + + Facets facets = new DoubleRangeFacetCounts("field", new FloatFieldSource("field"), fc, new DoubleRange("less than 10", 0.0f, true, 10.0f, false), new DoubleRange("less than or equal to 10", 0.0f, true, 10.0f, true), new DoubleRange("over 90", 90.0f, false, 100.0f, false), new DoubleRange("90 or above", 90.0f, true, 100.0f, false), new DoubleRange("over 1000", 1000.0f, false, double.PositiveInfinity, false)); + + Assert.AreEqual("dim=field path=[] value=21 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", facets.GetTopChildren(10, "field").ToString()); + + IOUtils.Close(w, r, d); + } + + [Test] + public virtual void TestRandomLongs() + { + Directory dir = NewDirectory(); + var w = new RandomIndexWriter(Random(), dir); + + int numDocs = AtLeast(1000); + if (VERBOSE) + { + Console.WriteLine("TEST: numDocs=" + numDocs); + } + long[] values = new long[numDocs]; + long minValue = long.MaxValue; + long maxValue = long.MinValue; + for (int i = 0; i < numDocs; i++) + { + Document doc = new Document(); + long v = Random().NextLong(); + values[i] = v; + doc.Add(new NumericDocValuesField("field", v)); + doc.Add(new LongField("field", v, Field.Store.NO)); + w.AddDocument(doc); + minValue = Math.Min(minValue, v); + maxValue = Math.Max(maxValue, v); + } + IndexReader r = w.Reader; + + IndexSearcher s = NewSearcher(r); + 
FacetsConfig config = new FacetsConfig(); + + int numIters = AtLeast(10); + for (int iter = 0; iter < numIters; iter++) + { + if (VERBOSE) + { + Console.WriteLine("TEST: iter=" + iter); + } + int numRange = TestUtil.NextInt(Random(), 1, 100); + LongRange[] ranges = new LongRange[numRange]; + int[] expectedCounts = new int[numRange]; + long minAcceptedValue = long.MaxValue; + long maxAcceptedValue = long.MinValue; + for (int rangeID = 0; rangeID < numRange; rangeID++) + { + long min; + if (rangeID > 0 && Random().Next(10) == 7) + { + // Use an existing boundary: + LongRange prevRange = ranges[Random().Next(rangeID)]; + if (Random().NextBoolean()) + { + min = prevRange.min; + } + else + { + min = prevRange.max; + } + } + else + { + min = Random().NextLong(); + } + long max; + if (rangeID > 0 && Random().Next(10) == 7) + { + // Use an existing boundary: + LongRange prevRange = ranges[Random().Next(rangeID)]; + if (Random().NextBoolean()) + { + max = prevRange.min; + } + else + { + max = prevRange.max; + } + } + else + { + max = Random().NextLong(); + } + + if (min > max) + { + long x = min; + min = max; + max = x; + } + bool minIncl; + bool maxIncl; + if (min == max) + { + minIncl = true; + maxIncl = true; + } + else + { + minIncl = Random().NextBoolean(); + maxIncl = Random().NextBoolean(); + } + ranges[rangeID] = new LongRange("r" + rangeID, min, minIncl, max, maxIncl); + if (VERBOSE) + { + Console.WriteLine(" range " + rangeID + ": " + ranges[rangeID]); + } + + // Do "slow but hopefully correct" computation of + // expected count: + for (int i = 0; i < numDocs; i++) + { + bool accept = true; + if (minIncl) + { + accept &= values[i] >= min; + } + else + { + accept &= values[i] > min; + } + if (maxIncl) + { + accept &= values[i] <= max; + } + else + { + accept &= values[i] < max; + } + if (accept) + { + expectedCounts[rangeID]++; + minAcceptedValue = Math.Min(minAcceptedValue, values[i]); + maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]); + } + } + } + + 
FacetsCollector sfc = new FacetsCollector(); + s.Search(new MatchAllDocsQuery(), sfc); + Filter fastMatchFilter; + if (Random().NextBoolean()) + { + if (Random().NextBoolean()) + { + fastMatchFilter = NumericRangeFilter.NewLongRange("field", minValue, maxValue, true, true); + } + else + { + fastMatchFilter = NumericRangeFilter.NewLongRange("field", minAcceptedValue, maxAcceptedValue, true, true); + } + } + else + { + fastMatchFilter = null; + } + ValueSource vs = new LongFieldSource("field"); + Facets facets = new LongRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); + FacetResult result = facets.GetTopChildren(10, "field"); + Assert.AreEqual(numRange, result.labelValues.Length); + for (int rangeID = 0; rangeID < numRange; rangeID++) + { + if (VERBOSE) + { + Console.WriteLine(" range " + rangeID + " expectedCount=" + expectedCounts[rangeID]); + } + LabelAndValue subNode = result.labelValues[rangeID]; + Assert.AreEqual("r" + rangeID, subNode.label); + Assert.AreEqual(expectedCounts[rangeID], (int)subNode.value); + + LongRange range = ranges[rangeID]; + + // Test drill-down: + DrillDownQuery ddq = new DrillDownQuery(config); + if (Random().NextBoolean()) + { + if (Random().NextBoolean()) + { + ddq.Add("field", NumericRangeFilter.NewLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive)); + } + else + { + ddq.Add("field", NumericRangeQuery.NewLongRange("field", range.min, range.max, range.minInclusive, range.maxInclusive)); + } + } + else + { + ddq.Add("field", range.GetFilter(fastMatchFilter, vs)); + } + Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits); + } + } + + IOUtils.Close(w, r, dir); + } + + [Test] + public virtual void TestRandomFloats() + { + Directory dir = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), dir); + + int numDocs = AtLeast(1000); + float[] values = new float[numDocs]; + float minValue = float.PositiveInfinity; + float maxValue = float.NegativeInfinity; + for (int 
i = 0; i < numDocs; i++) + { + Document doc = new Document(); + float v = Random().NextFloat(); + values[i] = v; + doc.Add(new FloatDocValuesField("field", v)); + doc.Add(new FloatField("field", v, Field.Store.NO)); + w.AddDocument(doc); + minValue = Math.Min(minValue, v); + maxValue = Math.Max(maxValue, v); + } + IndexReader r = w.Reader; + + IndexSearcher s = NewSearcher(r); + FacetsConfig config = new FacetsConfig(); + + int numIters = AtLeast(10); + for (int iter = 0; iter < numIters; iter++) + { + if (VERBOSE) + { + Console.WriteLine("TEST: iter=" + iter); + } + int numRange = TestUtil.NextInt(Random(), 1, 5); + DoubleRange[] ranges = new DoubleRange[numRange]; + int[] expectedCounts = new int[numRange]; + float minAcceptedValue = float.PositiveInfinity; + float maxAcceptedValue = float.NegativeInfinity; + if (VERBOSE) + { + Console.WriteLine("TEST: " + numRange + " ranges"); + } + for (int rangeID = 0; rangeID < numRange; rangeID++) + { + double min; + if (rangeID > 0 && Random().Next(10) == 7) + { + // Use an existing boundary: + DoubleRange prevRange = ranges[Random().Next(rangeID)]; + if (Random().NextBoolean()) + { + min = prevRange.min; + } + else + { + min = prevRange.max; + } + } + else + { + min = Random().NextDouble(); + } + double max; + if (rangeID > 0 && Random().Next(10) == 7) + { + // Use an existing boundary: + DoubleRange prevRange = ranges[Random().Next(rangeID)]; + if (Random().NextBoolean()) + { + max = prevRange.min; + } + else + { + max = prevRange.max; + } + } + else + { + max = Random().NextDouble(); + } + + if (min > max) + { + double x = min; + min = max; + max = x; + } + + // Must truncate to float precision so that the + // drill-down counts (which use NRQ.newFloatRange) + // are correct: + min = (float)min; + max = (float)max; + + bool minIncl; + bool maxIncl; + if (min == max) + { + minIncl = true; + maxIncl = true; + } + else + { + minIncl = Random().NextBoolean(); + maxIncl = Random().NextBoolean(); + } + ranges[rangeID] = new 
DoubleRange("r" + rangeID, min, minIncl, max, maxIncl); + + if (VERBOSE) + { + Console.WriteLine("TEST: range " + rangeID + ": " + ranges[rangeID]); + } + + // Do "slow but hopefully correct" computation of + // expected count: + for (int i = 0; i < numDocs; i++) + { + bool accept = true; + if (minIncl) + { + accept &= values[i] >= min; + } + else + { + accept &= values[i] > min; + } + if (maxIncl) + { + accept &= values[i] <= max; + } + else + { + accept &= values[i] < max; + } + if (VERBOSE) + { + Console.WriteLine("TEST: check doc=" + i + " val=" + values[i] + " accept=" + accept); + } + if (accept) + { + expectedCounts[rangeID]++; + minAcceptedValue = Math.Min(minAcceptedValue, values[i]); + maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]); + } + } + } + + FacetsCollector sfc = new FacetsCollector(); + s.Search(new MatchAllDocsQuery(), sfc); + Filter fastMatchFilter; + if (Random().NextBoolean()) + { + if (Random().NextBoolean()) + { + fastMatchFilter = NumericRangeFilter.NewFloatRange("field", minValue, maxValue, true, true); + } + else + { + fastMatchFilter = NumericRangeFilter.NewFloatRange("field", minAcceptedValue, maxAcceptedValue, true, true); + } + } + else + { + fastMatchFilter = null; + } + ValueSource vs = new FloatFieldSource("field"); + Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); + FacetResult result = facets.GetTopChildren(10, "field"); + Assert.AreEqual(numRange, result.labelValues.Length); + for (int rangeID = 0; rangeID < numRange; rangeID++) + { + if (VERBOSE) + { + Console.WriteLine("TEST: verify range " + rangeID + " expectedCount=" + expectedCounts[rangeID]); + } + LabelAndValue subNode = result.labelValues[rangeID]; + Assert.AreEqual("r" + rangeID, subNode.label); + Assert.AreEqual(expectedCounts[rangeID], (int)subNode.value); + + DoubleRange range = ranges[rangeID]; + + // Test drill-down: + DrillDownQuery ddq = new DrillDownQuery(config); + if (Random().NextBoolean()) + { + if 
(Random().NextBoolean()) + { + ddq.Add("field", NumericRangeFilter.NewFloatRange("field", (float)range.min, (float)range.max, range.minInclusive, range.maxInclusive)); + } + else + { + ddq.Add("field", NumericRangeQuery.NewFloatRange("field", (float)range.min, (float)range.max, range.minInclusive, range.maxInclusive)); + } + } + else + { + ddq.Add("field", range.GetFilter(fastMatchFilter, vs)); + } + Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits); + } + } + + IOUtils.Close(w, r, dir); + } + + [Test] + public virtual void TestRandomDoubles() + { + Directory dir = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), dir); + + int numDocs = AtLeast(1000); + double[] values = new double[numDocs]; + double minValue = double.PositiveInfinity; + double maxValue = double.NegativeInfinity; + for (int i = 0; i < numDocs; i++) + { + Document doc = new Document(); + double v = Random().NextDouble(); + values[i] = v; + doc.Add(new DoubleDocValuesField("field", v)); + doc.Add(new DoubleField("field", v, Field.Store.NO)); + w.AddDocument(doc); + minValue = Math.Min(minValue, v); + maxValue = Math.Max(maxValue, v); + } + IndexReader r = w.Reader; + + IndexSearcher s = NewSearcher(r); + FacetsConfig config = new FacetsConfig(); + + int numIters = AtLeast(10); + for (int iter = 0; iter < numIters; iter++) + { + if (VERBOSE) + { + Console.WriteLine("TEST: iter=" + iter); + } + int numRange = TestUtil.NextInt(Random(), 1, 5); + DoubleRange[] ranges = new DoubleRange[numRange]; + int[] expectedCounts = new int[numRange]; + double minAcceptedValue = double.PositiveInfinity; + double maxAcceptedValue = double.NegativeInfinity; + for (int rangeID = 0; rangeID < numRange; rangeID++) + { + double min; + if (rangeID > 0 && Random().Next(10) == 7) + { + // Use an existing boundary: + DoubleRange prevRange = ranges[Random().Next(rangeID)]; + if (Random().NextBoolean()) + { + min = prevRange.min; + } + else + { + min = prevRange.max; + } + } + else + { 
+ min = Random().NextDouble(); + } + double max; + if (rangeID > 0 && Random().Next(10) == 7) + { + // Use an existing boundary: + DoubleRange prevRange = ranges[Random().Next(rangeID)]; + if (Random().NextBoolean()) + { + max = prevRange.min; + } + else + { + max = prevRange.max; + } + } + else + { + max = Random().NextDouble(); + } + + if (min > max) + { + double x = min; + min = max; + max = x; + } + + bool minIncl; + bool maxIncl; + if (min == max) + { + minIncl = true; + maxIncl = true; + } + else + { + minIncl = Random().NextBoolean(); + maxIncl = Random().NextBoolean(); + } + ranges[rangeID] = new DoubleRange("r" + rangeID, min, minIncl, max, maxIncl); + + // Do "slow but hopefully correct" computation of + // expected count: + for (int i = 0; i < numDocs; i++) + { + bool accept = true; + if (minIncl) + { + accept &= values[i] >= min; + } + else + { + accept &= values[i] > min; + } + if (maxIncl) + { + accept &= values[i] <= max; + } + else + { + accept &= values[i] < max; + } + if (accept) + { + expectedCounts[rangeID]++; + minAcceptedValue = Math.Min(minAcceptedValue, values[i]); + maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]); + } + } + } + + FacetsCollector sfc = new FacetsCollector(); + s.Search(new MatchAllDocsQuery(), sfc); + Filter fastMatchFilter; + if (Random().NextBoolean()) + { + if (Random().NextBoolean()) + { + fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minValue, maxValue, true, true); + } + else + { + fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minAcceptedValue, maxAcceptedValue, true, true); + } + } + else + { + fastMatchFilter = null; + } + ValueSource vs = new DoubleFieldSource("field"); + Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); + FacetResult result = facets.GetTopChildren(10, "field"); + Assert.AreEqual(numRange, result.labelValues.Length); + for (int rangeID = 0; rangeID < numRange; rangeID++) + { + if (VERBOSE) + { + Console.WriteLine(" range " + 
rangeID + " expectedCount=" + expectedCounts[rangeID]); + } + LabelAndValue subNode = result.labelValues[rangeID]; + Assert.AreEqual("r" + rangeID, subNode.label); + Assert.AreEqual(expectedCounts[rangeID], (int)subNode.value); + + DoubleRange range = ranges[rangeID]; + + // Test drill-down: + DrillDownQuery ddq = new DrillDownQuery(config); + if (Random().NextBoolean()) + { + if (Random().NextBoolean()) + { + ddq.Add("field", NumericRangeFilter.NewDoubleRange("field", range.min, range.max, range.minInclusive, range.maxInclusive)); + } + else + { + ddq.Add("field", NumericRangeQuery.NewDoubleRange("field", range.min, range.max, range.minInclusive, range.maxInclusive)); + } + } + else + { + ddq.Add("field", range.GetFilter(fastMatchFilter, vs)); + } + + Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits); + } + } + + IOUtils.Close(w, r, dir); + } + + // LUCENE-5178 + [Test] + public virtual void TestMissingValues() + { + AssumeTrue("codec does not support docsWithField", DefaultCodecSupportsDocsWithField()); + Directory d = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), d); + Document doc = new Document(); + NumericDocValuesField field = new NumericDocValuesField("field", 0L); + doc.Add(field); + for (long l = 0; l < 100; l++) + { + if (l % 5 == 0) + { + // Every 5th doc is missing the value: + w.AddDocument(new Document()); + continue; + } + field.LongValue = l; + w.AddDocument(doc); + } + + IndexReader r = w.Reader; + + FacetsCollector fc = new FacetsCollector(); + + IndexSearcher s = NewSearcher(r); + s.Search(new MatchAllDocsQuery(), fc); + Facets facets = new LongRangeFacetCounts("field", fc, new LongRange("less than 10", 0L, true, 10L, false), new LongRange("less than or equal to 10", 0L, true, 10L, true), new LongRange("over 90", 90L, false, 100L, false), new LongRange("90 or above", 90L, true, 100L, false), new LongRange("over 1000", 1000L, false, long.MaxValue, false)); + + Assert.AreEqual("dim=field path=[] 
value=16 childCount=5\n less than 10 (8)\n less than or equal to 10 (8)\n over 90 (8)\n 90 or above (8)\n over 1000 (0)\n", facets.GetTopChildren(10, "field").ToString()); + + IOUtils.Close(w, r, d); + } + + [Test] + public virtual void TestCustomDoublesValueSource() + { + Directory dir = NewDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); + + Document doc = new Document(); + writer.AddDocument(doc); + writer.AddDocument(doc); + writer.AddDocument(doc); + + // Test wants 3 docs in one segment: + writer.ForceMerge(1); + + var vs = new ValueSourceAnonymousInnerClassHelper(this, doc); + + FacetsConfig config = new FacetsConfig(); + + FacetsCollector fc = new FacetsCollector(); + + IndexReader r = writer.Reader; + IndexSearcher s = NewSearcher(r); + s.Search(new MatchAllDocsQuery(), fc); + + DoubleRange[] ranges = new DoubleRange[] { new DoubleRange("< 1", 0.0, true, 1.0, false), new DoubleRange("< 2", 0.0, true, 2.0, false), new DoubleRange("< 5", 0.0, true, 5.0, false), new DoubleRange("< 10", 0.0, true, 10.0, false), new DoubleRange("< 20", 0.0, true, 20.0, false), new DoubleRange("< 50", 0.0, true, 50.0, false) }; + + Filter fastMatchFilter; + AtomicBoolean filterWasUsed = new AtomicBoolean(); + if (Random().NextBoolean()) + { + // Sort of silly: + fastMatchFilter = new CachingWrapperFilterAnonymousInnerClassHelper(this, new QueryWrapperFilter(new MatchAllDocsQuery()), filterWasUsed); + } + else + { + fastMatchFilter = null; + } + + if (VERBOSE) + { + Console.WriteLine("TEST: fastMatchFilter=" + fastMatchFilter); + } + + Facets facets = new DoubleRangeFacetCounts("field", vs, fc, fastMatchFilter, ranges); + + Assert.AreEqual("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", facets.GetTopChildren(10, "field").ToString()); + Assert.True(fastMatchFilter == null || filterWasUsed.Get()); + + DrillDownQuery ddq = new DrillDownQuery(config); + ddq.Add("field", 
ranges[1].GetFilter(fastMatchFilter, vs)); + + // Test simple drill-down: + Assert.AreEqual(1, s.Search(ddq, 10).TotalHits); + + // Test drill-sideways after drill-down + DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper2(this, s, config, (TaxonomyReader)null, vs, ranges, fastMatchFilter); + + + DrillSidewaysResult dsr = ds.Search(ddq, 10); + Assert.AreEqual(1, dsr.Hits.TotalHits); + Assert.AreEqual("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", dsr.Facets.GetTopChildren(10, "field").ToString()); + + IOUtils.Close(r, writer, dir); + } + + private class ValueSourceAnonymousInnerClassHelper : ValueSource + { + private readonly TestRangeFacetCounts outerInstance; + + private Document doc; + + public ValueSourceAnonymousInnerClassHelper(TestRangeFacetCounts outerInstance, Document doc) + { + this.outerInstance = outerInstance; + this.doc = doc; + } + + public override FunctionValues GetValues(IDictionary ignored, AtomicReaderContext ignored2) + { + return new DoubleDocValuesAnonymousInnerClassHelper(this); + } + + private class DoubleDocValuesAnonymousInnerClassHelper : DoubleDocValues + { + private readonly ValueSourceAnonymousInnerClassHelper outerInstance; + + public DoubleDocValuesAnonymousInnerClassHelper(ValueSourceAnonymousInnerClassHelper outerInstance) + : base(null) + { + this.outerInstance = outerInstance; + } + + public override double DoubleVal(int doc) + { + return doc + 1; + } + } + + public override bool Equals(object o) + { + throw new System.NotSupportedException(); + } + + public override int GetHashCode() + { + throw new System.NotSupportedException(); + } + + public override string Description + { + get { throw new NotSupportedException(); } + } + + } + + private class CachingWrapperFilterAnonymousInnerClassHelper : CachingWrapperFilter + { + private readonly TestRangeFacetCounts outerInstance; + + private AtomicBoolean filterWasUsed; + + public 
CachingWrapperFilterAnonymousInnerClassHelper(TestRangeFacetCounts outerInstance, QueryWrapperFilter org, AtomicBoolean filterWasUsed) + : base(org) + { + this.outerInstance = outerInstance; + this.filterWasUsed = filterWasUsed; + } + + protected override DocIdSet CacheImpl(DocIdSetIterator iterator, AtomicReader reader) + { + var cached = new FixedBitSet(reader.MaxDoc); + filterWasUsed.Set(true); + cached.Or(iterator); + return cached; + } + } + + private class DrillSidewaysAnonymousInnerClassHelper2 : DrillSideways + { + private readonly TestRangeFacetCounts outerInstance; + + private ValueSource vs; + private Lucene.Net.Facet.Range.DoubleRange[] ranges; + private Filter fastMatchFilter; + + + public DrillSidewaysAnonymousInnerClassHelper2(TestRangeFacetCounts testRangeFacetCounts, IndexSearcher indexSearcher, FacetsConfig facetsConfig, TaxonomyReader org, ValueSource valueSource, DoubleRange[] doubleRanges, Filter filter) + : base(indexSearcher, facetsConfig, org) + { + // Assign from the constructor parameters; the previous code assigned each + // field to itself (CS1717), leaving vs/ranges/fastMatchFilter null and + // breaking BuildFacetsResult below. + this.outerInstance = testRangeFacetCounts; + this.vs = valueSource; + this.ranges = doubleRanges; + this.fastMatchFilter = filter; + } + + + protected override Facets BuildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, string[] drillSidewaysDims) + { + Debug.Assert(drillSideways.Length == 1); + return new DoubleRangeFacetCounts("field", vs, drillSideways[0], fastMatchFilter, ranges); + } + + protected override bool ScoreSubDocsAtOnce() + { + return Random().NextBoolean(); + } + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/src/Lucene.Net.Tests/core/Facet/SlowRAMDirectory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/core/Facet/SlowRAMDirectory.cs b/src/Lucene.Net.Tests/core/Facet/SlowRAMDirectory.cs new file mode 100644 index 0000000..72de557 --- /dev/null +++ b/src/Lucene.Net.Tests/core/Facet/SlowRAMDirectory.cs @@ -0,0 +1,261 @@ +using System; +using
System.Threading; +using Lucene.Net.Randomized.Generators; + +namespace Lucene.Net.Facet +{ + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + using IOContext = Lucene.Net.Store.IOContext; + using IndexInput = Lucene.Net.Store.IndexInput; + using IndexOutput = Lucene.Net.Store.IndexOutput; + using RAMDirectory = Lucene.Net.Store.RAMDirectory; + using ThreadInterruptedException = Lucene.Net.Util.ThreadInterruptedException; + + /// + /// Test utility - slow directory + /// + // TODO: move to test-framework and sometimes use in tests? 
+ public class SlowRAMDirectory : RAMDirectory + { + + private const int IO_SLEEP_THRESHOLD = 50; + + internal Random random; + private int sleepMillis; + + public virtual int SleepMillis + { + set + { + this.sleepMillis = value; + } + } + + public SlowRAMDirectory(int sleepMillis, Random random) + { + this.sleepMillis = sleepMillis; + this.random = random; + } + + public override IndexOutput CreateOutput(string name, IOContext context) + { + if (sleepMillis != -1) + { + return new SlowIndexOutput(this, base.CreateOutput(name, context)); + } + + return base.CreateOutput(name, context); + } + + public override IndexInput OpenInput(string name, IOContext context) + { + if (sleepMillis != -1) + { + return new SlowIndexInput(this, base.OpenInput(name, context)); + } + return base.OpenInput(name, context); + } + + internal virtual void doSleep(Random random, int length) + { + int sTime = length < 10 ? sleepMillis : (int)(sleepMillis * Math.Log(length)); + if (random != null) + { + sTime = random.Next(sTime); + } + try + { + Thread.Sleep(sTime); + } + catch (ThreadInterruptedException e) + { + throw new ThreadInterruptedException(e); + } + } + + /// + /// Make a private random. + internal virtual Random forkRandom() + { + if (random == null) + { + return null; + } + return new Random((int)random.NextLong()); + } + + /// + /// Delegate class to wrap an IndexInput and delay reading bytes by some + /// specified time. 
+ /// + private class SlowIndexInput : IndexInput + { + private readonly SlowRAMDirectory outerInstance; + + internal IndexInput ii; + internal int numRead = 0; + internal Random rand; + + public SlowIndexInput(SlowRAMDirectory outerInstance, IndexInput ii) + : base("SlowIndexInput(" + ii + ")") + { + this.outerInstance = outerInstance; + this.rand = outerInstance.forkRandom(); + this.ii = ii; + } + + public override byte ReadByte() + { + if (numRead >= IO_SLEEP_THRESHOLD) + { + outerInstance.doSleep(rand, 0); + numRead = 0; + } + ++numRead; + return ii.ReadByte(); + } + + public override void ReadBytes(byte[] b, int offset, int len) + { + if (numRead >= IO_SLEEP_THRESHOLD) + { + outerInstance.doSleep(rand, len); + numRead = 0; + } + numRead += len; + ii.ReadBytes(b, offset, len); + } + + + // TODO: is it intentional that clone doesnt wrap? + public override object Clone() + { + return ii.Clone(); + } + + + public override void Dispose() + { + ii.Dispose(); + } + public override bool Equals(object o) + { + return ii.Equals(o); + } + public override long FilePointer + { + get + { + return ii.FilePointer; + } + } + + public override void Seek(long pos) + { + ii.Seek(pos); + } + + + public override int GetHashCode() + { + return ii.GetHashCode(); + } + public override long Length() + { + return ii.Length(); + } + + } + + /// + /// Delegate class to wrap an IndexOutput and delay writing bytes by some + /// specified time. 
+ /// + private class SlowIndexOutput : IndexOutput + { + private readonly SlowRAMDirectory outerInstance; + + + internal IndexOutput io; + internal int numWrote; + internal readonly Random rand; + + public SlowIndexOutput(SlowRAMDirectory outerInstance, IndexOutput io) + { + this.outerInstance = outerInstance; + this.io = io; + this.rand = outerInstance.forkRandom(); + } + + public override void WriteByte(byte b) + { + if (numWrote >= IO_SLEEP_THRESHOLD) + { + outerInstance.doSleep(rand, 0); + numWrote = 0; + } + ++numWrote; + io.WriteByte(b); + } + + public override void WriteBytes(byte[] b, int offset, int length) + { + if (numWrote >= IO_SLEEP_THRESHOLD) + { + outerInstance.doSleep(rand, length); + numWrote = 0; + } + numWrote += length; + io.WriteBytes(b, offset, length); + } + + public override void Dispose() + { + io.Dispose(); + } + public override void Flush() + { + io.Flush(); + } + public override long FilePointer + { + get + { + return io.FilePointer; + } + } + public override void Seek(long pos) + { + io.Seek(pos); + } + + public override long Checksum + { + get + { + return io.Checksum; + } + } + } + + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/src/Lucene.Net.Tests/core/Facet/SortedSet/TestSortedSetDocValuesFacets.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/core/Facet/SortedSet/TestSortedSetDocValuesFacets.cs b/src/Lucene.Net.Tests/core/Facet/SortedSet/TestSortedSetDocValuesFacets.cs new file mode 100644 index 0000000..091a2c8 --- /dev/null +++ b/src/Lucene.Net.Tests/core/Facet/SortedSet/TestSortedSetDocValuesFacets.cs @@ -0,0 +1,394 @@ +using System; +using System.Collections; +using System.Collections.Generic; +using System.Linq; +using Lucene.Net.Randomized; +using Lucene.Net.Randomized.Generators; +using NUnit.Framework; + +namespace Lucene.Net.Facet.SortedSet +{ + + /* + * Licensed to the Apache Software Foundation (ASF) under 
one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + using Document = Lucene.Net.Documents.Document; + using Field = Lucene.Net.Documents.Field; + using IndexReader = Lucene.Net.Index.IndexReader; + using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter; + using SlowCompositeReaderWrapper = Lucene.Net.Index.SlowCompositeReaderWrapper; + using Term = Lucene.Net.Index.Term; + using IndexSearcher = Lucene.Net.Search.IndexSearcher; + using MatchAllDocsQuery = Lucene.Net.Search.MatchAllDocsQuery; + using TermQuery = Lucene.Net.Search.TermQuery; + using TopDocs = Lucene.Net.Search.TopDocs; + using Directory = Lucene.Net.Store.Directory; + using IOUtils = Lucene.Net.Util.IOUtils; + using TestUtil = Lucene.Net.Util.TestUtil; + + [TestFixture] + public class TestSortedSetDocValuesFacets : FacetTestCase + { + + // NOTE: TestDrillSideways.testRandom also sometimes + // randomly uses SortedSetDV + [Test] + public virtual void TestBasic() + { + + AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); + Directory dir = NewDirectory(); + + FacetsConfig config = new FacetsConfig(); + config.SetMultiValued("a", true); + RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); + + Document doc = new Document(); + doc.Add(new 
SortedSetDocValuesFacetField("a", "foo")); + doc.Add(new SortedSetDocValuesFacetField("a", "bar")); + doc.Add(new SortedSetDocValuesFacetField("a", "zoo")); + doc.Add(new SortedSetDocValuesFacetField("b", "baz")); + writer.AddDocument(config.Build(doc)); + if (Random().NextBoolean()) + { + writer.Commit(); + } + + doc = new Document(); + doc.Add(new SortedSetDocValuesFacetField("a", "foo")); + writer.AddDocument(config.Build(doc)); + + // NRT open + IndexSearcher searcher = NewSearcher(writer.Reader); + + // Per-top-reader state: + SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); + + FacetsCollector c = new FacetsCollector(); + + searcher.Search(new MatchAllDocsQuery(), c); + + SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c); + + Assert.AreEqual("dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n", facets.GetTopChildren(10, "a").ToString()); + Assert.AreEqual("dim=b path=[] value=1 childCount=1\n baz (1)\n", facets.GetTopChildren(10, "b").ToString()); + + // DrillDown: + DrillDownQuery q = new DrillDownQuery(config); + q.Add("a", "foo"); + q.Add("b", "baz"); + TopDocs hits = searcher.Search(q, 1); + Assert.AreEqual(1, hits.TotalHits); + + IOUtils.Close(writer, searcher.IndexReader, dir); + } + + // LUCENE-5090 + [Test] + public virtual void TestStaleState() + { + AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); + Directory dir = NewDirectory(); + + RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); + + FacetsConfig config = new FacetsConfig(); + + Document doc = new Document(); + doc.Add(new SortedSetDocValuesFacetField("a", "foo")); + writer.AddDocument(config.Build(doc)); + + IndexReader r = writer.Reader; + SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(r); + + doc = new Document(); + doc.Add(new SortedSetDocValuesFacetField("a", "bar")); + writer.AddDocument(config.Build(doc)); + + 
doc = new Document(); + doc.Add(new SortedSetDocValuesFacetField("a", "baz")); + writer.AddDocument(config.Build(doc)); + + IndexSearcher searcher = NewSearcher(writer.Reader); + + FacetsCollector c = new FacetsCollector(); + + searcher.Search(new MatchAllDocsQuery(), c); + + try + { + new SortedSetDocValuesFacetCounts(state, c); + Fail("did not hit expected exception"); + } + catch (IllegalStateException) + { + // expected + } + + r.Dispose(); + writer.Dispose(); + searcher.IndexReader.Dispose(); + dir.Dispose(); + } + + // LUCENE-5333 + [Test] + public virtual void TestSparseFacets() + { + AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); + Directory dir = NewDirectory(); + + RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); + + FacetsConfig config = new FacetsConfig(); + + Document doc = new Document(); + doc.Add(new SortedSetDocValuesFacetField("a", "foo1")); + writer.AddDocument(config.Build(doc)); + + if (Random().NextBoolean()) + { + writer.Commit(); + } + + doc = new Document(); + doc.Add(new SortedSetDocValuesFacetField("a", "foo2")); + doc.Add(new SortedSetDocValuesFacetField("b", "bar1")); + writer.AddDocument(config.Build(doc)); + + if (Random().NextBoolean()) + { + writer.Commit(); + } + + doc = new Document(); + doc.Add(new SortedSetDocValuesFacetField("a", "foo3")); + doc.Add(new SortedSetDocValuesFacetField("b", "bar2")); + doc.Add(new SortedSetDocValuesFacetField("c", "baz1")); + writer.AddDocument(config.Build(doc)); + + // NRT open + IndexSearcher searcher = NewSearcher(writer.Reader); + writer.Dispose(); + + // Per-top-reader state: + SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); + + FacetsCollector c = new FacetsCollector(); + searcher.Search(new MatchAllDocsQuery(), c); + SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c); + + // Ask for top 10 labels for any dims that have counts: + IList results = 
facets.GetAllDims(10); + + Assert.AreEqual(3, results.Count); + Assert.AreEqual("dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", results[0].ToString()); + Assert.AreEqual("dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", results[1].ToString()); + Assert.AreEqual("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results[2].ToString()); + + searcher.IndexReader.Dispose(); + dir.Dispose(); + } + + [Test] + public virtual void TestSomeSegmentsMissing() + { + AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); + Directory dir = NewDirectory(); + + RandomIndexWriter writer = new RandomIndexWriter(Random(), dir); + + FacetsConfig config = new FacetsConfig(); + + Document doc = new Document(); + doc.Add(new SortedSetDocValuesFacetField("a", "foo1")); + writer.AddDocument(config.Build(doc)); + writer.Commit(); + + doc = new Document(); + writer.AddDocument(config.Build(doc)); + writer.Commit(); + + doc = new Document(); + doc.Add(new SortedSetDocValuesFacetField("a", "foo2")); + writer.AddDocument(config.Build(doc)); + writer.Commit(); + + // NRT open + IndexSearcher searcher = NewSearcher(writer.Reader); + writer.Dispose(); + + // Per-top-reader state: + SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); + + FacetsCollector c = new FacetsCollector(); + searcher.Search(new MatchAllDocsQuery(), c); + SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c); + + // Ask for top 10 labels for any dims that have counts: + Assert.AreEqual("dim=a path=[] value=2 childCount=2\n foo1 (1)\n foo2 (1)\n", facets.GetTopChildren(10, "a").ToString()); + + searcher.IndexReader.Dispose(); + dir.Dispose(); + } + + [Test] + public virtual void TestSlowCompositeReaderWrapper() + { + AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); + Directory dir = NewDirectory(); + + RandomIndexWriter writer = new 
RandomIndexWriter(Random(), dir); + + FacetsConfig config = new FacetsConfig(); + + Document doc = new Document(); + doc.Add(new SortedSetDocValuesFacetField("a", "foo1")); + writer.AddDocument(config.Build(doc)); + + writer.Commit(); + + doc = new Document(); + doc.Add(new SortedSetDocValuesFacetField("a", "foo2")); + writer.AddDocument(config.Build(doc)); + + // NRT open + IndexSearcher searcher = new IndexSearcher(SlowCompositeReaderWrapper.Wrap(writer.Reader)); + + // Per-top-reader state: + SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader); + + FacetsCollector c = new FacetsCollector(); + searcher.Search(new MatchAllDocsQuery(), c); + Facets facets = new SortedSetDocValuesFacetCounts(state, c); + + // Ask for top 10 labels for any dims that have counts: + Assert.AreEqual("dim=a path=[] value=2 childCount=2\n foo1 (1)\n foo2 (1)\n", facets.GetTopChildren(10, "a").ToString()); + + IOUtils.Close(writer, searcher.IndexReader, dir); + } + + + [Test] + public virtual void TestRandom() + { + AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet()); + string[] tokens = GetRandomTokens(10); + Directory indexDir = NewDirectory(); + Directory taxoDir = NewDirectory(); + + RandomIndexWriter w = new RandomIndexWriter(Random(), indexDir); + FacetsConfig config = new FacetsConfig(); + int numDocs = AtLeast(1000); + int numDims = TestUtil.NextInt(Random(), 1, 7); + IList testDocs = GetRandomDocs(tokens, numDocs, numDims); + foreach (TestDoc testDoc in testDocs) + { + Document doc = new Document(); + doc.Add(NewStringField("content", testDoc.content, Field.Store.NO)); + for (int j = 0; j < numDims; j++) + { + if (testDoc.dims[j] != null) + { + doc.Add(new SortedSetDocValuesFacetField("dim" + j, testDoc.dims[j])); + } + } + w.AddDocument(config.Build(doc)); + } + + // NRT open + IndexSearcher searcher = NewSearcher(w.Reader); + + // Per-top-reader state: + SortedSetDocValuesReaderState state = new 
DefaultSortedSetDocValuesReaderState(searcher.IndexReader); + + int iters = AtLeast(100); + for (int iter = 0; iter < iters; iter++) + { + string searchToken = tokens[Random().Next(tokens.Length)]; + if (VERBOSE) + { + Console.WriteLine("\nTEST: iter content=" + searchToken); + } + FacetsCollector fc = new FacetsCollector(); + FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc); + Facets facets = new SortedSetDocValuesFacetCounts(state, fc); + + // Slow, yet hopefully bug-free, faceting: + var expectedCounts = new List>(); + for (int i = 0; i < numDims; i++) + { + expectedCounts.Add(new Dictionary()); + } + + foreach (TestDoc doc in testDocs) + { + if (doc.content.Equals(searchToken)) + { + for (int j = 0; j < numDims; j++) + { + if (doc.dims[j] != null) + { + int? v; + + if (!expectedCounts[j].TryGetValue(doc.dims[j],out v)) + { + expectedCounts[j][doc.dims[j]] = 1; + } + else + { + expectedCounts[j][doc.dims[j]] = (int)v + 1; + } + } + } + } + } + + IList expected = new List(); + for (int i = 0; i < numDims; i++) + { + IList labelValues = new List(); + int totCount = 0; + foreach (KeyValuePair ent in expectedCounts[i]) + { + labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value)); + totCount += ent.Value.Value; + } + SortLabelValues(labelValues); + if (totCount > 0) + { + expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count)); + } + } + + // Sort by highest value, tie break by value: + SortFacetResults(expected); + + IList actual = facets.GetAllDims(10); + + // Messy: fixup ties + //sortTies(actual); + + Assert.AreEqual(expected, actual); + } + + IOUtils.Close(w, searcher.IndexReader, indexDir, taxoDir); + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/982eaf60/src/Lucene.Net.Tests/core/Facet/Taxonomy/Directory/TestAddTaxonomy.cs ---------------------------------------------------------------------- diff --git 
a/src/Lucene.Net.Tests/core/Facet/Taxonomy/Directory/TestAddTaxonomy.cs b/src/Lucene.Net.Tests/core/Facet/Taxonomy/Directory/TestAddTaxonomy.cs new file mode 100644 index 0000000..2ece8ca --- /dev/null +++ b/src/Lucene.Net.Tests/core/Facet/Taxonomy/Directory/TestAddTaxonomy.cs @@ -0,0 +1,323 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Threading; +using System.Threading.Tasks; +using Lucene.Net.Randomized.Generators; +using Lucene.Net.Support; +using NUnit.Framework; + +namespace Lucene.Net.Facet.Taxonomy.Directory +{ + + + using DiskOrdinalMap = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter.DiskOrdinalMap; + using MemoryOrdinalMap = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter.MemoryOrdinalMap; + using OrdinalMap = Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter.OrdinalMap; + using Directory = Lucene.Net.Store.Directory; + using IOUtils = Lucene.Net.Util.IOUtils; + using TestUtil = Lucene.Net.Util.TestUtil; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + [TestFixture] + public class TestAddTaxonomy : FacetTestCase + { + private void Dotest(int ncats, int range) + { + AtomicInteger numCats = new AtomicInteger(ncats); + Directory[] dirs = new Directory[2]; + for (int i = 0; i < dirs.Length; i++) + { + dirs[i] = NewDirectory(); + var tw = new DirectoryTaxonomyWriter(dirs[i]); + ThreadClass[] addThreads = new ThreadClass[4]; + for (int j = 0; j < addThreads.Length; j++) + { + addThreads[j] = new ThreadAnonymousInnerClassHelper(this, range, numCats, tw); + } + + foreach (ThreadClass t in addThreads) + { + t.Start(); + } + foreach (ThreadClass t in addThreads) + { + t.Join(); + } + + tw.Dispose(); + } + + var tw1 = new DirectoryTaxonomyWriter(dirs[0]); + OrdinalMap map = randomOrdinalMap(); + tw1.AddTaxonomy(dirs[1], map); + tw1.Dispose(); + + validate(dirs[0], dirs[1], map); + + IOUtils.Close(dirs); + } + + private class ThreadAnonymousInnerClassHelper : ThreadClass + { + private readonly TestAddTaxonomy outerInstance; + + private int range; + private AtomicInteger numCats; + private DirectoryTaxonomyWriter tw; + + public ThreadAnonymousInnerClassHelper(TestAddTaxonomy outerInstance, int range, AtomicInteger numCats, DirectoryTaxonomyWriter tw) + { + this.outerInstance = outerInstance; + this.range = range; + this.numCats = numCats; + this.tw = tw; + } + + public override void Run() + { + Random random = Random(); + while (numCats.DecrementAndGet() > 0) + { + string cat = Convert.ToString(random.Next(range)); + try + { + tw.AddCategory(new FacetLabel("a", cat)); + } + catch (IOException e) + { + throw new Exception(e.Message, e); + } + } + } + } + + + private OrdinalMap randomOrdinalMap() + { + if (Random().NextBoolean()) + { + return new DiskOrdinalMap("taxoMap"); + } + else + { + return new MemoryOrdinalMap(); + } + } + + private void validate(Directory dest, Directory src, OrdinalMap ordMap) + { + var destTr = new DirectoryTaxonomyReader(dest); + try + { + int destSize = destTr.Size; + var srcTR = new 
DirectoryTaxonomyReader(src); + try + { + var map = ordMap.Map; + + // validate taxo sizes + int srcSize = srcTR.Size; + Assert.True(destSize >= srcSize, "destination taxonomy expected to be larger than source; dest=" + destSize + " src=" + srcSize); + + // validate that all source categories exist in destination, and their + // ordinals are as expected. + for (int j = 1; j < srcSize; j++) + { + FacetLabel cp = srcTR.GetPath(j); + int destOrdinal = destTr.GetOrdinal(cp); + Assert.True(destOrdinal > 0, cp + " not found in destination"); + Assert.AreEqual(destOrdinal, map[j]); + } + } + finally + { + ((TaxonomyReader)srcTR).Dispose(true); + } + } + finally + { + ((TaxonomyReader)destTr).Dispose(true); + } + } + + [Test] + public virtual void TestAddEmpty() + { + Directory dest = NewDirectory(); + var destTW = new DirectoryTaxonomyWriter(dest); + destTW.AddCategory(new FacetLabel("Author", "Rob Pike")); + destTW.AddCategory(new FacetLabel("Aardvarks", "Bob")); + destTW.Commit(); + + Directory src = NewDirectory(); + (new DirectoryTaxonomyWriter(src)).Dispose(); // create an empty taxonomy + + OrdinalMap map = randomOrdinalMap(); + destTW.AddTaxonomy(src, map); + destTW.Dispose(); + + validate(dest, src, map); + + IOUtils.Close(dest, src); + } + + [Test] + public virtual void TestAddToEmpty() + { + Directory dest = NewDirectory(); + + Directory src = NewDirectory(); + DirectoryTaxonomyWriter srcTW = new DirectoryTaxonomyWriter(src); + srcTW.AddCategory(new FacetLabel("Author", "Rob Pike")); + srcTW.AddCategory(new FacetLabel("Aardvarks", "Bob")); + srcTW.Dispose(); + + DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest); + OrdinalMap map = randomOrdinalMap(); + destTW.AddTaxonomy(src, map); + destTW.Dispose(); + + validate(dest, src, map); + + IOUtils.Close(dest, src); + } + + // A more comprehensive and big random test. 
+ [Test] + public virtual void TestBig() + { + Dotest(200, 10000); + Dotest(1000, 20000); + Dotest(400000, 1000000); + } + + // a reasonable random test + [Test] + public virtual void TestMedium() + { + Random random = Random(); + int numTests = AtLeast(3); + for (int i = 0; i < numTests; i++) + { + Dotest(TestUtil.NextInt(random, 2, 100), TestUtil.NextInt(random, 100, 1000)); + } + } + + [Test] + public virtual void TestSimple() + { + Directory dest = NewDirectory(); + var tw1 = new DirectoryTaxonomyWriter(dest); + tw1.AddCategory(new FacetLabel("Author", "Mark Twain")); + tw1.AddCategory(new FacetLabel("Animals", "Dog")); + tw1.AddCategory(new FacetLabel("Author", "Rob Pike")); + + Directory src = NewDirectory(); + var tw2 = new DirectoryTaxonomyWriter(src); + tw2.AddCategory(new FacetLabel("Author", "Rob Pike")); + tw2.AddCategory(new FacetLabel("Aardvarks", "Bob")); + tw2.Dispose(); + + OrdinalMap map = randomOrdinalMap(); + + tw1.AddTaxonomy(src, map); + tw1.Dispose(); + + validate(dest, src, map); + + IOUtils.Close(dest, src); + } + + [Test] + public virtual void TestConcurrency() + { + // tests that addTaxonomy and addCategory work in parallel + int numCategories = AtLeast(10000); + + // build an input taxonomy index + Directory src = NewDirectory(); + var tw = new DirectoryTaxonomyWriter(src); + for (int i = 0; i < numCategories; i++) + { + tw.AddCategory(new FacetLabel("a", Convert.ToString(i))); + } + tw.Dispose(); + + // now add the taxonomy to an empty taxonomy, while adding the categories + // again, in parallel -- in the end, no duplicate categories should exist. 
+ Directory dest = NewDirectory(); + var destTW = new DirectoryTaxonomyWriter(dest); + ThreadClass t = new ThreadAnonymousInnerClassHelper2(this, numCategories, destTW); + t.Start(); + + OrdinalMap map = new MemoryOrdinalMap(); + destTW.AddTaxonomy(src, map); + t.Join(); + destTW.Dispose(); + + // now validate + + var dtr = new DirectoryTaxonomyReader(dest); + // +2 to account for the root category + "a" + Assert.AreEqual(numCategories + 2, dtr.Size); + var categories = new HashSet(); + for (int i = 1; i < dtr.Size; i++) + { + FacetLabel cat = dtr.GetPath(i); + Assert.True(categories.Add(cat), "category " + cat + " already existed"); + } + (dtr).Dispose(); + + IOUtils.Close(src, dest); + } + + private class ThreadAnonymousInnerClassHelper2 : ThreadClass + { + private readonly TestAddTaxonomy outerInstance; + + private int numCategories; + private Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter destTW; + + public ThreadAnonymousInnerClassHelper2(TestAddTaxonomy outerInstance, int numCategories, Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter destTW) + { + this.outerInstance = outerInstance; + this.numCategories = numCategories; + this.destTW = destTW; + } + + public override void Run() + { + for (int i = 0; i < numCategories; i++) + { + try + { + destTW.AddCategory(new FacetLabel("a", Convert.ToString(i))); + } + catch (IOException e) + { + // shouldn't happen - if it does, let the test fail on uncaught exception. + throw new Exception(e.Message, e); + } + } + } + } + + } + +} \ No newline at end of file