lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [43/58] [abbrv] lucenenet git commit: Ported Grouping.DistinctValuesCollectorTest
Date Thu, 10 Nov 2016 11:33:54 GMT
Ported Grouping.DistinctValuesCollectorTest


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/44c29eb8
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/44c29eb8
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/44c29eb8

Branch: refs/heads/grouping
Commit: 44c29eb8921d0c3d08493de6e2d3333b51aebe53
Parents: fa5f440
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Sat Nov 5 20:38:20 2016 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Tue Nov 8 02:24:56 2016 +0700

----------------------------------------------------------------------
 .../AbstractDistinctValuesCollector.cs          |  36 +-
 .../Function/FunctionDistinctValuesCollector.cs |  18 +-
 .../Term/TermDistinctValuesCollector.cs         |  28 +-
 .../DistinctValuesCollectorTest.cs              | 327 +++++++++----------
 .../Lucene.Net.Tests.Grouping.csproj            |   1 +
 5 files changed, 207 insertions(+), 203 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/44c29eb8/src/Lucene.Net.Grouping/AbstractDistinctValuesCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/AbstractDistinctValuesCollector.cs b/src/Lucene.Net.Grouping/AbstractDistinctValuesCollector.cs
index e9df8d8..0f51a24 100644
--- a/src/Lucene.Net.Grouping/AbstractDistinctValuesCollector.cs
+++ b/src/Lucene.Net.Grouping/AbstractDistinctValuesCollector.cs
@@ -13,13 +13,14 @@ namespace Lucene.Net.Search.Grouping
     /// @lucene.experimental
     /// </summary>
     /// <typeparam name="GC"></typeparam>
-    public abstract class AbstractDistinctValuesCollector<GC> : Collector where GC : IGroupCount /* AbstractDistinctValuesCollector<GC>.GroupCount */
+    public abstract class AbstractDistinctValuesCollector<GC> : Collector, IAbstractDistinctValuesCollector<GC>
+        where GC : IGroupCount /* AbstractDistinctValuesCollector<GC>.GroupCount */
     {
         /// <summary>
         /// Returns all unique values for each top N group.
         /// </summary>
         /// <returns>all unique values for each top N group</returns>
-        public abstract List<GC> GetGroups();
+        public abstract IEnumerable<GC> Groups { get; }
 
         public override bool AcceptsDocsOutOfOrder()
         {
@@ -66,16 +67,16 @@ namespace Lucene.Net.Search.Grouping
     /// <see cref="AbstractDistinctValuesCollector{GC}"/> and renamed
     /// from GroupCount to AbstractGroupCount
     /// </remarks>
-    public abstract class AbstractGroupCount<TGroupValue> : IGroupCount
+    public abstract class AbstractGroupCount<TGroupValue> : IGroupCount<TGroupValue>
         //where TGroupValue : IComparable
     {
-        public readonly TGroupValue groupValue;
-        public readonly ISet<TGroupValue> uniqueValues;
+        public TGroupValue GroupValue { get; protected set; }
+        public IEnumerable<TGroupValue> UniqueValues { get; protected set; }
 
         public AbstractGroupCount(TGroupValue groupValue)
         {
-            this.groupValue = groupValue;
-            this.uniqueValues = new HashSet<TGroupValue>();
+            this.GroupValue = groupValue;
+            this.UniqueValues = new HashSet<TGroupValue>();
         }
     }
 
@@ -86,4 +87,25 @@ namespace Lucene.Net.Search.Grouping
     public interface IGroupCount
     {
     }
+
+
+    /// <summary>
+    /// LUCENENET specific interface used to apply covariance to TGroupValue
+    /// </summary>
+    /// <typeparam name="TGroupValue"></typeparam>
+    public interface IGroupCount<out TGroupValue> : IGroupCount
+    {
+        TGroupValue GroupValue { get; }
+        IEnumerable<TGroupValue> UniqueValues { get; }
+    }
+
+
+    /// <summary>
+    /// LUCENENET specific interface used to apply covariance to GC
+    /// </summary>
+    /// <typeparam name="GC"></typeparam>
+    public interface IAbstractDistinctValuesCollector<out GC>
+    {
+        IEnumerable<GC> Groups { get; }
+    }
 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/44c29eb8/src/Lucene.Net.Grouping/Function/FunctionDistinctValuesCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Function/FunctionDistinctValuesCollector.cs b/src/Lucene.Net.Grouping/Function/FunctionDistinctValuesCollector.cs
index 3d5dc0a..2d5b9ee 100644
--- a/src/Lucene.Net.Grouping/Function/FunctionDistinctValuesCollector.cs
+++ b/src/Lucene.Net.Grouping/Function/FunctionDistinctValuesCollector.cs
@@ -1,13 +1,9 @@
-using Lucene.Net.Queries.Function;
+using Lucene.Net.Index;
+using Lucene.Net.Queries.Function;
 using Lucene.Net.Support;
 using Lucene.Net.Util.Mutable;
-using System;
 using System.Collections;
 using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using Lucene.Net.Index;
 
 namespace Lucene.Net.Search.Grouping.Function
 {
@@ -28,21 +24,21 @@ namespace Lucene.Net.Search.Grouping.Function
         private MutableValue groupMval;
         private MutableValue countMval;
 
-        public FunctionDistinctValuesCollector(IDictionary /*Map<?, ?>*/ vsContext, ValueSource groupSource, ValueSource countSource, ICollection<SearchGroup<MutableValue>> groups)
+        public FunctionDistinctValuesCollector(IDictionary /*Map<?, ?>*/ vsContext, ValueSource groupSource, ValueSource countSource, IEnumerable<ISearchGroup<MutableValue>> groups)
         {
             this.vsContext = vsContext;
             this.groupSource = groupSource;
             this.countSource = countSource;
-            groupMap = new LurchTable<MutableValue, GroupCount>(1 << 4);
+            groupMap = new LinkedHashMap<MutableValue, GroupCount>();
             foreach (SearchGroup<MutableValue> group in groups)
             {
                 groupMap[group.GroupValue] = new GroupCount(group.GroupValue);
             }
         }
 
-        public override List<GroupCount> GetGroups()
+        public override IEnumerable<GroupCount> Groups
         {
-            return new List<GroupCount>(groupMap.Values);
+            get { return new List<GroupCount>(groupMap.Values); }
         }
 
         public override void Collect(int doc)
@@ -52,7 +48,7 @@ namespace Lucene.Net.Search.Grouping.Function
             if (groupMap.TryGetValue(groupMval, out groupCount))
             {
                 countFiller.FillValue(doc);
-                groupCount.uniqueValues.Add(countMval.Duplicate());
+                ((ISet<MutableValue>)groupCount.UniqueValues).Add(countMval.Duplicate());
             }
         }
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/44c29eb8/src/Lucene.Net.Grouping/Term/TermDistinctValuesCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Term/TermDistinctValuesCollector.cs b/src/Lucene.Net.Grouping/Term/TermDistinctValuesCollector.cs
index 502b0ea..ac4d4e9 100644
--- a/src/Lucene.Net.Grouping/Term/TermDistinctValuesCollector.cs
+++ b/src/Lucene.Net.Grouping/Term/TermDistinctValuesCollector.cs
@@ -1,12 +1,9 @@
 using Lucene.Net.Index;
-using Lucene.Net.Search;
 using Lucene.Net.Support;
 using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;
 using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
 
 namespace Lucene.Net.Search.Grouping.Terms
 {
@@ -34,16 +31,17 @@ namespace Lucene.Net.Search.Grouping.Terms
          * @param countField The field to count distinct values for
          * @param groups The top N groups, collected during the first phase search
          */
-        public TermDistinctValuesCollector(string groupField, string countField, ICollection<SearchGroup<BytesRef>> groups)
+        public TermDistinctValuesCollector(string groupField, string countField, IEnumerable<ISearchGroup<BytesRef>> groups)
         {
             this.groupField = groupField;
             this.countField = countField;
-            this.groups = new List<GroupCount>(groups.Count);
-            foreach (SearchGroup<BytesRef> group in groups)
+            int groupCount = groups.Count();
+            this.groups = new List<GroupCount>(groupCount);
+            foreach (ISearchGroup<BytesRef> group in groups)
             {
                 this.groups.Add(new GroupCount(group.GroupValue));
             }
-            ordSet = new SentinelIntSet(groups.Count, -2);
+            ordSet = new SentinelIntSet(groupCount, -2);
             groupCounts = new GroupCount[ordSet.Keys.Length];
         }
 
@@ -61,13 +59,13 @@ namespace Lucene.Net.Search.Grouping.Terms
             {
                 if (countOrd == -1)
                 {
-                    gc.uniqueValues.Add(null);
+                    ((ISet<BytesRef>)gc.UniqueValues).Add(null);
                 }
                 else
                 {
                     BytesRef br = new BytesRef();
                     countFieldTermIndex.LookupOrd(countOrd, br);
-                    gc.uniqueValues.Add(br);
+                    ((ISet<BytesRef>)gc.UniqueValues).Add(br);
                 }
 
                 gc.ords = Arrays.CopyOf(gc.ords, gc.ords.Length + 1);
@@ -92,9 +90,9 @@ namespace Lucene.Net.Search.Grouping.Terms
             return Array.BinarySearch(ords, ord) < 0;
         }
 
-        public override List<GroupCount> GetGroups()
+        public override IEnumerable<GroupCount> Groups
         {
-            return groups;
+            get { return groups; }
         }
 
         public override AtomicReaderContext NextReader
@@ -106,17 +104,17 @@ namespace Lucene.Net.Search.Grouping.Terms
                 ordSet.Clear();
                 foreach (GroupCount group in groups)
                 {
-                    int groupOrd = group.groupValue == null ? -1 : groupFieldTermIndex.LookupTerm(group.groupValue);
-                    if (group.groupValue != null && groupOrd < 0)
+                    int groupOrd = group.GroupValue == null ? -1 : groupFieldTermIndex.LookupTerm(group.GroupValue);
+                    if (group.GroupValue != null && groupOrd < 0)
                     {
                         continue;
                     }
 
                     groupCounts[ordSet.Put(groupOrd)] = group;
-                    group.ords = new int[group.uniqueValues.Count];
+                    group.ords = new int[group.UniqueValues.Count()];
                     Arrays.Fill(group.ords, -2);
                     int i = 0;
-                    foreach (BytesRef value2 in group.uniqueValues)
+                    foreach (BytesRef value2 in group.UniqueValues)
                     {
                         int countOrd = value2 == null ? -1 : countFieldTermIndex.LookupTerm(value2);
                         if (value2 == null || countOrd >= 0)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/44c29eb8/src/Lucene.Net.Tests.Grouping/DistinctValuesCollectorTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Grouping/DistinctValuesCollectorTest.cs b/src/Lucene.Net.Tests.Grouping/DistinctValuesCollectorTest.cs
index 854050b..58fae2c 100644
--- a/src/Lucene.Net.Tests.Grouping/DistinctValuesCollectorTest.cs
+++ b/src/Lucene.Net.Tests.Grouping/DistinctValuesCollectorTest.cs
@@ -1,10 +1,9 @@
 using Lucene.Net.Analysis;
 using Lucene.Net.Documents;
-using Lucene.Net.Search.Grouping.Function;
-using Lucene.Net.Search.Grouping.Term;
 using Lucene.Net.Index;
 using Lucene.Net.Queries.Function.ValueSources;
-using Lucene.Net.Search;
+using Lucene.Net.Search.Grouping.Function;
+using Lucene.Net.Search.Grouping.Terms;
 using Lucene.Net.Store;
 using Lucene.Net.Support;
 using Lucene.Net.Util;
@@ -15,10 +14,6 @@ using System.Collections;
 using System.Collections.Generic;
 using System.Globalization;
 using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using Lucene.Net.Search.Grouping;
-using Lucene.Net.Search.Grouping.Terms;
 
 namespace Lucene.Net.Search.Grouping
 {
@@ -31,7 +26,7 @@ namespace Lucene.Net.Search.Grouping
         private readonly string countField = "publisher";
         private readonly string dvCountField = "publisher_dv";
 
-        internal class ComparerAnonymousHelper1 : IComparer<AbstractGroupCount<IComparable<object>>>
+        internal class ComparerAnonymousHelper1 : IComparer<IGroupCount<IComparable>>
         {
             private readonly DistinctValuesCollectorTest outerInstance;
 
@@ -40,29 +35,29 @@ namespace Lucene.Net.Search.Grouping
                 this.outerInstance = outerInstance;
             }
 
-            public int Compare(AbstractGroupCount<IComparable<object>> groupCount1, AbstractGroupCount<IComparable<object>> groupCount2)
+            public int Compare(IGroupCount<IComparable> groupCount1, IGroupCount<IComparable> groupCount2)
             {
-                if (groupCount1.groupValue == null)
+                if (groupCount1.GroupValue == null)
                 {
-                    if (groupCount2.groupValue == null)
+                    if (groupCount2.GroupValue == null)
                     {
                         return 0;
                     }
                     return -1;
                 }
-                else if (groupCount2.groupValue == null)
+                else if (groupCount2.GroupValue == null)
                 {
                     return 1;
                 }
                 else
                 {
-                    return groupCount1.groupValue.CompareTo(groupCount2.groupValue);
+                    return groupCount1.GroupValue.CompareTo(groupCount2.GroupValue);
                 }
             }
         }
 
         [Test]
-        public void TestSimple()
+        public virtual void TestSimple()
         {
             Random random = Random();
             FieldInfo.DocValuesType_e[] dvTypes = new FieldInfo.DocValuesType_e[]{
@@ -80,24 +75,24 @@ namespace Lucene.Net.Search.Grouping
             FieldInfo.DocValuesType_e? dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : (FieldInfo.DocValuesType_e?)null;
 
             Document doc = new Document();
-            addField(doc, groupField, "1", dvType);
-            addField(doc, countField, "1", dvType);
+            AddField(doc, groupField, "1", dvType);
+            AddField(doc, countField, "1", dvType);
             doc.Add(new TextField("content", "random text", Field.Store.NO));
             doc.Add(new StringField("id", "1", Field.Store.NO));
             w.AddDocument(doc);
 
             // 1
             doc = new Document();
-            addField(doc, groupField, "1", dvType);
-            addField(doc, countField, "1", dvType);
+            AddField(doc, groupField, "1", dvType);
+            AddField(doc, countField, "1", dvType);
             doc.Add(new TextField("content", "some more random text blob", Field.Store.NO));
             doc.Add(new StringField("id", "2", Field.Store.NO));
             w.AddDocument(doc);
 
             // 2
             doc = new Document();
-            addField(doc, groupField, "1", dvType);
-            addField(doc, countField, "2", dvType);
+            AddField(doc, groupField, "1", dvType);
+            AddField(doc, countField, "2", dvType);
             doc.Add(new TextField("content", "some more random textual data", Field.Store.NO));
             doc.Add(new StringField("id", "3", Field.Store.NO));
             w.AddDocument(doc);
@@ -105,23 +100,23 @@ namespace Lucene.Net.Search.Grouping
 
             // 3
             doc = new Document();
-            addField(doc, groupField, "2", dvType);
+            AddField(doc, groupField, "2", dvType);
             doc.Add(new TextField("content", "some random text", Field.Store.NO));
             doc.Add(new StringField("id", "4", Field.Store.NO));
             w.AddDocument(doc);
 
             // 4
             doc = new Document();
-            addField(doc, groupField, "3", dvType);
-            addField(doc, countField, "1", dvType);
+            AddField(doc, groupField, "3", dvType);
+            AddField(doc, countField, "1", dvType);
             doc.Add(new TextField("content", "some more random text", Field.Store.NO));
             doc.Add(new StringField("id", "5", Field.Store.NO));
             w.AddDocument(doc);
 
             // 5
             doc = new Document();
-            addField(doc, groupField, "3", dvType);
-            addField(doc, countField, "1", dvType);
+            AddField(doc, groupField, "3", dvType);
+            AddField(doc, countField, "1", dvType);
             doc.Add(new TextField("content", "random blob", Field.Store.NO));
             doc.Add(new StringField("id", "6", Field.Store.NO));
             w.AddDocument(doc);
@@ -129,7 +124,7 @@ namespace Lucene.Net.Search.Grouping
             // 6 -- no author field
             doc = new Document();
             doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
-            addField(doc, countField, "1", dvType);
+            AddField(doc, countField, "1", dvType);
             doc.Add(new StringField("id", "6", Field.Store.NO));
             w.AddDocument(doc);
 
@@ -138,147 +133,140 @@ namespace Lucene.Net.Search.Grouping
 
             var cmp = new ComparerAnonymousHelper1(this);
 
-            //    Comparator<AbstractDistinctValuesCollector.GroupCount<Comparable<Object>>> cmp = new Comparator<AbstractDistinctValuesCollector.GroupCount<Comparable<Object>>>() {
-
-            //      @Override
-            //      public int compare(AbstractDistinctValuesCollector.GroupCount<Comparable<Object>> groupCount1, AbstractDistinctValuesCollector.GroupCount<Comparable<Object>> groupCount2)
-            //    {
-            //        if (groupCount1.groupValue == null)
-            //        {
-            //            if (groupCount2.groupValue == null)
-            //            {
-            //                return 0;
-            //            }
-            //            return -1;
-            //        }
-            //        else if (groupCount2.groupValue == null)
-            //        {
-            //            return 1;
-            //        }
-            //        else
-            //        {
-            //            return groupCount1.groupValue.compareTo(groupCount2.groupValue);
-            //        }
-            //    }
-
-            //};
-
             // === Search for content:random
-            AbstractFirstPassGroupingCollector<IComparable<object>> firstCollector = createRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
-            indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector);
-            Collector distinctValuesCollector
-                = createDistinctCountCollector(firstCollector, groupField, countField, dvType.GetValueOrDefault());
-            indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector);
-
-            var gcs = distinctValuesCollector.GetGroups();
-            //Collections.sort(gcs, cmp);
+            IAbstractFirstPassGroupingCollector<IComparable> firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
+            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
+            // so this cast is not necessary. Consider eliminating the Collector abstract class.
+            indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector as Collector);
+            IAbstractDistinctValuesCollector<IGroupCount<IComparable>> distinctValuesCollector
+                = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType.GetValueOrDefault());
+            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
+            // so this cast is not necessary. Consider eliminating the Collector abstract class.
+            indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector as Collector);
+
+            //var gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
+            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
+            var gcs = new List<IGroupCount<IComparable>>(distinctValuesCollector.Groups);
             gcs.Sort(cmp);
             assertEquals(4, gcs.Count);
 
-            compareNull(gcs[0].groupValue);
-            List<IComparable> countValues = new List<IComparable>(gcs[0].uniqueValues);
+            CompareNull(gcs[0].GroupValue);
+            List<IComparable> countValues = new List<IComparable>(gcs[0].UniqueValues);
             assertEquals(1, countValues.size());
-            compare("1", countValues[0]);
+            Compare("1", countValues[0]);
 
-            compare("1", gcs[1].groupValue);
-            countValues = new List<IComparable>(gcs[1].uniqueValues);
-            //Collections.sort(countValues, nullComparator);
+            Compare("1", gcs[1].GroupValue);
+            countValues = new List<IComparable>(gcs[1].UniqueValues);
             countValues.Sort(nullComparator);
             assertEquals(2, countValues.size());
-            compare("1", countValues[0]);
-            compare("2", countValues[1]);
+            Compare("1", countValues[0]);
+            Compare("2", countValues[1]);
 
-            compare("2", gcs[2].groupValue);
-            countValues = new List<IComparable>(gcs[2].uniqueValues);
+            Compare("2", gcs[2].GroupValue);
+            countValues = new List<IComparable>(gcs[2].UniqueValues);
             assertEquals(1, countValues.size());
-            compareNull(countValues[0]);
+            CompareNull(countValues[0]);
 
-            compare("3", gcs[3].groupValue);
-            countValues = new List<IComparable>(gcs[3].uniqueValues);
+            Compare("3", gcs[3].GroupValue);
+            countValues = new List<IComparable>(gcs[3].UniqueValues);
             assertEquals(1, countValues.size());
-            compare("1", countValues[0]);
+            Compare("1", countValues[0]);
 
             // === Search for content:some
-            firstCollector = createRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
-            indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector);
-            distinctValuesCollector = createDistinctCountCollector(firstCollector, groupField, countField, dvType);
-            indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector);
-
-            gcs = distinctValuesCollector.getGroups();
-            //Collections.sort(gcs, cmp);
+            firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
+            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
+            // so this cast is not necessary. Consider eliminating the Collector abstract class.
+            indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector as Collector);
+            distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
+            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
+            // so this cast is not necessary. Consider eliminating the Collector abstract class.
+            indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector as Collector);
+
+            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
+            //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
+            gcs = new List<IGroupCount<IComparable>>(distinctValuesCollector.Groups);
             gcs.Sort(cmp);
             assertEquals(3, gcs.Count);
 
-            compare("1", gcs.get(0).groupValue);
-            countValues = new List<IComparable>(gcs[0].uniqueValues);
+            Compare("1", gcs[0].GroupValue);
+            countValues = new List<IComparable>(gcs[0].UniqueValues);
             assertEquals(2, countValues.size());
-            //Collections.sort(countValues, nullComparator);
             countValues.Sort(nullComparator);
-            compare("1", countValues[0]);
-            compare("2", countValues[1]);
+            Compare("1", countValues[0]);
+            Compare("2", countValues[1]);
 
-            compare("2", gcs[1].groupValue);
-            countValues = new List<IComparable>(gcs[1].uniqueValues);
+            Compare("2", gcs[1].GroupValue);
+            countValues = new List<IComparable>(gcs[1].UniqueValues);
             assertEquals(1, countValues.size());
-            compareNull(countValues[0]);
+            CompareNull(countValues[0]);
 
-            compare("3", gcs.get(2).groupValue);
-            countValues = new List<IComparable>(gcs.get(2).uniqueValues);
+            Compare("3", gcs[2].GroupValue);
+            countValues = new List<IComparable>(gcs[2].UniqueValues);
             assertEquals(1, countValues.size());
-            compare("1", countValues[0]);
+            Compare("1", countValues[0]);
 
             // === Search for content:blob
-            firstCollector = createRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
-            indexSearcher.search(new TermQuery(new Term("content", "blob")), firstCollector);
-            distinctValuesCollector = createDistinctCountCollector(firstCollector, groupField, countField, dvType);
-            indexSearcher.search(new TermQuery(new Term("content", "blob")), distinctValuesCollector);
-
-            gcs = distinctValuesCollector.getGroups();
-            //Collections.sort(gcs, cmp);
+            firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
+            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
+            // so this cast is not necessary. Consider eliminating the Collector abstract class.
+            indexSearcher.Search(new TermQuery(new Term("content", "blob")), firstCollector as Collector);
+            distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
+            // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
+            // so this cast is not necessary. Consider eliminating the Collector abstract class.
+            indexSearcher.Search(new TermQuery(new Term("content", "blob")), distinctValuesCollector as Collector);
+
+            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
+            //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
+            gcs = new List<IGroupCount<IComparable>>(distinctValuesCollector.Groups);
             gcs.Sort(cmp);
             assertEquals(2, gcs.Count);
 
-            compare("1", gcs[0].groupValue);
-            countValues = new List<IComparable>(gcs[0].uniqueValues);
+            Compare("1", gcs[0].GroupValue);
+            countValues = new List<IComparable>(gcs[0].UniqueValues);
             // B/c the only one document matched with blob inside the author 1 group
             assertEquals(1, countValues.Count);
-            compare("1", countValues[0]);
+            Compare("1", countValues[0]);
 
-            compare("3", gcs[1].groupValue);
-            countValues = new List<IComparable>(gcs[1].uniqueValues);
+            Compare("3", gcs[1].GroupValue);
+            countValues = new List<IComparable>(gcs[1].UniqueValues);
             assertEquals(1, countValues.Count);
-            compare("1", countValues[0]);
+            Compare("1", countValues[0]);
 
             indexSearcher.IndexReader.Dispose();
             dir.Dispose();
         }
 
         [Test]
-        public void testRandom()
+        public virtual void TestRandom()
         {
             Random random = Random();
             int numberOfRuns = TestUtil.NextInt(random, 3, 6);
             for (int indexIter = 0; indexIter < numberOfRuns; indexIter++)
             {
-                IndexContext context = createIndexContext();
+                IndexContext context = CreateIndexContext();
                 for (int searchIter = 0; searchIter < 100; searchIter++)
                 {
                     IndexSearcher searcher = NewSearcher(context.indexReader);
                     bool useDv = context.dvType != null && random.nextBoolean();
-                    FieldInfo.DocValuesType_e? dvType = useDv ? context.dvType : (FieldInfo.DocValuesType_e?)null;
+                    FieldInfo.DocValuesType_e? dvType = useDv ? context.dvType : null;
                     string term = context.contentStrings[random.nextInt(context.contentStrings.Length)];
                     Sort groupSort = new Sort(new SortField("id", SortField.Type_e.STRING));
                     int topN = 1 + random.nextInt(10);
 
-                    List<AbstractGroupCount<IComparable>> expectedResult = createExpectedResult(context, term, groupSort, topN);
+                    List<IGroupCount<IComparable>> expectedResult = CreateExpectedResult(context, term, groupSort, topN);
 
-                    AbstractFirstPassGroupingCollector < Comparable <?>> firstCollector = createRandomFirstPassCollector(dvType, groupSort, groupField, topN);
-                    searcher.Search(new TermQuery(new Term("content", term)), firstCollector);
-                    AbstractDistinctValuesCollector <? extends AbstractDistinctValuesCollector.GroupCount < Comparable <?>>> distinctValuesCollector
-                        = createDistinctCountCollector(firstCollector, groupField, countField, dvType);
-                    searcher.Search(new TermQuery(new Term("content", term)), distinctValuesCollector);
+                    IAbstractFirstPassGroupingCollector<IComparable> firstCollector = CreateRandomFirstPassCollector(dvType, groupSort, groupField, topN);
+                    // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
+                    // so this cast is not necessary. Consider eliminating the Collector abstract class.
+                    searcher.Search(new TermQuery(new Term("content", term)), firstCollector as Collector);
+                    IAbstractDistinctValuesCollector<IGroupCount<IComparable>> distinctValuesCollector
+                        = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
+                    // LUCENENET TODO: Create an ICollector interface that we can inherit our Collector interfaces from
+                    // so this cast is not necessary. Consider eliminating the Collector abstract class.
+                    searcher.Search(new TermQuery(new Term("content", term)), distinctValuesCollector as Collector);
 
-                    List<AbstractGroupCount<IComparable>> actualResult = (List<AbstractGroupCount<IComparable>>)distinctValuesCollector.Groups;
+                    // LUCENENET TODO: Try to work out how to do this without an O(n) operation
+                    List<IGroupCount<IComparable>> actualResult = new List<IGroupCount<IComparable>>(distinctValuesCollector.Groups);
 
                     if (VERBOSE)
                     {
@@ -288,27 +276,28 @@ namespace Lucene.Net.Search.Grouping
                         Console.WriteLine("2nd pass collector class name=" + distinctValuesCollector.GetType().Name);
                         Console.WriteLine("Search term=" + term);
                         Console.WriteLine("DVType=" + dvType);
-                        Console.WriteLine("1st pass groups=" + firstCollector.GetTopGroups(0, false));
+                        Console.WriteLine("1st pass groups=" + firstCollector.GetTopGroups(0, false).toString());
                         Console.WriteLine("Expected:");
-                        printGroups(expectedResult);
+                        PrintGroups(expectedResult);
                         Console.WriteLine("Actual:");
-                        printGroups(actualResult);
+                        PrintGroups(actualResult);
+                        Console.Out.Flush();
                     }
 
                     assertEquals(expectedResult.Count, actualResult.Count);
                     for (int i = 0; i < expectedResult.size(); i++)
                     {
-                        AbstractDistinctValuesCollector.GroupCount < Comparable <?>> expected = expectedResult.get(i);
-                        AbstractDistinctValuesCollector.GroupCount < Comparable <?>> actual = actualResult.get(i);
-                        assertValues(expected.groupValue, actual.groupValue);
-                        assertEquals(expected.uniqueValues.size(), actual.uniqueValues.size());
-                        List < Comparable <?>> expectedUniqueValues = new ArrayList<>(expected.uniqueValues);
-                        Collections.sort(expectedUniqueValues, nullComparator);
-                        List < Comparable <?>> actualUniqueValues = new ArrayList<>(actual.uniqueValues);
-                        Collections.sort(actualUniqueValues, nullComparator);
+                        IGroupCount<IComparable> expected = expectedResult[i];
+                        IGroupCount<IComparable> actual = actualResult[i];
+                        AssertValues(expected.GroupValue, actual.GroupValue);
+                        assertEquals(expected.UniqueValues.Count(), actual.UniqueValues.Count());
+                        List<IComparable> expectedUniqueValues = new List<IComparable>(expected.UniqueValues);
+                        expectedUniqueValues.Sort(nullComparator);
+                        List<IComparable> actualUniqueValues = new List<IComparable>(actual.UniqueValues);
+                        actualUniqueValues.Sort(nullComparator);
                         for (int j = 0; j < expectedUniqueValues.size(); j++)
                         {
-                            assertValues(expectedUniqueValues.get(j), actualUniqueValues.get(j));
+                            AssertValues(expectedUniqueValues[j], actualUniqueValues[j]);
                         }
                     }
                 }
@@ -317,12 +306,12 @@ namespace Lucene.Net.Search.Grouping
             }
         }
 
-        private void printGroups(List<AbstractDistinctValuesCollector.GroupCount<IComparable>> results)
+        private void PrintGroups(List<IGroupCount<IComparable>> results)
         {
             for (int i = 0; i < results.size(); i++)
             {
                 var group = results[i];
-                object gv = group.groupValue;
+                object gv = group.GroupValue;
                 if (gv is BytesRef)
                 {
                     Console.WriteLine(i + ": groupValue=" + ((BytesRef)gv).Utf8ToString());
@@ -331,7 +320,7 @@ namespace Lucene.Net.Search.Grouping
                 {
                     Console.WriteLine(i + ": groupValue=" + gv);
                 }
-                foreach (object o in group.uniqueValues)
+                foreach (object o in group.UniqueValues)
                 {
                     if (o is BytesRef)
                     {
@@ -345,19 +334,19 @@ namespace Lucene.Net.Search.Grouping
             }
         }
 
-        private void assertValues(object expected, object actual)
+        private void AssertValues(object expected, object actual)
         {
             if (expected == null)
             {
-                compareNull(actual);
+                CompareNull(actual);
             }
             else
             {
-                compare(((BytesRef)expected).Utf8ToString(), actual);
+                Compare(((BytesRef)expected).Utf8ToString(), actual);
             }
         }
 
-        private void compare(string expected, object groupValue)
+        private void Compare(string expected, object groupValue)
         {
             if (typeof(BytesRef).IsAssignableFrom(groupValue.GetType()))
             {
@@ -383,7 +372,7 @@ namespace Lucene.Net.Search.Grouping
             }
         }
 
-        private void compareNull(object groupValue)
+        private void CompareNull(object groupValue)
         {
             if (groupValue == null)
             {
@@ -413,7 +402,7 @@ namespace Lucene.Net.Search.Grouping
             }
         }
 
-        private void addField(Document doc, string field, string value, FieldInfo.DocValuesType_e? type)
+        private void AddField(Document doc, string field, string value, FieldInfo.DocValuesType_e? type)
         {
             doc.Add(new StringField(field, value, Field.Store.YES));
             if (type == null)
@@ -438,65 +427,66 @@ namespace Lucene.Net.Search.Grouping
             doc.Add(valuesField);
         }
 
-        private AbstractDistinctValuesCollector<AbstractGroupCount<T>> createDistinctCountCollector<T>(AbstractFirstPassGroupingCollector<T> firstPassGroupingCollector,
+        private IAbstractDistinctValuesCollector<IGroupCount<T>> CreateDistinctCountCollector<T>(IAbstractFirstPassGroupingCollector<T> firstPassGroupingCollector,
                                                                             string groupField,
                                                                             string countField,
-                                                                            FieldInfo.DocValuesType_e dvType)
-                  where T : IComparable
+                                                                            FieldInfo.DocValuesType_e? dvType)
         {
             Random random = Random();
-            ICollection<SearchGroup<T>> searchGroups = firstPassGroupingCollector.GetTopGroups(0, false);
+            IEnumerable<ISearchGroup<T>> searchGroups = firstPassGroupingCollector.GetTopGroups(0, false);
             if (typeof(FunctionFirstPassGroupingCollector).IsAssignableFrom(firstPassGroupingCollector.GetType()))
             {
-                return (AbstractDistinctValuesCollector)new FunctionDistinctValuesCollector(new Hashtable(), new BytesRefFieldSource(groupField), new BytesRefFieldSource(countField), searchGroups as ICollection<SearchGroup<MutableValue>>);
+                return (IAbstractDistinctValuesCollector<IGroupCount<T>>)new FunctionDistinctValuesCollector(new Hashtable(), new BytesRefFieldSource(groupField), new BytesRefFieldSource(countField), searchGroups as IEnumerable<ISearchGroup<MutableValue>>);
             }
             else
             {
-                return (AbstractDistinctValuesCollector)new TermDistinctValuesCollector(groupField, countField, searchGroups as ICollection<SearchGroup<BytesRef>>);
+                return (IAbstractDistinctValuesCollector<IGroupCount<T>>)new TermDistinctValuesCollector(groupField, countField, searchGroups as IEnumerable<ISearchGroup<BytesRef>>);
             }
         }
 
-        private AbstractFirstPassGroupingCollector<T> createRandomFirstPassCollector<T>(FieldInfo.DocValuesType_e dvType, Sort groupSort, string groupField, int topNGroups)
+        private IAbstractFirstPassGroupingCollector<IComparable> CreateRandomFirstPassCollector(FieldInfo.DocValuesType_e? dvType, Sort groupSort, string groupField, int topNGroups)
         {
             Random random = Random();
             if (dvType != null)
             {
                 if (random.nextBoolean())
                 {
-                    return (AbstractFirstPassGroupingCollector<T>)new FunctionFirstPassGroupingCollector(new BytesRefFieldSource(groupField), new Hashtable(), groupSort, topNGroups);
+                    return new FunctionFirstPassGroupingCollector(new BytesRefFieldSource(groupField), new Hashtable(), groupSort, topNGroups)
+                        as IAbstractFirstPassGroupingCollector<IComparable>;
                 }
                 else
                 {
-                    return (AbstractFirstPassGroupingCollector<T>)new TermFirstPassGroupingCollector(groupField, groupSort, topNGroups);
+                    return new TermFirstPassGroupingCollector(groupField, groupSort, topNGroups)
+                        as IAbstractFirstPassGroupingCollector<IComparable>;
                 }
             }
             else
             {
                 if (random.nextBoolean())
                 {
-                    return (AbstractFirstPassGroupingCollector<T>)new FunctionFirstPassGroupingCollector(new BytesRefFieldSource(groupField), new Hashtable(), groupSort, topNGroups);
+                    return new FunctionFirstPassGroupingCollector(new BytesRefFieldSource(groupField), new Hashtable(), groupSort, topNGroups)
+                        as IAbstractFirstPassGroupingCollector<IComparable>;
                 }
                 else
                 {
-                    return (AbstractFirstPassGroupingCollector<T>)new TermFirstPassGroupingCollector(groupField, groupSort, topNGroups);
+                    return new TermFirstPassGroupingCollector(groupField, groupSort, topNGroups)
+                        as IAbstractFirstPassGroupingCollector<IComparable>;
                 }
             }
         }
 
         internal class GroupCount : AbstractGroupCount<BytesRef>
         {
-            internal GroupCount(BytesRef groupValue, ICollection<BytesRef> uniqueValues)
+            internal GroupCount(BytesRef groupValue, IEnumerable<BytesRef> uniqueValues)
                 : base(groupValue)
             {
-                this.uniqueValues.UnionWith(uniqueValues);
+                ((ISet<BytesRef>)this.UniqueValues).UnionWith(uniqueValues);
             }
         }
 
-        private List<AbstractGroupCount<IComparable>> createExpectedResult(IndexContext context, string term, Sort groupSort, int topN)
+        private List<IGroupCount<IComparable>> CreateExpectedResult(IndexContext context, string term, Sort groupSort, int topN)
         {
-
-
-            List<AbstractGroupCount<IComparable>> result = new List<AbstractGroupCount<IComparable>>();
+            List<IGroupCount<IComparable>> result = new List<IGroupCount<IComparable>>();
             IDictionary<string, ISet<string>> groupCounts = context.searchTermToGroupCounts[term];
             int i = 0;
             foreach (string group in groupCounts.Keys)
@@ -510,18 +500,19 @@ namespace Lucene.Net.Search.Grouping
                 {
                     uniqueValues.Add(val != null ? new BytesRef(val) : null);
                 }
-                result.Add(new GroupCount(group != null ? new BytesRef(group) : (BytesRef)null, uniqueValues));
+                var gc = new GroupCount(group != null ? new BytesRef(group) : (BytesRef)null, uniqueValues);
+                result.Add(gc);
             }
             return result;
         }
 
-        private IndexContext createIndexContext()
+        private IndexContext CreateIndexContext()
         {
             Random random = Random();
-            FieldInfo.DocValuesType_e[] dvTypes = new FieldInfo.DocValuesType_e[]{
-        FieldInfo.DocValuesType_e.BINARY,
-        FieldInfo.DocValuesType_e.SORTED
-    };
+                FieldInfo.DocValuesType_e[] dvTypes = new FieldInfo.DocValuesType_e[]{
+                FieldInfo.DocValuesType_e.BINARY,
+                FieldInfo.DocValuesType_e.SORTED
+            };
 
             Directory dir = NewDirectory();
             RandomIndexWriter w = new RandomIndexWriter(
@@ -547,28 +538,24 @@ namespace Lucene.Net.Search.Grouping
             }
 
             List<string> contentStrings = new List<string>();
-            IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts = new Dictionary<string, IDictionary<string, ISet<string>>>();
+            IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts = new HashMap<string, IDictionary<string, ISet<string>>>();
             for (int i = 1; i <= numDocs; i++)
             {
                 string groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.Length)];
                 string countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.Length)];
                 string content = "random" + random.nextInt(numDocs / 20);
-                //IDictionary<string, ISet<string>> groupToCounts = searchTermToGroupCounts[content];
-                //      if (groupToCounts == null)
                 IDictionary<string, ISet<string>> groupToCounts;
                 if (!searchTermToGroupCounts.TryGetValue(content, out groupToCounts))
                 {
                     // Groups sort always DOCID asc...
-                    searchTermToGroupCounts[content] = groupToCounts = new LurchTable<string, ISet<string>>(16);
+                    searchTermToGroupCounts.Add(content, groupToCounts = new LinkedHashMap<string, ISet<string>>());
                     contentStrings.Add(content);
                 }
 
-                //ISet<string> countsVals = groupToCounts.get(groupValue);
-                //if (countsVals == null)
                 ISet<string> countsVals;
                 if (!groupToCounts.TryGetValue(groupValue, out countsVals))
                 {
-                    groupToCounts[groupValue] = countsVals = new HashSet<string>();
+                    groupToCounts.Add(groupValue, countsVals = new HashSet<string>());
                 }
                 countsVals.Add(countValue);
 
@@ -576,11 +563,11 @@ namespace Lucene.Net.Search.Grouping
                 doc.Add(new StringField("id", string.Format(CultureInfo.InvariantCulture, "{0:D9}", i), Field.Store.YES));
                 if (groupValue != null)
                 {
-                    addField(doc, groupField, groupValue, dvType);
+                    AddField(doc, groupField, groupValue, dvType);
                 }
                 if (countValue != null)
                 {
-                    addField(doc, countField, countValue, dvType);
+                    AddField(doc, countField, countValue, dvType);
                 }
                 doc.Add(new TextField("content", content, Field.Store.YES));
                 w.AddDocument(doc);
@@ -597,7 +584,7 @@ namespace Lucene.Net.Search.Grouping
             }
 
             w.Dispose();
-            return new IndexContext(dir, reader, dvType.GetValueOrDefault(), searchTermToGroupCounts, contentStrings.ToArray(/*new String[contentStrings.size()]*/));
+            return new IndexContext(dir, reader, dvType, searchTermToGroupCounts, contentStrings.ToArray(/*new String[contentStrings.size()]*/));
         }
 
         internal class IndexContext
@@ -605,11 +592,11 @@ namespace Lucene.Net.Search.Grouping
 
             internal readonly Directory directory;
             internal readonly DirectoryReader indexReader;
-            internal readonly FieldInfo.DocValuesType_e dvType;
+            internal readonly FieldInfo.DocValuesType_e? dvType;
             internal readonly IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts;
             internal readonly string[] contentStrings;
 
-            internal IndexContext(Directory directory, DirectoryReader indexReader, FieldInfo.DocValuesType_e dvType,
+            internal IndexContext(Directory directory, DirectoryReader indexReader, FieldInfo.DocValuesType_e? dvType,
                          IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts, string[] contentStrings)
             {
                 this.directory = directory;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/44c29eb8/src/Lucene.Net.Tests.Grouping/Lucene.Net.Tests.Grouping.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Grouping/Lucene.Net.Tests.Grouping.csproj b/src/Lucene.Net.Tests.Grouping/Lucene.Net.Tests.Grouping.csproj
index cc96210..f5906f1 100644
--- a/src/Lucene.Net.Tests.Grouping/Lucene.Net.Tests.Grouping.csproj
+++ b/src/Lucene.Net.Tests.Grouping/Lucene.Net.Tests.Grouping.csproj
@@ -47,6 +47,7 @@
     <Compile Include="AbstractGroupingTestCase.cs" />
     <Compile Include="AllGroupHeadsCollectorTest.cs" />
     <Compile Include="AllGroupsCollectorTest.cs" />
+    <Compile Include="DistinctValuesCollectorTest.cs" />
     <Compile Include="GroupFacetCollectorTest.cs" />
     <Compile Include="GroupingSearchTest.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />


Mime
View raw message