From: nightowl888@apache.org
To: commits@lucenenet.apache.org
Date: Thu, 10 Nov 2016 11:47:42 -0000
Message-Id: <43766660a0db4931ba736933ca192c52@git.apache.org>
In-Reply-To: <75d0a803894f49d5a0c77d723f975556@git.apache.org>
References: <75d0a803894f49d5a0c77d723f975556@git.apache.org>
Subject: [28/58] [abbrv] lucenenet git commit: WIP on Grouping

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Tests.Grouping/TestGrouping.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Grouping/TestGrouping.cs b/src/Lucene.Net.Tests.Grouping/TestGrouping.cs
new file mode 100644
index 0000000..cda472d
--- /dev/null
+++ b/src/Lucene.Net.Tests.Grouping/TestGrouping.cs
@@ -0,0 +1,1692 @@
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Queries.Function;
+using Lucene.Net.Queries.Function.ValueSources;
+using Lucene.Net.Search.Grouping.Function;
+using Lucene.Net.Search.Grouping.Terms;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Mutable;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping
+{
+    // TODO
+    //   - should test relevance sort too
+    //   - test null
+    //   - test ties
+    //   - test compound sort
+    public class TestGrouping : LuceneTestCase
+    {
+        [Test]
+        public void testBasic()
+        {
+
+            string groupField = "author";
+
+            FieldType customType = new FieldType();
+            customType.Stored = true;
+
+            Directory dir = NewDirectory();
+            RandomIndexWriter w = new RandomIndexWriter(
+                Random(),
+                dir,
+                NewIndexWriterConfig(TEST_VERSION_CURRENT,
+                    new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
+            bool canUseIDV = !"Lucene3x".equals(w.w.Config.Codec.Name);
+            // 0
+            Document doc = new Document();
+            addGroupField(doc, groupField, "author1", canUseIDV);
+            doc.Add(new TextField("content", "random text", Field.Store.YES));
+            doc.Add(new Field("id", "1", customType));
Field("id", "1", customType)); + w.AddDocument(doc); + + // 1 + doc = new Document(); + addGroupField(doc, groupField, "author1", canUseIDV); + doc.Add(new TextField("content", "some more random text", Field.Store.YES)); + doc.Add(new Field("id", "2", customType)); + w.AddDocument(doc); + + // 2 + doc = new Document(); + addGroupField(doc, groupField, "author1", canUseIDV); + doc.Add(new TextField("content", "some more random textual data", Field.Store.YES)); + doc.Add(new Field("id", "3", customType)); + w.AddDocument(doc); + + // 3 + doc = new Document(); + addGroupField(doc, groupField, "author2", canUseIDV); + doc.Add(new TextField("content", "some random text", Field.Store.YES)); + doc.Add(new Field("id", "4", customType)); + w.AddDocument(doc); + + // 4 + doc = new Document(); + addGroupField(doc, groupField, "author3", canUseIDV); + doc.Add(new TextField("content", "some more random text", Field.Store.YES)); + doc.Add(new Field("id", "5", customType)); + w.AddDocument(doc); + + // 5 + doc = new Document(); + addGroupField(doc, groupField, "author3", canUseIDV); + doc.Add(new TextField("content", "random", Field.Store.YES)); + doc.Add(new Field("id", "6", customType)); + w.AddDocument(doc); + + // 6 -- no author field + doc = new Document(); + doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES)); + doc.Add(new Field("id", "6", customType)); + w.AddDocument(doc); + + IndexSearcher indexSearcher = NewSearcher(w.Reader); + w.Dispose(); + + Sort groupSort = Sort.RELEVANCE; + + if (canUseIDV && Random().nextBoolean()) + { + groupField += "_dv"; + } + + AbstractFirstPassGroupingCollector c1 = createRandomFirstPassCollector(groupField, groupSort, 10); + indexSearcher.Search(new TermQuery(new Index.Term("content", "random")), c1); + + AbstractSecondPassGroupingCollector c2 = createSecondPassCollector(c1, groupField, groupSort, null, 0, 5, true, true, true); + indexSearcher.Search(new TermQuery(new Index.Term("content", "random")), c2); + + TopGroups groups = c2.getTopGroups(0); + assertFalse(float.IsNaN(groups.maxScore)); + + assertEquals(7, groups.totalHitCount); + assertEquals(7, groups.totalGroupedHitCount); + assertEquals(4, groups.groups.Length); + + // relevance order: 5, 0, 3, 4, 1, 2, 6 + + // the later a document is added the higher this docId + // value + GroupDocs group = groups.groups[0]; + compareGroupValue("author3", group); + assertEquals(2, group.scoreDocs.Length); + assertEquals(5, group.scoreDocs[0].doc); + assertEquals(4, group.scoreDocs[1].doc); + assertTrue(group.scoreDocs[0].score > group.scoreDocs[1].score); + + group = groups.groups[1]; + compareGroupValue("author1", group); + assertEquals(3, group.scoreDocs.Length); + assertEquals(0, group.scoreDocs[0].doc); + assertEquals(1, group.scoreDocs[1].doc); + assertEquals(2, group.scoreDocs[2].doc); + assertTrue(group.scoreDocs[0].score > group.scoreDocs[1].score); + assertTrue(group.scoreDocs[1].score > group.scoreDocs[2].score); + + group = groups.groups[2]; + compareGroupValue("author2", group); + assertEquals(1, group.scoreDocs.Length); + assertEquals(3, group.scoreDocs[0].doc); + + group = groups.groups[3]; + compareGroupValue(null, group); + assertEquals(1, group.scoreDocs.Length); + assertEquals(6, group.scoreDocs[0].doc); + + indexSearcher.IndexReader.Dispose(); + dir.Dispose(); + } + + private void addGroupField(Document doc, string groupField, string value, bool canUseIDV) + { + doc.Add(new TextField(groupField, value, Field.Store.YES)); + if (canUseIDV) + { + doc.Add(new 
+        private AbstractFirstPassGroupingCollector createRandomFirstPassCollector(string groupField, Sort groupSort, int topDocs)
+        {
+            AbstractFirstPassGroupingCollector selected;
+            if (Random().nextBoolean())
+            {
+                ValueSource vs = new BytesRefFieldSource(groupField);
+                selected = new FunctionFirstPassGroupingCollector(vs, new Dictionary(), groupSort, topDocs);
+            }
+            else
+            {
+                selected = new TermFirstPassGroupingCollector(groupField, groupSort, topDocs);
+            }
+            if (VERBOSE)
+            {
+                Console.WriteLine("Selected implementation: " + selected.GetType().Name);
+            }
+            return selected;
+        }
+
+        private AbstractFirstPassGroupingCollector createFirstPassCollector(string groupField, Sort groupSort, int topDocs, AbstractFirstPassGroupingCollector firstPassGroupingCollector)
+        {
+            if (typeof(TermFirstPassGroupingCollector).IsAssignableFrom(firstPassGroupingCollector.GetType()))
+            {
+                ValueSource vs = new BytesRefFieldSource(groupField);
+                return new FunctionFirstPassGroupingCollector(vs, new Dictionary(), groupSort, topDocs);
+            }
+            else
+            {
+                return new TermFirstPassGroupingCollector(groupField, groupSort, topDocs);
+            }
+        }
+
+        private AbstractSecondPassGroupingCollector createSecondPassCollector(AbstractFirstPassGroupingCollector firstPassGroupingCollector,
+                                                                              string groupField,
+                                                                              Sort groupSort,
+                                                                              Sort sortWithinGroup,
+                                                                              int groupOffset,
+                                                                              int maxDocsPerGroup,
+                                                                              bool getScores,
+                                                                              bool getMaxScores,
+                                                                              bool fillSortFields)
+        {
+
+            if (typeof(TermFirstPassGroupingCollector).IsAssignableFrom(firstPassGroupingCollector.GetType()))
+            {
+                ICollection> searchGroups = firstPassGroupingCollector.GetTopGroups(groupOffset, fillSortFields);
+                return (AbstractSecondPassGroupingCollector)new TermSecondPassGroupingCollector(groupField, searchGroups, groupSort, sortWithinGroup, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+            }
+            else
+            {
+                ValueSource vs = new BytesRefFieldSource(groupField);
+                ICollection> searchGroups = firstPassGroupingCollector.GetTopGroups(groupOffset, fillSortFields);
+                return (AbstractSecondPassGroupingCollector)new FunctionSecondPassGroupingCollector(searchGroups, groupSort, sortWithinGroup, maxDocsPerGroup, getScores, getMaxScores, fillSortFields, vs, new HashMap());
+            }
+        }
+
+        // Basically converts searchGroups from MutableValue to BytesRef if grouping by ValueSource
+        private AbstractSecondPassGroupingCollector createSecondPassCollector(AbstractFirstPassGroupingCollector firstPassGroupingCollector,
+                                                                              string groupField,
+                                                                              ICollection> searchGroups,
+                                                                              Sort groupSort,
+                                                                              Sort sortWithinGroup,
+                                                                              int maxDocsPerGroup,
+                                                                              bool getScores,
+                                                                              bool getMaxScores,
+                                                                              bool fillSortFields)
+        {
+            if (firstPassGroupingCollector.GetType().IsAssignableFrom(typeof(TermFirstPassGroupingCollector)))
+            {
+                return new TermSecondPassGroupingCollector(groupField, searchGroups, groupSort, sortWithinGroup, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+            }
+            else
+            {
+                ValueSource vs = new BytesRefFieldSource(groupField);
+                List> mvalSearchGroups = new List>(searchGroups.size());
+                foreach (SearchGroup mergedTopGroup in searchGroups)
+                {
+                    SearchGroup sg = new SearchGroup();
+                    MutableValueStr groupValue = new MutableValueStr();
+                    if (mergedTopGroup.groupValue != null)
+                    {
+                        groupValue.Value = mergedTopGroup.groupValue;
+                    }
+                    else
+                    {
+                        groupValue.Value = new BytesRef();
+                        groupValue.Exists = false;
+                    }
+                    sg.groupValue = groupValue;
+                    sg.sortValues = mergedTopGroup.sortValues;
+                    mvalSearchGroups.Add(sg);
+                }
+
+                return new 
FunctionSecondPassGroupingCollector(mvalSearchGroups, groupSort, sortWithinGroup, maxDocsPerGroup, getScores, getMaxScores, fillSortFields, vs, new HashMap<>()); + } + } + + private AbstractAllGroupsCollector createAllGroupsCollector(AbstractFirstPassGroupingCollector firstPassGroupingCollector, + string groupField) + { + if (firstPassGroupingCollector.GetType().IsAssignableFrom(typeof(TermFirstPassGroupingCollector))) + { + return new TermAllGroupsCollector(groupField); + } + else + { + ValueSource vs = new BytesRefFieldSource(groupField); + return new FunctionAllGroupsCollector(vs, new HashMap<>()); + } + } + + private void compareGroupValue(string expected, GroupDocs group) + { + if (expected == null) + { + if (group.GroupValue == null) + { + return; + } + else if (group.GroupValue.GetType().IsAssignableFrom(typeof(MutableValueStr))) + { + return; + } + else if ((group.GroupValue as BytesRef).Length == 0) + { + return; + } + fail(); + } + + if (group.GroupValue.GetType().IsAssignableFrom(typeof(BytesRef))) + { + assertEquals(new BytesRef(expected), group.GroupValue); + } + else if (group.GroupValue.GetType().IsAssignableFrom(typeof(MutableValueStr))) + { + MutableValueStr v = new MutableValueStr(); + v.Value = new BytesRef(expected); + assertEquals(v, group.GroupValue); + } + else + { + fail(); + } + } + + private ICollection> getSearchGroups(AbstractFirstPassGroupingCollector c, int groupOffset, bool fillFields) + { + if (typeof(TermFirstPassGroupingCollector).IsAssignableFrom(c.GetType())) + { + + return (ICollection>)c.GetTopGroups(groupOffset, fillFields); + } + else if (typeof(FunctionFirstPassGroupingCollector).IsAssignableFrom(c.GetType())) + { + var mutableValueGroups = c.GetTopGroups(groupOffset, fillFields); + if (mutableValueGroups == null) + { + return null; + } + + List> groups = new List>(mutableValueGroups.size()); + foreach (var mutableValueGroup in mutableValueGroups) + { + SearchGroup sg = new SearchGroup(); + sg.groupValue = mutableValueGroup.groupValue.Exists ? ((MutableValueStr)mutableValueGroup.groupValue).Value : null; + sg.sortValues = mutableValueGroup.sortValues; + groups.Add(sg); + } + return groups; + } + fail(); + return null; + } + + private TopGroups getTopGroups(AbstractSecondPassGroupingCollector c, int withinGroupOffset) + { + if (c.GetType().IsAssignableFrom(typeof(TermSecondPassGroupingCollector))) + { + return ((TermSecondPassGroupingCollector)c).GetTopGroups(withinGroupOffset); + } + else if (c.GetType().IsAssignableFrom(typeof(FunctionSecondPassGroupingCollector))) + { + TopGroups mvalTopGroups = ((FunctionSecondPassGroupingCollector)c).GetTopGroups(withinGroupOffset); + List> groups = new List>(mvalTopGroups.Groups.Length); + foreach (GroupDocs mvalGd in mvalTopGroups.Groups) + { + BytesRef groupValue = mvalGd.GroupValue.Exists ? ((MutableValueStr)mvalGd.GroupValue).Value : null; + groups.Add(new GroupDocs(float.NaN, mvalGd.MaxScore, mvalGd.TotalHits, mvalGd.ScoreDocs, groupValue, mvalGd.GroupSortValues)); + } + return new TopGroups(mvalTopGroups.GroupSort, mvalTopGroups.WithinGroupSort, mvalTopGroups.TotalHitCount, mvalTopGroups.TotalGroupedHitCount, groups.ToArray(/*new GroupDocs[groups.size()]*/), float.NaN); + } + fail(); + return null; + } + + internal class GroupDoc + { + internal readonly int id; + internal readonly BytesRef group; + internal readonly BytesRef sort1; + internal readonly BytesRef sort2; + // content must be "realN ..." 
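+            // ("realN" because slowGrouping() emulates the TermQuery by keeping
+            // only docs whose content starts with the search term, which is
+            // always "real0", "real1", or "real2" -- see testRandom().)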
+ internal readonly string content; + internal float score; + internal float score2; + + public GroupDoc(int id, BytesRef group, BytesRef sort1, BytesRef sort2, string content) + { + this.id = id; + this.group = group; + this.sort1 = sort1; + this.sort2 = sort2; + this.content = content; + } + } + + private Sort getRandomSort() + { + List sortFields = new List(); + if (Random().nextInt(7) == 2) + { + sortFields.Add(SortField.FIELD_SCORE); + } + else + { + if (Random().nextBoolean()) + { + if (Random().nextBoolean()) + { + sortFields.Add(new SortField("sort1", SortField.Type_e.STRING, Random().nextBoolean())); + } + else + { + sortFields.Add(new SortField("sort2", SortField.Type_e.STRING, Random().nextBoolean())); + } + } + else if (Random().nextBoolean()) + { + sortFields.Add(new SortField("sort1", SortField.Type_e.STRING, Random().nextBoolean())); + sortFields.Add(new SortField("sort2", SortField.Type_e.STRING, Random().nextBoolean())); + } + } + // Break ties: + sortFields.Add(new SortField("id", SortField.Type_e.INT)); + return new Sort(sortFields.ToArray(/*new SortField[sortFields.size()]*/)); + } + + internal class ComparerAnonymousHelper : IComparer + { + private readonly TestGrouping outerInstance; + private readonly SortField[] sortFields; + internal ComparerAnonymousHelper(TestGrouping outerInstance, SortField[] sortFields) + { + this.outerInstance = outerInstance; + this.sortFields = sortFields; + } + + public int Compare(GroupDoc d1, GroupDoc d2) + { + foreach (SortField sf in sortFields) + { + int cmp; + if (sf.Type == SortField.Type_e.SCORE) + { + if (d1.score > d2.score) + { + cmp = -1; + } + else if (d1.score < d2.score) + { + cmp = 1; + } + else + { + cmp = 0; + } + } + else if (sf.Field.equals("sort1")) + { + cmp = d1.sort1.CompareTo(d2.sort1); + } + else if (sf.Field.equals("sort2")) + { + cmp = d1.sort2.CompareTo(d2.sort2); + } + else + { + assertEquals(sf.Field, "id"); + cmp = d1.id - d2.id; + } + if (cmp != 0) + { + return sf.Reverse ? -cmp : cmp; + } + } + // Our sort always fully tie breaks: + fail(); + return 0; + } + } + + private IComparer getComparator(Sort sort) + { + SortField[] sortFields = sort.GetSort(); + return new ComparerAnonymousHelper(this, sortFields); + // return new Comparator() { + // @Override + // public int compare(GroupDoc d1, GroupDoc d2) + //{ + // for (SortField sf : sortFields) + // { + // final int cmp; + // if (sf.getType() == SortField.Type.SCORE) + // { + // if (d1.score > d2.score) + // { + // cmp = -1; + // } + // else if (d1.score < d2.score) + // { + // cmp = 1; + // } + // else + // { + // cmp = 0; + // } + // } + // else if (sf.getField().equals("sort1")) + // { + // cmp = d1.sort1.compareTo(d2.sort1); + // } + // else if (sf.getField().equals("sort2")) + // { + // cmp = d1.sort2.compareTo(d2.sort2); + // } + // else + // { + // assertEquals(sf.getField(), "id"); + // cmp = d1.id - d2.id; + // } + // if (cmp != 0) + // { + // return sf.getReverse() ? 
-cmp : cmp; + // } + // } + // // Our sort always fully tie breaks: + // fail(); + // return 0; + //} + // }; + } + + private IComparable[] FillFields(GroupDoc d, Sort sort) + { + SortField[] sortFields = sort.GetSort(); + IComparable[] fields = new IComparable[sortFields.Length]; + for (int fieldIDX = 0; fieldIDX < sortFields.Length; fieldIDX++) + { + IComparable c; + SortField sf = sortFields[fieldIDX]; + if (sf.Type == SortField.Type_e.SCORE) + { + c = new float?(d.score); + } + else if (sf.Field.equals("sort1")) + { + c = d.sort1; + } + else if (sf.Field.equals("sort2")) + { + c = d.sort2; + } + else + { + assertEquals("id", sf.Field); + c = new int?(d.id); + } + fields[fieldIDX] = c; + } + return fields; + } + + private string groupToString(BytesRef b) + { + if (b == null) + { + return "null"; + } + else + { + return b.Utf8ToString(); + } + } + + private TopGroups slowGrouping(GroupDoc[] groupDocs, + string searchTerm, + bool fillFields, + bool getScores, + bool getMaxScores, + bool doAllGroups, + Sort groupSort, + Sort docSort, + int topNGroups, + int docsPerGroup, + int groupOffset, + int docOffset) + { + + IComparer groupSortComp = getComparator(groupSort); + + Array.Sort(groupDocs, groupSortComp); + HashMap> groups = new HashMap>(); + List sortedGroups = new List(); + List sortedGroupFields = new List(); + + int totalHitCount = 0; + ISet knownGroups = new HashSet(); + + //Console.WriteLine("TEST: slowGrouping"); + foreach (GroupDoc d in groupDocs) + { + // TODO: would be better to filter by searchTerm before sorting! + if (!d.content.StartsWith(searchTerm)) + { + continue; + } + totalHitCount++; + //Console.WriteLine(" match id=" + d.id + " score=" + d.score); + + if (doAllGroups) + { + if (!knownGroups.contains(d.group)) + { + knownGroups.add(d.group); + //Console.WriteLine(" add group=" + groupToString(d.group)); + } + } + + List l = groups[d.group]; + if (l == null) + { + //Console.WriteLine(" add sortedGroup=" + groupToString(d.group)); + sortedGroups.Add(d.group); + if (fillFields) + { + sortedGroupFields.Add(FillFields(d, groupSort)); + } + l = new List(); + groups.Put(d.group, l); + } + l.Add(d); + } + + if (groupOffset >= sortedGroups.size()) + { + // slice is out of bounds + return null; + } + + int limit = Math.Min(groupOffset + topNGroups, groups.size()); + + IComparer docSortComp = getComparator(docSort); + + GroupDocs[] result = new GroupDocs[limit - groupOffset]; + int totalGroupedHitCount = 0; + for (int idx = groupOffset; idx < limit; idx++) + { + BytesRef group = sortedGroups[idx]; + List docs = groups[group]; + totalGroupedHitCount += docs.size(); + //Collections.sort(docs, docSortComp); + docs.Sort(docSortComp); + ScoreDoc[] hits; + if (docs.size() > docOffset) + { + int docIDXLimit = Math.Min(docOffset + docsPerGroup, docs.size()); + hits = new ScoreDoc[docIDXLimit - docOffset]; + for (int docIDX = docOffset; docIDX < docIDXLimit; docIDX++) + { + GroupDoc d = docs[docIDX]; + FieldDoc fd; + if (fillFields) + { + fd = new FieldDoc(d.id, getScores ? d.score : float.NaN, FillFields(d, docSort)); + } + else + { + fd = new FieldDoc(d.id, getScores ? d.score : float.NaN); + } + hits[docIDX - docOffset] = fd; + } + } + else + { + hits = new ScoreDoc[0]; + } + + result[idx - groupOffset] = new GroupDocs(float.NaN, + 0.0f, + docs.size(), + hits, + group, + fillFields ? 
sortedGroupFields[idx] : null); + } + + if (doAllGroups) + { + return new TopGroups( + new TopGroups(groupSort.GetSort(), docSort.GetSort(), totalHitCount, totalGroupedHitCount, result, float.NaN), + knownGroups.size() + ); + } + else + { + return new TopGroups(groupSort.GetSort(), docSort.GetSort(), totalHitCount, totalGroupedHitCount, result, float.NaN); + } + } + + private DirectoryReader getDocBlockReader(Directory dir, GroupDoc[] groupDocs) + { + // Coalesce by group, but in random order: + //Collections.shuffle(Arrays.asList(groupDocs), Random()); + groupDocs = CollectionsHelper.Shuffle(Arrays.AsList(groupDocs)).ToArray(); + HashMap> groupMap = new HashMap>(); + List groupValues = new List(); + + foreach (GroupDoc groupDoc in groupDocs) + { + if (!groupMap.ContainsKey(groupDoc.group)) + { + groupValues.Add(groupDoc.group); + groupMap.Put(groupDoc.group, new List()); + } + groupMap[groupDoc.group].Add(groupDoc); + } + + RandomIndexWriter w = new RandomIndexWriter( + Random(), + dir, + NewIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(Random()))); + + List> updateDocs = new List>(); + + FieldType groupEndType = new FieldType(StringField.TYPE_NOT_STORED); + groupEndType.IndexOptions = (FieldInfo.IndexOptions.DOCS_ONLY); + groupEndType.OmitNorms = (true); + + //Console.WriteLine("TEST: index groups"); + foreach (BytesRef group in groupValues) + { + List docs = new List(); + //Console.WriteLine("TEST: group=" + (group == null ? "null" : group.utf8ToString())); + foreach (GroupDoc groupValue in groupMap[group]) + { + Document doc = new Document(); + docs.Add(doc); + if (groupValue.group != null) + { + doc.Add(NewStringField("group", groupValue.group.Utf8ToString(), Field.Store.NO)); + } + doc.Add(NewStringField("sort1", groupValue.sort1.Utf8ToString(), Field.Store.NO)); + doc.Add(NewStringField("sort2", groupValue.sort2.Utf8ToString(), Field.Store.NO)); + doc.Add(new IntField("id", groupValue.id, Field.Store.NO)); + doc.Add(NewTextField("content", groupValue.content, Field.Store.NO)); + //Console.WriteLine("TEST: doc content=" + groupValue.content + " group=" + (groupValue.group == null ? 
"null" : groupValue.group.utf8ToString()) + " sort1=" + groupValue.sort1.utf8ToString() + " id=" + groupValue.id); + } + // So we can pull filter marking last doc in block: + Field groupEnd = NewField("groupend", "x", groupEndType); + docs[docs.size() - 1].Add(groupEnd); + // Add as a doc block: + w.AddDocuments(docs); + if (group != null && Random().nextInt(7) == 4) + { + updateDocs.Add(docs); + } + } + + foreach (List docs in updateDocs) + { + // Just replaces docs w/ same docs: + w.UpdateDocuments(new Index.Term("group", docs[0].Get("group")), docs); + } + + DirectoryReader r = w.Reader; + w.Dispose(); + + return r; + } + + internal class ShardState + { + + public readonly ShardSearcher[] subSearchers; + public readonly int[] docStarts; + + public ShardState(IndexSearcher s) + { + IndexReaderContext ctx = s.TopReaderContext; + IList leaves = ctx.Leaves; + subSearchers = new ShardSearcher[leaves.size()]; + for (int searcherIDX = 0; searcherIDX < subSearchers.Length; searcherIDX++) + { + subSearchers[searcherIDX] = new ShardSearcher(leaves[searcherIDX], ctx); + } + + docStarts = new int[subSearchers.Length]; + for (int subIDX = 0; subIDX < docStarts.Length; subIDX++) + { + docStarts[subIDX] = leaves[subIDX].DocBase; + //Console.WriteLine("docStarts[" + subIDX + "]=" + docStarts[subIDX]); + } + } + } + + [Test] + public void testRandom() + { + int numberOfRuns = TestUtil.NextInt(Random(), 3, 6); + for (int iter = 0; iter < numberOfRuns; iter++) + { + if (VERBOSE) + { + Console.WriteLine("TEST: iter=" + iter); + } + + int numDocs = TestUtil.NextInt(Random(), 100, 1000) * RANDOM_MULTIPLIER; + //final int numDocs = TestUtil.nextInt(random, 5, 20); + + int numGroups = TestUtil.NextInt(Random(), 1, numDocs); + + if (VERBOSE) + { + Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups); + } + + List groups = new List(); + for (int i = 0; i < numGroups; i++) + { + string randomValue; + do + { + // B/c of DV based impl we can't see the difference between an empty string and a null value. + // For that reason we don't generate empty string + // groups. 
+ randomValue = TestUtil.RandomRealisticUnicodeString(Random()); + //randomValue = TestUtil.randomSimpleString(Random()); + } while ("".equals(randomValue)); + + groups.Add(new BytesRef(randomValue)); + } + string[] contentStrings = new string[TestUtil.NextInt(Random(), 2, 20)]; + if (VERBOSE) + { + Console.WriteLine("TEST: create fake content"); + } + for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++) + { + StringBuilder sb = new StringBuilder(); + sb.append("real").append(Random().nextInt(3)).append(' '); + int fakeCount = Random().nextInt(10); + for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) + { + sb.append("fake "); + } + contentStrings[contentIDX] = sb.toString(); + if (VERBOSE) + { + Console.WriteLine(" content=" + sb.toString()); + } + } + + Directory dir = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter( + Random(), + dir, + NewIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(Random()))); + bool preFlex = "Lucene3x".equals(w.w.Config.Codec.Name); + bool canUseIDV = !preFlex; + + Document doc = new Document(); + Document docNoGroup = new Document(); + Field idvGroupField = new SortedDocValuesField("group_dv", new BytesRef()); + if (canUseIDV) + { + doc.Add(idvGroupField); + docNoGroup.Add(idvGroupField); + } + + Field group = NewStringField("group", "", Field.Store.NO); + doc.Add(group); + Field sort1 = NewStringField("sort1", "", Field.Store.NO); + doc.Add(sort1); + docNoGroup.Add(sort1); + Field sort2 = NewStringField("sort2", "", Field.Store.NO); + doc.Add(sort2); + docNoGroup.Add(sort2); + Field content = NewTextField("content", "", Field.Store.NO); + doc.Add(content); + docNoGroup.Add(content); + IntField id = new IntField("id", 0, Field.Store.NO); + doc.Add(id); + docNoGroup.Add(id); + GroupDoc[] groupDocs = new GroupDoc[numDocs]; + for (int i = 0; i < numDocs; i++) + { + BytesRef groupValue; + if (Random().nextInt(24) == 17) + { + // So we test the "doc doesn't have the group'd + // field" case: + groupValue = null; + } + else + { + groupValue = groups[Random().nextInt(groups.size())]; + } + GroupDoc groupDoc = new GroupDoc(i, + groupValue, + groups[Random().nextInt(groups.size())], + groups[Random().nextInt(groups.size())], + contentStrings[Random().nextInt(contentStrings.Length)]); + if (VERBOSE) + { + Console.WriteLine(" doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString()); + } + + groupDocs[i] = groupDoc; + if (groupDoc.group != null) + { + group.StringValue = (groupDoc.group.Utf8ToString()); + if (canUseIDV) + { + idvGroupField.BytesValue = (BytesRef.DeepCopyOf(groupDoc.group)); + } + } + else if (canUseIDV) + { + // Must explicitly set empty string, else eg if + // the segment has all docs missing the field then + // we get null back instead of empty BytesRef: + idvGroupField.BytesValue = (new BytesRef()); + } + sort1.StringValue = (groupDoc.sort1.Utf8ToString()); + sort2.StringValue = (groupDoc.sort2.Utf8ToString()); + content.StringValue = (groupDoc.content); + id.IntValue = (groupDoc.id); + if (groupDoc.group == null) + { + w.AddDocument(docNoGroup); + } + else + { + w.AddDocument(doc); + } + } + + GroupDoc[] groupDocsByID = new GroupDoc[groupDocs.Length]; + System.Array.Copy(groupDocs, 0, groupDocsByID, 0, groupDocs.Length); + + DirectoryReader r = w.Reader; + w.Dispose(); + + // NOTE: intentional but temporary field cache insanity! 
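+                // (Wrapping r in SlowCompositeReaderWrapper caches "id" at the
+                // composite reader level while the searches below run against the
+                // leaves; the finally block purges the entries for both readers
+                // via QueryUtils.PurgeFieldCache.)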
+ FieldCache.Ints docIDToID = FieldCache.DEFAULT.GetInts(SlowCompositeReaderWrapper.Wrap(r), "id", false); + DirectoryReader rBlocks = null; + Directory dirBlocks = null; + + try + { + IndexSearcher s = NewSearcher(r); + if (VERBOSE) + { + Console.WriteLine("\nTEST: searcher=" + s); + } + + if (typeof(SlowCompositeReaderWrapper).IsAssignableFrom(s.IndexReader.GetType())) + { + canUseIDV = false; + } + else + { + canUseIDV = !preFlex; + } + ShardState shards = new ShardState(s); + + for (int contentID = 0; contentID < 3; contentID++) + { + ScoreDoc[] hits = s.Search(new TermQuery(new Index.Term("content", "real" + contentID)), numDocs).ScoreDocs; + foreach (ScoreDoc hit in hits) + { + GroupDoc gd = groupDocs[docIDToID.Get(hit.Doc)]; + assertTrue(gd.score == 0.0); + gd.score = hit.Score; + assertEquals(gd.id, docIDToID.Get(hit.Doc)); + } + } + + foreach (GroupDoc gd in groupDocs) + { + assertTrue(gd.score != 0.0); + } + + // Build 2nd index, where docs are added in blocks by + // group, so we can use single pass collector + dirBlocks = NewDirectory(); + rBlocks = getDocBlockReader(dirBlocks, groupDocs); + Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupend", "x")))); + FieldCache.Ints docIDToIDBlocks = FieldCache.DEFAULT.GetInts(SlowCompositeReaderWrapper.Wrap(rBlocks), "id", false); + + IndexSearcher sBlocks = NewSearcher(rBlocks); + ShardState shardsBlocks = new ShardState(sBlocks); + + // ReaderBlocks only increases maxDoc() vs reader, which + // means a monotonic shift in scores, so we can + // reliably remap them w/ Map: + IDictionary> scoreMap = new Dictionary>(); + + // Tricky: must separately set .score2, because the doc + // block index was created with possible deletions! + //Console.WriteLine("fixup score2"); + for (int contentID = 0; contentID < 3; contentID++) + { + //Console.WriteLine(" term=real" + contentID); + IDictionary termScoreMap = new Dictionary(); + scoreMap.Put("real" + contentID, termScoreMap); + //Console.WriteLine("term=real" + contentID + " dfold=" + s.docFreq(new Term("content", "real"+contentID)) + + //" dfnew=" + sBlocks.docFreq(new Term("content", "real"+contentID))); + ScoreDoc[] hits = sBlocks.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs; + foreach (ScoreDoc hit in hits) + { + GroupDoc gd = groupDocsByID[docIDToIDBlocks.Get(hit.Doc)]; + assertTrue(gd.score2 == 0.0); + gd.score2 = hit.Score; + assertEquals(gd.id, docIDToIDBlocks.Get(hit.Doc)); + //Console.WriteLine(" score=" + gd.score + " score2=" + hit.score + " id=" + docIDToIDBlocks.get(hit.doc)); + termScoreMap.Put(gd.score, gd.score2); + } + } + + for (int searchIter = 0; searchIter < 100; searchIter++) + { + + if (VERBOSE) + { + Console.WriteLine("\nTEST: searchIter=" + searchIter); + } + + string searchTerm = "real" + Random().nextInt(3); + bool fillFields = Random().nextBoolean(); + bool getScores = Random().nextBoolean(); + bool getMaxScores = Random().nextBoolean(); + Sort groupSort = getRandomSort(); + //final Sort groupSort = new Sort(new SortField[] {new SortField("sort1", SortField.STRING), new SortField("id", SortField.INT)}); + // TODO: also test null (= sort by relevance) + Sort docSort = getRandomSort(); + + foreach (SortField sf in docSort.GetSort()) + { + if (sf.Type == SortField.Type_e.SCORE) + { + getScores = true; + break; + } + } + + foreach (SortField sf in groupSort.GetSort()) + { + if (sf.Type == SortField.Type_e.SCORE) + { + getScores = true; + break; + } + } + + int topNGroups = 
TestUtil.NextInt(Random(), 1, 30); + // int topNGroups = 10; + int docsPerGroup = TestUtil.NextInt(Random(), 1, 50); + + int groupOffset = TestUtil.NextInt(Random(), 0, (topNGroups - 1) / 2); + // int groupOffset = 0; + + int docOffset = TestUtil.NextInt(Random(), 0, docsPerGroup - 1); + // int docOffset = 0; + + bool doCache = Random().nextBoolean(); + bool doAllGroups = Random().nextBoolean(); + if (VERBOSE) + { + Console.WriteLine("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " dF=" + r.DocFreq(new Term("content", searchTerm)) + " dFBlock=" + rBlocks.DocFreq(new Term("content", searchTerm)) + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores); + } + + string groupField = "group"; + if (canUseIDV && Random().nextBoolean()) + { + groupField += "_dv"; + } + if (VERBOSE) + { + Console.WriteLine(" groupField=" + groupField); + } + AbstractFirstPassGroupingCollector c1 = createRandomFirstPassCollector(groupField, groupSort, groupOffset + topNGroups); + CachingCollector cCache; + Collector c; + + AbstractAllGroupsCollector allGroupsCollector; + if (doAllGroups) + { + allGroupsCollector = createAllGroupsCollector(c1, groupField); + } + else + { + allGroupsCollector = null; + } + + bool useWrappingCollector = Random().nextBoolean(); + + if (doCache) + { + double maxCacheMB = Random().NextDouble(); + if (VERBOSE) + { + Console.WriteLine("TEST: maxCacheMB=" + maxCacheMB); + } + + if (useWrappingCollector) + { + if (doAllGroups) + { + cCache = CachingCollector.Create(c1, true, maxCacheMB); + c = MultiCollector.Wrap(cCache, allGroupsCollector); + } + else + { + c = cCache = CachingCollector.Create(c1, true, maxCacheMB); + } + } + else + { + // Collect only into cache, then replay multiple times: + c = cCache = CachingCollector.Create(false, true, maxCacheMB); + } + } + else + { + cCache = null; + if (doAllGroups) + { + c = MultiCollector.Wrap(c1, allGroupsCollector); + } + else + { + c = c1; + } + } + + // Search top reader: + Query query = new TermQuery(new Term("content", searchTerm)); + + s.Search(query, c); + + if (doCache && !useWrappingCollector) + { + if (cCache.Cached) + { + // Replay for first-pass grouping + cCache.Replay(c1); + if (doAllGroups) + { + // Replay for all groups: + cCache.Replay(allGroupsCollector); + } + } + else + { + // Replay by re-running search: + s.Search(query, c1); + if (doAllGroups) + { + s.Search(query, allGroupsCollector); + } + } + } + + // Get 1st pass top groups + ICollection> topGroups = getSearchGroups(c1, groupOffset, fillFields); + TopGroups groupsResult; + if (VERBOSE) + { + Console.WriteLine("TEST: first pass topGroups"); + if (topGroups == null) + { + Console.WriteLine(" null"); + } + else + { + foreach (SearchGroup searchGroup in topGroups) + { + Console.WriteLine(" " + (searchGroup.groupValue == null ? 
"null" : searchGroup.groupValue.Utf8ToString()) + ": " + Arrays.ToString(searchGroup.sortValues)); + } + } + } + + // Get 1st pass top groups using shards + + ValueHolder idvBasedImplsUsedSharded = new ValueHolder(false); + TopGroups topGroupsShards = searchShards(s, shards.subSearchers, query, groupSort, docSort, + groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, canUseIDV, preFlex, idvBasedImplsUsedSharded); + AbstractSecondPassGroupingCollector c2; + if (topGroups != null) + { + + if (VERBOSE) + { + Console.WriteLine("TEST: topGroups"); + foreach (SearchGroup searchGroup in topGroups) + { + Console.WriteLine(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.Utf8ToString()) + ": " + Arrays.ToString(searchGroup.sortValues)); + } + } + + c2 = createSecondPassCollector(c1, groupField, groupSort, docSort, groupOffset, docOffset + docsPerGroup, getScores, getMaxScores, fillFields); + if (doCache) + { + if (cCache.Cached) + { + if (VERBOSE) + { + Console.WriteLine("TEST: cache is intact"); + } + cCache.Replay(c2); + } + else + { + if (VERBOSE) + { + Console.WriteLine("TEST: cache was too large"); + } + s.Search(query, c2); + } + } + else + { + s.Search(query, c2); + } + + if (doAllGroups) + { + TopGroups tempTopGroups = getTopGroups(c2, docOffset); + groupsResult = new TopGroups(tempTopGroups, allGroupsCollector.getGroupCount()); + } + else + { + groupsResult = getTopGroups(c2, docOffset); + } + } + else + { + c2 = null; + groupsResult = null; + if (VERBOSE) + { + Console.WriteLine("TEST: no results"); + } + } + + TopGroups expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset); + + if (VERBOSE) + { + if (expectedGroups == null) + { + Console.WriteLine("TEST: no expected groups"); + } + else + { + Console.WriteLine("TEST: expected groups totalGroupedHitCount=" + expectedGroups.TotalGroupedHitCount); + foreach (GroupDocs gd in expectedGroups.Groups) + { + Console.WriteLine(" group=" + (gd.GroupValue == null ? "null" : gd.GroupValue.Utf8ToString()) + " totalHits=" + gd.TotalHits + " scoreDocs.len=" + gd.ScoreDocs.Length); + foreach (ScoreDoc sd in gd.ScoreDocs) + { + Console.WriteLine(" id=" + sd.Doc + " score=" + sd.Score); + } + } + } + + if (groupsResult == null) + { + Console.WriteLine("TEST: no matched groups"); + } + else + { + Console.WriteLine("TEST: matched groups totalGroupedHitCount=" + groupsResult.TotalGroupedHitCount); + foreach (GroupDocs gd in groupsResult.Groups) + { + Console.WriteLine(" group=" + (gd.GroupValue == null ? "null" : gd.GroupValue.Utf8ToString()) + " totalHits=" + gd.TotalHits); + foreach (ScoreDoc sd in gd.ScoreDocs) + { + Console.WriteLine(" id=" + docIDToID.Get(sd.Doc) + " score=" + sd.Score); + } + } + + if (searchIter == 14) + { + for (int docIDX = 0; docIDX < s.IndexReader.MaxDoc; docIDX++) + { + Console.WriteLine("ID=" + docIDToID.Get(docIDX) + " explain=" + s.Explain(query, docIDX)); + } + } + } + + if (topGroupsShards == null) + { + Console.WriteLine("TEST: no matched-merged groups"); + } + else + { + Console.WriteLine("TEST: matched-merged groups totalGroupedHitCount=" + topGroupsShards.TotalGroupedHitCount); + foreach (GroupDocs gd in topGroupsShards.Groups) + { + Console.WriteLine(" group=" + (gd.GroupValue == null ? 
"null" : gd.GroupValue.Utf8ToString()) + " totalHits=" + gd.TotalHits); + foreach (ScoreDoc sd in gd.ScoreDocs) + { + Console.WriteLine(" id=" + docIDToID.Get(sd.Doc) + " score=" + sd.Score); + } + } + } + } + + assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores, groupField.EndsWith("_dv")); + + // Confirm merged shards match: + assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores, idvBasedImplsUsedSharded.value); + if (topGroupsShards != null) + { + verifyShards(shards.docStarts, topGroupsShards); + } + + bool needsScores = getScores || getMaxScores || docSort == null; + BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset + topNGroups, needsScores, lastDocInBlock); + TermAllGroupsCollector allGroupsCollector2; + Collector c4; + if (doAllGroups) + { + // NOTE: must be "group" and not "group_dv" + // (groupField) because we didn't index doc + // values in the block index: + allGroupsCollector2 = new TermAllGroupsCollector("group"); + c4 = MultiCollector.Wrap(c3, allGroupsCollector2); + } + else + { + allGroupsCollector2 = null; + c4 = c3; + } + // Get block grouping result: + sBlocks.Search(query, c4); + TopGroups tempTopGroupsBlocks = (TopGroups)c3.GetTopGroups(docSort, groupOffset, docOffset, docOffset + docsPerGroup, fillFields); + TopGroups groupsResultBlocks; + if (doAllGroups && tempTopGroupsBlocks != null) + { + assertEquals((int)tempTopGroupsBlocks.TotalGroupCount, allGroupsCollector2.GetGroupCount()); + groupsResultBlocks = new TopGroups(tempTopGroupsBlocks, allGroupsCollector2.GetGroupCount()); + } + else + { + groupsResultBlocks = tempTopGroupsBlocks; + } + + if (VERBOSE) + { + if (groupsResultBlocks == null) + { + Console.WriteLine("TEST: no block groups"); + } + else + { + Console.WriteLine("TEST: block groups totalGroupedHitCount=" + groupsResultBlocks.TotalGroupedHitCount); + bool first = true; + foreach (GroupDocs gd in groupsResultBlocks.Groups) + { + Console.WriteLine(" group=" + (gd.GroupValue == null ? 
"null" : gd.GroupValue.Utf8ToString()) + " totalHits=" + gd.TotalHits); + foreach (ScoreDoc sd in gd.ScoreDocs) + { + Console.WriteLine(" id=" + docIDToIDBlocks.Get(sd.Doc) + " score=" + sd.Score); + if (first) + { + Console.WriteLine("explain: " + sBlocks.Explain(query, sd.Doc)); + first = false; + } + } + } + } + } + + // Get shard'd block grouping result: + // Block index does not index DocValues so we pass + // false for canUseIDV: + TopGroups topGroupsBlockShards = searchShards(sBlocks, shardsBlocks.subSearchers, query, + groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, false, false, new ValueHolder(false)); + + if (expectedGroups != null) + { + // Fixup scores for reader2 + foreach (var groupDocsHits in expectedGroups.Groups) + { + foreach (ScoreDoc hit in groupDocsHits.ScoreDocs) + { + GroupDoc gd = groupDocsByID[hit.Doc]; + assertEquals(gd.id, hit.Doc); + //Console.WriteLine("fixup score " + hit.score + " to " + gd.score2 + " vs " + gd.score); + hit.Score = gd.score2; + } + } + + SortField[] sortFields = groupSort.GetSort(); + IDictionary termScoreMap = scoreMap[searchTerm]; + for (int groupSortIDX = 0; groupSortIDX < sortFields.Length; groupSortIDX++) + { + if (sortFields[groupSortIDX].Type == SortField.Type_e.SCORE) + { + foreach (var groupDocsHits in expectedGroups.Groups) + { + if (groupDocsHits.GroupSortValues != null) + { + //Console.WriteLine("remap " + groupDocsHits.groupSortValues[groupSortIDX] + " to " + termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX])); + groupDocsHits.GroupSortValues[groupSortIDX] = termScoreMap[groupDocsHits.GroupSortValues[groupSortIDX]]; + assertNotNull(groupDocsHits.GroupSortValues[groupSortIDX]); + } + } + } + } + + SortField[] docSortFields = docSort.GetSort(); + for (int docSortIDX = 0; docSortIDX < docSortFields.Length; docSortIDX++) + { + if (docSortFields[docSortIDX].Type == SortField.Type_e.SCORE) + { + foreach (var groupDocsHits in expectedGroups.Groups) + { + foreach (ScoreDoc _hit in groupDocsHits.ScoreDocs) + { + FieldDoc hit = (FieldDoc)_hit; + if (hit.Fields != null) + { + hit.Fields[docSortIDX] = termScoreMap[hit.Fields[docSortIDX]]; + assertNotNull(hit.Fields[docSortIDX]); + } + } + } + } + } + } + + assertEquals(docIDToIDBlocks, expectedGroups, groupsResultBlocks, false, true, true, getScores, false); + assertEquals(docIDToIDBlocks, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores, false); + } + } + finally + { + QueryUtils.PurgeFieldCache(r); + if (rBlocks != null) + { + QueryUtils.PurgeFieldCache(rBlocks); + } + } + + r.Dispose(); + dir.Dispose(); + + rBlocks.Dispose(); + dirBlocks.Dispose(); + } + } + + private void verifyShards(int[] docStarts, TopGroups topGroups) + { + foreach (var group in topGroups.Groups) + { + for (int hitIDX = 0; hitIDX < group.ScoreDocs.Length; hitIDX++) + { + ScoreDoc sd = group.ScoreDocs[hitIDX]; + assertEquals("doc=" + sd.Doc + " wrong shard", + ReaderUtil.SubIndex(sd.Doc, docStarts), + sd.ShardIndex); + } + } + } + + private TopGroups searchShards(IndexSearcher topSearcher, ShardSearcher[] subSearchers, Query query, Sort groupSort, Sort docSort, int groupOffset, int topNGroups, int docOffset, + int topNDocs, bool getScores, bool getMaxScores, bool canUseIDV, bool preFlex, ValueHolder usedIdvBasedImpl) + { + + // TODO: swap in caching, all groups collector hereassertEquals(expected.totalHitCount, actual.totalHitCount); + // too... 
+            if (VERBOSE)
+            {
+                Console.WriteLine("TEST: " + subSearchers.Length + " shards: " + Arrays.ToString(subSearchers) + " canUseIDV=" + canUseIDV);
+            }
+            // Run 1st pass collector to get top groups per shard
+            Weight w = topSearcher.CreateNormalizedWeight(query);
+            List>> shardGroups = new List>>();
+            List<AbstractFirstPassGroupingCollector> firstPassGroupingCollectors = new List<AbstractFirstPassGroupingCollector>();
+            AbstractFirstPassGroupingCollector firstPassCollector = null;
+            bool shardsCanUseIDV;
+            if (canUseIDV)
+            {
+                if (typeof(SlowCompositeReaderWrapper).IsAssignableFrom(subSearchers[0].IndexReader.GetType()))
+                {
+                    shardsCanUseIDV = false;
+                }
+                else
+                {
+                    shardsCanUseIDV = !preFlex;
+                }
+            }
+            else
+            {
+                shardsCanUseIDV = false;
+            }
+
+            string groupField = "group";
+            if (shardsCanUseIDV && Random().nextBoolean())
+            {
+                groupField += "_dv";
+                usedIdvBasedImpl.value = true;
+            }
+
+            for (int shardIDX = 0; shardIDX < subSearchers.Length; shardIDX++)
+            {
+
+                // First shard determines whether we use IDV or not;
+                // all other shards match that:
+                if (firstPassCollector == null)
+                {
+                    firstPassCollector = createRandomFirstPassCollector(groupField, groupSort, groupOffset + topNGroups);
+                }
+                else
+                {
+                    firstPassCollector = createFirstPassCollector(groupField, groupSort, groupOffset + topNGroups, firstPassCollector);
+                }
+                if (VERBOSE)
+                {
+                    Console.WriteLine(" shard=" + shardIDX + " groupField=" + groupField);
+                    Console.WriteLine(" 1st pass collector=" + firstPassCollector);
+                }
+                firstPassGroupingCollectors.Add(firstPassCollector);
+                subSearchers[shardIDX].Search(w, firstPassCollector);
+                ICollection> topGroups = getSearchGroups(firstPassCollector, 0, true);
+                if (topGroups != null)
+                {
+                    if (VERBOSE)
+                    {
+                        Console.WriteLine(" shard " + shardIDX + " s=" + subSearchers[shardIDX] + " totalGroupedHitCount=?" 
+ " " + topGroups.size() + " groups:"); + foreach (SearchGroup group in topGroups) + { + Console.WriteLine(" " + groupToString(group.groupValue) + " groupSort=" + Arrays.ToString(group.sortValues)); + } + } + shardGroups.Add(topGroups); + } + } + + ICollection> mergedTopGroups = SearchGroup.Merge(shardGroups, groupOffset, topNGroups, groupSort); + if (VERBOSE) + { + Console.WriteLine(" top groups merged:"); + if (mergedTopGroups == null) + { + Console.WriteLine(" null"); + } + else + { + Console.WriteLine(" " + mergedTopGroups.size() + " top groups:"); + foreach (SearchGroup group in mergedTopGroups) + { + Console.WriteLine(" [" + groupToString(group.groupValue) + "] groupSort=" + Arrays.ToString(group.sortValues)); + } + } + } + + if (mergedTopGroups != null) + { + // Now 2nd pass: + TopGroups[] shardTopGroups = new TopGroups[subSearchers.Length]; + for (int shardIDX = 0; shardIDX < subSearchers.Length; shardIDX++) + { + AbstractSecondPassGroupingCollector secondPassCollector = createSecondPassCollector(firstPassGroupingCollectors.get(shardIDX), + groupField, mergedTopGroups, groupSort, docSort, docOffset + topNDocs, getScores, getMaxScores, true); + subSearchers[shardIDX].Search(w, secondPassCollector); + shardTopGroups[shardIDX] = getTopGroups(secondPassCollector, 0); + if (VERBOSE) + { + Console.WriteLine(" " + shardTopGroups[shardIDX].Groups.Length + " shard[" + shardIDX + "] groups:"); + foreach (GroupDocs group in shardTopGroups[shardIDX].Groups) + { + Console.WriteLine(" [" + groupToString(group.GroupValue) + "] groupSort=" + Arrays.ToString(group.GroupSortValues) + " numDocs=" + group.ScoreDocs.Length); + } + } + } + + // LUCENENET TODO: Put Merge() in a TopGroups class + TopGroups mergedGroups = TopGroups.Merge(shardTopGroups, groupSort, docSort, docOffset, topNDocs, TopGroups.ScoreMergeMode.None); + if (VERBOSE) + { + Console.WriteLine(" " + mergedGroups.Groups.Length + " merged groups:"); + foreach (GroupDocs group in mergedGroups.Groups) + { + Console.WriteLine(" [" + groupToString(group.GroupValue) + "] groupSort=" + Arrays.ToString(group.GroupSortValues) + " numDocs=" + group.ScoreDocs.Length); + } + } + return mergedGroups; + } + else + { + return null; + } + } + + private void assertEquals(FieldCache.Ints docIDtoID, TopGroups expected, TopGroups actual, bool verifyGroupValues, bool verifyTotalGroupCount, bool verifySortValues, bool testScores, bool idvBasedImplsUsed) + { + if (expected == null) + { + assertNull(actual); + return; + } + assertNotNull(actual); + + assertEquals("expected.groups.length != actual.groups.length", expected.Groups.Length, actual.Groups.Length); + assertEquals("expected.totalHitCount != actual.totalHitCount", expected.TotalHitCount, actual.TotalHitCount); + assertEquals("expected.totalGroupedHitCount != actual.totalGroupedHitCount", expected.TotalGroupedHitCount, actual.TotalGroupedHitCount); + if (expected.TotalGroupCount != null && verifyTotalGroupCount) + { + assertEquals("expected.totalGroupCount != actual.totalGroupCount", expected.TotalGroupCount, actual.TotalGroupCount); + } + + for (int groupIDX = 0; groupIDX < expected.Groups.Length; groupIDX++) + { + if (VERBOSE) + { + Console.WriteLine(" check groupIDX=" + groupIDX); + } + GroupDocs expectedGroup = expected.Groups[groupIDX]; + GroupDocs actualGroup = actual.Groups[groupIDX]; + if (verifyGroupValues) + { + if (idvBasedImplsUsed) + { + if (actualGroup.GroupValue.Length == 0) + { + assertNull(expectedGroup.GroupValue); + } + else + { + assertEquals(expectedGroup.GroupValue, 
actualGroup.GroupValue); + } + } + else + { + assertEquals(expectedGroup.GroupValue, actualGroup.GroupValue); + } + + } + if (verifySortValues) + { + assertArrayEquals(expectedGroup.GroupSortValues, actualGroup.GroupSortValues); + } + + // TODO + // assertEquals(expectedGroup.maxScore, actualGroup.maxScore); + assertEquals(expectedGroup.TotalHits, actualGroup.TotalHits); + + ScoreDoc[] expectedFDs = expectedGroup.ScoreDocs; + ScoreDoc[] actualFDs = actualGroup.ScoreDocs; + + assertEquals(expectedFDs.Length, actualFDs.Length); + for (int docIDX = 0; docIDX < expectedFDs.Length; docIDX++) + { + FieldDoc expectedFD = (FieldDoc)expectedFDs[docIDX]; + FieldDoc actualFD = (FieldDoc)actualFDs[docIDX]; + //Console.WriteLine(" actual doc=" + docIDtoID.get(actualFD.doc) + " score=" + actualFD.score); + assertEquals(expectedFD.Doc, docIDtoID.Get(actualFD.Doc)); + if (testScores) + { + assertEquals(expectedFD.Score, actualFD.Score, 0.1); + } + else + { + // TODO: too anal for now + //assertEquals(Float.NaN, actualFD.score); + } + if (verifySortValues) + { + assertArrayEquals(expectedFD.Fields, actualFD.Fields); + } + } + } + } + + internal class ShardSearcher : IndexSearcher + { + private readonly List ctx; + + public ShardSearcher(AtomicReaderContext ctx, IndexReaderContext parent) + : base(parent) + { + this.ctx = Collections.SingletonList(ctx); + } + + public void Search(Weight weight, Collector collector) + { + Search(ctx, weight, collector); + } + + public override string ToString() + { + return "ShardSearcher(" + ctx[0].Reader + ")"; + } + } + + internal class ValueHolder + { + + internal V value; + + internal ValueHolder(V value) + { + this.value = value; + } + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Tests.Grouping/packages.config ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests.Grouping/packages.config b/src/Lucene.Net.Tests.Grouping/packages.config new file mode 100644 index 0000000..139d513 --- /dev/null +++ b/src/Lucene.Net.Tests.Grouping/packages.config @@ -0,0 +1,4 @@ + + + + \ No newline at end of file