Return-Path: X-Original-To: apmail-lucenenet-commits-archive@www.apache.org Delivered-To: apmail-lucenenet-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id C5C2618881 for ; Sun, 23 Aug 2015 22:34:01 +0000 (UTC) Received: (qmail 92725 invoked by uid 500); 23 Aug 2015 22:34:01 -0000 Delivered-To: apmail-lucenenet-commits-archive@lucenenet.apache.org Received: (qmail 92623 invoked by uid 500); 23 Aug 2015 22:34:01 -0000 Mailing-List: contact commits-help@lucenenet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@lucenenet.apache.org Delivered-To: mailing list commits@lucenenet.apache.org Received: (qmail 91849 invoked by uid 99); 23 Aug 2015 22:34:01 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 23 Aug 2015 22:34:01 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 031CAE1091; Sun, 23 Aug 2015 22:34:01 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: synhershko@apache.org To: commits@lucenenet.apache.org Date: Sun, 23 Aug 2015 22:34:15 -0000 Message-Id: In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [16/17] lucenenet git commit: Lucene.Net.Join tests now passing Lucene.Net.Join tests now passing Moved the Join/Grouping projects into the src folder and updated the sln path mapping accordingly. Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/4820f236 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/4820f236 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/4820f236 Branch: refs/heads/master Commit: 4820f236b2444636452012c42e2450e408720335 Parents: 0213f53 Author: Josh Sullivan Authored: Sun Aug 23 00:27:54 2015 -0400 Committer: Josh Sullivan Committed: Sun Aug 23 00:27:54 2015 -0400 ---------------------------------------------------------------------- Lucene.Net.Grouping/GroupDocs.cs | 71 - Lucene.Net.Grouping/Lucene.Net.Grouping.csproj | 61 - Lucene.Net.Grouping/Properties/AssemblyInfo.cs | 36 - Lucene.Net.Grouping/TopGroups.cs | 249 --- Lucene.Net.Join/FakeScorer.cs | 76 - .../FixedBitSetCachingWrapperFilter.cs | 62 - Lucene.Net.Join/JoinUtil.cs | 80 - Lucene.Net.Join/Lucene.Net.Join.csproj | 76 - Lucene.Net.Join/Properties/AssemblyInfo.cs | 36 - Lucene.Net.Join/ScoreMode.cs | 45 - Lucene.Net.Join/TermsCollector.cs | 127 -- Lucene.Net.Join/TermsIncludingScoreQuery.cs | 472 ------ Lucene.Net.Join/TermsQuery.cs | 147 -- Lucene.Net.Join/TermsWithScoreCollector.cs | 333 ---- Lucene.Net.Join/ToChildBlockJoinQuery.cs | 396 ----- Lucene.Net.Join/ToParentBlockJoinCollector.cs | 560 ------ .../ToParentBlockJoinFieldComparator.cs | 393 ----- Lucene.Net.Join/ToParentBlockJoinQuery.cs | 516 ------ Lucene.Net.Join/ToParentBlockJoinSortField.cs | 78 - .../Lucene.Net.Tests.Join.csproj | 86 - .../Properties/AssemblyInfo.cs | 36 - Lucene.Net.Tests.Join/TestBlockJoin.cs | 1599 ------------------ Lucene.Net.Tests.Join/TestBlockJoinSorting.cs | 277 --- .../TestBlockJoinValidation.cs | 227 --- Lucene.Net.Tests.Join/TestJoinUtil.cs | 1165 ------------- Lucene.Net.Tests.Join/packages.config | 5 - Lucene.Net.sln | 6 +- src/Lucene.Net.Grouping/GroupDocs.cs | 71 + .../Lucene.Net.Grouping.csproj | 61 + .../Properties/AssemblyInfo.cs | 36 + src/Lucene.Net.Grouping/TopGroups.cs | 249 +++ src/Lucene.Net.Join/FakeScorer.cs | 76 + .../FixedBitSetCachingWrapperFilter.cs | 62 + src/Lucene.Net.Join/JoinUtil.cs | 80 + src/Lucene.Net.Join/Lucene.Net.Join.csproj | 76 + src/Lucene.Net.Join/Properties/AssemblyInfo.cs | 36 + src/Lucene.Net.Join/ScoreMode.cs | 45 + src/Lucene.Net.Join/TermsCollector.cs | 127 ++ src/Lucene.Net.Join/TermsIncludingScoreQuery.cs | 472 ++++++ src/Lucene.Net.Join/TermsQuery.cs | 147 ++ src/Lucene.Net.Join/TermsWithScoreCollector.cs | 333 ++++ src/Lucene.Net.Join/ToChildBlockJoinQuery.cs | 396 +++++ .../ToParentBlockJoinCollector.cs | 578 +++++++ .../ToParentBlockJoinFieldComparator.cs | 393 +++++ src/Lucene.Net.Join/ToParentBlockJoinQuery.cs | 516 ++++++ .../ToParentBlockJoinSortField.cs | 78 + .../Util/LuceneTestCase.cs | 2 +- .../Lucene.Net.Tests.Join.csproj | 86 + .../Properties/AssemblyInfo.cs | 36 + src/Lucene.Net.Tests.Join/TestBlockJoin.cs | 1591 +++++++++++++++++ .../TestBlockJoinSorting.cs | 277 +++ .../TestBlockJoinValidation.cs | 227 +++ src/Lucene.Net.Tests.Join/TestJoinUtil.cs | 1165 +++++++++++++ src/Lucene.Net.Tests.Join/packages.config | 5 + 54 files changed, 7223 insertions(+), 7213 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Grouping/GroupDocs.cs ---------------------------------------------------------------------- diff --git a/Lucene.Net.Grouping/GroupDocs.cs b/Lucene.Net.Grouping/GroupDocs.cs deleted file mode 100644 index 00cdf83..0000000 --- a/Lucene.Net.Grouping/GroupDocs.cs +++ /dev/null @@ -1,71 +0,0 @@ -using Lucene.Net.Search; - -namespace Lucene.Net.Grouping -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - /// - /// Represents one group in the results. - /// - /// @lucene.experimental - /// - public class GroupDocs - { - /// - /// The groupField value for all docs in this group; this - /// may be null if hits did not have the groupField. - /// - public readonly TGroupValueType GroupValue; - - /// - /// Max score in this group - /// - public readonly float MaxScore; - - /// - /// Overall aggregated score of this group (currently only set by join queries). - /// - public readonly float Score; - - /// - /// Hits; this may be {@link org.apache.lucene.search.FieldDoc} instances if the - /// withinGroupSort sorted by fields. - /// - public readonly ScoreDoc[] ScoreDocs; - - /// - /// Total hits within this group - /// - public readonly int TotalHits; - - /// - /// Matches the groupSort passed to {@link AbstractFirstPassGroupingCollector}. - /// - public readonly object[] GroupSortValues; - - public GroupDocs(float score, float maxScore, int totalHits, ScoreDoc[] scoreDocs, TGroupValueType groupValue, object[] groupSortValues) - { - Score = score; - MaxScore = maxScore; - TotalHits = totalHits; - ScoreDocs = scoreDocs; - GroupValue = groupValue; - GroupSortValues = groupSortValues; - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj ---------------------------------------------------------------------- diff --git a/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj b/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj deleted file mode 100644 index 540b438..0000000 --- a/Lucene.Net.Grouping/Lucene.Net.Grouping.csproj +++ /dev/null @@ -1,61 +0,0 @@ - - - - - Debug - AnyCPU - {02BAB603-067D-48B1-AEDD-316849652568} - Library - Properties - Lucene.Net.Grouping - Lucene.Net.Grouping - v4.5.1 - 512 - - - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - - - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - - - - - - - - - - - - - - - - - - - {5D4AD9BE-1FFB-41AB-9943-25737971BF57} - Lucene.Net - - - - - \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Grouping/Properties/AssemblyInfo.cs ---------------------------------------------------------------------- diff --git a/Lucene.Net.Grouping/Properties/AssemblyInfo.cs b/Lucene.Net.Grouping/Properties/AssemblyInfo.cs deleted file mode 100644 index 9e6c1ce..0000000 --- a/Lucene.Net.Grouping/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,36 +0,0 @@ -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyTitle("Lucene.Net.Grouping")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("Lucene.Net.Grouping")] -[assembly: AssemblyCopyright("Copyright © 2015")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. -[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("02bab603-067d-48b1-aedd-316849652568")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.0.0.0")] -[assembly: AssemblyFileVersion("1.0.0.0")] http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Grouping/TopGroups.cs ---------------------------------------------------------------------- diff --git a/Lucene.Net.Grouping/TopGroups.cs b/Lucene.Net.Grouping/TopGroups.cs deleted file mode 100644 index 017c975..0000000 --- a/Lucene.Net.Grouping/TopGroups.cs +++ /dev/null @@ -1,249 +0,0 @@ -using System; -using Lucene.Net.Search; - -namespace Lucene.Net.Grouping -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - /// - /// Represents result returned by a grouping search. - /// - /// @lucene.experimental - /// - public class TopGroups - { - /// - /// Number of documents matching the search - public readonly int TotalHitCount; - - /// - /// Number of documents grouped into the topN groups - public readonly int TotalGroupedHitCount; - - /// - /// The total number of unique groups. If null this value is not computed. - public readonly int? TotalGroupCount; - - /// - /// Group results in groupSort order - public readonly GroupDocs[] Groups; - - /// - /// How groups are sorted against each other - public readonly SortField[] GroupSort; - - /// - /// How docs are sorted within each group - public readonly SortField[] WithinGroupSort; - - /// - /// Highest score across all hits, or - /// Float.NaN if scores were not computed. - /// - public readonly float MaxScore; - - public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs[] groups, float maxScore) - { - GroupSort = groupSort; - WithinGroupSort = withinGroupSort; - TotalHitCount = totalHitCount; - TotalGroupedHitCount = totalGroupedHitCount; - Groups = groups; - TotalGroupCount = null; - MaxScore = maxScore; - } - - public TopGroups(TopGroups oldTopGroups, int? totalGroupCount) - { - GroupSort = oldTopGroups.GroupSort; - WithinGroupSort = oldTopGroups.WithinGroupSort; - TotalHitCount = oldTopGroups.TotalHitCount; - TotalGroupedHitCount = oldTopGroups.TotalGroupedHitCount; - Groups = oldTopGroups.Groups; - MaxScore = oldTopGroups.MaxScore; - TotalGroupCount = totalGroupCount; - } - - /// - /// How the GroupDocs score (if any) should be merged. - public enum ScoreMergeMode - { - /// - /// Set score to Float.NaN - /// - None, - - /// - /// Sum score across all shards for this group. - /// - Total, - - /// - /// Avg score across all shards for this group. - /// - Avg, - } - - /// - /// Merges an array of TopGroups, for example obtained from the second-pass - /// collector across multiple shards. Each TopGroups must have been sorted by the - /// same groupSort and docSort, and the top groups passed to all second-pass - /// collectors must be the same. - /// - /// NOTE: We can't always compute an exact totalGroupCount. - /// Documents belonging to a group may occur on more than - /// one shard and thus the merged totalGroupCount can be - /// higher than the actual totalGroupCount. In this case the - /// totalGroupCount represents a upper bound. If the documents - /// of one group do only reside in one shard then the - /// totalGroupCount is exact. - /// - /// NOTE: the topDocs in each GroupDocs is actually - /// an instance of TopDocsAndShards - /// - public static TopGroups Merge(TopGroups[] shardGroups, Sort groupSort, Sort docSort, int docOffset, int docTopN, ScoreMergeMode scoreMergeMode) - { - //System.out.println("TopGroups.merge"); - - if (shardGroups.Length == 0) - { - return null; - } - - int totalHitCount = 0; - int totalGroupedHitCount = 0; - // Optionally merge the totalGroupCount. - int? totalGroupCount = null; - - int numGroups = shardGroups[0].Groups.Length; - foreach (var shard in shardGroups) - { - if (numGroups != shard.Groups.Length) - { - throw new ArgumentException("number of groups differs across shards; you must pass same top groups to all shards' second-pass collector"); - } - totalHitCount += shard.TotalHitCount; - totalGroupedHitCount += shard.TotalGroupedHitCount; - if (shard.TotalGroupCount != null) - { - if (totalGroupCount == null) - { - totalGroupCount = 0; - } - - totalGroupCount += shard.TotalGroupCount; - } - } - - var mergedGroupDocs = new GroupDocs[numGroups]; - - TopDocs[] shardTopDocs = new TopDocs[shardGroups.Length]; - float totalMaxScore = float.MinValue; - - for (int groupIDX = 0; groupIDX < numGroups; groupIDX++) - { - T groupValue = shardGroups[0].Groups[groupIDX].GroupValue; - //System.out.println(" merge groupValue=" + groupValue + " sortValues=" + Arrays.toString(shardGroups[0].groups[groupIDX].groupSortValues)); - float maxScore = float.MinValue; - int totalHits = 0; - double scoreSum = 0.0; - for (int shardIdx = 0; shardIdx < shardGroups.Length; shardIdx++) - { - //System.out.println(" shard=" + shardIDX); - TopGroups shard = shardGroups[shardIdx]; - var shardGroupDocs = shard.Groups[groupIDX]; - if (groupValue == null) - { - if (shardGroupDocs.GroupValue != null) - { - throw new ArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector"); - } - } - else if (!groupValue.Equals(shardGroupDocs.GroupValue)) - { - throw new ArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector"); - } - - /* - for(ScoreDoc sd : shardGroupDocs.scoreDocs) { - System.out.println(" doc=" + sd.doc); - } - */ - - shardTopDocs[shardIdx] = new TopDocs(shardGroupDocs.TotalHits, shardGroupDocs.ScoreDocs, shardGroupDocs.MaxScore); - maxScore = Math.Max(maxScore, shardGroupDocs.MaxScore); - totalHits += shardGroupDocs.TotalHits; - scoreSum += shardGroupDocs.Score; - } - - TopDocs mergedTopDocs = TopDocs.Merge(docSort, docOffset + docTopN, shardTopDocs); - - // Slice; - ScoreDoc[] mergedScoreDocs; - if (docOffset == 0) - { - mergedScoreDocs = mergedTopDocs.ScoreDocs; - } - else if (docOffset >= mergedTopDocs.ScoreDocs.Length) - { - mergedScoreDocs = new ScoreDoc[0]; - } - else - { - mergedScoreDocs = new ScoreDoc[mergedTopDocs.ScoreDocs.Length - docOffset]; - Array.Copy(mergedTopDocs.ScoreDocs, docOffset, mergedScoreDocs, 0, mergedTopDocs.ScoreDocs.Length - docOffset); - } - - float groupScore; - switch (scoreMergeMode) - { - case ScoreMergeMode.None: - groupScore = float.NaN; - break; - case ScoreMergeMode.Avg: - if (totalHits > 0) - { - groupScore = (float)(scoreSum / totalHits); - } - else - { - groupScore = float.NaN; - } - break; - case ScoreMergeMode.Total: - groupScore = (float)scoreSum; - break; - default: - throw new ArgumentException("can't handle ScoreMergeMode " + scoreMergeMode); - } - - //System.out.println("SHARDS=" + Arrays.toString(mergedTopDocs.shardIndex)); - mergedGroupDocs[groupIDX] = new GroupDocs(groupScore, maxScore, totalHits, mergedScoreDocs, groupValue, shardGroups[0].Groups[groupIDX].GroupSortValues); - totalMaxScore = Math.Max(totalMaxScore, maxScore); - } - - if (totalGroupCount != null) - { - var result = new TopGroups(groupSort.GetSort(), docSort == null ? null : docSort.GetSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore); - return new TopGroups(result, totalGroupCount); - } - - return new TopGroups(groupSort.GetSort(), docSort == null ? null : docSort.GetSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore); - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/FakeScorer.cs ---------------------------------------------------------------------- diff --git a/Lucene.Net.Join/FakeScorer.cs b/Lucene.Net.Join/FakeScorer.cs deleted file mode 100644 index 42bf91b..0000000 --- a/Lucene.Net.Join/FakeScorer.cs +++ /dev/null @@ -1,76 +0,0 @@ -using System; -using System.Collections.Generic; -using Lucene.Net.Search; - -namespace Lucene.Net.Join -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - /// - /// Passed to during join collection. - /// - internal sealed class FakeScorer : Scorer - { - internal float _score; - internal int doc = -1; - - public FakeScorer() : base(null) - { - } - - public override int DocID() - { - return doc; - } - - public override int NextDoc() - { - throw new NotSupportedException("FakeScorer doesn't support NextDoc()"); - } - - public override int Advance(int target) - { - throw new NotSupportedException("FakeScorer doesn't support Advance(int)"); - } - - public override long Cost() - { - return 1; - } - - public override int Freq() - { - throw new NotSupportedException("FakeScorer doesn't support Freq()"); - } - - public override float Score() - { - return _score; - } - - public override Weight Weight - { - get { throw new NotSupportedException(); } - } - - public override ICollection Children - { - get { throw new NotSupportedException(); } - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs ---------------------------------------------------------------------- diff --git a/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs b/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs deleted file mode 100644 index da8b0b8..0000000 --- a/Lucene.Net.Join/FixedBitSetCachingWrapperFilter.cs +++ /dev/null @@ -1,62 +0,0 @@ -using Lucene.Net.Index; -using Lucene.Net.Search; -using Lucene.Net.Util; - -namespace Lucene.Net.Join -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - /// - /// A that caches sets using a , - /// as required for joins. - /// - public sealed class FixedBitSetCachingWrapperFilter : CachingWrapperFilter - { - /// - /// Sole constructor, see . - /// - public FixedBitSetCachingWrapperFilter(Filter filter) : base(filter) - { - } - - protected override DocIdSet DocIdSetToCache(DocIdSet docIdSet, AtomicReader reader) - { - if (docIdSet == null) - { - return EMPTY_DOCIDSET; - } - - if (docIdSet is FixedBitSet) - { - // this is different from CachingWrapperFilter: even when the DocIdSet is - // cacheable, we convert it to a FixedBitSet since we require all the - // cached filters to be FixedBitSets - return docIdSet; - } - - DocIdSetIterator it = docIdSet.GetIterator(); - if (it == null) - { - return EMPTY_DOCIDSET; - } - FixedBitSet copy = new FixedBitSet(reader.MaxDoc); - copy.Or(it); - return copy; - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/JoinUtil.cs ---------------------------------------------------------------------- diff --git a/Lucene.Net.Join/JoinUtil.cs b/Lucene.Net.Join/JoinUtil.cs deleted file mode 100644 index 726731e..0000000 --- a/Lucene.Net.Join/JoinUtil.cs +++ /dev/null @@ -1,80 +0,0 @@ -using System.IO; -using Lucene.Net.Search; - -namespace Lucene.Net.Join -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - - /// - /// Utility for query time joining using TermsQuery and TermsCollector. - /// - /// @lucene.experimental - /// - public sealed class JoinUtil - { - // No instances allowed - private JoinUtil() - { - } - - /// - /// Method for query time joining. - ///

- /// Execute the returned query with a to retrieve all documents that have the same terms in the - /// to field that match with documents matching the specified fromQuery and have the same terms in the from field. - ///

- /// In the case a single document relates to more than one document the multipleValuesPerDocument option - /// should be set to true. When the multipleValuesPerDocument is set to true only the - /// the score from the first encountered join value originating from the 'from' side is mapped into the 'to' side. - /// Even in the case when a second join value related to a specific document yields a higher score. Obviously this - /// doesn't apply in the case that is used, since no scores are computed at all. - ///

- /// Memory considerations: During joining all unique join values are kept in memory. On top of that when the scoreMode - /// isn't set to a float value per unique join value is kept in memory for computing scores. - /// When scoreMode is set to also an additional integer value is kept in memory per unique - /// join value. - ///
- /// The from field to join from - /// Whether the from field has multiple terms per document - /// The to field to join to - /// The query to match documents on the from side - /// The searcher that executed the specified fromQuery - /// Instructs how scores from the fromQuery are mapped to the returned query - /// A instance that can be used to join documents based on the terms in the from and to field - /// If I/O related errors occur - public static Query CreateJoinQuery(string fromField, bool multipleValuesPerDocument, string toField, Query fromQuery, IndexSearcher fromSearcher, ScoreMode scoreMode) - { - switch (scoreMode) - { - case ScoreMode.None: - TermsCollector termsCollector = TermsCollector.Create(fromField, multipleValuesPerDocument); - fromSearcher.Search(fromQuery, termsCollector); - return new TermsQuery(toField, fromQuery, termsCollector.CollectorTerms); - case ScoreMode.Total: - case ScoreMode.Max: - case ScoreMode.Avg: - TermsWithScoreCollector termsWithScoreCollector = TermsWithScoreCollector.Create(fromField, multipleValuesPerDocument, scoreMode); - fromSearcher.Search(fromQuery, termsWithScoreCollector); - return new TermsIncludingScoreQuery(toField, multipleValuesPerDocument, termsWithScoreCollector.CollectedTerms, termsWithScoreCollector.ScoresPerTerm, fromQuery); - default: - throw new System.ArgumentException(string.Format("Score mode {0} isn't supported.", scoreMode)); - } - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/Lucene.Net.Join.csproj ---------------------------------------------------------------------- diff --git a/Lucene.Net.Join/Lucene.Net.Join.csproj b/Lucene.Net.Join/Lucene.Net.Join.csproj deleted file mode 100644 index 2222b0e..0000000 --- a/Lucene.Net.Join/Lucene.Net.Join.csproj +++ /dev/null @@ -1,76 +0,0 @@ - - - - - Debug - AnyCPU - {E8A339C7-FCF6-4A72-8586-56D8961D7B99} - Library - Properties - Lucene.Net.Join - Lucene.Net.Join - v4.5.1 - 512 - - - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - - - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - {02BAB603-067D-48B1-AEDD-316849652568} - Lucene.Net.Grouping - - - {5D4AD9BE-1FFB-41AB-9943-25737971BF57} - Lucene.Net - - - - - \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/Properties/AssemblyInfo.cs ---------------------------------------------------------------------- diff --git a/Lucene.Net.Join/Properties/AssemblyInfo.cs b/Lucene.Net.Join/Properties/AssemblyInfo.cs deleted file mode 100644 index 2c17c13..0000000 --- a/Lucene.Net.Join/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,36 +0,0 @@ -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyTitle("Lucene.Net.Join")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("Lucene.Net.Join")] -[assembly: AssemblyCopyright("Copyright © 2015")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. -[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("e8a339c7-fcf6-4a72-8586-56d8961d7b99")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.0.0.0")] -[assembly: AssemblyFileVersion("1.0.0.0")] http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/ScoreMode.cs ---------------------------------------------------------------------- diff --git a/Lucene.Net.Join/ScoreMode.cs b/Lucene.Net.Join/ScoreMode.cs deleted file mode 100644 index a5b91be..0000000 --- a/Lucene.Net.Join/ScoreMode.cs +++ /dev/null @@ -1,45 +0,0 @@ -namespace Lucene.Net.Join -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - /// - /// How to aggregate multiple child hit scores into a single parent score. - /// - public enum ScoreMode - { - /// - /// Do no scoring. - /// - None, - - /// - /// Parent hit's score is the average of all child scores. - /// - Avg, - - /// - /// Parent hit's score is the max of all child scores. - /// - Max, - - /// - /// Parent hit's score is the sum of all child scores. - /// - Total - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/TermsCollector.cs ---------------------------------------------------------------------- diff --git a/Lucene.Net.Join/TermsCollector.cs b/Lucene.Net.Join/TermsCollector.cs deleted file mode 100644 index 2ccf1ed..0000000 --- a/Lucene.Net.Join/TermsCollector.cs +++ /dev/null @@ -1,127 +0,0 @@ -using Lucene.Net.Index; -using Lucene.Net.Search; -using Lucene.Net.Util; - -namespace Lucene.Net.Join -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - /// - /// A collector that collects all terms from a specified field matching the query. - /// - /// @lucene.experimental - /// - internal abstract class TermsCollector : Collector - { - private readonly string _field; - private readonly BytesRefHash _collectorTerms = new BytesRefHash(); - - internal TermsCollector(string field) - { - _field = field; - } - - public BytesRefHash CollectorTerms - { - get - { - return _collectorTerms; - } - } - - public override Scorer Scorer - { - set {} - } - - public override bool AcceptsDocsOutOfOrder() - { - return true; - } - - /// - /// Chooses the right implementation. - /// - /// The field to collect terms for. - /// Whether the field to collect terms for has multiple values per document. - /// A instance. - internal static TermsCollector Create(string field, bool multipleValuesPerDocument) - { - return multipleValuesPerDocument ? (TermsCollector) new MV(field) : new SV(field); - } - - // impl that works with multiple values per document - private class MV : TermsCollector - { - private readonly BytesRef _scratch = new BytesRef(); - private SortedSetDocValues _docTermOrds; - - internal MV(string field) : base(field) - { - } - - public override void Collect(int doc) - { - _docTermOrds.Document = doc; - long ord; - while ((ord = _docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS) - { - _docTermOrds.LookupOrd(ord, _scratch); - _collectorTerms.Add(_scratch); - } - } - - public override AtomicReaderContext NextReader - { - set { _docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _field); } - } - - public override bool AcceptsDocsOutOfOrder() - { - throw new System.NotImplementedException(); - } - } - - // impl that works with single value per document - private class SV : TermsCollector - { - private readonly BytesRef _spare = new BytesRef(); - private BinaryDocValues _fromDocTerms; - - internal SV(string field) : base(field) - { - } - - public override void Collect(int doc) - { - _fromDocTerms.Get(doc, _spare); - _collectorTerms.Add(_spare); - } - - public override AtomicReaderContext NextReader - { - set { _fromDocTerms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, _field, false); } - } - - public override bool AcceptsDocsOutOfOrder() - { - return base.AcceptsDocsOutOfOrder(); - } - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/TermsIncludingScoreQuery.cs ---------------------------------------------------------------------- diff --git a/Lucene.Net.Join/TermsIncludingScoreQuery.cs b/Lucene.Net.Join/TermsIncludingScoreQuery.cs deleted file mode 100644 index 9f3befc..0000000 --- a/Lucene.Net.Join/TermsIncludingScoreQuery.cs +++ /dev/null @@ -1,472 +0,0 @@ -using System.Collections.Generic; -using Lucene.Net.Index; -using Lucene.Net.Search; -using Lucene.Net.Util; - -namespace Lucene.Net.Join -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - internal class TermsIncludingScoreQuery : Query - { - private readonly string _field; - private readonly bool _multipleValuesPerDocument; - private readonly BytesRefHash _terms; - private readonly float[] _scores; - private readonly int[] _ords; - private readonly Query _originalQuery; - private readonly Query _unwrittenOriginalQuery; - - internal TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms, - float[] scores, Query originalQuery) - { - _field = field; - _multipleValuesPerDocument = multipleValuesPerDocument; - _terms = terms; - _scores = scores; - _originalQuery = originalQuery; - _ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer); - _unwrittenOriginalQuery = originalQuery; - } - - private TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms, - float[] scores, int[] ords, Query originalQuery, Query unwrittenOriginalQuery) - { - _field = field; - _multipleValuesPerDocument = multipleValuesPerDocument; - _terms = terms; - _scores = scores; - _originalQuery = originalQuery; - _ords = ords; - _unwrittenOriginalQuery = unwrittenOriginalQuery; - } - - public override string ToString(string @string) - { - return string.Format("TermsIncludingScoreQuery{{field={0};originalQuery={1}}}", _field, - _unwrittenOriginalQuery); - } - - public override void ExtractTerms(ISet terms) - { - _originalQuery.ExtractTerms(terms); - } - - public override Query Rewrite(IndexReader reader) - { - Query originalQueryRewrite = _originalQuery.Rewrite(reader); - if (originalQueryRewrite != _originalQuery) - { - Query rewritten = new TermsIncludingScoreQuery(_field, _multipleValuesPerDocument, _terms, _scores, - _ords, originalQueryRewrite, _originalQuery); - rewritten.Boost = Boost; - return rewritten; - } - - return this; - } - - protected bool Equals(TermsIncludingScoreQuery other) - { - return base.Equals(other) && string.Equals(_field, other._field) && - Equals(_unwrittenOriginalQuery, other._unwrittenOriginalQuery); - } - - public override bool Equals(object obj) - { - if (ReferenceEquals(null, obj)) return false; - if (ReferenceEquals(this, obj)) return true; - if (obj.GetType() != GetType()) return false; - return Equals((TermsIncludingScoreQuery) obj); - } - - public override int GetHashCode() - { - unchecked - { - int hashCode = base.GetHashCode(); - hashCode = (hashCode*397) ^ (_field != null ? _field.GetHashCode() : 0); - hashCode = (hashCode*397) ^ - (_unwrittenOriginalQuery != null ? _unwrittenOriginalQuery.GetHashCode() : 0); - return hashCode; - } - } - - public override Weight CreateWeight(IndexSearcher searcher) - { - Weight originalWeight = _originalQuery.CreateWeight(searcher); - return new WeightAnonymousInnerClassHelper(this, originalWeight); - } - - private class WeightAnonymousInnerClassHelper : Weight - { - private readonly TermsIncludingScoreQuery outerInstance; - - private Weight originalWeight; - - public WeightAnonymousInnerClassHelper(TermsIncludingScoreQuery outerInstance, Weight originalWeight) - { - this.outerInstance = outerInstance; - this.originalWeight = originalWeight; - } - - - private TermsEnum segmentTermsEnum; - - public override Explanation Explain(AtomicReaderContext context, int doc) - { - SVInnerScorer scorer = (SVInnerScorer) BulkScorer(context, false, null); - if (scorer != null) - { - return scorer.Explain(doc); - } - return new ComplexExplanation(false, 0.0f, "Not a match"); - } - - public override bool ScoresDocsOutOfOrder() - { - // We have optimized impls below if we are allowed - // to score out-of-order: - return true; - } - - public override Query Query - { - get { return outerInstance; } - } - - public override float ValueForNormalization - { - get { return originalWeight.ValueForNormalization*outerInstance.Boost*outerInstance.Boost; } - } - - public override void Normalize(float norm, float topLevelBoost) - { - originalWeight.Normalize(norm, topLevelBoost*outerInstance.Boost); - } - - public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs) - { - Terms terms = context.AtomicReader.Terms(outerInstance._field); - if (terms == null) - { - return null; - } - - // what is the runtime...seems ok? - long cost = context.AtomicReader.MaxDoc * terms.Size(); - - segmentTermsEnum = terms.Iterator(segmentTermsEnum); - if (outerInstance._multipleValuesPerDocument) - { - return new MVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, context.AtomicReader.MaxDoc, cost); - } - - return new SVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, context.AtomicReader.MaxDoc, cost); - } - - public override BulkScorer BulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, Bits acceptDocs) - { - if (scoreDocsInOrder) - { - return base.BulkScorer(context, scoreDocsInOrder, acceptDocs); - } - - Terms terms = context.AtomicReader.Terms(outerInstance._field); - if (terms == null) - { - return null; - } - // what is the runtime...seems ok? - long cost = context.AtomicReader.MaxDoc * terms.Size(); - - segmentTermsEnum = terms.Iterator(segmentTermsEnum); - // Optimized impls that take advantage of docs - // being allowed to be out of order: - if (outerInstance._multipleValuesPerDocument) - { - return new MVInnerScorer(outerInstance, this, acceptDocs, segmentTermsEnum, context.AtomicReader.MaxDoc, cost); - } - - return new SVInnerScorer(outerInstance, this, acceptDocs, segmentTermsEnum, cost); - } - } - - // This impl assumes that the 'join' values are used uniquely per doc per field. Used for one to many relations. - internal class SVInnerScorer : BulkScorer - { - private readonly TermsIncludingScoreQuery outerInstance; - - private readonly BytesRef _spare = new BytesRef(); - private readonly Bits _acceptDocs; - private readonly TermsEnum _termsEnum; - private readonly long _cost; - - private int _upto; - internal DocsEnum DocsEnum; - private DocsEnum _reuse; - private int _scoreUpto; - private int _doc; - - internal SVInnerScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs, TermsEnum termsEnum, long cost) - { - this.outerInstance = outerInstance; - _acceptDocs = acceptDocs; - _termsEnum = termsEnum; - _cost = cost; - _doc = -1; - } - - public override bool Score(Collector collector, int max) - { - FakeScorer fakeScorer = new FakeScorer(); - collector.Scorer = fakeScorer; - if (_doc == -1) - { - _doc = NextDocOutOfOrder(); - } - while (_doc < max) - { - fakeScorer.doc = _doc; - fakeScorer._score = outerInstance._scores[outerInstance._ords[_scoreUpto]]; - collector.Collect(_doc); - _doc = NextDocOutOfOrder(); - } - - return _doc != DocIdSetIterator.NO_MORE_DOCS; - } - - private int NextDocOutOfOrder() - { - while (true) - { - if (DocsEnum != null) - { - int docId = DocsEnumNextDoc(); - if (docId == DocIdSetIterator.NO_MORE_DOCS) - { - DocsEnum = null; - } - else - { - return _doc = docId; - } - } - - if (_upto == outerInstance._terms.Size()) - { - return _doc = DocIdSetIterator.NO_MORE_DOCS; - } - - _scoreUpto = _upto; - if (_termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[_upto++], _spare))) - { - DocsEnum = _reuse = _termsEnum.Docs(_acceptDocs, _reuse, DocsEnum.FLAG_NONE); - } - } - } - - protected virtual int DocsEnumNextDoc() - { - return DocsEnum.NextDoc(); - } - - internal Explanation Explain(int target) - { - int docId; - do - { - docId = NextDocOutOfOrder(); - if (docId < target) - { - int tempDocId = DocsEnum.Advance(target); - if (tempDocId == target) - { - docId = tempDocId; - break; - } - } - else if (docId == target) - { - break; - } - DocsEnum = null; // goto the next ord. - } while (docId != DocIdSetIterator.NO_MORE_DOCS); - - return new ComplexExplanation(true, outerInstance._scores[outerInstance._ords[_scoreUpto]], - "Score based on join value " + _termsEnum.Term().Utf8ToString()); - } - } - - // This impl that tracks whether a docid has already been emitted. This check makes sure that docs aren't emitted - // twice for different join values. This means that the first encountered join value determines the score of a document - // even if other join values yield a higher score. - internal class MVInnerScorer : SVInnerScorer - { - private readonly TermsIncludingScoreQuery outerInstance; - - - internal readonly FixedBitSet alreadyEmittedDocs; - - internal MVInnerScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs, - TermsEnum termsEnum, int maxDoc, long cost) : base(outerInstance, weight, acceptDocs, termsEnum, cost) - { - this.outerInstance = outerInstance; - alreadyEmittedDocs = new FixedBitSet(maxDoc); - } - - protected override int DocsEnumNextDoc() - { - while (true) - { - int docId = DocsEnum.NextDoc(); - if (docId == DocIdSetIterator.NO_MORE_DOCS) - { - return docId; - } - if (!alreadyEmittedDocs.GetAndSet(docId)) - { - return docId; //if it wasn't previously set, return it - } - } - } - } - - internal class SVInOrderScorer : Scorer - { - private readonly TermsIncludingScoreQuery outerInstance; - - - internal readonly DocIdSetIterator matchingDocsIterator; - internal readonly float[] scores; - internal readonly long cost_Renamed; - - internal int currentDoc = -1; - - internal SVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs, - TermsEnum termsEnum, int maxDoc, long cost) : base(weight) - { - this.outerInstance = outerInstance; - FixedBitSet matchingDocs = new FixedBitSet(maxDoc); - scores = new float[maxDoc]; - FillDocsAndScores(matchingDocs, acceptDocs, termsEnum); - matchingDocsIterator = matchingDocs.GetIterator(); - cost_Renamed = cost; - } - - protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, - TermsEnum termsEnum) - { - BytesRef spare = new BytesRef(); - DocsEnum docsEnum = null; - for (int i = 0; i < outerInstance._terms.Size(); i++) - { - if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare))) - { - docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE); - float score = outerInstance._scores[outerInstance._ords[i]]; - for (int doc = docsEnum.NextDoc(); - doc != NO_MORE_DOCS; - doc = docsEnum.NextDoc()) - { - matchingDocs.Set(doc); - // In the case the same doc is also related to a another doc, a score might be overwritten. I think this - // can only happen in a many-to-many relation - scores[doc] = score; - } - } - } - } - - public override float Score() - { - return scores[currentDoc]; - } - - public override int Freq() - { - return 1; - } - - public override int DocID() - { - return currentDoc; - } - - public override int NextDoc() - { - return currentDoc = matchingDocsIterator.NextDoc(); - } - - public override int Advance(int target) - { - return currentDoc = matchingDocsIterator.Advance(target); - } - - public override long Cost() - { - return cost_Renamed; - } - } - - // This scorer deals with the fact that a document can have more than one score from multiple related documents. - internal class MVInOrderScorer : SVInOrderScorer - { - private readonly TermsIncludingScoreQuery outerInstance; - - - internal MVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs, - TermsEnum termsEnum, int maxDoc, long cost) - : base(outerInstance, weight, acceptDocs, termsEnum, maxDoc, cost) - { - this.outerInstance = outerInstance; - } - - protected override void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, - TermsEnum termsEnum) - { - BytesRef spare = new BytesRef(); - DocsEnum docsEnum = null; - for (int i = 0; i < outerInstance._terms.Size(); i++) - { - if (termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[i], spare))) - { - docsEnum = termsEnum.Docs(acceptDocs, docsEnum, FLAG_NONE); - float score = outerInstance._scores[outerInstance._ords[i]]; - for (int doc = docsEnum.NextDoc(); - doc != NO_MORE_DOCS; - doc = docsEnum.NextDoc()) - { - // I prefer this: - /*if (scores[doc] < score) { - scores[doc] = score; - matchingDocs.set(doc); - }*/ - // But this behaves the same as MVInnerScorer and only then the tests will pass: - if (!matchingDocs.Get(doc)) - { - scores[doc] = score; - matchingDocs.Set(doc); - } - } - } - } - } - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/TermsQuery.cs ---------------------------------------------------------------------- diff --git a/Lucene.Net.Join/TermsQuery.cs b/Lucene.Net.Join/TermsQuery.cs deleted file mode 100644 index 2d5ccf8..0000000 --- a/Lucene.Net.Join/TermsQuery.cs +++ /dev/null @@ -1,147 +0,0 @@ -using System.Collections.Generic; -using Lucene.Net.Index; -using Lucene.Net.Search; -using Lucene.Net.Util; - -namespace Lucene.Net.Join -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - /// - /// A query that has an array of terms from a specific field. This query will match documents have one or more terms in - /// the specified field that match with the terms specified in the array. - /// - /// @lucene.experimental - /// - internal class TermsQuery : MultiTermQuery - { - private readonly BytesRefHash _terms; - private readonly int[] _ords; - private readonly Query _fromQuery; // Used for equals() only - - /// - /// - /// - /// The field that should contain terms that are specified in the previous parameter. - /// - /// The terms that matching documents should have. The terms must be sorted by natural order. - internal TermsQuery(string field, Query fromQuery, BytesRefHash terms) : base(field) - { - _fromQuery = fromQuery; - _terms = terms; - _ords = terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer); - } - - public override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts) - { - if (_terms.Size() == 0) - { - return TermsEnum.EMPTY; - } - - return new SeekingTermSetTermsEnum(terms.Iterator(null), _terms, _ords); - - } - - public override string ToString(string field) - { - return string.Format("TermsQuery{{field={0}}}", field); - } - - private class SeekingTermSetTermsEnum : FilteredTermsEnum - { - private readonly BytesRefHash Terms; - private readonly int[] Ords; - private readonly int _lastElement; - - private readonly BytesRef _lastTerm; - private readonly BytesRef _spare = new BytesRef(); - private readonly IComparer _comparator; - - private BytesRef _seekTerm; - private int _upto; - - internal SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) : base(tenum) - { - Terms = terms; - Ords = ords; - _comparator = BytesRef.UTF8SortedAsUnicodeComparer; - _lastElement = terms.Size() - 1; - _lastTerm = terms.Get(ords[_lastElement], new BytesRef()); - _seekTerm = terms.Get(ords[_upto], _spare); - } - - - - protected override BytesRef NextSeekTerm(BytesRef currentTerm) - { - BytesRef temp = _seekTerm; - _seekTerm = null; - return temp; - } - - protected override AcceptStatus Accept(BytesRef term) - { - if (_comparator.Compare(term, _lastTerm) > 0) - { - return AcceptStatus.END; - } - - BytesRef currentTerm = Terms.Get(Ords[_upto], _spare); - if (_comparator.Compare(term, currentTerm) == 0) - { - if (_upto == _lastElement) - { - return AcceptStatus.YES; - } - - _seekTerm = Terms.Get(Ords[++_upto], _spare); - return AcceptStatus.YES_AND_SEEK; - } - - if (_upto == _lastElement) - { - return AcceptStatus.NO; - } // Our current term doesn't match the the given term. - - int cmp; - do // We maybe are behind the given term by more than one step. Keep incrementing till we're the same or higher. - { - if (_upto == _lastElement) - { - return AcceptStatus.NO; - } - // typically the terms dict is a superset of query's terms so it's unusual that we have to skip many of - // our terms so we don't do a binary search here - _seekTerm = Terms.Get(Ords[++_upto], _spare); - } while ((cmp = _comparator.Compare(_seekTerm, term)) < 0); - if (cmp == 0) - { - if (_upto == _lastElement) - { - return AcceptStatus.YES; - } - _seekTerm = Terms.Get(Ords[++_upto], _spare); - return AcceptStatus.YES_AND_SEEK; - } - - return AcceptStatus.NO_AND_SEEK; - } - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4820f236/Lucene.Net.Join/TermsWithScoreCollector.cs ---------------------------------------------------------------------- diff --git a/Lucene.Net.Join/TermsWithScoreCollector.cs b/Lucene.Net.Join/TermsWithScoreCollector.cs deleted file mode 100644 index e823293..0000000 --- a/Lucene.Net.Join/TermsWithScoreCollector.cs +++ /dev/null @@ -1,333 +0,0 @@ -using System; -using Lucene.Net.Index; -using Lucene.Net.Search; -using Lucene.Net.Util; - -namespace Lucene.Net.Join -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - - internal abstract class TermsWithScoreCollector : Collector - { - private const int InitialArraySize = 256; - - private readonly string _field; - private readonly BytesRefHash _collectedTerms = new BytesRefHash(); - private readonly ScoreMode _scoreMode; - - private Scorer _scorer; - private float[] _scoreSums = new float[InitialArraySize]; - - internal TermsWithScoreCollector(string field, ScoreMode scoreMode) - { - this._field = field; - this._scoreMode = scoreMode; - } - - public BytesRefHash CollectedTerms - { - get - { - return _collectedTerms; - } - } - - public virtual float[] ScoresPerTerm - { - get - { - return _scoreSums; - } - } - - //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: - //ORIGINAL LINE: @Override public void setScorer(org.apache.lucene.search.Scorer scorer) throws java.io.IOException - public override Scorer Scorer - { - set - { - _scorer = value; - } - } - - public override bool AcceptsDocsOutOfOrder() - { - return true; - } - - /// - /// Chooses the right implementation. - /// - /// The field to collect terms for. - /// Whether the field to collect terms for has multiple values per document. - /// A instance - internal static TermsWithScoreCollector Create(string field, bool multipleValuesPerDocument, ScoreMode scoreMode) - { - if (multipleValuesPerDocument) - { - switch (scoreMode) - { - case ScoreMode.Avg: - return new Mv.Avg(field); - default: - return new Mv(field, scoreMode); - } - } - - switch (scoreMode) - { - case ScoreMode.Avg: - return new Sv.Avg(field); - default: - return new Sv(field, scoreMode); - } - } - - // impl that works with single value per document - internal class Sv : TermsWithScoreCollector - { - private readonly BytesRef _spare = new BytesRef(); - private BinaryDocValues _fromDocTerms; - - internal Sv(string field, ScoreMode scoreMode) : base(field, scoreMode) - { - } - - public override void Collect(int doc) - { - _fromDocTerms.Get(doc, _spare); - int ord = _collectedTerms.Add(_spare); - if (ord < 0) - { - ord = -ord - 1; - } - else - { - if (ord >= _scoreSums.Length) - { - _scoreSums = ArrayUtil.Grow(_scoreSums); - } - } - - float current = _scorer.Score(); - float existing = _scoreSums[ord]; - if (existing.CompareTo(0.0f) == 0) - { - _scoreSums[ord] = current; - } - else - { - switch (_scoreMode) - { - case ScoreMode.Total: - _scoreSums[ord] = _scoreSums[ord] + current; - break; - case ScoreMode.Max: - if (current > existing) - { - _scoreSums[ord] = current; - } - break; - } - } - } - - public override AtomicReaderContext NextReader - { - set - { - _fromDocTerms = FieldCache.DEFAULT.GetTerms(value.AtomicReader, _field, false); - } - } - - public override bool AcceptsDocsOutOfOrder() - { - return base.AcceptsDocsOutOfOrder(); - } - - internal class Avg : Sv - { - private int[] _scoreCounts = new int[InitialArraySize]; - - internal Avg(string field) : base(field, ScoreMode.Avg) - { - } - - public override void Collect(int doc) - { - _fromDocTerms.Get(doc, _spare); - int ord = _collectedTerms.Add(_spare); - if (ord < 0) - { - ord = -ord - 1; - } - else - { - if (ord >= _scoreSums.Length) - { - _scoreSums = ArrayUtil.Grow(_scoreSums); - _scoreCounts = ArrayUtil.Grow(_scoreCounts); - } - } - - float current = _scorer.Score(); - float existing = _scoreSums[ord]; - if (existing.CompareTo(0.0f) == 0) - { - _scoreSums[ord] = current; - _scoreCounts[ord] = 1; - } - else - { - _scoreSums[ord] = _scoreSums[ord] + current; - _scoreCounts[ord]++; - } - } - - public override float[] ScoresPerTerm - { - get - { - if (_scoreCounts != null) - { - for (int i = 0; i < _scoreCounts.Length; i++) - { - _scoreSums[i] = _scoreSums[i] / _scoreCounts[i]; - } - _scoreCounts = null; - } - return _scoreSums; - } - } - } - } - - // impl that works with multiple values per document - internal class Mv : TermsWithScoreCollector - { - private SortedSetDocValues _fromDocTermOrds; - private readonly BytesRef _scratch = new BytesRef(); - - internal Mv(string field, ScoreMode scoreMode) : base(field, scoreMode) - { - } - - public override void Collect(int doc) - { - _fromDocTermOrds.Document = doc; - long ord; - while ((ord = _fromDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS) - { - _fromDocTermOrds.LookupOrd(ord, _scratch); - - int termId = _collectedTerms.Add(_scratch); - if (termId < 0) - { - termId = -termId - 1; - } - else - { - if (termId >= _scoreSums.Length) - { - _scoreSums = ArrayUtil.Grow(_scoreSums); - } - } - - switch (_scoreMode) - { - case ScoreMode.Total: - _scoreSums[termId] += _scorer.Score(); - break; - case ScoreMode.Max: - _scoreSums[termId] = Math.Max(_scoreSums[termId], _scorer.Score()); - break; - } - } - } - - //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: - //ORIGINAL LINE: @Override public void setNextReader(org.apache.lucene.index.AtomicReaderContext context) throws java.io.IOException - public override AtomicReaderContext NextReader - { - set - { - _fromDocTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, _field); - } - } - - public override bool AcceptsDocsOutOfOrder() - { - throw new NotImplementedException(); - } - - internal class Avg : Mv - { - private int[] _scoreCounts = new int[InitialArraySize]; - - internal Avg(string field) : base(field, ScoreMode.Avg) - { - } - - public override void Collect(int doc) - { - _fromDocTermOrds.Document = doc; - long ord; - while ((ord = _fromDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS) - { - _fromDocTermOrds.LookupOrd(ord, _scratch); - - int termId = _collectedTerms.Add(_scratch); - if (termId < 0) - { - termId = -termId - 1; - } - else - { - if (termId >= _scoreSums.Length) - { - _scoreSums = ArrayUtil.Grow(_scoreSums); - _scoreCounts = ArrayUtil.Grow(_scoreCounts); - } - } - - _scoreSums[termId] += _scorer.Score(); - _scoreCounts[termId]++; - } - } - - public override float[] ScoresPerTerm - { - get - { - if (_scoreCounts != null) - { - for (int i = 0; i < _scoreCounts.Length; i++) - { - _scoreSums[i] = _scoreSums[i] / _scoreCounts[i]; - } - _scoreCounts = null; - } - return _scoreSums; - } - } - } - } - - } -} \ No newline at end of file