From: synhershko@apache.org
To: commits@lucenenet.apache.org
Reply-To: lucene-net-dev@lucenenet.apache.org
Message-Id: <69471033c10a414fbe5f0e39a7b40f07@git.apache.org>
X-Mailer: ASF-Git Admin Mailer
Subject: git commit: More compilation fixes to Lucene.Net.Queries
Date: Mon, 27 Oct 2014 14:24:18 +0000 (UTC)

Repository: lucenenet
Updated Branches:
  refs/heads/master af4d125b5 -> b5afe7645

More compilation fixes to Lucene.Net.Queries

Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/b5afe764
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/b5afe764
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/b5afe764

Branch: refs/heads/master
Commit: b5afe764577860d1ecb5d55da424b3245d49c025
Parents: af4d125
Author: Itamar Syn-Hershko
Authored: Mon Oct 27 14:56:58 2014 +0200
Committer: Itamar Syn-Hershko
Committed: Mon Oct 27 14:56:58 2014 +0200

----------------------------------------------------------------------
 src/Lucene.Net.Queries/BoostingQuery.cs         |   2 +-
 src/Lucene.Net.Queries/CustomScoreProvider.cs   |   1 -
 src/Lucene.Net.Queries/CustomScoreQuery.cs      |   8 +-
 src/Lucene.Net.Queries/Function/BoostedQuery.cs |   8 +-
 src/Lucene.Net.Queries/Mlt/MoreLikeThisQuery.cs | 482 +++++++-------
 src/Lucene.Net.Queries/TermsFilter.cs           | 788 +++++++++----------
 6 files changed, 588 insertions(+), 701 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b5afe764/src/Lucene.Net.Queries/BoostingQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/BoostingQuery.cs b/src/Lucene.Net.Queries/BoostingQuery.cs
index 7a73eb5..bf0e762 100644
--- a/src/Lucene.Net.Queries/BoostingQuery.cs
+++ b/src/Lucene.Net.Queries/BoostingQuery.cs
@@ -46,7 +46,7 @@ namespace Lucene.Net.Queries
         public BoostingQuery(Query match, Query context, float boost)
         {
             this.match = match;
-            this.context = context.Clone(); // clone before boost
+            this.context = (Query) context.Clone(); // clone before boost
             this.boost = boost;
             this.context.Boost = 0.0f; // ignore context-only matches
         }
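
The recurring fix in this commit is the Clone() signature. The Java sources rely on covariant return types (an override of Query.clone() may declare that it returns a Query subtype), but a C# override must repeat the base signature exactly, so a port whose base Clone() returns object forces a cast at every call site. A minimal standalone sketch of the pattern, not the Lucene.NET sources themselves (TermQuery and CloneWithZeroBoost here are illustrative only):

    using System;

    public abstract class Query : ICloneable
    {
        public float Boost { get; set; }

        // Returns object, mirroring ICloneable; C# overrides cannot narrow this.
        public virtual object Clone() { return MemberwiseClone(); }
    }

    public class TermQuery : Query
    {
        public override object Clone() { return MemberwiseClone(); }
    }

    public static class Demo
    {
        public static Query CloneWithZeroBoost(Query context)
        {
            var copy = (Query)context.Clone(); // cast needed: Clone() returns object
            copy.Boost = 0.0f;                 // mutate the copy, not the caller's query
            return copy;
        }
    }

This is why BoostingQuery above, and CustomScoreQuery below, gain (Query) casts rather than any behavioral change.
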
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b5afe764/src/Lucene.Net.Queries/CustomScoreProvider.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/CustomScoreProvider.cs b/src/Lucene.Net.Queries/CustomScoreProvider.cs
index 42736a5..ea4d5ed 100644
--- a/src/Lucene.Net.Queries/CustomScoreProvider.cs
+++ b/src/Lucene.Net.Queries/CustomScoreProvider.cs
@@ -2,7 +2,6 @@
 using Lucene.Net.Index;
 using Lucene.Net.Queries.Function;
 using Lucene.Net.Search;
-using org.apache.lucene.queries;
 
 namespace Lucene.Net.Queries
 {


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b5afe764/src/Lucene.Net.Queries/CustomScoreQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/CustomScoreQuery.cs b/src/Lucene.Net.Queries/CustomScoreQuery.cs
index 770422b..490d351 100644
--- a/src/Lucene.Net.Queries/CustomScoreQuery.cs
+++ b/src/Lucene.Net.Queries/CustomScoreQuery.cs
@@ -114,14 +114,14 @@ namespace Lucene.Net.Queries
         /*(non-Javadoc) @see org.apache.lucene.search.Query#clone() */
-        public override CustomScoreQuery Clone()
+        public override object Clone()
         {
-            CustomScoreQuery clone = (CustomScoreQuery) base.Clone();
-            clone.subQuery = subQuery.Clone();
+            var clone = (CustomScoreQuery) base.Clone();
+            clone.subQuery = (Query)subQuery.Clone();
             clone.scoringQueries = new Query[scoringQueries.Length];
             for (int i = 0; i < scoringQueries.Length; i++)
             {
-                clone.scoringQueries[i] = scoringQueries[i].Clone();
+                clone.scoringQueries[i] = (Query)scoringQueries[i].Clone();
             }
             return clone;
         }


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b5afe764/src/Lucene.Net.Queries/Function/BoostedQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Function/BoostedQuery.cs b/src/Lucene.Net.Queries/Function/BoostedQuery.cs
index 157d238..1798987 100644
--- a/src/Lucene.Net.Queries/Function/BoostedQuery.cs
+++ b/src/Lucene.Net.Queries/Function/BoostedQuery.cs
@@ -58,17 +58,17 @@ namespace Lucene.Net.Queries.Function
 
         public override Query Rewrite(IndexReader reader)
         {
-            Query newQ = q.Rewrite(reader);
-            if (newQ == q)
+            var newQ = q.Rewrite(reader);
+            if (Equals(newQ, q))
             {
                 return this;
             }
-            BoostedQuery bq = (BoostedQuery)this.MemberwiseClone();
+            var bq = (BoostedQuery)this.MemberwiseClone();
             bq.q = newQ;
             return bq;
         }
 
-        public override void ExtractTerms(HashSet<Term> terms)
+        public override void ExtractTerms(ISet<Term> terms)
         {
             q.ExtractTerms(terms);
         }
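
For context on the Rewrite change above: Lucene's rewrite protocol has the searcher call Rewrite repeatedly until the query stops changing, so returning this when the inner query did not rewrite is what terminates that loop. A hedged sketch of that contract against the port's types (FullyRewrite is illustrative, not an API in this codebase); the switch from newQ == q to Equals(newQ, q) preserves termination as long as Equals falls back to reference identity for unchanged queries:

    public static Query FullyRewrite(Query q, IndexReader reader)
    {
        while (true)
        {
            Query rewritten = q.Rewrite(reader);
            if (ReferenceEquals(rewritten, q))
            {
                return q; // fixed point reached: nothing rewrote itself this pass
            }
            q = rewritten;
        }
    }
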
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b5afe764/src/Lucene.Net.Queries/Mlt/MoreLikeThisQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Mlt/MoreLikeThisQuery.cs b/src/Lucene.Net.Queries/Mlt/MoreLikeThisQuery.cs
index c9dcbe9..b5630bd 100644
--- a/src/Lucene.Net.Queries/Mlt/MoreLikeThisQuery.cs
+++ b/src/Lucene.Net.Queries/Mlt/MoreLikeThisQuery.cs
@@ -1,298 +1,194 @@
-using System.Collections.Generic;
-/*
+/*
  * Created on 25-Jan-2006
  */
-using Lucene.Net.Queries.Mlt;
-
-namespace org.apache.lucene.queries.mlt
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Analysis;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Queries.Mlt
 {
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements. See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License. You may obtain a copy of the License at
-     *
-     * http://www.apache.org/licenses/LICENSE-2.0
-     *
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    using Analyzer = org.apache.lucene.analysis.Analyzer;
-    using IndexReader = org.apache.lucene.index.IndexReader;
-    using BooleanClause = org.apache.lucene.search.BooleanClause;
-    using BooleanQuery = org.apache.lucene.search.BooleanQuery;
-    using Query = org.apache.lucene.search.Query;
-
-
-    /// <summary>
-    /// A simple wrapper for MoreLikeThis for use in scenarios where a Query object is required eg
-    /// in custom QueryParser extensions. At query.rewrite() time the reader is used to construct the
-    /// actual MoreLikeThis object and obtain the real Query object.
-    /// </summary>
-    public class MoreLikeThisQuery : Query
-    {
-
-        private string likeText;
-        private string[] moreLikeFields;
-        private Analyzer analyzer;
-        private readonly string fieldName;
-        private float percentTermsToMatch = 0.3f;
-        private int minTermFrequency = 1;
-        private int maxQueryTerms = 5;
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: private java.util.Set<?> stopWords = null;
-        private HashSet<string> stopWords = null;
-        private int minDocFreq = -1;
-
-        /// <param name="moreLikeFields"> fields used for similarity measure </param>
-        public MoreLikeThisQuery(string likeText, string[] moreLikeFields, Analyzer analyzer, string fieldName)
-        {
-            this.likeText = likeText;
-            this.moreLikeFields = moreLikeFields;
-            this.analyzer = analyzer;
-            this.fieldName = fieldName;
-        }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public org.apache.lucene.search.Query rewrite(org.apache.lucene.index.IndexReader reader) throws java.io.IOException
-        public override Query rewrite(IndexReader reader)
-        {
-            MoreLikeThis mlt = new MoreLikeThis(reader);
-
-            mlt.FieldNames = moreLikeFields;
-            mlt.Analyzer = analyzer;
-            mlt.MinTermFreq = minTermFrequency;
-            if (minDocFreq >= 0)
-            {
-                mlt.MinDocFreq = minDocFreq;
-            }
-            mlt.MaxQueryTerms = maxQueryTerms;
-            mlt.StopWords = stopWords;
-            BooleanQuery bq = (BooleanQuery) mlt.like(new StringReader(likeText), fieldName);
-            BooleanClause[] clauses = bq.Clauses;
-            //make at least half the terms match
-            bq.MinimumNumberShouldMatch = (int)(clauses.Length * percentTermsToMatch);
-            return bq;
-        }
-
-        /* (non-Javadoc)
-         * @see org.apache.lucene.search.Query#toString(java.lang.String)
-         */
-        public override string ToString(string field)
-        {
-            return "like:" + likeText;
-        }
-
-        public virtual float PercentTermsToMatch
-        {
-            get
-            {
-                return percentTermsToMatch;
-            }
-            set
-            {
-                this.percentTermsToMatch = value;
-            }
-        }
-
-
-        public virtual Analyzer Analyzer
-        {
-            get
-            {
-                return analyzer;
-            }
-            set
-            {
-                this.analyzer = value;
-            }
-        }
-
-
-        public virtual string LikeText
-        {
-            get
-            {
-                return likeText;
-            }
-            set
-            {
-                this.likeText = value;
-            }
-        }
-
-
-        public virtual int MaxQueryTerms
-        {
-            get
-            {
-                return maxQueryTerms;
-            }
-            set
-            {
-                this.maxQueryTerms = value;
-            }
-        }
-
-
-        public virtual int MinTermFrequency
-        {
-            get
-            {
-                return minTermFrequency;
-            }
-            set
-            {
-                this.minTermFrequency = value;
-            }
-        }
-
-
-        public virtual string[] MoreLikeFields
-        {
-            get
-            {
-                return moreLikeFields;
-            }
-            set
-            {
-                this.moreLikeFields = value;
-            }
-        }
-
-
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: public java.util.Set<?> getStopWords()
-        public virtual HashSet<string> StopWords
-        {
-            get
-            {
-                return stopWords;
-            }
-            set
-            {
-                this.stopWords = value;
-            }
-        }
-
-
-        public virtual int MinDocFreq
-        {
-            get
-            {
-                return minDocFreq;
-            }
-            set
-            {
-                this.minDocFreq = value;
-            }
-        }
-
-
-        public override int GetHashCode()
-        {
-            const int prime = 31;
-            int result = base.GetHashCode();
-            result = prime * result + ((analyzer == null) ? 0 : analyzer.GetHashCode());
-            result = prime * result + ((fieldName == null) ? 0 : fieldName.GetHashCode());
-            result = prime * result + ((likeText == null) ? 0 : likeText.GetHashCode());
-            result = prime * result + maxQueryTerms;
-            result = prime * result + minDocFreq;
-            result = prime * result + minTermFrequency;
-            result = prime * result + Arrays.GetHashCode(moreLikeFields);
-            result = prime * result + Number.FloatToIntBits(percentTermsToMatch);
-            result = prime * result + ((stopWords == null) ? 0 : stopWords.GetHashCode());
-            return result;
-        }
-
-        public override bool Equals(object obj)
-        {
-            if (this == obj)
-            {
-                return true;
-            }
-            if (!base.Equals(obj))
-            {
-                return false;
-            }
-            if (this.GetType() != obj.GetType())
-            {
-                return false;
-            }
-            MoreLikeThisQuery other = (MoreLikeThisQuery) obj;
-            if (analyzer == null)
-            {
-                if (other.analyzer != null)
-                {
-                    return false;
-                }
-            }
-            else if (!analyzer.Equals(other.analyzer))
-            {
-                return false;
-            }
-            if (fieldName == null)
-            {
-                if (other.fieldName != null)
-                {
-                    return false;
-                }
-            }
-            else if (!fieldName.Equals(other.fieldName))
-            {
-                return false;
-            }
-            if (likeText == null)
-            {
-                if (other.likeText != null)
-                {
-                    return false;
-                }
-            }
-            else if (!likeText.Equals(other.likeText))
-            {
-                return false;
-            }
-            if (maxQueryTerms != other.maxQueryTerms)
-            {
-                return false;
-            }
-            if (minDocFreq != other.minDocFreq)
-            {
-                return false;
-            }
-            if (minTermFrequency != other.minTermFrequency)
-            {
-                return false;
-            }
-            if (!Arrays.Equals(moreLikeFields, other.moreLikeFields))
-            {
-                return false;
-            }
-            if (Number.FloatToIntBits(percentTermsToMatch) != Number.FloatToIntBits(other.percentTermsToMatch))
-            {
-                return false;
-            }
-            if (stopWords == null)
-            {
-                if (other.stopWords != null)
-                {
-                    return false;
-                }
-            }
-            else if (!stopWords.Equals(other.stopWords))
-            {
-                return false;
-            }
-            return true;
-        }
-    }
-
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements. See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License. You may obtain a copy of the License at
+     *
+     * http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A simple wrapper for MoreLikeThis for use in scenarios where a Query object is required eg
+    /// in custom QueryParser extensions. At query.rewrite() time the reader is used to construct the
+    /// actual MoreLikeThis object and obtain the real Query object.
+    /// </summary>
+    public class MoreLikeThisQuery : Query
+    {
+        private readonly string fieldName;
+
+        /// <param name="moreLikeFields"> fields used for similarity measure </param>
+        public MoreLikeThisQuery(string likeText, string[] moreLikeFields, Analyzer analyzer, string fieldName)
+        {
+            this.LikeText = likeText;
+            this.MoreLikeFields = moreLikeFields;
+            this.Analyzer = analyzer;
+            this.fieldName = fieldName;
+            StopWords = null;
+
+            PercentTermsToMatch = 0.3f;
+            MinTermFrequency = 1;
+            MaxQueryTerms = 5;
+            MinDocFreq = -1;
+        }
+
+        public override Query Rewrite(IndexReader reader)
+        {
+            var mlt = new MoreLikeThis(reader) { FieldNames = MoreLikeFields, Analyzer = Analyzer, MinTermFreq = MinTermFrequency };
+
+            if (MinDocFreq >= 0)
+            {
+                mlt.MinDocFreq = MinDocFreq;
+            }
+            mlt.MaxQueryTerms = MaxQueryTerms;
+            mlt.StopWords = StopWords;
+            var bq = (BooleanQuery)mlt.Like(new StringReader(LikeText), fieldName);
+            var clauses = bq.Clauses;
+            //make at least half the terms match
+            bq.MinimumNumberShouldMatch = (int)(clauses.Length * PercentTermsToMatch);
+            return bq;
+        }
+
+        /* (non-Javadoc)
+         * @see org.apache.lucene.search.Query#toString(java.lang.String)
+         */
+        public override string ToString(string field)
+        {
+            return "like:" + LikeText;
+        }
+
+        public float PercentTermsToMatch { get; set; }
+
+        public Analyzer Analyzer { get; set; }
+
+        public string LikeText { get; set; }
+
+        public int MaxQueryTerms { get; set; }
+
+        public int MinTermFrequency { get; set; }
+
+        public string[] MoreLikeFields { get; set; }
+
+        public HashSet<string> StopWords { get; set; }
+
+        public int MinDocFreq { get; set; }
+
+        public override int GetHashCode()
+        {
+            const int prime = 31;
+            int result = base.GetHashCode();
+            result = prime * result + ((Analyzer == null) ? 0 : Analyzer.GetHashCode());
+            result = prime * result + ((fieldName == null) ? 0 : fieldName.GetHashCode());
+            result = prime * result + ((LikeText == null) ? 0 : LikeText.GetHashCode());
+            result = prime * result + MaxQueryTerms;
+            result = prime * result + MinDocFreq;
+            result = prime * result + MinTermFrequency;
+            result = prime * result + Arrays.GetHashCode(MoreLikeFields);
+            result = prime * result + Number.FloatToIntBits(PercentTermsToMatch);
+            result = prime * result + ((StopWords == null) ? 0 : StopWords.GetHashCode());
+            return result;
+        }
+
+        public override bool Equals(object obj)
+        {
+            if (this == obj)
+            {
+                return true;
+            }
+            if (!base.Equals(obj))
+            {
+                return false;
+            }
+            if (this.GetType() != obj.GetType())
+            {
+                return false;
+            }
+            var other = (MoreLikeThisQuery)obj;
+            if (Analyzer == null)
+            {
+                if (other.Analyzer != null)
+                {
+                    return false;
+                }
+            }
+            else if (!Analyzer.Equals(other.Analyzer))
+            {
+                return false;
+            }
+            if (fieldName == null)
+            {
+                if (other.fieldName != null)
+                {
+                    return false;
+                }
+            }
+            else if (!fieldName.Equals(other.fieldName))
+            {
+                return false;
+            }
+            if (LikeText == null)
+            {
+                if (other.LikeText != null)
+                {
+                    return false;
+                }
+            }
+            else if (!LikeText.Equals(other.LikeText))
+            {
+                return false;
+            }
+            if (MaxQueryTerms != other.MaxQueryTerms)
+            {
+                return false;
+            }
+            if (MinDocFreq != other.MinDocFreq)
+            {
+                return false;
+            }
+            if (MinTermFrequency != other.MinTermFrequency)
+            {
+                return false;
+            }
+            if (!Arrays.Equals(MoreLikeFields, other.MoreLikeFields))
+            {
+                return false;
+            }
+            if (Number.FloatToIntBits(PercentTermsToMatch) != Number.FloatToIntBits(other.PercentTermsToMatch))
+            {
+                return false;
+            }
+            if (StopWords == null)
+            {
+                if (other.StopWords != null)
+                {
+                    return false;
+                }
+            }
+            else if (!StopWords.Equals(other.StopWords))
+            {
+                return false;
+            }
+            return true;
+        }
+    }
 }
\ No newline at end of file
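
A hedged usage sketch based only on the API visible in the new file above; the analyzer and indexReader variables are placeholders, not part of the commit:

    // Build a "more like this" query from a piece of text.
    var mltQuery = new MoreLikeThisQuery(
        "open source search engine library",   // likeText
        new[] { "title", "body" },             // moreLikeFields
        analyzer,                              // any Lucene.Net Analyzer
        "body");                               // fieldName the like-text is analyzed against

    // Rewrite builds the real BooleanQuery. With the default
    // PercentTermsToMatch of 0.3f, a rewritten query with 10 optional
    // term clauses gets MinimumNumberShouldMatch = (int)(10 * 0.3f) = 3.
    Query real = mltQuery.Rewrite(indexReader);
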
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b5afe764/src/Lucene.Net.Queries/TermsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/TermsFilter.cs b/src/Lucene.Net.Queries/TermsFilter.cs
index 421b056..1585cdd 100644
--- a/src/Lucene.Net.Queries/TermsFilter.cs
+++ b/src/Lucene.Net.Queries/TermsFilter.cs
@@ -10,403 +10,395 @@ using Lucene.Net.Util;
 
 namespace Lucene.Net.Queries
 {
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements. See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License. You may obtain a copy of the License at
-     *
-     * http://www.apache.org/licenses/LICENSE-2.0
-     *
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements. See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License. You may obtain a copy of the License at
+     *
+     * http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
 
-    /// <summary>
-    /// Constructs a filter for docs matching any of the terms added to this class.
-    /// Unlike a RangeFilter this can be used for filtering on multiple terms that are not necessarily in
-    /// a sequence. An example might be a collection of primary keys from a database query result or perhaps
-    /// a choice of "category" labels picked by the end user. As a filter, this is much faster than the
-    /// equivalent query (a BooleanQuery with many "should" TermQueries)
-    /// </summary>
-    public sealed class TermsFilter : Filter
-    {
-
-        /*
-         * this class is often used for large number of terms in a single field.
-         * to optimize for this case and to be filter-cache friendly we
-         * serialize all terms into a single byte array and store offsets
-         * in a parallel array to keep the # of object constant and speed up
-         * equals / hashcode.
-         *
-         * This adds quite a bit of complexity but allows large term filters to
-         * be efficient for GC and cache-lookups
-         */
-        private readonly int[] offsets;
-        private readonly sbyte[] termsBytes;
-        private readonly TermsAndField[] termsAndFields;
-        private readonly int hashCode_Renamed; // cached hashcode for fast cache lookups
-        private const int PRIME = 31;
-
-        /// <summary>
-        /// Creates a new <see cref="TermsFilter"/> from the given list. The list
-        /// can contain duplicate terms and multiple fields.
-        /// </summary>
-        public TermsFilter(IList<Term> terms) : this(new FieldAndTermEnumAnonymousInnerClassHelper(this, terms), terms.Count)
-        {
-        }
-
-        private class FieldAndTermEnumAnonymousInnerClassHelper : FieldAndTermEnum
-        {
-            private readonly TermsFilter outerInstance;
-
-            private IList<Term> terms;
-
-            public FieldAndTermEnumAnonymousInnerClassHelper(TermsFilter outerInstance, IList<Term> terms)
-            {
-                this.outerInstance = outerInstance;
-                this.terms = terms;
-                iter = Sort(terms).GetEnumerator();
-            }
-
-            // we need to sort for deduplication and to have a common cache key
-            readonly IEnumerator<Term> iter;
-            public override BytesRef Next()
-            {
-                if (iter.HasNext())
-                {
-                    Term next = iter.next();
-                    field = next.field();
-                    return next.bytes();
-                }
-                return null;
-            }
-        }
-
-        /// <summary>
-        /// Creates a new <see cref="TermsFilter"/> from the given list for
-        /// a single field.
-        /// </summary>
-        public TermsFilter(string field, IList<BytesRef> terms) : this(new FieldAndTermEnumAnonymousInnerClassHelper2(this, field, terms), terms.Count)
-        {
-        }
-
-        private class FieldAndTermEnumAnonymousInnerClassHelper2 : FieldAndTermEnum
-        {
-            private readonly TermsFilter outerInstance;
-
-            private IList<BytesRef> terms;
-
-            public FieldAndTermEnumAnonymousInnerClassHelper2(TermsFilter outerInstance, string field, IList<BytesRef> terms) : base(field)
-            {
-                this.outerInstance = outerInstance;
-                this.terms = terms;
-                iter = Sort(terms).GetEnumerator();
-            }
-
-            // we need to sort for deduplication and to have a common cache key
-            readonly IEnumerator<BytesRef> iter;
-            public override BytesRef Next()
-            {
-                if (iter.HasNext())
-                {
-                    return iter.Next();
-                }
-                return null;
-            }
-        }
-
-        /// <summary>
-        /// Creates a new <see cref="TermsFilter"/> from the given array for
-        /// a single field.
-        /// </summary>
-        public TermsFilter(string field, params BytesRef[] terms) : this(field, Arrays.AsList(terms))
-        {
-            // this ctor prevents unnecessary Term creations
-        }
-
-        /// <summary>
-        /// Creates a new <see cref="TermsFilter"/> from the given array. The array can
-        /// contain duplicate terms and multiple fields.
-        /// </summary>
-        public TermsFilter(params Term[] terms) : this(terms.ToList())
-        {
-        }
-
-
-        private TermsFilter(FieldAndTermEnum iter, int length)
-        {
-            // TODO: maybe use oal.index.PrefixCodedTerms instead?
-            // If number of terms is more than a few hundred it
-            // should be a win
-
-            // TODO: we also pack terms in FieldCache/DocValues
-            // ... maybe we can refactor to share that code
-
-            // TODO: yet another option is to build the union of the terms in
-            // an automaton an call intersect on the termsenum if the density is high
-
-            int hash = 9;
-            sbyte[] serializedTerms = new sbyte[0];
-            this.offsets = new int[length + 1];
-            int lastEndOffset = 0;
-            int index = 0;
-            List<TermsAndField> termsAndFields = new List<TermsAndField>();
-            TermsAndField lastTermsAndField = null;
-            BytesRef previousTerm = null;
-            string previousField = null;
-            BytesRef currentTerm;
-            string currentField;
-            while ((currentTerm = iter.Next()) != null)
-            {
-                currentField = iter.Field();
-                if (currentField == null)
-                {
-                    throw new System.ArgumentException("Field must not be null");
-                }
-                if (previousField != null)
-                {
-                    // deduplicate
-                    if (previousField.Equals(currentField))
-                    {
-                        if (previousTerm.BytesEquals(currentTerm))
-                        {
-                            continue;
-                        }
-                    }
-                    else
-                    {
-                        int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
-                        lastTermsAndField = new TermsAndField(start, index, previousField);
-                        termsAndFields.Add(lastTermsAndField);
-                    }
-                }
-                hash = PRIME * hash + currentField.GetHashCode();
-                hash = PRIME * hash + currentTerm.GetHashCode();
-                if (serializedTerms.Length < lastEndOffset + currentTerm.length)
-                {
-                    serializedTerms = ArrayUtil.grow(serializedTerms, lastEndOffset + currentTerm.length);
-                }
-                Array.Copy(currentTerm.bytes, currentTerm.offset, serializedTerms, lastEndOffset, currentTerm.length);
-                offsets[index] = lastEndOffset;
-                lastEndOffset += currentTerm.length;
-                index++;
-                previousTerm = currentTerm;
-                previousField = currentField;
-            }
-            offsets[index] = lastEndOffset;
-            int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
-            lastTermsAndField = new TermsAndField(start, index, previousField);
-            termsAndFields.Add(lastTermsAndField);
-            this.termsBytes = ArrayUtil.Shrink(serializedTerms, lastEndOffset);
-            this.termsAndFields = termsAndFields.ToArray();
-            this.hashCode_Renamed = hash;
-
-        }
-
-        public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
-        {
-            AtomicReader reader = context.AtomicReader;
-            FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time
-            Fields fields = reader.Fields;
-            BytesRef spare = new BytesRef(this.termsBytes);
-            if (fields == null)
-            {
-                return result;
-            }
-            Terms terms = null;
-            TermsEnum termsEnum = null;
-            DocsEnum docs = null;
-            foreach (TermsAndField termsAndField in this.termsAndFields)
-            {
-                if ((terms = fields.Terms(termsAndField.field)) != null)
-                {
-                    termsEnum = terms.iterator(termsEnum); // this won't return null
-                    for (int i = termsAndField.start; i < termsAndField.end; i++)
-                    {
-                        spare.offset = offsets[i];
-                        spare.length = offsets[i + 1] - offsets[i];
-                        if (termsEnum.seekExact(spare))
-                        {
-                            docs = termsEnum.docs(acceptDocs, docs, DocsEnum.FLAG_NONE); // no freq since we don't need them
-                            if (result == null)
-                            {
-                                if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
-                                {
-                                    result = new FixedBitSet(reader.maxDoc());
-                                    // lazy init but don't do it in the hot loop since we could read many docs
-                                    result.set(docs.docID());
-                                }
-                            }
-                            while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
-                            {
-                                result.set(docs.docID());
-                            }
-                        }
-                    }
-                }
-            }
-            return result;
-        }
-
-        public override bool Equals(object obj)
-        {
-            if (this == obj)
-            {
-                return true;
-            }
-            if ((obj == null) || (obj.GetType() != this.GetType()))
-            {
-                return false;
-            }
-
-            TermsFilter test = (TermsFilter) obj;
-            // first check the fields before even comparing the bytes
-            if (test.hashCode_Renamed == hashCode_Renamed && Arrays.Equals(termsAndFields, test.termsAndFields))
-            {
-                int lastOffset = termsAndFields[termsAndFields.Length - 1].end;
-                // compare offsets since we sort they must be identical
-                if (ArrayUtil.Equals(offsets, 0, test.offsets, 0, lastOffset + 1))
-                {
-                    // straight byte comparison since we sort they must be identical
-                    return ArrayUtil.Equals(termsBytes, 0, test.termsBytes, 0, offsets[lastOffset]);
-                }
-            }
-            return false;
-        }
-
-        public override int GetHashCode()
-        {
-            return hashCode_Renamed;
-        }
-
-        public override string ToString()
-        {
-            StringBuilder builder = new StringBuilder();
-            BytesRef spare = new BytesRef(termsBytes);
-            bool first = true;
-            for (int i = 0; i < termsAndFields.Length; i++)
-            {
-                TermsAndField current = termsAndFields[i];
-                for (int j = current.start; j < current.end; j++)
-                {
-                    spare.Offset = offsets[j];
-                    spare.Length = offsets[j + 1] - offsets[j];
-                    if (!first)
-                    {
-                        builder.Append(' ');
-                    }
-                    first = false;
-                    builder.Append(current.field).Append(':');
-                    builder.Append(spare.Utf8ToString());
-                }
-            }
-
-            return builder.ToString();
-        }
-
-        private sealed class TermsAndField
-        {
-            internal readonly int start;
-            internal readonly int end;
-            internal readonly string field;
-
-
-            internal TermsAndField(int start, int end, string field) : base()
-            {
-                this.start = start;
-                this.end = end;
-                this.field = field;
-            }
-
-            public override int GetHashCode()
-            {
-                const int prime = 31;
-                int result = 1;
-                result = prime * result + ((field == null) ? 0 : field.GetHashCode());
-                result = prime * result + end;
-                result = prime * result + start;
-                return result;
-            }
-
-            public override bool Equals(object obj)
-            {
-                if (this == obj)
-                {
-                    return true;
-                }
-                if (obj == null)
-                {
-                    return false;
-                }
-                if (this.GetType() != obj.GetType())
-                {
-                    return false;
-                }
-                TermsAndField other = (TermsAndField) obj;
-                if (field == null)
-                {
-                    if (other.field != null)
-                    {
-                        return false;
-                    }
-                }
-                else if (!field.Equals(other.field))
-                {
-                    return false;
-                }
-                if (end != other.end)
-                {
-                    return false;
-                }
-                if (start != other.start)
-                {
-                    return false;
-                }
-                return true;
-            }
-
-        }
-
-        private abstract class FieldAndTermEnum
-        {
-            protected internal string field;
-
-            public abstract BytesRef Next();
-
-            public FieldAndTermEnum()
-            {
-            }
-
-            public FieldAndTermEnum(string field)
-            {
-                this.field = field;
-            }
-
-            public virtual string Field
-            {
-                get { return field; }
-            }
-        }
-
-        /*
-         * simple utility that returns the in-place sorted list
-         */
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: private static <T extends Comparable<? super T>> java.util.List<T> sort(java.util.List<T> toSort)
-        private static IList<T> Sort<T>(IList<T> toSort) where T : Comparable
-        {
-            if (toSort.Count == 0)
-            {
-                throw new System.ArgumentException("no terms provided");
-            }
-            toSort.Sort();
-            return toSort;
-        }
-    }
-
+    /// <summary>
+    /// Constructs a filter for docs matching any of the terms added to this class.
+    /// Unlike a RangeFilter this can be used for filtering on multiple terms that are not necessarily in
+    /// a sequence. An example might be a collection of primary keys from a database query result or perhaps
+    /// a choice of "category" labels picked by the end user. As a filter, this is much faster than the
+    /// equivalent query (a BooleanQuery with many "should" TermQueries)
+    /// </summary>
+    public sealed class TermsFilter : Filter
+    {
+
+        /*
+         * this class is often used for large number of terms in a single field.
+         * to optimize for this case and to be filter-cache friendly we
+         * serialize all terms into a single byte array and store offsets
+         * in a parallel array to keep the # of object constant and speed up
+         * equals / hashcode.
+         *
+         * This adds quite a bit of complexity but allows large term filters to
+         * be efficient for GC and cache-lookups
+         */
+        private readonly int[] offsets;
+        private readonly sbyte[] termsBytes;
+        private readonly TermsAndField[] termsAndFields;
+        private readonly int hashCode_Renamed; // cached hashcode for fast cache lookups
+        private const int PRIME = 31;
+
+        /// <summary>
+        /// Creates a new <see cref="TermsFilter"/> from the given list. The list
+        /// can contain duplicate terms and multiple fields.
+        /// </summary>
+        public TermsFilter(List<Term> terms)
+            : this(new FieldAndTermEnumAnonymousInnerClassHelper(this, terms), terms.Count)
+        {
+        }
+
+        private class FieldAndTermEnumAnonymousInnerClassHelper : FieldAndTermEnum
+        {
+            private readonly TermsFilter outerInstance;
+
+            private IList<Term> terms;
+
+            public FieldAndTermEnumAnonymousInnerClassHelper(TermsFilter outerInstance, List<Term> terms)
+            {
+                this.outerInstance = outerInstance;
+                this.terms = terms;
+                terms.Sort();
+                iter = terms.GetEnumerator();
+            }
+
+            // we need to sort for deduplication and to have a common cache key
+            readonly IEnumerator<Term> iter;
+            public override BytesRef Next()
+            {
+                if (iter.MoveNext())
+                {
+                    var next = iter.Current;
+                    field = next.Field();
+                    return next.Bytes();
+                }
+                return null;
+            }
+        }
+
+        /// <summary>
+        /// Creates a new <see cref="TermsFilter"/> from the given list for
+        /// a single field.
+        /// </summary>
+        public TermsFilter(string field, List<BytesRef> terms)
+            : this(new FieldAndTermEnumAnonymousInnerClassHelper2(this, field, terms), terms.Count)
+        {
+        }
+
+        private class FieldAndTermEnumAnonymousInnerClassHelper2 : FieldAndTermEnum
+        {
+            private readonly TermsFilter outerInstance;
+
+            private IList<BytesRef> terms;
+
+            public FieldAndTermEnumAnonymousInnerClassHelper2(TermsFilter outerInstance, string field, List<BytesRef> terms)
+                : base(field)
+            {
+                this.outerInstance = outerInstance;
+                this.terms = terms;
+                terms.Sort();
+                iter = terms.GetEnumerator();
+            }
+
+            // we need to sort for deduplication and to have a common cache key
+            readonly IEnumerator<BytesRef> iter;
+            public override BytesRef Next()
+            {
+                if (iter.MoveNext())
+                {
+                    return iter.Current;
+                }
+                return null;
+            }
+        }
+
+        /// <summary>
+        /// Creates a new <see cref="TermsFilter"/> from the given array for
+        /// a single field.
+        /// </summary>
+        public TermsFilter(string field, params BytesRef[] terms)
+            : this(field, Arrays.AsList(terms))
+        {
+            // this ctor prevents unnecessary Term creations
+        }
+
+        /// <summary>
+        /// Creates a new <see cref="TermsFilter"/> from the given array. The array can
+        /// contain duplicate terms and multiple fields.
+        /// </summary>
+        public TermsFilter(params Term[] terms)
+            : this(terms.ToList())
+        {
+        }
+
+
+        private TermsFilter(FieldAndTermEnum iter, int length)
+        {
+            // TODO: maybe use oal.index.PrefixCodedTerms instead?
+            // If number of terms is more than a few hundred it
+            // should be a win
+
+            // TODO: we also pack terms in FieldCache/DocValues
+            // ... maybe we can refactor to share that code
+
+            // TODO: yet another option is to build the union of the terms in
+            // an automaton an call intersect on the termsenum if the density is high
+
+            int hash = 9;
+            sbyte[] serializedTerms = new sbyte[0];
+            this.offsets = new int[length + 1];
+            int lastEndOffset = 0;
+            int index = 0;
+            var termsAndFields = new List<TermsAndField>();
+            TermsAndField lastTermsAndField = null;
+            BytesRef previousTerm = null;
+            string previousField = null;
+            BytesRef currentTerm;
+            string currentField;
+            while ((currentTerm = iter.Next()) != null)
+            {
+                currentField = iter.Field;
+                if (currentField == null)
+                {
+                    throw new System.ArgumentException("Field must not be null");
+                }
+                if (previousField != null)
+                {
+                    // deduplicate
+                    if (previousField.Equals(currentField))
+                    {
+                        if (previousTerm.BytesEquals(currentTerm))
+                        {
+                            continue;
+                        }
+                    }
+                    else
+                    {
+                        int _start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
+                        lastTermsAndField = new TermsAndField(_start, index, previousField);
+                        termsAndFields.Add(lastTermsAndField);
+                    }
+                }
+                hash = PRIME * hash + currentField.GetHashCode();
+                hash = PRIME * hash + currentTerm.GetHashCode();
+                if (serializedTerms.Length < lastEndOffset + currentTerm.Length)
+                {
+                    serializedTerms = ArrayUtil.Grow(serializedTerms, lastEndOffset + currentTerm.Length);
+                }
+                Array.Copy(currentTerm.Bytes, currentTerm.Offset, serializedTerms, lastEndOffset, currentTerm.Length);
+                offsets[index] = lastEndOffset;
+                lastEndOffset += currentTerm.Length;
+                index++;
+                previousTerm = currentTerm;
+                previousField = currentField;
+            }
+            offsets[index] = lastEndOffset;
+            int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
+            lastTermsAndField = new TermsAndField(start, index, previousField);
+            termsAndFields.Add(lastTermsAndField);
+            this.termsBytes = ArrayUtil.Shrink(serializedTerms, lastEndOffset);
+            this.termsAndFields = termsAndFields.ToArray();
+            this.hashCode_Renamed = hash;
+
+        }
+
+        public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
+        {
+            AtomicReader reader = context.AtomicReader;
+            FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time
+            Fields fields = reader.Fields;
+            BytesRef spare = new BytesRef(this.termsBytes);
+            if (fields == null)
+            {
+                return result;
+            }
+            Terms terms = null;
+            TermsEnum termsEnum = null;
+            DocsEnum docs = null;
+            foreach (TermsAndField termsAndField in this.termsAndFields)
+            {
+                if ((terms = fields.Terms(termsAndField.field)) != null)
+                {
+                    termsEnum = terms.Iterator(termsEnum); // this won't return null
+                    for (int i = termsAndField.start; i < termsAndField.end; i++)
+                    {
+                        spare.Offset = offsets[i];
+                        spare.Length = offsets[i + 1] - offsets[i];
+                        if (termsEnum.SeekExact(spare))
+                        {
+                            docs = termsEnum.Docs(acceptDocs, docs, DocsEnum.FLAG_NONE); // no freq since we don't need them
+                            if (result == null)
+                            {
+                                if (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
+                                {
+                                    result = new FixedBitSet(reader.MaxDoc);
+                                    // lazy init but don't do it in the hot loop since we could read many docs
+                                    result.Set(docs.DocID());
+                                }
+                            }
+                            while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
+                            {
+                                result.Set(docs.DocID());
+                            }
+                        }
+                    }
+                }
+            }
+            return result;
+        }
+
+        public override bool Equals(object obj)
+        {
+            if (this == obj)
+            {
+                return true;
+            }
+            if ((obj == null) || (obj.GetType() != this.GetType()))
+            {
+                return false;
+            }
+
+            var test = (TermsFilter)obj;
+            // first check the fields before even comparing the bytes
+            if (test.hashCode_Renamed == hashCode_Renamed && Arrays.Equals(termsAndFields, test.termsAndFields))
+            {
+                int lastOffset = termsAndFields[termsAndFields.Length - 1].end;
+                // compare offsets since we sort they must be identical
+                if (ArrayUtil.Equals(offsets, 0, test.offsets, 0, lastOffset + 1))
+                {
+                    // straight byte comparison since we sort they must be identical
+                    return ArrayUtil.Equals(termsBytes, 0, test.termsBytes, 0, offsets[lastOffset]);
+                }
+            }
+            return false;
+        }
+
+        public override int GetHashCode()
+        {
+            return hashCode_Renamed;
+        }
+
+        public override string ToString()
+        {
+            var builder = new StringBuilder();
+            var spare = new BytesRef(termsBytes);
+            bool first = true;
+            for (int i = 0; i < termsAndFields.Length; i++)
+            {
+                TermsAndField current = termsAndFields[i];
+                for (int j = current.start; j < current.end; j++)
+                {
+                    spare.Offset = offsets[j];
+                    spare.Length = offsets[j + 1] - offsets[j];
+                    if (!first)
+                    {
+                        builder.Append(' ');
+                    }
+                    first = false;
+                    builder.Append(current.field).Append(':');
+                    builder.Append(spare.Utf8ToString());
+                }
+            }
+
+            return builder.ToString();
+        }
+
+        private sealed class TermsAndField
+        {
+            internal readonly int start;
+            internal readonly int end;
+            internal readonly string field;
+
+
+            internal TermsAndField(int start, int end, string field)
+                : base()
+            {
+                this.start = start;
+                this.end = end;
+                this.field = field;
+            }
+
+            public override int GetHashCode()
+            {
+                const int prime = 31;
+                int result = 1;
+                result = prime * result + ((field == null) ? 0 : field.GetHashCode());
+                result = prime * result + end;
+                result = prime * result + start;
+                return result;
+            }
+
+            public override bool Equals(object obj)
+            {
+                if (this == obj)
+                {
+                    return true;
+                }
+                if (obj == null)
+                {
+                    return false;
+                }
+                if (this.GetType() != obj.GetType())
+                {
+                    return false;
+                }
+                var other = (TermsAndField)obj;
+                if (field == null)
+                {
+                    if (other.field != null)
+                    {
+                        return false;
+                    }
+                }
+                else if (!field.Equals(other.field))
+                {
+                    return false;
+                }
+                if (end != other.end)
+                {
+                    return false;
+                }
+                if (start != other.start)
+                {
+                    return false;
+                }
+                return true;
+            }
+
+        }
+
+        private abstract class FieldAndTermEnum
+        {
+            protected internal string field;
+
+            public abstract BytesRef Next();
+
+            public FieldAndTermEnum()
+            {
+            }
+
+            public FieldAndTermEnum(string field)
+            {
+                this.field = field;
+            }
+
+            public virtual string Field
+            {
+                get { return field; }
+            }
+        }
+    }
 }
\ No newline at end of file
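
The class comment above describes the storage trick that makes this filter cache friendly: all term bytes live in one flat array, and a parallel offsets array delimits them, so term i occupies bytes[offsets[i] .. offsets[i+1]). A standalone sketch of that packing idea, not the TermsFilter internals verbatim (PackedTermsDemo and Pack are illustrative names):

    using System;
    using System.Linq;

    static class PackedTermsDemo
    {
        // Pack sorted, deduplicated UTF-8 terms into one byte array plus a
        // parallel offsets array. Equality and hashing over the filter can
        // then compare two flat arrays instead of thousands of small objects.
        static (byte[] bytes, int[] offsets) Pack(byte[][] terms)
        {
            var offsets = new int[terms.Length + 1];
            var bytes = new byte[terms.Sum(t => t.Length)];
            int end = 0;
            for (int i = 0; i < terms.Length; i++)
            {
                offsets[i] = end;                                  // term i starts here
                Array.Copy(terms[i], 0, bytes, end, terms[i].Length);
                end += terms[i].Length;
            }
            offsets[terms.Length] = end; // sentinel: term i ends at offsets[i + 1]
            return (bytes, offsets);
        }
    }

On the consuming side, the params constructor shown in the diff makes the "category labels" use case from the class doc a one-liner, e.g. new TermsFilter("category", new BytesRef("sports"), new BytesRef("politics")).
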