Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id EB44D200C0F for ; Thu, 2 Feb 2017 21:15:55 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id E9D9A160B61; Thu, 2 Feb 2017 20:15:55 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id BE671160B65 for ; Thu, 2 Feb 2017 21:15:54 +0100 (CET) Received: (qmail 76442 invoked by uid 500); 2 Feb 2017 20:15:53 -0000 Mailing-List: contact commits-help@lucenenet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@lucenenet.apache.org Delivered-To: mailing list commits@lucenenet.apache.org Received: (qmail 76273 invoked by uid 99); 2 Feb 2017 20:15:53 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 02 Feb 2017 20:15:53 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 3E562DFF6D; Thu, 2 Feb 2017 20:15:53 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: nightowl888@apache.org To: commits@lucenenet.apache.org Date: Thu, 02 Feb 2017 20:15:56 -0000 Message-Id: In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [4/6] lucenenet git commit: Lucene.Net.Analysis.CommonGrams refactor: member accessibility and documentation comments archived-at: Thu, 02 Feb 2017 20:15:56 -0000 Lucene.Net.Analysis.CommonGrams refactor: member accessibility and documentation comments Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/487927c0 Tree: 
http://git-wip-us.apache.org/repos/asf/lucenenet/tree/487927c0 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/487927c0 Branch: refs/heads/api-work Commit: 487927c003fd5d42e4b72d642278683ca0d31aec Parents: 3e97f31 Author: Shad Storhaug Authored: Thu Feb 2 23:43:22 2017 +0700 Committer: Shad Storhaug Committed: Fri Feb 3 01:13:43 2017 +0700 ---------------------------------------------------------------------- .../Analysis/CommonGrams/CommonGramsFilter.cs | 49 +++++++++------ .../CommonGrams/CommonGramsFilterFactory.cs | 9 +-- .../CommonGrams/CommonGramsQueryFilter.cs | 63 ++++++++++++-------- .../CommonGramsQueryFilterFactory.cs | 12 ++-- src/Lucene.Net.Core/Analysis/TokenFilter.cs | 19 ++++-- src/Lucene.Net.Core/Analysis/TokenStream.cs | 14 ++--- 6 files changed, 99 insertions(+), 67 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs index fcfe42d..e7578be 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs @@ -31,15 +31,14 @@ namespace Lucene.Net.Analysis.CommonGrams ///

/// Construct bigrams for frequently occurring terms while indexing. Single terms /// are still indexed too, with bigrams overlaid. This is achieved through the - /// use of . Bigrams have a type - /// of Example: - ///

input:"the quick brown fox"
output:|"the","the-quick"|"brown"|"fox"|
"the-quick" has a position increment of 0 so it is in the same position - /// as "the" "the-quick" has a term.type() of "gram"

+ /// use of . Bigrams have a type + /// of Example: + /// + /// input:"the quick brown fox" + /// output:|"the","the-quick"|"brown"|"fox"| + /// "the-quick" has a position increment of 0 so it is in the same position + /// as "the" "the-quick" has a term.type() of "gram" + /// ///

/* @@ -47,7 +46,6 @@ namespace Lucene.Net.Analysis.CommonGrams */ public sealed class CommonGramsFilter : TokenFilter { - public const string GRAM_TYPE = "gram"; private const char SEPARATOR = '_'; @@ -71,7 +69,7 @@ namespace Lucene.Net.Analysis.CommonGrams /// bigrams with position increment 0 type=gram where one or both of the words /// in a potential bigram are in the set of common words . /// - /// TokenStream input in filter chain + /// input in filter chain /// The set of common words. public CommonGramsFilter(LuceneVersion matchVersion, TokenStream input, CharArraySet commonWords) : base(input) @@ -89,11 +87,11 @@ namespace Lucene.Net.Analysis.CommonGrams /// output the token. If the token and/or the following token are in the list /// of common words also output a bigram with position increment 0 and /// type="gram" - /// + /// /// TODO:Consider adding an option to not emit unigram stopwords - /// as in CDL XTF BigramStopFilter, CommonGramsQueryFilter would need to be + /// as in CDL XTF BigramStopFilter, would need to be /// changed to work with this. - /// + /// /// TODO: Consider optimizing for the case of three /// commongrams i.e "man of the year" normally produces 3 bigrams: "man-of", /// "of-the", "the-year" but with proper management of positions we could @@ -119,7 +117,7 @@ namespace Lucene.Net.Analysis.CommonGrams * When valid, the buffer always contains at least the separator. * If its empty, there is nothing before this stopword. */ - if (lastWasCommon || (Common && buffer.Length > 0)) + if (lastWasCommon || (IsCommon && buffer.Length > 0)) { savedState = CaptureState(); GramToken(); @@ -131,8 +129,21 @@ namespace Lucene.Net.Analysis.CommonGrams } ///

- /// {@inheritDoc} + /// This method is called by a consumer before it begins consumption using + /// . + /// + /// Resets this stream to a clean state. Stateful implementations must implement + /// this method so that they can be reused, just as if they had been created fresh. + /// + /// If you override this method, always call base.Reset(), otherwise + /// some internal state will not be correctly reset (e.g., will + /// throw on further usage). ///

/// Determines if the current token is a common term ///

- /// {@code true} if the current token is a common term, {@code false} otherwise - private bool Common + /// true if the current token is a common term, false otherwise + private bool IsCommon { get { @@ -164,7 +175,7 @@ namespace Lucene.Net.Analysis.CommonGrams buffer.Append(termAttribute.Buffer, 0, termAttribute.Length); buffer.Append(SEPARATOR); lastStartOffset = offsetAttribute.StartOffset; - lastWasCommon = Common; + lastWasCommon = IsCommon; } ///

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs index f63a71f..333ac68 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs @@ -23,14 +23,15 @@ namespace Lucene.Net.Analysis.CommonGrams */ ///

- /// Constructs a . - ///

+    /// Constructs a <see cref="CommonGramsFilter"/>.
+    /// 
     /// <fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100">
     ///   <analyzer>
     ///     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
     ///     <filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/>
     ///   </analyzer>
-    /// </fieldType>


+    /// </fieldType>
+    ///

///

public class CommonGramsFilterFactory : TokenFilterFactory, IResourceLoaderAware { @@ -41,7 +42,7 @@ namespace Lucene.Net.Analysis.CommonGrams private readonly bool ignoreCase; ///

- /// Creates a new CommonGramsFilterFactory

+ /// Creates a new <see cref="CommonGramsFilterFactory"/>

public CommonGramsFilterFactory(IDictionary args) : base(args) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs index 366621d..32039ca 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs @@ -18,28 +18,26 @@ namespace Lucene.Net.Analysis.CommonGrams * See the License for the specific language governing permissions and * limitations under the License. */ - + ///

- /// Wrap a CommonGramsFilter optimizing phrase queries by only returning single + /// Wrap a <see cref="CommonGramsFilter"/> optimizing phrase queries by only returning single /// words when they are not a member of a bigram. - /// + /// /// Example: - ///

query input to CommonGramsFilter: "the rain in spain falls mainly" - ///
output of CommomGramsFilter/input to CommonGramsQueryFilter: - /// |"the, "the-rain"|"rain" "rain-in"|"in, "in-spain"|"spain"|"falls"|"mainly" - ///
output of CommonGramsQueryFilter:"the-rain", "rain-in" ,"in-spain", - /// "falls", "mainly" - ///

+ /// + /// query input to CommonGramsFilter: "the rain in spain falls mainly" + /// output of CommomGramsFilter/input to CommonGramsQueryFilter: + /// |"the, "the-rain"|"rain" "rain-in"|"in, "in-spain"|"spain"|"falls"|"mainly" + /// output of CommonGramsQueryFilter:"the-rain", "rain-in" ,"in-spain", + /// "falls", "mainly" + /// ///

- - /* - * See:http://hudson.zones.apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache/lucene/analysis/TokenStream.html and - * http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/package.html?revision=718798 - */ + /// + /// See:http://hudson.zones.apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache/lucene/analysis/TokenStream.html and + /// http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/package.html?revision=718798 + /// public sealed class CommonGramsQueryFilter : TokenFilter { - private readonly ITypeAttribute typeAttribute; private readonly IPositionIncrementAttribute posIncAttribute; @@ -59,8 +57,21 @@ namespace Lucene.Net.Analysis.CommonGrams } ///

/// Output bigrams whenever possible to optimize queries. Only output unigrams /// when they are not a member of a bigram. Example: - ///

input: "the rain in spain falls mainly" - ///
output:"the-rain", "rain-in" ,"in-spain", "falls", "mainly" - ///

+ /// + /// input: "the rain in spain falls mainly" + /// output:"the-rain", "rain-in" ,"in-spain", "falls", "mainly" + /// ///

public override bool IncrementToken() { @@ -83,13 +94,13 @@ namespace Lucene.Net.Analysis.CommonGrams { State current = CaptureState(); - if (previous != null && !GramType) + if (previous != null && !IsGramType) { RestoreState(previous); previous = current; previousType = typeAttribute.Type; - if (GramType) + if (IsGramType) { posIncAttribute.PositionIncrement = 1; } @@ -109,7 +120,7 @@ namespace Lucene.Net.Analysis.CommonGrams RestoreState(previous); previous = null; - if (GramType) + if (IsGramType) { posIncAttribute.PositionIncrement = 1; } @@ -121,8 +132,8 @@ namespace Lucene.Net.Analysis.CommonGrams ///

/// Convenience method to check if the current type is a gram type ///

- /// {@code true} if the current type is a gram type, {@code false} otherwise - public bool GramType + /// true if the current type is a gram type, false otherwise + public bool IsGramType { get { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs index f797390..1e067e9 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs @@ -21,28 +21,28 @@ namespace Lucene.Net.Analysis.CommonGrams ///

- /// Construct . + /// Construct . /// - ///

+    /// 
     /// <fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100">
     ///   <analyzer>
     ///     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
     ///     <filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/>
     ///   </analyzer>
-    /// </fieldType>


+    /// </fieldType>
+    ///

///

public class CommonGramsQueryFilterFactory : CommonGramsFilterFactory { - ///

- /// Creates a new CommonGramsQueryFilterFactory

+ /// Creates a new <see cref="CommonGramsQueryFilterFactory"/> public CommonGramsQueryFilterFactory(IDictionary args) : base(args) { } ///

- /// Create a CommonGramsFilter and wrap it with a CommonGramsQueryFilter + /// Create a <see cref="CommonGramsFilter"/> and wrap it with a <see cref="CommonGramsQueryFilter"/> ///

public override TokenStream Create(TokenStream input) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Core/Analysis/TokenFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Analysis/TokenFilter.cs b/src/Lucene.Net.Core/Analysis/TokenFilter.cs index d6a96fb..b082d6a 100644 --- a/src/Lucene.Net.Core/Analysis/TokenFilter.cs +++ b/src/Lucene.Net.Core/Analysis/TokenFilter.cs @@ -61,12 +61,21 @@ namespace Lucene.Net.Analysis } ///

- /// {@inheritDoc} - ///

- /// NOTE: - /// The default implementation chains the call to the input TokenStream, so - /// be sure to call super.reset() when overriding this method. + /// This method is called by a consumer before it begins consumption using + /// . + /// + /// Resets this stream to a clean state. Stateful implementations must implement + /// this method so that they can be reused, just as if they had been created fresh. + /// + /// If you override this method, always call base.Reset(), otherwise + /// some internal state will not be correctly reset (e.g., will + /// throw on further usage). ///

+ /// + /// NOTE: + /// The default implementation chains the call to the input , so + /// be sure to call base.Reset() when overriding this method. + /// public override void Reset() { m_input.Reset(); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Core/Analysis/TokenStream.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Analysis/TokenStream.cs b/src/Lucene.Net.Core/Analysis/TokenStream.cs index 90bf3f2..1e104e9 100644 --- a/src/Lucene.Net.Core/Analysis/TokenStream.cs +++ b/src/Lucene.Net.Core/Analysis/TokenStream.cs @@ -183,15 +183,15 @@ namespace Lucene.Net.Analysis } ///

- /// this method is called by a consumer before it begins consumption using - /// . - ///

+ /// This method is called by a consumer before it begins consumption using + /// . + /// /// Resets this stream to a clean state. Stateful implementations must implement /// this method so that they can be reused, just as if they had been created fresh. - ///

- /// If you override this method, always call {@code super.reset()}, otherwise - /// some internal state will not be correctly reset (e.g., will - /// throw on further usage). + /// + /// If you override this method, always call base.Reset(), otherwise + /// some internal state will not be correctly reset (e.g., will + /// throw on further usage). ///

public virtual void Reset() {