Return-Path: X-Original-To: apmail-lucenenet-commits-archive@www.apache.org Delivered-To: apmail-lucenenet-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id C786F17C9B for ; Sun, 25 Jan 2015 13:46:42 +0000 (UTC) Received: (qmail 72914 invoked by uid 500); 25 Jan 2015 13:46:43 -0000 Delivered-To: apmail-lucenenet-commits-archive@lucenenet.apache.org Received: (qmail 72886 invoked by uid 500); 25 Jan 2015 13:46:43 -0000 Mailing-List: contact commits-help@lucenenet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@lucenenet.apache.org Delivered-To: mailing list commits@lucenenet.apache.org Received: (qmail 72877 invoked by uid 99); 25 Jan 2015 13:46:42 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 25 Jan 2015 13:46:42 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 0E0DAE03EC; Sun, 25 Jan 2015 13:46:35 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: synhershko@apache.org To: commits@lucenenet.apache.org Date: Sun, 25 Jan 2015 13:46:36 -0000 Message-Id: <645ac555d39e434b822ee623925a3ec9@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [2/2] lucenenet git commit: More porting work More porting work Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/56bfeaab Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/56bfeaab Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/56bfeaab Branch: refs/heads/master Commit: 56bfeaab22154916e96433eb91572f26d04d1ef2 Parents: 1b806eb Author: Itamar Syn-Hershko Authored: Sun Jan 25 15:46:16 2015 +0200 Committer: Itamar Syn-Hershko Committed: Sun 
Jan 25 15:46:16 2015 +0200 ---------------------------------------------------------------------- .../CodepointCountFilterFactory.cs | 13 +- .../Analysis/Miscellaneous/EmptyTokenStream.cs | 6 +- .../Miscellaneous/HyphenatedWordsFilter.cs | 311 ++++++----- .../HyphenatedWordsFilterFactory.cs | 11 +- .../Analysis/Miscellaneous/KeepWordFilter.cs | 5 +- .../Miscellaneous/KeywordMarkerFilter.cs | 114 ++-- .../Analysis/Miscellaneous/LengthFilter.cs | 20 +- .../Miscellaneous/LengthFilterFactory.cs | 1 + .../Analysis/Miscellaneous/PatternAnalyzer.cs | 13 +- .../Miscellaneous/PatternKeywordMarkerFilter.cs | 4 +- .../Analysis/Path/PathHierarchyTokenizer.cs | 476 ++++++++--------- .../Path/PathHierarchyTokenizerFactory.cs | 191 ++++--- .../Path/ReversePathHierarchyTokenizer.cs | 421 +++++++-------- .../Analysis/Position/PositionFilterFactory.cs | 7 +- .../Analysis/Query/QueryAutoStopWordAnalyzer.cs | 10 +- .../Analysis/Sinks/DateRecognizerSinkFilter.cs | 17 +- .../Analysis/Sinks/TeeSinkTokenFilter.cs | 521 +++++++++---------- .../Analysis/Sinks/TokenRangeSinkFilter.cs | 127 +++-- .../Analysis/Sinks/TokenTypeSinkFilter.cs | 91 ++-- .../Analysis/Standard/ClassicTokenizerImpl.cs | 16 +- .../Analysis/Standard/StandardAnalyzer.cs | 11 +- .../Analysis/Synonym/FSTSynonymFilterFactory.cs | 335 ++++++------ .../Analysis/Util/AbstractAnalysisFactory.cs | 30 +- .../Analysis/Util/CharArrayMap.cs | 23 +- .../Analysis/Util/CharArraySet.cs | 17 +- .../Support/Compatibility/Collections.cs | 7 + src/Lucene.Net.Core/Util/Version.cs | 2 +- 27 files changed, 1365 insertions(+), 1435 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs 
b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs index e85fd1e..bb37bd1 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs @@ -1,7 +1,8 @@ using System.Collections.Generic; -using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory; +using Lucene.Net.Analysis.Util; +using org.apache.lucene.analysis.miscellaneous; -namespace org.apache.lucene.analysis.miscellaneous +namespace Lucene.Net.Analysis.Miscellaneous { /* @@ -20,10 +21,7 @@ namespace org.apache.lucene.analysis.miscellaneous * See the License for the specific language governing permissions and * limitations under the License. */ - - using TokenFilterFactory = TokenFilterFactory; - - /// + /// /// Factory for . ///
 	/// <fieldType name="text_lngth" class="solr.TextField" positionIncrementGap="100">
@@ -52,10 +50,9 @@ namespace org.apache.lucene.analysis.miscellaneous
 		}
 	  }
 
-	  public override CodepointCountFilter create(TokenStream input)
+	  public override CodepointCountFilter Create(TokenStream input)
 	  {
 		return new CodepointCountFilter(luceneMatchVersion, input, min, max);
 	  }
 	}
-
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
index 38af481..ef84806 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
@@ -1,4 +1,4 @@
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
 	/*
@@ -24,11 +24,9 @@
 	public sealed class EmptyTokenStream : TokenStream
 	{
 
-	  public override bool incrementToken()
+	  public override bool IncrementToken()
 	  {
 		return false;
 	  }
-
 	}
-
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
index 96a2dfa..022ee31 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
@@ -1,164 +1,159 @@
 using System.Text;
+using Lucene.Net.Analysis.Tokenattributes;
 
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-	using org.apache.lucene.analysis;
-	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-	/// 
-	/// When the plain text is extracted from documents, we will often have many words hyphenated and broken into
-	/// two lines. This is often the case with documents where narrow text columns are used, such as newsletters.
-	/// In order to increase search efficiency, this filter puts hyphenated words broken into two lines back together.
-	/// This filter should be used on indexing time only.
-	/// Example field definition in schema.xml:
-	/// 
-	/// <fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
-	///  <analyzer type="index">
-	///    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-	///      <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-	///      <filter class="solr.StopFilterFactory" ignoreCase="true"/>
-	///      <filter class="solr.HyphenatedWordsFilterFactory"/>
-	///      <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
-	///      <filter class="solr.LowerCaseFilterFactory"/>
-	///      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
-	///  </analyzer>
-	///  <analyzer type="query">
-	///      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-	///      <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
-	///      <filter class="solr.StopFilterFactory" ignoreCase="true"/>
-	///      <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
-	///      <filter class="solr.LowerCaseFilterFactory"/>
-	///      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
-	///  </analyzer>
-	/// </fieldtype>
-	/// 
- /// - ///
- public sealed class HyphenatedWordsFilter : TokenFilter - { - - private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute)); - private readonly OffsetAttribute offsetAttribute = addAttribute(typeof(OffsetAttribute)); - - private readonly StringBuilder hyphenated = new StringBuilder(); - private State savedState; - private bool exhausted = false; - private int lastEndOffset = 0; - - /// - /// Creates a new HyphenatedWordsFilter - /// - /// TokenStream that will be filtered - public HyphenatedWordsFilter(TokenStream @in) : base(@in) - { - } - - /// - /// {@inheritDoc} - /// -//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: -//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException - public override bool incrementToken() - { - while (!exhausted && input.incrementToken()) - { - char[] term = termAttribute.buffer(); - int termLength = termAttribute.length(); - lastEndOffset = offsetAttribute.endOffset(); - - if (termLength > 0 && term[termLength - 1] == '-') - { - // a hyphenated word - // capture the state of the first token only - if (savedState == null) - { - savedState = captureState(); - } - hyphenated.Append(term, 0, termLength - 1); - } - else if (savedState == null) - { - // not part of a hyphenated word. - return true; - } - else - { - // the final portion of a hyphenated word - hyphenated.Append(term, 0, termLength); - unhyphenate(); - return true; - } - } - - exhausted = true; - - if (savedState != null) - { - // the final term ends with a hyphen - // add back the hyphen, for backwards compatibility. 
- hyphenated.Append('-'); - unhyphenate(); - return true; - } - - return false; - } - - /// - /// {@inheritDoc} - /// -//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: -//ORIGINAL LINE: @Override public void reset() throws java.io.IOException - public override void reset() - { - base.reset(); - hyphenated.Length = 0; - savedState = null; - exhausted = false; - lastEndOffset = 0; - } - - // ================================================= Helper Methods ================================================ - - /// - /// Writes the joined unhyphenated term - /// - private void unhyphenate() - { - restoreState(savedState); - savedState = null; - - char[] term = termAttribute.buffer(); - int length = hyphenated.Length; - if (length > termAttribute.length()) - { - term = termAttribute.resizeBuffer(length); - } - - hyphenated.getChars(0, length, term, 0); - termAttribute.Length = length; - offsetAttribute.setOffset(offsetAttribute.startOffset(), lastEndOffset); - hyphenated.Length = 0; - } - } + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + /// + /// When the plain text is extracted from documents, we will often have many words hyphenated and broken into + /// two lines. 
This is often the case with documents where narrow text columns are used, such as newsletters. + /// In order to increase search efficiency, this filter puts hyphenated words broken into two lines back together. + /// This filter should be used on indexing time only. + /// Example field definition in schema.xml: + ///
+    /// <fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
+    ///  <analyzer type="index">
+    ///    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+    ///      <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+    ///      <filter class="solr.StopFilterFactory" ignoreCase="true"/>
+    ///      <filter class="solr.HyphenatedWordsFilterFactory"/>
+    ///      <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+    ///      <filter class="solr.LowerCaseFilterFactory"/>
+    ///      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+    ///  </analyzer>
+    ///  <analyzer type="query">
+    ///      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+    ///      <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+    ///      <filter class="solr.StopFilterFactory" ignoreCase="true"/>
+    ///      <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
+    ///      <filter class="solr.LowerCaseFilterFactory"/>
+    ///      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+    ///  </analyzer>
+    /// </fieldtype>
+    /// 
+ /// + ///
+ public sealed class HyphenatedWordsFilter : TokenFilter + { + + private readonly ICharTermAttribute termAttribute; + private readonly IOffsetAttribute offsetAttribute; + + private readonly StringBuilder hyphenated = new StringBuilder(); + private State savedState; + private bool exhausted = false; + private int lastEndOffset = 0; + + /// + /// Creates a new HyphenatedWordsFilter + /// + /// TokenStream that will be filtered + public HyphenatedWordsFilter(TokenStream @in) + : base(@in) + { + termAttribute = AddAttribute(); + offsetAttribute = AddAttribute(); + } + + /// + /// {@inheritDoc} + /// + public override bool IncrementToken() + { + while (!exhausted && input.IncrementToken()) + { + char[] term = termAttribute.Buffer(); + int termLength = termAttribute.Length; + lastEndOffset = offsetAttribute.EndOffset(); + + if (termLength > 0 && term[termLength - 1] == '-') + { + // a hyphenated word + // capture the state of the first token only + if (savedState == null) + { + savedState = CaptureState(); + } + hyphenated.Append(term, 0, termLength - 1); + } + else if (savedState == null) + { + // not part of a hyphenated word. + return true; + } + else + { + // the final portion of a hyphenated word + hyphenated.Append(term, 0, termLength); + Unhyphenate(); + return true; + } + } + + exhausted = true; + + if (savedState != null) + { + // the final term ends with a hyphen + // add back the hyphen, for backwards compatibility. 
+ hyphenated.Append('-'); + Unhyphenate(); + return true; + } + + return false; + } + + /// + /// {@inheritDoc} + /// + public override void Reset() + { + base.Reset(); + hyphenated.Length = 0; + savedState = null; + exhausted = false; + lastEndOffset = 0; + } + + // ================================================= Helper Methods ================================================ + + /// + /// Writes the joined unhyphenated term + /// + private void Unhyphenate() + { + RestoreState(savedState); + savedState = null; + + char[] term = termAttribute.Buffer(); + int length = hyphenated.Length; + if (length > termAttribute.Length) + { + term = termAttribute.ResizeBuffer(length); + } + + hyphenated.GetChars(0, length, term, 0); + termAttribute.Length = length; + offsetAttribute.SetOffset(offsetAttribute.StartOffset(), lastEndOffset); + hyphenated.Length = 0; + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs index 946cd57..b274564 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs @@ -1,7 +1,7 @@ using System.Collections.Generic; -using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory; +using Lucene.Net.Analysis.Util; -namespace org.apache.lucene.analysis.miscellaneous +namespace Lucene.Net.Analysis.Miscellaneous { /* @@ -20,10 +20,7 @@ namespace org.apache.lucene.analysis.miscellaneous * See the License for the specific language governing permissions and * limitations under the License. 
*/ - - using TokenFilterFactory = TokenFilterFactory; - - /// + /// /// Factory for . ///
 	/// <fieldType name="text_hyphn" class="solr.TextField" positionIncrementGap="100">
@@ -46,7 +43,7 @@ namespace org.apache.lucene.analysis.miscellaneous
 		}
 	  }
 
-	  public override HyphenatedWordsFilter create(TokenStream input)
+	  public override TokenStream Create(TokenStream input)
 	  {
 		return new HyphenatedWordsFilter(input);
 	  }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
index f9f9a53..b699de1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
@@ -30,7 +30,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
     public sealed class KeepWordFilter : FilteringTokenFilter
     {
         private readonly CharArraySet words;
-        private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+        private readonly ICharTermAttribute termAtt;
 
         /// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4. 
         [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4.")]
@@ -38,6 +38,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
             : base(version, enablePositionIncrements, @in)
         {
             this.words = words;
+            termAtt = AddAttribute();
         }
 
         /// 
@@ -55,7 +56,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
             this.words = words;
         }
 
-        public override bool Accept()
+        protected internal override bool Accept()
         {
             return words.Contains(termAtt.Buffer(), 0, termAtt.Length);
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
index 8918274..6403e57 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
@@ -1,61 +1,59 @@
-namespace org.apache.lucene.analysis.miscellaneous
-{
-
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-
-	/// <summary>
-	/// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>.
-	/// </summary>
-	/// <seealso cref="KeywordAttribute"/>
-	public abstract class KeywordMarkerFilter : TokenFilter
-	{
-
-	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+using Lucene.Net.Analysis.Tokenattributes;
 
-	  /// <summary>
-	  /// Creates a new <seealso cref="KeywordMarkerFilter"/> </summary>
-	  /// <param name="in"> the input stream </param>
-	  protected internal KeywordMarkerFilter(TokenStream @in) : base(@in)
-	  {
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		if (input.incrementToken())
-		{
-		  if (Keyword)
-		  {
-			keywordAttr.Keyword = true;
-		  }
-		  return true;
-		}
-		else
-		{
-		  return false;
-		}
-	  }
-
-	  protected internal abstract bool Keyword {get;}
-
-	}
+namespace Lucene.Net.Analysis.Miscellaneous
+{
 
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>.
+    /// </summary>
+    /// <seealso cref="KeywordAttribute"/>
+    public abstract class KeywordMarkerFilter : TokenFilter
+    {
+
+        private readonly IKeywordAttribute keywordAttr;
+
+        /// <summary>
+        /// Creates a new <seealso cref="KeywordMarkerFilter"/> </summary>
+        /// <param name="in"> the input stream </param>
+        protected internal KeywordMarkerFilter(TokenStream @in)
+            : base(@in)
+        {
+            keywordAttr = AddAttribute();
+        }
+
+        public override bool IncrementToken()
+        {
+            if (input.IncrementToken())
+            {
+                if (Keyword)
+                {
+                    keywordAttr.Keyword = true;
+                }
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+
+        protected internal abstract bool Keyword { get; }
+
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
index 802ff26..e0ba510 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
@@ -1,7 +1,8 @@
 using System;
-using FilteringTokenFilter = Lucene.Net.Analysis.Util.FilteringTokenFilter;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
 
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
 	/*
@@ -20,12 +21,7 @@ namespace org.apache.lucene.analysis.miscellaneous
 	 * See the License for the specific language governing permissions and
 	 * limitations under the License.
 	 */
-
-	using FilteringTokenFilter = FilteringTokenFilter;
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using Version = org.apache.lucene.util.Version;
-
-	/// <summary>
+    /// <summary>
 	/// Removes words that are too long or too short from the stream.
 	/// <para>
 	/// Note: Length is calculated as the number of UTF-16 code units.
@@ -37,7 +33,7 @@ namespace org.apache.lucene.analysis.miscellaneous
 	  private readonly int min;
 	  private readonly int max;
 
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly ICharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
 
 	  /// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4. 
 	  [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4.")]
@@ -77,11 +73,9 @@ namespace org.apache.lucene.analysis.miscellaneous
 		this.max = max;
 	  }
 
-	  public override bool accept()
+	  public override bool Accept()
 	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int len = termAtt.length();
-		int len = termAtt.length();
+		int len = termAtt.Length;
 		return (len >= min && len <= max);
 	  }
 	}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
index 6f0e4a3..afdc961 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
@@ -1,4 +1,5 @@
 using System.Collections.Generic;
+using Lucene.Net.Analysis.Miscellaneous;
 using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
 
 namespace org.apache.lucene.analysis.miscellaneous

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
index d074038..4fe2822 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -2,7 +2,7 @@
 using Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.Util;
 
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
 	/*
@@ -21,16 +21,7 @@ namespace org.apache.lucene.analysis.miscellaneous
 	 * See the License for the specific language governing permissions and
 	 * limitations under the License.
 	 */
-
-
-	using StopAnalyzer = StopAnalyzer;
-	using StopFilter = StopFilter;
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-	using CharArraySet = CharArraySet;
-	using Version = org.apache.lucene.util.Version;
-
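-	/// <summary>
+    /// <summary>
 	/// Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
 	/// <seealso cref="java.io.Reader"/>, that can flexibly separate text into terms via a regular expression <seealso cref="Pattern"/>
 	/// (with behaviour identical to <seealso cref="String#split(String)"/>),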

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
index 4402d5a..3886da0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
@@ -1,4 +1,6 @@
-namespace org.apache.lucene.analysis.miscellaneous
+using Lucene.Net.Analysis.Miscellaneous;
+
+namespace org.apache.lucene.analysis.miscellaneous
 {
 
 	/*

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
index b826cd6..69cc6c2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
@@ -1,242 +1,242 @@
 using System.Text;
+using Lucene.Net.Analysis.Tokenattributes;
+using Reader = System.IO.TextReader;
 
-namespace org.apache.lucene.analysis.path
+namespace Lucene.Net.Analysis.Path
 {
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
-	/// 
-	/// Tokenizer for path-like hierarchies.
-	/// 
-	/// Take something like:
-	/// 
-	/// 
-	///  /something/something/else
-	/// 
- /// - /// and make: - /// - ///
-	///  /something
-	///  /something/something
-	///  /something/something/else
-	/// 
- ///
- ///
- public class PathHierarchyTokenizer : Tokenizer - { - - public PathHierarchyTokenizer(Reader input) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP) - { - } - - public PathHierarchyTokenizer(Reader input, int skip) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip) - { - } - - public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) : this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP) - { - } - - public PathHierarchyTokenizer(Reader input, char delimiter, char replacement) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP) - { - } - - public PathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip) - { - } - - public PathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip) - { - } - - public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip) - { - } - - public PathHierarchyTokenizer(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) : base(factory, input) - { - if (bufferSize < 0) - { - throw new System.ArgumentException("bufferSize cannot be negative"); - } - if (skip < 0) - { - throw new System.ArgumentException("skip cannot be negative"); - } - termAtt.resizeBuffer(bufferSize); - - this.delimiter = delimiter; - this.replacement = replacement; - this.skip = skip; - resultToken = new StringBuilder(bufferSize); - } - - private const int DEFAULT_BUFFER_SIZE = 1024; - public const char DEFAULT_DELIMITER = '/'; - public const int DEFAULT_SKIP = 0; - - private readonly char delimiter; - private readonly char replacement; - private readonly int skip; - - 
private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute)); - private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute)); - private readonly PositionIncrementAttribute posAtt = addAttribute(typeof(PositionIncrementAttribute)); - private int startPosition = 0; - private int skipped = 0; - private bool endDelimiter = false; - private StringBuilder resultToken; - - private int charsRead = 0; - - -//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: -//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException - public override bool incrementToken() - { - clearAttributes(); - termAtt.append(resultToken); - if (resultToken.Length == 0) - { - posAtt.PositionIncrement = 1; - } - else - { - posAtt.PositionIncrement = 0; - } - int length = 0; - bool added = false; - if (endDelimiter) - { - termAtt.append(replacement); - length++; - endDelimiter = false; - added = true; - } - - while (true) - { - int c = input.read(); - if (c >= 0) - { - charsRead++; - } - else - { - if (skipped > skip) - { - length += resultToken.Length; - termAtt.Length = length; - offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + length)); - if (added) - { - resultToken.Length = 0; - resultToken.Append(termAtt.buffer(), 0, length); - } - return added; - } - else - { - return false; - } - } - if (!added) - { - added = true; - skipped++; - if (skipped > skip) - { - termAtt.append(c == delimiter ? 
replacement : (char)c); - length++; - } - else - { - startPosition++; - } - } - else - { - if (c == delimiter) - { - if (skipped > skip) - { - endDelimiter = true; - break; - } - skipped++; - if (skipped > skip) - { - termAtt.append(replacement); - length++; - } - else - { - startPosition++; - } - } - else - { - if (skipped > skip) - { - termAtt.append((char)c); - length++; - } - else - { - startPosition++; - } - } - } - } - length += resultToken.Length; - termAtt.Length = length; - offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + length)); - resultToken.Length = 0; - resultToken.Append(termAtt.buffer(), 0, length); - return true; - } - -//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: -//ORIGINAL LINE: @Override public final void end() throws java.io.IOException - public override void end() - { - base.end(); - // set final offset - int finalOffset = correctOffset(charsRead); - offsetAtt.setOffset(finalOffset, finalOffset); - } - -//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: -//ORIGINAL LINE: @Override public void reset() throws java.io.IOException - public override void reset() - { - base.reset(); - resultToken.Length = 0; - charsRead = 0; - endDelimiter = false; - skipped = 0; - startPosition = 0; - } - } - + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + /// + /// Tokenizer for path-like hierarchies. + /// + /// Take something like: + /// + ///
+    ///  /something/something/else
+    /// 
+ /// + /// and make: + /// + ///
+    ///  /something
+    ///  /something/something
+    ///  /something/something/else
+    /// 
+ ///
+ ///
+ public class PathHierarchyTokenizer : Tokenizer + { + + public PathHierarchyTokenizer(Reader input) + : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP) + { + } + + public PathHierarchyTokenizer(Reader input, int skip) + : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip) + { + } + + public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) + : this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP) + { + } + + public PathHierarchyTokenizer(Reader input, char delimiter, char replacement) + : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP) + { + } + + public PathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) + : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip) + { + } + + public PathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) + : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip) + { + } + + public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) + : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip) + { + } + + public PathHierarchyTokenizer(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) + : base(factory, input) + { + if (bufferSize < 0) + { + throw new System.ArgumentException("bufferSize cannot be negative"); + } + if (skip < 0) + { + throw new System.ArgumentException("skip cannot be negative"); + } + + offsetAtt = AddAttribute(); + posAtt = AddAttribute(); + termAtt = AddAttribute(); + termAtt.ResizeBuffer(bufferSize); + + this.delimiter = delimiter; + this.replacement = replacement; + this.skip = skip; + resultToken = new StringBuilder(bufferSize); + } + + private const int DEFAULT_BUFFER_SIZE = 1024; + public const char DEFAULT_DELIMITER = '/'; + public const int DEFAULT_SKIP = 0; + + 
private readonly char delimiter; + private readonly char replacement; + private readonly int skip; + + private readonly ICharTermAttribute termAtt; + private readonly IOffsetAttribute offsetAtt; + private readonly IPositionIncrementAttribute posAtt; + private int startPosition = 0; + private int skipped = 0; + private bool endDelimiter = false; + private readonly StringBuilder resultToken; + + private int charsRead = 0; + + public override bool IncrementToken() + { + ClearAttributes(); + termAtt.Append(resultToken); + if (resultToken.Length == 0) + { + posAtt.PositionIncrement = 1; + } + else + { + posAtt.PositionIncrement = 0; + } + int length = 0; + bool added = false; + if (endDelimiter) + { + termAtt.Append(replacement); + length++; + endDelimiter = false; + added = true; + } + + while (true) + { + int c = input.Read(); + if (c >= 0) + { + charsRead++; + } + else + { + if (skipped > skip) + { + length += resultToken.Length; + termAtt.Length = length; + offsetAtt.SetOffset(CorrectOffset(startPosition), CorrectOffset(startPosition + length)); + if (added) + { + resultToken.Length = 0; + resultToken.Append(termAtt.Buffer(), 0, length); + } + return added; + } + else + { + return false; + } + } + if (!added) + { + added = true; + skipped++; + if (skipped > skip) + { + termAtt.Append(c == delimiter ? 
replacement : (char)c); + length++; + } + else + { + startPosition++; + } + } + else + { + if (c == delimiter) + { + if (skipped > skip) + { + endDelimiter = true; + break; + } + skipped++; + if (skipped > skip) + { + termAtt.Append(replacement); + length++; + } + else + { + startPosition++; + } + } + else + { + if (skipped > skip) + { + termAtt.Append((char)c); + length++; + } + else + { + startPosition++; + } + } + } + } + length += resultToken.Length; + termAtt.Length = length; + offsetAtt.SetOffset(CorrectOffset(startPosition), CorrectOffset(startPosition + length)); + resultToken.Length = 0; + resultToken.Append(termAtt.Buffer(), 0, length); + return true; + } + + public override void End() + { + base.End(); + // set final offset + int finalOffset = CorrectOffset(charsRead); + offsetAtt.SetOffset(finalOffset, finalOffset); + } + + public override void Reset() + { + base.Reset(); + resultToken.Length = 0; + charsRead = 0; + endDelimiter = false; + skipped = 0; + startPosition = 0; + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs index 7dd1e62..f43772c 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs @@ -1,105 +1,100 @@ using System.Collections.Generic; -using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory; +using System.IO; +using Lucene.Net.Analysis.Util; +using Lucene.Net.Util; -namespace org.apache.lucene.analysis.path +namespace Lucene.Net.Analysis.Path { - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license 
agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - - using TokenizerFactory = TokenizerFactory; - using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory; - - /// - /// Factory for . - /// - /// This factory is typically configured for use only in the index - /// Analyzer (or only in the query Analyzer, but never both). - /// - /// - /// For example, in the configuration below a query for - /// Books/NonFic will match documents indexed with values like - /// Books/NonFic, Books/NonFic/Law, - /// Books/NonFic/Science/Physics, etc. But it will not match - /// documents indexed with values like Books, or - /// Books/Fic... - /// - /// - ///
-	/// <fieldType name="descendent_path" class="solr.TextField">
-	///   <analyzer type="index">
-	///     <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
-	///   </analyzer>
-	///   <analyzer type="query">
-	///     <tokenizer class="solr.KeywordTokenizerFactory" />
-	///   </analyzer>
-	/// </fieldType>
-	/// 
- /// - /// In this example however we see the oposite configuration, so that a query - /// for Books/NonFic/Science/Physics would match documents - /// containing Books/NonFic, Books/NonFic/Science, - /// or Books/NonFic/Science/Physics, but not - /// Books/NonFic/Science/Physics/Theory or - /// Books/NonFic/Law. - /// - ///
-	/// <fieldType name="descendent_path" class="solr.TextField">
-	///   <analyzer type="index">
-	///     <tokenizer class="solr.KeywordTokenizerFactory" />
-	///   </analyzer>
-	///   <analyzer type="query">
-	///     <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
-	///   </analyzer>
-	/// </fieldType>
-	/// 
- ///
- public class PathHierarchyTokenizerFactory : TokenizerFactory - { - private readonly char delimiter; - private readonly char replacement; - private readonly bool reverse; - private readonly int skip; - - /// - /// Creates a new PathHierarchyTokenizerFactory - public PathHierarchyTokenizerFactory(IDictionary args) : base(args) - { - delimiter = getChar(args, "delimiter", PathHierarchyTokenizer.DEFAULT_DELIMITER); - replacement = getChar(args, "replace", delimiter); - reverse = getBoolean(args, "reverse", false); - skip = getInt(args, "skip", PathHierarchyTokenizer.DEFAULT_SKIP); - if (args.Count > 0) - { - throw new System.ArgumentException("Unknown parameters: " + args); - } - } - - public override Tokenizer create(AttributeFactory factory, Reader input) - { - if (reverse) - { - return new ReversePathHierarchyTokenizer(factory, input, delimiter, replacement, skip); - } - return new PathHierarchyTokenizer(factory, input, delimiter, replacement, skip); - } - } - + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + /// + /// Factory for . + /// + /// This factory is typically configured for use only in the index + /// Analyzer (or only in the query Analyzer, but never both). 
+ /// + /// + /// For example, in the configuration below a query for + /// Books/NonFic will match documents indexed with values like + /// Books/NonFic, Books/NonFic/Law, + /// Books/NonFic/Science/Physics, etc. But it will not match + /// documents indexed with values like Books, or + /// Books/Fic... + /// + /// + ///
+    /// <fieldType name="descendent_path" class="solr.TextField">
+    ///   <analyzer type="index">
+    ///     <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
+    ///   </analyzer>
+    ///   <analyzer type="query">
+    ///     <tokenizer class="solr.KeywordTokenizerFactory" />
+    ///   </analyzer>
+    /// </fieldType>
+    /// 
+ /// + /// In this example however we see the oposite configuration, so that a query + /// for Books/NonFic/Science/Physics would match documents + /// containing Books/NonFic, Books/NonFic/Science, + /// or Books/NonFic/Science/Physics, but not + /// Books/NonFic/Science/Physics/Theory or + /// Books/NonFic/Law. + /// + ///
+    /// <fieldType name="descendent_path" class="solr.TextField">
+    ///   <analyzer type="index">
+    ///     <tokenizer class="solr.KeywordTokenizerFactory" />
+    ///   </analyzer>
+    ///   <analyzer type="query">
+    ///     <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
+    ///   </analyzer>
+    /// </fieldType>
+    /// 
+ ///
+ public class PathHierarchyTokenizerFactory : TokenizerFactory + { + private readonly char delimiter; + private readonly char replacement; + private readonly bool reverse; + private readonly int skip; + /// + /// Creates a new PathHierarchyTokenizerFactory + public PathHierarchyTokenizerFactory(IDictionary args) + : base(args) + { + delimiter = getChar(args, "delimiter", PathHierarchyTokenizer.DEFAULT_DELIMITER); + replacement = getChar(args, "replace", delimiter); + reverse = getBoolean(args, "reverse", false); + skip = getInt(args, "skip", PathHierarchyTokenizer.DEFAULT_SKIP); + if (args.Count > 0) + { + throw new System.ArgumentException("Unknown parameters: " + args); + } + } + public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input) + { + if (reverse) + { + return new ReversePathHierarchyTokenizer(factory, input, delimiter, replacement, skip); + } + return new PathHierarchyTokenizer(factory, input, delimiter, replacement, skip); + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs index 00b5880..47a5d0f 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs @@ -1,214 +1,217 @@ using System.Collections.Generic; using System.Text; +using Lucene.Net.Analysis.Tokenattributes; +using Reader = System.IO.TextReader; -namespace org.apache.lucene.analysis.path +namespace Lucene.Net.Analysis.Path { - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - - using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute; - using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute; - using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; - - /// - /// Tokenizer for domain-like hierarchies. - /// - /// Take something like: - /// - ///
-	/// www.site.co.uk
-	/// 
- /// - /// and make: - /// - ///
-	/// www.site.co.uk
-	/// site.co.uk
-	/// co.uk
-	/// uk
-	/// 
- /// - ///
- ///
- public class ReversePathHierarchyTokenizer : Tokenizer - { - - public ReversePathHierarchyTokenizer(Reader input) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP) - { - } - - public ReversePathHierarchyTokenizer(Reader input, int skip) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip) - { - } - - public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) : this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP) - { - } - - public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP) - { - } - - public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) : this(input, bufferSize, delimiter, replacement, DEFAULT_SKIP) - { - } - - public ReversePathHierarchyTokenizer(Reader input, char delimiter, int skip) : this(input, DEFAULT_BUFFER_SIZE, delimiter, delimiter, skip) - { - } - - public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip) - { - } - - public ReversePathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip) - { - } - - public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip) - { - } - public ReversePathHierarchyTokenizer(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) : base(factory, input) - { - if (bufferSize < 0) - { - throw new System.ArgumentException("bufferSize cannot be negative"); - } - if (skip < 0) - { - throw new System.ArgumentException("skip cannot be negative"); - } - 
termAtt.resizeBuffer(bufferSize); - this.delimiter = delimiter; - this.replacement = replacement; - this.skip = skip; - resultToken = new StringBuilder(bufferSize); - resultTokenBuffer = new char[bufferSize]; - delimiterPositions = new List<>(bufferSize / 10); - } - - private const int DEFAULT_BUFFER_SIZE = 1024; - public const char DEFAULT_DELIMITER = '/'; - public const int DEFAULT_SKIP = 0; - - private readonly char delimiter; - private readonly char replacement; - private readonly int skip; - - private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute)); - private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute)); - private readonly PositionIncrementAttribute posAtt = addAttribute(typeof(PositionIncrementAttribute)); - - private int endPosition = 0; - private int finalOffset = 0; - private int skipped = 0; - private StringBuilder resultToken; - - private IList delimiterPositions; - private int delimitersCount = -1; - private char[] resultTokenBuffer; - -//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: -//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException - public override bool incrementToken() - { - clearAttributes(); - if (delimitersCount == -1) - { - int length = 0; - delimiterPositions.Add(0); - while (true) - { - int c = input.read(); - if (c < 0) - { - break; - } - length++; - if (c == delimiter) - { - delimiterPositions.Add(length); - resultToken.Append(replacement); - } - else - { - resultToken.Append((char)c); - } - } - delimitersCount = delimiterPositions.Count; - if (delimiterPositions[delimitersCount - 1] < length) - { - delimiterPositions.Add(length); - delimitersCount++; - } - if (resultTokenBuffer.Length < resultToken.Length) - { - resultTokenBuffer = new char[resultToken.Length]; - } - resultToken.getChars(0, resultToken.Length, resultTokenBuffer, 0); - resultToken.Length = 0; - int idx = delimitersCount - 1 - skip; - if (idx 
>= 0) - { - // otherwise its ok, because we will skip and return false - endPosition = delimiterPositions[idx]; - } - finalOffset = correctOffset(length); - posAtt.PositionIncrement = 1; - } - else - { - posAtt.PositionIncrement = 0; - } - - while (skipped < delimitersCount - skip - 1) - { - int start = delimiterPositions[skipped]; - termAtt.copyBuffer(resultTokenBuffer, start, endPosition - start); - offsetAtt.setOffset(correctOffset(start), correctOffset(endPosition)); - skipped++; - return true; - } - - return false; - } - -//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: -//ORIGINAL LINE: @Override public final void end() throws java.io.IOException - public override void end() - { - base.end(); - // set final offset - offsetAtt.setOffset(finalOffset, finalOffset); - } - -//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: -//ORIGINAL LINE: @Override public void reset() throws java.io.IOException - public override void reset() - { - base.reset(); - resultToken.Length = 0; - finalOffset = 0; - endPosition = 0; - skipped = 0; - delimitersCount = -1; - delimiterPositions.Clear(); - } - } - + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + /// + /// Tokenizer for domain-like hierarchies. + /// + /// Take something like: + /// + ///
+    /// www.site.co.uk
+    /// 
+ /// + /// and make: + /// + ///
+    /// www.site.co.uk
+    /// site.co.uk
+    /// co.uk
+    /// uk
+    /// 
+ /// + ///
+ ///
+ public class ReversePathHierarchyTokenizer : Tokenizer + { + + public ReversePathHierarchyTokenizer(Reader input) + : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP) + { + } + + public ReversePathHierarchyTokenizer(Reader input, int skip) + : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip) + { + } + + public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) + : this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP) + { + } + + public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement) + : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP) + { + } + + public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) + : this(input, bufferSize, delimiter, replacement, DEFAULT_SKIP) + { + } + + public ReversePathHierarchyTokenizer(Reader input, char delimiter, int skip) + : this(input, DEFAULT_BUFFER_SIZE, delimiter, delimiter, skip) + { + } + + public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) + : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip) + { + } + + public ReversePathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) + : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip) + { + } + + public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) + : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip) + { + termAtt = AddAttribute(); + offsetAtt = AddAttribute(); + posAtt = AddAttribute(); + } + + public ReversePathHierarchyTokenizer(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) + : base(factory, input) + { + if (bufferSize < 0) + { + throw new System.ArgumentException("bufferSize cannot be negative"); + } + if 
(skip < 0) + { + throw new System.ArgumentException("skip cannot be negative"); + } + termAtt.ResizeBuffer(bufferSize); + this.delimiter = delimiter; + this.replacement = replacement; + this.skip = skip; + resultToken = new StringBuilder(bufferSize); + resultTokenBuffer = new char[bufferSize]; + delimiterPositions = new List(bufferSize / 10); + } + + private const int DEFAULT_BUFFER_SIZE = 1024; + public const char DEFAULT_DELIMITER = '/'; + public const int DEFAULT_SKIP = 0; + + private readonly char delimiter; + private readonly char replacement; + private readonly int skip; + + private readonly ICharTermAttribute termAtt; + private readonly IOffsetAttribute offsetAtt; + private readonly IPositionIncrementAttribute posAtt; + + private int endPosition = 0; + private int finalOffset = 0; + private int skipped = 0; + private readonly StringBuilder resultToken; + + private readonly IList delimiterPositions; + private int delimitersCount = -1; + private char[] resultTokenBuffer; + + public override bool IncrementToken() + { + ClearAttributes(); + if (delimitersCount == -1) + { + int length = 0; + delimiterPositions.Add(0); + while (true) + { + int c = input.Read(); + if (c < 0) + { + break; + } + length++; + if (c == delimiter) + { + delimiterPositions.Add(length); + resultToken.Append(replacement); + } + else + { + resultToken.Append((char)c); + } + } + delimitersCount = delimiterPositions.Count; + if (delimiterPositions[delimitersCount - 1] < length) + { + delimiterPositions.Add(length); + delimitersCount++; + } + if (resultTokenBuffer.Length < resultToken.Length) + { + resultTokenBuffer = new char[resultToken.Length]; + } + resultToken.GetChars(0, resultToken.Length, resultTokenBuffer, 0); + resultToken.Length = 0; + int idx = delimitersCount - 1 - skip; + if (idx >= 0) + { + // otherwise its ok, because we will skip and return false + endPosition = delimiterPositions[idx]; + } + finalOffset = CorrectOffset(length); + posAtt.PositionIncrement = 1; + } + else + { + 
posAtt.PositionIncrement = 0; + } + + while (skipped < delimitersCount - skip - 1) + { + var start = delimiterPositions[skipped] ?? 0; + termAtt.CopyBuffer(resultTokenBuffer, start, endPosition - start); + offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(endPosition)); + skipped++; + return true; + } + + return false; + } + + public override void End() + { + base.End(); + // set final offset + offsetAtt.SetOffset(finalOffset, finalOffset); + } + + public override void Reset() + { + base.Reset(); + resultToken.Length = 0; + finalOffset = 0; + endPosition = 0; + skipped = 0; + delimitersCount = -1; + delimiterPositions.Clear(); + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs index cc65164..476c7fe 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using Lucene.Net.Analysis.Util; +using Lucene.Net.Util; namespace Lucene.Net.Analysis.Position { @@ -33,9 +34,7 @@ namespace Lucene.Net.Analysis.Position /// </analyzer> /// </fieldType>
///
- /// - /// @deprecated (4.4) + /// [Obsolete("(4.4)")] public class PositionFilterFactory : TokenFilterFactory { @@ -51,7 +50,7 @@ namespace Lucene.Net.Analysis.Position { throw new System.ArgumentException("Unknown parameters: " + args); } - if (luceneMatchVersion != null && luceneMatchVersion.onOrAfter(Version.LUCENE_44)) + if (luceneMatchVersion != null && luceneMatchVersion.OnOrAfter(Lucene.Net.Util.Version.LUCENE_44)) { throw new System.ArgumentException("PositionFilter is deprecated as of Lucene 4.4. You should either fix your code to not use it or use Lucene 4.3 version compatibility"); } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs index be73228..548b7f6 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs @@ -152,12 +152,12 @@ namespace Lucene.Net.Analysis.Query protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components) { - HashSet stopWords = stopWordsPerField[fieldName]; + var stopWords = stopWordsPerField[fieldName]; if (stopWords == null) { return components; } - StopFilter stopFilter = new StopFilter(matchVersion, components.TokenStream, new CharArraySet(matchVersion, stopWords, false)); + var stopFilter = new StopFilter(matchVersion, components.TokenStream, new CharArraySet(matchVersion, stopWords, false)); return new TokenStreamComponents(components.Tokenizer, stopFilter); } @@ -168,9 +168,9 @@ namespace Lucene.Net.Analysis.Query /// method calls will be returned /// the stop words identified for a field public string[] GetStopWords(string fieldName) - { - HashSet 
stopWords = stopWordsPerField[fieldName]; - return stopWords != null ? stopWords.ToArray(new string[stopWords.Count]) : new string[0]; + { + var stopWords = stopWordsPerField[fieldName]; + return stopWords != null ? stopWords.ToArray() : new string[0]; } /// http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs index a04fd51..dc080a6 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs @@ -1,6 +1,8 @@ using System; +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Util; -namespace org.apache.lucene.analysis.sinks +namespace Lucene.Net.Analysis.Sinks { /* @@ -19,12 +21,7 @@ namespace org.apache.lucene.analysis.sinks * See the License for the specific language governing permissions and * limitations under the License. */ - - - using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute; - using AttributeSource = org.apache.lucene.util.AttributeSource; - - /// + /// /// Attempts to parse the as a Date using a . /// If the value is a Date, it will add it to the sink. ///

@@ -52,15 +49,15 @@ namespace org.apache.lucene.analysis.sinks this.dateFormat = dateFormat; } - public override bool accept(AttributeSource source) + public override bool Accept(AttributeSource source) { if (termAtt == null) { - termAtt = source.addAttribute(typeof(CharTermAttribute)); + termAtt = source.AddAttribute (); } try { - DateTime date = dateFormat.parse(termAtt.ToString()); //We don't care about the date, just that we can parse it as a date + DateTime date = dateFormat.Parse(termAtt.ToString()); //We don't care about the date, just that we can parse it as a date if (date != null) { return true;