lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [3/3] lucenenet git commit: More porting work
Date Tue, 14 Apr 2015 23:32:29 GMT
More porting work


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/b4eaf2fc
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/b4eaf2fc
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/b4eaf2fc

Branch: refs/heads/master
Commit: b4eaf2fc441dfd5d32732eda844ef1e8e62588a1
Parents: 8d7a54f
Author: Itamar Syn-Hershko <itamar@code972.com>
Authored: Wed Apr 15 02:32:11 2015 +0300
Committer: Itamar Syn-Hershko <itamar@code972.com>
Committed: Wed Apr 15 02:32:11 2015 +0300

----------------------------------------------------------------------
 .../Compound/CompoundWordTokenFilterBase.cs     | 365 ++++++-----
 .../Analysis/Core/UpperCaseFilter.cs            | 114 ++--
 .../Ngram/Lucene43EdgeNGramTokenizer.cs         | 609 +++++++++----------
 .../Analysis/Standard/ClassicAnalyzer.cs        | 299 +++++----
 .../Analysis/Standard/ClassicFilter.cs          | 153 +++--
 .../Analysis/Standard/ClassicFilterFactory.cs   |  92 ++-
 .../Analysis/Standard/ClassicTokenizer.cs       | 369 ++++++-----
 .../Analysis/Standard/ClassicTokenizerImpl.cs   |  14 +-
 .../Analysis/Standard/StandardAnalyzer.cs       | 273 +++++----
 .../Analysis/Standard/StandardFilter.cs         | 167 +++--
 .../Analysis/Standard/StandardFilterFactory.cs  |   1 +
 .../Analysis/Standard/StandardTokenizer.cs      |  13 +-
 .../Standard/StandardTokenizerFactory.cs        |  18 +-
 .../Analysis/Standard/StandardTokenizerImpl.cs  |   2 -
 .../Analysis/Standard/UAX29URLEmailAnalyzer.cs  |  43 +-
 .../Analysis/Util/CharArraySet.cs               |   1 -
 .../Analysis/Wikipedia/WikipediaTokenizer.cs    |  23 +-
 .../Wikipedia/WikipediaTokenizerFactory.cs      |   1 +
 src/Lucene.Net.Core/Util/StringHelper.cs        |  10 +-
 19 files changed, 1239 insertions(+), 1328 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
index ba8fd6c..c6bc4cd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
@@ -2,193 +2,192 @@
 using System.Diagnostics;
 using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
 using Lucene.Net.Util;
 
 namespace Lucene.Net.Analysis.Compound
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Base class for decomposition token filters.
-	/// <para>
-	/// 
-	/// <a name="version"></a>
-	/// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating
-	/// CompoundWordTokenFilterBase:
-	/// <ul>
-	/// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
-	/// supplementary characters in strings and char arrays provided as compound word
-	/// dictionaries.
-	/// <li>As of 4.4, <seealso cref="CompoundWordTokenFilterBase"/> doesn't update offsets.
-	/// </ul>
-	/// </para>
-	/// </summary>
-	public abstract class CompoundWordTokenFilterBase : TokenFilter
-	{
-	  /// <summary>
-	  /// The default for minimal word length that gets decomposed
-	  /// </summary>
-	  public const int DEFAULT_MIN_WORD_SIZE = 5;
-
-	  /// <summary>
-	  /// The default for minimal length of subwords that get propagated to the output of this filter
-	  /// </summary>
-	  public const int DEFAULT_MIN_SUBWORD_SIZE = 2;
-
-	  /// <summary>
-	  /// The default for maximal length of subwords that get propagated to the output of this filter
-	  /// </summary>
-	  public const int DEFAULT_MAX_SUBWORD_SIZE = 15;
-
-	  protected internal readonly LuceneVersion matchVersion;
-	  protected internal readonly CharArraySet dictionary;
-	  protected internal readonly LinkedList<CompoundToken> tokens;
-	  protected internal readonly int minWordSize;
-	  protected internal readonly int minSubwordSize;
-	  protected internal readonly int maxSubwordSize;
-	  protected internal readonly bool onlyLongestMatch;
-
-	  protected internal readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-	  protected internal readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
-	  private readonly PositionIncrementAttribute posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
-
-	  private AttributeSource.State current;
-
-	  protected internal CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, bool onlyLongestMatch) : this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch)
-	  {
-	  }
-
-	  protected internal CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary) : this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false)
-	  {
-	  }
-
-	  protected internal CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch) : base(input)
-	  {
-		this.matchVersion = matchVersion;
-		this.tokens = new LinkedList<CompoundToken>();
-		if (minWordSize < 0)
-		{
-		  throw new System.ArgumentException("minWordSize cannot be negative");
-		}
-		this.minWordSize = minWordSize;
-		if (minSubwordSize < 0)
-		{
-		  throw new System.ArgumentException("minSubwordSize cannot be negative");
-		}
-		this.minSubwordSize = minSubwordSize;
-		if (maxSubwordSize < 0)
-		{
-		  throw new System.ArgumentException("maxSubwordSize cannot be negative");
-		}
-		this.maxSubwordSize = maxSubwordSize;
-		this.onlyLongestMatch = onlyLongestMatch;
-		this.dictionary = dictionary;
-	  }
-
-	  public override bool IncrementToken()
-	  {
-		if (tokens.Count > 0)
-		{
-		  Debug.Assert(current != null);
-		  CompoundToken token = tokens.First.Value; tokens.RemoveFirst();
-		  RestoreState(current); // keep all other attributes untouched
-		  termAtt.SetEmpty().Append(token.txt);
-		  offsetAtt.SetOffset(token.startOffset, token.endOffset);
-		  posIncAtt.PositionIncrement = 0;
-		  return true;
-		}
-
-		current = null; // not really needed, but for safety
-		if (input.incrementToken())
-		{
-		  // Only words longer than minWordSize get processed
-		  if (termAtt.length() >= this.minWordSize)
-		  {
-			decompose();
-			// only capture the state if we really need it for producing new tokens
-			if (tokens.Count > 0)
-			{
-			  current = captureState();
-			}
-		  }
-		  // return original token:
-		  return true;
-		}
-		else
-		{
-		  return false;
-		}
-	  }
-
-	  /// <summary>
-	  /// Decomposes the current <seealso cref="#termAtt"/> and places <seealso cref="CompoundToken"/> instances in the <seealso cref="#tokens"/> list.
-	  /// The original token may not be placed in the list, as it is automatically passed through this filter.
-	  /// </summary>
-	  protected internal abstract void decompose();
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
-	  {
-		base.reset();
-		tokens.Clear();
-		current = null;
-	  }
-
-	  /// <summary>
-	  /// Helper class to hold decompounded token information
-	  /// </summary>
-	  protected internal class CompoundToken
-	  {
-		  private readonly CompoundWordTokenFilterBase outerInstance;
-
-		public readonly string txt;
-		public readonly int startOffset, endOffset;
-
-		/// <summary>
-		/// Construct the compound token based on a slice of the current <seealso cref="CompoundWordTokenFilterBase#termAtt"/>. </summary>
-		public CompoundToken(CompoundWordTokenFilterBase outerInstance, int offset, int length)
-		{
-			this.outerInstance = outerInstance;
-		  this.txt = outerInstance.termAtt.subSequence(offset, offset + length);
-
-		  // offsets of the original word
-		  int startOff = outerInstance.offsetAtt.startOffset();
-		  int endOff = outerInstance.offsetAtt.endOffset();
-
-		  if (outerInstance.matchVersion.onOrAfter(LuceneVersion.LUCENE_44) || endOff - startOff != outerInstance.termAtt.length())
-		  {
-			// if length by start + end offsets doesn't match the term text then assume
-			// this is a synonym and don't adjust the offsets.
-			this.startOffset = startOff;
-			this.endOffset = endOff;
-		  }
-		  else
-		  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int newStart = startOff + offset;
-			int newStart = startOff + offset;
-			this.startOffset = newStart;
-			this.endOffset = newStart + length;
-		  }
-		}
-
-	  }
-	}
-
+    /// Base class for decomposition token filters.
+    /// <para>
+    /// 
+    /// <a name="version"></a>
+    /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating
+    /// CompoundWordTokenFilterBase:
+    /// <ul>
+    /// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
+    /// supplementary characters in strings and char arrays provided as compound word
+    /// dictionaries.
+    /// <li>As of 4.4, <seealso cref="CompoundWordTokenFilterBase"/> doesn't update offsets.
+    /// </ul>
+    /// </para>
+    /// </summary>
+    public abstract class CompoundWordTokenFilterBase : TokenFilter
+    {
+        /// <summary>
+        /// The default for minimal word length that gets decomposed
+        /// </summary>
+        public const int DEFAULT_MIN_WORD_SIZE = 5;
+
+        /// <summary>
+        /// The default for minimal length of subwords that get propagated to the output of this filter
+        /// </summary>
+        public const int DEFAULT_MIN_SUBWORD_SIZE = 2;
+
+        /// <summary>
+        /// The default for maximal length of subwords that get propagated to the output of this filter
+        /// </summary>
+        public const int DEFAULT_MAX_SUBWORD_SIZE = 15;
+
+        protected internal readonly LuceneVersion matchVersion;
+        protected internal readonly CharArraySet dictionary;
+        protected internal readonly LinkedList<CompoundToken> tokens;
+        protected internal readonly int minWordSize;
+        protected internal readonly int minSubwordSize;
+        protected internal readonly int maxSubwordSize;
+        protected internal readonly bool onlyLongestMatch;
+
+        protected internal readonly CharTermAttribute termAtt;
+        protected internal readonly IOffsetAttribute offsetAtt;
+        private readonly IPositionIncrementAttribute posIncAtt;
+
+        private AttributeSource.State current;
+
+        protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, bool onlyLongestMatch)
+            : this(matchVersion, input, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch)
+        {
+        }
+
+        protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary)
+            : this(matchVersion, input, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
+        {
+        }
+
+        protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
+            : base(input)
+        {
+            termAtt = AddAttribute<ICharTermAttribute>() as CharTermAttribute;
+            offsetAtt = AddAttribute<IOffsetAttribute>();
+            posIncAtt = AddAttribute<IPositionIncrementAttribute>();
+
+            this.matchVersion = matchVersion;
+            this.tokens = new LinkedList<CompoundToken>();
+            if (minWordSize < 0)
+            {
+                throw new System.ArgumentException("minWordSize cannot be negative");
+            }
+            this.minWordSize = minWordSize;
+            if (minSubwordSize < 0)
+            {
+                throw new System.ArgumentException("minSubwordSize cannot be negative");
+            }
+            this.minSubwordSize = minSubwordSize;
+            if (maxSubwordSize < 0)
+            {
+                throw new System.ArgumentException("maxSubwordSize cannot be negative");
+            }
+            this.maxSubwordSize = maxSubwordSize;
+            this.onlyLongestMatch = onlyLongestMatch;
+            this.dictionary = dictionary;
+        }
+
+        public override bool IncrementToken()
+        {
+            if (tokens.Count > 0)
+            {
+                Debug.Assert(current != null);
+                CompoundToken token = tokens.First.Value; tokens.RemoveFirst();
+                RestoreState(current); // keep all other attributes untouched
+                termAtt.SetEmpty().Append(token.txt);
+                offsetAtt.SetOffset(token.startOffset, token.endOffset);
+                posIncAtt.PositionIncrement = 0;
+                return true;
+            }
+
+            current = null; // not really needed, but for safety
+            if (input.IncrementToken())
+            {
+                // Only words longer than minWordSize get processed
+                if (termAtt.Length >= this.minWordSize)
+                {
+                    Decompose();
+                    // only capture the state if we really need it for producing new tokens
+                    if (tokens.Count > 0)
+                    {
+                        current = CaptureState();
+                    }
+                }
+                // return original token:
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+
+        /// <summary>
+        /// Decomposes the current <seealso cref="#termAtt"/> and places <seealso cref="CompoundToken"/> instances in the <seealso cref="#tokens"/> list.
+        /// The original token may not be placed in the list, as it is automatically passed through this filter.
+        /// </summary>
+        protected abstract void Decompose();
+
+        public override void Reset()
+        {
+            base.Reset();
+            tokens.Clear();
+            current = null;
+        }
+
+        /// <summary>
+        /// Helper class to hold decompounded token information
+        /// </summary>
+        protected internal class CompoundToken
+        {
+            public readonly ICharSequence txt;
+            public readonly int startOffset, endOffset;
+
+            /// <summary>
+            /// Construct the compound token based on a slice of the current <seealso cref="CompoundWordTokenFilterBase#termAtt"/>. </summary>
+            public CompoundToken(CompoundWordTokenFilterBase outerInstance, int offset, int length)
+            {
+                this.txt = outerInstance.termAtt.SubSequence(offset, offset + length);
+
+                // offsets of the original word
+                int startOff = outerInstance.offsetAtt.StartOffset();
+                int endOff = outerInstance.offsetAtt.EndOffset();
+
+                if (outerInstance.matchVersion.OnOrAfter(LuceneVersion.LUCENE_44) || endOff - startOff != outerInstance.termAtt.Length)
+                {
+                    // if length by start + end offsets doesn't match the term text then assume
+                    // this is a synonym and don't adjust the offsets.
+                    this.startOffset = startOff;
+                    this.endOffset = endOff;
+                }
+                else
+                {
+                    int newStart = startOff + offset;
+                    this.startOffset = newStart;
+                    this.endOffset = newStart + length;
+                }
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
index 6b722ad..c8b5f5f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
@@ -4,65 +4,65 @@ using Lucene.Net.Util;
 
 namespace Lucene.Net.Analysis.Core
 {
-
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Normalizes token text to UPPER CASE.
-	/// <a name="version"/>
-	/// <para>You must specify the required <seealso cref="LuceneVersion"/>
-	/// compatibility when creating UpperCaseFilter
-	/// 
-	/// </para>
-	/// <para><b>NOTE:</b> In Unicode, this transformation may lose information when the
-	/// upper case character represents more than one lower case character. Use this filter
-	/// when you require uppercase tokens.  Use the <seealso cref="LowerCaseFilter"/> for 
-	/// general search matching
-	/// </para>
-	/// </summary>
-	public sealed class UpperCaseFilter : TokenFilter
-	{
-	  private readonly CharacterUtils charUtils;
-        private readonly ICharTermAttribute termAtt;;
+    /// Normalizes token text to UPPER CASE.
+    /// <a name="version"/>
+    /// <para>You must specify the required <seealso cref="LuceneVersion"/>
+    /// compatibility when creating UpperCaseFilter
+    /// 
+    /// </para>
+    /// <para><b>NOTE:</b> In Unicode, this transformation may lose information when the
+    /// upper case character represents more than one lower case character. Use this filter
+    /// when you require uppercase tokens.  Use the <seealso cref="LowerCaseFilter"/> for 
+    /// general search matching
+    /// </para>
+    /// </summary>
+    public sealed class UpperCaseFilter : TokenFilter
+    {
+        private readonly CharacterUtils charUtils;
+        private readonly ICharTermAttribute termAtt;
 
-	  /// <summary>
-	  /// Create a new UpperCaseFilter, that normalizes token text to upper case.
-	  /// </summary>
-	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
-	  /// <param name="in"> TokenStream to filter </param>
-	  public UpperCaseFilter(LuceneVersion matchVersion, TokenStream @in) : base(@in)
-	  {
-	      termAtt = AddAttribute<ICharTermAttribute>();
-	      termAtt = AddAttribute<ICharTermAttribute>();
-		charUtils = CharacterUtils.GetInstance(matchVersion);
-	  }
+        /// <summary>
+        /// Create a new UpperCaseFilter, that normalizes token text to upper case.
+        /// </summary>
+        /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+        /// <param name="in"> TokenStream to filter </param>
+        public UpperCaseFilter(LuceneVersion matchVersion, TokenStream @in)
+            : base(@in)
+        {
+            termAtt = AddAttribute<ICharTermAttribute>();
+            termAtt = AddAttribute<ICharTermAttribute>();
+            charUtils = CharacterUtils.GetInstance(matchVersion);
+        }
 
-	  public override bool IncrementToken()
-	  {
-		if (input.IncrementToken())
-		{
-		  charUtils.ToUpper(termAtt.Buffer(), 0, termAtt.Length);
-		  return true;
-		}
-		else
-		{
-		  return false;
-		}
-	  }
-	}
+        public override bool IncrementToken()
+        {
+            if (input.IncrementToken())
+            {
+                charUtils.ToUpper(termAtt.Buffer(), 0, termAtt.Length);
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
index 3827b36..c277918 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
@@ -1,323 +1,308 @@
 using System;
 using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
 using Reader = System.IO.TextReader;
 using Version = Lucene.Net.Util.LuceneVersion;
 
 namespace Lucene.Net.Analysis.Ngram
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Old version of <seealso cref="EdgeNGramTokenizer"/> which doesn't handle correctly
-	/// supplementary characters.
-	/// </summary>
-	[Obsolete]
-	public sealed class Lucene43EdgeNGramTokenizer : Tokenizer
-	{
-	  public const Side DEFAULT_SIDE = Side.FRONT;
-	  public const int DEFAULT_MAX_GRAM_SIZE = 1;
-	  public const int DEFAULT_MIN_GRAM_SIZE = 1;
-
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
-	  private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
-
-	  /// <summary>
-	  /// Specifies which side of the input the n-gram should be generated from </summary>
-	  public enum Side
-	  {
-
-		/// <summary>
-		/// Get the n-gram from the front of the input </summary>
-//JAVA TO C# CONVERTER TODO TASK: The following line could not be converted:
-		FRONT
-		{
-		  public String getLabel() { return "front"
-		}
-	  },
-
-		/// <summary>
-		/// Get the n-gram from the end of the input </summary>
-//JAVA TO C# CONVERTER TODO TASK: The following line could not be converted:
-		BACK
-		{
-		  public String getLabel()
-		  {
-			  return "back";
-		  }
-		}
-
-		public = 
-
-		// Get the appropriate Side from a string
-		public static Side getSide(String sideName)
-		{
-//JAVA TO C# CONVERTER TODO TASK: The following line could not be converted:
-		  if (FRONT.getLabel().equals(sideName))
-		  {
-			return FRONT;
-		  }
-//JAVA TO C# CONVERTER TODO TASK: The following line could not be converted:
-		  if (BACK.getLabel().equals(sideName))
-		  {
-			return BACK;
-		  }
-		  return null;
-		}
-	}
-
-	  private int minGram;
-	  private int maxGram;
-	  private int gramSize;
-	  private Side side;
-	  private bool started;
-	  private int inLen; // length of the input AFTER trim()
-	  private int charsRead; // length of the input
-	  private string inStr;
-
-
-	  /// <summary>
-	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
-	  /// </summary>
-	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
-	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
-	  /// <param name="side"> the <seealso cref="Side"/> from which to chop off an n-gram </param>
-	  /// <param name="minGram"> the smallest n-gram to generate </param>
-	  /// <param name="maxGram"> the largest n-gram to generate </param>
-	  [Obsolete]
-	  public Lucene43EdgeNGramTokenizer(Version version, Reader input, Side side, int minGram, int maxGram) : base(input)
-	  {
-		init(version, side, minGram, maxGram);
-	  }
-
-	  /// <summary>
-	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
-	  /// </summary>
-	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
-	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
-	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
-	  /// <param name="side"> the <seealso cref="Side"/> from which to chop off an n-gram </param>
-	  /// <param name="minGram"> the smallest n-gram to generate </param>
-	  /// <param name="maxGram"> the largest n-gram to generate </param>
-	  [Obsolete]
-	  public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, Side side, int minGram, int maxGram) : base(factory, input)
-	  {
-		init(version, side, minGram, maxGram);
-	  }
-
-	  /// <summary>
-	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
-	  /// </summary>
-	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
-	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
-	  /// <param name="sideLabel"> the name of the <seealso cref="Side"/> from which to chop off an n-gram </param>
-	  /// <param name="minGram"> the smallest n-gram to generate </param>
-	  /// <param name="maxGram"> the largest n-gram to generate </param>
-	  [Obsolete]
-	  public Lucene43EdgeNGramTokenizer(Version version, Reader input, string sideLabel, int minGram, int maxGram) : this(version, input, Side.getSide(sideLabel), minGram, maxGram)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
-	  /// </summary>
-	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
-	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
-	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
-	  /// <param name="sideLabel"> the name of the <seealso cref="Side"/> from which to chop off an n-gram </param>
-	  /// <param name="minGram"> the smallest n-gram to generate </param>
-	  /// <param name="maxGram"> the largest n-gram to generate </param>
-	  [Obsolete]
-	  public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, string sideLabel, int minGram, int maxGram) : this(version, factory, input, Side.getSide(sideLabel), minGram, maxGram)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
-	  /// </summary>
-	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
-	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
-	  /// <param name="minGram"> the smallest n-gram to generate </param>
-	  /// <param name="maxGram"> the largest n-gram to generate </param>
-	  public Lucene43EdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram) : this(version, input, Side.FRONT, minGram, maxGram)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
-	  /// </summary>
-	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
-	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
-	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
-	  /// <param name="minGram"> the smallest n-gram to generate </param>
-	  /// <param name="maxGram"> the largest n-gram to generate </param>
-	  public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) : this(version, factory, input, Side.FRONT, minGram, maxGram)
-	  {
-	  }
-
-	  private void init(Version version, Side side, int minGram, int maxGram)
-	  {
-		if (version == null)
-		{
-		  throw new System.ArgumentException("version must not be null");
-		}
-
-		if (side == null)
-		{
-		  throw new System.ArgumentException("sideLabel must be either front or back");
-		}
-
-		if (minGram < 1)
-		{
-		  throw new System.ArgumentException("minGram must be greater than zero");
-		}
-
-		if (minGram > maxGram)
-		{
-		  throw new System.ArgumentException("minGram must not be greater than maxGram");
-		}
-
-		if (version.onOrAfter(Version.LUCENE_44))
-		{
-		  if (side == Side.BACK)
-		  {
-			throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4");
-		  }
-		}
-		else
-		{
-		  maxGram = Math.Min(maxGram, 1024);
-		}
-
-		this.minGram = minGram;
-		this.maxGram = maxGram;
-		this.side = side;
-	  }
-
-	  /// <summary>
-	  /// Returns the next token in the stream, or null at EOS. </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		clearAttributes();
-		// if we are just starting, read the whole input
-		if (!started)
-		{
-		  started = true;
-		  gramSize = minGram;
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int limit = side == Side.FRONT ? maxGram : 1024;
-		  int limit = side == Side.FRONT ? maxGram : 1024;
-		  char[] chars = new char[Math.Min(1024, limit)];
-		  charsRead = 0;
-		  // TODO: refactor to a shared readFully somewhere:
-		  bool exhausted = false;
-		  while (charsRead < limit)
-		  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int inc = input.read(chars, charsRead, chars.length-charsRead);
-			int inc = input.read(chars, charsRead, chars.Length - charsRead);
-			if (inc == -1)
-			{
-			  exhausted = true;
-			  break;
-			}
-			charsRead += inc;
-			if (charsRead == chars.Length && charsRead < limit)
-			{
-			  chars = ArrayUtil.grow(chars);
-			}
-		  }
-
-		  inStr = new string(chars, 0, charsRead);
-		  inStr = inStr.Trim();
-
-		  if (!exhausted)
-		  {
-			// Read extra throwaway chars so that on end() we
-			// report the correct offset:
-			char[] throwaway = new char[1024];
-			while (true)
-			{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int inc = input.read(throwaway, 0, throwaway.length);
-			  int inc = input.read(throwaway, 0, throwaway.Length);
-			  if (inc == -1)
-			  {
-				break;
-			  }
-			  charsRead += inc;
-			}
-		  }
-
-		  inLen = inStr.length();
-		  if (inLen == 0)
-		  {
-			return false;
-		  }
-		  posIncrAtt.PositionIncrement = 1;
-		}
-		else
-		{
-		  posIncrAtt.PositionIncrement = 0;
-		}
-
-		// if the remaining input is too short, we can't generate any n-grams
-		if (gramSize > inLen)
-		{
-		  return false;
-		}
-
-		// if we have hit the end of our n-gram size range, quit
-		if (gramSize > maxGram || gramSize > inLen)
-		{
-		  return false;
-		}
-
-		// grab gramSize chars from front or back
-		int start = side == Side.FRONT ? 0 : inLen - gramSize;
-		int end = start + gramSize;
-		termAtt.setEmpty().append(inStr, start, end);
-		offsetAtt.setOffset(correctOffset(start), correctOffset(end));
-		gramSize++;
-		return true;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void end() throws java.io.IOException
-	  public override void end()
-	  {
-		base.end();
-		// set final offset
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int finalOffset = correctOffset(charsRead);
-		int finalOffset = correctOffset(charsRead);
-		this.offsetAtt.setOffset(finalOffset, finalOffset);
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
-	  {
-		base.reset();
-		started = false;
-	  }
-}
+    /// Old version of <seealso cref="EdgeNGramTokenizer"/> which doesn't handle correctly
+    /// supplementary characters.
+    /// </summary>
+    [Obsolete]
+    public sealed class Lucene43EdgeNGramTokenizer : Tokenizer
+    {
+        public const Side DEFAULT_SIDE = Side.FRONT;
+        public const int DEFAULT_MAX_GRAM_SIZE = 1;
+        public const int DEFAULT_MIN_GRAM_SIZE = 1;
+
+        private readonly CharTermAttribute termAtt;
+        private readonly OffsetAttribute offsetAtt;
+        private readonly PositionIncrementAttribute posIncrAtt;
+
+        /// <summary>
+        /// Specifies which side of the input the n-gram should be generated from </summary>
+        public enum Side
+        {
+
+            /// <summary>
+            /// Get the n-gram from the front of the input </summary>
+            FRONT,
+
+            /// <summary>
+            /// Get the n-gram from the end of the input </summary>
+            BACK,
+        }
+
+        private static string GetSideLabel(Side side)
+        {
+            if (side == Side.FRONT) return "front";
+            if (side == Side.BACK) return "back";
+            return null;
+        }
+
+
+        // Get the appropriate Side from a string
+        internal static Side? GetSide(String sideName)
+        {
+            if (GetSideLabel(Side.FRONT).Equals(sideName))
+            {
+                return Side.FRONT;
+            }
+            if (GetSideLabel(Side.BACK).Equals(sideName))
+            {
+                return Side.BACK;
+            }
+            return null;
+        }
+
+        private int minGram;
+        private int maxGram;
+        private int gramSize;
+        private Side side;
+        private bool started;
+        private int inLen; // length of the input AFTER trim()
+        private int charsRead; // length of the input
+        private string inStr;
+
+
+        /// <summary>
+        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+        /// </summary>
+        /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+        /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+        /// <param name="side"> the <seealso cref="Side"/> from which to chop off an n-gram </param>
+        /// <param name="minGram"> the smallest n-gram to generate </param>
+        /// <param name="maxGram"> the largest n-gram to generate </param>
+        [Obsolete]
+        public Lucene43EdgeNGramTokenizer(Version version, Reader input, Side side, int minGram, int maxGram)
+            : base(input)
+        {
+            init(version, side, minGram, maxGram);
+        }
+
+        /// <summary>
+        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+        /// </summary>
+        /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+        /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
+        /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+        /// <param name="side"> the <seealso cref="Side"/> from which to chop off an n-gram </param>
+        /// <param name="minGram"> the smallest n-gram to generate </param>
+        /// <param name="maxGram"> the largest n-gram to generate </param>
+        [Obsolete]
+        public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, Side side, int minGram, int maxGram)
+            : base(factory, input)
+        {
+            init(version, side, minGram, maxGram);
+        }
+
+        /// <summary>
+        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+        /// </summary>
+        /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+        /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+        /// <param name="sideLabel"> the name of the <seealso cref="Side"/> from which to chop off an n-gram </param>
+        /// <param name="minGram"> the smallest n-gram to generate </param>
+        /// <param name="maxGram"> the largest n-gram to generate </param>
+        [Obsolete]
+        public Lucene43EdgeNGramTokenizer(Version version, Reader input, string sideLabel, int minGram, int maxGram)
+            : this(version, input, GetSide(sideLabel), minGram, maxGram)
+        {
+        }
+
+        /// <summary>
+        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+        /// </summary>
+        /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+        /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
+        /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+        /// <param name="sideLabel"> the name of the <seealso cref="Side"/> from which to chop off an n-gram </param>
+        /// <param name="minGram"> the smallest n-gram to generate </param>
+        /// <param name="maxGram"> the largest n-gram to generate </param>
+        [Obsolete]
+        public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, string sideLabel, int minGram, int maxGram)
+            : this(version, factory, input, GetSide(sideLabel), minGram, maxGram)
+        {
+        }
+
+        /// <summary>
+        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+        /// </summary>
+        /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+        /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+        /// <param name="minGram"> the smallest n-gram to generate </param>
+        /// <param name="maxGram"> the largest n-gram to generate </param>
+        public Lucene43EdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram)
+            : this(version, input, Side.FRONT, minGram, maxGram)
+        {
+        }
+
+        /// <summary>
+        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+        /// </summary>
+        /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+        /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
+        /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+        /// <param name="minGram"> the smallest n-gram to generate </param>
+        /// <param name="maxGram"> the largest n-gram to generate </param>
+        public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram)
+            : this(version, factory, input, Side.FRONT, minGram, maxGram)
+        {
+        }
+
+        private void init(Version version, Side side, int minGram, int maxGram)
+        {
+            if (version == null)
+            {
+                throw new System.ArgumentException("version must not be null");
+            }
+
+            if (side == null)
+            {
+                throw new System.ArgumentException("sideLabel must be either front or back");
+            }
+
+            if (minGram < 1)
+            {
+                throw new System.ArgumentException("minGram must be greater than zero");
+            }
+
+            if (minGram > maxGram)
+            {
+                throw new System.ArgumentException("minGram must not be greater than maxGram");
+            }
+
+            if (version.OnOrAfter(Version.LUCENE_44))
+            {
+                if (side == Side.BACK)
+                {
+                    throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4");
+                }
+            }
+            else
+            {
+                maxGram = Math.Min(maxGram, 1024);
+            }
+
+            this.minGram = minGram;
+            this.maxGram = maxGram;
+            this.side = side;
+        }
+
+        /// <summary>
+        /// Returns the next token in the stream, or null at EOS. </summary>
+        public override bool IncrementToken()
+        {
+            ClearAttributes();
+            // if we are just starting, read the whole input
+            if (!started)
+            {
+                started = true;
+                gramSize = minGram;
+                int limit = side == Side.FRONT ? maxGram : 1024;
+                char[] chars = new char[Math.Min(1024, limit)];
+                charsRead = 0;
+                // TODO: refactor to a shared readFully somewhere:
+                bool exhausted = false;
+                while (charsRead < limit)
+                {
+                    int inc = input.Read(chars, charsRead, chars.Length - charsRead);
+                    if (inc <= 0)
+                    {
+                        exhausted = true;
+                        break;
+                    }
+                    charsRead += inc;
+                    if (charsRead == chars.Length && charsRead < limit)
+                    {
+                        chars = ArrayUtil.Grow(chars);
+                    }
+                }
+
+                inStr = new string(chars, 0, charsRead);
+                inStr = inStr.Trim();
+
+                if (!exhausted)
+                {
+                    // Read extra throwaway chars so that on end() we
+                    // report the correct offset:
+                    var throwaway = new char[1024];
+                    while (true)
+                    {
+                        int inc = input.Read(throwaway, 0, throwaway.Length);
+                        if (inc <= 0)
+                        {
+                            break;
+                        }
+                        charsRead += inc;
+                    }
+                }
+
+                inLen = inStr.Length;
+                if (inLen == 0)
+                {
+                    return false;
+                }
+                posIncrAtt.PositionIncrement = 1;
+            }
+            else
+            {
+                posIncrAtt.PositionIncrement = 0;
+            }
+
+            // if the remaining input is too short, we can't generate any n-grams
+            if (gramSize > inLen)
+            {
+                return false;
+            }
+
+            // if we have hit the end of our n-gram size range, quit
+            if (gramSize > maxGram || gramSize > inLen)
+            {
+                return false;
+            }
+
+            // grab gramSize chars from front or back
+            int start = side == Side.FRONT ? 0 : inLen - gramSize;
+            int end = start + gramSize;
+            termAtt.SetEmpty().Append(inStr, start, end);
+            offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(end));
+            gramSize++;
+            return true;
+        }
+
+        public override void End()
+        {
+            base.End();
+            // set final offset
+            int finalOffset = CorrectOffset(charsRead);
+            this.offsetAtt.SetOffset(finalOffset, finalOffset);
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            started = false;
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
index de32d23..0dd0529 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
@@ -1,164 +1,149 @@
 using Lucene.Net.Analysis.Core;
-using Lucene.Net.Analysis.Standard;
 using Lucene.Net.Analysis.Util;
-using StopwordAnalyzerBase = Lucene.Net.Analysis.Util.StopwordAnalyzerBase;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.standard;
+using Reader = System.IO.TextReader;
 
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-	using org.apache.lucene.analysis;
-	using LowerCaseFilter = LowerCaseFilter;
-	using StopAnalyzer = StopAnalyzer;
-	using StopFilter = StopFilter;
-	using CharArraySet = CharArraySet;
-	using StopwordAnalyzerBase = StopwordAnalyzerBase;
-	using WordlistLoader = WordlistLoader;
-	using Version = org.apache.lucene.util.Version;
-
-
-	/// <summary>
-	/// Filters <seealso cref="ClassicTokenizer"/> with <seealso cref="ClassicFilter"/>, {@link
-	/// LowerCaseFilter} and <seealso cref="StopFilter"/>, using a list of
-	/// English stop words.
-	/// 
-	/// <a name="version"/>
-	/// <para>You must specify the required <seealso cref="Version"/>
-	/// compatibility when creating ClassicAnalyzer:
-	/// <ul>
-	///   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
-	///         supplementary characters in stopwords
-	///   <li> As of 2.9, StopFilter preserves position
-	///        increments
-	///   <li> As of 2.4, Tokens incorrectly identified as acronyms
-	///        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
-	/// </ul>
-	/// 
-	/// ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1. 
-	/// As of 3.1, <seealso cref="StandardAnalyzer"/> implements Unicode text segmentation,
-	/// as specified by UAX#29.
-	/// </para>
-	/// </summary>
-	public sealed class ClassicAnalyzer : StopwordAnalyzerBase
-	{
-
-	  /// <summary>
-	  /// Default maximum allowed token length </summary>
-	  public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
-	  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
-	  /// <summary>
-	  /// An unmodifiable set containing some common English words that are usually not
-	  /// useful for searching. 
-	  /// </summary>
-	  public static readonly CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
-
-	  /// <summary>
-	  /// Builds an analyzer with the given stop words. </summary>
-	  /// <param name="matchVersion"> Lucene version to match See {@link
-	  /// <a href="#version">above</a>} </param>
-	  /// <param name="stopWords"> stop words  </param>
-	  public ClassicAnalyzer(Version matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Builds an analyzer with the default stop words ({@link
-	  /// #STOP_WORDS_SET}). </summary>
-	  /// <param name="matchVersion"> Lucene version to match See {@link
-	  /// <a href="#version">above</a>} </param>
-	  public ClassicAnalyzer(Version matchVersion) : this(matchVersion, STOP_WORDS_SET)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Builds an analyzer with the stop words from the given reader. </summary>
-	  /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso>
-	  /// <param name="matchVersion"> Lucene version to match See {@link
-	  /// <a href="#version">above</a>} </param>
-	  /// <param name="stopwords"> Reader to read stop words from  </param>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public ClassicAnalyzer(org.apache.lucene.util.Version matchVersion, java.io.Reader stopwords) throws java.io.IOException
-	  public ClassicAnalyzer(Version matchVersion, Reader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
-	  {
-	  }
-
-	  /// <summary>
-	  /// Set maximum allowed token length.  If a token is seen
-	  /// that exceeds this length then it is discarded.  This
-	  /// setting only takes effect the next time tokenStream or
-	  /// tokenStream is called.
-	  /// </summary>
-	  public int MaxTokenLength
-	  {
-		  set
-		  {
-			maxTokenLength = value;
-		  }
-		  get
-		  {
-			return maxTokenLength;
-		  }
-	  }
-
-
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: @Override protected TokenStreamComponents createComponents(final String fieldName, final java.io.Reader reader)
-	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
-		ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
-		src.MaxTokenLength = maxTokenLength;
-		TokenStream tok = new ClassicFilter(src);
-		tok = new LowerCaseFilter(matchVersion, tok);
-		tok = new StopFilter(matchVersion, tok, stopwords);
-		return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
-	  }
-
-	  private class TokenStreamComponentsAnonymousInnerClassHelper : TokenStreamComponents
-	  {
-		  private readonly ClassicAnalyzer outerInstance;
-
-		  private Reader reader;
-		  private org.apache.lucene.analysis.standard.ClassicTokenizer src;
-
-		  public TokenStreamComponentsAnonymousInnerClassHelper(ClassicAnalyzer outerInstance, org.apache.lucene.analysis.standard.ClassicTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.reader = reader;
-			  this.src = src;
-		  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override protected void setReader(final java.io.Reader reader) throws java.io.IOException
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-		  protected internal override Reader Reader
-		  {
-			  set
-			  {
-				src.MaxTokenLength = outerInstance.maxTokenLength;
-				base.Reader = value;
-			  }
-		  }
-	  }
-	}
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Filters <seealso cref="ClassicTokenizer"/> with <seealso cref="ClassicFilter"/>, {@link
+    /// LowerCaseFilter} and <seealso cref="StopFilter"/>, using a list of
+    /// English stop words.
+    /// 
+    /// <a name="version"/>
+    /// <para>You must specify the required <seealso cref="LuceneVersion"/>
+    /// compatibility when creating ClassicAnalyzer:
+    /// <ul>
+    ///   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+    ///         supplementary characters in stopwords
+    ///   <li> As of 2.9, StopFilter preserves position
+    ///        increments
+    ///   <li> As of 2.4, Tokens incorrectly identified as acronyms
+    ///        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
+    /// </ul>
+    /// 
+    /// ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1. 
+    /// As of 3.1, <seealso cref="StandardAnalyzer"/> implements Unicode text segmentation,
+    /// as specified by UAX#29.
+    /// </para>
+    /// </summary>
+    public sealed class ClassicAnalyzer : StopwordAnalyzerBase
+    {
+
+        /// <summary>
+        /// Default maximum allowed token length </summary>
+        public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+        private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+        /// <summary>
+        /// An unmodifiable set containing some common English words that are usually not
+        /// useful for searching. 
+        /// </summary>
+        public static readonly CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+        /// <summary>
+        /// Builds an analyzer with the given stop words. </summary>
+        /// <param name="matchVersion"> Lucene version to match See {@link
+        /// <a href="#version">above</a>} </param>
+        /// <param name="stopWords"> stop words  </param>
+        public ClassicAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords)
+            : base(matchVersion, stopWords)
+        {
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the default stop words ({@link
+        /// #STOP_WORDS_SET}). </summary>
+        /// <param name="matchVersion"> Lucene version to match See {@link
+        /// <a href="#version">above</a>} </param>
+        public ClassicAnalyzer(LuceneVersion matchVersion)
+            : this(matchVersion, STOP_WORDS_SET)
+        {
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the stop words from the given reader. </summary>
+        /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso>
+        /// <param name="matchVersion"> Lucene version to match See {@link
+        /// <a href="#version">above</a>} </param>
+        /// <param name="stopwords"> Reader to read stop words from  </param>
+        public ClassicAnalyzer(LuceneVersion matchVersion, Reader stopwords)
+            : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
+        {
+        }
+
+        /// <summary>
+        /// Set maximum allowed token length.  If a token is seen
+        /// that exceeds this length then it is discarded.  This
+        /// setting only takes effect the next time tokenStream or
+        /// tokenStream is called.
+        /// </summary>
+        public int MaxTokenLength
+        {
+            set
+            {
+                maxTokenLength = value;
+            }
+            get
+            {
+                return maxTokenLength;
+            }
+        }
+
+
+        public override TokenStreamComponents CreateComponents(string fieldName, Reader reader)
+        {
+            var src = new ClassicTokenizer(matchVersion, reader);
+            src.MaxTokenLength = maxTokenLength;
+            TokenStream tok = new ClassicFilter(src);
+            tok = new LowerCaseFilter(matchVersion, tok);
+            tok = new StopFilter(matchVersion, tok, stopwords);
+            return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
+        }
+
+        private class TokenStreamComponentsAnonymousInnerClassHelper : TokenStreamComponents
+        {
+            private readonly ClassicAnalyzer outerInstance;
+
+            private Reader reader;
+            private ClassicTokenizer src;
+
+            public TokenStreamComponentsAnonymousInnerClassHelper(ClassicAnalyzer outerInstance, ClassicTokenizer src, TokenStream tok, Reader reader)
+                : base(src, tok)
+            {
+                this.outerInstance = outerInstance;
+                this.reader = reader;
+                this.src = src;
+            }
+
+            protected override Reader Reader
+            {
+                set
+                {
+                    src.MaxTokenLength = outerInstance.maxTokenLength;
+                    base.Reader = value;
+                }
+            }
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
index 9ee4b32..60bd1dd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
@@ -1,92 +1,85 @@
-namespace org.apache.lucene.analysis.standard
-{
-
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+using Lucene.Net.Analysis.Tokenattributes;
 
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+namespace Lucene.Net.Analysis.Standard
+{
 
-	/// <summary>
-	/// Normalizes tokens extracted with <seealso cref="ClassicTokenizer"/>. </summary>
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Normalizes tokens extracted with <seealso cref="ClassicTokenizer"/>. </summary>
 
-	public class ClassicFilter : TokenFilter
-	{
+    public class ClassicFilter : TokenFilter
+    {
 
-	  /// <summary>
-	  /// Construct filtering <i>in</i>. </summary>
-	  public ClassicFilter(TokenStream @in) : base(@in)
-	  {
-	  }
+        /// <summary>
+        /// Construct filtering <i>in</i>. </summary>
+        public ClassicFilter(TokenStream @in)
+            : base(@in)
+        {
+            typeAtt = AddAttribute<ITypeAttribute>();
+            termAtt = AddAttribute<ICharTermAttribute>();
+        }
 
-	  private static readonly string APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
-	  private static readonly string ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
+        private static readonly string APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
+        private static readonly string ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
 
-	  // this filters uses attribute type
-	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+        // this filter uses the type attribute
+        private readonly ITypeAttribute typeAtt;
+        private readonly ICharTermAttribute termAtt;
 
-	  /// <summary>
-	  /// Returns the next token in the stream, or null at EOS.
-	  /// <para>Removes <tt>'s</tt> from the end of words.
-	  /// </para>
-	  /// <para>Removes dots from acronyms.
-	  /// </para>
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		if (!input.incrementToken())
-		{
-		  return false;
-		}
+        /// <summary>
+        /// Advances to the next token in the stream; returns false at end of stream.
+        /// <para>Removes <tt>'s</tt> from the end of words.
+        /// </para>
+        /// <para>Removes dots from acronyms.
+        /// </para>
+        /// </summary>
+        public override bool IncrementToken()
+        {
+            if (!input.IncrementToken())
+            {
+                return false;
+            }
 
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final char[] buffer = termAtt.buffer();
-		char[] buffer = termAtt.buffer();
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int bufferLength = termAtt.length();
-		int bufferLength = termAtt.length();
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final String type = typeAtt.type();
-		string type = typeAtt.type();
+            char[] buffer = termAtt.Buffer();
+            int bufferLength = termAtt.Length;
+            string type = typeAtt.Type;
 
-		if (type == APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S')) // remove 's
-		{
-		  // Strip last 2 characters off
-		  termAtt.Length = bufferLength - 2;
-		} // remove dots
-		else if (type == ACRONYM_TYPE)
-		{
-		  int upto = 0;
-		  for (int i = 0;i < bufferLength;i++)
-		  {
-			char c = buffer[i];
-			if (c != '.')
-			{
-			  buffer[upto++] = c;
-			}
-		  }
-		  termAtt.Length = upto;
-		}
+            if (type == APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S')) // remove 's
+            {
+                // Strip last 2 characters off
+                termAtt.Length = bufferLength - 2;
+            } // remove dots
+            else if (type == ACRONYM_TYPE)
+            {
+                int upto = 0;
+                for (int i = 0; i < bufferLength; i++)
+                {
+                    char c = buffer[i];
+                    if (c != '.')
+                    {
+                        buffer[upto++] = c;
+                    }
+                }
+                termAtt.Length = upto;
+            }
 
-		return true;
-	  }
-	}
+            return true;
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
index 2107ccc..45d7cd0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
@@ -1,55 +1,53 @@
 using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
 
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Factory for <seealso cref="ClassicFilter"/>.
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.ClassicTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.ClassicFilterFactory"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// </summary>
+    public class ClassicFilterFactory : TokenFilterFactory
+    {
 
-	using TokenFilterFactory = TokenFilterFactory;
+        /// <summary>
+        /// Creates a new ClassicFilterFactory </summary>
+        public ClassicFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + args);
+            }
+        }
 
-	/// <summary>
-	/// Factory for <seealso cref="ClassicFilter"/>.
-	/// <pre class="prettyprint">
-	/// &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
-	///   &lt;analyzer&gt;
-	///     &lt;tokenizer class="solr.ClassicTokenizerFactory"/&gt;
-	///     &lt;filter class="solr.ClassicFilterFactory"/&gt;
-	///   &lt;/analyzer&gt;
-	/// &lt;/fieldType&gt;</pre>
-	/// </summary>
-	public class ClassicFilterFactory : TokenFilterFactory
-	{
-
-	  /// <summary>
-	  /// Creates a new ClassicFilterFactory </summary>
-	  public ClassicFilterFactory(IDictionary<string, string> args) : base(args)
-	  {
-		if (args.Count > 0)
-		{
-		  throw new System.ArgumentException("Unknown parameters: " + args);
-		}
-	  }
-
-	  public override TokenFilter create(TokenStream input)
-	  {
-		return new ClassicFilter(input);
-	  }
-	}
+        public override TokenStream Create(TokenStream input)
+        {
+            return new ClassicFilter(input);
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
index f9c680e..3ef7a9e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
@@ -15,198 +15,185 @@
  * limitations under the License.
  */
 
-using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.standard;
+using Reader = System.IO.TextReader;
 
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
 {
-
-
-	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-	using Version = org.apache.lucene.util.Version;
-
-	/// <summary>
-	/// A grammar-based tokenizer constructed with JFlex
-	/// 
-	/// <para> This should be a good tokenizer for most European-language documents:
-	/// 
-	/// <ul>
-	///   <li>Splits words at punctuation characters, removing punctuation. However, a 
-	///     dot that's not followed by whitespace is considered part of a token.
-	///   <li>Splits words at hyphens, unless there's a number in the token, in which case
-	///     the whole token is interpreted as a product number and is not split.
-	///   <li>Recognizes email addresses and internet hostnames as one token.
-	/// </ul>
-	/// 
-	/// </para>
-	/// <para>Many applications have specific tokenizer needs.  If this tokenizer does
-	/// not suit your application, please consider copying this source code
-	/// directory to your project and maintaining your own grammar-based tokenizer.
-	/// 
-	/// ClassicTokenizer was named StandardTokenizer in Lucene versions prior to 3.1.
-	/// As of 3.1, <seealso cref="StandardTokenizer"/> implements Unicode text segmentation,
-	/// as specified by UAX#29.
-	/// </para>
-	/// </summary>
-
-	public sealed class ClassicTokenizer : Tokenizer
-	{
-	  /// <summary>
-	  /// A private instance of the JFlex-constructed scanner </summary>
-	  private StandardTokenizerInterface scanner;
-
-	  public const int ALPHANUM = 0;
-	  public const int APOSTROPHE = 1;
-	  public const int ACRONYM = 2;
-	  public const int COMPANY = 3;
-	  public const int EMAIL = 4;
-	  public const int HOST = 5;
-	  public const int NUM = 6;
-	  public const int CJ = 7;
-
-	  public const int ACRONYM_DEP = 8;
-
-	  /// <summary>
-	  /// String token types that correspond to token type int constants </summary>
-	  public static readonly string[] TOKEN_TYPES = new string [] {"<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>"};
-
-	  private int skippedPositions;
-
-	  private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
-
-	  /// <summary>
-	  /// Set the max allowed token length.  Any token longer
-	  ///  than this is skipped. 
-	  /// </summary>
-	  public int MaxTokenLength
-	  {
-		  set
-		  {
-			if (value < 1)
-			{
-			  throw new System.ArgumentException("maxTokenLength must be greater than zero");
-			}
-			this.maxTokenLength = value;
-		  }
-		  get
-		  {
-			return maxTokenLength;
-		  }
-	  }
-
-
-	  /// <summary>
-	  /// Creates a new instance of the <seealso cref="ClassicTokenizer"/>.  Attaches
-	  /// the <code>input</code> to the newly created JFlex scanner.
-	  /// </summary>
-	  /// <param name="input"> The input reader
-	  /// 
-	  /// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
-	  public ClassicTokenizer(Version matchVersion, Reader input) : base(input)
-	  {
-		init(matchVersion);
-	  }
-
-	  /// <summary>
-	  /// Creates a new ClassicTokenizer with a given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> 
-	  /// </summary>
-	  public ClassicTokenizer(Version matchVersion, AttributeFactory factory, Reader input) : base(factory, input)
-	  {
-		init(matchVersion);
-	  }
-
-	  private void init(Version matchVersion)
-	  {
-		this.scanner = new ClassicTokenizerImpl(input);
-	  }
-
-	  // this tokenizer generates three attributes:
-	  // term offset, positionIncrement and type
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
-	  private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
-	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
-
-	  /*
-	   * (non-Javadoc)
-	   *
-	   * @see org.apache.lucene.analysis.TokenStream#next()
-	   */
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		clearAttributes();
-		skippedPositions = 0;
-
-		while (true)
-		{
-		  int tokenType = scanner.NextToken;
-
-		  if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
-		  {
-			return false;
-		  }
-
-		  if (scanner.yylength() <= maxTokenLength)
-		  {
-			posIncrAtt.PositionIncrement = skippedPositions + 1;
-			scanner.getText(termAtt);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int start = scanner.yychar();
-			int start = scanner.yychar();
-			offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
-
-			if (tokenType == ClassicTokenizer.ACRONYM_DEP)
-			{
-			  typeAtt.Type = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.HOST];
-			  termAtt.Length = termAtt.length() - 1; // remove extra '.'
-			}
-			else
-			{
-			  typeAtt.Type = ClassicTokenizer.TOKEN_TYPES[tokenType];
-			}
-			return true;
-		  }
-		  else
-			// When we skip a too-long term, we still increment the
-			// position increment
-		  {
-			skippedPositions++;
-		  }
-		}
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
-	  public override void end()
-	  {
-		base.end();
-		// set final offset
-		int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
-		offsetAtt.setOffset(finalOffset, finalOffset);
-		// adjust any skipped tokens
-		posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void close() throws java.io.IOException
-	  public override void close()
-	  {
-		base.close();
-		scanner.yyreset(input);
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
-	  {
-		base.reset();
-		scanner.yyreset(input);
-		skippedPositions = 0;
-	  }
-	}
+    /// <summary>
+    /// A grammar-based tokenizer constructed with JFlex
+    /// 
+    /// <para> This should be a good tokenizer for most European-language documents:
+    /// 
+    /// <ul>
+    ///   <li>Splits words at punctuation characters, removing punctuation. However, a 
+    ///     dot that's not followed by whitespace is considered part of a token.
+    ///   <li>Splits words at hyphens, unless there's a number in the token, in which case
+    ///     the whole token is interpreted as a product number and is not split.
+    ///   <li>Recognizes email addresses and internet hostnames as one token.
+    /// </ul>
+    /// 
+    /// </para>
+    /// <para>Many applications have specific tokenizer needs.  If this tokenizer does
+    /// not suit your application, please consider copying this source code
+    /// directory to your project and maintaining your own grammar-based tokenizer.
+    /// 
+    /// ClassicTokenizer was named StandardTokenizer in Lucene versions prior to 3.1.
+    /// As of 3.1, <seealso cref="StandardTokenizer"/> implements Unicode text segmentation,
+    /// as specified by UAX#29.
+    /// </para>
+    /// </summary>
+
+    public sealed class ClassicTokenizer : Tokenizer
+    {
+        /// <summary>
+        /// A private instance of the JFlex-constructed scanner </summary>
+        private StandardTokenizerInterface scanner;
+
+        public const int ALPHANUM = 0;
+        public const int APOSTROPHE = 1;
+        public const int ACRONYM = 2;
+        public const int COMPANY = 3;
+        public const int EMAIL = 4;
+        public const int HOST = 5;
+        public const int NUM = 6;
+        public const int CJ = 7;
+
+        public const int ACRONYM_DEP = 8;
+
+        /// <summary>
+        /// String token types that correspond to token type int constants </summary>
+        public static readonly string[] TOKEN_TYPES = new string[] { "<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>" };
+
+        private int skippedPositions;
+
+        private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+
+        /// <summary>
+        /// Gets or sets the maximum allowed token length.  Any token longer
+        ///  than this is skipped. 
+        /// </summary>
+        public int MaxTokenLength
+        {
+            set
+            {
+                if (value < 1)
+                {
+                    throw new System.ArgumentException("maxTokenLength must be greater than zero");
+                }
+                this.maxTokenLength = value;
+            }
+            get
+            {
+                return maxTokenLength;
+            }
+        }
+
+
+        /// <summary>
+        /// Creates a new instance of the <seealso cref="ClassicTokenizer"/>.  Attaches
+        /// the <code>input</code> to the newly created JFlex scanner.
+        /// </summary>
+        /// <param name="input"> The input reader
+        /// 
+        /// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
+        public ClassicTokenizer(LuceneVersion matchVersion, Reader input)
+            : base(input)
+        {
+            Init(matchVersion);
+        }
+
+        /// <summary>
+        /// Creates a new ClassicTokenizer with a given <seealso cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/> 
+        /// </summary>
+        public ClassicTokenizer(LuceneVersion matchVersion, AttributeFactory factory, Reader input)
+            : base(factory, input)
+        {
+            Init(matchVersion);
+        }
+
+        private void Init(LuceneVersion matchVersion)
+        {
+            this.scanner = new ClassicTokenizerImpl(input);
+        }
+
+        // this tokenizer generates four attributes:
+        // term, offset, positionIncrement and type
+        private readonly CharTermAttribute termAtt;
+        private readonly OffsetAttribute offsetAtt;
+        private readonly PositionIncrementAttribute posIncrAtt;
+        private readonly TypeAttribute typeAtt;
+        /*
+         * (non-Javadoc)
+         *
+         * @see org.apache.lucene.analysis.TokenStream#next()
+         */
+        public override bool IncrementToken()
+        {
+            ClearAttributes();
+            skippedPositions = 0;
+
+            while (true)
+            {
+                int tokenType = scanner.NextToken;
+
+                if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
+                {
+                    return false;
+                }
+
+                if (scanner.yylength() <= maxTokenLength)
+                {
+                    posIncrAtt.PositionIncrement = skippedPositions + 1;
+                    scanner.getText(termAtt);
+
+                    int start = scanner.yychar();
+                    offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + termAtt.Length));
+
+                    if (tokenType == ClassicTokenizer.ACRONYM_DEP)
+                    {
+                        typeAtt.Type = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.HOST];
+                        termAtt.Length = termAtt.Length - 1; // remove extra '.'
+                    }
+                    else
+                    {
+                        typeAtt.Type = ClassicTokenizer.TOKEN_TYPES[tokenType];
+                    }
+                    return true;
+                }
+                else
+                // When we skip a too-long term, we still increment the
+                // position increment
+                {
+                    skippedPositions++;
+                }
+            }
+        }
+
+        public override void End()
+        {
+            base.End();
+            // set final offset
+            int finalOffset = CorrectOffset(scanner.yychar() + scanner.yylength());
+            offsetAtt.SetOffset(finalOffset, finalOffset);
+            // adjust any skipped tokens
+            posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+        }
+
+        public override void Dispose()
+        {
+            base.Dispose();
+            scanner.yyreset(input);
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            scanner.yyreset(input);
+            skippedPositions = 0;
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
index 4d30289..f2ad424 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
@@ -1,7 +1,9 @@
 /* The following code was generated by JFlex 1.5.1 */
 using System;
 using System.IO;
+using Lucene.Net.Analysis.Tokenattributes;
 using org.apache.lucene.analysis.standard;
+using Reader = System.IO.TextReader;
 
 namespace Lucene.Net.Analysis.Standard
 {
@@ -286,9 +288,9 @@ namespace Lucene.Net.Analysis.Standard
 	/// <summary>
 	/// Fills CharTermAttribute with the current token text.
 	/// </summary>
-	public void getText(CharTermAttribute t)
+	public void getText(ICharTermAttribute t)
 	{
-	  t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+	  t.CopyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
 	}
 
 
@@ -359,7 +361,7 @@ namespace Lucene.Net.Analysis.Standard
 		}
 
 		/* finally: fill the buffer with new input */
-		int numRead = zzReader.read(zzBuffer, zzEndRead, zzBuffer.Length - zzEndRead);
+		int numRead = zzReader.Read(zzBuffer, zzEndRead, zzBuffer.Length - zzEndRead);
 
 		if (numRead > 0)
 		{
@@ -369,7 +371,7 @@ namespace Lucene.Net.Analysis.Standard
 		// unlikely but not impossible: read 0 characters, but not at end of stream    
 		if (numRead == 0)
 		{
-		  int c = zzReader.read();
+		  int c = zzReader.Read();
 		  if (c == -1)
 		  {
 			return true;
@@ -389,8 +391,6 @@ namespace Lucene.Net.Analysis.Standard
 	  /// <summary>
 	  /// Closes the input stream.
 	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public final void yyclose() throws java.io.IOException
 	  public void yyclose()
 	  {
 		zzAtEOF = true; // indicate end of file
@@ -398,7 +398,7 @@ namespace Lucene.Net.Analysis.Standard
 
 		if (zzReader != null)
 		{
-		  zzReader.close();
+		  zzReader.Close();
 		}
 	  }
 


Mime
View raw message