lucenenet-commits mailing list archives

From synhers...@apache.org
Subject [2/2] lucenenet git commit: Some more work on Analysis.Common
Date Wed, 31 Dec 2014 22:18:37 GMT
Some more work on Analysis.Common


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/5155ef8c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/5155ef8c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/5155ef8c

Branch: refs/heads/master
Commit: 5155ef8c59faef8149fad4506f5230bf5092e9af
Parents: f81d543
Author: Itamar Syn-Hershko <itamar@code972.com>
Authored: Thu Jan 1 00:08:25 2015 +0200
Committer: Itamar Syn-Hershko <itamar@code972.com>
Committed: Thu Jan 1 00:08:25 2015 +0200

----------------------------------------------------------------------
 .../Analysis/Core/KeywordAnalyzer.cs            |   1 -
 .../Analysis/Core/KeywordTokenizer.cs           |   7 +-
 .../Analysis/Core/KeywordTokenizerFactory.cs    |   1 -
 .../Analysis/Core/LetterTokenizer.cs            | 140 +++----
 .../Analysis/Core/LowerCaseFilter.cs            | 105 ++---
 .../Analysis/Core/LowerCaseFilterFactory.cs     |   1 -
 .../Analysis/Core/LowerCaseTokenizer.cs         | 144 +++----
 .../Analysis/Core/LowerCaseTokenizerFactory.cs  |   7 +-
 .../Analysis/Core/SimpleAnalyzer.cs             |   2 +-
 .../Analysis/Core/StopAnalyzer.cs               |   1 -
 .../Analysis/Core/StopFilter.cs                 | 220 +++++------
 .../Analysis/Core/TypeTokenFilterFactory.cs     |   3 +-
 .../Analysis/Core/UpperCaseFilter.cs            |  26 +-
 .../Analysis/Core/WhitespaceAnalyzer.cs         |   1 -
 .../Analysis/Core/WhitespaceTokenizer.cs        | 126 +++---
 .../Analysis/Core/WhitespaceTokenizerFactory.cs | 100 ++---
 .../Analysis/Synonym/SlowSynonymMap.cs          | 388 +++++++++----------
 .../Analysis/Util/CharArrayMap.cs               |   4 +-
 .../Analysis/Util/CharArraySet.cs               |   8 +-
 .../Analysis/Util/CharTokenizer.cs              |  24 +-
 .../Analysis/Util/CharacterUtils.cs             | 103 ++---
 src/Lucene.Net.Core/Analysis/Analyzer.cs        |   4 +-
 src/Lucene.Net.Core/Analysis/Tokenizer.cs       |   4 -
 23 files changed, 686 insertions(+), 734 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs
index 4d13021..61cd710 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs
@@ -1,5 +1,4 @@
 using System.IO;
-using org.apache.lucene.analysis.core;
 
 namespace Lucene.Net.Analysis.Core
 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs
index 6d2cbde..cc6c638 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs
@@ -33,8 +33,8 @@ namespace Lucene.Net.Analysis.Core
 
         private bool done = false;
         private int finalOffset;
-        private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-        private OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+        private readonly ICharTermAttribute termAtt;
+        private readonly IOffsetAttribute offsetAtt;
 
         public KeywordTokenizer(TextReader input)
             : this(input, DEFAULT_BUFFER_SIZE)
@@ -44,6 +44,9 @@ namespace Lucene.Net.Analysis.Core
         public KeywordTokenizer(TextReader input, int bufferSize)
             : base(input)
         {
+            termAtt = AddAttribute<ICharTermAttribute>();
+            offsetAtt = AddAttribute<IOffsetAttribute>();
+
             if (bufferSize <= 0)
             {
                 throw new System.ArgumentException("bufferSize must be > 0");
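
The recurring change across these files replaces the Java-converted field
initializers (addAttribute(typeof(CharTermAttribute))) with constructor calls
to the generic AddAttribute<T>(). C# field initializers cannot reference
"this", so the registration must happen in the constructor. A minimal sketch
of the pattern, using the type and method names from this diff (the exact
namespaces are assumptions):

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    // Hypothetical pass-through filter illustrating the pattern above.
    public sealed class PassThroughFilter : TokenFilter
    {
        private readonly ICharTermAttribute termAtt;

        public PassThroughFilter(TokenStream input)
            : base(input)
        {
            // Registered here because a field initializer cannot call the
            // inherited instance method AddAttribute<T>().
            termAtt = AddAttribute<ICharTermAttribute>();
        }

        public override bool IncrementToken()
        {
            return input.IncrementToken(); // termAtt then exposes the current term
        }
    }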

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs
index 8c3929f..be983d9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs
@@ -2,7 +2,6 @@
 using System.IO;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
-using org.apache.lucene.analysis.util;
 
 namespace Lucene.Net.Analysis.Core
 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
index 3a85d5d..b48b320 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
@@ -2,83 +2,83 @@
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Support;
 using Lucene.Net.Util;
-using org.apache.lucene.analysis.util;
 
 namespace Lucene.Net.Analysis.Core
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// A LetterTokenizer is a tokenizer that divides text at non-letters. That's to
-	/// say, it defines tokens as maximal strings of adjacent letters, as defined by
-	/// java.lang.Character.isLetter() predicate.
-	/// <para>
-	/// Note: this does a decent job for most European languages, but does a terrible
-	/// job for some Asian languages, where words are not separated by spaces.
-	/// </para>
-	/// <para>
-	/// <a name="version"/>
-	/// You must specify the required <seealso cref="Version"/> compatibility when creating
-	/// <seealso cref="LetterTokenizer"/>:
-	/// <ul>
-	/// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
-	/// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
-	/// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
-	/// </ul>
-	/// </para>
-	/// </summary>
+    /// A LetterTokenizer is a tokenizer that divides text at non-letters. That's to
+    /// say, it defines tokens as maximal strings of adjacent letters, as defined by
+    /// the java.lang.Character.isLetter() predicate.
+    /// <para>
+    /// Note: this does a decent job for most European languages, but does a terrible
+    /// job for some Asian languages, where words are not separated by spaces.
+    /// </para>
+    /// <para>
+    /// <a name="version"/>
+    /// You must specify the required <seealso cref="Version"/> compatibility when creating
+    /// <seealso cref="LetterTokenizer"/>:
+    /// <ul>
+    /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
+    /// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
+    /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
+    /// </ul>
+    /// </para>
+    /// </summary>
 
-	public class LetterTokenizer : CharTokenizer
-	{
+    public class LetterTokenizer : CharTokenizer
+    {
 
-	  /// <summary>
-	  /// Construct a new LetterTokenizer.
-	  /// </summary>
-	  /// <param name="matchVersion">
-	  ///          Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
-	  /// <param name="in">
-	  ///          the input to split up into tokens </param>
-	  public LetterTokenizer(Version matchVersion, TextReader @in) : base(matchVersion, @in)
-	  {
-	  }
+        /// <summary>
+        /// Construct a new LetterTokenizer.
+        /// </summary>
+        /// <param name="matchVersion">
+        ///          Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
+        /// <param name="in">
+        ///          the input to split up into tokens </param>
+        public LetterTokenizer(Version matchVersion, TextReader @in)
+            : base(matchVersion, @in)
+        {
+        }
 
-	  /// <summary>
-	  /// Construct a new LetterTokenizer using a given
-	  /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
-	  /// </summary>
-	  /// <param name="matchVersion">
-	  ///          Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
-	  /// <param name="factory">
-	  ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
-	  /// <param name="in">
-	  ///          the input to split up into tokens </param>
-	  public LetterTokenizer(Version matchVersion, AttributeSource.AttributeFactory factory, TextReader @in) : base(matchVersion, factory, @in)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Collects only characters which satisfy
-	  /// <seealso cref="Character#isLetter(int)"/>.
-	  /// </summary>
-	  protected internal override bool isTokenChar(int c)
-	  {
-		return char.IsLetter(c);
-	  }
-	}
+        /// <summary>
+        /// Construct a new LetterTokenizer using a given
+        /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
+        /// </summary>
+        /// <param name="matchVersion">
+        ///          Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
+        /// <param name="factory">
+        ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+        /// <param name="in">
+        ///          the input to split up into tokens </param>
+        public LetterTokenizer(Version matchVersion, AttributeSource.AttributeFactory factory, TextReader @in)
+            : base(matchVersion, factory, @in)
+        {
+        }
 
+        /// <summary>
+        /// Collects only characters which satisfy
+        /// <seealso cref="Character#isLetter(int)"/>.
+        /// </summary>
+        protected override bool IsTokenChar(char c)
+        {
+            return char.IsLetter(c);
+        }
+    }
 }
\ No newline at end of file
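
For context, a typical consumer loop over the ported tokenizer looks roughly
like this. This is a sketch: the Version value and the Reset()/End() lifecycle
calls are assumptions based on the Lucene 4.x Java API, not part of this
commit.

    using System;
    using System.IO;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Tokenattributes;
    using Version = Lucene.Net.Util.Version;

    static class LetterTokenizerDemo
    {
        static void Main()
        {
            var tokenizer = new LetterTokenizer(Version.LUCENE_CURRENT,
                                                new StringReader("C# in 2014"));
            var term = tokenizer.AddAttribute<ICharTermAttribute>();
            tokenizer.Reset();
            while (tokenizer.IncrementToken())
            {
                // Prints "C" then "in": '#', digits and spaces are non-letters.
                Console.WriteLine(term.ToString());
            }
            tokenizer.End();
        }
    }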

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
index b3c0c58..fe9654d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
@@ -1,63 +1,64 @@
 using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
-using org.apache.lucene.analysis.util;
 
 namespace Lucene.Net.Analysis.Core
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Normalizes token text to lower case.
-	/// <a name="version"/>
-	/// <para>You must specify the required <seealso cref="Version"/>
-	/// compatibility when creating LowerCaseFilter:
-	/// <ul>
-	///   <li> As of 3.1, supplementary characters are properly lowercased.
-	/// </ul>
-	/// </para>
-	/// </summary>
-	public sealed class LowerCaseFilter : TokenFilter
-	{
-	  private readonly CharacterUtils charUtils;
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+    /// Normalizes token text to lower case.
+    /// <a name="version"/>
+    /// <para>You must specify the required <seealso cref="Version"/>
+    /// compatibility when creating LowerCaseFilter:
+    /// <ul>
+    ///   <li> As of 3.1, supplementary characters are properly lowercased.
+    /// </ul>
+    /// </para>
+    /// </summary>
+    public sealed class LowerCaseFilter : TokenFilter
+    {
+        private readonly CharacterUtils charUtils;
+        private readonly ICharTermAttribute termAtt;
 
-	  /// <summary>
-	  /// Create a new LowerCaseFilter, that normalizes token text to lower case.
-	  /// </summary>
-	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
-	  /// <param name="in"> TokenStream to filter </param>
-	  public LowerCaseFilter(Version matchVersion, TokenStream @in) : base(@in)
-	  {
-		charUtils = CharacterUtils.getInstance(matchVersion);
-	  }
+        /// <summary>
+        /// Create a new LowerCaseFilter, that normalizes token text to lower case.
+        /// </summary>
+        /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+        /// <param name="in"> TokenStream to filter </param>
+        public LowerCaseFilter(Version matchVersion, TokenStream @in)
+            : base(@in)
+        {
+            termAtt = AddAttribute<ICharTermAttribute>();
+            charUtils = CharacterUtils.GetInstance(matchVersion);
+        }
 
-	  public override bool IncrementToken()
-	  {
-		if (input.incrementToken())
-		{
-		  charUtils.ToLower(termAtt.Buffer(), 0, termAtt.Length);
-		  return true;
-		}
-		else
-		{
-		  return false;
-		}
-	  }
-	}
+        public override bool IncrementToken()
+        {
+            if (input.IncrementToken())
+            {
+                charUtils.ToLower(termAtt.Buffer(), 0, termAtt.Length);
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+    }
 }
\ No newline at end of file
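
The filter is intended to be composed over a tokenizer, as an analyzer's
CreateComponents would do. A minimal sketch of the wiring (both classes appear
in this commit; the helper itself is hypothetical):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Version = Lucene.Net.Util.Version;

    static class Chains
    {
        // Tokenize on whitespace, then normalize every term to lower case.
        internal static TokenStream BuildLowercasedStream(Version matchVersion, TextReader reader)
        {
            TokenStream stream = new WhitespaceTokenizer(matchVersion, reader);
            return new LowerCaseFilter(matchVersion, stream);
        }
    }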

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs
index c2efbd1..98b44bd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs
@@ -1,6 +1,5 @@
 using System.Collections.Generic;
 using Lucene.Net.Analysis.Util;
-using org.apache.lucene.analysis.util;
 
 namespace Lucene.Net.Analysis.Core
 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
index 659f9f3..c3c8250 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
@@ -6,79 +6,81 @@ using Lucene.Net.Util;
 namespace Lucene.Net.Analysis.Core
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// LowerCaseTokenizer performs the function of LetterTokenizer
-	/// and LowerCaseFilter together.  It divides text at non-letters and converts
-	/// them to lower case.  While it is functionally equivalent to the combination
-	/// of LetterTokenizer and LowerCaseFilter, there is a performance advantage
-	/// to doing the two tasks at once, hence this (redundant) implementation.
-	/// <P>
-	/// Note: this does a decent job for most European languages, but does a terrible
-	/// job for some Asian languages, where words are not separated by spaces.
-	/// </p>
-	/// <para>
-	/// <a name="version"/>
-	/// You must specify the required <seealso cref="Version"/> compatibility when creating
-	/// <seealso cref="LowerCaseTokenizer"/>:
-	/// <ul>
-	/// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
-	/// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
-	/// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
-	/// </ul>
-	/// </para>
-	/// </summary>
-	public sealed class LowerCaseTokenizer : LetterTokenizer
-	{
+    /// LowerCaseTokenizer performs the function of LetterTokenizer
+    /// and LowerCaseFilter together.  It divides text at non-letters and converts
+    /// them to lower case.  While it is functionally equivalent to the combination
+    /// of LetterTokenizer and LowerCaseFilter, there is a performance advantage
+    /// to doing the two tasks at once, hence this (redundant) implementation.
+    /// <P>
+    /// Note: this does a decent job for most European languages, but does a terrible
+    /// job for some Asian languages, where words are not separated by spaces.
+    /// </p>
+    /// <para>
+    /// <a name="version"/>
+    /// You must specify the required <seealso cref="Version"/> compatibility when creating
+    /// <seealso cref="LowerCaseTokenizer"/>:
+    /// <ul>
+    /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
+    /// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
+    /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
+    /// </ul>
+    /// </para>
+    /// </summary>
+    public sealed class LowerCaseTokenizer : LetterTokenizer
+    {
 
-	  /// <summary>
-	  /// Construct a new LowerCaseTokenizer.
-	  /// </summary>
-	  /// <param name="matchVersion">
-	  ///          Lucene version to match See <seealso cref="<a href="#version">above</a>"/>
-	  /// </param>
-	  /// <param name="in">
-	  ///          the input to split up into tokens </param>
-	  public LowerCaseTokenizer(Version matchVersion, TextReader @in) : base(matchVersion, @in)
-	  {
-	  }
+        /// <summary>
+        /// Construct a new LowerCaseTokenizer.
+        /// </summary>
+        /// <param name="matchVersion">
+        ///          Lucene version to match See <seealso cref="<a href="#version">above</a>"/>
+        /// </param>
+        /// <param name="in">
+        ///          the input to split up into tokens </param>
+        public LowerCaseTokenizer(Version matchVersion, TextReader @in)
+            : base(matchVersion, @in)
+        {
+        }
 
-	  /// <summary>
-	  /// Construct a new LowerCaseTokenizer using a given
-	  /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
-	  /// </summary>
-	  /// <param name="matchVersion">
-	  ///          Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
-	  /// <param name="factory">
-	  ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
-	  /// <param name="in">
-	  ///          the input to split up into tokens </param>
-	  public LowerCaseTokenizer(Version matchVersion, AttributeFactory factory, TextReader @in) : base(matchVersion, factory, @in)
-	  {
-	  }
+        /// <summary>
+        /// Construct a new LowerCaseTokenizer using a given
+        /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
+        /// </summary>
+        /// <param name="matchVersion">
+        ///          Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
+        /// <param name="factory">
+        ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+        /// <param name="in">
+        ///          the input to split up into tokens </param>
+        public LowerCaseTokenizer(Version matchVersion, AttributeFactory factory, TextReader @in)
+            : base(matchVersion, factory, @in)
+        {
+        }
 
-	  /// <summary>
-	  /// Converts char to lower case
-	  /// <seealso cref="Character#toLowerCase(int)"/>.
-	  /// </summary>
-	  protected override int Normalize(int c)
-	  {
-		return char.ToLower(c);
-	  }
-	}
+        /// <summary>
+        /// Converts char to lower case
+        /// <seealso cref="Character#toLowerCase(int)"/>.
+        /// </summary>
+        protected override int Normalize(int c)
+        {
+            return Character.ToLowerCase(c);
+        }
+    }
 }
\ No newline at end of file
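
The change from char.ToLower(c) to Character.ToLowerCase(c) matters because
Normalize receives an int code point: supplementary characters above U+FFFF
occupy two chars (a surrogate pair) in .NET strings, so a char-based mapping
cannot represent them. A rough .NET equivalent of the int-based mapping, as a
hypothetical helper (not part of this commit):

    // Lower-case a single Unicode code point, including supplementary ones.
    static int ToLowerCodePoint(int codePoint)
    {
        string s = char.ConvertFromUtf32(codePoint);          // one or two UTF-16 units
        return char.ConvertToUtf32(s.ToLowerInvariant(), 0);  // back to a code point
    }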

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs
index 3d9b2e2..32e25c1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs
@@ -2,7 +2,6 @@
 using System.IO;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
-using org.apache.lucene.analysis.util;
 
 namespace Lucene.Net.Analysis.Core
 {
@@ -55,9 +54,9 @@ namespace Lucene.Net.Analysis.Core
         public virtual AbstractAnalysisFactory MultiTermComponent
         {
             get
-		  {
-			return new LowerCaseFilterFactory(new Dictionary<>(OriginalArgs));
-		  }
+            {
+                return new LowerCaseFilterFactory(new Dictionary<string, string>(OriginalArgs));
+            }
         }
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs
index 56c9133..2710d80 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs
@@ -50,7 +50,7 @@ namespace Lucene.Net.Analysis.Core
             this.matchVersion = matchVersion;
         }
 
-        protected internal override TokenStreamComponents createComponents(string fieldName, TextReader reader)
+        public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
         {
             return new TokenStreamComponents(new LowerCaseTokenizer(matchVersion, reader));
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
index 2857938..37020bb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
@@ -3,7 +3,6 @@ using System.IO;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Support;
 using Lucene.Net.Util;
-using org.apache.lucene.analysis.util;
 
 namespace Lucene.Net.Analysis.Core
 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
index aeaf324..085ef17 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
@@ -3,127 +3,129 @@ using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Support;
 using Lucene.Net.Util;
-using org.apache.lucene.analysis.util;
+using Reader = System.IO.TextReader;
 
 namespace Lucene.Net.Analysis.Core
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Removes stop words from a token stream.
-	/// 
-	/// <a name="version"/>
-	/// <para>You must specify the required <seealso cref="Version"/>
-	/// compatibility when creating StopFilter:
-	/// <ul>
-	///   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
-	///         supplementary characters in stopwords and position
-	///         increments are preserved
-	/// </ul>
-	/// </para>
-	/// </summary>
-	public sealed class StopFilter : FilteringTokenFilter
-	{
+    /// Removes stop words from a token stream.
+    /// 
+    /// <a name="version"/>
+    /// <para>You must specify the required <seealso cref="Version"/>
+    /// compatibility when creating StopFilter:
+    /// <ul>
+    ///   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+    ///         supplementary characters in stopwords and position
+    ///         increments are preserved
+    /// </ul>
+    /// </para>
+    /// </summary>
+    public sealed class StopFilter : FilteringTokenFilter
+    {
 
-	  private readonly CharArraySet stopWords;
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+        private readonly CharArraySet stopWords;
+        private readonly ICharTermAttribute termAtt;
 
-	  /// <summary>
-	  /// Constructs a filter which removes words from the input TokenStream that are
-	  /// named in the Set.
-	  /// </summary>
-	  /// <param name="matchVersion">
-	  ///          Lucene version to enable correct Unicode 4.0 behavior in the stop
-	  ///          set if Version > 3.0.  See <a href="#version">above</a> for details. </param>
-	  /// <param name="in">
-	  ///          Input stream </param>
-	  /// <param name="stopWords">
-	  ///          A <seealso cref="CharArraySet"/> representing the stopwords. </param>
-	  /// <seealso cref= #makeStopSet(Version, java.lang.String...) </seealso>
-	  public StopFilter(Version matchVersion, TokenStream @in, CharArraySet stopWords) : base(matchVersion, @in)
-	  {
-		this.stopWords = stopWords;
-	  }
+        /// <summary>
+        /// Constructs a filter which removes words from the input TokenStream that are
+        /// named in the Set.
+        /// </summary>
+        /// <param name="matchVersion">
+        ///          Lucene version to enable correct Unicode 4.0 behavior in the stop
+        ///          set if Version > 3.0.  See <a href="#version">above</a> for details. </param>
+        /// <param name="in">
+        ///          Input stream </param>
+        /// <param name="stopWords">
+        ///          A <seealso cref="CharArraySet"/> representing the stopwords. </param>
+        /// <seealso cref= #makeStopSet(Version, java.lang.String...) </seealso>
+        public StopFilter(Version matchVersion, TokenStream @in, CharArraySet stopWords)
+            : base(matchVersion, @in)
+        {
+            termAtt = AddAttribute<ICharTermAttribute>();
+            this.stopWords = stopWords;
+        }
 
-	  /// <summary>
-	  /// Builds a Set from an array of stop words,
-	  /// appropriate for passing into the StopFilter constructor.
-	  /// This permits this stopWords construction to be cached once when
-	  /// an Analyzer is constructed.
-	  /// </summary>
-	  /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
-	  /// <param name="stopWords"> An array of stopwords </param>
-	  /// <seealso cref= #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase </seealso>
-	  public static CharArraySet makeStopSet(Version matchVersion, params string[] stopWords)
-	  {
-		return makeStopSet(matchVersion, stopWords, false);
-	  }
+        /// <summary>
+        /// Builds a Set from an array of stop words,
+        /// appropriate for passing into the StopFilter constructor.
+        /// This permits this stopWords construction to be cached once when
+        /// an Analyzer is constructed.
+        /// </summary>
+        /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
+        /// <param name="stopWords"> An array of stopwords </param>
+        /// <seealso cref= #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase </seealso>
+        public static CharArraySet MakeStopSet(Version matchVersion, params string[] stopWords)
+        {
+            return MakeStopSet(matchVersion, stopWords, false);
+        }
 
-	  /// <summary>
-	  /// Builds a Set from an array of stop words,
-	  /// appropriate for passing into the StopFilter constructor.
-	  /// This permits this stopWords construction to be cached once when
-	  /// an Analyzer is constructed.
-	  /// </summary>
-	  /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
-	  /// <param name="stopWords"> A List of Strings or char[] or any other toString()-able list representing the stopwords </param>
-	  /// <returns> A Set (<seealso cref="CharArraySet"/>) containing the words </returns>
-	  /// <seealso cref= #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase </seealso>
-	  public static CharArraySet MakeStopSet<T1>(Version matchVersion, IList<T1> stopWords)
-	  {
-		return makeStopSet(matchVersion, stopWords, false);
-	  }
+        /// <summary>
+        /// Builds a Set from an array of stop words,
+        /// appropriate for passing into the StopFilter constructor.
+        /// This permits this stopWords construction to be cached once when
+        /// an Analyzer is constructed.
+        /// </summary>
+        /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
+        /// <param name="stopWords"> A List of Strings or char[] or any other toString()-able list representing the stopwords </param>
+        /// <returns> A Set (<seealso cref="CharArraySet"/>) containing the words </returns>
+        /// <seealso cref= #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase </seealso>
+        public static CharArraySet MakeStopSet<T1>(Version matchVersion, IList<T1> stopWords)
+        {
+            return MakeStopSet(matchVersion, stopWords, false);
+        }
 
-	  /// <summary>
-	  /// Creates a stopword set from the given stopword array.
-	  /// </summary>
-	  /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
-	  /// <param name="stopWords"> An array of stopwords </param>
-	  /// <param name="ignoreCase"> If true, all words are lower cased first. </param>
-	  /// <returns> a Set containing the words </returns>
-	  public static CharArraySet MakeStopSet(Version matchVersion, string[] stopWords, bool ignoreCase)
-	  {
-		CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.Length, ignoreCase);
-		stopSet.AddAll(Arrays.AsList(stopWords));
-		return stopSet;
-	  }
+        /// <summary>
+        /// Creates a stopword set from the given stopword array.
+        /// </summary>
+        /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
+        /// <param name="stopWords"> An array of stopwords </param>
+        /// <param name="ignoreCase"> If true, all words are lower cased first. </param>
+        /// <returns> a Set containing the words </returns>
+        public static CharArraySet MakeStopSet(Version matchVersion, string[] stopWords, bool ignoreCase)
+        {
+            CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.Length, ignoreCase);
+            stopSet.AddAll(Arrays.AsList(stopWords));
+            return stopSet;
+        }
 
-	  /// <summary>
-	  /// Creates a stopword set from the given stopword list. </summary>
-	  /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
-	  /// <param name="stopWords"> A List of Strings or char[] or any other toString()-able list representing the stopwords </param>
-	  /// <param name="ignoreCase"> if true, all words are lower cased first </param>
-	  /// <returns> A Set (<seealso cref="CharArraySet"/>) containing the words </returns>
-	  public static CharArraySet makeStopSet<T1>(Version matchVersion, IList<T1> stopWords, bool ignoreCase)
-	  {
-		CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.Count, ignoreCase);
-		stopSet.addAll(stopWords);
-		return stopSet;
-	  }
+        /// <summary>
+        /// Creates a stopword set from the given stopword list. </summary>
+        /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
+        /// <param name="stopWords"> A List of Strings or char[] or any other toString()-able list representing the stopwords </param>
+        /// <param name="ignoreCase"> if true, all words are lower cased first </param>
+        /// <returns> A Set (<seealso cref="CharArraySet"/>) containing the words </returns>
+        public static CharArraySet MakeStopSet<T1>(Version matchVersion, IList<T1> stopWords, bool ignoreCase)
+        {
+            var stopSet = new CharArraySet(matchVersion, stopWords.Count, ignoreCase);
+            stopSet.AddAll(stopWords);
+            return stopSet;
+        }
 
-	  /// <summary>
-	  /// Returns the next input Token whose term() is not a stop word.
-	  /// </summary>
-	  protected internal override bool Accept()
-	  {
-		return !stopWords.contains(termAtt.buffer(), 0, termAtt.length());
-	  }
+        /// <summary>
+        /// Returns the next input Token whose term() is not a stop word.
+        /// </summary>
+        protected internal override bool Accept()
+        {
+            return !stopWords.Contains(termAtt.Buffer(), 0, termAtt.Length);
+        }
 
-	}
+    }
 
 }
\ No newline at end of file
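
Typical wiring for the ported StopFilter, for reference. A sketch: the stop
words and version handling are illustrative only, and the helper is
hypothetical.

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Util;
    using Version = Lucene.Net.Util.Version;

    static class StopChains
    {
        internal static TokenStream BuildFilteredStream(Version matchVersion, TextReader reader)
        {
            // Build the stop set once (ideally cached per analyzer), then filter.
            CharArraySet stops = StopFilter.MakeStopSet(
                matchVersion, new[] { "the", "a", "an" }, ignoreCase: true);
            return new StopFilter(matchVersion,
                                  new WhitespaceTokenizer(matchVersion, reader), stops);
        }
    }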

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs
index 42e82d2..058aa9f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs
@@ -1,7 +1,6 @@
 using System.Collections.Generic;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Support;
-using org.apache.lucene.analysis.util;
 
 namespace Lucene.Net.Analysis.Core
 {
@@ -54,7 +53,7 @@ namespace Lucene.Net.Analysis.Core
             }
         }
 
-        public virtual void inform(ResourceLoader loader)
+        public virtual void Inform(ResourceLoader loader)
         {
             IList<string> files = splitFileNames(stopTypesFiles);
             if (files.Count > 0)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
index 286da3a..83c74dd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
@@ -1,7 +1,8 @@
-using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
 
-namespace org.apache.lucene.analysis.core
+namespace Lucene.Net.Analysis.Core
 {
 
 	/*
@@ -20,12 +21,7 @@ namespace org.apache.lucene.analysis.core
 	 * See the License for the specific language governing permissions and
 	 * limitations under the License.
 	 */
-
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using CharacterUtils = CharacterUtils;
-	using Version = org.apache.lucene.util.Version;
-
-	/// <summary>
+    /// <summary>
 	/// Normalizes token text to UPPER CASE.
 	/// <a name="version"/>
 	/// <para>You must specify the required <seealso cref="Version"/>
@@ -41,7 +37,7 @@ namespace org.apache.lucene.analysis.core
 	public sealed class UpperCaseFilter : TokenFilter
 	{
 	  private readonly CharacterUtils charUtils;
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+        private readonly ICharTermAttribute termAtt;
 
 	  /// <summary>
 	  /// Create a new UpperCaseFilter, that normalizes token text to upper case.
@@ -50,16 +46,16 @@ namespace org.apache.lucene.analysis.core
 	  /// <param name="in"> TokenStream to filter </param>
 	  public UpperCaseFilter(Version matchVersion, TokenStream @in) : base(@in)
 	  {
-		charUtils = CharacterUtils.getInstance(matchVersion);
+	      termAtt = AddAttribute<ICharTermAttribute>();
+		charUtils = CharacterUtils.GetInstance(matchVersion);
 	  }
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
+	  public override bool IncrementToken()
 	  {
-		if (input.incrementToken())
+		if (input.IncrementToken())
 		{
-		  charUtils.ToUpper(termAtt.buffer(), 0, termAtt.length());
+		  charUtils.ToUpper(termAtt.Buffer(), 0, termAtt.Length);
 		  return true;
 		}
 		else

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs
index 4ef12c7..70169bd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs
@@ -1,7 +1,6 @@
 using System.IO;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
-using org.apache.lucene.analysis.core;
 
 namespace Lucene.Net.Analysis.Core
 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
index 1ee9e69..ac19c96 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
@@ -6,70 +6,72 @@ using Lucene.Net.Util;
 namespace Lucene.Net.Analysis.Core
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
-	/// Adjacent sequences of non-Whitespace characters form tokens. <a
-	/// name="version"/>
-	/// <para>
-	/// You must specify the required <seealso cref="Version"/> compatibility when creating
-	/// <seealso cref="WhitespaceTokenizer"/>:
-	/// <ul>
-	/// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
-	/// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
-	/// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
-	/// </ul>
-	/// </para>
-	/// </summary>
-	public sealed class WhitespaceTokenizer : CharTokenizer
-	{
+    /// A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
+    /// Adjacent sequences of non-Whitespace characters form tokens. <a
+    /// name="version"/>
+    /// <para>
+    /// You must specify the required <seealso cref="Version"/> compatibility when creating
+    /// <seealso cref="WhitespaceTokenizer"/>:
+    /// <ul>
+    /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
+    /// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
+    /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
+    /// </ul>
+    /// </para>
+    /// </summary>
+    public sealed class WhitespaceTokenizer : CharTokenizer
+    {
 
-	  /// Construct a new WhitespaceTokenizer. * <param name="matchVersion"> Lucene version
-	  /// to match See <seealso cref="<a href="#version">above</a>"/>
-	  /// </param>
-	  /// <param name="in">
-	  ///          the input to split up into tokens </param>
-	  public WhitespaceTokenizer(Version matchVersion, TextReader @in) : base(matchVersion, @in)
-	  {
-	  }
+        /// <summary>
+        /// Construct a new WhitespaceTokenizer. </summary>
+        /// <param name="matchVersion"> Lucene version
+        ///          to match See <seealso cref="<a href="#version">above</a>"/> </param>
+        /// <param name="in">
+        ///          the input to split up into tokens </param>
+        public WhitespaceTokenizer(Version matchVersion, TextReader @in)
+            : base(matchVersion, @in)
+        {
+        }
 
-	  /// <summary>
-	  /// Construct a new WhitespaceTokenizer using a given
-	  /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
-	  /// 
-	  /// @param
-	  ///          matchVersion Lucene version to match See
-	  ///          <seealso cref="<a href="#version">above</a>"/> </summary>
-	  /// <param name="factory">
-	  ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
-	  /// <param name="in">
-	  ///          the input to split up into tokens </param>
-	  public WhitespaceTokenizer(Version matchVersion, AttributeFactory factory, TextReader @in) : base(matchVersion, factory, @in)
-	  {
-	  }
+        /// <summary>
+        /// Construct a new WhitespaceTokenizer using a given
+        /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
+        /// 
+        /// @param
+        ///          matchVersion Lucene version to match See
+        ///          <seealso cref="<a href="#version">above</a>"/> </summary>
+        /// <param name="factory">
+        ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+        /// <param name="in">
+        ///          the input to split up into tokens </param>
+        public WhitespaceTokenizer(Version matchVersion, AttributeFactory factory, TextReader @in)
+            : base(matchVersion, factory, @in)
+        {
+        }
 
-	  /// <summary>
-	  /// Collects only characters which do not satisfy
-	  /// <seealso cref="Character#isWhitespace(int)"/>.
-	  /// </summary>
-	  protected internal override bool IsTokenChar(char c)
-	  {
-		return !char.IsWhiteSpace(c);
-	  }
-	}
+        /// <summary>
+        /// Collects only characters which do not satisfy
+        /// <seealso cref="Character#isWhitespace(int)"/>.
+        /// </summary>
+        protected override bool IsTokenChar(char c)
+        {
+            return !char.IsWhiteSpace(c);
+        }
+    }
 }
\ No newline at end of file
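
Since both LetterTokenizer and WhitespaceTokenizer reduce to a single
IsTokenChar predicate, subclassing CharTokenizer in the same style is
straightforward. For example, a hypothetical tokenizer (not part of this
commit) whose tokens are maximal runs of non-digit characters:

    using System.IO;
    using Lucene.Net.Analysis.Util;
    using Version = Lucene.Net.Util.Version;

    public sealed class NonDigitTokenizer : CharTokenizer
    {
        public NonDigitTokenizer(Version matchVersion, TextReader input)
            : base(matchVersion, input)
        {
        }

        // Mirrors the override signature used in this diff.
        protected override bool IsTokenChar(char c)
        {
            return !char.IsDigit(c);
        }
    }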

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs
index 51960b8..7cddbd4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs
@@ -1,58 +1,60 @@
 using System.Collections.Generic;
+using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.Core;
+using Lucene.Net.Util;
 using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+using Reader = System.IO.TextReader;
 
 namespace org.apache.lucene.analysis.core
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-	using TokenizerFactory = TokenizerFactory;
-	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
-
-
-	/// <summary>
-	/// Factory for <seealso cref="WhitespaceTokenizer"/>. 
-	/// <pre class="prettyprint">
-	/// &lt;fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"&gt;
-	///   &lt;analyzer&gt;
-	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
-	///   &lt;/analyzer&gt;
-	/// &lt;/fieldType&gt;</pre>
-	/// </summary>
-	public class WhitespaceTokenizerFactory : TokenizerFactory
-	{
-
-	  /// <summary>
-	  /// Creates a new WhitespaceTokenizerFactory </summary>
-	  public WhitespaceTokenizerFactory(IDictionary<string, string> args) : base(args)
-	  {
-		assureMatchVersion();
-		if (args.Count > 0)
-		{
-		  throw new System.ArgumentException("Unknown parameters: " + args);
-		}
-	  }
-
-	  public override WhitespaceTokenizer create(AttributeFactory factory, Reader input)
-	  {
-		return new WhitespaceTokenizer(luceneMatchVersion, factory, input);
-	  }
-	}
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using TokenizerFactory = TokenizerFactory;
+
+    /// <summary>
+    /// Factory for <seealso cref="WhitespaceTokenizer"/>. 
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// </summary>
+    public class WhitespaceTokenizerFactory : TokenizerFactory
+    {
+
+        /// <summary>
+        /// Creates a new WhitespaceTokenizerFactory </summary>
+        public WhitespaceTokenizerFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            assureMatchVersion();
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        public override Tokenizer Create(AttributeSource.AttributeFactory factory, Reader input)
+        {
+            return new WhitespaceTokenizer(luceneMatchVersion, factory, input);
+        }
+    }
 
 }
\ No newline at end of file
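
Programmatic use of the factory mirrors the Solr configuration shown in the
doc comment. A sketch: the "luceneMatchVersion" argument key follows the Java
original, and the default attribute factory name is an assumption for the
port.

    using System.Collections.Generic;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Util;
    using org.apache.lucene.analysis.core; // namespace as it stands in this commit

    static class FactoryDemo
    {
        internal static Tokenizer CreateWhitespaceTokenizer(TextReader reader)
        {
            // The base factory constructor consumes the version key, so the
            // derived constructor's "unknown parameters" check passes.
            var args = new Dictionary<string, string>
            {
                { "luceneMatchVersion", "LUCENE_CURRENT" }
            };
            var factory = new WhitespaceTokenizerFactory(args);
            return factory.Create(
                AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader);
        }
    }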

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
index ab54cf5..3129a83 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
@@ -7,202 +7,196 @@ using org.apache.lucene.analysis.util;
 namespace Lucene.Net.Analysis.Synonym
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Mapping rules for use with <seealso cref="SlowSynonymFilter"/> </summary>
-	/// @deprecated (3.4) use <seealso cref="SynonymFilterFactory"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0 
-	[Obsolete("(3.4) use <seealso cref="SynonymFilterFactory"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0")]
-	internal class SlowSynonymMap
-	{
-	  /// <summary>
-	  /// @lucene.internal </summary>
-	  public CharArrayMap<SlowSynonymMap> submap; // recursive: Map<String, SynonymMap>
-	  /// <summary>
-	  /// @lucene.internal </summary>
-	  public Token[] synonyms;
-	  internal int flags;
-
-	  internal const int INCLUDE_ORIG = 0x01;
-	  internal const int IGNORE_CASE = 0x02;
-
-	  public SlowSynonymMap()
-	  {
-	  }
-	  public SlowSynonymMap(bool ignoreCase)
-	  {
-		if (ignoreCase_Renamed)
-		{
-			flags |= IGNORE_CASE;
-		}
-	  }
-
-	  public virtual bool includeOrig()
-	  {
-		  return (flags & INCLUDE_ORIG) != 0;
-	  }
-	  public virtual bool ignoreCase()
-	  {
-		  return (flags & IGNORE_CASE) != 0;
-	  }
-
-	  /// <param name="singleMatch">  List<String>, the sequence of strings to match </param>
-	  /// <param name="replacement">  List<Token> the list of tokens to use on a match </param>
-	  /// <param name="includeOrig">  sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens </param>
-	  /// <param name="mergeExisting"> merge the replacement tokens with any other mappings that exist </param>
-	  public virtual void add(IList<string> singleMatch, IList<Token> replacement, bool includeOrig, bool mergeExisting)
-	  {
-		SlowSynonymMap currMap = this;
-		foreach (string str in singleMatch)
-		{
-		  if (currMap.submap == null)
-		  {
-			// for now hardcode at 4.0, as its what the old code did.
-			// would be nice to fix, but shouldn't store a version in each submap!!!
-			currMap.submap = new CharArrayMap<>(Version.LUCENE_CURRENT, 1, ignoreCase());
-		  }
-
-		  SlowSynonymMap map = currMap.submap.get(str);
-		  if (map == null)
-		  {
-			map = new SlowSynonymMap();
-			map.flags |= flags & IGNORE_CASE;
-			currMap.submap.put(str, map);
-		  }
-
-		  currMap = map;
-		}
-
-		if (currMap.synonyms != null && !mergeExisting)
-		{
-		  throw new System.ArgumentException("SynonymFilter: there is already a mapping for " + singleMatch);
-		}
-		IList<Token> superset = currMap.synonyms == null ? replacement : mergeTokens(currMap.synonyms, replacement);
-		currMap.synonyms = superset.ToArray();
-		if (includeOrig_Renamed)
-		{
-			currMap.flags |= INCLUDE_ORIG;
-		}
-	  }
-
-
-	  public override string ToString()
-	  {
-		StringBuilder sb = new StringBuilder("<");
-		if (synonyms != null)
-		{
-		  sb.Append("[");
-		  for (int i = 0; i < synonyms.Length; i++)
-		  {
-			if (i != 0)
-			{
-				sb.Append(',');
-			}
-			sb.Append(synonyms[i]);
-		  }
-		  if ((flags & INCLUDE_ORIG) != 0)
-		  {
-			sb.Append(",ORIG");
-		  }
-		  sb.Append("],");
-		}
-		sb.Append(submap);
-		sb.Append(">");
-		return sb.ToString();
-	  }
-
-
-
-	  /// <summary>
-	  /// Produces a List<Token> from a List<String> </summary>
-	  public static IList<Token> makeTokens(IList<string> strings)
-	  {
-		IList<Token> ret = new List<Token>(strings.Count);
-		foreach (string str in strings)
-		{
-		  //Token newTok = new Token(str,0,0,"SYNONYM");
-		  Token newTok = new Token(str, 0,0,"SYNONYM");
-		  ret.Add(newTok);
-		}
-		return ret;
-	  }
-
-
-	  /// <summary>
-	  /// Merge two lists of tokens, producing a single list with manipulated positionIncrements so that
-	  /// the tokens end up at the same position.
-	  /// 
-	  /// Example:  [a b] merged with [c d] produces [a/b c/d]  ('/' denotes tokens in the same position)
-	  /// Example:  [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2]  (a,n means a has posInc=n)
-	  /// 
-	  /// </summary>
-	  public static IList<Token> mergeTokens(IList<Token> lst1, IList<Token> lst2)
-	  {
-		List<Token> result = new List<Token>();
-		if (lst1 == null || lst2 == null)
-		{
-		  if (lst2 != null)
-		  {
-			  result.AddRange(lst2);
-		  }
-		  if (lst1 != null)
-		  {
-			  result.AddRange(lst1);
-		  }
-		  return result;
-		}
-
-		int pos = 0;
-		IEnumerator<Token> iter1 = lst1.GetEnumerator();
-		IEnumerator<Token> iter2 = lst2.GetEnumerator();
-//JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
-		Token tok1 = iter1.hasNext() ? iter1.next() : null;
-//JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
-		Token tok2 = iter2.hasNext() ? iter2.next() : null;
-		int pos1 = tok1 != null ? tok1.PositionIncrement : 0;
-		int pos2 = tok2 != null ? tok2.PositionIncrement : 0;
-		while (tok1 != null || tok2 != null)
-		{
-		  while (tok1 != null && (pos1 <= pos2 || tok2 == null))
-		  {
-			Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
-			tok.copyBuffer(tok1.buffer(), 0, tok1.length());
-			tok.PositionIncrement = pos1 - pos;
-			result.Add(tok);
-			pos = pos1;
-//JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
-			tok1 = iter1.hasNext() ? iter1.next() : null;
-			pos1 += tok1 != null ? tok1.PositionIncrement : 0;
-		  }
-		  while (tok2 != null && (pos2 <= pos1 || tok1 == null))
-		  {
-			Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
-			tok.copyBuffer(tok2.buffer(), 0, tok2.length());
-			tok.PositionIncrement = pos2 - pos;
-			result.Add(tok);
-			pos = pos2;
-//JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
-			tok2 = iter2.hasNext() ? iter2.next() : null;
-			pos2 += tok2 != null ? tok2.PositionIncrement : 0;
-		  }
-		}
-		return result;
-	  }
-
-	}
-
+    /// Mapping rules for use with <seealso cref="SlowSynonymFilter"/> </summary>
+    /// @deprecated (3.4) use <seealso cref="SynonymFilterFactory"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0 
+    [Obsolete("(3.4) use <seealso cref=\"SynonymFilterFactory\"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0")]
+    internal class SlowSynonymMap
+    {
+        /// <summary>
+        /// @lucene.internal </summary>
+        public CharArrayMap<SlowSynonymMap> submap; // recursive: Map<String, SynonymMap>
+        /// <summary>
+        /// @lucene.internal </summary>
+        public Token[] synonyms;
+        internal int flags;
+
+        internal const int INCLUDE_ORIG = 0x01;
+        internal const int IGNORE_CASE = 0x02;
+
+        public SlowSynonymMap()
+        {
+        }
+        public SlowSynonymMap(bool ignoreCase)
+        {
+            if (ignoreCase)
+            {
+                flags |= IGNORE_CASE;
+            }
+        }
+
+        public virtual bool IncludeOrig()
+        {
+            return (flags & INCLUDE_ORIG) != 0;
+        }
+        public virtual bool IgnoreCase()
+        {
+            return (flags & IGNORE_CASE) != 0;
+        }
+
+        /// <param name="singleMatch">  List<String>, the sequence of strings to match </param>
+        /// <param name="replacement">  List<Token> the list of tokens to use on a match </param>
+        /// <param name="includeOrig">  sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens </param>
+        /// <param name="mergeExisting"> merge the replacement tokens with any other mappings that exist </param>
+        public virtual void Add(IList<string> singleMatch, IList<Token> replacement, bool includeOrig, bool mergeExisting)
+        {
+            var currMap = this;
+            foreach (string str in singleMatch)
+            {
+                if (currMap.submap == null)
+                {
+                    // for now hardcode at 4.0, as it's what the old code did.
+                    // would be nice to fix, but shouldn't store a version in each submap!!!
+                    currMap.submap = new CharArrayMap<SlowSynonymMap>(Lucene.Net.Util.Version.LUCENE_CURRENT, 1, IgnoreCase());
+                }
+
+                var map = currMap.submap.Get(str);
+                if (map == null)
+                {
+                    map = new SlowSynonymMap();
+                    map.flags |= flags & IGNORE_CASE;
+                    currMap.submap.Put(str, map);
+                }
+
+                currMap = map;
+            }
+
+            if (currMap.synonyms != null && !mergeExisting)
+            {
+                throw new System.ArgumentException("SynonymFilter: there is already a mapping for " + singleMatch);
+            }
+            IList<Token> superset = currMap.synonyms == null ? replacement : MergeTokens(currMap.synonyms, replacement);
+            currMap.synonyms = superset.ToArray();
+            if (includeOrig)
+            {
+                currMap.flags |= INCLUDE_ORIG;
+            }
+        }
+
+
+        public override string ToString()
+        {
+            StringBuilder sb = new StringBuilder("<");
+            if (synonyms != null)
+            {
+                sb.Append("[");
+                for (int i = 0; i < synonyms.Length; i++)
+                {
+                    if (i != 0)
+                    {
+                        sb.Append(',');
+                    }
+                    sb.Append(synonyms[i]);
+                }
+                if ((flags & INCLUDE_ORIG) != 0)
+                {
+                    sb.Append(",ORIG");
+                }
+                sb.Append("],");
+            }
+            sb.Append(submap);
+            sb.Append(">");
+            return sb.ToString();
+        }
+
+
+
+        /// <summary>
+        /// Produces a List<Token> from a List<String> </summary>
+        public static IList<Token> MakeTokens(IList<string> strings)
+        {
+            IList<Token> ret = new List<Token>(strings.Count);
+            foreach (string str in strings)
+            {
+                //Token newTok = new Token(str,0,0,"SYNONYM");
+                Token newTok = new Token(str, 0, 0, "SYNONYM");
+                ret.Add(newTok);
+            }
+            return ret;
+        }
+
+
+        /// <summary>
+        /// Merge two lists of tokens, producing a single list with manipulated positionIncrements so that
+        /// the tokens end up at the same position.
+        /// 
+        /// Example:  [a b] merged with [c d] produces [a/b c/d]  ('/' denotes tokens in the same position)
+        /// Example:  [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2]  (a,n means a has posInc=n)
+        /// 
+        /// </summary>
+        public static IList<Token> MergeTokens(IList<Token> lst1, IList<Token> lst2)
+        {
+            var result = new List<Token>();
+            if (lst1 == null || lst2 == null)
+            {
+                if (lst2 != null)
+                {
+                    result.AddRange(lst2);
+                }
+                if (lst1 != null)
+                {
+                    result.AddRange(lst1);
+                }
+                return result;
+            }
+
+            int pos = 0;
+            var iter1 = lst1.GetEnumerator();
+            var iter2 = lst2.GetEnumerator();
+            var tok1 = iter1.MoveNext() ? iter1.Current : null;
+            var tok2 = iter2.MoveNext() ? iter2.Current : null;
+            int pos1 = tok1 != null ? tok1.PositionIncrement : 0;
+            int pos2 = tok2 != null ? tok2.PositionIncrement : 0;
+            while (tok1 != null || tok2 != null)
+            {
+                while (tok1 != null && (pos1 <= pos2 || tok2 == null))
+                {
+                    var tok = new Token(tok1.StartOffset(), tok1.EndOffset(), tok1.Type);
+                    tok.CopyBuffer(tok1.Buffer(), 0, tok1.Length);
+                    tok.PositionIncrement = pos1 - pos;
+                    result.Add(tok);
+                    pos = pos1;
+                    tok1 = iter1.MoveNext() ? iter1.Current : null;
+                    pos1 += tok1 != null ? tok1.PositionIncrement : 0;
+                }
+                while (tok2 != null && (pos2 <= pos1 || tok1 == null))
+                {
+                    var tok = new Token(tok2.StartOffset(), tok2.EndOffset(), tok2.Type);
+                    tok.CopyBuffer(tok2.Buffer(), 0, tok2.Length);
+                    tok.PositionIncrement = pos2 - pos;
+                    result.Add(tok);
+                    pos = pos2;
+                    tok2 = iter2.MoveNext() ? iter2.Current : null;
+                    pos2 += tok2 != null ? tok2.PositionIncrement : 0;
+                }
+            }
+            return result;
+        }
+    }
 }
\ No newline at end of file

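The examples in the MergeTokens doc comment translate directly into calls on the renamed surface. A rough sketch (SlowSynonymMap is internal and deprecated, so this only compiles from inside the assembly; the rule and token values are illustrative):

    // Build a rule: the token sequence "wi fi" maps to "wifi", keeping the
    // original tokens next to the replacement (sets INCLUDE_ORIG).
    var map = new SlowSynonymMap(true);
    map.Add(new List<string> { "wi", "fi" },
            SlowSynonymMap.MakeTokens(new List<string> { "wifi" }),
            includeOrig: true, mergeExisting: false);

    // MergeTokens aligns two streams by cumulative position:
    // [a b] merged with [c d] produces [a/b c/d].
    IList<Token> merged = SlowSynonymMap.MergeTokens(
        SlowSynonymMap.MakeTokens(new List<string> { "a", "b" }),
        SlowSynonymMap.MakeTokens(new List<string> { "c", "d" }));
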
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
index e608b1f..fba8b3a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
@@ -76,9 +76,7 @@ namespace org.apache.lucene.analysis.util
 	  /// <param name="ignoreCase">
 	  ///          <code>false</code> if and only if the set should be case sensitive
 	  ///          otherwise <code>true</code>. </param>
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @SuppressWarnings("unchecked") public CharArrayMap(org.apache.lucene.util.Version matchVersion, int startSize, boolean ignoreCase)
-	  public CharArrayMap(Version matchVersion, int startSize, bool ignoreCase)
+	  public CharArrayMap(Lucene.Net.Util.Version matchVersion, int startSize, bool ignoreCase)
 	  {
 		this.ignoreCase = ignoreCase;
 		int size_Renamed = INIT_SIZE;

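The constructor touched above is the one the SlowSynonymMap change relies on. A short sketch of how it reads after the conversion (the Put name is assumed, mirroring the renamed Get seen in the SlowSynonymMap hunk):

    // ignoreCase: true makes lookups case-insensitive.
    var cache = new CharArrayMap<string>(Lucene.Net.Util.Version.LUCENE_CURRENT, 16, true);
    cache.Put("Quick", "adjective");   // assumed Put, matching the PascalCase Get
    string v = cache.Get("quick");     // hits despite the case difference
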
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
index 370c56a..6b8a9db 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
@@ -102,7 +102,7 @@ namespace Lucene.Net.Analysis.Util
 	  /// Clears all entries in this set. This method is supported for reusing, but not <seealso cref="Set#remove"/>. </summary>
 	  public void Clear()
 	  {
-		map.clear();
+		map.Clear();
 	  }
 
 	  /// <summary>
@@ -111,19 +111,19 @@ namespace Lucene.Net.Analysis.Util
 	  /// </summary>
 	  public virtual bool Contains(char[] text, int off, int len)
 	  {
-		return map.containsKey(text, off, len);
+		return map.ContainsKey(text, off, len);
 	  }
 
 	  /// <summary>
 	  /// true if the <code>CharSequence</code> is in the set </summary>
 	  public virtual bool Contains(string cs)
 	  {
-		return map.containsKey(cs);
+		return map.ContainsKey(cs);
 	  }
 
 	  public bool Contains(object o)
 	  {
-		return map.containsKey(o);
+		return map.ContainsKey(o);
 	  }
 
 	  public bool Add(object o)

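The Contains overloads above make the set usable without materializing strings. A sketch, assuming CharArraySet keeps a (matchVersion, startSize, ignoreCase) constructor mirroring CharArrayMap's:

    var stops = new CharArraySet(Lucene.Net.Util.Version.LUCENE_CURRENT, 8, true);
    stops.Add("quick");
    char[] buf = "The Quick Fox".ToCharArray();
    bool inSet = stops.Contains(buf, 4, 5); // tests the slice "Quick", no string allocation
    bool same = stops.Contains("QUICK");    // also true, since ignoreCase was set
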
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5155ef8c/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
index 1cd6395..df61504 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
@@ -3,7 +3,6 @@ using System.IO;
 using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Support;
 using Lucene.Net.Util;
-using org.apache.lucene.analysis.util;
 
 namespace Lucene.Net.Analysis.Util
 {
@@ -74,10 +73,13 @@ namespace Lucene.Net.Analysis.Util
         ///          Lucene version to match </param>
         /// <param name="input">
         ///          the input to split up into tokens </param>
-        public CharTokenizer(Version matchVersion, TextReader input)
+        protected CharTokenizer(Version matchVersion, TextReader input)
             : base(input)
         {
-            charUtils = CharacterUtils.getInstance(matchVersion);
+            termAtt = AddAttribute<ICharTermAttribute>();
+            offsetAtt = AddAttribute<IOffsetAttribute>();
+
+            charUtils = CharacterUtils.GetInstance(matchVersion);
         }
 
         /// <summary>
@@ -93,18 +95,18 @@ namespace Lucene.Net.Analysis.Util
             : base(factory, input)
         {
             _input = input;
-            charUtils = CharacterUtils.getInstance(matchVersion);
+            charUtils = CharacterUtils.GetInstance(matchVersion);
         }
 
         private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
         private const int MAX_WORD_LEN = 255;
         private const int IO_BUFFER_SIZE = 4096;
 
-        private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-        private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+        private readonly ICharTermAttribute termAtt;
+        private readonly IOffsetAttribute offsetAtt;
 
         private readonly CharacterUtils charUtils;
-        private readonly CharacterUtils.CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
+        private readonly CharacterUtils.CharacterBuffer ioBuffer = CharacterUtils.NewCharacterBuffer(IO_BUFFER_SIZE);
 
         /// <summary>
         /// Returns true iff a codepoint should be included in a token. This tokenizer
@@ -112,7 +114,7 @@ namespace Lucene.Net.Analysis.Util
         /// predicate. Codepoints for which this is false are used to define token
         /// boundaries and are not included in tokens.
         /// </summary>
-        protected internal abstract bool IsTokenChar(char c);
+        protected abstract bool IsTokenChar(int c);
 
         /// <summary>
         /// Called on each token character to normalize it before it is added to the
@@ -136,7 +138,7 @@ namespace Lucene.Net.Analysis.Util
                 if (bufferIndex >= dataLen)
                 {
                     offset += dataLen;
-                    charUtils.fill(ioBuffer, input); // read supplementary char aware with CharacterUtils
+                    charUtils.Fill(ioBuffer, input); // read supplementary char aware with CharacterUtils
                     if (ioBuffer.Length == 0)
                     {
                         dataLen = 0; // so next offset += dataLen won't decrement offset
@@ -154,11 +156,11 @@ namespace Lucene.Net.Analysis.Util
                     bufferIndex = 0;
                 }
                 // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone
-                int c = charUtils.codePointAt(ioBuffer.Buffer, bufferIndex, ioBuffer.Length);
+                int c = charUtils.CodePointAt(ioBuffer.Buffer, bufferIndex, ioBuffer.Length);
                 int charCount = Character.CharCount(c);
                 bufferIndex += charCount;
 
-                if (isTokenChar(c)) // if it's a token char
+                if (IsTokenChar(c)) // if it's a token char
                 {
                     if (length == 0) // start of token
                     {


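The new protected constructor wires up the term and offset attributes itself, so a concrete tokenizer only has to answer IsTokenChar. A minimal sketch of a subclass against this surface (the class and its predicate are illustrative, not part of this commit):

    using System.IO;
    using Lucene.Net.Analysis.Util;

    // Emits runs of ASCII digits as tokens; everything else is a boundary.
    public sealed class DigitTokenizer : CharTokenizer
    {
        public DigitTokenizer(Lucene.Net.Util.Version matchVersion, TextReader input)
            : base(matchVersion, input)
        {
        }

        // c is a full code point: the buffer is read supplementary-char
        // aware through CharacterUtils.CodePointAt, as shown above.
        protected override bool IsTokenChar(int c)
        {
            return c >= '0' && c <= '9';
        }
    }
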