lucenenet-commits mailing list archives

From synhers...@apache.org
Subject [06/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common
Date Fri, 07 Nov 2014 23:12:10 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
new file mode 100644
index 0000000..2067ff6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
@@ -0,0 +1,150 @@
+using Lucene.Net.Analysis.Core;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using LowerCaseFilter = LowerCaseFilter;
+	using StopAnalyzer = StopAnalyzer;
+	using StopFilter = StopFilter;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// Filters <seealso cref="org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer"/>
+	/// with <seealso cref="org.apache.lucene.analysis.standard.StandardFilter"/>,
+	/// <seealso cref="LowerCaseFilter"/> and
+	/// <seealso cref="StopFilter"/>, using a list of
+	/// English stop words.
+	/// 
+	/// <a name="version"/>
+	/// <para>
+	///   You must specify the required <seealso cref="org.apache.lucene.util.Version"/>
+	///   compatibility when creating UAX29URLEmailAnalyzer
+	/// </para>
+	/// </summary>
+	public sealed class UAX29URLEmailAnalyzer : StopwordAnalyzerBase
+	{
+
+	  /// <summary>
+	  /// Default maximum allowed token length </summary>
+	  public const int DEFAULT_MAX_TOKEN_LENGTH = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+
+	  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+	  /// <summary>
+	  /// An unmodifiable set containing some common English words that are usually not
+	  /// useful for searching. 
+	  /// </summary>
+	  public static readonly CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. </summary>
+	  /// <param name="matchVersion"> Lucene version to match - see <a href="#version">above</a> </param>
+	  /// <param name="stopWords"> stop words  </param>
+	  public UAX29URLEmailAnalyzer(Version matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words (<seealso cref="STOP_WORDS_SET"/>). </summary>
+	  /// <param name="matchVersion"> Lucene version to match - see <a href="#version">above</a> </param>
+	  public UAX29URLEmailAnalyzer(Version matchVersion) : this(matchVersion, STOP_WORDS_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the stop words from the given reader. </summary>
+	  /// <seealso cref= org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader, org.apache.lucene.util.Version) </seealso>
+	  /// <param name="matchVersion"> Lucene version to match - see <a href="#version">above</a> </param>
+	  /// <param name="stopwords"> Reader to read stop words from  </param>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public UAX29URLEmailAnalyzer(org.apache.lucene.util.Version matchVersion, java.io.Reader stopwords) throws java.io.IOException
+	  public UAX29URLEmailAnalyzer(Version matchVersion, Reader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
+	  {
+	  }
+
+	  /// <summary>
+	  /// Set maximum allowed token length.  If a token is seen
+	  /// that exceeds this length then it is discarded.  This
+	  /// setting only takes effect the next time tokenStream is
+	  /// called.
+	  /// </summary>
+	  public int MaxTokenLength
+	  {
+		  set
+		  {
+			maxTokenLength = value;
+		  }
+		  get
+		  {
+			return maxTokenLength;
+		  }
+	  }
+
+
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: @Override protected TokenStreamComponents createComponents(final String fieldName, final java.io.Reader reader)
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion, reader);
+		UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion, reader);
+		src.MaxTokenLength = maxTokenLength;
+		TokenStream tok = new StandardFilter(matchVersion, src);
+		tok = new LowerCaseFilter(matchVersion, tok);
+		tok = new StopFilter(matchVersion, tok, stopwords);
+		return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
+	  }
+
+	  private class TokenStreamComponentsAnonymousInnerClassHelper : TokenStreamComponents
+	  {
+		  private readonly UAX29URLEmailAnalyzer outerInstance;
+
+		  private Reader reader;
+		  private org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer src;
+
+		  public TokenStreamComponentsAnonymousInnerClassHelper(UAX29URLEmailAnalyzer outerInstance, org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.reader = reader;
+			  this.src = src;
+		  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override protected void setReader(final java.io.Reader reader) throws java.io.IOException
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+		  protected internal override Reader Reader
+		  {
+			  set
+			  {
+				src.MaxTokenLength = outerInstance.maxTokenLength;
+				base.Reader = value;
+			  }
+		  }
+	  }
+	}
+
+}
\ No newline at end of file
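
For orientation, a minimal usage sketch against the Java-style surface this raw
port still exposes (lower-case method names, typeof-based attribute lookup).
The field name "body" and the input text are placeholders, and the signatures
will shift as the port is cleaned up:

    UAX29URLEmailAnalyzer analyzer = new UAX29URLEmailAnalyzer(Version.LUCENE_CURRENT);
    TokenStream ts = analyzer.tokenStream("body", new StringReader("Mail me at foo@example.com"));
    CharTermAttribute termAtt = ts.addAttribute(typeof(CharTermAttribute));
    ts.reset();
    while (ts.incrementToken())
    {
        // prints "mail", "me", "foo@example.com" - lower-cased, stop words ("at") dropped
        Console.WriteLine(termAtt.ToString());
    }
    ts.end();
    ts.close();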

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
new file mode 100644
index 0000000..83ac99c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
@@ -0,0 +1,221 @@
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using UAX29URLEmailTokenizerImpl31 = org.apache.lucene.analysis.standard.std31.UAX29URLEmailTokenizerImpl31;
+	using UAX29URLEmailTokenizerImpl34 = org.apache.lucene.analysis.standard.std34.UAX29URLEmailTokenizerImpl34;
+	using UAX29URLEmailTokenizerImpl36 = org.apache.lucene.analysis.standard.std36.UAX29URLEmailTokenizerImpl36;
+	using UAX29URLEmailTokenizerImpl40 = org.apache.lucene.analysis.standard.std40.UAX29URLEmailTokenizerImpl40;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// This class implements Word Break rules from the Unicode Text Segmentation 
+	/// algorithm, as specified in
+	/// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a> 
+	/// URLs and email addresses are also tokenized according to the relevant RFCs.
+	/// <p/>
+	/// Tokens produced are of the following types:
+	/// <ul>
+	///   <li>&lt;ALPHANUM&gt;: A sequence of alphabetic and numeric characters</li>
+	///   <li>&lt;NUM&gt;: A number</li>
+	///   <li>&lt;URL&gt;: A URL</li>
+	///   <li>&lt;EMAIL&gt;: An email address</li>
+	///   <li>&lt;SOUTHEAST_ASIAN&gt;: A sequence of characters from South and Southeast
+	///       Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
+	///   <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
+	///   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
+	/// </ul>
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating UAX29URLEmailTokenizer:
+	/// <ul>
+	///   <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
+	///   from their combining characters. If you use a previous version number,
+	///   you get the exact broken behavior for backwards compatibility.
+	/// </ul>
+	/// </para>
+	/// </summary>
+
+	public sealed class UAX29URLEmailTokenizer : Tokenizer
+	{
+	  /// <summary>
+	  /// A private instance of the JFlex-constructed scanner </summary>
+	  private readonly StandardTokenizerInterface scanner;
+
+	  public const int ALPHANUM = 0;
+	  public const int NUM = 1;
+	  public const int SOUTHEAST_ASIAN = 2;
+	  public const int IDEOGRAPHIC = 3;
+	  public const int HIRAGANA = 4;
+	  public const int KATAKANA = 5;
+	  public const int HANGUL = 6;
+	  public const int URL = 7;
+	  public const int EMAIL = 8;
+
+	  /// <summary>
+	  /// String token types that correspond to token type int constants </summary>
+	  public static readonly string[] TOKEN_TYPES = new string[] {
+		  StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM],
+		  StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM],
+		  StandardTokenizer.TOKEN_TYPES[StandardTokenizer.SOUTHEAST_ASIAN],
+		  StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC],
+		  StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA],
+		  StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA],
+		  StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL],
+		  "<URL>",
+		  "<EMAIL>"
+	  };
+
+	  private int skippedPositions;
+
+	  private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+
+	  /// <summary>
+	  /// Set the max allowed token length.  Any token longer
+	  ///  than this is skipped. 
+	  /// </summary>
+	  public int MaxTokenLength
+	  {
+		  set
+		  {
+			if (value < 1)
+			{
+			  throw new System.ArgumentException("maxTokenLength must be greater than zero");
+			}
+			this.maxTokenLength = value;
+		  }
+		  get
+		  {
+			return maxTokenLength;
+		  }
+	  }
+
+
+	  /// <summary>
+	  /// Creates a new instance of the UAX29URLEmailTokenizer.  Attaches
+	  /// the <code>input</code> to the newly created JFlex scanner.
+	  /// </summary>
+	  /// <param name="input"> The input reader </param>
+	  public UAX29URLEmailTokenizer(Version matchVersion, Reader input) : base(input)
+	  {
+		this.scanner = getScannerFor(matchVersion);
+	  }
+
+	  /// <summary>
+	  /// Creates a new UAX29URLEmailTokenizer with a given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
+	  /// </summary>
+	  public UAX29URLEmailTokenizer(Version matchVersion, AttributeFactory factory, Reader input) : base(factory, input)
+	  {
+		this.scanner = getScannerFor(matchVersion);
+	  }
+
+	  private StandardTokenizerInterface getScannerFor(Version matchVersion)
+	  {
+		// best effort NullReferenceException if you don't call reset
+		if (matchVersion.onOrAfter(Version.LUCENE_47))
+		{
+		  return new UAX29URLEmailTokenizerImpl(input);
+		}
+		else if (matchVersion.onOrAfter(Version.LUCENE_40))
+		{
+		  return new UAX29URLEmailTokenizerImpl40(input);
+		}
+		else if (matchVersion.onOrAfter(Version.LUCENE_36))
+		{
+		  return new UAX29URLEmailTokenizerImpl36(input);
+		}
+		else if (matchVersion.onOrAfter(Version.LUCENE_34))
+		{
+		  return new UAX29URLEmailTokenizerImpl34(input);
+		}
+		else
+		{
+		  return new UAX29URLEmailTokenizerImpl31(input);
+		}
+	  }
+
+	  // this tokenizer generates three attributes:
+	  // term offset, positionIncrement and type
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		clearAttributes();
+		skippedPositions = 0;
+
+		while (true)
+		{
+		  int tokenType = scanner.NextToken;
+
+		  if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
+		  {
+			return false;
+		  }
+
+		  if (scanner.yylength() <= maxTokenLength)
+		  {
+			posIncrAtt.PositionIncrement = skippedPositions + 1;
+			scanner.getText(termAtt);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int start = scanner.yychar();
+			int start = scanner.yychar();
+			offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
+			typeAtt.Type = TOKEN_TYPES[tokenType];
+			return true;
+		  }
+		  else
+		  {
+			// When we skip a too-long term, we still increment the
+			// position increment
+			skippedPositions++;
+		  }
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+	  public override void end()
+	  {
+		base.end();
+		// set final offset
+		int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
+		offsetAtt.setOffset(finalOffset, finalOffset);
+		// adjust any skipped tokens
+		posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void close() throws java.io.IOException
+	  public override void close()
+	  {
+		base.close();
+		scanner.yyreset(input);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		scanner.yyreset(input);
+		skippedPositions = 0;
+	  }
+	}
+
+}
\ No newline at end of file
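
A hedged sketch of driving the tokenizer directly and reading the token types
listed above, again following the port's current Java-style surface; the input
string is arbitrary:

    UAX29URLEmailTokenizer tok = new UAX29URLEmailTokenizer(Version.LUCENE_CURRENT,
        new StringReader("visit http://lucene.apache.org or mail dev@lucene.apache.org"));
    CharTermAttribute termAtt = tok.addAttribute(typeof(CharTermAttribute));
    TypeAttribute typeAtt = tok.addAttribute(typeof(TypeAttribute));
    tok.reset();
    while (tok.incrementToken())
    {
        // e.g. "http://lucene.apache.org -> <URL>", "dev@lucene.apache.org -> <EMAIL>"
        Console.WriteLine(termAtt.ToString() + " -> " + typeAtt.Type);
    }
    tok.end();
    tok.close();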

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
new file mode 100644
index 0000000..350fdbb
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
@@ -0,0 +1,61 @@
+using System.Collections.Generic;
+using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenizerFactory = TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="UAX29URLEmailTokenizer"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_urlemail" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre> 
+	/// </summary>
+	public class UAX29URLEmailTokenizerFactory : TokenizerFactory
+	{
+	  private readonly int maxTokenLength;
+
+	  /// <summary>
+	  /// Creates a new UAX29URLEmailTokenizerFactory </summary>
+	  public UAX29URLEmailTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override UAX29URLEmailTokenizer create(AttributeFactory factory, Reader input)
+	  {
+		UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, factory, input);
+		tokenizer.MaxTokenLength = maxTokenLength;
+		return tokenizer;
+	  }
+	}
+
+}
\ No newline at end of file
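
Because the constructor consumes its args dictionary and throws on leftovers,
configuring the factory from code looks roughly like this (a sketch; the
"luceneMatchVersion" entry is assumed to be consumed by the TokenizerFactory
base class, as in the Java original):

    var args = new Dictionary<string, string>
    {
        { "luceneMatchVersion", "LUCENE_CURRENT" },
        { "maxTokenLength", "255" }
    };
    var factory = new UAX29URLEmailTokenizerFactory(args);
    // args is now empty; an unrecognized key would have triggered
    // ArgumentException("Unknown parameters: ...")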

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishAnalyzer.cs
new file mode 100644
index 0000000..f3c9cce
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishAnalyzer.cs
@@ -0,0 +1,139 @@
+using System;
+
+namespace org.apache.lucene.analysis.sv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using SwedishStemmer = org.tartarus.snowball.ext.SwedishStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Swedish.
+	/// </summary>
+	public sealed class SwedishAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Swedish stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "swedish_stop.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public SwedishAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new SwedishStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
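
A short sketch of the stem-exclusion constructor described above, assuming the
CharArraySet port keeps its Java constructor shape (matchVersion, initial
capacity, ignoreCase); "jordgubbe" is just an arbitrary term to protect:

    CharArraySet exclusions = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    exclusions.add("jordgubbe");
    SwedishAnalyzer analyzer = new SwedishAnalyzer(Version.LUCENE_CURRENT,
        SwedishAnalyzer.DefaultStopSet, exclusions);
    // "jordgubbe" is marked by SetKeywordMarkerFilter and skips the SnowballFilter;
    // every other token is stemmed as usual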

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilter.cs
new file mode 100644
index 0000000..256a618
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.sv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="SwedishLightStemmer"/> to stem Swedish
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class SwedishLightStemFilter : TokenFilter
+	{
+	  private readonly SwedishLightStemmer stemmer = new SwedishLightStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public SwedishLightStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
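
The KeywordAttribute check in incrementToken() above is what makes term
protection work: place a SetKeywordMarkerFilter earlier in the chain and the
marked terms skip stemming. A sketch, where reader and the CharArraySet
protectedTerms are assumed to exist:

    Tokenizer source = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
    TokenStream result = new LowerCaseFilter(Version.LUCENE_CURRENT, source);
    result = new SetKeywordMarkerFilter(result, protectedTerms); // sets KeywordAttribute on matches
    result = new SwedishLightStemFilter(result);                 // leaves those tokens unstemmed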

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilterFactory.cs
new file mode 100644
index 0000000..ebfde41
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.sv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="SwedishLightStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.SwedishLightStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class SwedishLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new SwedishLightStemFilterFactory </summary>
+	  public SwedishLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new SwedishLightStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemmer.cs
new file mode 100644
index 0000000..523b489
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemmer.cs
@@ -0,0 +1,114 @@
+namespace org.apache.lucene.analysis.sv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Swedish.
+	/// <para>
+	/// This stemmer implements the algorithm described in:
+	/// <i>Report on CLEF-2003 Monolingual Tracks</i>
+	/// Jacques Savoy
+	/// </para>
+	/// </summary>
+	public class SwedishLightStemmer
+	{
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len > 4 && s[len - 1] == 's')
+		{
+		  len--;
+		}
+
+		if (len > 7 && (StemmerUtil.EndsWith(s, len, "elser") || StemmerUtil.EndsWith(s, len, "heten")))
+		{
+		  return len - 5;
+		}
+
+		if (len > 6 && (StemmerUtil.EndsWith(s, len, "arne") || StemmerUtil.EndsWith(s, len, "erna") || StemmerUtil.EndsWith(s, len, "ande") || StemmerUtil.EndsWith(s, len, "else") || StemmerUtil.EndsWith(s, len, "aste") || StemmerUtil.EndsWith(s, len, "orna") || StemmerUtil.EndsWith(s, len, "aren")))
+		{
+		  return len - 4;
+		}
+
+		if (len > 5 && (StemmerUtil.EndsWith(s, len, "are") || StemmerUtil.EndsWith(s, len, "ast") || StemmerUtil.EndsWith(s, len, "het")))
+		{
+		  return len - 3;
+		}
+
+		if (len > 4 && (StemmerUtil.EndsWith(s, len, "ar") || StemmerUtil.EndsWith(s, len, "er") || StemmerUtil.EndsWith(s, len, "or") || StemmerUtil.EndsWith(s, len, "en") || StemmerUtil.EndsWith(s, len, "at") || StemmerUtil.EndsWith(s, len, "te") || StemmerUtil.EndsWith(s, len, "et")))
+		{
+		  return len - 2;
+		}
+
+		if (len > 3)
+		{
+		  switch (s[len - 1])
+		  {
+			case 't':
+			case 'a':
+			case 'e':
+			case 'n':
+				return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
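
The suffix tables are easiest to read with a worked example: "flickorna"
("the girls", length 9) fails the plural-'s' and five-letter-suffix tests,
matches the "orna" branch, and comes back with length 5:

    SwedishLightStemmer stemmer = new SwedishLightStemmer();
    char[] buf = "flickorna".ToCharArray();
    int newLen = stemmer.stem(buf, buf.Length); // len > 6 and EndsWith(s, len, "orna") -> 9 - 4 = 5
    Console.WriteLine(new string(buf, 0, newLen)); // "flick"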

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
new file mode 100644
index 0000000..c38f1dd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
@@ -0,0 +1,186 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Util;
+using org.apache.lucene.analysis.core;
+using org.apache.lucene.analysis.synonym;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+    /// @deprecated (3.4) use <seealso cref="SynonymFilterFactory"/> instead. this is only a backwards compatibility
+	///                   mechanism that will be removed in Lucene 5.0 
+	// NOTE: rename this to "SynonymFilterFactory" and nuke that delegator in Lucene 5.0!
+	[Obsolete("(3.4) Use SynonymFilterFactory instead; this is only a backwards compatibility mechanism that will be removed in Lucene 5.0.")]
+	internal sealed class FSTSynonymFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  private readonly bool ignoreCase;
+	  private readonly string tokenizerFactory;
+	  private readonly string synonyms;
+	  private readonly string format;
+	  private readonly bool expand;
+	  private readonly IDictionary<string, string> tokArgs = new Dictionary<string, string>();
+
+	  private SynonymMap map;
+
+	  public FSTSynonymFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		ignoreCase = getBoolean(args, "ignoreCase", false);
+		synonyms = require(args, "synonyms");
+		format = get(args, "format");
+		expand = getBoolean(args, "expand", true);
+
+		tokenizerFactory = get(args, "tokenizerFactory");
+		if (tokenizerFactory != null)
+		{
+		  assureMatchVersion();
+		  tokArgs["luceneMatchVersion"] = LuceneMatchVersion.ToString();
+		  // snapshot the keys first: .NET enumerators have no remove(), and the
+		  // dictionary cannot be mutated while it is being enumerated
+		  foreach (string key in new List<string>(args.Keys))
+		  {
+			tokArgs[Regex.Replace(key, "^tokenizerFactory\\.", "")] = args[key];
+			args.Remove(key);
+		  }
+		}
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		// if the fst is null, it means there's actually no synonyms... just return the original stream
+		// as there is nothing to do here.
+		return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(ResourceLoader loader) throws java.io.IOException
+	  public void inform(ResourceLoader loader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
+		TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
+
+		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, factory);
+
+		try
+		{
+		  string formatClass = format;
+		  if (format == null || format.Equals("solr"))
+		  {
+			formatClass = typeof(SolrSynonymParser).Name;
+		  }
+		  else if (format.Equals("wordnet"))
+		  {
+			formatClass = typeof(WordnetSynonymParser).Name;
+		  }
+		  // TODO: expose dedup as a parameter?
+		  map = loadSynonyms(loader, formatClass, true, analyzer);
+		}
+		catch (ParseException e)
+		{
+		  throw new IOException("Error parsing synonyms file:", e);
+		}
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  private readonly FSTSynonymFilterFactory outerInstance;
+
+		  private TokenizerFactory factory;
+
+		  public AnalyzerAnonymousInnerClassHelper(FSTSynonymFilterFactory outerInstance, TokenizerFactory factory)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.factory = factory;
+		  }
+
+		  protected internal override Analyzer.TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader);
+			TokenStream stream = outerInstance.ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer;
+			return new Analyzer.TokenStreamComponents(tokenizer, stream);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Load synonyms with the given <seealso cref="SynonymMap.Parser"/> class.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private org.apache.lucene.analysis.synonym.SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, org.apache.lucene.analysis.Analyzer analyzer) throws java.io.IOException, java.text.ParseException
+	  private SynonymMap loadSynonyms(ResourceLoader loader, string cname, bool dedup, Analyzer analyzer)
+	  {
+		CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
+
+		SynonymMap.Parser parser;
+		Type clazz = loader.findClass(cname, typeof(SynonymMap.Parser));
+		try
+		{
+		  parser = clazz.getConstructor(typeof(bool), typeof(bool), typeof(Analyzer)).newInstance(dedup, expand, analyzer);
+		}
+		catch (Exception e)
+		{
+		  // System.Exception has no (Exception) constructor; wrap the cause instead
+		  throw new Exception(e.Message, e);
+		}
+
+		File synonymFile = new File(synonyms);
+		if (synonymFile.exists())
+		{
+		  decoder.reset();
+		  parser.parse(new InputStreamReader(loader.openResource(synonyms), decoder));
+		}
+		else
+		{
+		  IList<string> files = splitFileNames(synonyms);
+		  foreach (string file in files)
+		  {
+			decoder.reset();
+			parser.parse(new InputStreamReader(loader.openResource(file), decoder));
+		  }
+		}
+		return parser.build();
+	  }
+
+	  // (there are no tests for this functionality)
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws java.io.IOException
+	  private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, string cname)
+	  {
+		Type clazz = loader.findClass(cname, typeof(TokenizerFactory));
+		try
+		{
+		  TokenizerFactory tokFactory = clazz.getConstructor(typeof(IDictionary)).newInstance(tokArgs);
+		  if (tokFactory is ResourceLoaderAware)
+		  {
+			((ResourceLoaderAware) tokFactory).inform(loader);
+		  }
+		  return tokFactory;
+		}
+		catch (Exception e)
+		{
+		  // System.Exception has no (Exception) constructor; wrap the cause instead
+		  throw new Exception(e.Message, e);
+		}
+	  }
+	}
+
+}
\ No newline at end of file
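
Because synonym parsing happens in inform(), not in the constructor, wiring the
factory up takes two steps. A sketch, where loader is an assumed ResourceLoader
that can resolve "synonyms.txt" and input is an existing TokenStream:

    var args = new Dictionary<string, string>
    {
        { "luceneMatchVersion", "LUCENE_CURRENT" },
        { "synonyms", "synonyms.txt" },
        { "format", "solr" },   // or "wordnet", or a SynonymMap.Parser class name
        { "expand", "true" },
        { "ignoreCase", "true" }
    };
    var factory = new FSTSynonymFilterFactory(args);
    factory.inform(loader);                    // the SynonymMap is loaded and built here
    TokenStream withSynonyms = factory.create(input);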

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
new file mode 100644
index 0000000..15abb7a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
@@ -0,0 +1,317 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+
+	/// <summary>
+	/// SynonymFilter handles multi-token synonyms with variable position increment offsets.
+	/// <para>
+	/// The matched tokens from the input stream may be optionally passed through (includeOrig=true)
+	/// or discarded.  If the original tokens are included, the position increments may be modified
+	/// to retain absolute positions after merging with the synonym tokenstream.
+	/// </para>
+	/// <para>
+	/// Generated synonyms will start at the same position as the first matched source token.
+	/// </para>
+	/// </summary>
+	/// @deprecated (3.4) use <seealso cref="SynonymFilterFactory"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0 
+	[Obsolete("(3.4) Use SynonymFilterFactory instead; only for precise index backwards compatibility. This factory will be removed in Lucene 5.0.")]
+	internal sealed class SlowSynonymFilter : TokenFilter
+	{
+
+	  private readonly SlowSynonymMap map; // Map<String, SynonymMap>
+	  private IEnumerator<AttributeSource> replacement; // iterator over generated tokens
+
+	  public SlowSynonymFilter(TokenStream @in, SlowSynonymMap map) : base(@in)
+	  {
+		if (map == null)
+		{
+		  throw new System.ArgumentException("map is required");
+		}
+
+		this.map = map;
+		// just ensuring these attributes exist...
+		addAttribute(typeof(CharTermAttribute));
+		addAttribute(typeof(PositionIncrementAttribute));
+		addAttribute(typeof(OffsetAttribute));
+		addAttribute(typeof(TypeAttribute));
+	  }
+
+
+	  /*
+	   * Need to worry about multiple scenarios:
+	   *  - need to go for the longest match
+	   *    a b => foo      #shouldn't match if "a b" is followed by "c d"
+	   *    a b c d => bar
+	   *  - need to backtrack - retry matches for tokens already read
+	   *     a b c d => foo
+	   *       b c => bar
+	   *     If the input stream is "a b c x", one will consume "a b c d"
+	   *     trying to match the first rule... all but "a" should be
+	   *     pushed back so a match may be made on "b c".
+	   *  - don't try and match generated tokens (thus need separate queue)
+	   *    matching is not recursive.
+	   *  - handle optional generation of original tokens in all these cases,
+	   *    merging token streams to preserve token positions.
+	   *  - preserve original positionIncrement of first matched token
+	   */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		while (true)
+		{
+		  // if there are any generated tokens, return them... don't try any
+		  // matches against them, as we specifically don't want recursion.
+		  // IEnumerator<T> stands in for Java's Iterator here: MoveNext() both
+		  // tests for and advances to the next element, Current then yields it
+		  if (replacement != null && replacement.MoveNext())
+		  {
+			copy(this, replacement.Current);
+			return true;
+		  }
+
+		  // common case fast-path of first token not matching anything
+		  AttributeSource firstTok = nextTok();
+		  if (firstTok == null)
+		  {
+			  return false;
+		  }
+		  CharTermAttribute termAtt = firstTok.addAttribute(typeof(CharTermAttribute));
+		  SlowSynonymMap result = map.submap != null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;
+		  if (result == null)
+		  {
+			copy(this, firstTok);
+			return true;
+		  }
+
+		  // fast-path failed, clone ourselves if needed
+		  if (firstTok == this)
+		  {
+			firstTok = cloneAttributes();
+		  }
+		  // OK, we matched a token, so find the longest match.
+
+		  matched = new LinkedList<AttributeSource>();
+
+		  result = match(result);
+
+		  if (result == null)
+		  {
+			// no match, simply return the first token read.
+			copy(this, firstTok);
+			return true;
+		  }
+
+		  // reuse, or create new one each time?
+		  List<AttributeSource> generated = new List<AttributeSource>(result.synonyms.Length + matched.Count + 1);
+
+		  //
+		  // there was a match... let's generate the new tokens, merging
+		  // in the matched tokens (position increments need adjusting)
+		  //
+		  AttributeSource lastTok = matched.Count == 0 ? firstTok : matched.Last.Value;
+		  bool includeOrig = result.includeOrig();
+
+		  AttributeSource origTok = includeOrig ? firstTok : null;
+		  PositionIncrementAttribute firstPosIncAtt = firstTok.addAttribute(typeof(PositionIncrementAttribute));
+		  int origPos = firstPosIncAtt.PositionIncrement; // position of origTok in the original stream
+		  int repPos = 0; // curr position in replacement token stream
+		  int pos = 0; // current position in merged token stream
+
+		  for (int i = 0; i < result.synonyms.Length; i++)
+		  {
+			Token repTok = result.synonyms[i];
+			AttributeSource newTok = firstTok.cloneAttributes();
+			CharTermAttribute newTermAtt = newTok.addAttribute(typeof(CharTermAttribute));
+			OffsetAttribute newOffsetAtt = newTok.addAttribute(typeof(OffsetAttribute));
+			PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(typeof(PositionIncrementAttribute));
+
+			OffsetAttribute lastOffsetAtt = lastTok.addAttribute(typeof(OffsetAttribute));
+
+			newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
+			newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
+			repPos += repTok.PositionIncrement;
+			if (i == 0) // make position of first token equal to original
+			{
+				repPos = origPos;
+			}
+
+			// if necessary, insert original tokens and adjust position increment
+			while (origTok != null && origPos <= repPos)
+			{
+			  PositionIncrementAttribute origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
+			  origPosInc.PositionIncrement = origPos - pos;
+			  generated.Add(origTok);
+			  pos += origPosInc.PositionIncrement;
+			  // .NET's LinkedList<T>.RemoveFirst() returns void, so read First.Value before removing
+			  origTok = matched.Count == 0 ? null : matched.First.Value;
+			  if (matched.Count > 0)
+			  {
+				matched.RemoveFirst();
+			  }
+			  if (origTok != null)
+			  {
+				origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
+				origPos += origPosInc.PositionIncrement;
+			  }
+			}
+
+			newPosIncAtt.PositionIncrement = repPos - pos;
+			generated.Add(newTok);
+			pos += newPosIncAtt.PositionIncrement;
+		  }
+
+		  // finish up any leftover original tokens
+		  while (origTok != null)
+		  {
+			PositionIncrementAttribute origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
+			origPosInc.PositionIncrement = origPos - pos;
+			generated.Add(origTok);
+			pos += origPosInc.PositionIncrement;
+			// .NET's LinkedList<T>.RemoveFirst() returns void, so read First.Value before removing
+			origTok = matched.Count == 0 ? null : matched.First.Value;
+			if (matched.Count > 0)
+			{
+			  matched.RemoveFirst();
+			}
+			if (origTok != null)
+			{
+			  origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
+			  origPos += origPosInc.PositionIncrement;
+			}
+		  }
+
+		  // what if we replaced a longer sequence with a shorter one?
+		  // a/0 b/5 =>  foo/0
+		  // should I re-create the gap on the next buffered token?
+
+		  replacement = generated.GetEnumerator();
+		  // Now return to the top of the loop to read and return the first
+		  // generated token.. The reason this is done is that we may have generated
+		  // nothing at all, and may need to continue with more matching logic.
+		}
+	  }
+
+
+	  //
+	  // Defer creation of the buffer until the first time it is used to
+	  // optimize short fields with no matches.
+	  //
+	  private LinkedList<AttributeSource> buffer;
+	  private LinkedList<AttributeSource> matched;
+
+	  private bool exhausted;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private org.apache.lucene.util.AttributeSource nextTok() throws java.io.IOException
+	  private AttributeSource nextTok()
+	  {
+		if (buffer != null && buffer.Count > 0)
+		{
+		  return buffer.RemoveFirst();
+		}
+		else
+		{
+		  if (!exhausted && input.incrementToken())
+		  {
+			return this;
+		  }
+		  else
+		  {
+			exhausted = true;
+			return null;
+		  }
+		}
+	  }
+
+	  private void pushTok(AttributeSource t)
+	  {
+		if (buffer == null)
+		{
+			buffer = new LinkedList<AttributeSource>();
+		}
+		buffer.AddFirst(t);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private SlowSynonymMap match(SlowSynonymMap map) throws java.io.IOException
+	  private SlowSynonymMap match(SlowSynonymMap map)
+	  {
+		SlowSynonymMap result = null;
+
+		if (map.submap != null)
+		{
+		  AttributeSource tok = nextTok();
+		  if (tok != null)
+		  {
+			// clone ourselves.
+			if (tok == this)
+			{
+			  tok = cloneAttributes();
+			}
+			// check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
+			CharTermAttribute termAtt = tok.getAttribute(typeof(CharTermAttribute));
+			SlowSynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());
+
+			if (subMap != null)
+			{
+			  // recurse
+			  result = match(subMap);
+			}
+
+			if (result != null)
+			{
+			  matched.AddFirst(tok);
+			}
+			else
+			{
+			  // push back unmatched token
+			  pushTok(tok);
+			}
+		  }
+		}
+
+		// no longer sequence matched, so if this node has synonyms, it's the match
+		if (result == null && map.synonyms != null)
+		{
+		  result = map;
+		}
+
+		return result;
+	  }
+
+	  private void copy(AttributeSource target, AttributeSource source)
+	  {
+		if (target != source)
+		{
+		  source.copyTo(target);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		input.reset();
+		replacement = null;
+		exhausted = false;
+	  }
+	}
+
+}
\ No newline at end of file
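
The backtracking contract spelled out in the comment block inside
incrementToken() is easiest to see on the example it mentions. Given the rules
"a b c d => foo" and "b c => bar", the input "a b c x" plays out like this:

    attempt:  a b c d => foo   -- "a b c" consumed, then "x" breaks the match
    pushback: b c x            -- only "a" is emitted unmatched
    rematch:  b c => bar       -- succeeds on the pushed-back tokens
    output:   a bar x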

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
new file mode 100644
index 0000000..5e76e47
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
@@ -0,0 +1,391 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Text;
+using System.Text.RegularExpressions;
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using org.apache.lucene.analysis.util;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="SlowSynonymFilter"/> (only used with luceneMatchVersion &lt; 3.4)
+	/// <pre class="prettyprint" >
+	/// &lt;fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="false"
+	///             expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre> </summary>
+	/// @deprecated (3.4) use <seealso cref="SynonymFilterFactory"/> instead. Only for precise index backwards compatibility; this factory will be removed in Lucene 5.0.
+	[Obsolete("(3.4) Use SynonymFilterFactory instead; only for precise index backwards compatibility. This factory will be removed in Lucene 5.0.")]
+	internal sealed class SlowSynonymFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  private readonly string synonyms;
+	  private readonly bool ignoreCase;
+	  private readonly bool expand;
+	  private readonly string tf;
+	  private readonly IDictionary<string, string> tokArgs = new Dictionary<string, string>();
+
+	  public SlowSynonymFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		synonyms = require(args, "synonyms");
+		ignoreCase = getBoolean(args, "ignoreCase", false);
+		expand = getBoolean(args, "expand", true);
+
+		tf = get(args, "tokenizerFactory");
+		if (tf != null)
+		{
+		  assureMatchVersion();
+		  tokArgs["luceneMatchVersion"] = LuceneMatchVersion.ToString();
+		  // args can't be mutated while enumerating it, and IEnumerator<T> has no
+		  // Java-style remove(); snapshot the keys, then consume them from the map
+		  foreach (string key in new List<string>(args.Keys))
+		  {
+			tokArgs[Regex.Replace(key, "^tokenizerFactory\\.", "")] = args[key];
+			args.Remove(key);
+		  }
+		}
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
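
For reference, a hedged sketch of the arguments map a caller might hand this constructor. Keys other than the ones read above (`synonyms`, `ignoreCase`, `expand`, `tokenizerFactory`) are illustrative; `tokenizerFactory.*` entries are forwarded to the nested tokenizer factory with the prefix stripped, and the map must be fully consumed or the constructor throws:

    using System.Collections.Generic;

    var args = new Dictionary<string, string>
    {
        { "luceneMatchVersion", "LUCENE_33" },             // consumed by the base factory
        { "synonyms", "synonyms.txt" },                    // required
        { "ignoreCase", "true" },
        { "expand", "false" },
        { "tokenizerFactory", "solr.KeywordTokenizerFactory" },
        { "tokenizerFactory.someOption", "value" },        // reaches that factory as "someOption"
    };
    var factory = new SlowSynonymFilterFactory(args);      // any leftover key is an error
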
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void inform(ResourceLoader loader) throws java.io.IOException
+	  public void inform(ResourceLoader loader)
+	  {
+		TokenizerFactory tokFactory = null;
+		if (tf != null)
+		{
+		  tokFactory = loadTokenizerFactory(loader, tf);
+		}
+
+		IEnumerable<string> wlist = loadRules(synonyms, loader);
+
+		synMap = new SlowSynonymMap(ignoreCase);
+		parseRules(wlist, synMap, "=>", ",", expand, tokFactory);
+	  }
+
+	  /// <returns> a list of all rules </returns>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: protected Iterable<String> loadRules(String synonyms, ResourceLoader loader) throws java.io.IOException
+	  protected internal IEnumerable<string> loadRules(string synonyms, ResourceLoader loader)
+	  {
+		IList<string> wlist = null;
+		if (File.Exists(synonyms))
+		{
+		  wlist = getLines(loader, synonyms);
+		}
+		else
+		{
+		  IList<string> files = splitFileNames(synonyms);
+		  wlist = new List<string>();
+		  foreach (string file in files)
+		  {
+			IList<string> lines = getLines(loader, file.Trim());
+			wlist.AddRange(lines);
+		  }
+		}
+		return wlist;
+	  }
+
+	  private SlowSynonymMap synMap;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: static void parseRules(Iterable<String> rules, SlowSynonymMap map, String mappingSep, String synSep, boolean expansion, TokenizerFactory tokFactory) throws java.io.IOException
+	  internal static void parseRules(IEnumerable<string> rules, SlowSynonymMap map, string mappingSep, string synSep, bool expansion, TokenizerFactory tokFactory)
+	  {
+		int count = 0;
+		foreach (string rule in rules)
+		{
+		  // To use regexes, we need an expression that specifies an odd number of chars.
+		  // This can't really be done with string.split(), and since we need to
+		  // do unescaping at some point anyway, we wouldn't be saving any effort
+		  // by using regexes.
+
+		  IList<string> mapping = splitSmart(rule, mappingSep, false);
+
+		  IList<IList<string>> source;
+		  IList<IList<string>> target;
+
+		  if (mapping.Count > 2)
+		  {
+			throw new System.ArgumentException("Invalid Synonym Rule:" + rule);
+		  }
+		  else if (mapping.Count == 2)
+		  {
+			source = getSynList(mapping[0], synSep, tokFactory);
+			target = getSynList(mapping[1], synSep, tokFactory);
+		  }
+		  else
+		  {
+			source = getSynList(mapping[0], synSep, tokFactory);
+			if (expansion)
+			{
+			  // expand to all arguments
+			  target = source;
+			}
+			else
+			{
+			  // reduce to first argument
+			  target = new List<IList<string>>(1);
+			  target.Add(source[0]);
+			}
+		  }
+
+		  bool includeOrig = false;
+		  foreach (IList<string> fromToks in source)
+		  {
+			count++;
+			foreach (IList<string> toToks in target)
+			{
+			  map.add(fromToks, SlowSynonymMap.makeTokens(toToks), includeOrig, true);
+			}
+		  }
+		}
+	  }
+
+	  // a , b c , d e f => [[a],[b,c],[d,e,f]]
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static java.util.List<java.util.List<String>> getSynList(String str, String separator, TokenizerFactory tokFactory) throws java.io.IOException
+	  private static IList<IList<string>> getSynList(string str, string separator, TokenizerFactory tokFactory)
+	  {
+		IList<string> strList = splitSmart(str, separator, false);
+		// now split on whitespace to get a list of token strings
+		IList<IList<string>> synList = new List<IList<string>>();
+		foreach (string toks in strList)
+		{
+		  IList<string> tokList = tokFactory == null ? splitWS(toks, true) : splitByTokenizer(toks, tokFactory);
+		  synList.Add(tokList);
+		}
+		return synList;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static java.util.List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws java.io.IOException
+	  private static IList<string> splitByTokenizer(string source, TokenizerFactory tokFactory)
+	  {
+		StringReader reader = new StringReader(source);
+		TokenStream ts = loadTokenizer(tokFactory, reader);
+		IList<string> tokList = new List<string>();
+		try
+		{
+		  CharTermAttribute termAtt = (CharTermAttribute) ts.addAttribute(typeof(CharTermAttribute));
+		  ts.reset();
+		  while (ts.incrementToken())
+		  {
+			if (termAtt.length() > 0)
+			{
+			  tokList.Add(termAtt.ToString());
+			}
+		  }
+		}
+		finally
+		{
+		  reader.Close();
+		}
+		return tokList;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws java.io.IOException
+	  private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, string cname)
+	  {
+		Type clazz = loader.findClass(cname, typeof(TokenizerFactory));
+		try
+		{
+		  // Java reflection (getConstructor/newInstance) maps to Activator.CreateInstance,
+		  // which resolves the IDictionary-taking constructor from the argument
+		  TokenizerFactory tokFactory = (TokenizerFactory) Activator.CreateInstance(clazz, tokArgs);
+		  if (tokFactory is ResourceLoaderAware)
+		  {
+			((ResourceLoaderAware) tokFactory).inform(loader);
+		  }
+		  return tokFactory;
+		}
+		catch (Exception e)
+		{
+		  throw new Exception(e.Message, e); // Exception has no ctor taking only an inner exception
+		}
+	  }
+
+	  private static TokenStream loadTokenizer(TokenizerFactory tokFactory, TextReader reader)
+	  {
+		return tokFactory.create(reader);
+	  }
+
+	  public SlowSynonymMap SynonymMap
+	  {
+		  get
+		  {
+			return synMap;
+		  }
+	  }
+
+	  public override TokenStream create(TokenStream input) // C# forbids covariant return overrides
+	  {
+		return new SlowSynonymFilter(input, synMap);
+	  }
+
+	  public static IList<string> splitWS(string s, bool decode)
+	  {
+		List<string> lst = new List<string>(2);
+		StringBuilder sb = new StringBuilder();
+		int pos = 0, end = s.Length;
+		while (pos < end)
+		{
+		  char ch = s[pos++];
+		  if (char.IsWhiteSpace(ch))
+		  {
+			if (sb.Length > 0)
+			{
+			  lst.Add(sb.ToString());
+			  sb = new StringBuilder();
+			}
+			continue;
+		  }
+
+		  if (ch == '\\')
+		  {
+			if (!decode)
+			{
+				sb.Append(ch);
+			}
+			if (pos >= end) // ERROR, or let it go?
+			{
+				break;
+			}
+			ch = s[pos++];
+			if (decode)
+			{
+			  switch (ch)
+			  {
+				case 'n' :
+					ch = '\n';
+					break;
+				case 't' :
+					ch = '\t';
+					break;
+				case 'r' :
+					ch = '\r';
+					break;
+				case 'b' :
+					ch = '\b';
+					break;
+				case 'f' :
+					ch = '\f';
+					break;
+			  }
+			}
+		  }
+
+		  sb.Append(ch);
+		}
+
+		if (sb.Length > 0)
+		{
+		  lst.Add(sb.ToString());
+		}
+
+		return lst;
+	  }
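
A hedged usage sketch for splitWS (illustrative inputs; the class is internal, so this only compiles inside the same assembly). With decode=true an escaped whitespace character is kept inside the token and the backslash is dropped; with decode=false the backslash survives but still suppresses the split:

    var a = SlowSynonymFilterFactory.splitWS("big\\ apple pie", true);
    // -> { "big apple", "pie" }     (escaped space kept, backslash decoded away)

    var b = SlowSynonymFilterFactory.splitWS("big\\ apple pie", false);
    // -> { "big\\ apple", "pie" }   (backslash preserved verbatim)
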
+
+	  /// <summary>
+	  /// Splits a backslash escaped string on the separator.
+	  /// <para>
+	  /// Current backslash escaping supported:
+	  /// <br> \n \t \r \b \f are escaped the same as a Java String
+	  /// <br> Other characters following a backslash are produced verbatim (\c => c)
+	  /// 
+	  /// </para>
+	  /// </summary>
+	  /// <param name="s">  the string to split </param>
+	  /// <param name="separator"> the separator to split on </param>
+	  /// <param name="decode"> decode backslash escaping </param>
+	  public static IList<string> splitSmart(string s, string separator, bool decode)
+	  {
+		List<string> lst = new List<string>(2);
+		StringBuilder sb = new StringBuilder();
+		int pos = 0, end = s.Length;
+		while (pos < end)
+		{
+		  if (string.CompareOrdinal(s, pos, separator, 0, separator.Length) == 0) // Java: s.startsWith(separator, pos)
+		  {
+			if (sb.Length > 0)
+			{
+			  lst.Add(sb.ToString());
+			  sb = new StringBuilder();
+			}
+			pos += separator.Length;
+			continue;
+		  }
+
+		  char ch = s[pos++];
+		  if (ch == '\\')
+		  {
+			if (!decode)
+			{
+				sb.Append(ch);
+			}
+			if (pos >= end) // ERROR, or let it go?
+			{
+				break;
+			}
+			ch = s[pos++];
+			if (decode)
+			{
+			  switch (ch)
+			  {
+				case 'n' :
+					ch = '\n';
+					break;
+				case 't' :
+					ch = '\t';
+					break;
+				case 'r' :
+					ch = '\r';
+					break;
+				case 'b' :
+					ch = '\b';
+					break;
+				case 'f' :
+					ch = '\f';
+					break;
+			  }
+			}
+		  }
+
+		  sb.Append(ch);
+		}
+
+		if (sb.Length > 0)
+		{
+		  lst.Add(sb.ToString());
+		}
+
+		return lst;
+	  }
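
And the splitSmart counterpart, which splits on an arbitrary separator instead of whitespace (same caveats as above; results shown as C# string literals). Note that parseRules and getSynList call it with decode=false and unescape later, and that surrounding whitespace is preserved for the caller to Trim:

    var parts = SlowSynonymFilterFactory.splitSmart("a\\,a2,b", ",", true);
    // -> { "a,a2", "b" }                 (escaped comma does not split; decoded away)

    var sides = SlowSynonymFilterFactory.splitSmart("i-pod, i pod => ipod", "=>", false);
    // -> { "i-pod, i pod ", " ipod" }    (whitespace preserved; callers Trim)
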
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
new file mode 100644
index 0000000..cfc7d71
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
@@ -0,0 +1,210 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Mapping rules for use with <seealso cref="SlowSynonymFilter"/> </summary>
+	/// @deprecated (3.4) use <seealso cref="SynonymFilterFactory"/> instead. Only for precise index backwards compatibility; this class will be removed in Lucene 5.0.
+	[Obsolete("(3.4) Use SynonymFilterFactory instead; only for precise index backwards compatibility. This class will be removed in Lucene 5.0.")]
+	internal class SlowSynonymMap
+	{
+	  /// <summary>
+	  /// @lucene.internal </summary>
+	  public CharArrayMap<SlowSynonymMap> submap; // recursive: Map<String, SynonymMap>
+	  /// <summary>
+	  /// @lucene.internal </summary>
+	  public Token[] synonyms;
+	  internal int flags;
+
+	  internal const int INCLUDE_ORIG = 0x01;
+	  internal const int IGNORE_CASE = 0x02;
+
+	  public SlowSynonymMap()
+	  {
+	  }
+	  public SlowSynonymMap(bool ignoreCase)
+	  {
+		if (ignoreCase)
+		{
+			flags |= IGNORE_CASE;
+		}
+	  }
+
+	  public virtual bool includeOrig()
+	  {
+		  return (flags & INCLUDE_ORIG) != 0;
+	  }
+	  public virtual bool ignoreCase()
+	  {
+		  return (flags & IGNORE_CASE) != 0;
+	  }
+
+	  /// <param name="singleMatch">  IList&lt;string&gt;, the sequence of strings to match </param>
+	  /// <param name="replacement">  IList&lt;Token&gt;, the list of tokens to use on a match </param>
+	  /// <param name="includeOrig">  sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens </param>
+	  /// <param name="mergeExisting"> merge the replacement tokens with any other mappings that exist </param>
+	  public virtual void add(IList<string> singleMatch, IList<Token> replacement, bool includeOrig, bool mergeExisting)
+	  {
+		SlowSynonymMap currMap = this;
+		foreach (string str in singleMatch)
+		{
+		  if (currMap.submap == null)
+		  {
+			// for now hardcode at 4.0, as its what the old code did.
+			// would be nice to fix, but shouldn't store a version in each submap!!!
+			currMap.submap = new CharArrayMap<SlowSynonymMap>(Version.LUCENE_CURRENT, 1, ignoreCase());
+		  }
+
+		  SlowSynonymMap map = currMap.submap.get(str);
+		  if (map == null)
+		  {
+			map = new SlowSynonymMap();
+			map.flags |= flags & IGNORE_CASE;
+			currMap.submap.put(str, map);
+		  }
+
+		  currMap = map;
+		}
+
+		if (currMap.synonyms != null && !mergeExisting)
+		{
+		  throw new System.ArgumentException("SynonymFilter: there is already a mapping for " + singleMatch);
+		}
+		IList<Token> superset = currMap.synonyms == null ? replacement : mergeTokens(currMap.synonyms, replacement);
+		currMap.synonyms = new List<Token>(superset).ToArray(); // IList<T> has no ToArray()
+		if (includeOrig)
+		{
+			currMap.flags |= INCLUDE_ORIG;
+		}
+	  }
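
add() threads a multi-token key through nested submaps, creating one trie level per token and attaching (or merging) the replacement tokens on the final node. A simplified sketch of that insertion over the illustrative Node type from earlier (merging and the INCLUDE_ORIG flag are elided):

    using System.Collections.Generic;

    static class TrieDemo
    {
        public static Node Add(Node root, IList<string> key, string[] synonyms)
        {
            Node curr = root;
            foreach (string tok in key)                    // e.g. { "i", "pod" }
            {
                if (curr.Submap == null)
                {
                    curr.Submap = new Dictionary<string, Node>();
                }
                Node next;
                if (!curr.Submap.TryGetValue(tok, out next))
                {
                    next = new Node();                     // new trie level for this token
                    curr.Submap[tok] = next;
                }
                curr = next;
            }
            curr.Synonyms = synonyms;                      // the real code merges with existing tokens
            return root;
        }
    }

    // TrieDemo.Add(root, new[] { "i", "pod" }, new[] { "ipod" }) yields
    //   root.Submap["i"].Submap["pod"].Synonyms == { "ipod" }
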
+
+
+	  public override string ToString()
+	  {
+		StringBuilder sb = new StringBuilder("<");
+		if (synonyms != null)
+		{
+		  sb.Append("[");
+		  for (int i = 0; i < synonyms.Length; i++)
+		  {
+			if (i != 0)
+			{
+				sb.Append(',');
+			}
+			sb.Append(synonyms[i]);
+		  }
+		  if ((flags & INCLUDE_ORIG) != 0)
+		  {
+			sb.Append(",ORIG");
+		  }
+		  sb.Append("],");
+		}
+		sb.Append(submap);
+		sb.Append(">");
+		return sb.ToString();
+	  }
+
+
+
+	  /// <summary>
+	  /// Produces an IList&lt;Token&gt; from an IList&lt;string&gt; </summary>
+	  public static IList<Token> makeTokens(IList<string> strings)
+	  {
+		IList<Token> ret = new List<Token>(strings.Count);
+		foreach (string str in strings)
+		{
+		  Token newTok = new Token(str, 0, 0, "SYNONYM");
+		  ret.Add(newTok);
+		}
+		return ret;
+	  }
+
+
+	  /// <summary>
+	  /// Merge two lists of tokens, producing a single list with manipulated positionIncrements so that
+	  /// the tokens end up at the same position.
+	  /// 
+	  /// Example:  [a b] merged with [c d] produces [a/c b/d]  ('/' denotes tokens in the same position)
+	  /// Example:  [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2]  (a,n means a has posInc=n)
+	  /// 
+	  /// </summary>
+	  public static IList<Token> mergeTokens(IList<Token> lst1, IList<Token> lst2)
+	  {
+		List<Token> result = new List<Token>();
+		if (lst1 == null || lst2 == null)
+		{
+		  if (lst2 != null)
+		  {
+			  result.AddRange(lst2);
+		  }
+		  if (lst1 != null)
+		  {
+			  result.AddRange(lst1);
+		  }
+		  return result;
+		}
+
+		int pos = 0;
+		IEnumerator<Token> iter1 = lst1.GetEnumerator();
+		IEnumerator<Token> iter2 = lst2.GetEnumerator();
+		Token tok1 = iter1.MoveNext() ? iter1.Current : null;
+		Token tok2 = iter2.MoveNext() ? iter2.Current : null;
+		int pos1 = tok1 != null ? tok1.PositionIncrement : 0;
+		int pos2 = tok2 != null ? tok2.PositionIncrement : 0;
+		while (tok1 != null || tok2 != null)
+		{
+		  while (tok1 != null && (pos1 <= pos2 || tok2 == null))
+		  {
+			Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
+			tok.copyBuffer(tok1.buffer(), 0, tok1.length());
+			tok.PositionIncrement = pos1 - pos;
+			result.Add(tok);
+			pos = pos1;
+			tok1 = iter1.MoveNext() ? iter1.Current : null;
+			pos1 += tok1 != null ? tok1.PositionIncrement : 0;
+		  }
+		  while (tok2 != null && (pos2 <= pos1 || tok1 == null))
+		  {
+			Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
+			tok.copyBuffer(tok2.buffer(), 0, tok2.length());
+			tok.PositionIncrement = pos2 - pos;
+			result.Add(tok);
+			pos = pos2;
+			tok2 = iter2.MoveNext() ? iter2.Current : null;
+			pos2 += tok2 != null ? tok2.PositionIncrement : 0;
+		  }
+		}
+		return result;
+	  }
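
The merge interleaves two token lists by absolute position: each side's running position is the cumulative sum of its increments, and every emitted token's increment is re-based against the last emitted position, so coincident tokens come out with posInc 0. A standalone simulation over (term, posInc) pairs that reproduces the doc examples (no Lucene types; tuple names are illustrative):

    using System.Collections.Generic;

    static class MergeDemo
    {
        public static List<(string Term, int PosInc)> Merge(
            List<(string Term, int PosInc)> lst1,
            List<(string Term, int PosInc)> lst2)
        {
            var result = new List<(string, int)>();
            int pos = 0, i1 = 0, i2 = 0;
            int pos1 = i1 < lst1.Count ? lst1[i1].PosInc : 0;  // absolute position of head 1
            int pos2 = i2 < lst2.Count ? lst2[i2].PosInc : 0;  // absolute position of head 2
            while (i1 < lst1.Count || i2 < lst2.Count)
            {
                while (i1 < lst1.Count && (pos1 <= pos2 || i2 >= lst2.Count))
                {
                    result.Add((lst1[i1].Term, pos1 - pos));   // re-base the increment
                    pos = pos1;
                    i1++;
                    pos1 += i1 < lst1.Count ? lst1[i1].PosInc : 0;
                }
                while (i2 < lst2.Count && (pos2 <= pos1 || i1 >= lst1.Count))
                {
                    result.Add((lst2[i2].Term, pos2 - pos));
                    pos = pos2;
                    i2++;
                    pos2 += i2 < lst2.Count ? lst2[i2].PosInc : 0;
                }
            }
            return result;
        }
    }

    // Merge([(a,1),(b,1)], [(c,1),(d,1)]) -> [(a,1),(c,0),(d,1),(b,0)]
    //   i.e. a/c share position 1 and b/d share position 2.
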
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SolrSynonymParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SolrSynonymParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SolrSynonymParser.cs
new file mode 100644
index 0000000..b0fb325
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SolrSynonymParser.cs
@@ -0,0 +1,218 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharsRef = org.apache.lucene.util.CharsRef;
+
+	/// <summary>
+	/// Parser for the Solr synonyms format.
+	/// <ol>
+	///   <li> Blank lines and lines starting with '#' are comments.</li>
+	///   <li> Explicit mappings match any token sequence on the LHS of "=>"
+	///        and replace with all alternatives on the RHS.  These types of mappings
+	///        ignore the expand parameter in the constructor.
+	///        Example:
+	///        <blockquote>i-pod, i pod => ipod</blockquote></li>
+	///   <li> Equivalent synonyms may be separated with commas and give
+	///        no explicit mapping.  In this case the mapping behavior will
+	///        be taken from the expand parameter in the constructor.  This allows
+	///        the same synonym file to be used in different synonym handling strategies.
+	///        Example:
+	///        <blockquote>ipod, i-pod, i pod</blockquote></li>
+	///   <li> Multiple synonym mapping entries are merged.
+	///        Example:
+	///        <blockquote>
+	///         foo => foo bar<br/>
+	///         foo => baz<br/><br/>
+	///         is equivalent to<br/><br/>
+	///         foo => foo bar, baz
+	///        </blockquote></li>
+	///  </ol>
+	/// @lucene.experimental
+	/// </summary>
+	public class SolrSynonymParser : SynonymMap.Parser
+	{
+	  private readonly bool expand;
+
+	  public SolrSynonymParser(bool dedup, bool expand, Analyzer analyzer) : base(dedup, analyzer)
+	  {
+		this.expand = expand;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void parse(java.io.Reader in) throws java.io.IOException, java.text.ParseException
+	  public override void parse(TextReader @in) // assumes the ported SynonymMap.Parser declares parse(TextReader)
+	  {
+		try
+		{
+		  addInternal(@in);
+		}
+		catch (System.ArgumentException e)
+		{
+		  // .NET has no LineNumberReader or java.text.ParseException; report the bad
+		  // rule via FormatException (the offending line number is lost in this port)
+		  throw new FormatException("Invalid synonym rule: " + e.Message, e);
+		}
+		finally
+		{
+		  @in.Close();
+		}
+	  }
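
A hedged usage sketch: feeding Solr-format rules from any TextReader and building the finished map. This assumes the ported base class keeps the SynonymMap.Builder surface from Java Lucene, where build() produces the immutable SynonymMap; `analyzer` is whatever Analyzer should tokenize the rule text:

    using System.IO;

    var parser = new SolrSynonymParser(dedup: true, expand: true, analyzer: analyzer);
    parser.parse(new StringReader("i-pod, i pod => ipod\nfoo => foo bar, baz"));
    SynonymMap map = parser.build();   // inherited from SynonymMap.Builder in Java Lucene
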
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void addInternal(java.io.BufferedReader in) throws java.io.IOException
+	  private void addInternal(TextReader @in)
+	  {
+		string line = null;
+		while ((line = @in.ReadLine()) != null)
+		{
+		  if (line.Length == 0 || line[0] == '#')
+		  {
+			continue; // ignore empty lines and comments
+		  }
+
+		  CharsRef[] inputs;
+		  CharsRef[] outputs;
+
+		  // TODO: we could process this more efficiently.
+		  string[] sides = Split(line, "=>");
+		  if (sides.Length > 1) // explicit mapping
+		  {
+			if (sides.Length != 2)
+			{
+			  throw new System.ArgumentException("more than one explicit mapping specified on the same line");
+			}
+			string[] inputStrings = Split(sides[0], ",");
+			inputs = new CharsRef[inputStrings.Length];
+			for (int i = 0; i < inputs.Length; i++)
+			{
+			  inputs[i] = analyze(unescape(inputStrings[i]).Trim(), new CharsRef());
+			}
+
+			string[] outputStrings = Split(sides[1], ",");
+			outputs = new CharsRef[outputStrings.Length];
+			for (int i = 0; i < outputs.Length; i++)
+			{
+			  outputs[i] = analyze(unescape(outputStrings[i]).Trim(), new CharsRef());
+			}
+		  }
+		  else
+		  {
+			string[] inputStrings = Split(line, ",");
+			inputs = new CharsRef[inputStrings.Length];
+			for (int i = 0; i < inputs.Length; i++)
+			{
+			  inputs[i] = analyze(unescape(inputStrings[i]).Trim(), new CharsRef());
+			}
+			if (expand)
+			{
+			  outputs = inputs;
+			}
+			else
+			{
+			  outputs = new CharsRef[1];
+			  outputs[0] = inputs[0];
+			}
+		  }
+
+		  // currently we include the term itself in the map,
+		  // and use includeOrig = false always.
+		  // this is how the existing filter does it, but its actually a bug,
+		  // especially if combined with ignoreCase = true
+		  for (int i = 0; i < inputs.Length; i++)
+		  {
+			for (int j = 0; j < outputs.Length; j++)
+			{
+			  add(inputs[i], outputs[j], false);
+			}
+		  }
+		}
+	  }
+
+	  private static string[] Split(string s, string separator)
+	  {
+		List<string> list = new List<string>(2);
+		StringBuilder sb = new StringBuilder();
+		int pos = 0, end = s.Length;
+		while (pos < end)
+		{
+		  if (string.CompareOrdinal(s, pos, separator, 0, separator.Length) == 0) // Java: s.startsWith(separator, pos)
+		  {
+			if (sb.Length > 0)
+			{
+			  list.Add(sb.ToString());
+			  sb = new StringBuilder();
+			}
+			pos += separator.Length;
+			continue;
+		  }
+
+		  char ch = s[pos++];
+		  if (ch == '\\')
+		  {
+			sb.Append(ch);
+			if (pos >= end) // ERROR, or let it go?
+			{
+				break;
+			}
+			ch = s[pos++];
+		  }
+
+		  sb.Append(ch);
+		}
+
+		if (sb.Length > 0)
+		{
+		  list.Add(sb.ToString());
+		}
+
+		return list.ToArray();
+	  }
+
+	  private string unescape(string s)
+	  {
+		if (s.IndexOf("\\", StringComparison.Ordinal) >= 0)
+		{
+		  StringBuilder sb = new StringBuilder();
+		  for (int i = 0; i < s.Length; i++)
+		  {
+			char ch = s[i];
+			if (ch == '\\' && i < s.Length - 1)
+			{
+			  sb.Append(s[++i]);
+			}
+			else
+			{
+			  sb.Append(ch);
+			}
+		  }
+		  return sb.ToString();
+		}
+		return s;
+	  }
+	}
+
+}
\ No newline at end of file

