lucenenet-commits mailing list archives

From synhers...@apache.org
Subject [1/5] lucenenet git commit: More porting work
Date Fri, 30 Jan 2015 12:39:24 GMT
Repository: lucenenet
Updated Branches:
  refs/heads/master 0fae0c4eb -> 69f29113e


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
index 4fe2822..f0392f9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -1,558 +1,549 @@
 using System;
+using System.IO;
 using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
+using Version = Lucene.Net.Util.Version;
 
 namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
-	/// <seealso cref="java.io.Reader"/>, that can flexibly separate text into terms via a regular expression <seealso cref="Pattern"/>
-	/// (with behaviour identical to <seealso cref="String#split(String)"/>),
-	/// and that combines the functionality of
-	/// <seealso cref="LetterTokenizer"/>,
-	/// <seealso cref="LowerCaseTokenizer"/>,
-	/// <seealso cref="WhitespaceTokenizer"/>,
-	/// <seealso cref="StopFilter"/> into a single efficient
-	/// multi-purpose class.
-	/// <para>
-	/// If you are unsure how exactly a regular expression should look like, consider 
-	/// prototyping by simply trying various expressions on some test texts via
-	/// <seealso cref="String#split(String)"/>. Once you are satisfied, give that regex to 
-	/// PatternAnalyzer. Also see <a target="_blank" 
-	/// href="http://java.sun.com/docs/books/tutorial/extra/regex/">Java Regular Expression Tutorial</a>.
-	/// </para>
-	/// <para>
-	/// This class can be considerably faster than the "normal" Lucene tokenizers. 
-	/// It can also serve as a building block in a compound Lucene
-	/// <seealso cref="org.apache.lucene.analysis.TokenFilter"/> chain. For example as in this 
-	/// stemming example:
-	/// <pre>
-	/// PatternAnalyzer pat = ...
-	/// TokenStream tokenStream = new SnowballFilter(
-	///     pat.tokenStream("content", "James is running round in the woods"), 
-	///     "English"));
-	/// </pre>
-	/// </para>
-	/// </summary>
-	/// @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead. 
-	[Obsolete("(4.0) use the pattern-based analysis in the analysis/pattern package instead.")]
-	public sealed class PatternAnalyzer : Analyzer
-	{
-
-	  /// <summary>
-	  /// <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) </summary>
-	  public static readonly Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
-
-	  /// <summary>
-	  /// <code>"\\s+"</code>; Divides text at whitespaces (Character.isWhitespace(c)) </summary>
-	  public static readonly Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
-
-	  private static readonly CharArraySet EXTENDED_ENGLISH_STOP_WORDS = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("a", "about", "above", "across", "adj", "after", "afterwards", "again", "against", "albeit", "all", "almost", "alone", "along", "already", "also", "although", "always", "among", "amongst", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anywhere", "are", "around", "as", "at", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "co", "could", "down", "during", "each", "eg", "either", "else", "elsewhere", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "first", "for", "former", "formerly", "from", "further", "had", "has", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "i", "ie", "if", "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last", "latter", "latterly", "least", "less", "ltd", "many", "may", "me", "meanwhile", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "namely", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps", "rather", "s", "same", "seem", "seemed", "seeming", "seems", "several", "she", "should", "since", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "t", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefor", "therein", "thereupon", "these", "they", "this", "those", "though", "through", "throughout", "thru", "thus", "to", "together", "too", "toward", "towards", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereafter", "whereas", "whereat", "whereby", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "with", "within", "without", "would", "xsubj", "xcal", "xauthor", "xother ", "xnote", "yet", "you", "your", "yours", "yourself", "yourselves"), true));
-
-	  /// <summary>
-	  /// A lower-casing word analyzer with English stop words (can be shared
-	  /// freely across threads without harm); global per class loader.
-	  /// </summary>
-	  public static readonly PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
-
-	  /// <summary>
-	  /// A lower-casing word analyzer with <b>extended </b> English stop words
-	  /// (can be shared freely across threads without harm); global per class
-	  /// loader. The stop words are borrowed from
-	  /// http://thomas.loc.gov/home/stopwords.html, see
-	  /// http://thomas.loc.gov/home/all.about.inquery.html
-	  /// </summary>
-	  public static readonly PatternAnalyzer EXTENDED_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
-
-	  private readonly Pattern pattern;
-	  private readonly bool toLowerCase;
-	  private readonly CharArraySet stopWords;
-
-	  private readonly Version matchVersion;
-
-	  /// <summary>
-	  /// Constructs a new instance with the given parameters.
-	  /// </summary>
-	  /// <param name="matchVersion"> currently does nothing </param>
-	  /// <param name="pattern">
-	  ///            a regular expression delimiting tokens </param>
-	  /// <param name="toLowerCase">
-	  ///            if <code>true</code> returns tokens after applying
-	  ///            String.toLowerCase() </param>
-	  /// <param name="stopWords">
-	  ///            if non-null, ignores all tokens that are contained in the
-	  ///            given stop set (after previously having applied toLowerCase()
-	  ///            if applicable). For example, created via
-	  ///            <seealso cref="StopFilter#makeStopSet(Version, String[])"/>and/or
-	  ///            <seealso cref="WordlistLoader"/>as in
-	  ///            <code>WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")</code>
-	  ///            or <a href="http://www.unine.ch/info/clef/">other stop words
-	  ///            lists </a>. </param>
-	  public PatternAnalyzer(Version matchVersion, Pattern pattern, bool toLowerCase, CharArraySet stopWords)
-	  {
-		if (pattern == null)
-		{
-		  throw new System.ArgumentException("pattern must not be null");
-		}
-
-		if (eqPattern(NON_WORD_PATTERN, pattern))
-		{
-			pattern = NON_WORD_PATTERN;
-		}
-		else if (eqPattern(WHITESPACE_PATTERN, pattern))
-		{
-			pattern = WHITESPACE_PATTERN;
-		}
-
-		if (stopWords != null && stopWords.size() == 0)
-		{
-			stopWords = null;
-		}
-
-		this.pattern = pattern;
-		this.toLowerCase = toLowerCase;
-		this.stopWords = stopWords;
-		this.matchVersion = matchVersion;
-	  }
-
-	  /// <summary>
-	  /// Creates a token stream that tokenizes the given string into token terms
-	  /// (aka words).
-	  /// </summary>
-	  /// <param name="fieldName">
-	  ///            the name of the field to tokenize (currently ignored). </param>
-	  /// <param name="reader">
-	  ///            reader (e.g. charfilter) of the original text. can be null. </param>
-	  /// <param name="text">
-	  ///            the string to tokenize </param>
-	  /// <returns> a new token stream </returns>
-	  public TokenStreamComponents createComponents(string fieldName, Reader reader, string text)
-	  {
-		// Ideally the Analyzer superclass should have a method with the same signature, 
-		// with a default impl that simply delegates to the StringReader flavour. 
-		if (reader == null)
-		{
-		  reader = new FastStringReader(text);
-		}
-
-		if (pattern == NON_WORD_PATTERN) // fast path
-		{
-		  return new TokenStreamComponents(new FastStringTokenizer(reader, true, toLowerCase, stopWords));
-		} // fast path
-		else if (pattern == WHITESPACE_PATTERN)
-		{
-		  return new TokenStreamComponents(new FastStringTokenizer(reader, false, toLowerCase, stopWords));
-		}
-
-		Tokenizer tokenizer = new PatternTokenizer(reader, pattern, toLowerCase);
-		TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
-		return new TokenStreamComponents(tokenizer, result);
-	  }
-
-	  /// <summary>
-	  /// Creates a token stream that tokenizes all the text in the given Reader;
-	  /// This implementation forwards to <code>tokenStream(String, Reader, String)</code> and is
-	  /// less efficient than <code>tokenStream(String, Reader, String)</code>.
-	  /// </summary>
-	  /// <param name="fieldName">
-	  ///            the name of the field to tokenize (currently ignored). </param>
-	  /// <param name="reader">
-	  ///            the reader delivering the text </param>
-	  /// <returns> a new token stream </returns>
-	  public override TokenStreamComponents createComponents(string fieldName, Reader reader)
-	  {
-		return createComponents(fieldName, reader, null);
-	  }
-
-	  /// <summary>
-	  /// Indicates whether some other object is "equal to" this one.
-	  /// </summary>
-	  /// <param name="other">
-	  ///            the reference object with which to compare. </param>
-	  /// <returns> true if equal, false otherwise </returns>
-	  public override bool Equals(object other)
-	  {
-		if (this == other)
-		{
-			return true;
-		}
-		if (this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER)
-		{
-			return false;
-		}
-		if (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER)
-		{
-			return false;
-		}
-
-		if (other is PatternAnalyzer)
-		{
-		  PatternAnalyzer p2 = (PatternAnalyzer) other;
-		  return toLowerCase == p2.toLowerCase && eqPattern(pattern, p2.pattern) && eq(stopWords, p2.stopWords);
-		}
-		return false;
-	  }
-
-	  /// <summary>
-	  /// Returns a hash code value for the object.
-	  /// </summary>
-	  /// <returns> the hash code. </returns>
-	  public override int GetHashCode()
-	  {
-		if (this == DEFAULT_ANALYZER) // fast path
-		{
-			return -1218418418;
-		}
-		if (this == EXTENDED_ANALYZER) // fast path
-		{
-			return 1303507063;
-		}
-
-		int h = 1;
-		h = 31 * h + pattern.pattern().GetHashCode();
-		h = 31 * h + pattern.flags();
-		h = 31 * h + (toLowerCase ? 1231 : 1237);
-		h = 31 * h + (stopWords != null ? stopWords.GetHashCode() : 0);
-		return h;
-	  }
-
-	  /// <summary>
-	  /// equality where o1 and/or o2 can be null </summary>
-	  private static bool eq(object o1, object o2)
-	  {
-		return (o1 == o2) || (o1 != null ? o1.Equals(o2) : false);
-	  }
-
-	  /// <summary>
-	  /// assumes p1 and p2 are not null </summary>
-	  private static bool eqPattern(Pattern p1, Pattern p2)
-	  {
-		return p1 == p2 || (p1.flags() == p2.flags() && p1.pattern().Equals(p2.pattern()));
-	  }
-
-	  /// <summary>
-	  /// Reads until end-of-stream and returns all read chars, finally closes the stream.
-	  /// </summary>
-	  /// <param name="input"> the input stream </param>
-	  /// <exception cref="IOException"> if an I/O error occurs while reading the stream </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private static String toString(java.io.Reader input) throws java.io.IOException
-	  private static string ToString(Reader input)
-	  {
-		if (input is FastStringReader) // fast path
-		{
-		  return ((FastStringReader) input).String;
-		}
-
-		try
-		{
-		  int len = 256;
-		  char[] buffer = new char[len];
-		  char[] output = new char[len];
-
-		  len = 0;
-		  int n;
-		  while ((n = input.read(buffer)) >= 0)
-		  {
-			if (len + n > output.Length) // grow capacity
-			{
-			  char[] tmp = new char[Math.Max(output.Length << 1, len + n)];
-			  Array.Copy(output, 0, tmp, 0, len);
-			  Array.Copy(buffer, 0, tmp, len, n);
-			  buffer = output; // use larger buffer for future larger bulk reads
-			  output = tmp;
-			}
-			else
-			{
-			  Array.Copy(buffer, 0, output, len, n);
-			}
-			len += n;
-		  }
-
-		  return new string(output, 0, len);
-		}
-		finally
-		{
-		  input.close();
-		}
-	  }
-
-
-	  ///////////////////////////////////////////////////////////////////////////////
-	  // Nested classes:
-	  ///////////////////////////////////////////////////////////////////////////////
-	  /// <summary>
-	  /// The work horse; performance isn't fantastic, but it's not nearly as bad
-	  /// as one might think - kudos to the Sun regex developers.
-	  /// </summary>
-	  private sealed class PatternTokenizer : Tokenizer
-	  {
-
-		internal readonly Pattern pattern;
-		internal string str;
-		internal readonly bool toLowerCase;
-		internal Matcher matcher;
-		internal int pos = 0;
-		internal bool initialized = false;
-		internal static readonly Locale locale = Locale.Default;
-		internal readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-		internal readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
-
-		public PatternTokenizer(Reader input, Pattern pattern, bool toLowerCase) : base(input)
-		{
-		  this.pattern = pattern;
-		  this.matcher = pattern.matcher("");
-		  this.toLowerCase = toLowerCase;
-		}
-
-		public override bool incrementToken()
-		{
-		  if (!initialized)
-		  {
-			throw new System.InvalidOperationException("Consumer did not call reset().");
-		  }
-		  if (matcher == null)
-		  {
-			  return false;
-		  }
-		  clearAttributes();
-		  while (true) // loop takes care of leading and trailing boundary cases
-		  {
-			int start = pos;
-			int end_Renamed;
-			bool isMatch = matcher.find();
-			if (isMatch)
-			{
-			  end_Renamed = matcher.start();
-			  pos = matcher.end();
-			}
-			else
-			{
-			  end_Renamed = str.Length;
-			  matcher = null; // we're finished
-			}
-
-			if (start != end_Renamed) // non-empty match (header/trailer)
-			{
-			  string text = str.Substring(start, end_Renamed - start);
-			  if (toLowerCase)
-			  {
-				  text = text.ToLower(locale);
-			  }
-			  termAtt.setEmpty().append(text);
-			  offsetAtt.setOffset(correctOffset(start), correctOffset(end_Renamed));
-			  return true;
-			}
-			if (!isMatch)
-			{
-				return false;
-			}
-		  }
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
-		public override void end()
-		{
-		  base.end();
-		  // set final offset
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int finalOffset = correctOffset(str.length());
-		  int finalOffset = correctOffset(str.Length);
-		  this.offsetAtt.setOffset(finalOffset, finalOffset);
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void close() throws java.io.IOException
-		public override void close()
-		{
-		  base.close();
-		  this.initialized = false;
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-		public override void reset()
-		{
-		  base.reset();
-		  this.str = PatternAnalyzer.ToString(input);
-		  this.matcher = pattern.matcher(this.str);
-		  this.pos = 0;
-		  this.initialized = true;
-		}
-	  }
-
-
-	  ///////////////////////////////////////////////////////////////////////////////
-	  // Nested classes:
-	  ///////////////////////////////////////////////////////////////////////////////
-	  /// <summary>
-	  /// Special-case class for best performance in common cases; this class is
-	  /// otherwise unnecessary.
-	  /// </summary>
-	  private sealed class FastStringTokenizer : Tokenizer
-	  {
-
-		internal string str;
-		internal int pos;
-		internal readonly bool isLetter;
-		internal readonly bool toLowerCase;
-		internal readonly CharArraySet stopWords;
-		internal static readonly Locale locale = Locale.Default;
-		internal readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-		internal readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
-
-		public FastStringTokenizer(Reader input, bool isLetter, bool toLowerCase, CharArraySet stopWords) : base(input)
-		{
-		  this.isLetter = isLetter;
-		  this.toLowerCase = toLowerCase;
-		  this.stopWords = stopWords;
-		}
-
-		public override bool incrementToken()
-		{
-		  if (str == null)
-		  {
-			throw new System.InvalidOperationException("Consumer did not call reset().");
-		  }
-		  clearAttributes();
-		  // cache loop instance vars (performance)
-		  string s = str;
-		  int len = s.Length;
-		  int i = pos;
-		  bool letter = isLetter;
-
-		  int start = 0;
-		  string text;
-		  do
-		  {
-			// find beginning of token
-			text = null;
-			while (i < len && !isTokenChar(s[i], letter))
-			{
-			  i++;
-			}
-
-			if (i < len) // found beginning; now find end of token
-			{
-			  start = i;
-			  while (i < len && isTokenChar(s[i], letter))
-			  {
-				i++;
-			  }
-
-			  text = s.Substring(start, i - start);
-			  if (toLowerCase)
-			  {
-				  text = text.ToLower(locale);
-			  }
-	//          if (toLowerCase) {            
-	////            use next line once JDK 1.5 String.toLowerCase() performance regression is fixed
-	////            see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265809
-	//            text = s.substring(start, i).toLowerCase(); 
-	////            char[] chars = new char[i-start];
-	////            for (int j=start; j < i; j++) chars[j-start] = Character.toLowerCase(s.charAt(j));
-	////            text = new String(chars);
-	//          } else {
-	//            text = s.substring(start, i);
-	//          }
-			}
-		  } while (text != null && isStopWord(text));
-
-		  pos = i;
-		  if (text == null)
-		  {
-			return false;
-		  }
-		  termAtt.setEmpty().append(text);
-		  offsetAtt.setOffset(correctOffset(start), correctOffset(i));
-		  return true;
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
-		public override void end()
-		{
-		  base.end();
-		  // set final offset
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int finalOffset = str.length();
-		  int finalOffset = str.Length;
-		  this.offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));
-		}
-
-		internal bool isTokenChar(char c, bool isLetter)
-		{
-		  return isLetter ? char.IsLetter(c) :!char.IsWhiteSpace(c);
-		}
-
-		internal bool isStopWord(string text)
-		{
-		  return stopWords != null && stopWords.contains(text);
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void close() throws java.io.IOException
-		public override void close()
-		{
-		  base.close();
-		  this.str = null;
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-		public override void reset()
-		{
-		  base.reset();
-		  this.str = PatternAnalyzer.ToString(input);
-		  this.pos = 0;
-		}
-	  }
-
-
-	  ///////////////////////////////////////////////////////////////////////////////
-	  // Nested classes:
-	  ///////////////////////////////////////////////////////////////////////////////
-	  /// <summary>
-	  /// A StringReader that exposes it's contained string for fast direct access.
-	  /// Might make sense to generalize this to CharSequence and make it public?
-	  /// </summary>
-	  internal sealed class FastStringReader : StringReader
-	  {
-
-		internal readonly string s;
-
-		internal FastStringReader(string s) : base(s)
-		{
-		  this.s = s;
-		}
-
-		internal string String
-		{
-			get
-			{
-			  return s;
-			}
-		}
-	  }
-
-	}
-
+    /// Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
+    /// <seealso cref="TextReader"/>, that can flexibly separate text into terms via a regular expression <seealso cref="Pattern"/>
+    /// (with behaviour identical to <seealso cref="String#split(String)"/>),
+    /// and that combines the functionality of
+    /// <seealso cref="LetterTokenizer"/>,
+    /// <seealso cref="LowerCaseTokenizer"/>,
+    /// <seealso cref="WhitespaceTokenizer"/>,
+    /// <seealso cref="StopFilter"/> into a single efficient
+    /// multi-purpose class.
+    /// <para>
+    /// If you are unsure how exactly a regular expression should look, consider 
+    /// prototyping by simply trying various expressions on some test texts via
+    /// <seealso cref="String#split(String)"/>. Once you are satisfied, give that regex to 
+    /// PatternAnalyzer. Also see <a target="_blank" 
+    /// href="http://java.sun.com/docs/books/tutorial/extra/regex/">Java Regular Expression Tutorial</a>.
+    /// </para>
+    /// <para>
+    /// This class can be considerably faster than the "normal" Lucene tokenizers. 
+    /// It can also serve as a building block in a compound Lucene
+    /// <seealso cref="TokenFilter"/> chain. For example as in this 
+    /// stemming example:
+    /// <pre>
+    /// PatternAnalyzer pat = ...
+    /// TokenStream tokenStream = new SnowballFilter(
+    ///     pat.tokenStream("content", "James is running round in the woods"), 
+    ///     "English");
+    /// </pre>
+    /// </para>
+    /// </summary>
+    /// @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead. 
+    [Obsolete("(4.0) use the pattern-based analysis in the analysis/pattern package instead.")]
+    public sealed class PatternAnalyzer : Analyzer
+    {
+
+        /// <summary>
+        /// <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) </summary>
+        public static readonly Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
+
+        /// <summary>
+        /// <code>"\\s+"</code>; Divides text at whitespaces (Character.isWhitespace(c)) </summary>
+        public static readonly Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
+
+        private static readonly CharArraySet EXTENDED_ENGLISH_STOP_WORDS = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("a", "about", "above", "across", "adj", "after", "afterwards", "again", "against", "albeit", "all", "almost", "alone", "along", "already", "also", "although", "always", "among", "amongst", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anywhere", "are", "around", "as", "at", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "co", "could", "down", "during", "each", "eg", "either", "else", "elsewhere", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "first", "for", "former", "formerly", "from", "further", "had", "has", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "i", "ie", "if", "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last", "latter", "latterly", "least", "less", "ltd", "many", "may", "me", "meanwhile", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "namely", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps", "rather", "s", "same", "seem", "seemed", "seeming", "seems", "several", "she", "should", "since", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "t", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefor", "therein", "thereupon", "these", "they", "this", "those", "though", "through", "throughout", "thru", "thus", "to", "together", "too", "toward", "towards", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereafter", "whereas", "whereat", "whereby", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "with", "within", "without", "would", "xsubj", "xcal", "xauthor", "xother ", "xnote", "yet", "you", "your", "yours", "yourself", "yourselves"), true));
+
+        /// <summary>
+        /// A lower-casing word analyzer with English stop words (can be shared
+        /// freely across threads without harm); global per class loader.
+        /// </summary>
+        public static readonly PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+
+        /// <summary>
+        /// A lower-casing word analyzer with <b>extended</b> English stop words
+        /// (can be shared freely across threads without harm); global per class
+        /// loader. The stop words are borrowed from
+        /// http://thomas.loc.gov/home/stopwords.html, see
+        /// http://thomas.loc.gov/home/all.about.inquery.html
+        /// </summary>
+        public static readonly PatternAnalyzer EXTENDED_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
+
+        private readonly Pattern pattern;
+        private readonly bool toLowerCase;
+        private readonly CharArraySet stopWords;
+
+        private readonly Version matchVersion;
+
+        /// <summary>
+        /// Constructs a new instance with the given parameters.
+        /// </summary>
+        /// <param name="matchVersion"> currently does nothing </param>
+        /// <param name="pattern">
+        ///            a regular expression delimiting tokens </param>
+        /// <param name="toLowerCase">
+        ///            if <code>true</code> returns tokens after applying
+        ///            String.toLowerCase() </param>
+        /// <param name="stopWords">
+        ///            if non-null, ignores all tokens that are contained in the
+        ///            given stop set (after previously having applied toLowerCase()
+        ///            if applicable). For example, created via
+        ///            <seealso cref="StopFilter#makeStopSet(Version, String[])"/> and/or
+        ///            <seealso cref="WordlistLoader"/> as in
+        ///            <code>WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt"))</code>
+        ///            or <a href="http://www.unine.ch/info/clef/">other stop words
+        ///            lists </a>. </param>
+        public PatternAnalyzer(Version matchVersion, Pattern pattern, bool toLowerCase, CharArraySet stopWords)
+        {
+            if (pattern == null)
+            {
+                throw new System.ArgumentException("pattern must not be null");
+            }
+
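+            // Canonicalize equivalent patterns to the shared singletons so that
+            // CreateComponents can use cheap reference equality for its fast paths.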
+            if (eqPattern(NON_WORD_PATTERN, pattern))
+            {
+                pattern = NON_WORD_PATTERN;
+            }
+            else if (eqPattern(WHITESPACE_PATTERN, pattern))
+            {
+                pattern = WHITESPACE_PATTERN;
+            }
+
+            if (stopWords != null && stopWords.Size == 0)
+            {
+                stopWords = null;
+            }
+
+            this.pattern = pattern;
+            this.toLowerCase = toLowerCase;
+            this.stopWords = stopWords;
+            this.matchVersion = matchVersion;
+        }
+
+        /// <summary>
+        /// Creates a token stream that tokenizes the given string into token terms
+        /// (aka words).
+        /// </summary>
+        /// <param name="fieldName">
+        ///            the name of the field to tokenize (currently ignored). </param>
+        /// <param name="reader">
+        ///            reader (e.g. charfilter) of the original text. can be null. </param>
+        /// <param name="text">
+        ///            the string to tokenize </param>
+        /// <returns> a new token stream </returns>
+        public TokenStreamComponents CreateComponents(string fieldName, TextReader reader, string text)
+        {
+            // Ideally the Analyzer superclass should have a method with the same signature, 
+            // with a default impl that simply delegates to the StringReader flavour. 
+            if (reader == null)
+            {
+                reader = new FastStringReader(text);
+            }
+
+            if (pattern == NON_WORD_PATTERN) // fast path
+            {
+                return new TokenStreamComponents(new FastStringTokenizer(reader, true, toLowerCase, stopWords));
+            }
+            else if (pattern == WHITESPACE_PATTERN) // fast path
+            {
+                return new TokenStreamComponents(new FastStringTokenizer(reader, false, toLowerCase, stopWords));
+            }
+
+            Tokenizer tokenizer = new PatternTokenizer(reader, pattern, toLowerCase);
+            TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
+            return new TokenStreamComponents(tokenizer, result);
+        }
+
+        /// <summary>
+        /// Creates a token stream that tokenizes all the text in the given TextReader.
+        /// This implementation forwards to <code>CreateComponents(string, TextReader, string)</code> and is
+        /// less efficient than calling that overload with the text string directly.
+        /// </summary>
+        /// <param name="fieldName">
+        ///            the name of the field to tokenize (currently ignored). </param>
+        /// <param name="reader">
+        ///            the reader delivering the text </param>
+        /// <returns> a new token stream </returns>
+        public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+        {
+            return CreateComponents(fieldName, reader, null);
+        }
+
+        /// <summary>
+        /// Indicates whether some other object is "equal to" this one.
+        /// </summary>
+        /// <param name="other">
+        ///            the reference object with which to compare. </param>
+        /// <returns> true if equal, false otherwise </returns>
+        public override bool Equals(object other)
+        {
+            if (this == other)
+            {
+                return true;
+            }
+            if (this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER)
+            {
+                return false;
+            }
+            if (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER)
+            {
+                return false;
+            }
+
+            var p2 = other as PatternAnalyzer;
+            if (p2 != null)
+            {
+                return toLowerCase == p2.toLowerCase && eqPattern(pattern, p2.pattern) && eq(stopWords, p2.stopWords);
+            }
+            return false;
+        }
+
+        /// <summary>
+        /// Returns a hash code value for the object.
+        /// </summary>
+        /// <returns> the hash code. </returns>
+        public override int GetHashCode()
+        {
+            if (this == DEFAULT_ANALYZER) // fast path
+            {
+                return -1218418418;
+            }
+            if (this == EXTENDED_ANALYZER) // fast path
+            {
+                return 1303507063;
+            }
+
+            int h = 1;
+            h = 31 * h + pattern.pattern().GetHashCode();
+            h = 31 * h + pattern.flags();
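+            // 1231 and 1237 mirror Java's Boolean.hashCode() values, keeping hash parity with the Java original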
+            h = 31 * h + (toLowerCase ? 1231 : 1237);
+            h = 31 * h + (stopWords != null ? stopWords.GetHashCode() : 0);
+            return h;
+        }
+
+        /// <summary>
+        /// equality where o1 and/or o2 can be null </summary>
+        private static bool eq(object o1, object o2)
+        {
+            return (o1 == o2) || (o1 != null ? o1.Equals(o2) : false);
+        }
+
+        /// <summary>
+        /// assumes p1 and p2 are not null </summary>
+        private static bool eqPattern(Pattern p1, Pattern p2)
+        {
+            return p1 == p2 || (p1.flags() == p2.flags() && p1.pattern().Equals(p2.pattern()));
+        }
+
+        /// <summary>
+        /// Reads until end-of-stream and returns all read chars, finally closes the stream.
+        /// </summary>
+        /// <param name="input"> the input stream </param>
+        /// <exception cref="IOException"> if an I/O error occurs while reading the stream </exception>
+        private static string ToString(TextReader input)
+        {
+            var reader = input as FastStringReader;
+            if (reader != null) // fast path
+            {
+                return reader.String;
+            }
+
+            try
+            {
+                int len = 256;
+                char[] buffer = new char[len];
+                char[] output = new char[len];
+
+                len = 0;
+                int n;
+                // .NET's TextReader.Read returns 0 at end of stream (Java's read returns -1),
+                // so use the (buffer, index, count) overload and loop while the count is positive
+                while ((n = input.Read(buffer, 0, buffer.Length)) > 0)
+                {
+                    if (len + n > output.Length) // grow capacity
+                    {
+                        char[] tmp = new char[Math.Max(output.Length << 1, len + n)];
+                        Array.Copy(output, 0, tmp, 0, len);
+                        Array.Copy(buffer, 0, tmp, len, n);
+                        buffer = output; // use larger buffer for future larger bulk reads
+                        output = tmp;
+                    }
+                    else
+                    {
+                        Array.Copy(buffer, 0, output, len, n);
+                    }
+                    len += n;
+                }
+
+                return new string(output, 0, len);
+            }
+            finally
+            {
+                input.Dispose();
+            }
+        }
+
+
+        ///////////////////////////////////////////////////////////////////////////////
+        // Nested classes:
+        ///////////////////////////////////////////////////////////////////////////////
+        /// <summary>
+        /// The workhorse; performance isn't fantastic, but it's not nearly as bad
+        /// as one might think - kudos to the Sun regex developers.
+        /// </summary>
+        private sealed class PatternTokenizer : Tokenizer
+        {
+            private readonly Pattern pattern;
+            private string str;
+            private readonly bool toLowerCase;
+            private Matcher matcher;
+            private int pos = 0;
+            private bool initialized = false;
+            private static readonly Locale locale = Locale.Default;
+            private readonly ICharTermAttribute termAtt;
+            private readonly IOffsetAttribute offsetAtt;
+
+            public PatternTokenizer(TextReader input, Pattern pattern, bool toLowerCase)
+                : base(input)
+            {
+                termAtt = AddAttribute<ICharTermAttribute>();
+                offsetAtt = AddAttribute<IOffsetAttribute>();
+                this.pattern = pattern;
+                this.matcher = pattern.matcher("");
+                this.toLowerCase = toLowerCase;
+            }
+
+            public override bool IncrementToken()
+            {
+                if (!initialized)
+                {
+                    throw new System.InvalidOperationException("Consumer did not call Reset().");
+                }
+                if (matcher == null)
+                {
+                    return false;
+                }
+                ClearAttributes();
+                while (true) // loop takes care of leading and trailing boundary cases
+                {
+                    int start = pos;
+                    int end;
+                    bool isMatch = matcher.find();
+                    if (isMatch)
+                    {
+                        end = matcher.start();
+                        pos = matcher.end();
+                    }
+                    else
+                    {
+                        end = str.Length;
+                        matcher = null; // we're finished
+                    }
+
+                    if (start != end) // non-empty match (header/trailer)
+                    {
+                        string text = str.Substring(start, end - start);
+                        if (toLowerCase)
+                        {
+                            text = text.ToLower(locale);
+                        }
+                        termAtt.SetEmpty().Append(text);
+                        offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(end));
+                        return true;
+                    }
+                    if (!isMatch)
+                    {
+                        return false;
+                    }
+                }
+            }
+
+            public override void End()
+            {
+                base.End();
+                // set final offset
+                int finalOffset = CorrectOffset(str.Length);
+                this.offsetAtt.SetOffset(finalOffset, finalOffset);
+            }
+
+            public override void Dispose()
+            {
+                base.Dispose();
+                this.initialized = false;
+            }
+
+            public override void Reset()
+            {
+                base.Reset();
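+                // Drain the entire reader into a string up front; the regex matcher needs random access to the full text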
+                this.str = PatternAnalyzer.ToString(input);
+                this.matcher = pattern.matcher(this.str);
+                this.pos = 0;
+                this.initialized = true;
+            }
+        }
+
+
+        ///////////////////////////////////////////////////////////////////////////////
+        // Nested classes:
+        ///////////////////////////////////////////////////////////////////////////////
+        /// <summary>
+        /// Special-case class for best performance in common cases; this class is
+        /// otherwise unnecessary.
+        /// </summary>
+        private sealed class FastStringTokenizer : Tokenizer
+        {
+            private string str;
+            private int pos;
+            private readonly bool isLetter;
+            private readonly bool toLowerCase;
+            private readonly CharArraySet stopWords;
+            private static readonly Locale locale = Locale.Default;
+            private readonly ICharTermAttribute termAtt;
+            private readonly IOffsetAttribute offsetAtt;
+
+            public FastStringTokenizer(TextReader input, bool isLetter, bool toLowerCase, CharArraySet stopWords)
+                : base(input)
+            {
+                termAtt = AddAttribute<ICharTermAttribute>();
+                offsetAtt = AddAttribute<IOffsetAttribute>();
+
+                this.isLetter = isLetter;
+                this.toLowerCase = toLowerCase;
+                this.stopWords = stopWords;
+            }
+
+            public override bool IncrementToken()
+            {
+                if (str == null)
+                {
+                    throw new System.InvalidOperationException("Consumer did not call Reset().");
+                }
+                ClearAttributes();
+                // cache loop instance vars (performance)
+                string s = str;
+                int len = s.Length;
+                int i = pos;
+                bool letter = isLetter;
+
+                int start = 0;
+                string text;
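+                // The do/while below re-scans whenever the extracted token is a stop word,
+                // so only non-stop tokens escape the loop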
+                do
+                {
+                    // find beginning of token
+                    text = null;
+                    while (i < len && !isTokenChar(s[i], letter))
+                    {
+                        i++;
+                    }
+
+                    if (i < len) // found beginning; now find end of token
+                    {
+                        start = i;
+                        while (i < len && isTokenChar(s[i], letter))
+                        {
+                            i++;
+                        }
+
+                        text = s.Substring(start, i - start);
+                        if (toLowerCase)
+                        {
+                            text = text.ToLower(locale);
+                        }
+                        //          if (toLowerCase) {            
+                        ////            use next line once JDK 1.5 String.toLowerCase() performance regression is fixed
+                        ////            see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265809
+                        //            text = s.substring(start, i).toLowerCase(); 
+                        ////            char[] chars = new char[i-start];
+                        ////            for (int j=start; j < i; j++) chars[j-start] = Character.toLowerCase(s.charAt(j));
+                        ////            text = new String(chars);
+                        //          } else {
+                        //            text = s.substring(start, i);
+                        //          }
+                    }
+                } while (text != null && isStopWord(text));
+
+                pos = i;
+                if (text == null)
+                {
+                    return false;
+                }
+                termAtt.SetEmpty().Append(text);
+                offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(i));
+                return true;
+            }
+
+            public override void End()
+            {
+                base.End();
+                // set final offset
+                int finalOffset = str.Length;
+                this.offsetAtt.SetOffset(CorrectOffset(finalOffset), CorrectOffset(finalOffset));
+            }
+
+            private bool isTokenChar(char c, bool isLetter)
+            {
+                return isLetter ? char.IsLetter(c) : !char.IsWhiteSpace(c);
+            }
+
+            private bool isStopWord(string text)
+            {
+                return stopWords != null && stopWords.Contains(text);
+            }
+
+            public override void Dispose()
+            {
+                base.Dispose();
+                this.str = null;
+            }
+
+            public override void Reset()
+            {
+                base.Reset();
+                this.str = PatternAnalyzer.ToString(input);
+                this.pos = 0;
+            }
+        }
+
+
+        ///////////////////////////////////////////////////////////////////////////////
+        // Nested classes:
+        ///////////////////////////////////////////////////////////////////////////////
+        /// <summary>
+        /// A StringReader that exposes its contained string for fast direct access.
+        /// Might make sense to generalize this to CharSequence and make it public?
+        /// </summary>
+        internal sealed class FastStringReader : StringReader
+        {
+
+            internal readonly string s;
+
+            internal FastStringReader(string s)
+                : base(s)
+            {
+                this.s = s;
+            }
+
+            internal string String
+            {
+                get
+                {
+                    return s;
+                }
+            }
+        }
+
+    }
 }
\ No newline at end of file

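For context, here is a minimal consumption sketch for the analyzer above. It is an
illustration only, not part of the commit: the Pattern shim usage and the token
stream surface (TokenStream/GetAttribute/Reset/IncrementToken) are assumptions about
the port's API at this stage.

    Analyzer analyzer = new PatternAnalyzer(Version.LUCENE_CURRENT,
        PatternAnalyzer.WHITESPACE_PATTERN,   // reference-equal to the singleton -> fast path
        true,                                 // lowercase each token
        StopAnalyzer.ENGLISH_STOP_WORDS_SET); // drop common English stop words

    TokenStream ts = analyzer.TokenStream("content", new StringReader("James is running round in the woods"));
    ICharTermAttribute term = ts.GetAttribute<ICharTermAttribute>();
    ts.Reset();
    while (ts.IncrementToken())
    {
        Console.WriteLine(term.ToString());   // james, running, round, woods
    }
    ts.End();
    ts.Dispose();
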
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
index 3886da0..56a2331 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
@@ -1,62 +1,60 @@
-using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Tokenattributes;
 
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-
-	/// <summary>
-	/// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>. Each token
-	/// that matches the provided pattern is marked as a keyword by setting
-	/// <seealso cref="KeywordAttribute#setKeyword(boolean)"/> to <code>true</code>.
-	/// </summary>
-	public sealed class PatternKeywordMarkerFilter : KeywordMarkerFilter
-	{
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-	  private readonly Matcher matcher;
-
-	  /// <summary>
-	  /// Create a new <seealso cref="PatternKeywordMarkerFilter"/>, that marks the current
-	  /// token as a keyword if the tokens term buffer matches the provided
-	  /// <seealso cref="Pattern"/> via the <seealso cref="KeywordAttribute"/>.
-	  /// </summary>
-	  /// <param name="in">
-	  ///          TokenStream to filter </param>
-	  /// <param name="pattern">
-	  ///          the pattern to apply to the incoming term buffer
-	  ///  </param>
-	  public PatternKeywordMarkerFilter(TokenStream @in, Pattern pattern) : base(@in)
-	  {
-		this.matcher = pattern.matcher("");
-	  }
-
-	  protected internal override bool Keyword
-	  {
-		  get
-		  {
-			matcher.reset(termAtt);
-			return matcher.matches();
-		  }
-	  }
-
-	}
-
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>. Each token
+    /// that matches the provided pattern is marked as a keyword by setting
+    /// <seealso cref="KeywordAttribute#setKeyword(boolean)"/> to <code>true</code>.
+    /// </summary>
+    public sealed class PatternKeywordMarkerFilter : KeywordMarkerFilter
+    {
+        private readonly ICharTermAttribute termAtt;
+        private readonly Matcher matcher;
+
+        /// <summary>
+        /// Create a new <seealso cref="PatternKeywordMarkerFilter"/>, that marks the current
+        /// token as a keyword if the tokens term buffer matches the provided
+        /// <seealso cref="Pattern"/> via the <seealso cref="KeywordAttribute"/>.
+        /// </summary>
+        /// <param name="in">
+        ///          TokenStream to filter </param>
+        /// <param name="pattern">
+        ///          the pattern to apply to the incoming term buffer
+        ///  </param>
+        public PatternKeywordMarkerFilter(TokenStream @in, Pattern pattern)
+            : base(@in)
+        {
+            termAtt = AddAttribute<ICharTermAttribute>();
+
+            this.matcher = pattern.matcher("");
+        }
+
+        protected internal override bool Keyword
+        {
+            get
+            {
+                matcher.Reset(termAtt);
+                return matcher.matches();
+            }
+        }
+
+    }
 }
\ No newline at end of file

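The filter above is typically placed between a tokenizer and a stemmer so that terms
matching the pattern keep their exact form. A minimal sketch, assuming the port's
Pattern shim; the tokenizer and stemmer names here are illustrative:

    // Protect capitalized terms (e.g. proper nouns) from stemming.
    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
    ts = new PatternKeywordMarkerFilter(ts, Pattern.compile("^[A-Z].*$"));
    ts = new PorterStemFilter(ts); // stemmers skip tokens whose KeywordAttribute is set
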
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
index f61b230..52c8ded 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
@@ -1,93 +1,94 @@
 using System.Collections.Generic;
 
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
 
 
 
-	/// <summary>
-	/// This analyzer is used to facilitate scenarios where different
-	/// fields require different analysis techniques.  Use the Map
-	/// argument in <seealso cref="#PerFieldAnalyzerWrapper(Analyzer, java.util.Map)"/>
-	/// to add non-default analyzers for fields.
-	/// 
-	/// <para>Example usage:
-	/// 
-	/// <pre class="prettyprint">
-	/// {@code
-	/// Map<String,Analyzer> analyzerPerField = new HashMap<>();
-	/// analyzerPerField.put("firstname", new KeywordAnalyzer());
-	/// analyzerPerField.put("lastname", new KeywordAnalyzer());
-	/// 
-	/// PerFieldAnalyzerWrapper aWrapper =
-	///   new PerFieldAnalyzerWrapper(new StandardAnalyzer(version), analyzerPerField);
-	/// }
-	/// </pre>
-	/// 
-	/// </para>
-	/// <para>In this example, StandardAnalyzer will be used for all fields except "firstname"
-	/// and "lastname", for which KeywordAnalyzer will be used.
-	/// 
-	/// </para>
-	/// <para>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
-	/// and query parsing.
-	/// </para>
-	/// </summary>
-	public sealed class PerFieldAnalyzerWrapper : AnalyzerWrapper
-	{
-	  private readonly Analyzer defaultAnalyzer;
-	  private readonly IDictionary<string, Analyzer> fieldAnalyzers;
+    /// <summary>
+    /// This analyzer is used to facilitate scenarios where different
+    /// fields require different analysis techniques.  Use the dictionary
+    /// argument in <seealso cref="PerFieldAnalyzerWrapper(Analyzer, IDictionary{string, Analyzer})"/>
+    /// to add non-default analyzers for fields.
+    /// 
+    /// <para>Example usage:
+    /// 
+    /// <pre class="prettyprint">
+    /// {@code
+    /// IDictionary<string, Analyzer> analyzerPerField = new Dictionary<string, Analyzer>();
+    /// analyzerPerField["firstname"] = new KeywordAnalyzer();
+    /// analyzerPerField["lastname"] = new KeywordAnalyzer();
+    /// 
+    /// PerFieldAnalyzerWrapper aWrapper =
+    ///   new PerFieldAnalyzerWrapper(new StandardAnalyzer(version), analyzerPerField);
+    /// }
+    /// </pre>
+    /// 
+    /// </para>
+    /// <para>In this example, StandardAnalyzer will be used for all fields except "firstname"
+    /// and "lastname", for which KeywordAnalyzer will be used.
+    /// 
+    /// </para>
+    /// <para>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
+    /// and query parsing.
+    /// </para>
+    /// </summary>
+    public sealed class PerFieldAnalyzerWrapper : AnalyzerWrapper
+    {
+        private readonly Analyzer defaultAnalyzer;
+        private readonly IDictionary<string, Analyzer> fieldAnalyzers;
 
-	  /// <summary>
-	  /// Constructs with default analyzer.
-	  /// </summary>
-	  /// <param name="defaultAnalyzer"> Any fields not specifically
-	  /// defined to use a different analyzer will use the one provided here. </param>
-	  public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) : this(defaultAnalyzer, null)
-	  {
-	  }
+        /// <summary>
+        /// Constructs with default analyzer.
+        /// </summary>
+        /// <param name="defaultAnalyzer"> Any fields not specifically
+        /// defined to use a different analyzer will use the one provided here. </param>
+        public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer)
+            : this(defaultAnalyzer, null)
+        {
+        }
 
-	  /// <summary>
-	  /// Constructs with default analyzer and a map of analyzers to use for 
-	  /// specific fields.
-	  /// </summary>
-	  /// <param name="defaultAnalyzer"> Any fields not specifically
-	  /// defined to use a different analyzer will use the one provided here. </param>
-	  /// <param name="fieldAnalyzers"> a Map (String field name to the Analyzer) to be 
-	  /// used for those fields  </param>
-	  public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, IDictionary<string, Analyzer> fieldAnalyzers) : base(PER_FIELD_REUSE_STRATEGY)
-	  {
-		this.defaultAnalyzer = defaultAnalyzer;
-		this.fieldAnalyzers = (fieldAnalyzers != null) ? fieldAnalyzers : System.Linq.Enumerable.Empty<string, Analyzer>();
-	  }
+        /// <summary>
+        /// Constructs with default analyzer and a map of analyzers to use for 
+        /// specific fields.
+        /// </summary>
+        /// <param name="defaultAnalyzer"> Any fields not specifically
+        /// defined to use a different analyzer will use the one provided here. </param>
+        /// <param name="fieldAnalyzers"> a Map (String field name to the Analyzer) to be 
+        /// used for those fields  </param>
+        public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, IDictionary<string, Analyzer> fieldAnalyzers)
+            : base(PER_FIELD_REUSE_STRATEGY)
+        {
+            this.defaultAnalyzer = defaultAnalyzer;
+            this.fieldAnalyzers = fieldAnalyzers ?? new Dictionary<string, Analyzer>();
+        }
 
-	  protected internal override Analyzer getWrappedAnalyzer(string fieldName)
-	  {
-		Analyzer analyzer = fieldAnalyzers[fieldName];
-		return (analyzer != null) ? analyzer : defaultAnalyzer;
-	  }
-
-	  public override string ToString()
-	  {
-		return "PerFieldAnalyzerWrapper(" + fieldAnalyzers + ", default=" + defaultAnalyzer + ")";
-	  }
-	}
+        protected override Analyzer GetWrappedAnalyzer(string fieldName)
+        {
+            Analyzer analyzer;
+            fieldAnalyzers.TryGetValue(fieldName, out analyzer);
+            return analyzer ?? defaultAnalyzer;
+        }
 
+        public override string ToString()
+        {
+            return "PerFieldAnalyzerWrapper(" + fieldAnalyzers + ", default=" + defaultAnalyzer + ")";
+        }
+    }
 }
\ No newline at end of file
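
For context, a minimal usage sketch of the ported wrapper (the LUCENE_48 constant and the analyzer names are assumptions carried over from Lucene, not part of the patch):

    // Analyzers for specific fields; everything else uses the default.
    var analyzerPerField = new Dictionary<string, Analyzer>();
    analyzerPerField["firstname"] = new KeywordAnalyzer();
    analyzerPerField["lastname"] = new KeywordAnalyzer();

    // "firstname" and "lastname" get KeywordAnalyzer; all other fields
    // fall back to StandardAnalyzer, for both indexing and query parsing.
    var wrapper = new PerFieldAnalyzerWrapper(
        new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_48), analyzerPerField);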

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
index de8b8ba..86f9548 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
@@ -1,4 +1,7 @@
-namespace org.apache.lucene.analysis.miscellaneous
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Miscellaneous;
+
+namespace Lucene.Net.Analysis.Miscellaneous
 {
 
 	/*
@@ -28,7 +31,7 @@
 	public class PrefixAndSuffixAwareTokenFilter : TokenStream
 	{
 
-	  private PrefixAwareTokenFilter suffix;
+	  private readonly PrefixAwareTokenFilter suffix;
 
 	  public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix) : base(suffix)
 	  {
@@ -68,29 +71,24 @@
 
 	  public virtual Token updateInputToken(Token inputToken, Token lastPrefixToken)
 	  {
-		inputToken.setOffset(lastPrefixToken.endOffset() + inputToken.startOffset(), lastPrefixToken.endOffset() + inputToken.endOffset());
+		inputToken.SetOffset(lastPrefixToken.EndOffset() + inputToken.StartOffset(), lastPrefixToken.EndOffset() + inputToken.EndOffset());
 		return inputToken;
 	  }
 
 	  public virtual Token updateSuffixToken(Token suffixToken, Token lastInputToken)
 	  {
-		suffixToken.setOffset(lastInputToken.endOffset() + suffixToken.startOffset(), lastInputToken.endOffset() + suffixToken.endOffset());
+		suffixToken.SetOffset(lastInputToken.EndOffset() + suffixToken.StartOffset(), lastInputToken.EndOffset() + suffixToken.EndOffset());
 		return suffixToken;
 	  }
 
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
+	  public override bool IncrementToken()
 	  {
 		return suffix.IncrementToken();
 	  }
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
+	  public override void Reset()
 	  {
-		suffix.reset();
+		suffix.Reset();
 	  }
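
A hypothetical sketch of wiring the three streams together (SingleTokenTokenStream, the Token(string, int, int) constructor, and Analyzer.TokenStream(string, TextReader) are assumed to keep their Lucene shapes in the port; analyzer and text are assumed in scope):

    // Surround the field's tokens with a start marker and an end marker.
    TokenStream prefix = new SingleTokenTokenStream(new Token("_start_", 0, 0));
    TokenStream suffix = new SingleTokenTokenStream(new Token("_end_", 0, 0));
    TokenStream body = analyzer.TokenStream("field", new StringReader(text));
    TokenStream marked = new PrefixAndSuffixAwareTokenFilter(prefix, body, suffix);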
 
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
index 7835e7a..13fd361 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
@@ -1,246 +1,224 @@
-namespace org.apache.lucene.analysis.miscellaneous
-{
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-	using FlagsAttribute = org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-	using PayloadAttribute = org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-	using BytesRef = org.apache.lucene.util.BytesRef;
-
-
-	/// <summary>
-	/// Joins two token streams and leaves the last token of the first stream available
-	/// to be used when updating the token values in the second stream based on that token.
-	/// 
-	/// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
-	/// <p/>
-	/// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
-	/// the ones located in org.apache.lucene.analysis.tokenattributes. 
-	/// </summary>
-	public class PrefixAwareTokenFilter : TokenStream
-	{
-
-	  private TokenStream prefix;
-	  private TokenStream suffix;
-
-	  private CharTermAttribute termAtt;
-	  private PositionIncrementAttribute posIncrAtt;
-	  private PayloadAttribute payloadAtt;
-	  private OffsetAttribute offsetAtt;
-	  private TypeAttribute typeAtt;
-	  private FlagsAttribute flagsAtt;
-
-	  private CharTermAttribute p_termAtt;
-	  private PositionIncrementAttribute p_posIncrAtt;
-	  private PayloadAttribute p_payloadAtt;
-	  private OffsetAttribute p_offsetAtt;
-	  private TypeAttribute p_typeAtt;
-	  private FlagsAttribute p_flagsAtt;
-
-	  public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
-	  {
-		this.suffix = suffix;
-		this.prefix = prefix;
-		prefixExhausted = false;
-
-		termAtt = addAttribute(typeof(CharTermAttribute));
-		posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
-		payloadAtt = addAttribute(typeof(PayloadAttribute));
-		offsetAtt = addAttribute(typeof(OffsetAttribute));
-		typeAtt = addAttribute(typeof(TypeAttribute));
-		flagsAtt = addAttribute(typeof(FlagsAttribute));
-
-		p_termAtt = prefix.addAttribute(typeof(CharTermAttribute));
-		p_posIncrAtt = prefix.addAttribute(typeof(PositionIncrementAttribute));
-		p_payloadAtt = prefix.addAttribute(typeof(PayloadAttribute));
-		p_offsetAtt = prefix.addAttribute(typeof(OffsetAttribute));
-		p_typeAtt = prefix.addAttribute(typeof(TypeAttribute));
-		p_flagsAtt = prefix.addAttribute(typeof(FlagsAttribute));
-	  }
-
-	  private Token previousPrefixToken = new Token();
-	  private Token reusableToken = new Token();
-
-	  private bool prefixExhausted;
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		if (!prefixExhausted)
-		{
-		  Token nextToken = getNextPrefixInputToken(reusableToken);
-		  if (nextToken == null)
-		  {
-			prefixExhausted = true;
-		  }
-		  else
-		  {
-			previousPrefixToken.reinit(nextToken);
-			// Make it a deep copy
-			BytesRef p = previousPrefixToken.Payload;
-			if (p != null)
-			{
-			  previousPrefixToken.Payload = p.clone();
-			}
-			CurrentToken = nextToken;
-			return true;
-		  }
-		}
-
-		Token nextToken = getNextSuffixInputToken(reusableToken);
-		if (nextToken == null)
-		{
-		  return false;
-		}
-
-		nextToken = updateSuffixToken(nextToken, previousPrefixToken);
-		CurrentToken = nextToken;
-		return true;
-	  }
-
-	  private Token CurrentToken
-	  {
-		  set
-		  {
-			if (value == null)
-			{
-				return;
-			}
-			clearAttributes();
-			termAtt.copyBuffer(value.buffer(), 0, value.length());
-			posIncrAtt.PositionIncrement = value.PositionIncrement;
-			flagsAtt.Flags = value.Flags;
-			offsetAtt.setOffset(value.startOffset(), value.endOffset());
-			typeAtt.Type = value.type();
-			payloadAtt.Payload = value.Payload;
-		  }
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private org.apache.lucene.analysis.Token getNextPrefixInputToken(org.apache.lucene.analysis.Token token) throws java.io.IOException
-	  private Token getNextPrefixInputToken(Token token)
-	  {
-		if (!prefix.incrementToken())
-		{
-			return null;
-		}
-		token.copyBuffer(p_termAtt.buffer(), 0, p_termAtt.length());
-		token.PositionIncrement = p_posIncrAtt.PositionIncrement;
-		token.Flags = p_flagsAtt.Flags;
-		token.setOffset(p_offsetAtt.startOffset(), p_offsetAtt.endOffset());
-		token.Type = p_typeAtt.type();
-		token.Payload = p_payloadAtt.Payload;
-		return token;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private org.apache.lucene.analysis.Token getNextSuffixInputToken(org.apache.lucene.analysis.Token token) throws java.io.IOException
-	  private Token getNextSuffixInputToken(Token token)
-	  {
-		if (!suffix.incrementToken())
-		{
-			return null;
-		}
-		token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
-		token.PositionIncrement = posIncrAtt.PositionIncrement;
-		token.Flags = flagsAtt.Flags;
-		token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
-		token.Type = typeAtt.type();
-		token.Payload = payloadAtt.Payload;
-		return token;
-	  }
-
-	  /// <summary>
-	  /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
-	  /// </summary>
-	  /// <param name="suffixToken"> a token from the suffix stream </param>
-	  /// <param name="lastPrefixToken"> the last token from the prefix stream </param>
-	  /// <returns> consumer token </returns>
-	  public virtual Token updateSuffixToken(Token suffixToken, Token lastPrefixToken)
-	  {
-		suffixToken.setOffset(lastPrefixToken.endOffset() + suffixToken.startOffset(), lastPrefixToken.endOffset() + suffixToken.endOffset());
-		return suffixToken;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void end() throws java.io.IOException
-	  public override void end()
-	  {
-		prefix.end();
-		suffix.end();
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void close() throws java.io.IOException
-	  public override void close()
-	  {
-		prefix.close();
-		suffix.close();
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
-	  {
-		base.reset();
-		if (prefix != null)
-		{
-		  prefixExhausted = false;
-		  prefix.reset();
-		}
-		if (suffix != null)
-		{
-		  suffix.reset();
-		}
-
-
-	  }
-
-	  public virtual TokenStream Prefix
-	  {
-		  get
-		  {
-			return prefix;
-		  }
-		  set
-		  {
-			this.prefix = value;
-		  }
-	  }
-
-
-	  public virtual TokenStream Suffix
-	  {
-		  get
-		  {
-			return suffix;
-		  }
-		  set
-		  {
-			this.suffix = value;
-		  }
-	  }
-
-	}
+namespace Lucene.Net.Analysis.Miscellaneous
+{
 
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Joins two token streams and leaves the last token of the first stream available
+    /// to be used when updating the token values in the second stream based on that token.
+    /// 
+    /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
+    /// <p/>
+    /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
+    /// the ones located in org.apache.lucene.analysis.tokenattributes. 
+    /// </summary>
+    public class PrefixAwareTokenFilter : TokenStream
+    {
+
+        private TokenStream prefix;
+        private TokenStream suffix;
+
+        private readonly ICharTermAttribute termAtt;
+        private readonly IPositionIncrementAttribute posIncrAtt;
+        private readonly IPayloadAttribute payloadAtt;
+        private readonly IOffsetAttribute offsetAtt;
+        private readonly ITypeAttribute typeAtt;
+        private readonly IFlagsAttribute flagsAtt;
+
+        private readonly ICharTermAttribute p_termAtt;
+        private readonly IPositionIncrementAttribute p_posIncrAtt;
+        private readonly IPayloadAttribute p_payloadAtt;
+        private readonly IOffsetAttribute p_offsetAtt;
+        private readonly ITypeAttribute p_typeAtt;
+        private readonly IFlagsAttribute p_flagsAtt;
+
+        public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix)
+            : base(suffix)
+        {
+            this.suffix = suffix;
+            this.prefix = prefix;
+            prefixExhausted = false;
+
+            termAtt = AddAttribute<ICharTermAttribute>();
+            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+            payloadAtt = AddAttribute<IPayloadAttribute>();
+            offsetAtt = AddAttribute<IOffsetAttribute>();
+            typeAtt = AddAttribute<ITypeAttribute>();
+            flagsAtt = AddAttribute<IFlagsAttribute>();
+
+            p_termAtt = prefix.AddAttribute<ICharTermAttribute>();
+            p_posIncrAtt = prefix.AddAttribute<IPositionIncrementAttribute>();
+            p_payloadAtt = prefix.AddAttribute<IPayloadAttribute>();
+            p_offsetAtt = prefix.AddAttribute<IOffsetAttribute>();
+            p_typeAtt = prefix.AddAttribute<ITypeAttribute>();
+            p_flagsAtt = prefix.AddAttribute<IFlagsAttribute>();
+        }
+
+        private readonly Token previousPrefixToken = new Token();
+        private readonly Token reusableToken = new Token();
+
+        private bool prefixExhausted;
+
+        public override bool IncrementToken()
+        {
+            if (!prefixExhausted)
+            {
+                Token nextToken = GetNextPrefixInputToken(reusableToken);
+                if (nextToken == null)
+                {
+                    prefixExhausted = true;
+                }
+                else
+                {
+                    previousPrefixToken.Reinit(nextToken);
+                    // Make it a deep copy
+                    BytesRef p = previousPrefixToken.Payload;
+                    if (p != null)
+                    {
+                        previousPrefixToken.Payload = p.Clone();
+                    }
+                    CurrentToken = nextToken;
+                    return true;
+                }
+            }
+
+            Token nextToken = GetNextSuffixInputToken(reusableToken);
+            if (nextToken == null)
+            {
+                return false;
+            }
+
+            nextToken = updateSuffixToken(nextToken, previousPrefixToken);
+            CurrentToken = nextToken;
+            return true;
+        }
+
+        private Token CurrentToken
+        {
+            set
+            {
+                if (value == null)
+                {
+                    return;
+                }
+                ClearAttributes();
+                termAtt.CopyBuffer(value.Buffer(), 0, value.Length());
+                posIncrAtt.PositionIncrement = value.PositionIncrement;
+                flagsAtt.Flags = value.Flags;
+                offsetAtt.SetOffset(value.StartOffset(), value.EndOffset());
+                typeAtt.Type = value.Type;
+                payloadAtt.Payload = value.Payload;
+            }
+        }
+
+        private Token GetNextPrefixInputToken(Token token)
+        {
+            if (!prefix.IncrementToken())
+            {
+                return null;
+            }
+            token.CopyBuffer(p_termAtt.Buffer(), 0, p_termAtt.Length());
+            token.PositionIncrement = p_posIncrAtt.PositionIncrement;
+            token.Flags = p_flagsAtt.Flags;
+            token.SetOffset(p_offsetAtt.StartOffset(), p_offsetAtt.EndOffset());
+            token.Type = p_typeAtt.Type;
+            token.Payload = p_payloadAtt.Payload;
+            return token;
+        }
+
+        private Token GetNextSuffixInputToken(Token token)
+        {
+            if (!suffix.IncrementToken())
+            {
+                return null;
+            }
+            token.CopyBuffer(termAtt.Buffer(), 0, termAtt.Length());
+            token.PositionIncrement = posIncrAtt.PositionIncrement;
+            token.Flags = flagsAtt.Flags;
+            token.SetOffset(offsetAtt.StartOffset(), offsetAtt.EndOffset());
+            token.Type = typeAtt.Type;
+            token.Payload = payloadAtt.Payload;
+            return token;
+        }
+
+        /// <summary>
+        /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
+        /// </summary>
+        /// <param name="suffixToken"> a token from the suffix stream </param>
+        /// <param name="lastPrefixToken"> the last token from the prefix stream </param>
+        /// <returns> consumer token </returns>
+        public virtual Token updateSuffixToken(Token suffixToken, Token lastPrefixToken)
+        {
+            suffixToken.SetOffset(lastPrefixToken.EndOffset() + suffixToken.StartOffset(), lastPrefixToken.EndOffset() + suffixToken.EndOffset());
+            return suffixToken;
+        }
+
+        public override void End()
+        {
+            prefix.End();
+            suffix.End();
+        }
+
+        public override void Dispose()
+        {
+            prefix.Dispose();
+            suffix.Dispose();
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            if (prefix != null)
+            {
+                prefixExhausted = false;
+                prefix.Reset();
+            }
+            if (suffix != null)
+            {
+                suffix.Reset();
+            }
+        }
+
+        public virtual TokenStream Prefix
+        {
+            get
+            {
+                return prefix;
+            }
+            set
+            {
+                this.prefix = value;
+            }
+        }
+
+
+        public virtual TokenStream Suffix
+        {
+            get
+            {
+                return suffix;
+            }
+            set
+            {
+                this.suffix = value;
+            }
+        }
+    }
 }
\ No newline at end of file
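
To make the default offset arithmetic concrete, a hedged example (the Token(string, int, int) constructor is assumed, and filter/lastPrefixToken are assumed already constructed): if the last prefix token ends at offset 7, a suffix token spanning [0, 3) is re-reported at [7, 10):

    Token suffixToken = new Token("abc", 0, 3);
    // assumed: lastPrefixToken.EndOffset() == 7
    Token shifted = filter.updateSuffixToken(suffixToken, lastPrefixToken);
    // shifted.StartOffset() == 7, shifted.EndOffset() == 10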

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
index 59b8dcb..85b997e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Analysis.Util;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Util;
 
 namespace org.apache.lucene.analysis.ngram
 {
@@ -20,7 +21,7 @@ namespace org.apache.lucene.analysis.ngram
 	 * limitations under the License.
 	 */
 
-	using CodepointCountFilter = org.apache.lucene.analysis.miscellaneous.CodepointCountFilter;
+	using CodepointCountFilter = Lucene.Net.Analysis.Miscellaneous.CodepointCountFilter;
 	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
index af4c555..5fe93c3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
@@ -1,7 +1,11 @@
 using System;
 using System.Collections.Generic;
+using System.IO;
 using Lucene.Net.Analysis.Core;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
 using org.apache.lucene.analysis.util;
+using Version = Lucene.Net.Util.Version;
 
 namespace Lucene.Net.Analysis.Util
 {
@@ -44,7 +48,7 @@ namespace Lucene.Net.Analysis.Util
 
 	  /// <summary>
 	  /// the luceneVersion arg </summary>
-	  protected internal readonly Lucene.Net.Util.Version? luceneMatchVersion;
+	  protected internal readonly Lucene.Net.Util.Version luceneMatchVersion;
 
         /// <summary>
 	  /// Initialize this factory via a set of key-value pairs.
@@ -75,7 +79,6 @@ namespace Lucene.Net.Analysis.Util
 	  {
 		if (luceneMatchVersion == null)
 		{
-//JAVA TO C# CONVERTER WARNING: The .NET Type.FullName property will not always yield results identical to the Java Class.getName method:
 		  throw new System.ArgumentException("Configuration Error: Factory '" + this.GetType().FullName + "' needs a 'luceneMatchVersion' parameter");
 		}
 	  }
@@ -278,8 +281,6 @@ namespace Lucene.Net.Analysis.Util
 	  /// Returns as <seealso cref="CharArraySet"/> from wordFiles, which
 	  /// can be a comma-separated list of filenames
 	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: protected final CharArraySet getWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase) throws java.io.IOException
 	  protected internal CharArraySet GetWordSet(ResourceLoader loader, string wordFiles, bool ignoreCase)
 	  {
 		assureMatchVersion();
@@ -292,8 +293,8 @@ namespace Lucene.Net.Analysis.Util
 		  words = new CharArraySet(luceneMatchVersion, files.Count * 10, ignoreCase);
 		  foreach (string file in files)
 		  {
-			IList<string> wlist = getLines(loader, file.Trim());
-			words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist, ignoreCase));
+			var wlist = getLines(loader, file.Trim());
+			words.AddAll(StopFilter.MakeStopSet(luceneMatchVersion, wlist, ignoreCase));
 		  }
 		}
 		return words;
@@ -324,7 +325,7 @@ namespace Lucene.Net.Analysis.Util
 		  foreach (string file in files)
 		  {
 			InputStream stream = null;
-			Reader reader = null;
+			TextReader reader = null;
 			try
 			{
 			  stream = loader.openResource(file.Trim());
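
A hedged sketch of the word-set helper in use from a factory subclass (the file names, and loader being a ResourceLoader handed in by the hosting factory, are illustrative only):

    // Loads and merges two stop word files into one case-insensitive set;
    // wordFiles is a comma-separated list of file names.
    CharArraySet stopWords = GetWordSet(loader, "stop1.txt, stop2.txt", true);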

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
index e6d7cac..02d5ac8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
@@ -71,7 +71,7 @@ namespace Lucene.Net.Analysis.Util
 	  /// <param name="ignoreCase">
 	  ///          <code>false</code> if and only if the set should be case sensitive
 	  ///          otherwise <code>true</code>. </param>
-	  public CharArraySet(Version matchVersion, int startSize, bool ignoreCase) : this(new CharArrayMap<>(matchVersion, startSize, ignoreCase))
+	  public CharArraySet(Lucene.Net.Util.Version matchVersion, int startSize, bool ignoreCase) : this(new CharArrayMap<object>(matchVersion, startSize, ignoreCase))
 	  {
 	  }
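
For reference, a construction sketch (the LUCENE_48 constant and the member casing are assumptions about the ported API):

    var set = new CharArraySet(Lucene.Net.Util.Version.LUCENE_48, 16, true);
    set.Add("foo");
    // The char[] overload avoids allocating a string per lookup.
    bool found = set.Contains("foo".ToCharArray(), 0, 3);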
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69f29113/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
index d5eb9fd..a52cc83 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
@@ -44,7 +44,7 @@ namespace Lucene.Net.Analysis.Util
         }
 
         protected internal readonly Version version;
-        private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+        private readonly IPositionIncrementAttribute posIncrAtt;
         private bool enablePositionIncrements; // no init needed, as ctor enforces setting value!
         private bool first = true;
         private int skippedPositions;
@@ -56,9 +56,10 @@ namespace Lucene.Net.Analysis.Util
         /// <param name="input">                    the input to consume </param>
         /// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4 
         [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4")]
-        public FilteringTokenFilter(Version version, bool enablePositionIncrements, TokenStream input)
+        public FilteringTokenFilter(Lucene.Net.Util.Version version, bool enablePositionIncrements, TokenStream input)
             : this(version, input)
         {
+            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
             CheckPositionIncrement(version, enablePositionIncrements);
             this.enablePositionIncrements = enablePositionIncrements;
         }
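
Since enablePositionIncrements=false is unsupported as of Lucene 4.4, new code should reach subclasses through the two-argument constructor; a hedged sketch using LengthFilter as an arbitrary concrete subclass (assuming it is ported with the same signature as Lucene's):

    // Keeps only tokens of 2 to 20 chars, preserving position increments.
    TokenStream ts = new LengthFilter(Lucene.Net.Util.Version.LUCENE_48, input, 2, 20);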

