lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From paulir...@apache.org
Subject [31/53] [abbrv] charfilter namespace
Date Thu, 07 Nov 2013 13:53:46 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d6717ed8/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs b/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs
index 8a6caee..47c971b 100644
--- a/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs
+++ b/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs
@@ -21,8 +21,10 @@
 
 using System;
 using System.Collections.Generic;
+using System.Globalization;
 using System.IO;
 using System.Text.RegularExpressions;
+using Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
 using Version = Lucene.Net.Util.Version;
@@ -68,49 +70,49 @@ namespace Lucene.Net.Analysis.Miscellaneous
         public static readonly Regex WHITESPACE_PATTERN = new Regex("\\s+", RegexOptions.Compiled);
 
         private static readonly CharArraySet EXTENDED_ENGLISH_STOP_WORDS =
-          CharArraySet.UnmodifiableSet(new CharArraySet((IEnumerable<string>)new[]{
-      "a", "about", "above", "across", "adj", "after", "afterwards",
-      "again", "against", "albeit", "all", "almost", "alone", "along",
-      "already", "also", "although", "always", "among", "amongst", "an",
-      "and", "another", "any", "anyhow", "anyone", "anything",
-      "anywhere", "are", "around", "as", "at", "be", "became", "because",
-      "become", "becomes", "becoming", "been", "before", "beforehand",
-      "behind", "being", "below", "beside", "besides", "between",
-      "beyond", "both", "but", "by", "can", "cannot", "co", "could",
-      "down", "during", "each", "eg", "either", "else", "elsewhere",
-      "enough", "etc", "even", "ever", "every", "everyone", "everything",
-      "everywhere", "except", "few", "first", "for", "former",
-      "formerly", "from", "further", "had", "has", "have", "he", "hence",
-      "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers",
-      "herself", "him", "himself", "his", "how", "however", "i", "ie", "if",
-      "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last",
-      "latter", "latterly", "least", "less", "ltd", "many", "may", "me",
-      "meanwhile", "might", "more", "moreover", "most", "mostly", "much",
-      "must", "my", "myself", "namely", "neither", "never",
-      "nevertheless", "next", "no", "nobody", "none", "noone", "nor",
-      "not", "nothing", "now", "nowhere", "of", "off", "often", "on",
-      "once one", "only", "onto", "or", "other", "others", "otherwise",
-      "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps",
-      "rather", "s", "same", "seem", "seemed", "seeming", "seems",
-      "several", "she", "should", "since", "so", "some", "somehow",
-      "someone", "something", "sometime", "sometimes", "somewhere",
-      "still", "such", "t", "than", "that", "the", "their", "them",
-      "themselves", "then", "thence", "there", "thereafter", "thereby",
-      "therefor", "therein", "thereupon", "these", "they", "this",
-      "those", "though", "through", "throughout", "thru", "thus", "to",
-      "together", "too", "toward", "towards", "under", "until", "up",
-      "upon", "us", "very", "via", "was", "we", "well", "were", "what",
-      "whatever", "whatsoever", "when", "whence", "whenever",
-      "whensoever", "where", "whereafter", "whereas", "whereat",
-      "whereby", "wherefrom", "wherein", "whereinto", "whereof",
-      "whereon", "whereto", "whereunto", "whereupon", "wherever",
-      "wherewith", "whether", "which", "whichever", "whichsoever",
-      "while", "whilst", "whither", "who", "whoever", "whole", "whom",
-      "whomever", "whomsoever", "whose", "whosoever", "why", "will",
-      "with", "within", "without", "would", "xsubj", "xcal", "xauthor",
-      "xother ", "xnote", "yet", "you", "your", "yours", "yourself",
-      "yourselves"
-    }, true));
+          CharArraySet.UnmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, new[]{
+              "a", "about", "above", "across", "adj", "after", "afterwards",
+              "again", "against", "albeit", "all", "almost", "alone", "along",
+              "already", "also", "although", "always", "among", "amongst", "an",
+              "and", "another", "any", "anyhow", "anyone", "anything",
+              "anywhere", "are", "around", "as", "at", "be", "became", "because",
+              "become", "becomes", "becoming", "been", "before", "beforehand",
+              "behind", "being", "below", "beside", "besides", "between",
+              "beyond", "both", "but", "by", "can", "cannot", "co", "could",
+              "down", "during", "each", "eg", "either", "else", "elsewhere",
+              "enough", "etc", "even", "ever", "every", "everyone", "everything",
+              "everywhere", "except", "few", "first", "for", "former",
+              "formerly", "from", "further", "had", "has", "have", "he", "hence",
+              "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers",
+              "herself", "him", "himself", "his", "how", "however", "i", "ie", "if",
+              "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last",
+              "latter", "latterly", "least", "less", "ltd", "many", "may", "me",
+              "meanwhile", "might", "more", "moreover", "most", "mostly", "much",
+              "must", "my", "myself", "namely", "neither", "never",
+              "nevertheless", "next", "no", "nobody", "none", "noone", "nor",
+              "not", "nothing", "now", "nowhere", "of", "off", "often", "on",
+              "once one", "only", "onto", "or", "other", "others", "otherwise",
+              "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps",
+              "rather", "s", "same", "seem", "seemed", "seeming", "seems",
+              "several", "she", "should", "since", "so", "some", "somehow",
+              "someone", "something", "sometime", "sometimes", "somewhere",
+              "still", "such", "t", "than", "that", "the", "their", "them",
+              "themselves", "then", "thence", "there", "thereafter", "thereby",
+              "therefor", "therein", "thereupon", "these", "they", "this",
+              "those", "though", "through", "throughout", "thru", "thus", "to",
+              "together", "too", "toward", "towards", "under", "until", "up",
+              "upon", "us", "very", "via", "was", "we", "well", "were", "what",
+              "whatever", "whatsoever", "when", "whence", "whenever",
+              "whensoever", "where", "whereafter", "whereas", "whereat",
+              "whereby", "wherefrom", "wherein", "whereinto", "whereof",
+              "whereon", "whereto", "whereunto", "whereupon", "wherever",
+              "wherewith", "whether", "which", "whichever", "whichsoever",
+              "while", "whilst", "whither", "who", "whoever", "whole", "whom",
+              "whomever", "whomsoever", "whose", "whosoever", "why", "will",
+              "with", "within", "without", "would", "xsubj", "xcal", "xauthor",
+              "xother ", "xnote", "yet", "you", "your", "yours", "yourself",
+              "yourselves"
+            }, true));
 
         /*
          * A lower-casing word analyzer with English stop words (can be shared
@@ -180,30 +182,30 @@ namespace Lucene.Net.Analysis.Miscellaneous
          *            the string to tokenize
          * @return a new token stream
          */
-        public TokenStream TokenStream(String fieldName, String text)
-        {
-            // Ideally the Analyzer superclass should have a method with the same signature,

-            // with a default impl that simply delegates to the StringReader flavour. 
-            if (text == null)
-                throw new ArgumentException("text must not be null");
-
-            TokenStream stream;
-            if (Regex == NON_WORD_PATTERN)
-            { // fast path
-                stream = new FastStringTokenizer(text, true, toLowerCase, stopWords);
-            }
-            else if (Regex == WHITESPACE_PATTERN)
-            { // fast path
-                stream = new FastStringTokenizer(text, false, toLowerCase, stopWords);
-            }
-            else
-            {
-                stream = new RegexTokenizer(text, Regex, toLowerCase);
-                if (stopWords != null) stream = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
stream, stopWords);
-            }
-
-            return stream;
-        }
+        //public TokenStream TokenStream(String fieldName, String text)
+        //{
+        //    // Ideally the Analyzer superclass should have a method with the same signature,

+        //    // with a default impl that simply delegates to the StringReader flavour. 
+        //    if (text == null)
+        //        throw new ArgumentException("text must not be null");
+
+        //    TokenStream stream;
+        //    if (Regex == NON_WORD_PATTERN)
+        //    { // fast path
+        //        stream = new FastStringTokenizer(text, true, toLowerCase, stopWords);
+        //    }
+        //    else if (Regex == WHITESPACE_PATTERN)
+        //    { // fast path
+        //        stream = new FastStringTokenizer(text, false, toLowerCase, stopWords);
+        //    }
+        //    else
+        //    {
+        //        stream = new RegexTokenizer(text, Regex, toLowerCase);
+        //        if (stopWords != null) stream = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
stream, stopWords);
+        //    }
+
+        //    return stream;
+        //}
 
         /*
          * Creates a token stream that tokenizes all the text in the given Reader;
@@ -216,24 +218,51 @@ namespace Lucene.Net.Analysis.Miscellaneous
          *            the reader delivering the text
          * @return a new token stream
          */
-        public override TokenStream TokenStream(String fieldName, TextReader reader)
+        //public override TokenStream TokenStream(String fieldName, TextReader reader)
+        //{
+        //    if (reader is FastStringReader)
+        //    { // fast path
+        //        return TokenStream(fieldName, ((FastStringReader)reader).GetString());
+        //    }
+
+        //    try
+        //    {
+        //        String text = ToString(reader);
+        //        return TokenStream(fieldName, text);
+        //    }
+        //    catch (IOException e)
+        //    {
+        //        throw new Exception("Wrapped Exception", e);
+        //    }
+        //}
+
+
+        public TokenStreamComponents CreateComponents(string fieldName, TextReader reader,
string text)
         {
-            if (reader is FastStringReader)
-            { // fast path
-                return TokenStream(fieldName, ((FastStringReader)reader).GetString());
-            }
+            if (reader == null)
+                reader = new FastStringReader(text);
 
-            try
-            {
-                String text = ToString(reader);
-                return TokenStream(fieldName, text);
+            if (Regex == NON_WORD_PATTERN)
+            { // fast path
+                return new TokenStreamComponents(new FastStringTokenizer(reader, true, toLowerCase,
stopWords));
             }
-            catch (IOException e)
-            {
-                throw new Exception("Wrapped Exception", e);
+            else if (Regex == WHITESPACE_PATTERN)
+            { // fast path
+                return new TokenStreamComponents(new FastStringTokenizer(reader, false, toLowerCase,
stopWords));
             }
+
+            Tokenizer tokenizer = new RegexTokenizer(reader, Regex, toLowerCase);
+            TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer,
stopWords) : tokenizer;
+            return new TokenStreamComponents(tokenizer, result);
         }
 
+
+        public override Analyzer.TokenStreamComponents CreateComponents(string fieldName,
TextReader reader)
+        {
+            return CreateComponents(fieldName, reader, null);
+        }
+
+
         /*
          * Indicates whether some other object is "equal to" this one.
          * 
@@ -249,7 +278,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
 
             if (other is PatternAnalyzer)
             {
-                PatternAnalyzer p2 = (PatternAnalyzer)other;
+                var p2 = (PatternAnalyzer)other;
                 return
                   toLowerCase == p2.toLowerCase &&
                   EqRegex(Regex, p2.Regex) &&
@@ -296,6 +325,11 @@ namespace Lucene.Net.Analysis.Miscellaneous
          */
         private static String ToString(TextReader input)
         {
+            if (input is FastStringReader) // fast path
+            {
+                return ((FastStringReader) input).GetString();
+            }
+
             try
             {
                 int len = 256;
@@ -304,7 +338,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
 
                 len = 0;
                 int n;
-                while ((n = input.Read(buffer, 0, buffer.Length)) != 0)
+                while ((n = input.Read(buffer, 0, buffer.Length)) >= 0)
                 {
                     if (len + n > output.Length)
                     { // grow capacity
@@ -337,23 +371,23 @@ namespace Lucene.Net.Analysis.Miscellaneous
          * The work horse; performance isn't fantastic, but it's not nearly as bad
          * as one might think - kudos to the Sun regex developers.
          */
-        private sealed class RegexTokenizer : TokenStream
+        private sealed class RegexTokenizer : Tokenizer
         {
-
-            private readonly String str;
+            private readonly Regex regex;
+            private String str;
             private readonly bool toLowerCase;
             private Match matcher;
             private int pos = 0;
-            private static readonly System.Globalization.CultureInfo locale = System.Globalization.CultureInfo.CurrentCulture;
-            private ITermAttribute termAtt;
+            private static readonly CultureInfo locale = CultureInfo.CurrentCulture;
+            private ICharTermAttribute termAtt;
             private IOffsetAttribute offsetAtt;
 
-            public RegexTokenizer(String str, Regex regex, bool toLowerCase)
+            public RegexTokenizer(TextReader input, Regex regex, bool toLowerCase)
+                :base(input) 
             {
-                this.str = str;
-                this.matcher = regex.Match(str);
+                this.matcher = regex.Match("");
                 this.toLowerCase = toLowerCase;
-                this.termAtt = AddAttribute<ITermAttribute>();
+                this.termAtt = AddAttribute<ICharTermAttribute>();
                 this.offsetAtt = AddAttribute<IOffsetAttribute>();
             }
 
@@ -380,23 +414,31 @@ namespace Lucene.Net.Analysis.Miscellaneous
 
                     if (start != end)
                     { // non-empty match (header/trailer)
-                        String text = str.Substring(start, end - start);
+                        var text = str.Substring(start, end);
                         if (toLowerCase) text = text.ToLower(locale);
-                        termAtt.SetTermBuffer(text);
-                        offsetAtt.SetOffset(start, end);
+                        termAtt.SetEmpty().Append(text);
+                        offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(end));
                         return true;
                     }
-                    return false;
+                    if (!isMatch) return false;
                 }
             }
 
             public override sealed void End()
             {
                 // set final offset
-                int finalOffset = str.Length;
+                int finalOffset = CorrectOffset(str.Length);
                 this.offsetAtt.SetOffset(finalOffset, finalOffset);
             }
 
+            public override void Reset()
+            {
+                base.Reset();
+                this.str = PatternAnalyzer.ToString(input);
+                this.matcher = regex.Match(this.str);
+                this.pos = 0;
+            }
+
             protected override void Dispose(bool disposing)
             {
                 // Do Nothing
@@ -411,25 +453,25 @@ namespace Lucene.Net.Analysis.Miscellaneous
          * Special-case class for best performance in common cases; this class is
          * otherwise unnecessary.
          */
-        private sealed class FastStringTokenizer : TokenStream
+        private sealed class FastStringTokenizer : Tokenizer
         {
 
             private readonly String str;
             private int pos;
             private readonly bool isLetter;
             private readonly bool toLowerCase;
-            private readonly ISet<string> stopWords;
-            private static readonly System.Globalization.CultureInfo locale = System.Globalization.CultureInfo.CurrentCulture;
-            private ITermAttribute termAtt;
+            private readonly CharArraySet stopWords;
+            private static readonly CultureInfo locale = CultureInfo.CurrentCulture;
+            private ICharTermAttribute termAtt;
             private IOffsetAttribute offsetAtt;
 
-            public FastStringTokenizer(String str, bool isLetter, bool toLowerCase, ISet<string>
stopWords)
+            public FastStringTokenizer(TextReader input, bool isLetter, bool toLowerCase,
CharArraySet stopWords)
+                :base(input)
             {
-                this.str = str;
                 this.isLetter = isLetter;
                 this.toLowerCase = toLowerCase;
                 this.stopWords = stopWords;
-                this.termAtt = AddAttribute<ITermAttribute>();
+                this.termAtt = AddAttribute<ICharTermAttribute>();
                 this.offsetAtt = AddAttribute<IOffsetAttribute>();
             }
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d6717ed8/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs b/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
index 45e1d19..4dabfa3 100644
--- a/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
+++ b/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
@@ -16,168 +16,206 @@
  */
 
 using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Index;
+using Lucene.Net.Util;
 
 namespace Lucene.Net.Analysis.Miscellaneous
 {
     /// <summary>
     /// Joins two token streams and leaves the last token of the first stream available
     /// to be used when updating the token values in the second stream based on that token.
-    /// 
+    ///
     /// The default implementation adds last prefix token end offset to the suffix token
start and end offsets.
     /// <p/>
     /// <b>NOTE:</b> This filter might not behave correctly if used with custom
Attributes, i.e. Attributes other than
-    /// the ones located in Lucene.Net.Analysis.TokenAttributes. 
+    /// the ones located in org.apache.lucene.analysis.tokenattributes. 
     /// </summary>
     public class PrefixAwareTokenFilter : TokenStream
     {
-        private readonly IFlagsAttribute _flagsAtt;
-        private readonly IOffsetAttribute _offsetAtt;
-        private readonly IFlagsAttribute _pFlagsAtt;
-
-        private readonly IOffsetAttribute _pOffsetAtt;
-        private readonly IPayloadAttribute _pPayloadAtt;
-        private readonly IPositionIncrementAttribute _pPosIncrAtt;
-        private readonly ITermAttribute _pTermAtt;
-        private readonly ITypeAttribute _pTypeAtt;
-        private readonly IPayloadAttribute _payloadAtt;
-        private readonly IPositionIncrementAttribute _posIncrAtt;
-
-        private readonly Token _previousPrefixToken = new Token();
-        private readonly Token _reusableToken = new Token();
-        private readonly ITermAttribute _termAtt;
-        private readonly ITypeAttribute _typeAtt;
-
-        private bool _prefixExhausted;
-
-        public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
+        private TokenStream prefix;
+        private TokenStream suffix;
+
+        private ICharTermAttribute termAtt;
+        private IPositionIncrementAttribute posIncrAtt;
+        private IPayloadAttribute payloadAtt;
+        private IOffsetAttribute offsetAtt;
+        private ITypeAttribute typeAtt;
+        private IFlagsAttribute flagsAtt;
+
+        private ICharTermAttribute p_termAtt;
+        private IPositionIncrementAttribute p_posIncrAtt;
+        private IPayloadAttribute p_payloadAtt;
+        private IOffsetAttribute p_offsetAtt;
+        private ITypeAttribute p_typeAtt;
+        private IFlagsAttribute p_flagsAtt;
+
+        public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix)
+            : base(suffix)
         {
-            Suffix = suffix;
-            Prefix = prefix;
-            _prefixExhausted = false;
-
-            // ReSharper disable DoNotCallOverridableMethodsInConstructor
-            _termAtt = AddAttribute<ITermAttribute>();
-            _posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
-            _payloadAtt = AddAttribute<IPayloadAttribute>();
-            _offsetAtt = AddAttribute<IOffsetAttribute>();
-            _typeAtt = AddAttribute<ITypeAttribute>();
-            _flagsAtt = AddAttribute<IFlagsAttribute>();
-            // ReSharper restore DoNotCallOverridableMethodsInConstructor
-
-            _pTermAtt = prefix.AddAttribute<ITermAttribute>();
-            _pPosIncrAtt = prefix.AddAttribute<IPositionIncrementAttribute>();
-            _pPayloadAtt = prefix.AddAttribute<IPayloadAttribute>();
-            _pOffsetAtt = prefix.AddAttribute<IOffsetAttribute>();
-            _pTypeAtt = prefix.AddAttribute<ITypeAttribute>();
-            _pFlagsAtt = prefix.AddAttribute<IFlagsAttribute>();
+            this.suffix = suffix;
+            this.prefix = prefix;
+            prefixExhausted = false;
+
+            termAtt = AddAttribute<ICharTermAttribute>();
+            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+            payloadAtt = AddAttribute<IPayloadAttribute>();
+            offsetAtt = AddAttribute<IOffsetAttribute>();
+            typeAtt = AddAttribute<ITypeAttribute>();
+            flagsAtt = AddAttribute<IFlagsAttribute>();
+
+            p_termAtt = prefix.AddAttribute<ICharTermAttribute>();
+            p_posIncrAtt = prefix.AddAttribute<IPositionIncrementAttribute>();
+            p_payloadAtt = prefix.AddAttribute<IPayloadAttribute>();
+            p_offsetAtt = prefix.AddAttribute<IOffsetAttribute>();
+            p_typeAtt = prefix.AddAttribute<ITypeAttribute>();
+            p_flagsAtt = prefix.AddAttribute<IFlagsAttribute>();
         }
 
-        public TokenStream Prefix { get; set; }
+        private Token previousPrefixToken = new Token();
+        private Token reusableToken = new Token();
 
-        public TokenStream Suffix { get; set; }
+        private bool prefixExhausted;
 
-        public override sealed bool IncrementToken()
+        public override bool IncrementToken()
         {
-            if (!_prefixExhausted)
+            Token nextToken;
+            if (!prefixExhausted)
             {
-                Token nextToken = GetNextPrefixInputToken(_reusableToken);
+                nextToken = GetNextPrefixInputToken(reusableToken);
                 if (nextToken == null)
                 {
-                    _prefixExhausted = true;
+                    prefixExhausted = true;
                 }
                 else
                 {
-                    _previousPrefixToken.Reinit(nextToken);
+                    previousPrefixToken.Reinit(nextToken);
                     // Make it a deep copy
-                    Payload p = _previousPrefixToken.Payload;
+                    var p = previousPrefixToken.Payload;
                     if (p != null)
                     {
-                        _previousPrefixToken.Payload = (Payload) p.Clone();
+                        previousPrefixToken.Payload = (BytesRef)p.Clone();
                     }
                     SetCurrentToken(nextToken);
                     return true;
                 }
             }
 
-            Token nextSuffixToken = GetNextSuffixInputToken(_reusableToken);
-            if (nextSuffixToken == null)
+            nextToken = GetNextSuffixInputToken(reusableToken);
+            if (nextToken == null)
             {
                 return false;
             }
 
-            nextSuffixToken = UpdateSuffixToken(nextSuffixToken, _previousPrefixToken);
-            SetCurrentToken(nextSuffixToken);
+            nextToken = UpdateSuffixToken(nextToken, previousPrefixToken);
+            SetCurrentToken(nextToken);
             return true;
         }
 
         private void SetCurrentToken(Token token)
         {
             if (token == null) return;
+
             ClearAttributes();
-            _termAtt.SetTermBuffer(token.TermBuffer(), 0, token.TermLength());
-            _posIncrAtt.PositionIncrement = token.PositionIncrement;
-            _flagsAtt.Flags =token.Flags;
-            _offsetAtt.SetOffset(token.StartOffset, token.EndOffset);
-            _typeAtt.Type = token.Type;
-            _payloadAtt.Payload = token.Payload;
+            termAtt.CopyBuffer(token.Buffer, 0, token.Length);
+            posIncrAtt.PositionIncrement = token.PositionIncrement;
+            flagsAtt.Flags = token.Flags;
+            offsetAtt.SetOffset(token.StartOffset, token.EndOffset);
+            typeAtt.Type = token.Type;
+            payloadAtt.Payload = token.Payload;
         }
 
         private Token GetNextPrefixInputToken(Token token)
         {
-            if (!Prefix.IncrementToken()) return null;
-            token.SetTermBuffer(_pTermAtt.TermBuffer(), 0, _pTermAtt.TermLength());
-            token.PositionIncrement = _pPosIncrAtt.PositionIncrement;
-            token.Flags = _pFlagsAtt.Flags;
-            token.SetOffset(_pOffsetAtt.StartOffset, _pOffsetAtt.EndOffset);
-            token.Type = _pTypeAtt.Type;
-            token.Payload = _pPayloadAtt.Payload;
+            if (!prefix.IncrementToken()) return null;
+
+            token.CopyBuffer(p_termAtt.Buffer, 0, p_termAtt.Length);
+            token.PositionIncrement = p_posIncrAtt.PositionIncrement;
+            token.Flags = p_flagsAtt.Flags;
+            token.SetOffset(p_offsetAtt.StartOffset, p_offsetAtt.EndOffset);
+            token.Type = p_typeAtt.Type;
+            token.Payload = p_payloadAtt.Payload;
             return token;
         }
 
         private Token GetNextSuffixInputToken(Token token)
         {
-            if (!Suffix.IncrementToken()) return null;
-            token.SetTermBuffer(_termAtt.TermBuffer(), 0, _termAtt.TermLength());
-            token.PositionIncrement = _posIncrAtt.PositionIncrement;
-            token.Flags = _flagsAtt.Flags;
-            token.SetOffset(_offsetAtt.StartOffset, _offsetAtt.EndOffset);
-            token.Type = _typeAtt.Type;
-            token.Payload = _payloadAtt.Payload;
+            if (!suffix.IncrementToken()) return null;
+
+            token.CopyBuffer(termAtt.Buffer, 0, termAtt.Length);
+            token.PositionIncrement = posIncrAtt.PositionIncrement;
+            token.Flags = flagsAtt.Flags;
+            token.SetOffset(offsetAtt.StartOffset, offsetAtt.EndOffset);
+            token.Type = typeAtt.Type;
+            token.Payload = payloadAtt.Payload;
             return token;
         }
 
         /// <summary>
-        /// The default implementation adds last prefix token end offset to the suffix token
start and end offsets.
+        /// The default implementation adds last prefix token end offset 
+        /// to the suffix token start and end offsets.
         /// </summary>
-        /// <param name="suffixToken">a token from the suffix stream</param>
-        /// <param name="lastPrefixToken">the last token from the prefix stream</param>
-        /// <returns>consumer token</returns>
+        /// <param name="suffixToken">A token from the suffix stream.</param>
+        /// <param name="lastPrefixToken">The last token from the prefix stream.</param>
+        /// <returns>Consumer token.</returns>
         public virtual Token UpdateSuffixToken(Token suffixToken, Token lastPrefixToken)
         {
-            suffixToken.StartOffset = lastPrefixToken.EndOffset + suffixToken.StartOffset;
-            suffixToken.EndOffset = lastPrefixToken.EndOffset + suffixToken.EndOffset;
+            suffixToken.SetOffset(lastPrefixToken.EndOffset + suffixToken.StartOffset,
+                                  lastPrefixToken.EndOffset + suffixToken.EndOffset);
             return suffixToken;
         }
 
+
+        public override void End()
+        {
+            prefix.End();
+            suffix.End();
+        }
+
+
+        // was public override void Dispose
+        // changed to follow standard .NET dispose pattern
         protected override void Dispose(bool disposing)
         {
-            Prefix.Dispose();
-            Suffix.Dispose();
+            prefix.Dispose();
+            suffix.Dispose();
         }
 
         public override void Reset()
         {
             base.Reset();
+            if (prefix != null)
+            {
+                prefixExhausted = false;
+                prefix.Reset();
+            }
+            if (suffix != null)
+            {
+                suffix.Reset();
+            }
+        }
 
-            if (Prefix != null)
+
+        public TokenStream Prefix
+        {
+            get
             {
-                _prefixExhausted = false;
-                Prefix.Reset();
+                return prefix;
             }
+            set
+            {
+                prefix = value;
+            }
+        }
 
-            if (Suffix != null)
-                Suffix.Reset();
+        public TokenStream Suffix
+        {
+            get
+            {
+                return suffix;
+            }
+            set
+            {
+                suffix = value;
+            }
         }
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d6717ed8/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs b/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
index b24c0f3..35b2e71 100644
--- a/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
+++ b/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
@@ -38,7 +38,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
             Debug.Assert(token != null, "Token was null!");
             _singleToken = (Token) token.Clone();
 
-            _tokenAtt = (Attribute)AddAttribute<ITermAttribute>();
+            _tokenAtt = (Attribute)AddAttribute<ICharTermAttribute>();
 
             Debug.Assert(_tokenAtt is Token);
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d6717ed8/src/contrib/Analyzers/Support/StreamReaderExtensions.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Support/StreamReaderExtensions.cs b/src/contrib/Analyzers/Support/StreamReaderExtensions.cs
new file mode 100644
index 0000000..f215e45
--- /dev/null
+++ b/src/contrib/Analyzers/Support/StreamReaderExtensions.cs
@@ -0,0 +1,13 @@
+using System.IO;
+
+namespace Lucene.Net.Analysis.Support
+{
+    public static class StreamReaderExtensions
+    {
+        public static void Reset(this StreamReader sr)
+        {
+            sr.BaseStream.Position = 0;
+            sr.DiscardBufferedData();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d6717ed8/src/contrib/Analyzers/Util/CharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Util/CharFilterFactory.cs b/src/contrib/Analyzers/Util/CharFilterFactory.cs
new file mode 100644
index 0000000..89688e2
--- /dev/null
+++ b/src/contrib/Analyzers/Util/CharFilterFactory.cs
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Util
+{
+    /// <summary>
+    /// Abstract parent class for analysis factories that create
+    /// {@link CharFilter} instances.
+    /// </summary>
+    public abstract class CharFilterFactory : AbstractAnalysisFactory
+    {
+        private static readonly AnalysisSPILoader<CharFilterFactory> Loader =
+            new AnalysisSPILoader<CharFilterFactory>(typeof (CharFilterFactory));
+
+        /// <summary>
+        /// Looks up a CharFilter by name from context classpath.
+        /// </summary>
+        /// <param name="name"></param>
+        /// <param name="args"></param>
+        /// <returns>Returns an instance of the looked up CharFilter.</returns>
+        public static CharFilterFactory ForName(string name, IDictionary<string, string> args)
+        {
+            return Loader.NewInstance(name, args);
+        }
+
+        /// <summary>
+        /// Looks up a CharFilter class by name from context classpath.
+        /// </summary>
+        /// <param name="name"></param>
+        /// <returns>Returns the type of the looked up CharFilter.</returns>
+        public static Type LookupType(string name)
+        {
+            return Loader.LookupClass(name);
+        }
+
+        /// <summary>
+        /// Returns a list of all available CharFilter names.
+        /// </summary>
+        /// <returns>Returns a list of all available CharFilter names.</returns>
+        public static ICollection<string> AvailableCharFilters()
+        {
+            return Loader.AvailableServices;
+        }
+
+        /// <summary>
+        /// Reloads the factory list from the given {@link ClassLoader}.
+        /// Changes to the factories are visible after the method ends, all
+        /// iterators ({@link #availableCharFilters()},...) stay consistent. 
+        /// 
+        /// <p><b>NOTE:</b> Only new factories are added, existing ones are
+        /// never removed or replaced.
+        /// 
+        /// <p><em>This method is expensive and should only be called for discovery
+        /// of new factories on the given classpath/classloader!</em></p></p>
+        /// </summary>
+        public static void ReloadCharFilters()
+        {
+            Loader.Reload();
+        }
+
+        /// <summary>
+        /// Initialize this factory via a set of key-value pairs.
+        /// </summary>
+        /// <param name="args"></param>
+        protected CharFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            
+        }
+
+        /// <summary>
+        /// Wraps the given TextReader with a CharFilter.
+        /// </summary>
+        /// <param name="input"></param>
+        /// <returns></returns>
+        public abstract StreamReader Create(StreamReader input);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d6717ed8/src/contrib/Analyzers/Util/RollingCharBuffer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Util/RollingCharBuffer.cs b/src/contrib/Analyzers/Util/RollingCharBuffer.cs
new file mode 100644
index 0000000..caa4355
--- /dev/null
+++ b/src/contrib/Analyzers/Util/RollingCharBuffer.cs
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Util
+{
+    /// <summary>
+    /// Acts like a forever growing char[] as you read
+    /// characters into it from the provided reader, but
+    /// internally it uses a circular buffer to only hold the
+    /// characters that haven't been freed yet.  This is like a
+    /// PushbackReader, except you don't have to specify
+    /// up-front the max size of the buffer, but you do have to
+    /// periodically call {@link #freeBefore}. 
+    /// </summary>
+    public sealed class RollingCharBuffer
+    {
+        private TextReader reader;
+
+        private char[] buffer = new char[512];
+
+        // Next array index to write to in buffer:
+        private int nextWrite;
+
+        // Next absolute position to read from reader:
+        private int nextPos;
+
+        // How many valid chars (wrapped) are in the buffer:
+        private int count;
+
+        // True if we hit EOF
+        private bool end;
+
+        /// <summary>
+        /// Clear array and switch to new reader.
+        /// </summary>
+        /// <param name="reader"></param>
+        public void Reset(TextReader reader)
+        {
+            this.reader = reader;
+            nextPos = 0;
+            nextWrite = 0;
+            count = 0;
+            end = false;
+        }
+
+        /// <summary>
+        /// Absolute position read. NOTE: pos must not jump
+        /// ahead by more than 1! I.e., it's OK to read arbitrarily
+        /// far back (just not prior to the last {@link
+        /// #freeBefore}), but NOT ok to read arbitrarily far
+        /// ahead. Returns -1 if you hit EOF.
+        /// </summary>
+        /// <param name="pos"></param>
+        /// <returns></returns>
+        public int Get(int pos)
+        {
+            if (pos == nextPos)
+            {
+                if (end)
+                {
+                    return -1;
+                }
+                if (count == buffer.Length)
+                {
+                    // Grow
+                    var newBuffer = new char[ArrayUtil.Oversize(1 + count, RamUsageEstimator.NUM_BYTES_CHAR)];
+                    Array.Copy(buffer, nextWrite, newBuffer, 0, buffer.Length - nextWrite);
+                    Array.Copy(buffer, 0, newBuffer, buffer.Length - nextWrite, nextWrite);
+                    nextWrite = buffer.Length;
+                    buffer = newBuffer;
+                }
+                if (nextWrite == buffer.Length)
+                {
+                    nextWrite = 0;
+                }
+
+                var toRead = buffer.Length - Math.Max(count, nextWrite);
+                var readCount = reader.Read(buffer, nextWrite, toRead);
+                if (readCount == -1)
+                {
+                    end = true;
+                    return -1;
+                }
+                var ch = buffer[nextWrite];
+                nextWrite += readCount;
+                count += readCount;
+                nextPos += readCount;
+                return ch;
+            }
+            else
+            {
+                // Cannot read from future (except by 1):
+                // assert pos < nextPos;
+                if (pos >= nextPos)
+                    throw new InvalidOperationException("Cannot read from future (except by 1).");
+
+                // Cannot read from already freed past:
+                // assert nextPos - pos <= count: "nextPos=" + nextPos + " pos=" + pos + " count=" + count;
+                if (nextPos - pos > count)
+                    throw new InvalidOperationException("nextPos=" + nextPos + " pos=" + pos + " count=" + count);
+
+                return buffer[GetIndex(pos)];
+            }
+        }
+
+        // For assert:
+        private bool InBounds(int pos)
+        {
+            return pos >= 0 && pos < nextPos && pos >= nextPos - count;
+        }
+
+        private int GetIndex(int pos)
+        {
+            var index = nextWrite - (nextPos - pos);
+            if (index < 0)
+            {
+                // Wrap:
+                index += buffer.Length;
+                //assert index >= 0;
+                if (index < 0)
+                    throw new InvalidOperationException();
+            }
+            return index;
+        }
+
+
+        public char[] Get(int posStart, int length)
+        {
+            if (length <= 0)
+                throw new ArgumentException("Must be greater than zero.", "length");
+
+            if (!InBounds(posStart))
+                throw new ArgumentException("posStart=" + posStart + " length=" + length, "posStart");
+
+            var startIndex = GetIndex(posStart);
+            var endIndex = GetIndex(posStart + length);
+
+            var result = new char[length];
+            if (endIndex >= startIndex && length < buffer.Length)
+            {
+                Array.Copy(buffer, startIndex, result, 0, endIndex - startIndex);
+            }
+            else
+            {
+                // wrapped:
+                var part1 = buffer.Length - startIndex;
+                Array.Copy(buffer, startIndex, result, 0, part1);
+                Array.Copy(buffer, 0, result, buffer.Length-startIndex, length-part1);
+            }
+            return result;
+        }
+
+        /// <summary>
+        /// Call this to notify us that no chars before this
+        /// absolute position are needed anymore.
+        /// </summary>
+        /// <param name="pos"></param>
+        public void FreeBefore(int pos)
+        {
+            if (pos < 0)
+                throw new ArgumentException("Must be greater than or equal to zero.", "pos");
+
+            if (pos > nextPos)
+                throw new ArgumentException("Must be less than or equal to nextPos", "pos");
+
+            var newCount = nextPos - pos;
+
+            if (newCount > count)
+                throw new InvalidOperationException("newCount=" + newCount + " count=" + count);
+
+            if (newCount > buffer.Length)
+                throw new InvalidOperationException("newCount=" + newCount + " buf.length=" + buffer.Length);
+
+            count = newCount;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d6717ed8/src/core/Analysis/CharFilter.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/CharFilter.cs b/src/core/Analysis/CharFilter.cs
index b8a4332..1d1ecb7 100644
--- a/src/core/Analysis/CharFilter.cs
+++ b/src/core/Analysis/CharFilter.cs
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+using System.IO;
+
 namespace Lucene.Net.Analysis
 {
 	
@@ -27,11 +29,12 @@ namespace Lucene.Net.Analysis
 	/// <version>  $Id$
 	/// 
 	/// </version>
-	public abstract class CharFilter : System.IO.TextReader
+	public abstract class CharFilter : StreamReader
 	{
-        protected readonly System.IO.TextReader input;
-		
-		public CharFilter(System.IO.TextReader input)
+        protected readonly StreamReader input;
+
+        public CharFilter(StreamReader input)
+            : base(input.BaseStream)
 		{
 			this.input = input;
 		}


Mime
View raw message