lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ccurr...@apache.org
Subject [Lucene.Net] svn commit: r1204353 [9/9] - in /incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src: contrib/Analyzers/ contrib/Analyzers/AR/ contrib/Analyzers/BR/ contrib/Analyzers/CJK/ contrib/Analyzers/Cn/ contrib/Analyzers/Compound/ contrib/Analyzers/Compoun...
Date Mon, 21 Nov 2011 04:44:59 GMT
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Shingle/ShingleFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Shingle/ShingleFilter.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Shingle/ShingleFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Shingle/ShingleFilter.cs Mon Nov 21 04:44:55 2011
@@ -1,4 +1,4 @@
-/*
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -17,368 +17,366 @@
 
 using System;
 using System.Collections.Generic;
-using System.IO;
+using System.Linq;
 using System.Text;
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
 
 namespace Lucene.Net.Analyzers.Shingle
 {
-    /// <summary>
-    /// <p>A ShingleFilter constructs shingles (token n-grams) from a token stream.
-    /// In other words, it creates combinations of tokens as a single token.</p>
-    /// 
-    /// <p>For example, the sentence "please divide this sentence into shingles"
-    /// might be tokenized into shingles "please divide", "divide this",
-    /// "this sentence", "sentence into", and "into shingles".</p>
-    ///     
-    /// <p>This filter handles position increments > 1 by inserting filler tokens
-    /// (tokens with termtext "_"). It does not handle a position increment of 0. </p>
-    /// </summary>
-    public class ShingleFilter : TokenFilter
+    /**
+ * <p>A ShingleFilter constructs shingles (token n-grams) from a token stream.
+ * In other words, it creates combinations of tokens as a single token.
+ *
+ * <p>For example, the sentence "please divide this sentence into shingles"
+ * might be tokenized into shingles "please divide", "divide this",
+ * "this sentence", "sentence into", and "into shingles".
+ *
+ * <p>This filter handles position increments > 1 by inserting filler tokens
+ * (tokens with termtext "_"). It does not handle a position increment of 0.
+ */
+    public sealed class ShingleFilter : TokenFilter
     {
-        /// <summary>
-        /// Filler token for when positionIncrement is more than 1
-        /// </summary>
-        public static readonly char[] FillerToken = {'_'};
-
-        /// <summary>
-        /// Default maximum shingle size is 2.
-        /// </summary>
-        public static readonly int DefaultMaxShingleSize = 2;
-
-        /// <summary>
-        /// The string to use when joining adjacent tokens to form a shingle
-        /// </summary>
-        public static readonly string TokenSeparator = " ";
-
-        private readonly OffsetAttribute _offsetAtt;
-        private readonly PositionIncrementAttribute _posIncrAtt;
-
-        private readonly LinkedList<State> _shingleBuf = new LinkedList<State>();
-        private readonly TermAttribute _termAtt;
-        private readonly TypeAttribute _typeAtt;
-        private State _currentToken;
-        private int[] _endOffsets;
-        private bool _hasCurrentToken;
-
-        /// <summary>
-        /// Maximum shingle size (number of tokens)
-        /// </summary>
-        private int _maxShingleSize;
-
-        private State _nextToken;
-        private int _numFillerTokensToInsert;
-
-        /// <summary>
-        /// By default, we output unigrams (individual tokens) as well as shingles (token n-grams).
-        /// </summary>
-        private bool _outputUnigrams = true;
-
-        private int _shingleBufferPosition;
-        private StringBuilder[] _shingles;
-        private String _tokenType = "shingle";
-
-        /// <summary>
-        /// Constructs a ShingleFilter with the specified single size from the TokenStream
-        /// </summary>
-        /// <param name="input">input token stream</param>
-        /// <param name="maxShingleSize">maximum shingle size produced by the filter.</param>
-        public ShingleFilter(TokenStream input, int maxShingleSize) : base(input)
-        {
-            SetMaxShingleSize(maxShingleSize);
-
-            // ReSharper disable DoNotCallOverridableMethodsInConstructor
-            _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
-            _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
-            _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
-            _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
-            // ReSharper restore DoNotCallOverridableMethodsInConstructor
-        }
 
-        /// <summary>
-        /// Construct a ShingleFilter with default shingle size.
-        /// </summary>
-        /// <param name="input">input stream</param>
-        public ShingleFilter(TokenStream input) :
-            this(input, DefaultMaxShingleSize)
+        private LinkedList<State> shingleBuf = new LinkedList<State>();
+        private StringBuilder[] shingles;
+        private String tokenType = "shingle";
+
+        /**
+         * filler token for when positionIncrement is more than 1
+         */
+        public static readonly char[] FILLER_TOKEN = { '_' };
+
+
+        /**
+         * default maximum shingle size is 2.
+         */
+        public const int DEFAULT_MAX_SHINGLE_SIZE = 2;
+
+        /**
+         * The string to use when joining adjacent tokens to form a shingle
+         */
+        public const String TOKEN_SEPARATOR = " ";
+
+        /**
+         * By default, we output unigrams (individual tokens) as well as shingles
+         * (token n-grams).
+         */
+        private bool outputUnigrams = true;
+
+        /**
+         * maximum shingle size (number of tokens)
+         */
+        private int maxShingleSize;
+
+        /**
+         * Constructs a ShingleFilter with the specified single size from the
+         * {@link TokenStream} <code>input</code>
+         *
+         * @param input input stream
+         * @param maxShingleSize maximum shingle size produced by the filter.
+         */
+        public ShingleFilter(TokenStream input, int maxShingleSize)
+            : base(input)
         {
-        }
-
-        /// <summary>
-        /// Construct a ShingleFilter with the specified token type for shingle tokens.
-        /// </summary>
-        /// <param name="input">input stream</param>
-        /// <param name="tokenType">token type for shingle tokens</param>
-        public ShingleFilter(TokenStream input, String tokenType) :
-            this(input, DefaultMaxShingleSize)
-        {
-            SetTokenType(tokenType);
-        }
-
-        /// <summary>
-        /// Set the type of the shingle tokens produced by this filter. (default: "shingle")
-        /// </summary>
-        /// <param name="tokenType">token TokenType</param>
-        public void SetTokenType(String tokenType)
+            SetMaxShingleSize(maxShingleSize);
+            this.termAtt = AddAttribute<TermAttribute>(); ;
+            this.offsetAtt = AddAttribute<OffsetAttribute>(); ;
+            this.posIncrAtt = AddAttribute<PositionIncrementAttribute>(); ;
+            this.typeAtt = AddAttribute<TypeAttribute>(); ;
+        }
+
+        /**
+         * Construct a ShingleFilter with default shingle size.
+         *
+         * @param input input stream
+         */
+        public ShingleFilter(TokenStream input)
+            : this(input, DEFAULT_MAX_SHINGLE_SIZE)
         {
-            _tokenType = tokenType;
         }
 
-        /// <summary>
-        /// Shall the output stream contain the input tokens (unigrams) as well as shingles? (default: true.)
-        /// </summary>
-        /// <param name="outputUnigrams">Whether or not the output stream shall contain the input tokens (unigrams)</param>
+        /**
+         * Construct a ShingleFilter with the specified token type for shingle tokens.
+         *
+         * @param input input stream
+         * @param tokenType token type for shingle tokens
+         */
+        public ShingleFilter(TokenStream input, String tokenType)
+            : this(input, DEFAULT_MAX_SHINGLE_SIZE)
+        {
+            setTokenType(tokenType);
+        }
+
+        /**
+         * Set the type of the shingle tokens produced by this filter.
+         * (default: "shingle")
+         *
+         * @param tokenType token tokenType
+         */
+        public void setTokenType(String tokenType)
+        {
+            this.tokenType = tokenType;
+        }
+
+        /**
+         * Shall the output stream contain the input tokens (unigrams) as well as
+         * shingles? (default: true.)
+         *
+         * @param outputUnigrams Whether or not the output stream shall contain
+         * the input tokens (unigrams)
+         */
         public void SetOutputUnigrams(bool outputUnigrams)
         {
-            _outputUnigrams = outputUnigrams;
+            this.outputUnigrams = outputUnigrams;
         }
 
-        /// <summary>
-        /// Set the max shingle size (default: 2)
-        /// </summary>
-        /// <param name="maxShingleSize">max size of output shingles</param>
+        /**
+         * Set the max shingle size (default: 2)
+         *
+         * @param maxShingleSize max size of output shingles
+         */
         public void SetMaxShingleSize(int maxShingleSize)
         {
             if (maxShingleSize < 2)
-                throw new ArgumentException("Max shingle size must be >= 2", "maxShingleSize");
-
-            _shingles = new StringBuilder[maxShingleSize];
-
-            for (int i = 0; i < _shingles.Length; i++)
             {
-                _shingles[i] = new StringBuilder();
+                throw new ArgumentException("Max shingle size must be >= 2");
             }
-
-            _maxShingleSize = maxShingleSize;
+            shingles = new StringBuilder[maxShingleSize];
+            for (int i = 0; i < shingles.Length; i++)
+            {
+                shingles[i] = new StringBuilder();
+            }
+            this.maxShingleSize = maxShingleSize;
         }
 
-        /// <summary>
-        /// Clear the StringBuilders that are used for storing the output shingles.
-        /// </summary>
+        /**
+         * Clear the StringBuilders that are used for storing the output shingles.
+         */
         private void ClearShingles()
         {
-            foreach (StringBuilder t in _shingles)
+            for (int i = 0; i < shingles.Length; i++)
             {
-                t.Length = 0;
+                shingles[i].Clear();
             }
         }
 
-        /// <summary>
-        /// See Lucene.Net.Analysis.TokenStream.Next()
-        /// </summary>
-        /// <returns></returns>
-        public override bool IncrementToken()
+        private AttributeSource.State nextToken;
+        private int shingleBufferPosition;
+        private int[] endOffsets;
+
+        /* (non-Javadoc)
+         * @see org.apache.lucene.analysis.TokenStream#next()
+         */
+        public sealed override bool IncrementToken()
         {
             while (true)
             {
-                if (_nextToken == null)
+                if (nextToken == null)
                 {
                     if (!FillShingleBuffer())
+                    {
                         return false;
+                    }
                 }
 
-                _nextToken = _shingleBuf.First.Value;
+                nextToken = shingleBuf.First.Value;
 
-                if (_outputUnigrams)
+                if (outputUnigrams)
                 {
-                    if (_shingleBufferPosition == 0)
+                    if (shingleBufferPosition == 0)
                     {
-                        RestoreState(_nextToken);
-                        _posIncrAtt.SetPositionIncrement(1);
-                        _shingleBufferPosition++;
+                        RestoreState(nextToken);
+                        posIncrAtt.SetPositionIncrement(1);
+                        shingleBufferPosition++;
                         return true;
                     }
                 }
-                else if (_shingleBufferPosition%_maxShingleSize == 0)
+                else if (shingleBufferPosition % this.maxShingleSize == 0)
                 {
-                    _shingleBufferPosition++;
+                    shingleBufferPosition++;
                 }
 
-                if (_shingleBufferPosition < _shingleBuf.Count)
+                if (shingleBufferPosition < shingleBuf.Count)
                 {
-                    RestoreState(_nextToken);
-                    _typeAtt.SetType(_tokenType);
-                    _offsetAtt.SetOffset(_offsetAtt.StartOffset(), _endOffsets[_shingleBufferPosition]);
-                    StringBuilder buf = _shingles[_shingleBufferPosition];
+                    RestoreState(nextToken);
+                    typeAtt.SetType(tokenType);
+                    offsetAtt.SetOffset(offsetAtt.StartOffset(), endOffsets[shingleBufferPosition]);
+                    StringBuilder buf = shingles[shingleBufferPosition];
                     int termLength = buf.Length;
-                    char[] termBuffer = _termAtt.TermBuffer();
-                    if (termBuffer.Length < termLength)
-                        termBuffer = _termAtt.ResizeTermBuffer(termLength);
-                    buf.CopyTo(0, termBuffer, 0, termLength);
-                    _termAtt.SetTermLength(termLength);
-                    if ((! _outputUnigrams) && _shingleBufferPosition%_maxShingleSize == 1)
+                    char[] TermBuffer = termAtt.TermBuffer();
+                    if (TermBuffer.Length < termLength)
+                        TermBuffer = termAtt.ResizeTermBuffer(termLength);
+                    buf.CopyTo(0, TermBuffer, 0, termLength);
+                    termAtt.SetTermLength(termLength);
+                    if ((!outputUnigrams) && shingleBufferPosition % this.maxShingleSize == 1)
                     {
-                        _posIncrAtt.SetPositionIncrement(1);
+                        posIncrAtt.SetPositionIncrement(1);
                     }
                     else
                     {
-                        _posIncrAtt.SetPositionIncrement(0);
+                        posIncrAtt.SetPositionIncrement(0);
                     }
-                    _shingleBufferPosition++;
-                    if (_shingleBufferPosition == _shingleBuf.Count)
+                    shingleBufferPosition++;
+                    if (shingleBufferPosition == shingleBuf.Count)
                     {
-                        _nextToken = null;
-                        _shingleBufferPosition = 0;
+                        nextToken = null;
+                        shingleBufferPosition = 0;
                     }
                     return true;
                 }
-
-                _nextToken = null;
-                _shingleBufferPosition = 0;
+                else
+                {
+                    nextToken = null;
+                    shingleBufferPosition = 0;
+                }
             }
         }
 
-        /// <summary>
-        /// <p>
-        /// Get the next token from the input stream and push it on the token buffer.
-        /// If we encounter a token with position increment > 1, we put filler tokens
-        /// on the token buffer.
-        /// </p>
-        /// Returns null when the end of the input stream is reached.
-        /// </summary>
-        /// <returns>the next token, or null if at end of input stream</returns>
+        private int numFillerTokensToInsert;
+        private AttributeSource.State currentToken;
+        private bool hasCurrentToken;
+
+        private TermAttribute termAtt;
+        private OffsetAttribute offsetAtt;
+        private PositionIncrementAttribute posIncrAtt;
+        private TypeAttribute typeAtt;
+
+        /**
+         * Get the next token from the input stream and push it on the token buffer.
+         * If we encounter a token with position increment > 1, we put filler tokens
+         * on the token buffer.
+         * <p/>
+         * Returns null when the end of the input stream is reached.
+         * @return the next token, or null if at end of input stream
+         * @throws IOException if the input stream has a problem
+         */
         private bool GetNextToken()
         {
+
             while (true)
             {
-                if (_numFillerTokensToInsert > 0)
+                if (numFillerTokensToInsert > 0)
                 {
-                    if (_currentToken == null)
+                    if (currentToken == null)
                     {
-                        _currentToken = CaptureState();
+                        currentToken = CaptureState();
                     }
                     else
                     {
-                        RestoreState(_currentToken);
+                        RestoreState(currentToken);
                     }
-                    _numFillerTokensToInsert--;
+                    numFillerTokensToInsert--;
                     // A filler token occupies no space
-                    _offsetAtt.SetOffset(_offsetAtt.StartOffset(), _offsetAtt.StartOffset());
-                    _termAtt.SetTermBuffer(FillerToken, 0, FillerToken.Length);
+                    offsetAtt.SetOffset(offsetAtt.StartOffset(), offsetAtt.StartOffset());
+                    termAtt.SetTermBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.Length);
                     return true;
                 }
 
-                if (_hasCurrentToken)
+                if (hasCurrentToken)
                 {
-                    if (_currentToken != null)
+                    if (currentToken != null)
                     {
-                        RestoreState(_currentToken);
-                        _currentToken = null;
+                        RestoreState(currentToken);
+                        currentToken = null;
                     }
-                    _hasCurrentToken = false;
+                    hasCurrentToken = false;
                     return true;
                 }
 
-                if (!input.IncrementToken())
-                    return false;
+                if (!input.IncrementToken()) return false;
+                hasCurrentToken = true;
 
-                _hasCurrentToken = true;
-
-                if (_posIncrAtt.GetPositionIncrement() > 1)
-                    _numFillerTokensToInsert = _posIncrAtt.GetPositionIncrement() - 1;
+                if (posIncrAtt.GetPositionIncrement() > 1)
+                {
+                    numFillerTokensToInsert = posIncrAtt.GetPositionIncrement() - 1;
+                }
             }
         }
 
-        /// <summary>
-        /// Fill the output buffer with new shingles.
-        /// </summary>
-        /// <exception cref="IOException">throws IOException if there's a problem getting the next token</exception>
-        /// <returns></returns>
+        /**
+         * Fill the output buffer with new shingles.
+         *
+         * @throws IOException if there's a problem getting the next token
+         */
         private bool FillShingleBuffer()
         {
             bool addedToken = false;
-
-            // Try to fill the shingle buffer.
-
+            /*
+             * Try to fill the shingle buffer.
+             */
             do
             {
-                if (!GetNextToken())
+                if (GetNextToken())
+                {
+                    shingleBuf.AddLast(CaptureState());
+                    if (shingleBuf.Count > maxShingleSize)
+                    {
+                        shingleBuf.RemoveFirst();
+                    }
+                    addedToken = true;
+                }
+                else
+                {
                     break;
+                }
+            } while (shingleBuf.Count < maxShingleSize);
 
-                _shingleBuf.AddLast(CaptureState());
-
-                if (_shingleBuf.Count > _maxShingleSize)
-                    _shingleBuf.RemoveFirst();
-
-                addedToken = true;
-            } while (_shingleBuf.Count < _maxShingleSize);
-
-            if (_shingleBuf.Count == 0)
+            if (shingleBuf.Count == 0)
+            {
                 return false;
+            }
 
+            /*
+             * If no new token could be added to the shingle buffer, we have reached
+             * the end of the input stream and have to discard the least recent token.
+             */
+            if (!addedToken)
+            {
+                shingleBuf.RemoveFirst();
+            }
 
-            // If no new token could be added to the shingle buffer, we have reached
-            // the end of the input stream and have to discard the least recent token.
-
-            if (! addedToken)
-                _shingleBuf.RemoveFirst();
-
-            if (_shingleBuf.Count == 0)
+            if (shingleBuf.Count == 0)
+            {
                 return false;
+            }
 
             ClearShingles();
 
-            _endOffsets = new int[_shingleBuf.Count];
-            for (int i = 0; i < _endOffsets.Length; i++)
-            {
-                _endOffsets[i] = 0;
-            }
-
-            int shingleIndex = 0;
+            endOffsets = new int[shingleBuf.Count];
+            // Set all offsets to 0
+            endOffsets.Initialize();
 
-            foreach (State state in _shingleBuf)
+            int i = 0;
+            for (IEnumerator<State> it = shingleBuf.GetEnumerator(); it.MoveNext(); )
             {
-                RestoreState(state);
-
-                for (int j = shingleIndex; j < _shingles.Length; j++)
+                RestoreState(it.Current);
+                for (int j = i; j < shingles.Length; j++)
                 {
-                    if (_shingles[j].Length != 0)
-                        _shingles[j].Append(TokenSeparator);
-
-                    _shingles[j].Append(_termAtt.TermBuffer(), 0, _termAtt.TermLength());
+                    if (shingles[j].Length != 0)
+                    {
+                        shingles[j].Append(TOKEN_SEPARATOR);
+                    }
+                    shingles[j].Append(termAtt.TermBuffer().Take(termAtt.TermLength()).ToArray());
                 }
 
-                _endOffsets[shingleIndex] = _offsetAtt.EndOffset();
-                shingleIndex++;
+                endOffsets[i] = offsetAtt.EndOffset();
+                i++;
             }
 
             return true;
         }
 
-        /// <summary>
-        /// Deprecated: Will be removed in Lucene 3.0. This method is readonly, as it should not be overridden. 
-        /// Delegates to the backwards compatibility layer.
-        /// </summary>
-        /// <param name="reusableToken"></param>
-        /// <returns></returns>
-        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
-        public override sealed Token Next(Token reusableToken)
-        {
-            return base.Next(reusableToken);
-        }
-
-        /// <summary>
-        /// Deprecated: Will be removed in Lucene 3.0. This method is readonly, as it should not be overridden. 
-        /// Delegates to the backwards compatibility layer.
-        /// </summary>
-        /// <returns></returns>
-        [Obsolete("The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling Next(Token) or using the new IncrementToken() method with the new AttributeSource API.")]
-        public override sealed Token Next()
-        {
-            return base.Next();
-        }
-
         public override void Reset()
         {
             base.Reset();
-
-            _nextToken = null;
-            _shingleBufferPosition = 0;
-            _shingleBuf.Clear();
-            _numFillerTokensToInsert = 0;
-            _currentToken = null;
-            _hasCurrentToken = false;
+            nextToken = null;
+            shingleBufferPosition = 0;
+            shingleBuf.Clear();
+            numFillerTokensToInsert = 0;
+            currentToken = null;
+            hasCurrentToken = false;
         }
     }
 }
\ No newline at end of file

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs Mon Nov 21 04:44:55 2011
@@ -1,4 +1,4 @@
-/*
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -20,10 +20,11 @@ using System.Collections.Generic;
 using System.Linq;
 using System.Text;
 using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Shingle.Codec;
+using Lucene.Net.Analysis.Shingle.Matrix;
 using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analyzers.Miscellaneous;
-using Lucene.Net.Analyzers.Shingle.Codec;
-using Lucene.Net.Analyzers.Shingle.Matrix;
+using Lucene.Net.Support;
 using FlagsAttribute = Lucene.Net.Analysis.Tokenattributes.FlagsAttribute;
 
 namespace Lucene.Net.Analyzers.Shingle
@@ -103,7 +104,7 @@ namespace Lucene.Net.Analyzers.Shingle
     /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
     /// the ones located in org.apache.lucene.analysis.tokenattributes.</p> 
     /// </summary>
-    public class ShingleMatrixFilter : TokenStream
+    public sealed class ShingleMatrixFilter : TokenStream
     {
         public static Char DefaultSpacerCharacter = '_';
         public static TokenSettingsCodec DefaultSettingsCodec = new OneDimensionalNonWeightedTokenSettingsCodec();
@@ -135,8 +136,8 @@ namespace Lucene.Net.Analyzers.Shingle
         /// to get the same behaviour.
         /// </p>
         /// </summary>
-        private readonly HashSet<SupportClass.EquatableList<Token>> _shinglesSeen =
-            new HashSet<SupportClass.EquatableList<Token>>(); 
+        private readonly HashSet<EquatableList<Token>> _shinglesSeen =
+            new HashSet<EquatableList<Token>>(); 
 
         private readonly TermAttribute _termAtt;
         private readonly TypeAttribute _typeAtt;
@@ -158,13 +159,13 @@ namespace Lucene.Net.Analyzers.Shingle
         /// todo: don't touch the matrix! use a bool, set the input stream to null or something, and keep track of where in the matrix we are at.
         /// 
         /// </summary>
-        /// <param name="matrix">the input based for creating shingles. Does not need to contain any information until ShingleMatrixFilter.Next(Token) is called the first time.</param>
+        /// <param name="matrix">the input based for creating shingles. Does not need to contain any information until ShingleMatrixFilter.IncrementToken() is called the first time.</param>
         /// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
         /// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
         /// <param name="spacerCharacter">character to use between texts of the token parts in a shingle. null for none.</param>
         /// <param name="ignoringSinglePrefixOrSuffixShingle">if true, shingles that only contains permutation of the first of the last column will not be produced as shingles. Useful when adding boundary marker tokens such as '^' and '$'.</param>
         /// <param name="settingsCodec">codec used to read input token weight and matrix positioning.</param>
-        public ShingleMatrixFilter(Matrix.Matrix matrix, int minimumShingleSize, int maximumShingleSize, Char spacerCharacter, bool ignoringSinglePrefixOrSuffixShingle, TokenSettingsCodec settingsCodec)
+        public ShingleMatrixFilter(Matrix matrix, int minimumShingleSize, int maximumShingleSize, Char spacerCharacter, bool ignoringSinglePrefixOrSuffixShingle, TokenSettingsCodec settingsCodec)
         {
             Matrix = matrix;
             MinimumShingleSize = minimumShingleSize;
@@ -174,23 +175,23 @@ namespace Lucene.Net.Analyzers.Shingle
             _settingsCodec = settingsCodec;
 
             // ReSharper disable DoNotCallOverridableMethodsInConstructor
-            _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
-            _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
-            _payloadAtt = (PayloadAttribute) AddAttribute(typeof (PayloadAttribute));
-            _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
-            _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
-            _flagsAtt = (FlagsAttribute) AddAttribute(typeof (FlagsAttribute));
+            _termAtt = AddAttribute<TermAttribute>();
+            _posIncrAtt = AddAttribute<PositionIncrementAttribute>();
+            _payloadAtt = AddAttribute<PayloadAttribute>();
+            _offsetAtt = AddAttribute<OffsetAttribute>();
+            _typeAtt = AddAttribute<TypeAttribute>();
+            _flagsAtt = AddAttribute<FlagsAttribute>();
             // ReSharper restore DoNotCallOverridableMethodsInConstructor
 
             // set the input to be an empty token stream, we already have the data.
             _input = new EmptyTokenStream();
 
-            _inTermAtt = (TermAttribute) _input.AddAttribute(typeof (TermAttribute));
-            _inPosIncrAtt = (PositionIncrementAttribute) _input.AddAttribute(typeof (PositionIncrementAttribute));
-            _inPayloadAtt = (PayloadAttribute) _input.AddAttribute(typeof (PayloadAttribute));
-            _inOffsetAtt = (OffsetAttribute) _input.AddAttribute(typeof (OffsetAttribute));
-            _inTypeAtt = (TypeAttribute) _input.AddAttribute(typeof (TypeAttribute));
-            _inFlagsAtt = (FlagsAttribute) _input.AddAttribute(typeof (FlagsAttribute));
+            _inTermAtt = _input.AddAttribute<TermAttribute>();
+            _inPosIncrAtt = _input.AddAttribute<PositionIncrementAttribute>();
+            _inPayloadAtt = _input.AddAttribute<PayloadAttribute>();
+            _inOffsetAtt = _input.AddAttribute<OffsetAttribute>();
+            _inTypeAtt = _input.AddAttribute<TypeAttribute>();
+            _inFlagsAtt = _input.AddAttribute<FlagsAttribute>();
         }
 
         /// <summary>
@@ -250,27 +251,27 @@ namespace Lucene.Net.Analyzers.Shingle
             _settingsCodec = settingsCodec;
 
             // ReSharper disable DoNotCallOverridableMethodsInConstructor
-            _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
-            _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
-            _payloadAtt = (PayloadAttribute) AddAttribute(typeof (PayloadAttribute));
-            _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
-            _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
-            _flagsAtt = (FlagsAttribute) AddAttribute(typeof (FlagsAttribute));
+            _termAtt = AddAttribute<TermAttribute>();
+            _posIncrAtt = AddAttribute<PositionIncrementAttribute>();
+            _payloadAtt = AddAttribute<PayloadAttribute>();
+            _offsetAtt = AddAttribute<OffsetAttribute>();
+            _typeAtt = AddAttribute<TypeAttribute>();
+            _flagsAtt = AddAttribute<FlagsAttribute>();
             // ReSharper restore DoNotCallOverridableMethodsInConstructor
 
-            _inTermAtt = (TermAttribute) input.AddAttribute(typeof (TermAttribute));
-            _inPosIncrAtt = (PositionIncrementAttribute) input.AddAttribute(typeof (PositionIncrementAttribute));
-            _inPayloadAtt = (PayloadAttribute) input.AddAttribute(typeof (PayloadAttribute));
-            _inOffsetAtt = (OffsetAttribute) input.AddAttribute(typeof (OffsetAttribute));
-            _inTypeAtt = (TypeAttribute) input.AddAttribute(typeof (TypeAttribute));
-            _inFlagsAtt = (FlagsAttribute) input.AddAttribute(typeof (FlagsAttribute));
+            _inTermAtt = input.AddAttribute<TermAttribute>();
+            _inPosIncrAtt = input.AddAttribute<PositionIncrementAttribute>();
+            _inPayloadAtt = input.AddAttribute<PayloadAttribute>();
+            _inOffsetAtt = input.AddAttribute<OffsetAttribute>();
+            _inTypeAtt = input.AddAttribute<TypeAttribute>();
+            _inFlagsAtt = input.AddAttribute<FlagsAttribute>();
         }
 
         public int MinimumShingleSize { get; set; }
 
         public int MaximumShingleSize { get; set; }
 
-        public Matrix.Matrix Matrix { get; set; }
+        public Matrix Matrix { get; set; }
 
         public Char? SpacerCharacter { get; set; }
 
@@ -287,7 +288,7 @@ namespace Lucene.Net.Analyzers.Shingle
         {
             if (Matrix == null)
             {
-                Matrix = new Matrix.Matrix();
+                Matrix = new Matrix();
 
                 // fill matrix with maximumShingleSize columns
                 while (Matrix.Columns.Count < MaximumShingleSize && ReadColumn())
@@ -333,25 +334,16 @@ namespace Lucene.Net.Analyzers.Shingle
             return token;
         }
 
-        /// <summary>
-        /// Deprecated: Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.
-        /// </summary>
-        /// <param name="reusableToken"></param>
-        /// <returns></returns>
-        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
-        public override sealed Token Next(Token reusableToken)
-        {
-            return base.Next(reusableToken);
-        }
-        
-        /// <summary>
-        /// Deprecated: Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.
-        /// </summary>
-        /// <returns></returns>
-        [Obsolete("The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling Next(Token) or using the new IncrementToken() method with the new AttributeSource API.")]
-        public override sealed Token Next()
+        private Token GetNextToken(Token token)
         {
-            return base.Next();
+            if (!this.IncrementToken()) return null;
+            token.SetTermBuffer(_termAtt.TermBuffer(), 0, _termAtt.TermLength());
+            token.SetPositionIncrement(_posIncrAtt.GetPositionIncrement());
+            token.SetFlags(_flagsAtt.GetFlags());
+            token.SetOffset(_offsetAtt.StartOffset(), _offsetAtt.EndOffset());
+            token.SetType(_typeAtt.Type());
+            token.SetPayload(_payloadAtt.GetPayload());
+            return token;
         }
 
         /// <summary>
@@ -377,12 +369,12 @@ namespace Lucene.Net.Analyzers.Shingle
                         _currentShingleLength == 1 && 
                         (_currentPermutationRows[_currentPermutationTokensStartOffset].Column.IsFirst || _currentPermutationRows[_currentPermutationTokensStartOffset].Column.IsLast))
                     {
-                        return Next();
+                        return GetNextToken(reusableToken);
                     }
 
                     var termLength = 0;
 
-                    var shingle = new SupportClass.EquatableList<Token>();
+                    var shingle = new EquatableList<Token>();
 
                     for (int i = 0; i < _currentShingleLength; i++)
                     {
@@ -521,7 +513,7 @@ namespace Lucene.Net.Analyzers.Shingle
         }
 
         /// <summary>
-        /// Final touch of a shingle token before it is passed on to the consumer from method <see cref="Next(Token)"/>.
+        /// Final touch of a shingle token before it is passed on to the consumer from method <see cref="IncrementToken()"/>.
         /// 
         /// Calculates and sets type, flags, position increment, start/end offsets and weight.
         /// </summary>

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Shingle/TokenPositioner.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Shingle/TokenPositioner.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Shingle/TokenPositioner.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Shingle/TokenPositioner.cs Mon Nov 21 04:44:55 2011
@@ -1,4 +1,4 @@
-/*
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Sinks/DateRecognizerSinkFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Sinks/DateRecognizerSinkFilter.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Sinks/DateRecognizerSinkFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Sinks/DateRecognizerSinkFilter.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,60 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Sinks
+{
+    /**
+  * Attempts to parse the token's term text (<see cref="Lucene.Net.Analysis.Token.TermBuffer()"/>) as a Date using a <see cref="System.IFormatProvider"/>.
+  * If the value is a Date, it will add it to the sink.
+  * <p/> 
+  *
+  **/
+    public class DateRecognizerSinkFilter : TeeSinkTokenFilter.SinkFilter
+    {
+        public const string DATE_TYPE = "date";
+
+        protected IFormatProvider dateFormat;
+        protected TermAttribute termAtt;
+
+        /**
+         * Uses <see cref="System.Globalization.CultureInfo.CurrentCulture"/> as the <see cref="IFormatProvider"/> object.
+         */
+        public DateRecognizerSinkFilter()
+            : this(System.Globalization.CultureInfo.CurrentCulture)
+        {
+
+        }
+
+        public DateRecognizerSinkFilter(IFormatProvider dateFormat)
+        {
+            this.dateFormat = dateFormat;
+        }
+
+        public override bool Accept(AttributeSource source)
+        {
+            if (termAtt == null)
+            {
+                termAtt = source.AddAttribute<TermAttribute>();
+            }
+            try
+            {
+                DateTime date = DateTime.Parse(termAtt.Term(), dateFormat);//We don't care about the date, just that we can parse it as a date
+                if (date != null)
+                {
+                    return true;
+                }
+            }
+            catch (FormatException e)
+            {
+
+            }
+
+            return false;
+        }
+
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Sinks/TokenRangeSinkFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Sinks/TokenRangeSinkFilter.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Sinks/TokenRangeSinkFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Sinks/TokenRangeSinkFilter.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,42 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Sinks
+{
+    public class TokenRangeSinkFilter : TeeSinkTokenFilter.SinkFilter
+    {
+        private int lower;
+        private int upper;
+        private int count;
+
+        public TokenRangeSinkFilter(int lower, int upper)
+        {
+            this.lower = lower;
+            this.upper = upper;
+        }
+
+        public override bool Accept(AttributeSource source)
+        {
+            try
+            {
+                if (count >= lower && count < upper)
+                {
+                    return true;
+                }
+                return false;
+            }
+            finally
+            {
+                count++;
+            }
+        }
+
+        public override void Reset()
+        {
+            count = 0;
+        }
+    }
+}

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Sinks/TokenTypeSinkFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Sinks/TokenTypeSinkFilter.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Sinks/TokenTypeSinkFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Sinks/TokenTypeSinkFilter.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,30 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Sinks
+{
+    public class TokenTypeSinkFilter : TeeSinkTokenFilter.SinkFilter
+    {
+        private string typeToMatch;
+        private TypeAttribute typeAtt;
+
+        public TokenTypeSinkFilter(string typeToMatch)
+        {
+            this.typeToMatch = typeToMatch;
+        }
+
+        public override bool Accept(AttributeSource source)
+        {
+            if (typeAtt == null)
+            {
+                typeAtt = source.AddAttribute<TypeAttribute>();
+            }
+
+            return typeToMatch.Equals(typeAtt.Type());
+        }
+    }
+}

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Th/ThaiAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Th/ThaiAnalyzer.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Th/ThaiAnalyzer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Th/ThaiAnalyzer.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,73 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis.Standard;
+using Version=Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis.Th
+{
+    /**
+     * {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words.
+     * @version 0.2
+     *
+     * <p><b>NOTE</b>: This class uses the same {@link Version}
+     * dependent settings as {@link StandardAnalyzer}.</p>
+     */
+    public class ThaiAnalyzer : Analyzer
+    {
+        private readonly Version matchVersion;
+
+        public ThaiAnalyzer(Version matchVersion)
+        {
+            SetOverridesTokenStreamMethod(typeof(ThaiAnalyzer));
+            this.matchVersion = matchVersion;
+        }
+
+        public override TokenStream TokenStream(String fieldName, TextReader reader)
+        {
+            TokenStream ts = new StandardTokenizer(matchVersion, reader);
+            ts = new StandardFilter(ts);
+            ts = new ThaiWordFilter(ts);
+            ts = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
+                                ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+            return ts;
+        }
+
+        private class SavedStreams
+        {
+            protected internal Tokenizer source;
+            protected internal TokenStream result;
+        };
+
+        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
+        {
+            if (overridesTokenStreamMethod)
+            {
+                // LUCENE-1678: force fallback to tokenStream() if we
+                // have been subclassed and that subclass overrides
+                // tokenStream but not reusableTokenStream
+                return TokenStream(fieldName, reader);
+            }
+
+            SavedStreams streams = (SavedStreams)GetPreviousTokenStream();
+            if (streams == null)
+            {
+                streams = new SavedStreams();
+                streams.source = new StandardTokenizer(matchVersion, reader);
+                streams.result = new StandardFilter(streams.source);
+                streams.result = new ThaiWordFilter(streams.result);
+                streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
+                                                streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+                SetPreviousTokenStream(streams);
+            }
+            else
+            {
+                streams.source.Reset(reader);
+                streams.result.Reset(); // reset the ThaiWordFilter's state
+            }
+            return streams.result;
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Th/ThaiWordFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Th/ThaiWordFilter.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Th/ThaiWordFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Th/ThaiWordFilter.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,84 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis.Th
+{
+    /**
+     * A <see cref="TokenFilter"/> that uses a word break iterator (java.text.BreakIterator in the original Java version) to break each 
+     * Token that is Thai into separate Token(s) for each Thai word.
+     * <p>WARNING: this filter may not work correctly with all JREs.
+     * It is known to work with Sun/Oracle and Harmony JREs.
+     */
+    public sealed class ThaiWordFilter : TokenFilter
+    {
+        //private BreakIterator breaker = null;
+
+        private TermAttribute termAtt;
+        private OffsetAttribute offsetAtt;
+
+        private State thaiState = null;
+        // I'm sure this is far slower than if we just created a simple UnicodeBlock class
+        // considering this is used on a single char, we have to create a new string for it,
+        // via ToString(), so we can then run a costly(?) regex on it.  Yikes.
+        private Regex _isThaiRegex = new Regex(@"\p{IsThai}", RegexOptions.Compiled);
+
+        public ThaiWordFilter(TokenStream input)
+            : base(input)
+        {
+            throw new NotSupportedException("PORT ISSUES");
+            //breaker = BreakIterator.getWordInstance(new Locale("th"));
+            //termAtt = AddAttribute<TermAttribute>();
+            //offsetAtt = AddAttribute<OffsetAttribute>();
+        }
+
+        public sealed override bool IncrementToken()
+        {
+            //int end;
+            //if (thaiState != null)
+            //{
+            //    int start = breaker.Current();
+            //    end = breaker.next();
+            //    if (end != BreakIterator.DONE)
+            //    {
+            //        RestoreState(thaiState);
+            //        termAtt.SetTermBuffer(termAtt.TermBuffer(), start, end - start);
+            //        offsetAtt.SetOffset(offsetAtt.StartOffset() + start, offsetAtt.StartOffset() + end);
+            //        return true;
+            //    }
+            //    thaiState = null;
+            //}
+
+            //if (input.IncrementToken() == false || termAtt.TermLength() == 0)
+            //    return false;
+
+            //String text = termAtt.Term();
+            //if (!_isThaiRegex.Match(new string(new[]{text[0]})).Success)
+            //{
+            //    termAtt.SetTermBuffer(text.ToLower());
+            //    return true;
+            //}
+
+            //thaiState = CaptureState();
+
+            //breaker.SetText(text);
+            //end = breaker.next();
+            //if (end != BreakIterator.DONE)
+            //{
+            //    termAtt.SetTermBuffer(text, 0, end);
+            //    offsetAtt.SetOffset(offsetAtt.StartOffset(), offsetAtt.StartOffset() + end);
+            //    return true;
+            //}
+            return false;
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            thaiState = null;
+        }
+    }
+}
\ No newline at end of file

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/WordlistLoader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/WordlistLoader.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/WordlistLoader.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/WordlistLoader.cs Mon Nov 21 04:44:55 2011
@@ -1,125 +1,125 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.IO;
-using System.Collections;
-
-namespace Lucene.Net.Analysis
-{
-	/// <summary>
-	/// Loads a text file and adds every line as an entry to a Hashtable. Every line
-	/// should contain only one word. If the file is not found or on any error, an
-	/// empty table is returned.
-	/// </summary>
-	public class WordlistLoader
-	{
-		/// <summary>
-		/// Load words table from the file
-		/// </summary>
-		/// <param name="path">Path to the wordlist</param>
-		/// <param name="wordfile">Name of the wordlist</param>
-		/// <returns></returns>
-		public static Hashtable GetWordtable( String path, String wordfile ) 
-		{
-			if ( path == null || wordfile == null ) 
-			{
-				return new Hashtable();
-			}
-			return GetWordtable(new FileInfo(path + "\\" + wordfile));
-		}
-
-		/// <summary>
-		/// Load words table from the file
-		/// </summary>
-		/// <param name="wordfile">Complete path to the wordlist</param>
-		/// <returns></returns>
-		public static Hashtable GetWordtable( String wordfile ) 
-		{
-			if ( wordfile == null ) 
-			{
-				return new Hashtable();
-			}
-			return GetWordtable( new FileInfo( wordfile ) );
-		}
-
-		/// <summary>
-		/// Load words table from the file 
-		/// </summary>
-		/// <param name="wordfile">File containing the wordlist</param>
-		/// <returns></returns>
-		public static Hashtable GetWordtable( FileInfo wordfile ) 
-		{
-			if ( wordfile == null ) 
-			{
-				return new Hashtable();
-			}			
-			StreamReader lnr = new StreamReader(wordfile.FullName);
-			return GetWordtable(lnr);
-		}
-
-		/// <summary>
-		/// Reads lines from a Reader and adds every line as an entry to a HashSet (omitting
-		/// leading and trailing whitespace). Every line of the Reader should contain only
-		/// one word. The words need to be in lowercase if you make use of an
-		/// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
-		/// </summary>
-		/// <param name="reader">Reader containing the wordlist</param>
-		/// <returns>A Hashtable with the reader's words</returns>
-		public static Hashtable GetWordtable(TextReader reader)
-		{
-			Hashtable result = new Hashtable();			
-			try 
-			{				
-				ArrayList stopWords = new ArrayList();
-				String word = null;
-				while ( ( word = reader.ReadLine() ) != null ) 
-				{
-					stopWords.Add(word.Trim());
-				}
-				result = MakeWordTable( (String[])stopWords.ToArray(typeof(string)), stopWords.Count);
-			}
-				// On error, use an empty table
-			catch (IOException) 
-			{
-				result = new Hashtable();
-			}
-			return result;
-		}
-
-
-		/// <summary>
-		/// Builds the wordlist table.
-		/// </summary>
-		/// <param name="words">Word that where read</param>
-		/// <param name="length">Amount of words that where read into <tt>words</tt></param>
-		/// <returns></returns>
-		private static Hashtable MakeWordTable( String[] words, int length ) 
-		{
-			Hashtable table = new Hashtable( length );
-			for ( int i = 0; i < length; i++ ) 
-			{
-				table.Add(words[i], words[i]);
-			}
-			return table;
-		}
-	}
-}
\ No newline at end of file
+///*
+// *
+// * Licensed to the Apache Software Foundation (ASF) under one
+// * or more contributor license agreements.  See the NOTICE file
+// * distributed with this work for additional information
+// * regarding copyright ownership.  The ASF licenses this file
+// * to you under the Apache License, Version 2.0 (the
+// * "License"); you may not use this file except in compliance
+// * with the License.  You may obtain a copy of the License at
+// *
+// *   http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing,
+// * software distributed under the License is distributed on an
+// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// * KIND, either express or implied.  See the License for the
+// * specific language governing permissions and limitations
+// * under the License.
+// *
+//*/
+
+//using System;
+//using System.IO;
+//using System.Collections;
+
+//namespace Lucene.Net.Analysis
+//{
+//    /// <summary>
+//    /// Loads a text file and adds every line as an entry to a Hashtable. Every line
+//    /// should contain only one word. If the file is not found or on any error, an
+//    /// empty table is returned.
+//    /// </summary>
+//    public class WordlistLoader
+//    {
+//        /// <summary>
+//        /// Load words table from the file
+//        /// </summary>
+//        /// <param name="path">Path to the wordlist</param>
+//        /// <param name="wordfile">Name of the wordlist</param>
+//        /// <returns></returns>
+//        public static Hashtable GetWordSet( String path, String wordfile ) 
+//        {
+//            if ( path == null || wordfile == null ) 
+//            {
+//                return new Hashtable();
+//            }
+//            return GetWordSet(new FileInfo(path + "\\" + wordfile));
+//        }
+
+//        /// <summary>
+//        /// Load words table from the file
+//        /// </summary>
+//        /// <param name="wordfile">Complete path to the wordlist</param>
+//        /// <returns></returns>
+//        public static Hashtable GetWordSet( String wordfile ) 
+//        {
+//            if ( wordfile == null ) 
+//            {
+//                return new Hashtable();
+//            }
+//            return GetWordSet( new FileInfo( wordfile ) );
+//        }
+
+//        /// <summary>
+//        /// Load words table from the file 
+//        /// </summary>
+//        /// <param name="wordfile">File containing the wordlist</param>
+//        /// <returns></returns>
+//        public static Hashtable GetWordSet( FileInfo wordfile ) 
+//        {
+//            if ( wordfile == null ) 
+//            {
+//                return new Hashtable();
+//            }			
+//            StreamReader lnr = new StreamReader(wordfile.FullName);
+//            return GetWordSet(lnr);
+//        }
+
+//        /// <summary>
+//        /// Reads lines from a Reader and adds every line as an entry to a HashSet (omitting
+//        /// leading and trailing whitespace). Every line of the Reader should contain only
+//        /// one word. The words need to be in lowercase if you make use of an
+//        /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+//        /// </summary>
+//        /// <param name="reader">Reader containing the wordlist</param>
+//        /// <returns>A Hashtable with the reader's words</returns>
+//        public static Hashtable GetWordSet(TextReader reader)
+//        {
+//            Hashtable result = new Hashtable();			
+//            try 
+//            {				
+//                ArrayList stopWords = new ArrayList();
+//                String word = null;
+//                while ( ( word = reader.ReadLine() ) != null ) 
+//                {
+//                    stopWords.Add(word.Trim());
+//                }
+//                result = MakeWordTable( (String[])stopWords.ToArray(typeof(string)), stopWords.Count);
+//            }
+//                // On error, use an empty table
+//            catch (IOException) 
+//            {
+//                result = new Hashtable();
+//            }
+//            return result;
+//        }
+
+
+//        /// <summary>
+//        /// Builds the wordlist table.
+//        /// </summary>
+//        /// <param name="words">Words that were read</param>
+//        /// <param name="length">Amount of words that were read into <tt>words</tt></param>
+//        /// <returns></returns>
+//        private static Hashtable MakeWordTable( String[] words, int length ) 
+//        {
+//            Hashtable table = new Hashtable( length );
+//            for ( int i = 0; i < length; i++ ) 
+//            {
+//                table.Add(words[i], words[i]);
+//            }
+//            return table;
+//        }
+//    }
+//}

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/TeeSinkTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/TeeSinkTokenFilter.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/TeeSinkTokenFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/TeeSinkTokenFilter.cs Mon Nov 21 04:44:55 2011
@@ -182,7 +182,7 @@ namespace Lucene.Net.Analysis
 			/// <summary> Called by <see cref="SinkTokenStream.Reset()" />. This method does nothing by default
 			/// and can optionally be overridden.
 			/// </summary>
-			public void  Reset()
+			public virtual void Reset()
 			{
 				// nothing to do; can be overridden
 			}



Mime
View raw message