lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [24/62] [abbrv] lucenenet git commit: Deleted obsolete Contrib folder
Date Sat, 01 Apr 2017 01:09:17 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs b/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
deleted file mode 100644
index f9130e8..0000000
--- a/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
+++ /dev/null
@@ -1,638 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using Lucene.Net.Analysis.Miscellaneous;
-using Lucene.Net.Analysis.Shingle.Codec;
-using Lucene.Net.Analysis.Shingle.Matrix;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Support;
-
-namespace Lucene.Net.Analysis.Shingle
-{
-    /// <summary>
-    /// <p>A ShingleMatrixFilter constructs shingles (token n-grams) from a token stream.
-    /// In other words, it creates combinations of tokens as a single token.</p>
-    ///
-    /// <p>For example, the sentence "please divide this sentence into shingles"
-    /// might be tokenized into shingles "please divide", "divide this",
-    /// "this sentence", "sentence into", and "into shingles".</p>
-    ///
-    /// <p>Using a shingle filter at index and query time can in some instances
-    /// be used to replace phrase queries, especially those with 0 slop.</p>
-    ///
-    /// <p>Without a spacer character
-    /// it can be used to handle composition and decomposition of words
-    /// such as searching for "multi dimensional" instead of "multidimensional".
-    /// It is a rather common human problem at query time
-    /// in several languages, notably the northern Germanic branch.</p>
-    ///
-    /// <p>Shingles are amongst many things also known to solve problems
-    /// in spell checking, language detection and document clustering.</p>
-    ///
-    /// <p>This filter is backed by a three dimensional column oriented matrix
-    /// used to create permutations of the second dimension, the rows,
-    /// and leaves the third, the z-axis, for multi token synonyms.</p>
-    ///
-    /// <p>In order to use this filter you need to define a way of positioning
-    /// the input stream tokens in the matrix. This is done using a
-    /// ShingleMatrixFilter.TokenSettingsCodec.
-    /// There are three simple implementations for demonstrational purposes,
-    /// see ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec,
-    /// ShingleMatrixFilter.TwoDimensionalNonWeightedSynonymTokenSettingsCodec
-    /// and ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec.</p>
-    ///
-    /// <p>Consider this token matrix:</p>
-    /// <pre>
-    ///  Token[column][row][z-axis]{
-    ///    {{hello}, {greetings, and, salutations}},
-    ///    {{world}, {earth}, {tellus}}
-    ///  };
-    /// </pre>
-    ///
-    /// It would produce the following 2-3 gram sized shingles:
-    ///
-    /// <pre>
-    /// "hello_world"
-    /// "greetings_and"
-    /// "greetings_and_salutations"
-    /// "and_salutations"
-    /// "and_salutations_world"
-    /// "salutations_world"
-    /// "hello_earth"
-    /// "and_salutations_earth"
-    /// "salutations_earth"
-    /// "hello_tellus"
-    /// "and_salutations_tellus"
-    /// "salutations_tellus"
-    ///  </pre>
-    ///
-    /// <p>This implementation can be rather heap demanding
-    /// if (maximum shingle size - minimum shingle size) is a great number and the stream contains many columns,
-    /// or if each column contains a great number of rows.</p>
-    ///
-    /// <p>The problem is that in order to avoid producing duplicates
-    /// the filter needs to keep track of any shingle already produced and returned to the consumer.</p>
-    ///
-    /// <p>There is a bit of resource management to handle this
-    /// but it would of course be much better if the filter was written
-    /// so it never created the same shingle more than once in the first place.</p>
-    ///
-    /// <p>The filter also has basic support for calculating weights for the shingles
-    /// based on the weights of the tokens from the input stream, output shingle size, etc.
-    /// See CalculateShingleWeight.
-    /// <p/>
-    /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
-    /// the ones located in Lucene.Net.Analysis.Tokenattributes.</p>
-    /// </summary>
-    public sealed class ShingleMatrixFilter : TokenStream
-    {
-        // Defaults picked up by the constructor overloads that do not take the
-        // corresponding argument explicitly.
-        public static Char DefaultSpacerCharacter = '_';
-        public static TokenSettingsCodec DefaultSettingsCodec = new OneDimensionalNonWeightedTokenSettingsCodec();
-        public static bool IgnoringSinglePrefixOrSuffixShingleByDefault;
-
-        // Fields prefixed with "_in" are attributes registered on the input stream (_input);
-        // the unprefixed attribute fields are registered on this stream and are filled in by IncrementToken().
-        private readonly IFlagsAttribute _flagsAtt;
-        private readonly IFlagsAttribute _inFlagsAtt;
-
-        private readonly IOffsetAttribute _inOffsetAtt;
-        private readonly IPayloadAttribute _inPayloadAtt;
-        private readonly IPositionIncrementAttribute _inPosIncrAtt;
-        private readonly ITermAttribute _inTermAtt;
-        private readonly ITypeAttribute _inTypeAtt;
-        private readonly TokenStream _input;
-        private readonly IOffsetAttribute _offsetAtt;
-        private readonly IPayloadAttribute _payloadAtt;
-        private readonly IPositionIncrementAttribute _posIncrAtt;
-        // Sentinel instance: ProduceNextToken returns it to signal "call me again".
-        private readonly Token _requestNextToken = new Token();
-        // Token instance handed to ProduceNextToken on every call from IncrementToken().
-        private readonly Token _reusableToken = new Token();
-        private readonly TokenSettingsCodec _settingsCodec;
-
-        /// <summary>
-        /// A set containing the shingles that have already been produced,
-        /// used to avoid producing the same shingle more than once.
-        /// 
-        /// <p>
-        /// NOTE: The Java List implementation uses a different equality comparison scheme
-        /// than .NET's generic List, so the shingles are stored as EquatableList instances
-        /// to get the same behaviour.
-        /// </p>
-        /// </summary>
-        private readonly HashSet<EquatableList<Token>> _shinglesSeen =
-            new HashSet<EquatableList<Token>>(); 
-
-        private readonly ITermAttribute _termAtt;
-        private readonly ITypeAttribute _typeAtt;
-        // All tokens of the current row permutation, in order.
-        private List<Token> _currentPermuationTokens;
-
-        // Index to what row a token in _currentPermuationTokens represents
-        private List<Row> _currentPermutationRows;
-
-        // Position in _currentPermuationTokens where the shingle currently being built starts.
-        private int _currentPermutationTokensStartOffset;
-        // Number of tokens in the shingle currently being built.
-        private int _currentShingleLength;
-        private MatrixPermutationIterator _permutations;
-        // Look-ahead token read by ReadColumn() that belongs to the next column.
-        private Token _readColumnBuf;
-
-
-        /// <summary>
-        /// Creates a shingle filter based on a user defined matrix.
-        /// 
-        /// The filter /will/ delete columns from the input matrix! You will not be able to reset the filter if you used this constructor.
-        /// todo: don't touch the matrix! use a bool, set the input stream to null or something, and keep track of where in the matrix we are at.
-        /// 
-        /// </summary>
-        /// <param name="matrix">the input base for creating shingles. Does not need to contain any information until ShingleMatrixFilter.IncrementToken() is called the first time.</param>
-        /// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
-        /// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
-        /// <param name="spacerCharacter">character to use between texts of the token parts in a shingle. This overload takes a non-nullable Char, so a spacer is always set; assign null to SpacerCharacter afterwards for none.</param>
-        /// <param name="ignoringSinglePrefixOrSuffixShingle">if true, single token shingles taken from the first or the last column will not be produced as shingles. Useful when adding boundary marker tokens such as '^' and '$'.</param>
-        /// <param name="settingsCodec">codec used to read input token weight and matrix positioning.</param>
-        public ShingleMatrixFilter(Matrix.Matrix matrix, int minimumShingleSize, int maximumShingleSize, Char spacerCharacter, bool ignoringSinglePrefixOrSuffixShingle, TokenSettingsCodec settingsCodec)
-        {
-            Matrix = matrix;
-            MinimumShingleSize = minimumShingleSize;
-            MaximumShingleSize = maximumShingleSize;
-            SpacerCharacter = spacerCharacter;
-            IsIgnoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
-            _settingsCodec = settingsCodec;
-
-            // Attributes exposed by this stream; IncrementToken() copies each produced shingle into them.
-            // ReSharper disable DoNotCallOverridableMethodsInConstructor
-            _termAtt = AddAttribute<ITermAttribute>();
-            _posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
-            _payloadAtt = AddAttribute<IPayloadAttribute>();
-            _offsetAtt = AddAttribute<IOffsetAttribute>();
-            _typeAtt = AddAttribute<ITypeAttribute>();
-            _flagsAtt = AddAttribute<IFlagsAttribute>();
-            // ReSharper restore DoNotCallOverridableMethodsInConstructor
-
-            // set the input to be an empty token stream, we already have the data.
-            _input = new EmptyTokenStream();
-
-            // Register the same attributes on the (empty) input so the _in* fields are never null.
-            _inTermAtt = _input.AddAttribute<ITermAttribute>();
-            _inPosIncrAtt = _input.AddAttribute<IPositionIncrementAttribute>();
-            _inPayloadAtt = _input.AddAttribute<IPayloadAttribute>();
-            _inOffsetAtt = _input.AddAttribute<IOffsetAttribute>();
-            _inTypeAtt = _input.AddAttribute<ITypeAttribute>();
-            _inFlagsAtt = _input.AddAttribute<IFlagsAttribute>();
-        }
-
-        /// <summary>
-        /// Creates a shingle filter using default settings.
-        /// 
-        /// Uses ShingleMatrixFilter.DefaultSpacerCharacter as the spacer and, through the overloads
-        /// it delegates to, ShingleMatrixFilter.IgnoringSinglePrefixOrSuffixShingleByDefault
-        /// and ShingleMatrixFilter.DefaultSettingsCodec.
-        /// </summary>
-        /// <param name="input">stream from which to construct the matrix</param>
-        /// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
-        /// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
-        public ShingleMatrixFilter(TokenStream input, int minimumShingleSize, int maximumShingleSize)
-            : this(input, minimumShingleSize, maximumShingleSize, DefaultSpacerCharacter) { }
-
-        /// <summary>
-        /// Creates a shingle filter using default settings for everything but the spacer character.
-        /// 
-        /// Uses IgnoringSinglePrefixOrSuffixShingleByDefault and, through the overload
-        /// it delegates to, DefaultSettingsCodec.
-        /// </summary>
-        /// <param name="input">stream from which to construct the matrix</param>
-        /// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
-        /// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
-        /// <param name="spacerCharacter">character to use between texts of the token parts in a shingle. null for none. </param>
-        public ShingleMatrixFilter(TokenStream input, int minimumShingleSize, int maximumShingleSize, Char? spacerCharacter)
-            : this( input, minimumShingleSize, maximumShingleSize, spacerCharacter, IgnoringSinglePrefixOrSuffixShingleByDefault) { }
-
-        /// <summary>
-        /// Creates a shingle filter using the default <see cref="TokenSettingsCodec"/>.
-        /// 
-        /// Uses DefaultSettingsCodec as the codec.
-        /// </summary>
-        /// <param name="input">stream from which to construct the matrix</param>
-        /// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
-        /// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
-        /// <param name="spacerCharacter">character to use between texts of the token parts in a shingle. null for none.</param>
-        /// <param name="ignoringSinglePrefixOrSuffixShingle">if true, single token shingles taken from the first or the last column will not be produced as shingles. Useful when adding boundary marker tokens such as '^' and '$'.</param>
-        public ShingleMatrixFilter(TokenStream input, int minimumShingleSize, int maximumShingleSize, Char? spacerCharacter, bool ignoringSinglePrefixOrSuffixShingle)
-            : this(input, minimumShingleSize, maximumShingleSize, spacerCharacter, ignoringSinglePrefixOrSuffixShingle, DefaultSettingsCodec) { }
-
-        /// <summary>
-        /// Creates a shingle filter with ad hoc parameter settings.
-        /// </summary>
-        /// <param name="input">stream from which to construct the matrix</param>
-        /// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
-        /// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
-        /// <param name="spacerCharacter">character to use between texts of the token parts in a shingle. null for none.</param>
-        /// <param name="ignoringSinglePrefixOrSuffixShingle">if true, single token shingles taken from the first or the last column will not be produced as shingles. Useful when adding boundary marker tokens such as '^' and '$'.</param>
-        /// <param name="settingsCodec">codec used to read input token weight and matrix positioning.</param>
-        /// <exception cref="ArgumentNullException">if <paramref name="input"/> is null.</exception>
-        public ShingleMatrixFilter(TokenStream input, int minimumShingleSize, int maximumShingleSize, Char? spacerCharacter, bool ignoringSinglePrefixOrSuffixShingle, TokenSettingsCodec settingsCodec)
-        {
-            // Fail fast with a descriptive exception; a null input would otherwise only surface
-            // as a NullReferenceException when the input attributes are registered below.
-            if (input == null)
-                throw new ArgumentNullException("input");
-
-            _input = input;
-            MinimumShingleSize = minimumShingleSize;
-            MaximumShingleSize = maximumShingleSize;
-            SpacerCharacter = spacerCharacter;
-            IsIgnoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
-            _settingsCodec = settingsCodec;
-
-            // Attributes exposed by this stream; IncrementToken() copies each produced shingle into them.
-            // ReSharper disable DoNotCallOverridableMethodsInConstructor
-            _termAtt = AddAttribute<ITermAttribute>();
-            _posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
-            _payloadAtt = AddAttribute<IPayloadAttribute>();
-            _offsetAtt = AddAttribute<IOffsetAttribute>();
-            _typeAtt = AddAttribute<ITypeAttribute>();
-            _flagsAtt = AddAttribute<IFlagsAttribute>();
-            // ReSharper restore DoNotCallOverridableMethodsInConstructor
-
-            // Attributes of the input stream; GetNextInputToken() reads them after each input.IncrementToken().
-            _inTermAtt = input.AddAttribute<ITermAttribute>();
-            _inPosIncrAtt = input.AddAttribute<IPositionIncrementAttribute>();
-            _inPayloadAtt = input.AddAttribute<IPayloadAttribute>();
-            _inOffsetAtt = input.AddAttribute<IOffsetAttribute>();
-            _inTypeAtt = input.AddAttribute<ITypeAttribute>();
-            _inFlagsAtt = input.AddAttribute<IFlagsAttribute>();
-        }
-
-        /// <summary>Minimum number of tokens in any shingle.</summary>
-        public int MinimumShingleSize { get; set; }
-
-        /// <summary>Maximum number of tokens in any shingle.</summary>
-        public int MaximumShingleSize { get; set; }
-
-        /// <summary>
-        /// The matrix shingles are produced from. When null, IncrementToken() creates one
-        /// and fills it from the input stream.
-        /// </summary>
-        public Matrix.Matrix Matrix { get; set; }
-
-        /// <summary>Character placed between the token texts of a shingle; null for none.</summary>
-        public Char? SpacerCharacter { get; set; }
-
-        /// <summary>
-        /// If true, single token shingles whose token comes from the first or the last column are not produced.
-        /// </summary>
-        public bool IsIgnoringSinglePrefixOrSuffixShingle { get; set; }
-
-        /// <summary>
-        /// Drops the permutation iterator, forgets all shingles seen so far and resets the input stream.
-        /// </summary>
-        public override void Reset()
-        {
-            // NOTE(review): _currentPermuationTokens, Matrix and _readColumnBuf are left untouched here,
-            // so state from before the reset can still influence the next IncrementToken() call - confirm
-            // whether that is intended.
-            _permutations = null;
-            _shinglesSeen.Clear();
-            _input.Reset();
-        }
-
-        /// <summary>
-        /// Releases the input stream this filter reads from.
-        /// </summary>
-        /// <param name="disposing">true when called from Dispose(); managed resources are only released in that case.</param>
-        protected override void Dispose(bool disposing)
-        {
-            // This class derives from TokenStream directly, so nothing else in it disposes the
-            // stream it wraps; without this the input (and whatever it holds) is never released.
-            if (disposing && _input != null)
-                _input.Dispose();
-        }
-
-        /// <summary>
-        /// Advances this stream to the next shingle and copies it into the stream attributes.
-        /// </summary>
-        /// <returns>true if a shingle was produced, false when the filter is exhausted.</returns>
-        public override sealed bool IncrementToken()
-        {
-            if (Matrix == null)
-            {
-                Matrix = new Matrix.Matrix();
-
-                // Buffer columns from the input until the matrix holds MaximumShingleSize
-                // of them or the input has no more columns to give.
-                bool keepReading = true;
-                while (keepReading)
-                {
-                    keepReading = Matrix.Columns.Count < MaximumShingleSize && ReadColumn();
-                }
-            }
-
-            // ProduceNextToken is driven step by step from here instead of calling itself,
-            // as the complexity of a large matrix then would require a multi gigabyte sized stack.
-            Token produced = ProduceNextToken(_reusableToken);
-            while (produced == _requestNextToken)
-            {
-                produced = ProduceNextToken(_reusableToken);
-            }
-
-            if (produced != null)
-            {
-                ClearAttributes();
-
-                _termAtt.SetTermBuffer(produced.TermBuffer(), 0, produced.TermLength());
-                _posIncrAtt.PositionIncrement = produced.PositionIncrement;
-                _flagsAtt.Flags = produced.Flags;
-                _offsetAtt.SetOffset(produced.StartOffset, produced.EndOffset);
-                _typeAtt.Type = produced.Type;
-                _payloadAtt.Payload = produced.Payload;
-
-                return true;
-            }
-
-            return false;
-        }
-
-        /// <summary>
-        /// Advances the input stream and copies its current attribute values into the given token.
-        /// </summary>
-        /// <param name="token">token instance to fill in.</param>
-        /// <returns>the filled in token, or null if the input stream is exhausted.</returns>
-        private Token GetNextInputToken(Token token)
-        {
-            bool advanced = _input.IncrementToken();
-            if (advanced)
-            {
-                token.SetTermBuffer(_inTermAtt.TermBuffer(), 0, _inTermAtt.TermLength());
-                token.PositionIncrement = _inPosIncrAtt.PositionIncrement;
-                token.Flags = _inFlagsAtt.Flags;
-                token.SetOffset(_inOffsetAtt.StartOffset, _inOffsetAtt.EndOffset);
-                token.Type = _inTypeAtt.Type;
-                token.Payload = _inPayloadAtt.Payload;
-                return token;
-            }
-
-            return null;
-        }
-
-        /// <summary>
-        /// Advances this filter and copies its current attribute values into the given token.
-        /// </summary>
-        /// <param name="token">token instance to fill in.</param>
-        /// <returns>the filled in token, or null if this filter is exhausted.</returns>
-        private Token GetNextToken(Token token)
-        {
-            bool advanced = this.IncrementToken();
-            if (advanced)
-            {
-                token.SetTermBuffer(_termAtt.TermBuffer(), 0, _termAtt.TermLength());
-                token.PositionIncrement = _posIncrAtt.PositionIncrement;
-                token.Flags = _flagsAtt.Flags;
-                token.SetOffset(_offsetAtt.StartOffset, _offsetAtt.EndOffset);
-                token.Type = _typeAtt.Type;
-                token.Payload = _payloadAtt.Payload;
-                return token;
-            }
-
-            return null;
-        }
-
-        /// <summary>
-        /// Performs one step of shingle production.
-        /// 
-        /// The work is done in single steps driven by the loop in <see cref="IncrementToken()"/>
-        /// in order to avoid recursive calls, as the complexity of a fairly small matrix
-        /// then easily would require a gigabyte sized stack per thread.
-        /// </summary>
-        /// <param name="reusableToken">token instance that is filled in and returned when a shingle is produced.</param>
-        /// <returns>null if exhausted, the _requestNextToken instance if one more call is required for an answer,
-        /// or the instance passed as parameter reusableToken.</returns>
-        private Token ProduceNextToken(Token reusableToken)
-        {
-            if (_currentPermuationTokens != null)
-            {
-                _currentShingleLength++;
-
-                if (_currentShingleLength + _currentPermutationTokensStartOffset <= _currentPermuationTokens.Count
-                    && _currentShingleLength <= MaximumShingleSize)
-                {
-                    // it is possible to create at least one more shingle of the current matrix permutation
-
-                    if (IsIgnoringSinglePrefixOrSuffixShingle && _currentShingleLength == 1)
-                    {
-                        var startColumn = _currentPermutationRows[_currentPermutationTokensStartOffset].Column;
-                        if (startColumn.IsFirst || startColumn.IsLast)
-                        {
-                            // Skip this single token shingle. Ask the driving loop for one more step
-                            // rather than re-entering IncrementToken(): the outcome is the same, but
-                            // the stack does not grow with every skipped shingle.
-                            return _requestNextToken;
-                        }
-                    }
-
-                    var termLength = 0;
-
-                    var shingle = new EquatableList<Token>();
-
-                    for (int i = 0; i < _currentShingleLength; i++)
-                    {
-                        var shingleToken = _currentPermuationTokens[i + _currentPermutationTokensStartOffset];
-                        termLength += shingleToken.TermLength();
-                        shingle.Add(shingleToken);
-                    }
-                    if (SpacerCharacter != null)
-                        termLength += _currentShingleLength - 1;
-
-                    // only produce shingles that have not already been created
-                    if (!_shinglesSeen.Add(shingle))
-                        return _requestNextToken;
-
-                    // shingle token factory; the capacity is just an estimate with some headroom
-                    var sb = new StringBuilder(termLength + 10);
-                    foreach (var shingleToken in shingle)
-                    {
-                        if (SpacerCharacter != null &&  sb.Length > 0)
-                            sb.Append(SpacerCharacter);
-
-                        sb.Append(shingleToken.TermBuffer(), 0, shingleToken.TermLength());
-                    }
-
-                    reusableToken.SetTermBuffer(sb.ToString());
-                    UpdateToken(reusableToken, shingle, _currentPermutationTokensStartOffset, _currentPermutationRows,
-                                _currentPermuationTokens);
-
-                    return reusableToken;
-                }
-
-                // it is NOT possible to create one more shingle of the current matrix permutation
-                if (_currentPermutationTokensStartOffset < _currentPermuationTokens.Count - 1)
-                {
-                    // reset shingle size and move one step to the right in the current tokens permutation
-                    _currentPermutationTokensStartOffset++;
-                    _currentShingleLength = MinimumShingleSize - 1;
-                    return _requestNextToken;
-                }
-
-
-                // todo does this ever occur?
-                if (_permutations == null)
-                    return null;
-
-                if (!_permutations.HasNext())
-                {
-                    // load more data (if available) to the matrix;
-                    // the result is ignored, the column count is checked further down.
-                    if (_input != null)
-                        ReadColumn();
-
-                    // get rid of resources
-
-                    // delete the first column in the matrix
-                    var deletedColumn = Matrix.Columns[0];
-                    Matrix.Columns.RemoveAt(0);
-
-                    // remove all shingles seen that include any of the tokens from the deleted column.
-                    var deletedColumnTokens = deletedColumn.Rows.SelectMany(row => row.Tokens).ToList();
-
-                    // TODO: The unit tests did not cover this code path when it was ported, and it
-                    // differs significantly from the Java version. Write a unit test to make sure
-                    // this is a good port! -thoward
-                    _shinglesSeen.RemoveWhere(
-                        seen => seen.Any(part => deletedColumnTokens.Contains(part)));
-
-                    // exhausted
-                    if (Matrix.Columns.Count < MinimumShingleSize)
-                        return null;
-
-                    // create permutations of the matrix as it now looks
-                    _permutations = Matrix.PermutationIterator();
-                }
-
-                NextTokensPermutation();
-                return _requestNextToken;
-            }
-
-            // first call: no permutation has been loaded yet
-            if (_permutations == null)
-                _permutations = Matrix.PermutationIterator();
-
-            if (!_permutations.HasNext())
-                return null;
-
-            NextTokensPermutation();
-
-            return _requestNextToken;
-        }
-
-        /// <summary>
-        /// Loads the next permutation of rows: flattens the tokens of its rows into one list,
-        /// records for every such token the row it came from,
-        /// and finally resets the current (next) shingle size and offset.
-        /// </summary>
-        private void NextTokensPermutation()
-        {
-            var flattenedTokens = new List<Token>();
-            var owningRows = new List<Row>();
-
-            foreach (var permutedRow in _permutations.Next())
-            {
-                foreach (var rowToken in permutedRow.Tokens)
-                {
-                    // the two lists are kept parallel: same index, token and its row
-                    flattenedTokens.Add(rowToken);
-                    owningRows.Add(permutedRow);
-                }
-            }
-
-            _currentPermutationRows = owningRows;
-            _currentPermuationTokens = flattenedTokens;
-
-            _currentShingleLength = MinimumShingleSize - 1;
-            _currentPermutationTokensStartOffset = 0;
-        }
-
-        /// <summary>
-        /// Finishes a shingle token before <see cref="IncrementToken()"/> hands it to the consumer.
-        /// 
-        /// Sets type, flags, position increment and start/end offsets, then calculates and stores the weight.
-        /// </summary>
-        /// <param name="token">Shingle Token</param>
-        /// <param name="shingle">Tokens used to produce the shingle token.</param>
-        /// <param name="currentPermutationStartOffset">Start offset in parameter currentPermutationTokens</param>
-        /// <param name="currentPermutationRows">index to Matrix.Column.Row from the position of tokens in parameter currentPermutationTokens</param>
-        /// <param name="currentPermuationTokens">tokens of the current permutation of rows in the matrix. </param>
-        public void UpdateToken(Token token, List<Token> shingle, int currentPermutationStartOffset, List<Row> currentPermutationRows, List<Token> currentPermuationTokens)
-        {
-            token.Type = typeof(ShingleMatrixFilter).Name;
-            token.Flags = 0;
-            token.PositionIncrement = 1;
-
-            // the shingle spans from the start of its first part to the end of its last part
-            token.StartOffset = shingle[0].StartOffset;
-            int lastPartIndex = shingle.Count - 1;
-            token.EndOffset = shingle[lastPartIndex].EndOffset;
-
-            float shingleWeight = CalculateShingleWeight(token, shingle, currentPermutationStartOffset, currentPermutationRows, currentPermuationTokens);
-            _settingsCodec.SetWeight(token, shingleWeight);
-        }
-
-        /// <summary>
-        /// Evaluates the new shingle token weight.
-        /// 
-        /// for (shingle part token in shingle)
-        /// weight +=  shingle part token weight * (1 / sqrt(all shingle part token weights summed))
-        /// 
-        /// This algorithm gives a slightly greater score for longer shingles
-        /// and is rather penalising to great shingle token part weights.
-        /// </summary>
-        /// <param name="shingleToken">token returned to consumer</param>
-        /// <param name="shingle">the tokens used to produce the shingle token.</param>
-        /// <param name="currentPermutationStartOffset">start offset in parameter currentPermutationRows and currentPermutationTokens.</param>
-        /// <param name="currentPermutationRows">an index to what matrix row a token in parameter currentPermutationTokens exist.</param>
-        /// <param name="currentPermuationTokens">all tokens in the current row permutation of the matrix. A sub list (parameter offset, parameter shingle.size) equals parameter shingle.</param>
-        /// <returns>weight to be set for parameter shingleToken; 0 if the part weights do not sum to a positive number.</returns>
-        public float CalculateShingleWeight(Token shingleToken, List<Token> shingle, int currentPermutationStartOffset, List<Row> currentPermutationRows, List<Token> currentPermuationTokens)
-        {
-            var weights = new double[shingle.Count];
-
-            double total = 0d;
-
-            for (int i = 0; i < weights.Length; i++)
-            {
-                weights[i] = _settingsCodec.GetWeight(shingle[i]);
-                total += weights[i];
-            }
-
-            // Without this guard a zero sum yields factor = Infinity and a weight of
-            // 0 * Infinity = NaN; a negative sum yields NaN from Math.Sqrt.
-            if (total <= 0d)
-                return 0f;
-
-            double factor = 1d/Math.Sqrt(total);
-
-            double weight = weights.Sum(partWeight => partWeight*factor);
-
-            return (float) weight;
-        }
-
-        /// <summary>
-        /// Loads one column from the token stream.
-        /// 
-        /// When the last token has been read from the token stream, the column is flagged with IsLast = true.
-        /// </summary>
-        /// <returns>true if it managed to read one more column from the input token stream</returns>
-        private bool ReadColumn()
-        {
-            Token token;
-
-            // Start with the look-ahead token left over from the previous call, if there is one;
-            // otherwise read a fresh token from the input.
-            if (_readColumnBuf != null)
-            {
-                token = _readColumnBuf;
-                _readColumnBuf = null;
-            }
-            else
-            {
-                token = GetNextInputToken(new Token());
-            }
-
-            // nothing buffered and the input is exhausted: no column to read
-            if (token == null)
-                return false;
-
-            var currentReaderColumn = new Column(Matrix);
-            var currentReaderRow = new Row(currentReaderColumn);
-
-            currentReaderRow.Tokens.AddLast(token);
-
-            // Keep reading tokens into this column until the input ends or the codec says
-            // that a token starts a new column; such a token stays in _readColumnBuf
-            // and becomes the first token of the next call.
-            TokenPositioner tokenPositioner;
-            while ((_readColumnBuf = GetNextInputToken(new Token())) != null &&
-                   (tokenPositioner = _settingsCodec.GetTokenPositioner(_readColumnBuf)) != TokenPositioner.NewColumn)
-            {
-                if (tokenPositioner == TokenPositioner.SameRow)
-                {
-                    currentReaderRow.Tokens.AddLast(_readColumnBuf);
-                }
-                else
-                {
-                    // any other positioner: the token starts a new row in the same column
-                    currentReaderRow = new Row(currentReaderColumn);
-                    currentReaderRow.Tokens.AddLast(_readColumnBuf);
-                }
-                // the token has been placed in the matrix, it is no longer a look-ahead
-                _readColumnBuf = null;
-            }
-
-            // No look-ahead token means the loop above ended because the input returned null.
-            // Ask the input once more; if it still has nothing, this column is the last one.
-            if (_readColumnBuf == null)
-            {
-                _readColumnBuf = GetNextInputToken(new Token());
-
-                if (_readColumnBuf == null)
-                    currentReaderColumn.IsLast = true;
-            }
-
-            return true;
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Shingle/TokenPositioner.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/TokenPositioner.cs b/src/contrib/Analyzers/Shingle/TokenPositioner.cs
deleted file mode 100644
index 9146888..0000000
--- a/src/contrib/Analyzers/Shingle/TokenPositioner.cs
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-namespace Lucene.Net.Analysis.Shingle
-{
-    /// <summary>
-    /// Type-safe enumeration of the three placements a token can be given:
-    /// <see cref="NewColumn"/>, <see cref="NewRow"/> and <see cref="SameRow"/>.
-    /// Instances exist only as the static fields below and are compared by reference.
-    /// </summary>
-    public class TokenPositioner
-    {
-        private readonly int _index;
-
-        // Private so that no instances other than the three constants can be created.
-        private TokenPositioner(int index)
-        {
-            _index = index;
-        }
-
-        /// <summary>Placement with index 0.</summary>
-        public static readonly TokenPositioner NewColumn = new TokenPositioner(0);
-
-        /// <summary>Placement with index 1.</summary>
-        public static readonly TokenPositioner NewRow = new TokenPositioner(1);
-
-        /// <summary>Placement with index 2.</summary>
-        public static readonly TokenPositioner SameRow = new TokenPositioner(2);
-
-        /// <summary>Numeric identifier assigned to this placement at construction.</summary>
-        public int Index
-        {
-            get { return _index; }
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Sinks/DateRecognizerSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Sinks/DateRecognizerSinkFilter.cs b/src/contrib/Analyzers/Sinks/DateRecognizerSinkFilter.cs
deleted file mode 100644
index b0969fc..0000000
--- a/src/contrib/Analyzers/Sinks/DateRecognizerSinkFilter.cs
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Analysis.Sinks
-{
-    /// <summary>
-    /// Sink filter that accepts a token when its term text can be parsed as a
-    /// <see cref="System.DateTime"/> using the configured <see cref="System.IFormatProvider"/>.
-    /// Only the success of the parse matters; the parsed value is discarded.
-    /// </summary>
-    public class DateRecognizerSinkFilter : TeeSinkTokenFilter.SinkFilter
-    {
-        public const string DATE_TYPE = "date";
-
-        protected IFormatProvider dateFormat;
-        protected ITermAttribute termAtt;
-
-        /// <summary>
-        /// Creates a filter that parses with
-        /// <see cref="System.Globalization.CultureInfo.CurrentCulture"/> as the format provider.
-        /// </summary>
-        public DateRecognizerSinkFilter()
-            : this(System.Globalization.CultureInfo.CurrentCulture)
-        {
-        }
-
-        /// <summary>
-        /// Creates a filter that parses with the supplied format provider.
-        /// </summary>
-        /// <param name="dateFormat">Culture-specific formatting information passed to the date parser.</param>
-        public DateRecognizerSinkFilter(IFormatProvider dateFormat)
-        {
-            this.dateFormat = dateFormat;
-        }
-
-        /// <summary>
-        /// Tests whether the current term of <paramref name="source"/> is a date.
-        /// </summary>
-        /// <param name="source">
-        /// Attribute source whose term attribute is inspected. The attribute is fetched on the
-        /// first call only and reused on later calls.
-        /// </param>
-        /// <returns><c>true</c> if the term parses as a date; otherwise <c>false</c>.</returns>
-        public override bool Accept(AttributeSource source)
-        {
-            if (termAtt == null)
-            {
-                termAtt = source.AddAttribute<ITermAttribute>();
-            }
-
-            // TryParse replaces Parse + catch (FormatException): no exception is raised for
-            // ordinary non-date terms, and a null term yields false instead of throwing.
-            // DateTimeStyles.None is what DateTime.Parse(string, IFormatProvider) uses.
-            DateTime ignored;
-            return DateTime.TryParse(termAtt.Term, dateFormat, System.Globalization.DateTimeStyles.None, out ignored);
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Sinks/TokenRangeSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Sinks/TokenRangeSinkFilter.cs b/src/contrib/Analyzers/Sinks/TokenRangeSinkFilter.cs
deleted file mode 100644
index 9d3b8e4..0000000
--- a/src/contrib/Analyzers/Sinks/TokenRangeSinkFilter.cs
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Analysis.Sinks
-{
-    /// <summary>
-    /// Sink filter that accepts tokens by how many tokens were seen before them: a token is
-    /// accepted when that count is at least <c>lower</c> and strictly less than <c>upper</c>.
-    /// </summary>
-    public class TokenRangeSinkFilter : TeeSinkTokenFilter.SinkFilter
-    {
-        private int lower;
-        private int upper;
-        private int count;
-
-        /// <summary>
-        /// Creates a filter for the half-open range [<paramref name="lower"/>, <paramref name="upper"/>).
-        /// </summary>
-        /// <param name="lower">First accepted token count (inclusive).</param>
-        /// <param name="upper">End of the accepted range (exclusive).</param>
-        public TokenRangeSinkFilter(int lower, int upper)
-        {
-            this.lower = lower;
-            this.upper = upper;
-        }
-
-        /// <summary>
-        /// Decides whether the current token falls inside the range, then advances the counter.
-        /// </summary>
-        /// <param name="source">Not inspected; only the call count matters.</param>
-        /// <returns><c>true</c> when the counter value before this call lies in the range.</returns>
-        public override bool Accept(AttributeSource source)
-        {
-            bool withinRange = lower <= count && count < upper;
-
-            // The counter advances on every call, whether or not the token was accepted.
-            count++;
-            return withinRange;
-        }
-
-        /// <summary>
-        /// Restarts counting from zero.
-        /// </summary>
-        public override void Reset()
-        {
-            count = 0;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Sinks/TokenTypeSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Sinks/TokenTypeSinkFilter.cs b/src/contrib/Analyzers/Sinks/TokenTypeSinkFilter.cs
deleted file mode 100644
index 7fc3785..0000000
--- a/src/contrib/Analyzers/Sinks/TokenTypeSinkFilter.cs
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Analysis.Sinks
-{
-    /// <summary>
-    /// Sink filter that accepts a token when its type attribute equals a fixed type string.
-    /// </summary>
-    public class TokenTypeSinkFilter : TeeSinkTokenFilter.SinkFilter
-    {
-        private readonly string typeToMatch;
-        private ITypeAttribute typeAtt;
-
-        /// <summary>
-        /// Creates a filter that matches tokens of the given type.
-        /// </summary>
-        /// <param name="typeToMatch">Token type to accept.</param>
-        public TokenTypeSinkFilter(string typeToMatch)
-        {
-            this.typeToMatch = typeToMatch;
-        }
-
-        /// <summary>
-        /// Tests whether the current token of <paramref name="source"/> has the configured type.
-        /// </summary>
-        /// <param name="source">
-        /// Attribute source whose type attribute is inspected. The attribute is fetched on the
-        /// first call only and reused on later calls.
-        /// </param>
-        /// <returns><c>true</c> when the token type equals the configured type.</returns>
-        public override bool Accept(AttributeSource source)
-        {
-            if (typeAtt == null)
-            {
-                typeAtt = source.AddAttribute<ITypeAttribute>();
-            }
-
-            // Static string.Equals is an ordinal comparison like the instance call it replaces,
-            // but does not throw NullReferenceException when typeToMatch is null.
-            return string.Equals(typeToMatch, typeAtt.Type);
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Th/ThaiAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Th/ThaiAnalyzer.cs b/src/contrib/Analyzers/Th/ThaiAnalyzer.cs
deleted file mode 100644
index 2948e2f..0000000
--- a/src/contrib/Analyzers/Th/ThaiAnalyzer.cs
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Text;
-using Lucene.Net.Analysis.Standard;
-using Version=Lucene.Net.Util.Version;
-
-namespace Lucene.Net.Analysis.Th
-{
-    /// <summary>
-    /// <see cref="Analyzer"/> for the Thai language. The chain is a
-    /// <see cref="StandardTokenizer"/> followed by <see cref="StandardFilter"/>,
-    /// <see cref="ThaiWordFilter"/> and a <see cref="StopFilter"/> using the English stop words.
-    /// The Java original uses <c>java.text.BreakIterator</c> to break words (version 0.2).
-    /// </summary>
-    /// <remarks>
-    /// <b>NOTE</b>: This class uses the same <see cref="Version"/>
-    /// dependent settings as <see cref="StandardAnalyzer"/>.
-    /// </remarks>
-    public class ThaiAnalyzer : Analyzer
-    {
-        private readonly Version matchVersion;
-
-        /// <summary>
-        /// Creates the analyzer for the given compatibility version.
-        /// </summary>
-        /// <param name="matchVersion">Version passed on to the tokenizer and the stop filter default.</param>
-        public ThaiAnalyzer(Version matchVersion)
-        {
-            SetOverridesTokenStreamMethod<ThaiAnalyzer>();
-            this.matchVersion = matchVersion;
-        }
-
-        /// <summary>
-        /// Builds a fresh analysis chain over <paramref name="reader"/>.
-        /// </summary>
-        public override TokenStream TokenStream(String fieldName, TextReader reader)
-        {
-            return BuildFilterChain(new StandardTokenizer(matchVersion, reader));
-        }
-
-        // Holder for the tokenizer and the end of the filter chain kept between calls.
-        private class SavedStreams
-        {
-            internal Tokenizer source;
-            internal TokenStream result;
-        }
-
-        /// <summary>
-        /// Returns the chain saved in <c>PreviousTokenStream</c>, re-pointed at
-        /// <paramref name="reader"/>, or builds and saves one on first use.
-        /// </summary>
-        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
-        {
-            if (overridesTokenStreamMethod)
-            {
-                // LUCENE-1678: force fallback to tokenStream() if we
-                // have been subclassed and that subclass overrides
-                // tokenStream but not reusableTokenStream
-                return TokenStream(fieldName, reader);
-            }
-
-            SavedStreams saved = (SavedStreams)PreviousTokenStream;
-            if (saved != null)
-            {
-                saved.source.Reset(reader);
-                saved.result.Reset(); // reset the ThaiWordFilter's state
-                return saved.result;
-            }
-
-            saved = new SavedStreams();
-            saved.source = new StandardTokenizer(matchVersion, reader);
-            saved.result = BuildFilterChain(saved.source);
-            PreviousTokenStream = saved;
-            return saved.result;
-        }
-
-        // Wraps the tokenizer in StandardFilter, ThaiWordFilter and the English StopFilter, in that order.
-        private TokenStream BuildFilterChain(Tokenizer tokenizer)
-        {
-            TokenStream filtered = new ThaiWordFilter(new StandardFilter(tokenizer));
-            return new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
-                                  filtered, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Th/ThaiWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Th/ThaiWordFilter.cs b/src/contrib/Analyzers/Th/ThaiWordFilter.cs
deleted file mode 100644
index 7711bc2..0000000
--- a/src/contrib/Analyzers/Th/ThaiWordFilter.cs
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Text.RegularExpressions;
-using Lucene.Net.Analysis.Tokenattributes;
-
-namespace Lucene.Net.Analysis.Th
-{
-    /// <summary>
-    /// <see cref="TokenFilter"/> intended to break each Thai token into separate tokens,
-    /// one per Thai word.
-    /// </summary>
-    /// <remarks>
-    /// The Java original relies on <c>java.text.BreakIterator</c> (and is documented as not
-    /// working correctly with all JREs). That dependency has not been ported: the constructor
-    /// always throws <see cref="NotSupportedException"/>, and the original algorithm is kept
-    /// below only as commented-out reference code.
-    /// </remarks>
-    public sealed class ThaiWordFilter : TokenFilter
-    {
-        //private BreakIterator breaker = null;
-
-        private ITermAttribute termAtt;
-        private IOffsetAttribute offsetAtt;
-
-        private State thaiState = null;
-
-        // I'm sure this is far slower than if we just created a simple UnicodeBlock class
-        // considering this is used on a single char, we have to create a new string for it,
-        // via ToString(), so we can then run a costly(?) regex on it.  Yikes.
-        // Static so the compiled pattern is built once per process instead of on every
-        // construction attempt (field initializers run before the constructor body throws).
-        private static readonly Regex _isThaiRegex = new Regex(@"\p{IsThai}", RegexOptions.Compiled);
-
-        /// <summary>
-        /// Always throws: the filter has not been ported.
-        /// </summary>
-        /// <param name="input">Stream to wrap; passed to the base constructor before the throw.</param>
-        /// <exception cref="NotSupportedException">Thrown unconditionally.</exception>
-        public ThaiWordFilter(TokenStream input)
-            : base(input)
-        {
-            throw new NotSupportedException("PORT ISSUES");
-            //breaker = BreakIterator.getWordInstance(new Locale("th"));
-            //termAtt = AddAttribute<TermAttribute>();
-            //offsetAtt = AddAttribute<OffsetAttribute>();
-        }
-
-        /// <summary>
-        /// Unported stub: produces no tokens.
-        /// </summary>
-        /// <returns>Always <c>false</c>.</returns>
-        public override bool IncrementToken()
-        {
-            // Reference implementation from the Java original, pending a BreakIterator port.
-            //int end;
-            //if (thaiState != null)
-            //{
-            //    int start = breaker.Current();
-            //    end = breaker.next();
-            //    if (end != BreakIterator.DONE)
-            //    {
-            //        RestoreState(thaiState);
-            //        termAtt.SetTermBuffer(termAtt.TermBuffer(), start, end - start);
-            //        offsetAtt.SetOffset(offsetAtt.StartOffset() + start, offsetAtt.StartOffset() + end);
-            //        return true;
-            //    }
-            //    thaiState = null;
-            //}
-
-            //if (input.IncrementToken() == false || termAtt.TermLength() == 0)
-            //    return false;
-
-            //String text = termAtt.Term();
-            //if (!_isThaiRegex.Match(new string(new[]{text[0]})).Success)
-            //{
-            //    termAtt.SetTermBuffer(text.ToLower());
-            //    return true;
-            //}
-
-            //thaiState = CaptureState();
-
-            //breaker.SetText(text);
-            //end = breaker.next();
-            //if (end != BreakIterator.DONE)
-            //{
-            //    termAtt.SetTermBuffer(text, 0, end);
-            //    offsetAtt.SetOffset(offsetAtt.StartOffset(), offsetAtt.StartOffset() + end);
-            //    return true;
-            //}
-            return false;
-        }
-
-        /// <summary>
-        /// Resets the base filter and discards any captured Thai token state.
-        /// </summary>
-        public override void Reset()
-        {
-            base.Reset();
-            thaiState = null;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/WordlistLoader.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/WordlistLoader.cs b/src/contrib/Analyzers/WordlistLoader.cs
deleted file mode 100644
index 16fdbae..0000000
--- a/src/contrib/Analyzers/WordlistLoader.cs
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*/
-
-//using System;
-//using System.IO;
-//using System.Collections;
-
-//namespace Lucene.Net.Analysis
-//{
-//    /// <summary>
-//    /// Loads a text file and adds every line as an entry to a Hashtable. Every line
-//    /// should contain only one word. If the file is not found or on any error, an
-//    /// empty table is returned.
-//    /// </summary>
-//    public class WordlistLoader
-//    {
-//        /// <summary>
-//        /// Load words table from the file
-//        /// </summary>
-//        /// <param name="path">Path to the wordlist</param>
-//        /// <param name="wordfile">Name of the wordlist</param>
-//        /// <returns></returns>
-//        public static Hashtable GetWordSet( String path, String wordfile ) 
-//        {
-//            if ( path == null || wordfile == null ) 
-//            {
-//                return new Hashtable();
-//            }
-//            return GetWordSet(new FileInfo(path + "\\" + wordfile));
-//        }
-
-//        /// <summary>
-//        /// Load words table from the file
-//        /// </summary>
-//        /// <param name="wordfile">Complete path to the wordlist</param>
-//        /// <returns></returns>
-//        public static Hashtable GetWordSet( String wordfile ) 
-//        {
-//            if ( wordfile == null ) 
-//            {
-//                return new Hashtable();
-//            }
-//            return GetWordSet( new FileInfo( wordfile ) );
-//        }
-
-//        /// <summary>
-//        /// Load words table from the file 
-//        /// </summary>
-//        /// <param name="wordfile">File containing the wordlist</param>
-//        /// <returns></returns>
-//        public static Hashtable GetWordSet( FileInfo wordfile ) 
-//        {
-//            if ( wordfile == null ) 
-//            {
-//                return new Hashtable();
-//            }            
-//            StreamReader lnr = new StreamReader(wordfile.FullName);
-//            return GetWordSet(lnr);
-//        }
-
-//        /// <summary>
-//        /// Reads lines from a Reader and adds every line as an entry to a HashSet (omitting
-//        /// leading and trailing whitespace). Every line of the Reader should contain only
-//        /// one word. The words need to be in lowercase if you make use of an
-//        /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
-//        /// </summary>
-//        /// <param name="reader">Reader containing the wordlist</param>
-//        /// <returns>A Hashtable with the reader's words</returns>
-//        public static Hashtable GetWordSet(TextReader reader)
-//        {
-//            Hashtable result = new Hashtable();            
-//            try 
-//            {                
-//                ArrayList stopWords = new ArrayList();
-//                String word = null;
-//                while ( ( word = reader.ReadLine() ) != null ) 
-//                {
-//                    stopWords.Add(word.Trim());
-//                }
-//                result = MakeWordTable( (String[])stopWords.ToArray(typeof(string)), stopWords.Count);
-//            }
-//                // On error, use an empty table
-//            catch (IOException) 
-//            {
-//                result = new Hashtable();
-//            }
-//            return result;
-//        }
-
-
-//        /// <summary>
-//        /// Builds the wordlist table.
-//        /// </summary>
-//        /// <param name="words">Words that were read</param>
-//        /// <param name="length">Number of words that were read into <tt>words</tt></param>
-//        /// <returns></returns>
-//        private static Hashtable MakeWordTable( String[] words, int length ) 
-//        {
-//            Hashtable table = new Hashtable( length );
-//            for ( int i = 0; i < length; i++ ) 
-//            {
-//                table.Add(words[i], words[i]);
-//            }
-//            return table;
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Core/Analysis/Ext/Analysis.Ext.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Core/Analysis/Ext/Analysis.Ext.cs b/src/contrib/Core/Analysis/Ext/Analysis.Ext.cs
deleted file mode 100644
index beec3fd..0000000
--- a/src/contrib/Core/Analysis/Ext/Analysis.Ext.cs
+++ /dev/null
@@ -1,166 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using System.Collections;
-using System.Collections.Generic;
-using System.Text;
-using System.IO;
-
-using Lucene.Net.Analysis;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Util;
-
-
-namespace Lucene.Net.Analysis.Ext
-{
-    /// <summary>
-    /// This analyzer targets short fields where *word* like searches are required.
-    /// Every letter or digit becomes its own token, so
-    /// [SomeUser@GMAIL.com 1234567890] will be tokenized as
-    /// [s.o.m.e.u.s.e.r..g.m.a.i.l..c.o.m..1.2.3.4.5.6.7.8.9.0] (read .'s as blank)
-    /// 
-    /// Usage: 
-    /// QueryParser p = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "fieldName", new SingleCharTokenAnalyzer());
-    /// p.SetDefaultOperator(QueryParser.Operator.AND);
-    /// p.SetEnablePositionIncrements(true);
-    /// 
-    /// TopDocs td = src.Search(p.Parse("678"), 10);
-    /// or
-    /// TopDocs td = src.Search(p.Parse("\"gmail.com 1234\""), 10);
-    /// </summary>
-    public class SingleCharTokenAnalyzer : Analyzer
-    {
-        /// <summary>
-        /// Builds the chain LetterOrDigitTokenizer, LowerCaseFilter, ASCIIFoldingFilter and
-        /// finally the single-character splitter.
-        /// </summary>
-        /// <param name="fieldName">Not used by this analyzer.</param>
-        /// <param name="reader">Text to analyze.</param>
-        /// <returns>A stream producing one token per letter or digit.</returns>
-        public override TokenStream TokenStream(string fieldName, TextReader reader)
-        {
-            TokenStream t = new LetterOrDigitTokenizer(reader);
-            t = new LowerCaseFilter(t);
-            t = new ASCIIFoldingFilter(t);
-            t = new SingleCharTokenizer(t);
-
-            return t;
-        }
-
-        /// <summary>
-        /// Splits every token of the wrapped stream into one token per character.
-        /// </summary>
-        class SingleCharTokenizer : Tokenizer
-        {
-            readonly TokenStream _input;
-
-            readonly ITermAttribute _termAttribute;
-            readonly IOffsetAttribute _offsetAttribute;
-            readonly IPositionIncrementAttribute _positionIncrementAttribute;
-
-            // Characters of the input token currently being split, and the cursor into them.
-            char[] _buffer = null;
-            int _offset = -1;
-            int _length = -1;
-
-            // Synthetic running offset: advanced once per emitted character and once more per
-            // input token. It is a counter, not an offset read from the wrapped stream.
-            int _offsetInStream = -1;
-
-            public SingleCharTokenizer(TokenStream input): base(input)
-            {
-                _input = input;
-                _termAttribute = AddAttribute<ITermAttribute>();
-                _offsetAttribute = AddAttribute<IOffsetAttribute>();
-                _positionIncrementAttribute = AddAttribute<IPositionIncrementAttribute>();
-            }
-
-            /// <summary>
-            /// Emits the next character of the current input token, pulling a new input token
-            /// when the current one is used up.
-            /// </summary>
-            /// <returns><c>false</c> once the wrapped stream has no more tokens.</returns>
-            public override bool IncrementToken()
-            {
-                int positionIncrement = 0;
-                if (_buffer == null || _offset >= _length)
-                {
-                    if (!_input.IncrementToken()) return false;
-
-                    _offset = 0;
-                    // NOTE(review): presumably this is the attribute's own array rather than a
-                    // copy, i.e. the same storage SetTermBuffer writes to below - confirm.
-                    _buffer = _termAttribute.TermBuffer();
-                    _length = _termAttribute.TermLength();
-
-                    // The first character of each input token gets an increment of 2, which
-                    // leaves a one-position gap between words.
-                    positionIncrement++;
-                    _offsetInStream++;
-                }
-
-                _offsetAttribute.SetOffset(_offsetInStream, _offsetInStream + 1);
-                _offsetInStream++;
-
-                positionIncrement++;
-                _positionIncrementAttribute.PositionIncrement = positionIncrement;
-
-                _termAttribute.SetTermLength(1);
-                _termAttribute.SetTermBuffer(_buffer[_offset++].ToString());
-
-                return true;
-            }
-
-            /// <summary>
-            /// Clears the splitting state, then resets the base tokenizer.
-            /// </summary>
-            public override void Reset()
-            {
-                _buffer = null;
-                _offset = -1;
-                _length = -1;
-                _offsetInStream = -1;
-
-                base.Reset();
-            }
-
-            /// <summary>
-            /// Closes the wrapped stream, then disposes the base tokenizer.
-            /// </summary>
-            /// <param name="disposing">
-            /// <c>true</c> when called from an explicit dispose; only then is the wrapped
-            /// stream touched.
-            /// </param>
-            protected override void Dispose(bool disposing)
-            {
-                // Other managed objects must not be accessed when disposing is false.
-                if (disposing)
-                {
-                    _input.Close();
-                }
-                base.Dispose(disposing);
-            }
-        }
-    }
-
-    /// <summary>
-    /// Word-level analyzer in which every char that is not a letter or digit acts as a word
-    /// separator; tokens are lower-cased and passed through <c>ASCIIFoldingFilter</c>.
-    /// [Name.Surname@gmail.com 123.456 ğüşıöç%ĞÜŞİÖÇ$ΑΒΓΔΕΖ#АБВГДЕ SSß] will be tokenized as
-    /// [name surname gmail com 123 456 gusioc gusioc αβγδεζ абвгде ssss]
-    /// 
-    /// No problem with searches like someuser@gmail or 123.456 since they are
-    /// converted to phrase-query as "someuser gmail" or "123 456".
-    /// </summary>
-    public class UnaccentedWordAnalyzer : Analyzer
-    {
-        /// <summary>
-        /// Builds the chain LetterOrDigitTokenizer, LowerCaseFilter, ASCIIFoldingFilter.
-        /// </summary>
-        /// <param name="fieldName">Not used by this analyzer.</param>
-        /// <param name="reader">Text to analyze.</param>
-        /// <returns>The end of the filter chain.</returns>
-        public override TokenStream TokenStream(string fieldName, TextReader reader)
-        {
-            TokenStream words = new LetterOrDigitTokenizer(reader);
-            TokenStream lowerCased = new LowerCaseFilter(words);
-            return new ASCIIFoldingFilter(lowerCased);
-        }
-    }
-
-    /// <summary>
-    /// Tokenizer whose tokens are runs of letters and digits; any other char is a word separator.
-    /// </summary>
-    public class LetterOrDigitTokenizer : CharTokenizer
-    {
-        /// <summary>
-        /// Creates a tokenizer over <paramref name="reader"/>.
-        /// </summary>
-        /// <param name="reader">Text to tokenize.</param>
-        public LetterOrDigitTokenizer(TextReader reader)
-            : base(reader)
-        {
-        }
-
-        /// <summary>
-        /// Tells the base tokenizer whether <paramref name="c"/> belongs to a token.
-        /// </summary>
-        /// <param name="c">Character to classify.</param>
-        /// <returns><c>true</c> for letters and decimal digits; otherwise <c>false</c>.</returns>
-        protected override bool IsTokenChar(char c)
-        {
-            // Same Unicode categories as char.IsLetterOrDigit: the letter categories plus
-            // DecimalDigitNumber.
-            return char.IsLetter(c) || char.IsDigit(c);
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Core/Contrib.Core.csproj
----------------------------------------------------------------------
diff --git a/src/contrib/Core/Contrib.Core.csproj b/src/contrib/Core/Contrib.Core.csproj
deleted file mode 100644
index bced134..0000000
--- a/src/contrib/Core/Contrib.Core.csproj
+++ /dev/null
@@ -1,163 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!--
-
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements.  See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership.  The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License.  You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied.  See the License for the
- specific language governing permissions and limitations
- under the License.
-
--->
-<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <PropertyGroup>
-    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
-    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
-    <ProductVersion>9.0.30729</ProductVersion>
-    <SchemaVersion>2.0</SchemaVersion>
-    <ProjectGuid>{FEF899EB-610C-4D3C-A556-A01F56F4AFE0}</ProjectGuid>
-    <AppDesignerFolder>Properties</AppDesignerFolder>
-    <RootNamespace>Lucene.Net</RootNamespace>
-    <AssemblyName>Lucene.Net.Contrib.Core</AssemblyName>
-    <FileAlignment>512</FileAlignment>
-    <FileUpgradeFlags></FileUpgradeFlags>
-    <OldToolsVersion>3.5</OldToolsVersion>
-    <UpgradeBackupLocation />
-    <PublishUrl>publish\</PublishUrl>
-    <Install>true</Install>
-    <InstallFrom>Disk</InstallFrom>
-    <UpdateEnabled>false</UpdateEnabled>
-    <UpdateMode>Foreground</UpdateMode>
-    <UpdateInterval>7</UpdateInterval>
-    <UpdateIntervalUnits>Days</UpdateIntervalUnits>
-    <UpdatePeriodically>false</UpdatePeriodically>
-    <UpdateRequired>false</UpdateRequired>
-    <MapFileExtensions>true</MapFileExtensions>
-    <ApplicationRevision>0</ApplicationRevision>
-    <ApplicationVersion>1.0.0.%2a</ApplicationVersion>
-    <IsWebBootstrapper>false</IsWebBootstrapper>
-    <UseApplicationTrust>false</UseApplicationTrust>
-    <BootstrapperEnabled>true</BootstrapperEnabled>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
-    <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
-    <Framework>$(TargetFrameworkVersion.Replace("v", "NET").Replace(".", ""))</Framework>
-    <DebugSymbols>true</DebugSymbols>
-    <DebugType>full</DebugType>
-    <Optimize>false</Optimize>
-    <OutputPath>..\..\..\build\bin\contrib\Core\$(Configuration.Replace("35", ""))\$(Framework)\</OutputPath>
-    <DefineConstants>DEBUG;TRACE;$(Framework)</DefineConstants>
-    <ErrorReport>prompt</ErrorReport>
-    <WarningLevel>4</WarningLevel>
-    <NoWarn>618</NoWarn>
-    <OutputType>Library</OutputType>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug35|AnyCPU' ">
-    <TargetFrameworkVersion>v3.5</TargetFrameworkVersion>
-    <Framework>$(TargetFrameworkVersion.Replace("v", "NET").Replace(".", ""))</Framework>
-    <DebugSymbols>true</DebugSymbols>
-    <DebugType>full</DebugType>
-    <Optimize>false</Optimize>
-    <OutputPath>..\..\..\build\bin\contrib\Core\$(Configuration.Replace("35", ""))\$(Framework)\</OutputPath>
-    <DefineConstants>DEBUG;TRACE;$(Framework)</DefineConstants>
-    <ErrorReport>prompt</ErrorReport>
-    <WarningLevel>4</WarningLevel>
-    <NoWarn>618</NoWarn>
-    <OutputType>Library</OutputType>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
-    <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
-    <Framework>$(TargetFrameworkVersion.Replace("v", "NET").Replace(".", ""))</Framework>
-    <DebugType>pdbonly</DebugType>
-    <Optimize>true</Optimize>
-    <OutputPath>..\..\..\build\bin\contrib\Core\$(Configuration.Replace("35", ""))\$(Framework)\</OutputPath>
-    <DefineConstants>TRACE;$(Framework)</DefineConstants>
-    <ErrorReport>prompt</ErrorReport>
-    <WarningLevel>4</WarningLevel>
-    <DocumentationFile>..\..\..\build\bin\contrib\Core\$(Configuration.Replace("35", ""))\$(Framework)\Lucene.Net.Contrib.Core.XML</DocumentationFile>
-    <NoWarn>618</NoWarn>
-    <DebugSymbols>true</DebugSymbols>
-    <OutputType>Library</OutputType>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release35|AnyCPU' ">
-    <TargetFrameworkVersion>v3.5</TargetFrameworkVersion>
-    <Framework>$(TargetFrameworkVersion.Replace("v", "NET").Replace(".", ""))</Framework>
-    <DebugType>pdbonly</DebugType>
-    <Optimize>true</Optimize>
-    <OutputPath>..\..\..\build\bin\contrib\Core\$(Configuration.Replace("35", ""))\$(Framework)\</OutputPath>
-    <DefineConstants>TRACE;$(Framework)</DefineConstants>
-    <ErrorReport>prompt</ErrorReport>
-    <WarningLevel>4</WarningLevel>
-    <DocumentationFile>..\..\..\build\bin\contrib\Core\$(Configuration.Replace("35", ""))\$(Framework)\Lucene.Net.Contrib.Core.XML</DocumentationFile>
-    <NoWarn>618</NoWarn>
-    <DebugSymbols>true</DebugSymbols>
-    <OutputType>Library</OutputType>
-  </PropertyGroup>
-  <PropertyGroup>
-    <SignAssembly>true</SignAssembly>
-  </PropertyGroup>
-  <PropertyGroup>
-    <AssemblyOriginatorKeyFile>Lucene.Net.snk</AssemblyOriginatorKeyFile>
-  </PropertyGroup>
-  <ItemGroup>
-    <Reference Include="System" />
-    <Reference Condition="'$(Framework)' == 'NET35'" Include="System.Core" />
-  </ItemGroup>
-  <ItemGroup>
-    <Compile Include="Analysis\Ext\Analysis.Ext.cs" />
-    <Compile Include="Index\FieldEnumerator.cs" />
-    <Compile Include="Index\SegmentsGenCommit.cs" />
-    <Compile Include="Index\TermVectorEnumerator.cs" />
-    <Compile Include="Properties\AssemblyInfo.cs" />
-    <Compile Include="Util\Cache\SegmentCache.cs" />
-  </ItemGroup>
-  <ItemGroup>
-    <ProjectReference Include="..\..\core\Lucene.Net.csproj">
-      <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
-      <Name>Lucene.Net</Name>
-    </ProjectReference>
-  </ItemGroup>
-  <ItemGroup>
-    <BootstrapperPackage Include=".NETFramework,Version=v4.0">
-      <Visible>False</Visible>
-      <ProductName>Microsoft .NET Framework 4 %28x86 and x64%29</ProductName>
-      <Install>true</Install>
-    </BootstrapperPackage>
-    <BootstrapperPackage Include="Microsoft.Net.Client.3.5">
-      <Visible>False</Visible>
-      <ProductName>.NET Framework 3.5 SP1 Client Profile</ProductName>
-      <Install>false</Install>
-    </BootstrapperPackage>
-    <BootstrapperPackage Include="Microsoft.Net.Framework.3.5.SP1">
-      <Visible>False</Visible>
-      <ProductName>.NET Framework 3.5 SP1</ProductName>
-      <Install>false</Install>
-    </BootstrapperPackage>
-    <BootstrapperPackage Include="Microsoft.Windows.Installer.3.1">
-      <Visible>False</Visible>
-      <ProductName>Windows Installer 3.1</ProductName>
-      <Install>true</Install>
-    </BootstrapperPackage>
-  </ItemGroup>
-  <ItemGroup>
-    <None Include="Lucene.Net.snk" />
-  </ItemGroup>
-  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
-  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
-       Other similar extension points exist, see Microsoft.Common.targets.
-  <Target Name="BeforeBuild">
-  </Target>
-  <Target Name="AfterBuild">
-  </Target>
-  -->
-</Project>
\ No newline at end of file


Mime
View raw message