lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [25/62] [abbrv] lucenenet git commit: Deleted obsolete Contrib folder
Date Sat, 01 Apr 2017 01:09:18 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Ru/RussianLetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Ru/RussianLetterTokenizer.cs b/src/contrib/Analyzers/Ru/RussianLetterTokenizer.cs
deleted file mode 100644
index 03ef873..0000000
--- a/src/contrib/Analyzers/Ru/RussianLetterTokenizer.cs
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.IO;
-using Lucene.Net.Analysis;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Analysis.Ru
-{
-    ///<summary>
-    /// A <see cref="CharTokenizer"/> whose token characters are letters
-    /// (per <c>char.IsLetter</c>) plus the basic Latin digits 0-9.
-    ///</summary>
-    public class RussianLetterTokenizer : CharTokenizer
-    {
-        public RussianLetterTokenizer(TextReader _in)
-            : base(_in)
-        {
-        }
-
-        public RussianLetterTokenizer(AttributeSource source, TextReader _in)
-            : base(source, _in)
-        {
-        }
-
-        public RussianLetterTokenizer(AttributeSource.AttributeFactory factory, TextReader __in)
-            : base(factory, __in)
-        {
-        }
-
-        /*
-         * Returns true when c satisfies char.IsLetter or is one of the
-         * digits '0'-'9'; returns false for every other character.
-         */
-        protected override bool IsTokenChar(char c)
-        {
-            if (char.IsLetter(c) || (c >= '0' && c <= '9'))
-                return true;
-            else
-                return false;
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Ru/RussianLowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Ru/RussianLowerCaseFilter.cs b/src/contrib/Analyzers/Ru/RussianLowerCaseFilter.cs
deleted file mode 100644
index 274f7d4..0000000
--- a/src/contrib/Analyzers/Ru/RussianLowerCaseFilter.cs
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using Lucene.Net.Analysis;
-using Lucene.Net.Analysis.Tokenattributes;
-
-namespace Lucene.Net.Analysis.Ru
-{
-    /// <summary>
-    /// Lower-cases every character of each token's term buffer in place via <c>char.ToLower</c>.
-    /// </summary>
-    [Obsolete("Use LowerCaseFilter instead, which has the same functionality. This filter will be removed in Lucene 4.0")]
-    public sealed class RussianLowerCaseFilter : TokenFilter
-    {
-        private ITermAttribute termAtt;
-
-        public RussianLowerCaseFilter(TokenStream _in)
-            : base(_in)
-        {
-            termAtt = AddAttribute<ITermAttribute>();
-        }
-
-        public sealed override bool IncrementToken()
-        {
-            if (input.IncrementToken())
-            {
-                char[] chArray = termAtt.TermBuffer();
-                int chLen = termAtt.TermLength();
-                for (int i = 0; i < chLen; i++)
-                {
-                    chArray[i] = char.ToLower(chArray[i]);
-                }
-                return true;
-            }
-            else
-            {
-                return false;
-            }
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Ru/RussianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Ru/RussianStemFilter.cs b/src/contrib/Analyzers/Ru/RussianStemFilter.cs
deleted file mode 100644
index 1f00695..0000000
--- a/src/contrib/Analyzers/Ru/RussianStemFilter.cs
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using Lucene.Net.Analysis;
-using Lucene.Net.Analysis.Tokenattributes;
-
-namespace Lucene.Net.Analysis.Ru
-{
-    /*
-    * A TokenFilter that replaces each term with the stem produced by RussianStemmer.
-    * <p>
-    * The implementation was inspired by GermanStemFilter.
-    * The input should be filtered by LowerCaseFilter before passing it to RussianStemFilter,
-    * because RussianStemFilter only works with lowercase characters.
-    * </p>
-    */
-    public sealed class RussianStemFilter : TokenFilter
-    {
-        /*
-         * Stemmer applied to every term; instantiated in the constructor.
-         */
-        private RussianStemmer stemmer = null;
-
-        private ITermAttribute termAtt;
-
-        public RussianStemFilter(TokenStream _in)
-            : base(_in)
-        {
-            stemmer = new RussianStemmer();
-            termAtt = AddAttribute<ITermAttribute>();
-        }
-        /*
-         * Advances the input; swaps in the stem when it is non-null and differs from the term. Returns false at end of input.
-         */
-        public sealed override bool IncrementToken()
-        {
-            if (input.IncrementToken())
-            {
-                String term = termAtt.Term;
-                String s = stemmer.Stem(term);
-                if (s != null && !s.Equals(term))
-                    termAtt.SetTermBuffer(s);
-                return true;
-            }
-            else
-            {
-                return false;
-            }
-        }
-
-
-        // I don't get the point of this.  All methods in java are private, so they can't be
-        // overridden...You can't really subclass any of its behavior.  I've commented it out,
-        // as it doesn't compile as is. - cc
-        ////*
-        // * Set a alternative/custom {@link RussianStemmer} for this filter.
-        // */
-        //public void SetStemmer(RussianStemmer stemmer)
-        //{
-        //    if (stemmer != null)
-        //    {
-        //        this.stemmer = stemmer;
-        //    }
-        //}
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Ru/RussianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Ru/RussianStemmer.cs b/src/contrib/Analyzers/Ru/RussianStemmer.cs
deleted file mode 100644
index eaf25e5..0000000
--- a/src/contrib/Analyzers/Ru/RussianStemmer.cs
+++ /dev/null
@@ -1,615 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Text;
-
-namespace Lucene.Net.Analysis.Ru
-{
-    /*
- * Russian stemming algorithm implementation (see http://snowball.sourceforge.net for detailed description).
- */
-    public class RussianStemmer
-    {
-        // start offsets of the RV, R1 and R2 regions in the current word; reset to 0 and recomputed by markPositions
-        private int RV, R1, R2;
-
-        // lowercase Cyrillic letters used by the ending tables (currently unused letters are commented out)
-        private const char A = '\u0430';
-        //private const char B = '\u0431';
-        private const char V = '\u0432';
-        private const char G = '\u0433';
-        //private const char D = '\u0434';
-        private const char E = '\u0435';
-        //private const char ZH = '\u0436';
-        //private const char Z = '\u0437';
-        private const char I = '\u0438';
-        private const char I_ = '\u0439';
-        //private const char K = '\u043A';
-        private const char L = '\u043B';
-        private const char M = '\u043C';
-        private const char N = '\u043D';
-        private const char O = '\u043E';
-        //private const char P = '\u043F';
-        //private const char R = '\u0440';
-        private const char S = '\u0441';
-        private const char T = '\u0442';
-        private const char U = '\u0443';
-        //private const char F = '\u0444';
-        private const char X = '\u0445';
-        //private const char TS = '\u0446';
-        //private const char CH = '\u0447';
-        private const char SH = '\u0448';
-        private const char SHCH = '\u0449';
-        //private const char HARD = '\u044A';
-        private const char Y = '\u044B';
-        private const char SOFT = '\u044C';
-        private const char AE = '\u044D';
-        private const char IU = '\u044E';
-        private const char IA = '\u044F';
-
-        // stem definitions: the vowel set (used by isVowel) followed by the ending tables
-        private static char[] vowels = { A, E, I, O, U, Y, AE, IU, IA };
-
-        private static char[][] perfectiveGerundEndings1 = {
-                                                               new[] {V},
-                                                               new[] {V, SH, I},
-                                                               new[] {V, SH, I, S, SOFT}
-                                                           };
-
-        private static char[][] perfectiveGerund1Predessors = {
-                                                                  new[] {A},
-                                                                  new[] {IA}
-                                                              };
-
-        private static char[][] perfectiveGerundEndings2 = {
-                                                               new[] {I, V},
-                                                               new[] {Y, V},
-                                                               new[] {I, V, SH, I},
-                                                               new[] {Y, V, SH, I},
-                                                               new[] {I, V, SH, I, S, SOFT},
-                                                               new[] {Y, V, SH, I, S, SOFT}
-                                                           };
-
-        private static char[][] adjectiveEndings = {
-                                                       new[] {E, E},
-                                                       new[] {I, E},
-                                                       new[] {Y, E},
-                                                       new[] {O, E},
-                                                       new[] {E, I_},
-                                                       new[] {I, I_},
-                                                       new[] {Y, I_},
-                                                       new[] {O, I_},
-                                                       new[] {E, M},
-                                                       new[] {I, M},
-                                                       new[] {Y, M},
-                                                       new[] {O, M},
-                                                       new[] {I, X},
-                                                       new[] {Y, X},
-                                                       new[] {U, IU},
-                                                       new[] {IU, IU},
-                                                       new[] {A, IA},
-                                                       new[] {IA, IA},
-                                                       new[] {O, IU},
-                                                       new[] {E, IU},
-                                                       new[] {I, M, I},
-                                                       new[] {Y, M, I},
-                                                       new[] {E, G, O},
-                                                       new[] {O, G, O},
-                                                       new[] {E, M, U},
-                                                       new[] {O, M, U}
-                                                   };
-
-        private static char[][] participleEndings1 = {
-                                                         new[] {SHCH},
-                                                         new[] {E, M},
-                                                         new[] {N, N},
-                                                         new[] {V, SH},
-                                                         new[] {IU, SHCH}
-                                                     };
-
-        private static char[][] participleEndings2 = {
-                                                         new[] {I, V, SH},
-                                                         new[] {Y, V, SH},
-                                                         new[] {U, IU, SHCH}
-                                                     };
-
-        private static char[][] participle1Predessors = {
-                                                            new[] {A},
-                                                            new[] {IA}
-                                                        };
-
-        private static char[][] reflexiveEndings = {
-                                                       new[] {S, IA},
-                                                       new[] {S, SOFT}
-                                                   };
-
-        private static char[][] verbEndings1 = {
-                                                   new[] {I_},
-                                                   new[] {L},
-                                                   new[] {N},
-                                                   new[] {L, O},
-                                                   new[] {N, O},
-                                                   new[] {E, T},
-                                                   new[] {IU, T},
-                                                   new[] {L, A},
-                                                   new[] {N, A},
-                                                   new[] {L, I},
-                                                   new[] {E, M},
-                                                   new[] {N, Y},
-                                                   new[] {E, T, E},
-                                                   new[] {I_, T, E},
-                                                   new[] {T, SOFT},
-                                                   new[] {E, SH, SOFT},
-                                                   new[] {N, N, O}
-                                               };
-
-        private static char[][] verbEndings2 = {
-                                                   new[] {IU},
-                                                   new[] {U, IU},
-                                                   new[] {E, N},
-                                                   new[] {E, I_},
-                                                   new[] {IA, T},
-                                                   new[] {U, I_},
-                                                   new[] {I, L},
-                                                   new[] {Y, L},
-                                                   new[] {I, M},
-                                                   new[] {Y, M},
-                                                   new[] {I, T},
-                                                   new[] {Y, T},
-                                                   new[] {I, L, A},
-                                                   new[] {Y, L, A},
-                                                   new[] {E, N, A},
-                                                   new[] {I, T, E},
-                                                   new[] {I, L, I},
-                                                   new[] {Y, L, I},
-                                                   new[] {I, L, O},
-                                                   new[] {Y, L, O},
-                                                   new[] {E, N, O},
-                                                   new[] {U, E, T},
-                                                   new[] {U, IU, T},
-                                                   new[] {E, N, Y},
-                                                   new[] {I, T, SOFT},
-                                                   new[] {Y, T, SOFT},
-                                                   new[] {I, SH, SOFT},
-                                                   new[] {E, I_, T, E},
-                                                   new[] {U, I_, T, E}
-                                               };
-
-        private static char[][] verb1Predessors = {
-                                                      new[] {A},
-                                                      new[] {IA}
-                                                  };
-
-        private static char[][] nounEndings = {
-                                                  new[] {A},
-                                                  new[] {U},
-                                                  new[] {I_},
-                                                  new[] {O},
-                                                  new[] {U}, // NOTE(review): repeats the {U} entry above; possibly meant to be {IU} - confirm
-                                                  new[] {E},
-                                                  new[] {Y},
-                                                  new[] {I},
-                                                  new[] {SOFT},
-                                                  new[] {IA},
-                                                  new[] {E, V},
-                                                  new[] {O, V},
-                                                  new[] {I, E},
-                                                  new[] {SOFT, E},
-                                                  new[] {IA, X},
-                                                  new[] {I, IU},
-                                                  new[] {E, I},
-                                                  new[] {I, I},
-                                                  new[] {E, I_},
-                                                  new[] {O, I_},
-                                                  new[] {E, M},
-                                                  new[] {A, M},
-                                                  new[] {O, M},
-                                                  new[] {A, X},
-                                                  new[] {SOFT, IU},
-                                                  new[] {I, IA},
-                                                  new[] {SOFT, IA},
-                                                  new[] {I, I_},
-                                                  new[] {IA, M},
-                                                  new[] {IA, M, I},
-                                                  new[] {A, M, I},
-                                                  new[] {I, E, I_},
-                                                  new[] {I, IA, M},
-                                                  new[] {I, E, M},
-                                                  new[] {I, IA, X},
-                                                  new[] {I, IA, M, I}
-                                              };
-
-        private static char[][] superlativeEndings = {
-                                                         new[] {E, I_, SH},
-                                                         new[] {E, I_, SH, E}
-                                                     };
-
-        private static char[][] derivationalEndings = {
-                                                          new[] {O, S, T},
-                                                          new[] {O, S, T, SOFT}
-                                                      };
-
-        /*
-         * Creates a stemmer; the constructor body is empty, RV/R1/R2 are set per word by markPositions.
-         */
-        public RussianStemmer()
-        {
-        }
-
-        /*
-         * Adjectival ending is an adjective ending,
-         * optionally preceded by participle ending.
-         * Creation date: (17/03/2002 12:14:58 AM)
-         * @param stemmingZone buffer truncated in place; returns true once an adjective ending was removed
-         */
-        private bool adjectival(StringBuilder stemmingZone)
-        {
-            // look for adjective ending in a stemming zone
-            if (!findAndRemoveEnding(stemmingZone, adjectiveEndings))
-                return false;
-            // if adjective ending was found, try for participle ending.
-            // variable r is unused, we are just interested in the side effect of
-            // findAndRemoveEnding():
-            bool r =
-                findAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors)
-                ||
-                findAndRemoveEnding(stemmingZone, participleEndings2);
-            return true;
-        }
-
-        /*
-         * Removes a derivational ending when it starts at or after R2 (measured relative to RV).
-         * Creation date: (17/03/2002 12:14:58 AM)
-         * NOTE(review): R2 stays 0 when markPositions finds no R2, so the check below then always passes - confirm intended.
-         */
-        private bool derivational(StringBuilder stemmingZone)
-        {
-            int endingLength = findEnding(stemmingZone, derivationalEndings);
-            if (endingLength == 0)
-                // no derivational ending found
-                return false;
-            else
-            {
-                // Ensure that the ending is located in R2 (stemmingZone starts at RV, hence R2 - RV)
-                if (R2 - RV <= stemmingZone.Length - endingLength)
-                {
-                    stemmingZone.Length = stemmingZone.Length - endingLength;
-                    return true;
-                }
-                else
-                {
-                    return false;
-                }
-            }
-        }
-
-        /*
-         * Tries the endings from the last table entry to the first and returns the length of the first one matching the text that ends at startIndex (0 if none).
-         * Creation date: (17/03/2002 8:18:34 PM)
-         */
-        private int findEnding(StringBuilder stemmingZone, int startIndex, char[][] theEndingClass)
-        {
-            bool match = false;
-            for (int i = theEndingClass.Length - 1; i >= 0; i--)
-            {
-                char[] theEnding = theEndingClass[i];
-                // skip endings longer than the text available up to startIndex
-                if (startIndex < theEnding.Length - 1)
-                {
-                    match = false;
-                    continue;
-                }
-                match = true;
-                int stemmingIndex = startIndex;
-                for (int j = theEnding.Length - 1; j >= 0; j--)
-                {
-                    if (stemmingZone[stemmingIndex--] != theEnding[j])
-                    {
-                        match = false;
-                        break;
-                    }
-                }
-                // check if ending was found
-                if (match)
-                {
-                    return theEndingClass[i].Length; // length only; the caller does the cutting
-                }
-            }
-            return 0;
-        }
-
-        private int findEnding(StringBuilder stemmingZone, char[][] theEndingClass)
-        {
-            return findEnding(stemmingZone, stemmingZone.Length - 1, theEndingClass); // match against the very end of the zone
-        }
-
-        /*
-         * Finds an ending from the given class at the end of the stemming zone and truncates the zone by its length; returns false if none matches.
-         * Creation date: (17/03/2002 8:18:34 PM)
-         */
-        private bool findAndRemoveEnding(StringBuilder stemmingZone, char[][] theEndingClass)
-        {
-            int endingLength = findEnding(stemmingZone, theEndingClass);
-            if (endingLength == 0)
-                // not found
-                return false;
-            else
-            {
-                stemmingZone.Length = stemmingZone.Length - endingLength;
-                // cut the ending found
-                return true;
-            }
-        }
-
-        /*
-         * Finds the ending among the given class of endings, then checks if this ending was
-         * preceded by any of the given predecessors, and if so, removes only the ending (the predecessor is kept).
-         * Creation date: (17/03/2002 8:18:34 PM)
-         */
-        private bool findAndRemoveEnding(StringBuilder stemmingZone,
-            char[][] theEndingClass, char[][] thePredessors)
-        {
-            int endingLength = findEnding(stemmingZone, theEndingClass);
-            if (endingLength == 0)
-                // not found
-                return false;
-            else
-            {
-                int predessorLength =
-                    findEnding(stemmingZone,
-                        stemmingZone.Length - endingLength - 1,
-                        thePredessors);
-                if (predessorLength == 0)
-                    return false;
-                else
-                {
-                    stemmingZone.Length = stemmingZone.Length - endingLength;
-                    // cut the ending found
-                    return true;
-                }
-            }
-
-        }
-
-        /*
-         * Resets RV, R1 and R2 to 0, then sets each to its start offset in word; a value left at 0 means that region is empty.
-         * Creation date: (16/03/2002 3:40:11 PM)
-         */
-        private void markPositions(String word)
-        {
-            RV = 0;
-            R1 = 0;
-            R2 = 0;
-            int i = 0;
-            // find RV: it starts right after the first vowel
-            while (word.Length > i && !isVowel(word[i]))
-            {
-                i++;
-            }
-            if (word.Length - 1 < ++i)
-                return; // RV zone is empty
-            RV = i;
-            // find R1: it starts right after the first non-vowel at or after RV
-            while (word.Length > i && isVowel(word[i]))
-            {
-                i++;
-            }
-            if (word.Length - 1 < ++i)
-                return; // R1 zone is empty
-            R1 = i;
-            // find R2: skip to the next vowel, then past the vowels and one following non-vowel
-            while (word.Length > i && !isVowel(word[i]))
-            {
-                i++;
-            }
-            if (word.Length - 1 < ++i)
-                return; // R2 zone is empty
-            while (word.Length > i && isVowel(word[i]))
-            {
-                i++;
-            }
-            if (word.Length - 1 < ++i)
-                return; // R2 zone is empty
-            R2 = i;
-        }
-
-        /*
-         * Checks whether the character is one of the entries of the vowels array.
-         * Creation date: (16/03/2002 10:47:03 PM)
-         * @return true if letter equals any element of vowels, false otherwise
-         * @param letter character to test
-         */
-        private bool isVowel(char letter)
-        {
-            for (int i = 0; i < vowels.Length; i++)
-            {
-                if (letter == vowels[i])
-                    return true;
-            }
-            return false;
-        }
-
-        /*
-         * Removes an ending listed in nounEndings from the end of the zone; returns true if one was removed.
-         * Creation date: (17/03/2002 12:14:58 AM)
-         * @param stemmingZone buffer truncated in place
-         */
-        private bool noun(StringBuilder stemmingZone)
-        {
-            return findAndRemoveEnding(stemmingZone, nounEndings);
-        }
-
-        /*
-         * Removes a perfective gerund ending: group 1 only when preceded by A or IA, otherwise group 2.
-         * Creation date: (17/03/2002 12:14:58 AM)
-         * @param stemmingZone buffer truncated in place; returns true if an ending was removed
-         */
-        private bool perfectiveGerund(StringBuilder stemmingZone)
-        {
-            return findAndRemoveEnding(
-                stemmingZone,
-                perfectiveGerundEndings1,
-                perfectiveGerund1Predessors)
-                || findAndRemoveEnding(stemmingZone, perfectiveGerundEndings2);
-        }
-
-        /*
-         * Removes a reflexive ending (S IA or S SOFT) from the end of the zone; returns true if one was removed.
-         * Creation date: (17/03/2002 12:14:58 AM)
-         * @param stemmingZone buffer truncated in place
-         */
-        private bool reflexive(StringBuilder stemmingZone)
-        {
-            return findAndRemoveEnding(stemmingZone, reflexiveEndings);
-        }
-
-        /*
-         * Drops the last character of the zone when it is the letter I ('\u0438'); returns true if it was dropped.
-         * Creation date: (17/03/2002 12:14:58 AM)
-         * @param stemmingZone buffer truncated in place; an empty buffer is left untouched
-         */
-        private bool removeI(StringBuilder stemmingZone)
-        {
-            if (stemmingZone.Length > 0
-                && stemmingZone[stemmingZone.Length - 1] == I)
-            {
-                stemmingZone.Length = stemmingZone.Length - 1;
-                return true;
-            }
-            else
-            {
-                return false;
-            }
-        }
-
-        /*
-         * Drops the last character of the zone when it is the soft sign SOFT ('\u044C'); returns true if it was dropped.
-         * Creation date: (17/03/2002 12:14:58 AM)
-         * @param stemmingZone buffer truncated in place; an empty buffer is left untouched
-         */
-        private bool removeSoft(StringBuilder stemmingZone)
-        {
-            if (stemmingZone.Length > 0
-                && stemmingZone[stemmingZone.Length - 1] == SOFT)
-            {
-                stemmingZone.Length = stemmingZone.Length - 1;
-                return true;
-            }
-            else
-            {
-                return false;
-            }
-        }
-
-        /*
-         * Finds the stem for the given Russian word; the word is returned unchanged when no RV region is found.
-         * Creation date: (16/03/2002 3:36:48 PM)
-         * @return the part of input before RV followed by the stemmed RV zone
-         * @param input word to stem; the call overwrites the RV/R1/R2 instance fields via markPositions
-         */
-        public virtual String Stem(String input)
-        {
-            markPositions(input);
-            if (RV == 0)
-                return input; //RV wasn't detected, nothing to stem
-            StringBuilder stemmingZone = new StringBuilder(input.Substring(RV));
-            // stemming goes on in RV
-            // Step 1
-
-            if (!perfectiveGerund(stemmingZone))
-            {
-                reflexive(stemmingZone);
-                // variable r is unused, we are just interested in the flow that gets
-                // created by logical expression: apply adjectival(); if that fails,
-                // apply verb() etc
-                bool r =
-                    adjectival(stemmingZone)
-                    || Verb(stemmingZone)
-                    || noun(stemmingZone);
-            }
-            // Step 2
-            removeI(stemmingZone);
-            // Step 3
-            derivational(stemmingZone);
-            // Step 4
-            Superlative(stemmingZone);
-            UndoubleN(stemmingZone);
-            removeSoft(stemmingZone);
-            // return result
-            return input.Substring(0, RV) + stemmingZone.ToString();
-        }
-
-        /*
-         * Removes a superlative ending (E I_ SH or E I_ SH E) from the end of the zone; returns true if one was removed.
-         * Creation date: (17/03/2002 12:14:58 AM)
-         * @param stemmingZone buffer truncated in place
-         */
-        private bool Superlative(StringBuilder stemmingZone)
-        {
-            return findAndRemoveEnding(stemmingZone, superlativeEndings);
-        }
-
-        /*
-         * If the zone ends in a double N, drops one N (a single character); returns true if it did.
-         * Creation date: (17/03/2002 12:14:58 AM)
-         * @param stemmingZone buffer truncated in place
-         */
-        private bool UndoubleN(StringBuilder stemmingZone)
-        {
-            char[][] doubleN = {
-                                   new[] {N, N}
-                               };
-            if (findEnding(stemmingZone, doubleN) != 0)
-            {
-                stemmingZone.Length = stemmingZone.Length - 1;
-                return true;
-            }
-            else
-            {
-                return false;
-            }
-        }
-
-        /*
-         * Removes a verb ending: group 1 only when preceded by A or IA, otherwise group 2.
-         * Creation date: (17/03/2002 12:14:58 AM)
-         * @param stemmingZone buffer truncated in place; returns true if an ending was removed
-         */
-        private bool Verb(StringBuilder stemmingZone)
-        {
-            return findAndRemoveEnding(
-                stemmingZone,
-                verbEndings1,
-                verb1Predessors)
-                || findAndRemoveEnding(stemmingZone, verbEndings2);
-        }
-
-        /*
-         * Convenience wrapper: stems theWord with a freshly created RussianStemmer instance.
-         */
-        public static String StemWord(String theWord)
-        {
-            RussianStemmer stemmer = new RussianStemmer();
-            return stemmer.Stem(theWord);
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs b/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs
deleted file mode 100644
index fb5f59d..0000000
--- a/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-namespace Lucene.Net.Analysis.Shingle.Codec
-{
-    /// <summary>
-    /// Using this codec makes a ShingleMatrixFilter act like ShingleFilter.
-    /// It produces the most simple sort of shingles, ignoring token position increments, etc.
-    /// 
-    /// It adds each token as a new column.
-    /// </summary>
-    public class OneDimensionalNonWeightedTokenSettingsCodec : TokenSettingsCodec
-    {
-        public override TokenPositioner GetTokenPositioner(Token token)
-        {
-            return TokenPositioner.NewColumn;
-        }
-
-        public override void SetTokenPositioner(Token token, TokenPositioner tokenPositioner)
-        {
-        }
-
-        public override float GetWeight(Token token)
-        {
-            return 1f;
-        }
-
-        public override void SetWeight(Token token, float weight)
-        {
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs b/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs
deleted file mode 100644
index cdd401e..0000000
--- a/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System.IO;
-using Lucene.Net.Analysis.Payloads;
-using Lucene.Net.Index;
-
-namespace Lucene.Net.Analysis.Shingle.Codec
-{
-    /// <summary>
-    /// A full featured codec not to be used for something serious.
-    /// 
-    /// It takes complete control of
-    /// payload for weight
-    /// and the bit flags for positioning in the matrix.
-    /// 
-    /// Mainly exist for demonstrational purposes.
-    /// </summary>
-    public class SimpleThreeDimensionalTokenSettingsCodec : TokenSettingsCodec
-    {
-        /// <summary>
-        /// 
-        /// </summary>
-        /// <param name="token"></param>
-        /// <returns>the token flags int value as TokenPosition</returns>
-        public override TokenPositioner GetTokenPositioner(Token token)
-        {
-            switch (token.Flags)
-            {
-                case 0:
-                    return TokenPositioner.NewColumn;
-                case 1:
-                    return TokenPositioner.NewRow;
-                case 2:
-                    return TokenPositioner.SameRow;
-            }
-            throw new IOException("Unknown matrix positioning of token " + token);
-        }
-
-        /// <summary>
-        /// Sets the TokenPositioner as token flags int value.
-        /// </summary>
-        /// <param name="token"></param>
-        /// <param name="tokenPositioner"></param>
-        public override void SetTokenPositioner(Token token, TokenPositioner tokenPositioner)
-        {
-            token.Flags = tokenPositioner.Index;
-        }
-
-        /// <summary>
-        /// Returns a 32 bit float from the payload, or 1f it null.
-        /// </summary>
-        /// <param name="token"></param>
-        /// <returns></returns>
-        public override float GetWeight(Token token)
-        {
-            if (token.Payload == null || token.Payload.GetData() == null)
-                return 1f;
-
-            return PayloadHelper.DecodeFloat(token.Payload.GetData());
-        }
-
-        /// <summary>
-        /// Stores a 32 bit float in the payload, or set it to null if 1f;
-        /// </summary>
-        /// <param name="token"></param>
-        /// <param name="weight"></param>
-        public override void SetWeight(Token token, float weight)
-        {
-            token.Payload = weight == 1f ? null : new Payload(PayloadHelper.EncodeFloat(weight));
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs b/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs
deleted file mode 100644
index 47777d5..0000000
--- a/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-namespace Lucene.Net.Analysis.Shingle.Codec
-{
-    /// <summary>
-    /// Strategy used to code and decode meta data of the tokens from the input stream
-    /// regarding how to position the tokens in the matrix, set and retreive weight, etc.
-    /// </summary>
-    public abstract class TokenSettingsCodec
-    {
-        /// <summary>
-        /// Retrieves information on how a Token is to be inserted to a ShingleMatrixFilter.Matrix.
-        /// </summary>
-        /// <param name="token"></param>
-        /// <returns></returns>
-        public abstract TokenPositioner GetTokenPositioner(Token token);
-
-        /// <summary>
-        /// Sets information on how a Token is to be inserted to a ShingleMatrixFilter.Matrix.
-        /// </summary>
-        /// <param name="token"></param>
-        /// <param name="tokenPositioner"></param>
-        public abstract void SetTokenPositioner(Token token, TokenPositioner tokenPositioner);
-
-        /// <summary>
-        /// Have this method return 1f in order to 'disable' weights.
-        /// </summary>
-        /// <param name="token"></param>
-        /// <returns></returns>
-        public abstract float GetWeight(Token token);
-
-        /// <summary>
-        /// Have this method do nothing in order to 'disable' weights.
-        /// </summary>
-        /// <param name="token"></param>
-        /// <param name="weight"></param>
-        public abstract void SetWeight(Token token, float weight);
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs b/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs
deleted file mode 100644
index 446cf26..0000000
--- a/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-
-namespace Lucene.Net.Analysis.Shingle.Codec
-{
-    /// <summary>
-    /// A codec that creates a two dimensional matrix
-    /// by treating tokens from the input stream with 0 position increment
-    /// as new rows to the current column.
-    /// </summary>
-    public class TwoDimensionalNonWeightedSynonymTokenSettingsCodec : TokenSettingsCodec
-    {
-        public override TokenPositioner GetTokenPositioner(Token token)
-        {
-            return
-                token.PositionIncrement == 0 ? TokenPositioner.NewRow : TokenPositioner.NewColumn;
-        }
-
-        public override void SetTokenPositioner(Token token, TokenPositioner tokenPositioner)
-        {
-            throw new NotSupportedException();
-        }
-
-        public override float GetWeight(Token token)
-        {
-            return 1f;
-        }
-
-        public override void SetWeight(Token token, float weight)
-        {
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Shingle/Matrix/Column.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Matrix/Column.cs b/src/contrib/Analyzers/Shingle/Matrix/Column.cs
deleted file mode 100644
index 8d44300..0000000
--- a/src/contrib/Analyzers/Shingle/Matrix/Column.cs
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.Shingle.Matrix
-{
-    public class Column
-    {
-        public Column(Token token, Analysis.Shingle.Matrix.Matrix enclosingInstance)
-            : this(enclosingInstance)
-        {
-            var row = new Row(this);
-            row.Tokens.AddLast(token);
-        }
-
-        public Column(Analysis.Shingle.Matrix.Matrix enclosingInstance)
-        {
-            Rows = new List<Row>();
-            Matrix = enclosingInstance;
-
-            lock (Matrix)
-            {
-                if (Matrix.Columns.Count == 0)
-                    IsFirst = true;
-            }
-            Matrix.Columns.Add(this);
-        }
-
-        public Matrix Matrix { get; private set; }
-
-        public List<Row> Rows { get; private set; }
-
-        public int Index
-        {
-            get { return Matrix.Columns.IndexOf(this); }
-        }
-
-        public bool IsFirst { get; set; }
-
-        public bool IsLast { get; set; }
-
-        public override String ToString()
-        {
-            return "Column{" +
-                   "first=" + IsFirst +
-                   ", last=" + IsLast +
-                   ", rows=" + Rows +
-                   '}';
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs b/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs
deleted file mode 100644
index 0431026..0000000
--- a/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.Shingle.Matrix
-{
-    /// <summary>
-    /// A column focused matrix in three dimensions:
-    /// 
-    /// <pre>
-    /// Token[column][row][z-axis] {
-    ///     {{hello}, {greetings, and, salutations}},
-    ///     {{world}, {earth}, {tellus}}
-    /// };
-    /// </pre>
-    /// 
-    /// todo consider row groups
-    /// to indicate that shingles is only to contain permutations with texts in that same row group.
-    /// </summary>
-    public class Matrix
-    {
-        public Matrix()
-        {
-            Columns = new List<Column>();
-        }
-
-        public List<Column> Columns { get; private set; }
-
-        public MatrixPermutationIterator PermutationIterator()
-        {
-            return new MatrixPermutationIterator(this);
-        }
-
-        public override string ToString()
-        {
-            return "Matrix{" +
-                   "columns=" + Columns +
-                   '}';
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs b/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs
deleted file mode 100644
index 2790236..0000000
--- a/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-
-namespace Lucene.Net.Analysis.Shingle.Matrix
-{
-    public class MatrixPermutationIterator
-    {
-        private readonly Analysis.Shingle.Matrix.Matrix _enclosingInstance;
-
-        private readonly int[] _columnRowCounters;
-
-        public MatrixPermutationIterator(Analysis.Shingle.Matrix.Matrix enclosingInstance)
-        {
-            _enclosingInstance = enclosingInstance;
-            _columnRowCounters = new int[_enclosingInstance.Columns.Count];
-        }
-
-        public bool HasNext()
-        {
-            var s = _columnRowCounters.Length;
-            var n = _enclosingInstance.Columns.Count;
-            return s != 0 && n >= s && _columnRowCounters[s - 1] < _enclosingInstance.Columns[s - 1].Rows.Count;
-        }
-
-        public Row[] Next()
-        {
-            if (!HasNext())
-                throw new Exception("no more elements");
-
-            var rows = new Row[_columnRowCounters.Length];
-
-            for (int i = 0; i < _columnRowCounters.Length; i++)
-            {
-                rows[i] = _enclosingInstance.Columns[i].Rows[_columnRowCounters[i]];
-            }
-
-            IncrementColumnRowCounters();
-
-            return rows;
-        }
-
-        private void IncrementColumnRowCounters()
-        {
-            for (int i = 0; i < _columnRowCounters.Length; i++)
-            {
-                _columnRowCounters[i]++;
-
-                if (_columnRowCounters[i] != _enclosingInstance.Columns[i].Rows.Count ||
-                    i >= _columnRowCounters.Length - 1)
-                    break;
-
-                _columnRowCounters[i] = 0;
-            }
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Shingle/Matrix/Row.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Matrix/Row.cs b/src/contrib/Analyzers/Shingle/Matrix/Row.cs
deleted file mode 100644
index a841f50..0000000
--- a/src/contrib/Analyzers/Shingle/Matrix/Row.cs
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.Shingle.Matrix
-{
-    public class Row
-    {
-        public Row(Column enclosingInstance)
-        {
-            Tokens = new LinkedList<Token>();
-            Column = enclosingInstance;
-            Column.Rows.Add(this);
-        }
-
-        public Column Column { get; private set; }
-
-        public int Index
-        {
-            get { return Column.Rows.IndexOf(this); }
-        }
-
-        public LinkedList<Token> Tokens { get; set; }
-
-        public override string ToString()
-        {
-            return "Row{" +
-                   "index=" + Index +
-                   ", tokens=" + Tokens +
-                   '}';
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs b/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs
deleted file mode 100644
index afa3d0b..0000000
--- a/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using System.IO;
-using Lucene.Net.Analysis.Standard;
-using Version = Lucene.Net.Util.Version;
-
-namespace Lucene.Net.Analysis.Shingle
-{
-    /*
- * A ShingleAnalyzerWrapper wraps a {@link ShingleFilter} around another {@link Analyzer}.
- * <p>
- * A shingle is another name for a token based n-gram.
- * </p>
- */
-    public class ShingleAnalyzerWrapper : Analyzer
-    {
-
-        protected Analyzer defaultAnalyzer;
-        protected int maxShingleSize = 2;
-        protected bool outputUnigrams = true;
-
-        public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer)
-        {
-            this.defaultAnalyzer = defaultAnalyzer;
-            SetOverridesTokenStreamMethod<ShingleAnalyzerWrapper>();
-        }
-
-        public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int maxShingleSize)
-            : this(defaultAnalyzer)
-        {
-
-            this.maxShingleSize = maxShingleSize;
-        }
-
-        /*
-         * Wraps {@link StandardAnalyzer}. 
-         */
-        public ShingleAnalyzerWrapper(Version matchVersion)
-        {
-            this.defaultAnalyzer = new StandardAnalyzer(matchVersion);
-            SetOverridesTokenStreamMethod<ShingleAnalyzerWrapper>();
-        }
-
-        /*
-         * Wraps {@link StandardAnalyzer}. 
-         */
-        public ShingleAnalyzerWrapper(Version matchVersion, int nGramSize)
-            : this(matchVersion)
-        {
-            this.maxShingleSize = nGramSize;
-        }
-
-        /// <summary>
-        /// Gets or sets the max shingle (ngram) size
-        /// </summary>
-        public int MaxShingleSize
-        {
-            get { return maxShingleSize; }
-            set { this.maxShingleSize = value; }
-        }
-        /// <summary>
-        /// Gets or sets whether or not to have the filter pass the original tokens 
-        /// (the "unigrams") to the output stream
-        /// </summary>
-        public bool IsOutputUnigrams
-        {
-            get { return outputUnigrams; }
-            set { this.outputUnigrams = value; }
-        }
-
-        public override TokenStream TokenStream(String fieldName, TextReader reader)
-        {
-            TokenStream wrapped;
-            try
-            {
-                wrapped = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
-            }
-            catch (IOException)
-            {
-                wrapped = defaultAnalyzer.TokenStream(fieldName, reader);
-            }
-            ShingleFilter filter = new ShingleFilter(wrapped);
-            filter.SetMaxShingleSize(maxShingleSize);
-            filter.SetOutputUnigrams(outputUnigrams);
-            return filter;
-        }
-
-        class SavedStreams
-        {
-            protected internal TokenStream wrapped;
-            protected internal ShingleFilter shingle;
-        };
-
-        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
-        {
-            if (overridesTokenStreamMethod)
-            {
-                // LUCENE-1678: force fallback to tokenStream() if we
-                // have been subclassed and that subclass overrides
-                // tokenStream but not reusableTokenStream
-                return TokenStream(fieldName, reader);
-            }
-
-            SavedStreams streams = (SavedStreams)PreviousTokenStream;
-            if (streams == null)
-            {
-                streams = new SavedStreams();
-                streams.wrapped = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
-                streams.shingle = new ShingleFilter(streams.wrapped);
-                PreviousTokenStream = streams;
-            }
-            else
-            {
-                TokenStream result = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
-                if (result == streams.wrapped)
-                {
-                    /* the wrapped analyzer reused the stream */
-                    streams.shingle.Reset();
-                }
-                else
-                {
-                    /* the wrapped analyzer did not, create a new shingle around the new one */
-                    streams.wrapped = result;
-                    streams.shingle = new ShingleFilter(streams.wrapped);
-                }
-            }
-            streams.shingle.SetMaxShingleSize(maxShingleSize);
-            streams.shingle.SetOutputUnigrams(outputUnigrams);
-            return streams.shingle;
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Shingle/ShingleFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/ShingleFilter.cs b/src/contrib/Analyzers/Shingle/ShingleFilter.cs
deleted file mode 100644
index 28de576..0000000
--- a/src/contrib/Analyzers/Shingle/ShingleFilter.cs
+++ /dev/null
@@ -1,381 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Analysis.Shingle
-{
-    /// <summary>
-    /// <para>A ShingleFilter constructs shingles (token n-grams) from a token stream.
-    /// In other words, it creates combinations of tokens as a single token.</para>
-    ///
-    /// <para>For example, the sentence "please divide this sentence into shingles"
-    /// might be tokenized into shingles "please divide", "divide this",
-    /// "this sentence", "sentence into", and "into shingles".</para>
-    ///
-    /// <para>This filter handles position increments greater than 1 by inserting
-    /// filler tokens (tokens with termtext "_"). It does not handle a position
-    /// increment of 0.</para>
-    /// </summary>
-    public sealed class ShingleFilter : TokenFilter
-    {
-        /// <summary>Captured states of the tokens currently contributing to shingles (oldest first).</summary>
-        private readonly LinkedList<State> shingleBuf = new LinkedList<State>();
-
-        /// <summary>One builder per shingle slot; slot <c>k</c> holds the first <c>k + 1</c> buffered terms joined by the separator.</summary>
-        private StringBuilder[] shingles;
-
-        /// <summary>Type assigned to emitted shingle tokens.</summary>
-        private String tokenType = "shingle";
-
-        /// <summary>
-        /// Filler token for when positionIncrement is more than 1.
-        /// </summary>
-        public static readonly char[] FILLER_TOKEN = { '_' };
-
-        /// <summary>
-        /// Default maximum shingle size is 2.
-        /// </summary>
-        public const int DEFAULT_MAX_SHINGLE_SIZE = 2;
-
-        /// <summary>
-        /// The string to use when joining adjacent tokens to form a shingle.
-        /// </summary>
-        public const String TOKEN_SEPARATOR = " ";
-
-        /// <summary>
-        /// By default, we output unigrams (individual tokens) as well as shingles
-        /// (token n-grams).
-        /// </summary>
-        private bool outputUnigrams = true;
-
-        /// <summary>
-        /// Maximum shingle size (number of tokens).
-        /// </summary>
-        private int maxShingleSize;
-
-        /// <summary>State of the token at the head of the shingle buffer, or null when the buffer must be refilled.</summary>
-        private AttributeSource.State nextToken;
-
-        /// <summary>Index of the next shingle slot to emit for the current buffer contents.</summary>
-        private int shingleBufferPosition;
-
-        /// <summary>End offset of each buffered token, indexed like the shingle slots.</summary>
-        private int[] endOffsets;
-
-        /// <summary>Number of filler tokens still to be produced before the pending real token.</summary>
-        private int numFillerTokensToInsert;
-
-        /// <summary>State of the real input token that is waiting behind filler tokens, if any.</summary>
-        private AttributeSource.State currentToken;
-
-        /// <summary>True when a token read from the input has not yet been handed out.</summary>
-        private bool hasCurrentToken;
-
-        private ITermAttribute termAtt;
-        private IOffsetAttribute offsetAtt;
-        private IPositionIncrementAttribute posIncrAtt;
-        private ITypeAttribute typeAtt;
-
-        /// <summary>
-        /// Constructs a ShingleFilter with the specified shingle size from the
-        /// <see cref="TokenStream"/> <c>input</c>.
-        /// </summary>
-        /// <param name="input">input stream</param>
-        /// <param name="maxShingleSize">maximum shingle size produced by the filter</param>
-        /// <exception cref="ArgumentException">if <paramref name="maxShingleSize"/> is less than 2</exception>
-        public ShingleFilter(TokenStream input, int maxShingleSize)
-            : base(input)
-        {
-            SetMaxShingleSize(maxShingleSize);
-            this.termAtt = AddAttribute<ITermAttribute>();
-            this.offsetAtt = AddAttribute<IOffsetAttribute>();
-            this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
-            this.typeAtt = AddAttribute<ITypeAttribute>();
-        }
-
-        /// <summary>
-        /// Construct a ShingleFilter with default shingle size.
-        /// </summary>
-        /// <param name="input">input stream</param>
-        public ShingleFilter(TokenStream input)
-            : this(input, DEFAULT_MAX_SHINGLE_SIZE)
-        {
-        }
-
-        /// <summary>
-        /// Construct a ShingleFilter with the specified token type for shingle tokens.
-        /// </summary>
-        /// <param name="input">input stream</param>
-        /// <param name="tokenType">token type for shingle tokens</param>
-        public ShingleFilter(TokenStream input, String tokenType)
-            : this(input, DEFAULT_MAX_SHINGLE_SIZE)
-        {
-            setTokenType(tokenType);
-        }
-
-        /// <summary>
-        /// Set the type of the shingle tokens produced by this filter.
-        /// (default: "shingle")
-        /// </summary>
-        /// <param name="tokenType">token type for shingle tokens</param>
-        public void setTokenType(String tokenType)
-        {
-            this.tokenType = tokenType;
-        }
-
-        /// <summary>
-        /// Shall the output stream contain the input tokens (unigrams) as well as
-        /// shingles? (default: true.)
-        /// </summary>
-        /// <param name="outputUnigrams">Whether or not the output stream shall contain
-        /// the input tokens (unigrams)</param>
-        public void SetOutputUnigrams(bool outputUnigrams)
-        {
-            this.outputUnigrams = outputUnigrams;
-        }
-
-        /// <summary>
-        /// Set the max shingle size (default: 2). Allocates a fresh set of
-        /// shingle builders sized to the new maximum.
-        /// </summary>
-        /// <param name="maxShingleSize">max size of output shingles</param>
-        /// <exception cref="ArgumentException">if <paramref name="maxShingleSize"/> is less than 2</exception>
-        public void SetMaxShingleSize(int maxShingleSize)
-        {
-            if (maxShingleSize < 2)
-            {
-                throw new ArgumentException("Max shingle size must be >= 2");
-            }
-            shingles = new StringBuilder[maxShingleSize];
-            for (int i = 0; i < shingles.Length; i++)
-            {
-                shingles[i] = new StringBuilder();
-            }
-            this.maxShingleSize = maxShingleSize;
-        }
-
-        /// <summary>
-        /// Clear the StringBuilders that are used for storing the output shingles.
-        /// </summary>
-        private void ClearShingles()
-        {
-            for (int i = 0; i < shingles.Length; i++)
-            {
-                shingles[i].Length = 0;
-            }
-        }
-
-        /// <summary>
-        /// Advances to the next output token: either the unigram at the head of
-        /// the shingle buffer or one of the shingles that start with it.
-        /// </summary>
-        /// <returns>true if a token was produced; false once the shingle buffer cannot be refilled</returns>
-        public sealed override bool IncrementToken()
-        {
-            while (true)
-            {
-                if (nextToken == null)
-                {
-                    if (!FillShingleBuffer())
-                    {
-                        return false;
-                    }
-                }
-
-                nextToken = shingleBuf.First.Value;
-
-                if (outputUnigrams)
-                {
-                    if (shingleBufferPosition == 0)
-                    {
-                        // Slot 0 is the plain token itself.
-                        RestoreState(nextToken);
-                        posIncrAtt.PositionIncrement = 1;
-                        shingleBufferPosition++;
-                        return true;
-                    }
-                }
-                else if (shingleBufferPosition % this.maxShingleSize == 0)
-                {
-                    // Unigrams are suppressed: step over the unigram slot.
-                    shingleBufferPosition++;
-                }
-
-                if (shingleBufferPosition < shingleBuf.Count)
-                {
-                    RestoreState(nextToken);
-                    typeAtt.Type = tokenType;
-                    // The shingle starts where the head token starts and ends where
-                    // the last token it covers ends.
-                    offsetAtt.SetOffset(offsetAtt.StartOffset, endOffsets[shingleBufferPosition]);
-                    StringBuilder buf = shingles[shingleBufferPosition];
-                    int termLength = buf.Length;
-                    char[] termBuffer = termAtt.TermBuffer();
-                    if (termBuffer.Length < termLength)
-                    {
-                        termBuffer = termAtt.ResizeTermBuffer(termLength);
-                    }
-                    buf.CopyTo(0, termBuffer, 0, termLength);
-                    termAtt.SetTermLength(termLength);
-                    if ((!outputUnigrams) && shingleBufferPosition % this.maxShingleSize == 1)
-                    {
-                        // Without unigrams, the first shingle advances the position.
-                        posIncrAtt.PositionIncrement = 1;
-                    }
-                    else
-                    {
-                        posIncrAtt.PositionIncrement = 0;
-                    }
-                    shingleBufferPosition++;
-                    if (shingleBufferPosition == shingleBuf.Count)
-                    {
-                        // All slots for this head token are done; refill on the next call.
-                        nextToken = null;
-                        shingleBufferPosition = 0;
-                    }
-                    return true;
-                }
-                else
-                {
-                    nextToken = null;
-                    shingleBufferPosition = 0;
-                }
-            }
-        }
-
-        /// <summary>
-        /// Loads the next token into the attribute state. If the input token has a
-        /// position increment greater than 1, filler tokens are produced first and
-        /// the real token is handed out after them.
-        /// </summary>
-        /// <returns>true if a token (real or filler) is available; false if the input stream is exhausted</returns>
-        private bool GetNextToken()
-        {
-
-            while (true)
-            {
-                if (numFillerTokensToInsert > 0)
-                {
-                    if (currentToken == null)
-                    {
-                        // Remember the real token so it can be restored after the fillers.
-                        currentToken = CaptureState();
-                    }
-                    else
-                    {
-                        RestoreState(currentToken);
-                    }
-                    numFillerTokensToInsert--;
-                    // A filler token occupies no space
-                    offsetAtt.SetOffset(offsetAtt.StartOffset, offsetAtt.StartOffset);
-                    termAtt.SetTermBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.Length);
-                    return true;
-                }
-
-                if (hasCurrentToken)
-                {
-                    if (currentToken != null)
-                    {
-                        RestoreState(currentToken);
-                        currentToken = null;
-                    }
-                    hasCurrentToken = false;
-                    return true;
-                }
-
-                if (!input.IncrementToken())
-                {
-                    return false;
-                }
-                hasCurrentToken = true;
-
-                if (posIncrAtt.PositionIncrement > 1)
-                {
-                    numFillerTokensToInsert = posIncrAtt.PositionIncrement - 1;
-                }
-            }
-        }
-
-        /// <summary>
-        /// Fill the output buffer with new shingles.
-        /// </summary>
-        /// <returns>true if the buffer holds at least one token to emit shingles for; false otherwise</returns>
-        private bool FillShingleBuffer()
-        {
-            bool addedToken = false;
-
-            // Try to fill the shingle buffer.
-            do
-            {
-                if (!GetNextToken())
-                {
-                    break;
-                }
-
-                shingleBuf.AddLast(CaptureState());
-                if (shingleBuf.Count > maxShingleSize)
-                {
-                    shingleBuf.RemoveFirst();
-                }
-                addedToken = true;
-            } while (shingleBuf.Count < maxShingleSize);
-
-            if (shingleBuf.Count == 0)
-            {
-                return false;
-            }
-
-            // If no new token could be added to the shingle buffer, we have reached
-            // the end of the input stream and have to discard the least recent token.
-            if (!addedToken)
-            {
-                shingleBuf.RemoveFirst();
-            }
-
-            if (shingleBuf.Count == 0)
-            {
-                return false;
-            }
-
-            ClearShingles();
-
-            // A freshly allocated int[] is already zero-filled.
-            endOffsets = new int[shingleBuf.Count];
-
-            int i = 0;
-            foreach (State state in shingleBuf)
-            {
-                RestoreState(state);
-
-                // Append straight from the term buffer instead of copying it into
-                // a temporary array for every shingle slot.
-                char[] termBuffer = termAtt.TermBuffer();
-                int termLength = termAtt.TermLength();
-                for (int j = i; j < shingles.Length; j++)
-                {
-                    if (shingles[j].Length != 0)
-                    {
-                        shingles[j].Append(TOKEN_SEPARATOR);
-                    }
-                    shingles[j].Append(termBuffer, 0, termLength);
-                }
-
-                endOffsets[i] = offsetAtt.EndOffset;
-                i++;
-            }
-
-            return true;
-        }
-
-        /// <summary>
-        /// Resets the base filter and discards all buffered tokens and pending
-        /// filler state so the filter can be used again.
-        /// </summary>
-        public override void Reset()
-        {
-            base.Reset();
-            nextToken = null;
-            shingleBufferPosition = 0;
-            shingleBuf.Clear();
-            numFillerTokensToInsert = 0;
-            currentToken = null;
-            hasCurrentToken = false;
-        }
-    }
-}
\ No newline at end of file


Mime
View raw message