lucenenet-commits mailing list archives

From nightowl...@apache.org
Subject [40/62] [abbrv] lucenenet git commit: Deleted obsolete Contrib folder
Date Sat, 01 Apr 2017 01:09:33 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/BR/BrazilianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/BR/BrazilianStemmer.cs b/src/contrib/Analyzers/BR/BrazilianStemmer.cs
deleted file mode 100644
index 9ec12ec..0000000
--- a/src/contrib/Analyzers/BR/BrazilianStemmer.cs
+++ /dev/null
@@ -1,1264 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * A stemmer for Brazilian words.
- */
-namespace Lucene.Net.Analysis.BR
-{
-
-    public class BrazilianStemmer
-    {
-
-        /*
-         * Changed term
-         */
-        private string TERM;
-        private string CT;
-        private string R1;
-        private string R2;
-        private string RV;
-
-
-        public BrazilianStemmer()
-        {
-        }
-
-        /*
-         * Stems the given term to a unique <tt>discriminator</tt>.
-         *
-         * <param name="term"> The term that should be stemmed.</param>
-         * <returns>     Discriminator for <tt>term</tt></returns>
-         */
-        public string Stem(string term)
-        {
-            bool altered = false; // altered the term
-
-            // creates CT
-            createCT(term);
-
-            if (!isIndexable(CT))
-            {
-                return null;
-            }
-            if (!isStemmable(CT))
-            {
-                return CT;
-            }
-
-            R1 = getR1(CT);
-            R2 = getR1(R1);
-            RV = getRV(CT);
-            TERM = term + ";" + CT;
-
-            altered = step1();
-            if (!altered)
-            {
-                altered = step2();
-            }
-
-            if (altered)
-            {
-                step3();
-            }
-            else
-            {
-                step4();
-            }
-
-            step5();
-
-            return CT;
-        }
-
-        /*
-         * Checks whether a term can be processed correctly.
-         *
-         * <returns> true if, and only if, the given term consists only of letters.</returns>
-         */
-        private bool isStemmable(string term)
-        {
-            for (int c = 0; c < term.Length; c++)
-            {
-                // Discard terms that contain non-letter characters.
-                if (!char.IsLetter(term[c]))
-                {
-                    return false;
-                }
-            }
-            return true;
-        }
-
-        /*
-         * Checks whether a term can be indexed.
-         *
-         * <returns> true if it can be indexed</returns>
-         */
-        private bool isIndexable(string term)
-        {
-            return (term.Length < 30) && (term.Length > 2);
-        }
-
-        /*
-         * See if the character is 'a', 'e', 'i', 'o' or 'u'.
-         *
-         * <returns>true if it is a vowel</returns>
-         */
-        private bool isVowel(char value)
-        {
-            return (value == 'a') ||
-                   (value == 'e') ||
-                   (value == 'i') ||
-                   (value == 'o') ||
-                   (value == 'u');
-        }
-
-        /*
-         * Gets R1.
-         *
-         * R1 is the region after the first non-vowel following a vowel,
-         * or the null region at the end of the word if there is
-         * no such non-vowel.
-         *
-         * <returns>null or a string representing R1</returns>
-         */
-        private string getR1(string value)
-        {
-            int i;
-            int j;
-
-            // be-safe !!!
-            if (value == null)
-            {
-                return null;
-            }
-
-            // find 1st vowel
-            i = value.Length - 1;
-            for (j = 0; j < i; j++)
-            {
-                if (isVowel(value[j]))
-                {
-                    break;
-                }
-            }
-
-            if (!(j < i))
-            {
-                return null;
-            }
-
-            // find 1st non-vowel
-            for (; j < i; j++)
-            {
-                if (!(isVowel(value[j])))
-                {
-                    break;
-                }
-            }
-
-            if (!(j < i))
-            {
-                return null;
-            }
-
-            return value.Substring(j + 1);
-        }
-
-        /*
-         * Gets RV.
-         *
-         * RV - IF the second letter is a consonant, RV is the region after
-         *      the next following vowel,
-         *
-         *      OR if the first two letters are vowels, RV is the region
-         *      after the next consonant,
-         *
-         *      AND otherwise (consonant-vowel case) RV is the region after
-         *      the third letter.
-         *
-         *      BUT RV is the end of the word if these positions cannot be
-         *      found.
-         *
-         * <returns>null or a string representing RV</returns>
-         */
-        private string getRV(string value)
-        {
-            int i;
-            int j;
-
-            // be-safe !!!
-            if (value == null)
-            {
-                return null;
-            }
-
-            i = value.Length - 1;
-
-            // RV - IF the second letter is a consonant, RV is the region after
-            //      the next following vowel,
-            if ((i > 0) && !isVowel(value[1]))
-            {
-                // find 1st vowel
-                for (j = 2; j < i; j++)
-                {
-                    if (isVowel(value[j]))
-                    {
-                        break;
-                    }
-                }
-
-                if (j < i)
-                {
-                    return value.Substring(j + 1);
-                }
-            }
-
-
-            // RV - OR if the first two letters are vowels, RV is the region
-            //      after the next consonant,
-            if ((i > 1) &&
-                isVowel(value[0]) &&
-                isVowel(value[1]))
-            {
-                // find 1st consonant
-                for (j = 2; j < i; j++)
-                {
-                    if (!isVowel(value[j]))
-                    {
-                        break;
-                    }
-                }
-
-                if (j < i)
-                {
-                    return value.Substring(j + 1);
-                }
-            }
-
-            // RV - AND otherwise (consonant-vowel case) RV is the region after
-            //      the third letter.
-            if (i > 2)
-            {
-                return value.Substring(3);
-            }
-
-            return null;
-        }
-
-        /*
-         * 1) Turn to lowercase
-         * 2) Remove accents
-         * 3) ã -> a ; õ -> o
-         * 4) ç -> c
-         *
-         * <returns>null or the transformed string</returns>
-         */
-        private string changeTerm(string value)
-        {
-            int j;
-            string r = "";
-
-            // be-safe !!!
-            if (value == null)
-            {
-                return null;
-            }
-
-            value = value.ToLower();
-            for (j = 0; j < value.Length; j++)
-            {
-                if ((value[j] == 'á') ||
-                    (value[j] == 'â') ||
-                    (value[j] == 'ã'))
-                {
-                    r = r + "a"; continue;
-                }
-                if ((value[j] == 'é') ||
-                    (value[j] == 'ê'))
-                {
-                    r = r + "e"; continue;
-                }
-                if (value[j] == 'í')
-                {
-                    r = r + "i"; continue;
-                }
-                if ((value[j] == 'ó') ||
-                    (value[j] == 'ô') ||
-                    (value[j] == 'õ'))
-                {
-                    r = r + "o"; continue;
-                }
-                if ((value[j] == 'ú') ||
-                    (value[j] == 'ü'))
-                {
-                    r = r + "u"; continue;
-                }
-                if (value[j] == 'ç')
-                {
-                    r = r + "c"; continue;
-                }
-                if (value[j] == 'ñ')
-                {
-                    r = r + "n"; continue;
-                }
-
-                r = r + value[j];
-            }
-
-            return r;
-        }
-
-        /*
-       * Check if a string ends with a suffix
-       *
-       * <returns>true if the string ends with the specified suffix</returns>
-         */
-        private bool suffix(string value, string suffix)
-        {
-
-            // be-safe !!!
-            if ((value == null) || (suffix == null))
-            {
-                return false;
-            }
-
-            if (suffix.Length > value.Length)
-            {
-                return false;
-            }
-
-            return value.Substring(value.Length - suffix.Length).Equals(suffix);
-        }
-
-        /*
-       * Replace a string suffix by another
-       *
-       * <returns>the replaced string</returns>
-         */
-        private string replaceSuffix(string value, string toReplace, string changeTo)
-        {
-            string vvalue;
-
-            // be-safe !!!
-            if ((value == null) ||
-                (toReplace == null) ||
-                (changeTo == null))
-            {
-                return value;
-            }
-
-            vvalue = removeSuffix(value, toReplace);
-
-            if (value.Equals(vvalue))
-            {
-                return value;
-            }
-            else
-            {
-                return vvalue + changeTo;
-            }
-        }
-
-        /*
-       * Remove a string suffix
-       *
-       * <returns>the string without the suffix</returns>
-         */
-        private string removeSuffix(string value, string toRemove)
-        {
-            // be-safe !!!
-            if ((value == null) ||
-                (toRemove == null) ||
-                !suffix(value, toRemove))
-            {
-                return value;
-            }
-
-            return value.Substring(0, value.Length - toRemove.Length);
-        }
-
-        /*
-         * See if a suffix is preceded by a given string
-         *
-         * <returns>true if the suffix is preceded by the given string</returns>
-         */
-        private bool suffixPreceded(string value, string _suffix, string preceded)
-        {
-            // be-safe !!!
-            if ((value == null) ||
-                (_suffix == null) ||
-                (preceded == null) ||
-                !suffix(value, _suffix))
-            {
-                return false;
-            }
-
-            return suffix(removeSuffix(value, _suffix), preceded);
-        }
-
-
-
-
-        /*
-         * Creates CT (changed term), substituting 'ã' and 'õ' for 'a~' and 'o~'.
-         */
-        private void createCT(string term)
-        {
-            CT = changeTerm(term);
-
-            if (CT.Length < 2) return;
-
-            // if the first character is ... , remove it
-            if ((CT[0] == '"') ||
-                (CT[0] == '\'') ||
-                (CT[0] == '-') ||
-                (CT[0] == ',') ||
-                (CT[0] == ';') ||
-                (CT[0] == '.') ||
-                (CT[0] == '?') ||
-                (CT[0] == '!')
-                )
-            {
-                CT = CT.Substring(1);
-            }
-
-            if (CT.Length < 2) return;
-
-            // if the last character is ... , remove it
-            if ((CT[CT.Length - 1] == '-') ||
-                (CT[CT.Length - 1] == ',') ||
-                (CT[CT.Length - 1] == ';') ||
-                (CT[CT.Length - 1] == '.') ||
-                (CT[CT.Length - 1] == '?') ||
-                (CT[CT.Length - 1] == '!') ||
-                (CT[CT.Length - 1] == '\'') ||
-                (CT[CT.Length - 1] == '"')
-                )
-            {
-                CT = CT.Substring(0, CT.Length - 1);
-            }
-        }
-
-
-        /*
-         * Standard suffix removal.
-         * Search for the longest among the following suffixes, and perform
-         * the following actions:
-         *
-         * <returns>false if no ending was removed</returns>
-         */
-        private bool step1()
-        {
-            if (CT == null) return false;
-
-            // suffix length = 7
-            if (suffix(CT, "uciones") && suffix(R2, "uciones"))
-            {
-                CT = replaceSuffix(CT, "uciones", "u"); return true;
-            }
-
-            // suffix length = 6
-            if (CT.Length >= 6)
-            {
-                if (suffix(CT, "imentos") && suffix(R2, "imentos"))
-                {
-                    CT = removeSuffix(CT, "imentos"); return true;
-                }
-                if (suffix(CT, "amentos") && suffix(R2, "amentos"))
-                {
-                    CT = removeSuffix(CT, "amentos"); return true;
-                }
-                if (suffix(CT, "adores") && suffix(R2, "adores"))
-                {
-                    CT = removeSuffix(CT, "adores"); return true;
-                }
-                if (suffix(CT, "adoras") && suffix(R2, "adoras"))
-                {
-                    CT = removeSuffix(CT, "adoras"); return true;
-                }
-                if (suffix(CT, "logias") && suffix(R2, "logias"))
-                {
-                    CT = replaceSuffix(CT, "logias", "log"); return true;
-                }
-                if (suffix(CT, "encias") && suffix(R2, "encias"))
-                {
-                    CT = replaceSuffix(CT, "encias", "ente"); return true;
-                }
-                if (suffix(CT, "amente") && suffix(R1, "amente"))
-                {
-                    CT = removeSuffix(CT, "amente"); return true;
-                }
-                if (suffix(CT, "idades") && suffix(R2, "idades"))
-                {
-                    CT = removeSuffix(CT, "idades"); return true;
-                }
-            }
-
-            // suffix length = 5
-            if (CT.Length >= 5)
-            {
-                if (suffix(CT, "acoes") && suffix(R2, "acoes"))
-                {
-                    CT = removeSuffix(CT, "acoes"); return true;
-                }
-                if (suffix(CT, "imento") && suffix(R2, "imento"))
-                {
-                    CT = removeSuffix(CT, "imento"); return true;
-                }
-                if (suffix(CT, "amento") && suffix(R2, "amento"))
-                {
-                    CT = removeSuffix(CT, "amento"); return true;
-                }
-                if (suffix(CT, "adora") && suffix(R2, "adora"))
-                {
-                    CT = removeSuffix(CT, "adora"); return true;
-                }
-                if (suffix(CT, "ismos") && suffix(R2, "ismos"))
-                {
-                    CT = removeSuffix(CT, "ismos"); return true;
-                }
-                if (suffix(CT, "istas") && suffix(R2, "istas"))
-                {
-                    CT = removeSuffix(CT, "istas"); return true;
-                }
-                if (suffix(CT, "logia") && suffix(R2, "logia"))
-                {
-                    CT = replaceSuffix(CT, "logia", "log"); return true;
-                }
-                if (suffix(CT, "ucion") && suffix(R2, "ucion"))
-                {
-                    CT = replaceSuffix(CT, "ucion", "u"); return true;
-                }
-                if (suffix(CT, "encia") && suffix(R2, "encia"))
-                {
-                    CT = replaceSuffix(CT, "encia", "ente"); return true;
-                }
-                if (suffix(CT, "mente") && suffix(R2, "mente"))
-                {
-                    CT = removeSuffix(CT, "mente"); return true;
-                }
-                if (suffix(CT, "idade") && suffix(R2, "idade"))
-                {
-                    CT = removeSuffix(CT, "idade"); return true;
-                }
-            }
-
-            // suffix length = 4
-            if (CT.Length >= 4)
-            {
-                if (suffix(CT, "acao") && suffix(R2, "acao"))
-                {
-                    CT = removeSuffix(CT, "acao"); return true;
-                }
-                if (suffix(CT, "ezas") && suffix(R2, "ezas"))
-                {
-                    CT = removeSuffix(CT, "ezas"); return true;
-                }
-                if (suffix(CT, "icos") && suffix(R2, "icos"))
-                {
-                    CT = removeSuffix(CT, "icos"); return true;
-                }
-                if (suffix(CT, "icas") && suffix(R2, "icas"))
-                {
-                    CT = removeSuffix(CT, "icas"); return true;
-                }
-                if (suffix(CT, "ismo") && suffix(R2, "ismo"))
-                {
-                    CT = removeSuffix(CT, "ismo"); return true;
-                }
-                if (suffix(CT, "avel") && suffix(R2, "avel"))
-                {
-                    CT = removeSuffix(CT, "avel"); return true;
-                }
-                if (suffix(CT, "ivel") && suffix(R2, "ivel"))
-                {
-                    CT = removeSuffix(CT, "ivel"); return true;
-                }
-                if (suffix(CT, "ista") && suffix(R2, "ista"))
-                {
-                    CT = removeSuffix(CT, "ista"); return true;
-                }
-                if (suffix(CT, "osos") && suffix(R2, "osos"))
-                {
-                    CT = removeSuffix(CT, "osos"); return true;
-                }
-                if (suffix(CT, "osas") && suffix(R2, "osas"))
-                {
-                    CT = removeSuffix(CT, "osas"); return true;
-                }
-                if (suffix(CT, "ador") && suffix(R2, "ador"))
-                {
-                    CT = removeSuffix(CT, "ador"); return true;
-                }
-                if (suffix(CT, "ivas") && suffix(R2, "ivas"))
-                {
-                    CT = removeSuffix(CT, "ivas"); return true;
-                }
-                if (suffix(CT, "ivos") && suffix(R2, "ivos"))
-                {
-                    CT = removeSuffix(CT, "ivos"); return true;
-                }
-                if (suffix(CT, "iras") &&
-                    suffix(RV, "iras") &&
-                    suffixPreceded(CT, "iras", "e"))
-                {
-                    CT = replaceSuffix(CT, "iras", "ir"); return true;
-                }
-            }
-
-            // suffix length = 3
-            if (CT.Length >= 3)
-            {
-                if (suffix(CT, "eza") && suffix(R2, "eza"))
-                {
-                    CT = removeSuffix(CT, "eza"); return true;
-                }
-                if (suffix(CT, "ico") && suffix(R2, "ico"))
-                {
-                    CT = removeSuffix(CT, "ico"); return true;
-                }
-                if (suffix(CT, "ica") && suffix(R2, "ica"))
-                {
-                    CT = removeSuffix(CT, "ica"); return true;
-                }
-                if (suffix(CT, "oso") && suffix(R2, "oso"))
-                {
-                    CT = removeSuffix(CT, "oso"); return true;
-                }
-                if (suffix(CT, "osa") && suffix(R2, "osa"))
-                {
-                    CT = removeSuffix(CT, "osa"); return true;
-                }
-                if (suffix(CT, "iva") && suffix(R2, "iva"))
-                {
-                    CT = removeSuffix(CT, "iva"); return true;
-                }
-                if (suffix(CT, "ivo") && suffix(R2, "ivo"))
-                {
-                    CT = removeSuffix(CT, "ivo"); return true;
-                }
-                if (suffix(CT, "ira") &&
-                    suffix(RV, "ira") &&
-                    suffixPreceded(CT, "ira", "e"))
-                {
-                    CT = replaceSuffix(CT, "ira", "ir"); return true;
-                }
-            }
-
-            // no ending was removed by step1
-            return false;
-        }
-
-
-        /*
-         * Verb suffixes.
-       *
-       * Search for the longest among the following suffixes in RV,
-       * and if found, delete.
-       *
-       * <returns>false if no ending was removed</returns>
-        */
-        private bool step2()
-        {
-            if (RV == null) return false;
-
-            // suffix length = 7
-            if (RV.Length >= 7)
-            {
-                if (suffix(RV, "issemos"))
-                {
-                    CT = removeSuffix(CT, "issemos"); return true;
-                }
-                if (suffix(RV, "essemos"))
-                {
-                    CT = removeSuffix(CT, "essemos"); return true;
-                }
-                if (suffix(RV, "assemos"))
-                {
-                    CT = removeSuffix(CT, "assemos"); return true;
-                }
-                if (suffix(RV, "ariamos"))
-                {
-                    CT = removeSuffix(CT, "ariamos"); return true;
-                }
-                if (suffix(RV, "eriamos"))
-                {
-                    CT = removeSuffix(CT, "eriamos"); return true;
-                }
-                if (suffix(RV, "iriamos"))
-                {
-                    CT = removeSuffix(CT, "iriamos"); return true;
-                }
-            }
-
-            // suffix length = 6
-            if (RV.Length >= 6)
-            {
-                if (suffix(RV, "iremos"))
-                {
-                    CT = removeSuffix(CT, "iremos"); return true;
-                }
-                if (suffix(RV, "eremos"))
-                {
-                    CT = removeSuffix(CT, "eremos"); return true;
-                }
-                if (suffix(RV, "aremos"))
-                {
-                    CT = removeSuffix(CT, "aremos"); return true;
-                }
-                if (suffix(RV, "avamos"))
-                {
-                    CT = removeSuffix(CT, "avamos"); return true;
-                }
-                if (suffix(RV, "iramos"))
-                {
-                    CT = removeSuffix(CT, "iramos"); return true;
-                }
-                if (suffix(RV, "eramos"))
-                {
-                    CT = removeSuffix(CT, "eramos"); return true;
-                }
-                if (suffix(RV, "aramos"))
-                {
-                    CT = removeSuffix(CT, "aramos"); return true;
-                }
-                if (suffix(RV, "asseis"))
-                {
-                    CT = removeSuffix(CT, "asseis"); return true;
-                }
-                if (suffix(RV, "esseis"))
-                {
-                    CT = removeSuffix(CT, "esseis"); return true;
-                }
-                if (suffix(RV, "isseis"))
-                {
-                    CT = removeSuffix(CT, "isseis"); return true;
-                }
-                if (suffix(RV, "arieis"))
-                {
-                    CT = removeSuffix(CT, "arieis"); return true;
-                }
-                if (suffix(RV, "erieis"))
-                {
-                    CT = removeSuffix(CT, "erieis"); return true;
-                }
-                if (suffix(RV, "irieis"))
-                {
-                    CT = removeSuffix(CT, "irieis"); return true;
-                }
-            }
-
-
-            // suffix length = 5
-            if (RV.Length >= 5)
-            {
-                if (suffix(RV, "irmos"))
-                {
-                    CT = removeSuffix(CT, "irmos"); return true;
-                }
-                if (suffix(RV, "iamos"))
-                {
-                    CT = removeSuffix(CT, "iamos"); return true;
-                }
-                if (suffix(RV, "armos"))
-                {
-                    CT = removeSuffix(CT, "armos"); return true;
-                }
-                if (suffix(RV, "ermos"))
-                {
-                    CT = removeSuffix(CT, "ermos"); return true;
-                }
-                if (suffix(RV, "areis"))
-                {
-                    CT = removeSuffix(CT, "areis"); return true;
-                }
-                if (suffix(RV, "ereis"))
-                {
-                    CT = removeSuffix(CT, "ereis"); return true;
-                }
-                if (suffix(RV, "ireis"))
-                {
-                    CT = removeSuffix(CT, "ireis"); return true;
-                }
-                if (suffix(RV, "asses"))
-                {
-                    CT = removeSuffix(CT, "asses"); return true;
-                }
-                if (suffix(RV, "esses"))
-                {
-                    CT = removeSuffix(CT, "esses"); return true;
-                }
-                if (suffix(RV, "isses"))
-                {
-                    CT = removeSuffix(CT, "isses"); return true;
-                }
-                if (suffix(RV, "astes"))
-                {
-                    CT = removeSuffix(CT, "astes"); return true;
-                }
-                if (suffix(RV, "assem"))
-                {
-                    CT = removeSuffix(CT, "assem"); return true;
-                }
-                if (suffix(RV, "essem"))
-                {
-                    CT = removeSuffix(CT, "essem"); return true;
-                }
-                if (suffix(RV, "issem"))
-                {
-                    CT = removeSuffix(CT, "issem"); return true;
-                }
-                if (suffix(RV, "ardes"))
-                {
-                    CT = removeSuffix(CT, "ardes"); return true;
-                }
-                if (suffix(RV, "erdes"))
-                {
-                    CT = removeSuffix(CT, "erdes"); return true;
-                }
-                if (suffix(RV, "irdes"))
-                {
-                    CT = removeSuffix(CT, "irdes"); return true;
-                }
-                if (suffix(RV, "ariam"))
-                {
-                    CT = removeSuffix(CT, "ariam"); return true;
-                }
-                if (suffix(RV, "eriam"))
-                {
-                    CT = removeSuffix(CT, "eriam"); return true;
-                }
-                if (suffix(RV, "iriam"))
-                {
-                    CT = removeSuffix(CT, "iriam"); return true;
-                }
-                if (suffix(RV, "arias"))
-                {
-                    CT = removeSuffix(CT, "arias"); return true;
-                }
-                if (suffix(RV, "erias"))
-                {
-                    CT = removeSuffix(CT, "erias"); return true;
-                }
-                if (suffix(RV, "irias"))
-                {
-                    CT = removeSuffix(CT, "irias"); return true;
-                }
-                if (suffix(RV, "estes"))
-                {
-                    CT = removeSuffix(CT, "estes"); return true;
-                }
-                if (suffix(RV, "istes"))
-                {
-                    CT = removeSuffix(CT, "istes"); return true;
-                }
-                if (suffix(RV, "areis"))
-                {
-                    CT = removeSuffix(CT, "areis"); return true;
-                }
-                if (suffix(RV, "aveis"))
-                {
-                    CT = removeSuffix(CT, "aveis"); return true;
-                }
-            }
-
-            // suffix length = 4
-            if (RV.Length >= 4)
-            {
-                if (suffix(RV, "aria"))
-                {
-                    CT = removeSuffix(CT, "aria"); return true;
-                }
-                if (suffix(RV, "eria"))
-                {
-                    CT = removeSuffix(CT, "eria"); return true;
-                }
-                if (suffix(RV, "iria"))
-                {
-                    CT = removeSuffix(CT, "iria"); return true;
-                }
-                if (suffix(RV, "asse"))
-                {
-                    CT = removeSuffix(CT, "asse"); return true;
-                }
-                if (suffix(RV, "esse"))
-                {
-                    CT = removeSuffix(CT, "esse"); return true;
-                }
-                if (suffix(RV, "isse"))
-                {
-                    CT = removeSuffix(CT, "isse"); return true;
-                }
-                if (suffix(RV, "aste"))
-                {
-                    CT = removeSuffix(CT, "aste"); return true;
-                }
-                if (suffix(RV, "este"))
-                {
-                    CT = removeSuffix(CT, "este"); return true;
-                }
-                if (suffix(RV, "iste"))
-                {
-                    CT = removeSuffix(CT, "iste"); return true;
-                }
-                if (suffix(RV, "arei"))
-                {
-                    CT = removeSuffix(CT, "arei"); return true;
-                }
-                if (suffix(RV, "erei"))
-                {
-                    CT = removeSuffix(CT, "erei"); return true;
-                }
-                if (suffix(RV, "irei"))
-                {
-                    CT = removeSuffix(CT, "irei"); return true;
-                }
-                if (suffix(RV, "aram"))
-                {
-                    CT = removeSuffix(CT, "aram"); return true;
-                }
-                if (suffix(RV, "eram"))
-                {
-                    CT = removeSuffix(CT, "eram"); return true;
-                }
-                if (suffix(RV, "iram"))
-                {
-                    CT = removeSuffix(CT, "iram"); return true;
-                }
-                if (suffix(RV, "avam"))
-                {
-                    CT = removeSuffix(CT, "avam"); return true;
-                }
-                if (suffix(RV, "arem"))
-                {
-                    CT = removeSuffix(CT, "arem"); return true;
-                }
-                if (suffix(RV, "erem"))
-                {
-                    CT = removeSuffix(CT, "erem"); return true;
-                }
-                if (suffix(RV, "irem"))
-                {
-                    CT = removeSuffix(CT, "irem"); return true;
-                }
-                if (suffix(RV, "ando"))
-                {
-                    CT = removeSuffix(CT, "ando"); return true;
-                }
-                if (suffix(RV, "endo"))
-                {
-                    CT = removeSuffix(CT, "endo"); return true;
-                }
-                if (suffix(RV, "indo"))
-                {
-                    CT = removeSuffix(CT, "indo"); return true;
-                }
-                if (suffix(RV, "arao"))
-                {
-                    CT = removeSuffix(CT, "arao"); return true;
-                }
-                if (suffix(RV, "erao"))
-                {
-                    CT = removeSuffix(CT, "erao"); return true;
-                }
-                if (suffix(RV, "irao"))
-                {
-                    CT = removeSuffix(CT, "irao"); return true;
-                }
-                if (suffix(RV, "adas"))
-                {
-                    CT = removeSuffix(CT, "adas"); return true;
-                }
-                if (suffix(RV, "idas"))
-                {
-                    CT = removeSuffix(CT, "idas"); return true;
-                }
-                if (suffix(RV, "aras"))
-                {
-                    CT = removeSuffix(CT, "aras"); return true;
-                }
-                if (suffix(RV, "eras"))
-                {
-                    CT = removeSuffix(CT, "eras"); return true;
-                }
-                if (suffix(RV, "iras"))
-                {
-                    CT = removeSuffix(CT, "iras"); return true;
-                }
-                if (suffix(RV, "avas"))
-                {
-                    CT = removeSuffix(CT, "avas"); return true;
-                }
-                if (suffix(RV, "ares"))
-                {
-                    CT = removeSuffix(CT, "ares"); return true;
-                }
-                if (suffix(RV, "eres"))
-                {
-                    CT = removeSuffix(CT, "eres"); return true;
-                }
-                if (suffix(RV, "ires"))
-                {
-                    CT = removeSuffix(CT, "ires"); return true;
-                }
-                if (suffix(RV, "ados"))
-                {
-                    CT = removeSuffix(CT, "ados"); return true;
-                }
-                if (suffix(RV, "idos"))
-                {
-                    CT = removeSuffix(CT, "idos"); return true;
-                }
-                if (suffix(RV, "amos"))
-                {
-                    CT = removeSuffix(CT, "amos"); return true;
-                }
-                if (suffix(RV, "emos"))
-                {
-                    CT = removeSuffix(CT, "emos"); return true;
-                }
-                if (suffix(RV, "imos"))
-                {
-                    CT = removeSuffix(CT, "imos"); return true;
-                }
-                if (suffix(RV, "iras"))
-                {
-                    CT = removeSuffix(CT, "iras"); return true;
-                }
-                if (suffix(RV, "ieis"))
-                {
-                    CT = removeSuffix(CT, "ieis"); return true;
-                }
-            }
-
-            // suffix length = 3
-            if (RV.Length >= 3)
-            {
-                if (suffix(RV, "ada"))
-                {
-                    CT = removeSuffix(CT, "ada"); return true;
-                }
-                if (suffix(RV, "ida"))
-                {
-                    CT = removeSuffix(CT, "ida"); return true;
-                }
-                if (suffix(RV, "ara"))
-                {
-                    CT = removeSuffix(CT, "ara"); return true;
-                }
-                if (suffix(RV, "era"))
-                {
-                    CT = removeSuffix(CT, "era"); return true;
-                }
-                if (suffix(RV, "ava"))
-                {
-                    CT = removeSuffix(CT, "ava"); return true;
-                }
-                if (suffix(RV, "iam"))
-                {
-                    CT = removeSuffix(CT, "iam"); return true;
-                }
-                if (suffix(RV, "ado"))
-                {
-                    CT = removeSuffix(CT, "ado"); return true;
-                }
-                if (suffix(RV, "ido"))
-                {
-                    CT = removeSuffix(CT, "ido"); return true;
-                }
-                if (suffix(RV, "ias"))
-                {
-                    CT = removeSuffix(CT, "ias"); return true;
-                }
-                if (suffix(RV, "ais"))
-                {
-                    CT = removeSuffix(CT, "ais"); return true;
-                }
-                if (suffix(RV, "eis"))
-                {
-                    CT = removeSuffix(CT, "eis"); return true;
-                }
-                if (suffix(RV, "ira"))
-                {
-                    CT = removeSuffix(CT, "ira"); return true;
-                }
-                if (suffix(RV, "ear"))
-                {
-                    CT = removeSuffix(CT, "ear"); return true;
-                }
-            }
-
-            // suffix length = 2
-            if (RV.Length >= 2)
-            {
-                if (suffix(RV, "ia"))
-                {
-                    CT = removeSuffix(CT, "ia"); return true;
-                }
-                if (suffix(RV, "ei"))
-                {
-                    CT = removeSuffix(CT, "ei"); return true;
-                }
-                if (suffix(RV, "am"))
-                {
-                    CT = removeSuffix(CT, "am"); return true;
-                }
-                if (suffix(RV, "em"))
-                {
-                    CT = removeSuffix(CT, "em"); return true;
-                }
-                if (suffix(RV, "ar"))
-                {
-                    CT = removeSuffix(CT, "ar"); return true;
-                }
-                if (suffix(RV, "er"))
-                {
-                    CT = removeSuffix(CT, "er"); return true;
-                }
-                if (suffix(RV, "ir"))
-                {
-                    CT = removeSuffix(CT, "ir"); return true;
-                }
-                if (suffix(RV, "as"))
-                {
-                    CT = removeSuffix(CT, "as"); return true;
-                }
-                if (suffix(RV, "es"))
-                {
-                    CT = removeSuffix(CT, "es"); return true;
-                }
-                if (suffix(RV, "is"))
-                {
-                    CT = removeSuffix(CT, "is"); return true;
-                }
-                if (suffix(RV, "eu"))
-                {
-                    CT = removeSuffix(CT, "eu"); return true;
-                }
-                if (suffix(RV, "iu"))
-                {
-                    CT = removeSuffix(CT, "iu"); return true;
-                }
-                if (suffix(RV, "iu"))
-                {
-                    CT = removeSuffix(CT, "iu"); return true;
-                }
-                if (suffix(RV, "ou"))
-                {
-                    CT = removeSuffix(CT, "ou"); return true;
-                }
-            }
-
-            // no ending was removed by step2
-            return false;
-        }
-
-        /*
-         * Delete suffix 'i' if in RV and preceded by 'c'
-       *
-        */
-        private void step3()
-        {
-            if (RV == null) return;
-
-            if (suffix(RV, "i") && suffixPreceded(RV, "i", "c"))
-            {
-                CT = removeSuffix(CT, "i");
-            }
-
-        }
-
-        /*
-         * Residual suffix
-       *
-       * If the word ends with one of the suffixes (os a i o á í ó)
-       * in RV, delete it
-       *
-        */
-        private void step4()
-        {
-            if (RV == null) return;
-
-            if (suffix(RV, "os"))
-            {
-                CT = removeSuffix(CT, "os"); return;
-            }
-            if (suffix(RV, "a"))
-            {
-                CT = removeSuffix(CT, "a"); return;
-            }
-            if (suffix(RV, "i"))
-            {
-                CT = removeSuffix(CT, "i"); return;
-            }
-            if (suffix(RV, "o"))
-            {
-                CT = removeSuffix(CT, "o"); return;
-            }
-
-        }
-
-        /*
-         * If the word ends with one of (e é ê) in RV, delete it,
-       * and if preceded by 'gu' (or 'ci') with the 'u' (or 'i') in RV,
-       * delete the 'u' (or 'i')
-       *
-       * Or if the word ends ç remove the cedilha
-       *
-        */
-        private void step5()
-        {
-            if (RV == null) return;
-
-            if (suffix(RV, "e"))
-            {
-                if (suffixPreceded(RV, "e", "gu"))
-                {
-                    CT = removeSuffix(CT, "e");
-                    CT = removeSuffix(CT, "u");
-                    return;
-                }
-
-                if (suffixPreceded(RV, "e", "ci"))
-                {
-                    CT = removeSuffix(CT, "e");
-                    CT = removeSuffix(CT, "i");
-                    return;
-                }
-
-                CT = removeSuffix(CT, "e"); return;
-            }
-        }
-
-        /*
-         * For logging and debugging purposes
-         *
-         * <returns> TERM, CT, RV, R1 and R2</returns>
-         */
-        public string Log()
-        {
-            return " (TERM = " + TERM + ")" +
-                   " (CT = " + CT + ")" +
-                   " (RV = " + RV + ")" +
-                   " (R1 = " + R1 + ")" +
-                   " (R2 = " + R2 + ")";
-        }
-
-    }
-
-}
\ No newline at end of file
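
For readers skimming the deleted stemmer rather than the whole hunk: the region logic documented in getR1()/getRV() above is the heart of the algorithm. The snippet below is a hypothetical, standalone C# sketch of the R1 definition quoted in those comments (the region after the first non-vowel following a vowel); it is not part of the deleted class or of Lucene.NET, and it ignores the exact boundary handling of the original code.

    using System;

    // Hypothetical helper, not part of Lucene.NET: it only mirrors the R1 definition
    // from the getR1() comment above.
    static class RegionSketch
    {
        static bool IsVowel(char c) => "aeiou".IndexOf(c) >= 0;

        public static string GetR1(string word)
        {
            int j = 0;
            // skip to the first vowel
            while (j < word.Length && !IsVowel(word[j])) j++;
            // then to the first non-vowel after it
            while (j < word.Length && IsVowel(word[j])) j++;
            // R1 is everything after that non-vowel (possibly empty)
            return j + 1 >= word.Length ? string.Empty : word.Substring(j + 1);
        }

        static void Main()
        {
            Console.WriteLine(GetR1("beleza")); // prints "eza"
        }
    }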

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/CJK/CJKAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/CJK/CJKAnalyzer.cs b/src/contrib/Analyzers/CJK/CJKAnalyzer.cs
deleted file mode 100644
index fb21358..0000000
--- a/src/contrib/Analyzers/CJK/CJKAnalyzer.cs
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Collections;
-using Lucene.Net.Analysis;
-using Version = Lucene.Net.Util.Version;
-
-namespace Lucene.Net.Analysis.CJK
-{
-    /// <summary>
-    /// Filters CJKTokenizer with StopFilter.
-    /// 
-    /// <author>Che, Dong</author>
-    /// </summary>
-    public class CJKAnalyzer : Analyzer
-    {
-        //~ Static fields/initializers ---------------------------------------------
-
-        /// <summary>
-        /// An array containing some common English words that are not usually
-        /// useful for searching, and some double-byte punctuation marks.
-        /// </summary>
-        // TODO make this final in 3.1 -
-        // this might be revised and merged with StopFilter stop words too
-        [Obsolete("use GetDefaultStopSet() instead")] public static String[] STOP_WORDS =
-            {
-                "a", "and", "are", "as", "at", "be",
-                "but", "by", "for", "if", "in",
-                "into", "is", "it", "no", "not",
-                "of", "on", "or", "s", "such", "t",
-                "that", "the", "their", "then",
-                "there", "these", "they", "this",
-                "to", "was", "will", "with", "",
-                "www"
-            };
-
-        //~ Instance fields --------------------------------------------------------
-
-        /// <summary>
-        /// Returns an unmodifiable instance of the default stop-words set.
-        /// </summary>
-        /// <returns>Returns an unmodifiable instance of the default stop-words set.</returns>
-        public static ISet<string> GetDefaultStopSet()
-        {
-            return DefaultSetHolder.DEFAULT_STOP_SET;
-        }
-
-        private static class DefaultSetHolder
-        {
-            internal static ISet<string> DEFAULT_STOP_SET =
-                CharArraySet.UnmodifiableSet(new CharArraySet((IEnumerable<string>)STOP_WORDS, false));
-        }
-
-        /// <summary>
-        /// stop word list
-        /// </summary>
-        private ISet<string> stopTable;
-
-        private readonly Version matchVersion;
-
-        //~ Constructors -----------------------------------------------------------
-
-        public CJKAnalyzer(Version matchVersion)
-            : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
-        {
-
-        }
-
-        public CJKAnalyzer(Version matchVersion, ISet<string> stopWords)
-        {
-            stopTable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopWords));
-            this.matchVersion = matchVersion;
-        }
-
-        /// <summary>
-        /// Builds an analyzer which removes words in the provided array.
-        /// </summary>
-        /// <param name="stopWords">stop word array</param>
-        public CJKAnalyzer(Version matchVersion, params string[] stopWords)
-        {
-            stopTable = StopFilter.MakeStopSet(stopWords);
-            this.matchVersion = matchVersion;
-        }
-
-        //~ Methods ----------------------------------------------------------------
-
-        /// <summary>
-        /// Gets a token stream from the input.
-        /// </summary>
-        /// <param name="fieldName">lucene field name</param>
-        /// <param name="reader">input reader</param>
-        /// <returns>Token Stream</returns>
-        public override sealed TokenStream TokenStream(String fieldName, TextReader reader)
-        {
-            return new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
-                                  new CJKTokenizer(reader), stopTable);
-        }
-
-        private class SavedStreams
-        {
-            protected internal Tokenizer source;
-            protected internal TokenStream result;
-        };
-
-        /*
-         * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text 
-         * in the provided {@link Reader}.
-         *
-         * @param fieldName lucene field name
-         * @param reader    Input {@link Reader}
-         * @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with
-         *    {@link StopFilter}
-         */
-        public override sealed TokenStream ReusableTokenStream(String fieldName, TextReader reader)
-        {
-            /* tokenStream() is final, no back compat issue */
-            SavedStreams streams = (SavedStreams) PreviousTokenStream;
-            if (streams == null)
-            {
-                streams = new SavedStreams();
-                streams.source = new CJKTokenizer(reader);
-                streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
-                                                streams.source, stopTable);
-                PreviousTokenStream = streams;
-            }
-            else
-            {
-                streams.source.Reset(reader);
-            }
-            return streams.result;
-        }
-    }
-}
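
CJKAnalyzer above is little more than CJKTokenizer wrapped in a StopFilter; the interesting behaviour is the overlapping two-character tokenization described in the header comment of CJKTokenizer, deleted in the next hunk ("java C1C2C3C4" becomes "java" "C1C2" "C2C3" "C3C4"). Below is a minimal, hypothetical C# sketch of that bigram idea, with none of the tokenizer's buffering or attribute handling; it is not the deleted implementation.

    using System;
    using System.Collections.Generic;

    // Hypothetical sketch of the overlapping-bigram segmentation the CJKTokenizer
    // header comment describes; this is not the tokenizer's actual code.
    static class BigramSketch
    {
        // Emit each adjacent pair of characters in a run of double-byte text.
        public static IEnumerable<string> OverlappingBigrams(string run)
        {
            for (int i = 0; i + 1 < run.Length; i++)
                yield return run.Substring(i, 2);
        }

        static void Main()
        {
            // four CJK characters stand in for the "C1C2C3C4" of the example
            foreach (string token in OverlappingBigrams("东京都内"))
                Console.WriteLine(token); // 东京, 京都, 都内
        }
    }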

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/CJK/CJKTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/CJK/CJKTokenizer.cs b/src/contrib/Analyzers/CJK/CJKTokenizer.cs
deleted file mode 100644
index 6be5a6e..0000000
--- a/src/contrib/Analyzers/CJK/CJKTokenizer.cs
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Globalization;
-using System.IO;
-using System.Text;
-using System.Text.RegularExpressions;
-using Lucene.Net.Analysis;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Analysis.CJK
-{
-    /// <summary>
-    /// <p>
-    /// CJKTokenizer was modified from StopTokenizer, which does a decent job for
-    /// most European languages, and uses a different tokenization method for double-byte
-    /// characters: tokens are returned as overlapping pairs of characters.<br/>
-    /// Example: "java C1C2C3C4" will be segmented into: "java" "C1C2" "C2C3" "C3C4"; the
-    /// resulting zero-length tokens "" still need to be filtered out.<br/>
-    /// Digits, '+' and '#' are tokenized as letters.<br/>
-    /// For more info on Asian language (Chinese, Japanese, Korean) text segmentation,
-    /// please search <a
-    /// href="http://www.google.com/search?q=word+chinese+segment">google</a>
-    /// </p>
-    /// 
-    /// @author Che, Dong
-    /// @version $Id: CJKTokenizer.java,v 1.3 2003/01/22 20:54:47 otis Exp $
-    /// </summary>
-    public sealed class CJKTokenizer : Tokenizer
-    {
-        //~ Static fields/initializers ---------------------------------------------
-        /// <summary>
-        /// Word token type
-        /// </summary>
-        internal static readonly int WORD_TYPE = 0;
-
-        /// <summary>
-        /// Single byte token type
-        /// </summary>
-        internal static readonly int SINGLE_TOKEN_TYPE = 1;
-
-        /// <summary>
-        /// Double byte token type
-        /// </summary>
-        internal static readonly int DOUBLE_TOKEN_TYPE = 2;
-
-        /// <summary>
-        /// Names for token types
-        /// </summary>
-        internal static readonly String[] TOKEN_TYPE_NAMES = { "word", "single", "double" };
-
-        /// <summary>
-        /// Max word length
-        /// </summary>
-        internal static readonly int MAX_WORD_LEN = 255;
-
-        /// <summary>
-        /// buffer size
-        /// </summary>
-        internal static readonly int IO_BUFFER_SIZE = 256;
-
-        //~ Instance fields --------------------------------------------------------
-
-        /// <summary>
-        /// word offset, used to indicate which character in the input is being parsed
-        /// </summary>
-        private int offset = 0;
-
-        /// <summary>
-        /// the index used only for ioBuffer
-        /// </summary>
-        private int bufferIndex = 0;
-
-        /// <summary>
-        /// data length
-        /// </summary>
-        private int dataLen = 0;
-
-        /// <summary>
-        /// character buffer, stores the characters which are used to compose <br/>
-        /// the returned Token
-        /// </summary>
-        private char[] buffer = new char[MAX_WORD_LEN];
-
-        /// <summary>
-        /// I/O buffer, used to store the content of the input(one of the <br/>
-        /// members of Tokenizer)
-        /// </summary>
-        private char[] ioBuffer = new char[IO_BUFFER_SIZE];
-
-        /// <summary>
-        /// word type: single=>ASCII  double=>non-ASCII word=>default
-        /// </summary>
-        private int tokenType = WORD_TYPE;
-
-        /// <summary>
-        /// tag: previous character is a cached double-byte character  "C1C2C3C4"
-        /// ----(set the C1 isTokened) C1C2 "C2C3C4" ----(set the C2 isTokened)
-        /// C1C2 C2C3 "C3C4" ----(set the C3 isTokened) "C1C2 C2C3 C3C4"
-        /// </summary>
-        private bool preIsTokened = false;
-
-        private ITermAttribute termAtt;
-        private IOffsetAttribute offsetAtt;
-        private ITypeAttribute typeAtt;
-
-        //~ Constructors -----------------------------------------------------------
-
-        /// <summary>
-        /// Construct a token stream processing the given input.
-        /// </summary>
-        /// <param name="_in">I/O reader</param>
-        public CJKTokenizer(TextReader _in)
-            : base(_in)
-        {
-            Init();
-        }
-
-        public CJKTokenizer(AttributeSource source, TextReader _in)
-            : base(source, _in)
-        {
-            Init();
-        }
-
-        public CJKTokenizer(AttributeFactory factory, TextReader _in)
-            : base(factory, _in)
-        {
-            Init();
-        }
-
-        private void Init()
-        {
-            termAtt = AddAttribute<ITermAttribute>();
-            offsetAtt = AddAttribute<IOffsetAttribute>();
-            typeAtt = AddAttribute<ITypeAttribute>();
-        }
-
-        //~ Methods ----------------------------------------------------------------
-
-        /*
-         * Returns true for the next token in the stream, or false at EOS.
-         * See http://java.sun.com/j2se/1.3/docs/api/java/lang/char.UnicodeBlock.html
-         * for detail.
-         *
-         * @return false for end of stream, true otherwise
-         *
-         * @throws java.io.IOException - throw IOException when read error <br>
-         *         happened in the InputStream
-         *
-         */
-
-        Regex isBasicLatin = new Regex(@"\p{IsBasicLatin}", RegexOptions.Compiled);
-        Regex isHalfWidthAndFullWidthForms = new Regex(@"\p{IsHalfwidthandFullwidthForms}", RegexOptions.Compiled);
-
-        public override bool IncrementToken()
-        {
-            ClearAttributes();
-            /* how many characters have been stored in the buffer */
-
-            while (true)
-            {
-                // loop until we find a non-empty token
-
-                int length = 0;
-
-                /* the position used to create Token */
-                int start = offset;
-
-                while (true)
-                {
-                    // loop until we've found a full token
-                    /* current character */
-                    char c;
-
-                    offset++;
-
-                    if (bufferIndex >= dataLen)
-                    {
-                        dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
-                        bufferIndex = 0;
-                    }
-
-                    if (dataLen == 0) // input.Read returns 0 at end of input, not -1 as in Java
-                    {
-                        if (length > 0)
-                        {
-                            if (preIsTokened == true)
-                            {
-                                length = 0;
-                                preIsTokened = false;
-                            }
-                            else
-                            {
-                                offset--;
-                            }
-
-                            break;
-                        }
-                        else
-                        {
-                            offset--;
-                            return false;
-                        }
-                    }
-                    else
-                    {
-                        //get current character
-                        c = ioBuffer[bufferIndex++];
-                    }
-
-                    //TODO: Using a Regex to determine the UnicodeCategory is probably slower than
-                    //      If we just created a small class that would look it up for us, which 
-                    //      would likely be trivial, however time-consuming.  I can't imagine a Regex
-                    //      being fast for this, considering we have to pull a char from the buffer,
-                    //      and convert it to a string before we run a regex on it. - cc
-                    bool isHalfFullForm = isHalfWidthAndFullWidthForms.Match(c.ToString()).Success;
-                    //if the current character is ASCII or Extend ASCII
-                    if ((isBasicLatin.Match(c.ToString()).Success) || (isHalfFullForm))
-                    {
-                        if (isHalfFullForm)
-                        {
-                            int i = (int) c;
-                            if (i >= 65281 && i <= 65374)
-                            {
-                                // convert certain HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN
-                                i = i - 65248;
-                                c = (char) i;
-                            }
-                        }
-
-                        // if the current character is a letter or "_" "+" "#"
-                        if (char.IsLetterOrDigit(c)
-                            || ((c == '_') || (c == '+') || (c == '#'))
-                            )
-                        {
-                            if (length == 0)
-                            {
-                                // "javaC1C2C3C4linux" <br>
-                                //      ^--: the current character begin to token the ASCII
-                                // letter
-                                start = offset - 1;
-                            }
-                            else if (tokenType == DOUBLE_TOKEN_TYPE)
-                            {
-                                // "javaC1C2C3C4linux" <br>
-                                //              ^--: the previous non-ASCII
-                                // : the current character
-                                offset--;
-                                bufferIndex--;
-
-                                if (preIsTokened == true)
-                                {
-                                    // only one non-ASCII character has been stored
-                                    length = 0;
-                                    preIsTokened = false;
-                                    break;
-                                }
-                                else
-                                {
-                                    break;
-                                }
-                            }
-
-                            // store the LowerCase(c) in the buffer
-                            buffer[length++] = char.ToLower(c); // TODO: is Java's toLowerCase culture-invariant? If so, this should be ToLowerInvariant()
-                            tokenType = SINGLE_TOKEN_TYPE;
-
-                            // break the procedure if buffer overflowed!
-                            if (length == MAX_WORD_LEN)
-                            {
-                                break;
-                            }
-                        }
-                        else if (length > 0)
-                        {
-                            if (preIsTokened)
-                            {
-                                length = 0;
-                                preIsTokened = false;
-                            }
-                            else
-                            {
-                                break;
-                            }
-                        }
-                    }
-                    else
-                    {
-                        // non-ASCII letter, e.g."C1C2C3C4"
-                        if (char.IsLetter(c))
-                        {
-                            if (length == 0)
-                            {
-                                start = offset - 1;
-                                buffer[length++] = c;
-                                tokenType = DOUBLE_TOKEN_TYPE;
-                            }
-                            else
-                            {
-                                if (tokenType == SINGLE_TOKEN_TYPE)
-                                {
-                                    offset--;
-                                    bufferIndex--;
-
-                                    //return the previous ASCII characters
-                                    break;
-                                }
-                                else
-                                {
-                                    buffer[length++] = c;
-                                    tokenType = DOUBLE_TOKEN_TYPE;
-
-                                    if (length == 2)
-                                    {
-                                        offset--;
-                                        bufferIndex--;
-                                        preIsTokened = true;
-
-                                        break;
-                                    }
-                                }
-                            }
-                        }
-                        else if (length > 0)
-                        {
-                            if (preIsTokened == true)
-                            {
-                                // empty the buffer
-                                length = 0;
-                                preIsTokened = false;
-                            }
-                            else
-                            {
-                                break;
-                            }
-                        }
-                    }
-                }
-
-                if (length > 0)
-                {
-                    termAtt.SetTermBuffer(buffer, 0, length);
-                    offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + length));
-                    typeAtt.Type = TOKEN_TYPE_NAMES[tokenType];
-                    return true;
-                }
-                else if (dataLen == 0)
-                {
-                    offset--;
-                    return false;
-                }
-
-                // Cycle back and try for the next token (don't
-                // return an empty string)
-            }
-        }
-
-        public override void End()
-        {
-            // set final offset
-            int finalOffset = CorrectOffset(offset);
-            this.offsetAtt.SetOffset(finalOffset, finalOffset);
-        }
-
-        public override void Reset()
-        {
-            base.Reset();
-            offset = bufferIndex = dataLen = 0;
-            preIsTokened = false;
-            tokenType = WORD_TYPE;
-        }
-
-        public override void Reset(TextReader reader)
-        {
-            base.Reset(reader);
-            Reset();
-        }
-    }
-}
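
A note on the tokenizer above: its IncrementToken loop folds the fullwidth ASCII variants of the HALFWIDTH_AND_FULLWIDTH_FORMS block (code points 65281..65374, i.e. U+FF01..U+FF5E) onto BASIC_LATIN by subtracting 65248 (0xFEE0). A minimal stand-alone sketch of just that folding step, separate from the deleted sources (class and method names here are illustrative only):

using System;

static class FullwidthFolding
{
    // Folds a fullwidth ASCII variant (U+FF01..U+FF5E) to its BASIC_LATIN
    // equivalent by subtracting 0xFEE0; all other characters pass through unchanged.
    public static char Fold(char c)
    {
        int i = c;
        if (i >= 0xFF01 && i <= 0xFF5E)
        {
            i -= 0xFEE0; // 65248: offset between the two Unicode blocks
        }
        return (char)i;
    }

    static void Main()
    {
        Console.WriteLine(Fold('Ａ')); // A  (U+FF21 = 65313; 65313 - 65248 = 65)
        Console.WriteLine(Fold('５')); // 5  (U+FF15 -> U+0035)
    }
}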

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Cn/ChineseAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Cn/ChineseAnalyzer.cs b/src/contrib/Analyzers/Cn/ChineseAnalyzer.cs
deleted file mode 100644
index 1ec050a..0000000
--- a/src/contrib/Analyzers/Cn/ChineseAnalyzer.cs
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.IO;
-using System.Text;
-using System.Collections;
-
-using Lucene.Net.Analysis;
-
-namespace Lucene.Net.Analysis.Cn
-{
-    /// <summary>
-    /// An <see cref="Analyzer"/> that tokenizes text with <see cref="ChineseTokenizer"/> and
-    /// filters with <see cref="ChineseFilter"/>
-    /// </summary>
-    public class ChineseAnalyzer : Analyzer
-    {
-
-        public ChineseAnalyzer()
-        {
-        }
-
-        /// <summary>
-        /// Creates a TokenStream which tokenizes all the text in the provided Reader.
-        /// </summary>
-        /// <returns>A TokenStream build from a ChineseTokenizer filtered with ChineseFilter.</returns>
-        public override sealed TokenStream TokenStream(String fieldName, TextReader reader)
-        {
-            TokenStream result = new ChineseTokenizer(reader);
-            result = new ChineseFilter(result);
-            return result;
-        }
-
-        private class SavedStreams
-        {
-            protected internal Tokenizer source;
-            protected internal TokenStream result;
-        };
-
-        /// <summary>
-        /// Returns a (possibly reused) <see cref="TokenStream"/> which tokenizes all the text in the
-        /// provided <see cref="TextReader"/>.
-        /// </summary>
-        /// <returns>
-        ///   A <see cref="TokenStream"/> built from a <see cref="ChineseTokenizer"/> 
-        ///   filtered with <see cref="ChineseFilter"/>.
-        /// </returns>
-        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
-        {
-            /* tokenStream() is final, no back compat issue */
-            SavedStreams streams = (SavedStreams) PreviousTokenStream;
-            if (streams == null)
-            {
-                streams = new SavedStreams();
-                streams.source = new ChineseTokenizer(reader);
-                streams.result = new ChineseFilter(streams.source);
-                PreviousTokenStream = streams;
-            }
-            else
-            {
-                streams.source.Reset(reader);
-            }
-            return streams.result;
-        }
-    }
-}
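
For context on how the deleted analyzer composes its tokenizer and filter, here is a hypothetical usage sketch against the Lucene.Net 3.x attribute API used throughout these sources (the field name and sample text are illustrative; the attribute calls mirror those in the deleted files). Per the doc comments, single Chinese characters and English tokens longer than one character come through, while purely numeric tokens and stop words are dropped.

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Cn;
using Lucene.Net.Analysis.Tokenattributes;

class ChineseAnalyzerDemo
{
    static void Main()
    {
        Analyzer analyzer = new ChineseAnalyzer();

        // TokenStream() builds a ChineseTokenizer wrapped in a ChineseFilter.
        TokenStream ts = analyzer.TokenStream("contents", new StringReader("中文 test 123"));
        ITermAttribute termAtt = ts.AddAttribute<ITermAttribute>();

        while (ts.IncrementToken())
        {
            // Rebuild the term text from the attribute's buffer, as the deleted filter does.
            Console.WriteLine(new string(termAtt.TermBuffer(), 0, termAtt.TermLength()));
        }
        // Expected (per the doc comments above): 中, 文, test
    }
}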

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Cn/ChineseFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Cn/ChineseFilter.cs b/src/contrib/Analyzers/Cn/ChineseFilter.cs
deleted file mode 100644
index e5c83a5..0000000
--- a/src/contrib/Analyzers/Cn/ChineseFilter.cs
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Collections;
-using System.Globalization;
-
-using Lucene.Net.Analysis;
-using Lucene.Net.Analysis.Tokenattributes;
-
-namespace Lucene.Net.Analysis.Cn
-{
-    // TODO: convert this XML code to valid .NET
-    /// <summary>
-    /// A {@link TokenFilter} with a stop word table.  
-    /// <ul>
-    /// <li>Numeric tokens are removed.</li>
-    /// <li>English tokens must be larger than 1 char.</li>
-    /// <li>One Chinese char as one Chinese word.</li>
-    /// </ul>
-    /// TO DO:
-    /// <ol>
-    /// <li>Add Chinese stop words, such as \ue400</li>
-    /// <li>Dictionary based Chinese word extraction</li>
-    /// <li>Intelligent Chinese word extraction</li>
-    /// </ol>
-    /// </summary>
-    public sealed class ChineseFilter : TokenFilter
-    {
-        // Only English now, Chinese to be added later.
-        public static String[] STOP_WORDS =
-            {
-                "and", "are", "as", "at", "be", "but", "by",
-                "for", "if", "in", "into", "is", "it",
-                "no", "not", "of", "on", "or", "such",
-                "that", "the", "their", "then", "there", "these",
-                "they", "this", "to", "was", "will", "with"
-            };
-
-        private CharArraySet stopTable;
-        private ITermAttribute termAtt;
-
-        public ChineseFilter(TokenStream _in)
-            : base(_in)
-        {
-            stopTable = new CharArraySet((IEnumerable<string>)STOP_WORDS, false);
-            termAtt = AddAttribute<ITermAttribute>();
-        }
-
-        public override bool IncrementToken()
-        {
-            while (input.IncrementToken())
-            {
-                char[] text = termAtt.TermBuffer();
-                int termLength = termAtt.TermLength();
-
-                // why not key off token type here assuming ChineseTokenizer comes first?
-                if (!stopTable.Contains(text, 0, termLength))
-                {
-                    switch (char.GetUnicodeCategory(text[0]))
-                    {
-                        case UnicodeCategory.LowercaseLetter:
-                        case UnicodeCategory.UppercaseLetter:
-                            // English words/tokens must be longer than 1 char.
-                            if (termLength > 1)
-                            {
-                                return true;
-                            }
-                            break;
-                        case UnicodeCategory.OtherLetter:
-                            // One Chinese char as one Chinese word.
-                            // Chinese word extraction to be added later here.
-                            return true;
-                    }
-                }
-            }
-            return false;
-        }
-    }
-}
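
The keep/drop decision above hinges on char.GetUnicodeCategory of the token's first character. A small stand-alone sketch of that classification (illustrative only, not part of the deleted filter):

using System;

static class CategoryCheck
{
    static void Main()
    {
        // Latin letters are LowercaseLetter/UppercaseLetter: kept only when the
        // token is longer than one character and not in the stop table.
        Console.WriteLine(char.GetUnicodeCategory('a')); // LowercaseLetter
        Console.WriteLine(char.GetUnicodeCategory('A')); // UppercaseLetter

        // CJK ideographs are OtherLetter: each one passes through as its own token.
        Console.WriteLine(char.GetUnicodeCategory('中')); // OtherLetter

        // Digits are DecimalDigitNumber: no switch case matches, so the token is dropped.
        Console.WriteLine(char.GetUnicodeCategory('7')); // DecimalDigitNumber
    }
}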

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Cn/ChineseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Cn/ChineseTokenizer.cs b/src/contrib/Analyzers/Cn/ChineseTokenizer.cs
deleted file mode 100644
index 69947aa..0000000
--- a/src/contrib/Analyzers/Cn/ChineseTokenizer.cs
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.IO;
-using System.Text;
-using System.Collections;
-using System.Globalization;
-
-using Lucene.Net.Analysis;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Analysis.Cn
-{
-    /// <summary>
-    /// Tokenizes Chinese text into individual Chinese chars.
-    /// <p>
-    /// The difference between ChineseTokenizer and
-    /// CJKTokenizer is that they have different
-    /// token parsing logic.
-    /// </p>
-    /// <p>
-    /// For example, if the Chinese text
-    /// "C1C2C3C4" is to be indexed:
-    /// <ul>
-    /// <li>The tokens returned from ChineseTokenizer are C1, C2, C3, C4</li>
-    /// <li>The tokens returned from the CJKTokenizer are C1C2, C2C3, C3C4.</li>
-    /// </ul>
-    /// </p>
-    /// <p>
-    /// Therefore the index created by CJKTokenizer is much larger.
-    /// </p>
-    /// <p>
-    /// The problem is that when searching for C1, C1C2, C1C3,
-    /// C4C2, C1C2C3 ... the ChineseTokenizer works, but the
-    /// CJKTokenizer will not work.
-    /// </p>
-    /// </summary> 
-    public sealed class ChineseTokenizer : Tokenizer
-    {
-        public ChineseTokenizer(TextReader _in)
-            : base(_in)
-        {
-            Init();
-        }
-
-        public ChineseTokenizer(AttributeSource source, TextReader _in)
-            : base(source, _in)
-        {
-            Init();
-        }
-
-        public ChineseTokenizer(AttributeFactory factory, TextReader _in)
-            : base(factory, _in)
-        {
-            Init();
-        }
-
-        private void Init()
-        {
-            termAtt = AddAttribute<ITermAttribute>();
-            offsetAtt = AddAttribute<IOffsetAttribute>();
-        }
-
-        private int offset = 0, bufferIndex = 0, dataLen = 0;
-        private static readonly int MAX_WORD_LEN = 255;
-        private static readonly int IO_BUFFER_SIZE = 1024;
-        private readonly char[] buffer = new char[MAX_WORD_LEN];
-        private readonly char[] ioBuffer = new char[IO_BUFFER_SIZE];
-
-        private int length;
-        private int start;
-
-        private ITermAttribute termAtt;
-        private IOffsetAttribute offsetAtt;
-
-        private void Push(char c)
-        {
-            if (length == 0) start = offset - 1; // start of token
-            buffer[length++] = Char.ToLower(c); // buffer it
-        }
-
-        private bool Flush()
-        {
-
-            if (length > 0)
-            {
-                termAtt.SetTermBuffer(buffer, 0, length);
-                offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + length));
-                return true;
-            }
-            else
-                return false;
-        }
-
-
-        public override bool IncrementToken()
-        {
-            ClearAttributes();
-
-            length = 0;
-            start = offset;
-
-
-            while (true)
-            {
-
-                char c;
-                offset++;
-
-                if (bufferIndex >= dataLen)
-                {
-                    dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
-                    bufferIndex = 0;
-                }
-
-                if (dataLen == 0)
-                {
-                    offset--;
-                    return Flush();
-                }
-                else
-                    c = ioBuffer[bufferIndex++];
-
-
-                switch (char.GetUnicodeCategory(c))
-                {
-
-                    case UnicodeCategory.DecimalDigitNumber:
-                    case UnicodeCategory.LowercaseLetter:
-                    case UnicodeCategory.UppercaseLetter:
-                        Push(c);
-                        if (length == MAX_WORD_LEN) return Flush();
-                        break;
-
-                    case UnicodeCategory.OtherLetter:
-                        if (length > 0)
-                        {
-                            bufferIndex--;
-                            offset--;
-                            return Flush();
-                        }
-                        Push(c);
-                        return Flush();
-
-                    default:
-                        if (length > 0) return Flush();
-                        break;
-                }
-            }
-        }
-
-        public override sealed void End()
-        {
-            // set final offset
-            int finalOffset = CorrectOffset(offset);
-            this.offsetAtt.SetOffset(finalOffset, finalOffset);
-        }
-
-        public override void Reset()
-        {
-            base.Reset();
-            offset = bufferIndex = dataLen = 0;
-        }
-
-        public override void Reset(TextReader input)
-        {
-            base.Reset(input);
-            Reset();
-        }
-    }
-}
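
The doc comment above contrasts ChineseTokenizer's per-character output with CJKTokenizer's overlapping bigrams. A stand-alone sketch of the two segmentations over the same input (neither tokenizer is used here; the helper names are illustrative):

using System;
using System.Collections.Generic;

static class SegmentationDemo
{
    // Per-character segmentation, as ChineseTokenizer produces: C1, C2, C3, C4.
    static IEnumerable<string> Unigrams(string text)
    {
        foreach (char c in text) yield return c.ToString();
    }

    // Overlapping two-character segmentation, as CJKTokenizer produces: C1C2, C2C3, C3C4.
    static IEnumerable<string> Bigrams(string text)
    {
        for (int i = 0; i + 1 < text.Length; i++) yield return text.Substring(i, 2);
    }

    static void Main()
    {
        string s = "中文分词";
        Console.WriteLine(string.Join(", ", Unigrams(s))); // 中, 文, 分, 词
        Console.WriteLine(string.Join(", ", Bigrams(s)));  // 中文, 文分, 分词
    }
}

The bigram output explains the doc comment's point that CJKTokenizer produces a larger index: every interior character appears in two tokens instead of one.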

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Compound/CompoundWordTokenFilterBase.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Compound/CompoundWordTokenFilterBase.cs b/src/contrib/Analyzers/Compound/CompoundWordTokenFilterBase.cs
deleted file mode 100644
index af3f702..0000000
--- a/src/contrib/Analyzers/Compound/CompoundWordTokenFilterBase.cs
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using System.Collections.Generic;
-using Lucene.Net.Analysis.Tokenattributes;
-
-namespace Lucene.Net.Analysis.Compound
-{
-
-    /*
-     * Base class for decomposition token filters.
-     */
-    public abstract class CompoundWordTokenFilterBase : TokenFilter
-    {
-        /*
-         * The default for minimal word length that gets decomposed
-         */
-        public static readonly int DEFAULT_MIN_WORD_SIZE = 5;
-
-        /*
-         * The default for minimal length of subwords that get propagated to the output of this filter
-         */
-        public static readonly int DEFAULT_MIN_SUBWORD_SIZE = 2;
-
-        /*
-         * The default for maximal length of subwords that get propagated to the output of this filter
-         */
-        public static readonly int DEFAULT_MAX_SUBWORD_SIZE = 15;
-
-        protected readonly CharArraySet dictionary;
-        protected readonly LinkedList<Token> tokens;
-        protected readonly int minWordSize;
-        protected readonly int minSubwordSize;
-        protected readonly int maxSubwordSize;
-        protected readonly bool onlyLongestMatch;
-
-        private ITermAttribute termAtt;
-        private IOffsetAttribute offsetAtt;
-        private IFlagsAttribute flagsAtt;
-        private IPositionIncrementAttribute posIncAtt;
-        private ITypeAttribute typeAtt;
-        private IPayloadAttribute payloadAtt;
-
-        private readonly Token wrapper = new Token();
-
-        protected CompoundWordTokenFilterBase(TokenStream input, String[] dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
-            : this(input, MakeDictionary(dictionary), minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch)
-        {
-
-        }
-
-        protected CompoundWordTokenFilterBase(TokenStream input, String[] dictionary, bool onlyLongestMatch)
-            : this(input, MakeDictionary(dictionary), DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch)
-        {
-
-        }
-
-        protected CompoundWordTokenFilterBase(TokenStream input, ISet<string> dictionary, bool onlyLongestMatch)
-            : this(input, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch)
-        {
-
-        }
-
-        protected CompoundWordTokenFilterBase(TokenStream input, String[] dictionary)
-            : this(input, MakeDictionary(dictionary), DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
-        {
-
-        }
-
-        protected CompoundWordTokenFilterBase(TokenStream input, ISet<string> dictionary)
-            : this(input, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
-        {
-
-        }
-
-        protected CompoundWordTokenFilterBase(TokenStream input, ISet<string> dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
-            : base(input)
-        {
-            this.tokens = new LinkedList<Token>();
-            this.minWordSize = minWordSize;
-            this.minSubwordSize = minSubwordSize;
-            this.maxSubwordSize = maxSubwordSize;
-            this.onlyLongestMatch = onlyLongestMatch;
-
-            if (dictionary is CharArraySet)
-            {
-                this.dictionary = (CharArraySet)dictionary;
-            }
-            else
-            {
-                this.dictionary = new CharArraySet(dictionary.Count, false);
-                AddAllLowerCase(this.dictionary, dictionary);
-            }
-
-            termAtt = AddAttribute<ITermAttribute>();
-            offsetAtt = AddAttribute<IOffsetAttribute>();
-            flagsAtt = AddAttribute<IFlagsAttribute>();
-            posIncAtt = AddAttribute<IPositionIncrementAttribute>();
-            typeAtt = AddAttribute<ITypeAttribute>();
-            payloadAtt = AddAttribute<IPayloadAttribute>();
-        }
-
-        /*
-         * Creates a set of words from an array.
-         * The resulting Set performs case-insensitive matching.
-         * TODO We should look for a faster dictionary lookup approach.
-         * @param dictionary 
-         * @return {@link Set} of lowercased terms 
-         */
-        public static ISet<string> MakeDictionary(String[] dictionary)
-        {
-            // is the below really case insensitive? 
-            CharArraySet dict = new CharArraySet(dictionary.Length, false);
-            AddAllLowerCase(dict, dictionary);
-            return dict;
-        }
-
-        private void setToken(Token token)
-        {
-            ClearAttributes();
-            termAtt.SetTermBuffer(token.TermBuffer(), 0, token.TermLength());
-            flagsAtt.Flags = token.Flags;
-            typeAtt.Type = token.Type;
-            offsetAtt.SetOffset(token.StartOffset, token.EndOffset);
-            posIncAtt.PositionIncrement = token.PositionIncrement;
-            payloadAtt.Payload = token.Payload;
-        }
-
-        public sealed override bool IncrementToken()
-        {
-            if (tokens.Count > 0)
-            {
-                setToken((Token)tokens.First.Value);
-                tokens.RemoveFirst();
-                return true;
-            }
-
-            if (input.IncrementToken() == false)
-                return false;
-
-            wrapper.SetTermBuffer(termAtt.TermBuffer(), 0, termAtt.TermLength());
-            wrapper.StartOffset = offsetAtt.StartOffset;
-            wrapper.EndOffset = offsetAtt.EndOffset;
-            wrapper.Flags = flagsAtt.Flags;
-            wrapper.Type = typeAtt.Type;
-            wrapper.PositionIncrement = posIncAtt.PositionIncrement;
-            wrapper.Payload = payloadAtt.Payload;
-
-            Decompose(wrapper);
-
-            if (tokens.Count > 0)
-            {
-                setToken(tokens.First.Value);
-                tokens.RemoveFirst();
-                return true;
-            }
-            else
-            {
-                return false;
-            }
-        }
-
-        protected static void AddAllLowerCase(ISet<string> target, ICollection<string> col)
-        {
-            foreach (var str in col)
-            {
-                target.Add(str.ToLower(System.Globalization.CultureInfo.GetCultureInfo("en-US")));
-            }
-        }
-
-        protected static char[] MakeLowerCaseCopy(char[] buffer)
-        {
-            char[] result = new char[buffer.Length];
-            Array.Copy(buffer, 0, result, 0, buffer.Length);
-
-            for (int i = 0; i < buffer.Length; ++i)
-            {
-                result[i] = char.ToLower(buffer[i]); // Is java invariant?
-            }
-
-            return result;
-        }
-
-        protected Token CreateToken(int offset, int length,
-            Token prototype)
-        {
-            int newStart = prototype.StartOffset + offset;
-            Token t = prototype.Clone(prototype.TermBuffer(), offset, length, newStart, newStart + length);
-            t.PositionIncrement = 0;
-            return t;
-        }
-
-        protected void Decompose(Token token)
-        {
-            // In any case we give the original token back
-            tokens.AddLast((Token)token.Clone());
-
-            // Only words longer than minWordSize get processed
-            if (token.TermLength() < this.minWordSize)
-            {
-                return;
-            }
-
-            DecomposeInternal(token);
-        }
-
-        protected abstract void DecomposeInternal(Token token);
-
-        public override void Reset()
-        {
-            base.Reset();
-            tokens.Clear();
-        }
-    }
-}
\ No newline at end of file
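
To illustrate the extension contract of the base class above: Decompose re-emits the original token and then calls DecomposeInternal, which only has to append subword tokens; CreateToken stacks each subword at the same position (PositionIncrement = 0). A hypothetical minimal subclass follows (this is not the deleted DictionaryCompoundWordTokenFilter; the class name and brute-force dictionary scan are illustrative only):

using System.Collections.Generic;
using Lucene.Net.Analysis;

namespace Lucene.Net.Analysis.Compound
{
    public class SimpleDictionaryCompoundFilter : CompoundWordTokenFilterBase
    {
        public SimpleDictionaryCompoundFilter(TokenStream input, ISet<string> dict)
            : base(input, dict)
        {
        }

        protected override void DecomposeInternal(Token token)
        {
            char[] buf = token.TermBuffer();
            int len = token.TermLength();

            // Every substring within the subword length bounds that appears in the
            // dictionary becomes an extra token stacked on the original compound.
            for (int start = 0; start <= len - minSubwordSize; start++)
            {
                for (int size = minSubwordSize; size <= maxSubwordSize && start + size <= len; size++)
                {
                    if (dictionary.Contains(buf, start, size))
                    {
                        tokens.AddLast(CreateToken(start, size, token));
                    }
                }
            }
        }
    }
}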

