lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d...@apache.org
Subject svn commit: r1069573 [3/3] - in /incubator/lucene.net: tags/Lucene.Net_2_9_2/contrib/Analyzers/ tags/Lucene.Net_2_9_2/contrib/Analyzers/BR/ tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/ tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net...
Date Thu, 10 Feb 2011 21:17:45 GMT
Added: incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemmer.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemmer.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemmer.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,1264 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A stemmer for Brazilian words.
+ */
+namespace Lucene.Net.Analysis.BR
+{
+
+    public class BrazilianStemmer
+    {
+
+        /**
+         * Changed term
+         */
+        // Original term joined with its normalized form as "term;CT"; assigned in Stem().
+        private string TERM;
+        // Normalized ("changed") term being stemmed; set by createCT() and rewritten by step1()/step2().
+        private string CT;
+        // Region R1 of CT, computed in Stem() as getR1(CT); may be null.
+        private string R1;
+        // Region R2, computed in Stem() as getR1(R1); may be null.
+        private string R2;
+        // Region RV of CT, computed in Stem() as getRV(CT); may be null.
+        private string RV;
+
+
+        // Nothing to initialise here: the working fields are assigned by Stem() on each call.
+        public BrazilianStemmer() { }
+
+        /**
+         * Stems the given term to a unique <tt>discriminator</tt>.
+         *
+         * @param term  The term that should be stemmed.
+         * @return      Discriminator for <tt>term</tt>
+         */
+        public string Stem(string term)
+        {
+            // A null term cannot be normalized; report it the same way as a
+            // non-indexable term instead of failing with a NullReferenceException.
+            if (term == null)
+            {
+                return null;
+            }
+
+            // Build the normalized working copy (CT) of the term.
+            createCT(term);
+
+            // Terms rejected by isIndexable yield no stem at all.
+            if (!isIndexable(CT))
+            {
+                return null;
+            }
+            // Terms containing non-letters are returned normalized but unstemmed.
+            if (!isStemmable(CT))
+            {
+                return CT;
+            }
+
+            // The regions are computed once, before any suffix is stripped.
+            R1 = getR1(CT);
+            R2 = getR1(R1);
+            RV = getRV(CT);
+            TERM = term + ";" + CT;
+
+            // step2 runs only when step1 removed nothing.
+            bool altered = step1() || step2();
+
+            // step3 follows a successful removal; step4 is the fallback.
+            if (altered)
+            {
+                step3();
+            }
+            else
+            {
+                step4();
+            }
+
+            step5();
+
+            return CT;
+        }
+
+        /**
+         * Checks a term if it can be processed correctly.
+         *
+         * @return  true if, and only if, the given term consists in letters.
+         */
+        private bool isStemmable(string term)
+        {
+            // A single non-letter character disqualifies the whole term.
+            foreach (char ch in term)
+            {
+                if (!char.IsLetter(ch))
+                {
+                    return false;
+                }
+            }
+
+            // Every character (possibly none) was a letter.
+            return true;
+        }
+
+        /**
+         * Checks whether a term can be indexed.
+         *
+         * @return  true if it can be indexed
+         */
+        private bool isIndexable(string term)
+        {
+            // Accept lengths from 3 to 29 characters inclusive.
+            int length = term.Length;
+            return length > 2 && length < 30;
+        }
+
+        /**
+         * See if string is 'a','e','i','o','u'
+       *
+       * @return true if is vowel
+         */
+        private bool isVowel(char value)
+        {
+            // Only the five unaccented lowercase vowels count.
+            switch (value)
+            {
+                case 'a':
+                case 'e':
+                case 'i':
+                case 'o':
+                case 'u':
+                    return true;
+                default:
+                    return false;
+            }
+        }
+
+        /**
+         * Gets R1
+       *
+       * R1 - is the region after the first non-vowel following a vowel,
+       *      or is the null region at the end of the word if there is
+       *      no such non-vowel.
+       *
+       * @return null or a string representing R1
+         */
+        private string getR1(string value)
+        {
+            if (value == null)
+            {
+                return null;
+            }
+
+            // The scan stops one short of the final character, so the last
+            // character is never inspected.
+            int last = value.Length - 1;
+            int pos = 0;
+
+            // Advance to the first vowel.
+            while (pos < last && !isVowel(value[pos]))
+            {
+                pos++;
+            }
+            if (pos >= last)
+            {
+                return null;
+            }
+
+            // Advance from that vowel to the first non-vowel after it.
+            while (pos < last && isVowel(value[pos]))
+            {
+                pos++;
+            }
+            if (pos >= last)
+            {
+                return null;
+            }
+
+            // The region is everything after that non-vowel.
+            return value.Substring(pos + 1);
+        }
+
+        /**
+         * Gets RV
+       *
+       * RV - IF the second letter is a consonant, RV is the region after
+       *      the next following vowel,
+       *
+       *      OR if the first two letters are vowels, RV is the region
+       *      after the next consonant,
+       *
+       *      AND otherwise (consonant-vowel case) RV is the region after
+       *      the third letter.
+       *
+       *      BUT RV is the end of the word if these positions cannot be
+       *      found.
+       *
+       * @return null or a string representing RV
+         */
+        private string getRV(string value)
+        {
+            if (value == null)
+            {
+                return null;
+            }
+
+            int last = value.Length - 1;
+            int pos;
+
+            // Case 1: the second letter is not a vowel, so RV starts after
+            // the next vowel found from index 2 onward.
+            if (last > 0 && !isVowel(value[1]))
+            {
+                pos = 2;
+                while (pos < last && !isVowel(value[pos]))
+                {
+                    pos++;
+                }
+                if (pos < last)
+                {
+                    return value.Substring(pos + 1);
+                }
+            }
+
+            // Case 2: the first two letters are vowels, so RV starts after
+            // the next non-vowel found from index 2 onward.
+            if (last > 1 && isVowel(value[0]) && isVowel(value[1]))
+            {
+                pos = 2;
+                while (pos < last && isVowel(value[pos]))
+                {
+                    pos++;
+                }
+                if (pos < last)
+                {
+                    return value.Substring(pos + 1);
+                }
+            }
+
+            // Case 3: fall back to the region after the third letter, when
+            // the word has more than three letters; otherwise there is no RV.
+            return last > 2 ? value.Substring(3) : null;
+        }
+
+        /**
+       * 1) Turn to lowercase
+       * 2) Remove accents
+       * 3) ã -> a ; õ -> o
+       * 4) ç -> c
+       *
+       * @return null or a string transformed
+         */
+        private string changeTerm(string value)
+        {
+            // be-safe !!!
+            if (value == null)
+            {
+                return null;
+            }
+
+            // Lowercase with the invariant culture so the result does not
+            // depend on the current thread culture.
+            value = value.ToLowerInvariant();
+
+            // Build the result in a buffer rather than by repeated string
+            // concatenation, which copies the whole string on every character.
+            System.Text.StringBuilder folded = new System.Text.StringBuilder(value.Length);
+            foreach (char ch in value)
+            {
+                switch (ch)
+                {
+                    case 'á':
+                    case 'â':
+                    case 'ã':
+                        folded.Append('a');
+                        break;
+                    case 'é':
+                    case 'ê':
+                        folded.Append('e');
+                        break;
+                    case 'í':
+                        folded.Append('i');
+                        break;
+                    case 'ó':
+                    case 'ô':
+                    case 'õ':
+                        folded.Append('o');
+                        break;
+                    case 'ú':
+                    case 'ü':
+                        folded.Append('u');
+                        break;
+                    case 'ç':
+                        folded.Append('c');
+                        break;
+                    case 'ñ':
+                        folded.Append('n');
+                        break;
+                    default:
+                        // Every other character is kept unchanged.
+                        folded.Append(ch);
+                        break;
+                }
+            }
+
+            return folded.ToString();
+        }
+
+        /**
+       * Check if a string ends with a suffix
+       *
+       * @return true if the string ends with the specified suffix
+         */
+        private bool suffix(string value, string suffix)
+        {
+            // A missing operand, or a suffix longer than the value, can never match.
+            if (value == null || suffix == null || suffix.Length > value.Length)
+            {
+                return false;
+            }
+
+            // Compare the tail of value against suffix, character by character (ordinal).
+            int tailStart = value.Length - suffix.Length;
+            return string.CompareOrdinal(value, tailStart, suffix, 0, suffix.Length) == 0;
+        }
+
+        /**
+       * Replace a string suffix by another
+       *
+       * @return the replaced string
+         */
+        private string replaceSuffix(string value, string toReplace, string changeTo)
+        {
+            // With any operand missing the value is handed back untouched.
+            if (value == null || toReplace == null || changeTo == null)
+            {
+                return value;
+            }
+
+            string stripped = removeSuffix(value, toReplace);
+
+            // If stripping changed nothing, return the value as-is;
+            // otherwise append the replacement to the stripped stem.
+            return value.Equals(stripped) ? value : stripped + changeTo;
+        }
+
+        /**
+       * Remove a string suffix
+       *
+       * @return the string without the suffix
+         */
+        private string removeSuffix(string value, string toRemove)
+        {
+            // Only strip when both operands exist and value really ends with toRemove.
+            bool strippable = (value != null) && (toRemove != null) && suffix(value, toRemove);
+            if (!strippable)
+            {
+                return value;
+            }
+
+            int keep = value.Length - toRemove.Length;
+            return value.Substring(0, keep);
+        }
+
+        /**
+       * See if a suffix is preceded by a string
+       *
+       * @return true if the suffix is preceded
+         */
+        private bool suffixPreceded(string value, string _suffix, string preceded)
+        {
+            // Any missing operand means "not preceded".
+            if (value == null || _suffix == null || preceded == null)
+            {
+                return false;
+            }
+
+            // The value must actually end with the suffix.
+            if (!suffix(value, _suffix))
+            {
+                return false;
+            }
+
+            // Drop the suffix and test what is left for the expected ending.
+            string head = removeSuffix(value, _suffix);
+            return suffix(head, preceded);
+        }
+
+
+
+
+        /**
+         * Creates CT (changed term): normalizes the term with changeTerm, then strips one leading and one trailing punctuation character.
+         */
+        private void createCT(string term)
+        {
+            // Characters stripped when they appear as the first / last character.
+            const string leadingMarks = "\"'-,;.?!";
+            const string trailingMarks = "-,;.?!'\"";
+
+            CT = changeTerm(term);
+
+            // Terms shorter than two characters are left alone.
+            if (CT.Length < 2)
+            {
+                return;
+            }
+
+            // Drop at most one leading mark.
+            if (leadingMarks.IndexOf(CT[0]) >= 0)
+            {
+                CT = CT.Substring(1);
+            }
+
+            // Re-check the length: the leading strip may have shortened the term.
+            if (CT.Length < 2)
+            {
+                return;
+            }
+
+            // Drop at most one trailing mark.
+            int lastIndex = CT.Length - 1;
+            if (trailingMarks.IndexOf(CT[lastIndex]) >= 0)
+            {
+                CT = CT.Substring(0, lastIndex);
+            }
+        }
+
+
+        /**
+         * Standard suffix removal.
+       * Search for the longest among the following suffixes, and perform
+       * the following actions:
+       *
+       * @return false if no ending was removed
+         */
+        // Rule table for step1(), in match-priority order (same order as the
+        // original if-chain). Each row is:
+        //   { suffix, region that must also end with it ("R1", "R2" or "RV"),
+        //     replacement text ("" = plain removal),
+        //     text that must precede the suffix in CT (null = no constraint) }
+        private static readonly string[][] step1Rules = new string[][]
+        {
+            new string[] { "uciones", "R2", "u",    null },
+            new string[] { "imentos", "R2", "",     null },
+            new string[] { "amentos", "R2", "",     null },
+            new string[] { "adores",  "R2", "",     null },
+            new string[] { "adoras",  "R2", "",     null },
+            new string[] { "logias",  "R2", "log",  null },
+            new string[] { "encias",  "R2", "ente", null },
+            new string[] { "amente",  "R1", "",     null },
+            new string[] { "idades",  "R2", "",     null },
+            new string[] { "acoes",   "R2", "",     null },
+            new string[] { "imento",  "R2", "",     null },
+            new string[] { "amento",  "R2", "",     null },
+            new string[] { "adora",   "R2", "",     null },
+            new string[] { "ismos",   "R2", "",     null },
+            new string[] { "istas",   "R2", "",     null },
+            new string[] { "logia",   "R2", "log",  null },
+            new string[] { "ucion",   "R2", "u",    null },
+            new string[] { "encia",   "R2", "ente", null },
+            new string[] { "mente",   "R2", "",     null },
+            new string[] { "idade",   "R2", "",     null },
+            new string[] { "acao",    "R2", "",     null },
+            new string[] { "ezas",    "R2", "",     null },
+            new string[] { "icos",    "R2", "",     null },
+            new string[] { "icas",    "R2", "",     null },
+            new string[] { "ismo",    "R2", "",     null },
+            new string[] { "avel",    "R2", "",     null },
+            new string[] { "ivel",    "R2", "",     null },
+            new string[] { "ista",    "R2", "",     null },
+            new string[] { "osos",    "R2", "",     null },
+            new string[] { "osas",    "R2", "",     null },
+            new string[] { "ador",    "R2", "",     null },
+            new string[] { "ivas",    "R2", "",     null },
+            new string[] { "ivos",    "R2", "",     null },
+            new string[] { "iras",    "RV", "ir",   "e"  },
+            new string[] { "eza",     "R2", "",     null },
+            new string[] { "ico",     "R2", "",     null },
+            new string[] { "ica",     "R2", "",     null },
+            new string[] { "oso",     "R2", "",     null },
+            new string[] { "osa",     "R2", "",     null },
+            new string[] { "iva",     "R2", "",     null },
+            new string[] { "ivo",     "R2", "",     null },
+            new string[] { "ira",     "RV", "ir",   "e"  }
+        };
+
+        // Applies the first matching rule of step1Rules to CT.
+        // Returns true when a rule matched (CT was rewritten), false otherwise.
+        private bool step1()
+        {
+            if (CT == null) return false;
+
+            foreach (string[] rule in step1Rules)
+            {
+                string ending = rule[0];
+                string replacement = rule[2];
+                string mustFollow = rule[3];
+
+                // Pick the region field the rule is constrained to.
+                string region;
+                switch (rule[1])
+                {
+                    case "R1": region = R1; break;
+                    case "RV": region = RV; break;
+                    default: region = R2; break;
+                }
+
+                // Both the term and the region must end with the suffix.
+                // suffix() already rejects suffixes longer than its input,
+                // so no separate length guard is needed.
+                if (!suffix(CT, ending) || !suffix(region, ending))
+                {
+                    continue;
+                }
+
+                // Some rules additionally require specific text before the suffix.
+                if (mustFollow != null && !suffixPreceded(CT, ending, mustFollow))
+                {
+                    continue;
+                }
+
+                // Always store the result back into CT. The "logias" branch
+                // previously discarded it and left CT unchanged while still
+                // reporting a removal.
+                CT = replaceSuffix(CT, ending, replacement);
+                return true;
+            }
+
+            // no ending was removed by step1
+            return false;
+        }
+
+
+        /**
+         * Verb suffixes.
+       *
+       * Search for the longest among the following suffixes in RV,
+       * and if found, delete.
+       *
+       * @return false if no ending was removed
+        */
+        private bool step2()
+        {
+            if (RV == null) return false;
+
+            // suffix lenght = 7
+            if (RV.Length >= 7)
+            {
+                if (suffix(RV, "issemos"))
+                {
+                    CT = removeSuffix(CT, "issemos"); return true;
+                }
+                if (suffix(RV, "essemos"))
+                {
+                    CT = removeSuffix(CT, "essemos"); return true;
+                }
+                if (suffix(RV, "assemos"))
+                {
+                    CT = removeSuffix(CT, "assemos"); return true;
+                }
+                if (suffix(RV, "ariamos"))
+                {
+                    CT = removeSuffix(CT, "ariamos"); return true;
+                }
+                if (suffix(RV, "eriamos"))
+                {
+                    CT = removeSuffix(CT, "eriamos"); return true;
+                }
+                if (suffix(RV, "iriamos"))
+                {
+                    CT = removeSuffix(CT, "iriamos"); return true;
+                }
+            }
+
+            // suffix lenght = 6
+            if (RV.Length >= 6)
+            {
+                if (suffix(RV, "iremos"))
+                {
+                    CT = removeSuffix(CT, "iremos"); return true;
+                }
+                if (suffix(RV, "eremos"))
+                {
+                    CT = removeSuffix(CT, "eremos"); return true;
+                }
+                if (suffix(RV, "aremos"))
+                {
+                    CT = removeSuffix(CT, "aremos"); return true;
+                }
+                if (suffix(RV, "avamos"))
+                {
+                    CT = removeSuffix(CT, "avamos"); return true;
+                }
+                if (suffix(RV, "iramos"))
+                {
+                    CT = removeSuffix(CT, "iramos"); return true;
+                }
+                if (suffix(RV, "eramos"))
+                {
+                    CT = removeSuffix(CT, "eramos"); return true;
+                }
+                if (suffix(RV, "aramos"))
+                {
+                    CT = removeSuffix(CT, "aramos"); return true;
+                }
+                if (suffix(RV, "asseis"))
+                {
+                    CT = removeSuffix(CT, "asseis"); return true;
+                }
+                if (suffix(RV, "esseis"))
+                {
+                    CT = removeSuffix(CT, "esseis"); return true;
+                }
+                if (suffix(RV, "isseis"))
+                {
+                    CT = removeSuffix(CT, "isseis"); return true;
+                }
+                if (suffix(RV, "arieis"))
+                {
+                    CT = removeSuffix(CT, "arieis"); return true;
+                }
+                if (suffix(RV, "erieis"))
+                {
+                    CT = removeSuffix(CT, "erieis"); return true;
+                }
+                if (suffix(RV, "irieis"))
+                {
+                    CT = removeSuffix(CT, "irieis"); return true;
+                }
+            }
+
+
+            // suffix lenght = 5
+            if (RV.Length >= 5)
+            {
+                if (suffix(RV, "irmos"))
+                {
+                    CT = removeSuffix(CT, "irmos"); return true;
+                }
+                if (suffix(RV, "iamos"))
+                {
+                    CT = removeSuffix(CT, "iamos"); return true;
+                }
+                if (suffix(RV, "armos"))
+                {
+                    CT = removeSuffix(CT, "armos"); return true;
+                }
+                if (suffix(RV, "ermos"))
+                {
+                    CT = removeSuffix(CT, "ermos"); return true;
+                }
+                if (suffix(RV, "areis"))
+                {
+                    CT = removeSuffix(CT, "areis"); return true;
+                }
+                if (suffix(RV, "ereis"))
+                {
+                    CT = removeSuffix(CT, "ereis"); return true;
+                }
+                if (suffix(RV, "ireis"))
+                {
+                    CT = removeSuffix(CT, "ireis"); return true;
+                }
+                if (suffix(RV, "asses"))
+                {
+                    CT = removeSuffix(CT, "asses"); return true;
+                }
+                if (suffix(RV, "esses"))
+                {
+                    CT = removeSuffix(CT, "esses"); return true;
+                }
+                if (suffix(RV, "isses"))
+                {
+                    CT = removeSuffix(CT, "isses"); return true;
+                }
+                if (suffix(RV, "astes"))
+                {
+                    CT = removeSuffix(CT, "astes"); return true;
+                }
+                if (suffix(RV, "assem"))
+                {
+                    CT = removeSuffix(CT, "assem"); return true;
+                }
+                if (suffix(RV, "essem"))
+                {
+                    CT = removeSuffix(CT, "essem"); return true;
+                }
+                if (suffix(RV, "issem"))
+                {
+                    CT = removeSuffix(CT, "issem"); return true;
+                }
+                if (suffix(RV, "ardes"))
+                {
+                    CT = removeSuffix(CT, "ardes"); return true;
+                }
+                if (suffix(RV, "erdes"))
+                {
+                    CT = removeSuffix(CT, "erdes"); return true;
+                }
+                if (suffix(RV, "irdes"))
+                {
+                    CT = removeSuffix(CT, "irdes"); return true;
+                }
+                if (suffix(RV, "ariam"))
+                {
+                    CT = removeSuffix(CT, "ariam"); return true;
+                }
+                if (suffix(RV, "eriam"))
+                {
+                    CT = removeSuffix(CT, "eriam"); return true;
+                }
+                if (suffix(RV, "iriam"))
+                {
+                    CT = removeSuffix(CT, "iriam"); return true;
+                }
+                if (suffix(RV, "arias"))
+                {
+                    CT = removeSuffix(CT, "arias"); return true;
+                }
+                if (suffix(RV, "erias"))
+                {
+                    CT = removeSuffix(CT, "erias"); return true;
+                }
+                if (suffix(RV, "irias"))
+                {
+                    CT = removeSuffix(CT, "irias"); return true;
+                }
+                if (suffix(RV, "estes"))
+                {
+                    CT = removeSuffix(CT, "estes"); return true;
+                }
+                if (suffix(RV, "istes"))
+                {
+                    CT = removeSuffix(CT, "istes"); return true;
+                }
+                if (suffix(RV, "areis"))
+                {
+                    CT = removeSuffix(CT, "areis"); return true;
+                }
+                if (suffix(RV, "aveis"))
+                {
+                    CT = removeSuffix(CT, "aveis"); return true;
+                }
+            }
+
+            // suffix lenght = 4
+            if (RV.Length >= 4)
+            {
+                if (suffix(RV, "aria"))
+                {
+                    CT = removeSuffix(CT, "aria"); return true;
+                }
+                if (suffix(RV, "eria"))
+                {
+                    CT = removeSuffix(CT, "eria"); return true;
+                }
+                if (suffix(RV, "iria"))
+                {
+                    CT = removeSuffix(CT, "iria"); return true;
+                }
+                if (suffix(RV, "asse"))
+                {
+                    CT = removeSuffix(CT, "asse"); return true;
+                }
+                if (suffix(RV, "esse"))
+                {
+                    CT = removeSuffix(CT, "esse"); return true;
+                }
+                if (suffix(RV, "isse"))
+                {
+                    CT = removeSuffix(CT, "isse"); return true;
+                }
+                if (suffix(RV, "aste"))
+                {
+                    CT = removeSuffix(CT, "aste"); return true;
+                }
+                if (suffix(RV, "este"))
+                {
+                    CT = removeSuffix(CT, "este"); return true;
+                }
+                if (suffix(RV, "iste"))
+                {
+                    CT = removeSuffix(CT, "iste"); return true;
+                }
+                if (suffix(RV, "arei"))
+                {
+                    CT = removeSuffix(CT, "arei"); return true;
+                }
+                if (suffix(RV, "erei"))
+                {
+                    CT = removeSuffix(CT, "erei"); return true;
+                }
+                if (suffix(RV, "irei"))
+                {
+                    CT = removeSuffix(CT, "irei"); return true;
+                }
+                if (suffix(RV, "aram"))
+                {
+                    CT = removeSuffix(CT, "aram"); return true;
+                }
+                if (suffix(RV, "eram"))
+                {
+                    CT = removeSuffix(CT, "eram"); return true;
+                }
+                if (suffix(RV, "iram"))
+                {
+                    CT = removeSuffix(CT, "iram"); return true;
+                }
+                if (suffix(RV, "avam"))
+                {
+                    CT = removeSuffix(CT, "avam"); return true;
+                }
+                if (suffix(RV, "arem"))
+                {
+                    CT = removeSuffix(CT, "arem"); return true;
+                }
+                if (suffix(RV, "erem"))
+                {
+                    CT = removeSuffix(CT, "erem"); return true;
+                }
+                if (suffix(RV, "irem"))
+                {
+                    CT = removeSuffix(CT, "irem"); return true;
+                }
+                if (suffix(RV, "ando"))
+                {
+                    CT = removeSuffix(CT, "ando"); return true;
+                }
+                if (suffix(RV, "endo"))
+                {
+                    CT = removeSuffix(CT, "endo"); return true;
+                }
+                if (suffix(RV, "indo"))
+                {
+                    CT = removeSuffix(CT, "indo"); return true;
+                }
+                if (suffix(RV, "arao"))
+                {
+                    CT = removeSuffix(CT, "arao"); return true;
+                }
+                if (suffix(RV, "erao"))
+                {
+                    CT = removeSuffix(CT, "erao"); return true;
+                }
+                if (suffix(RV, "irao"))
+                {
+                    CT = removeSuffix(CT, "irao"); return true;
+                }
+                if (suffix(RV, "adas"))
+                {
+                    CT = removeSuffix(CT, "adas"); return true;
+                }
+                if (suffix(RV, "idas"))
+                {
+                    CT = removeSuffix(CT, "idas"); return true;
+                }
+                if (suffix(RV, "aras"))
+                {
+                    CT = removeSuffix(CT, "aras"); return true;
+                }
+                if (suffix(RV, "eras"))
+                {
+                    CT = removeSuffix(CT, "eras"); return true;
+                }
+                if (suffix(RV, "iras"))
+                {
+                    CT = removeSuffix(CT, "iras"); return true;
+                }
+                if (suffix(RV, "avas"))
+                {
+                    CT = removeSuffix(CT, "avas"); return true;
+                }
+                if (suffix(RV, "ares"))
+                {
+                    CT = removeSuffix(CT, "ares"); return true;
+                }
+                if (suffix(RV, "eres"))
+                {
+                    CT = removeSuffix(CT, "eres"); return true;
+                }
+                if (suffix(RV, "ires"))
+                {
+                    CT = removeSuffix(CT, "ires"); return true;
+                }
+                if (suffix(RV, "ados"))
+                {
+                    CT = removeSuffix(CT, "ados"); return true;
+                }
+                if (suffix(RV, "idos"))
+                {
+                    CT = removeSuffix(CT, "idos"); return true;
+                }
+                if (suffix(RV, "amos"))
+                {
+                    CT = removeSuffix(CT, "amos"); return true;
+                }
+                if (suffix(RV, "emos"))
+                {
+                    CT = removeSuffix(CT, "emos"); return true;
+                }
+                if (suffix(RV, "imos"))
+                {
+                    CT = removeSuffix(CT, "imos"); return true;
+                }
+                if (suffix(RV, "iras"))
+                {
+                    CT = removeSuffix(CT, "iras"); return true;
+                }
+                if (suffix(RV, "ieis"))
+                {
+                    CT = removeSuffix(CT, "ieis"); return true;
+                }
+            }
+
+            // suffix length = 3
+            if (RV.Length >= 3)
+            {
+                if (suffix(RV, "ada"))
+                {
+                    CT = removeSuffix(CT, "ada"); return true;
+                }
+                if (suffix(RV, "ida"))
+                {
+                    CT = removeSuffix(CT, "ida"); return true;
+                }
+                if (suffix(RV, "ara"))
+                {
+                    CT = removeSuffix(CT, "ara"); return true;
+                }
+                if (suffix(RV, "era"))
+                {
+                    CT = removeSuffix(CT, "era"); return true;
+                }
+                if (suffix(RV, "ira"))
+                {
+                    CT = removeSuffix(CT, "ava"); return true;
+                }
+                if (suffix(RV, "iam"))
+                {
+                    CT = removeSuffix(CT, "iam"); return true;
+                }
+                if (suffix(RV, "ado"))
+                {
+                    CT = removeSuffix(CT, "ado"); return true;
+                }
+                if (suffix(RV, "ido"))
+                {
+                    CT = removeSuffix(CT, "ido"); return true;
+                }
+                if (suffix(RV, "ias"))
+                {
+                    CT = removeSuffix(CT, "ias"); return true;
+                }
+                if (suffix(RV, "ais"))
+                {
+                    CT = removeSuffix(CT, "ais"); return true;
+                }
+                if (suffix(RV, "eis"))
+                {
+                    CT = removeSuffix(CT, "eis"); return true;
+                }
+                if (suffix(RV, "ira"))
+                {
+                    CT = removeSuffix(CT, "ira"); return true;
+                }
+                if (suffix(RV, "ear"))
+                {
+                    CT = removeSuffix(CT, "ear"); return true;
+                }
+            }
+
+            // suffix length = 2
+            if (RV.Length >= 2)
+            {
+                if (suffix(RV, "ia"))
+                {
+                    CT = removeSuffix(CT, "ia"); return true;
+                }
+                if (suffix(RV, "ei"))
+                {
+                    CT = removeSuffix(CT, "ei"); return true;
+                }
+                if (suffix(RV, "am"))
+                {
+                    CT = removeSuffix(CT, "am"); return true;
+                }
+                if (suffix(RV, "em"))
+                {
+                    CT = removeSuffix(CT, "em"); return true;
+                }
+                if (suffix(RV, "ar"))
+                {
+                    CT = removeSuffix(CT, "ar"); return true;
+                }
+                if (suffix(RV, "er"))
+                {
+                    CT = removeSuffix(CT, "er"); return true;
+                }
+                if (suffix(RV, "ir"))
+                {
+                    CT = removeSuffix(CT, "ir"); return true;
+                }
+                if (suffix(RV, "as"))
+                {
+                    CT = removeSuffix(CT, "as"); return true;
+                }
+                if (suffix(RV, "es"))
+                {
+                    CT = removeSuffix(CT, "es"); return true;
+                }
+                if (suffix(RV, "is"))
+                {
+                    CT = removeSuffix(CT, "is"); return true;
+                }
+                if (suffix(RV, "eu"))
+                {
+                    CT = removeSuffix(CT, "eu"); return true;
+                }
+                if (suffix(RV, "iu"))
+                {
+                    CT = removeSuffix(CT, "iu"); return true;
+                }
+                if (suffix(RV, "iu"))
+                {
+                    CT = removeSuffix(CT, "iu"); return true;
+                }
+                if (suffix(RV, "ou"))
+                {
+                    CT = removeSuffix(CT, "ou"); return true;
+                }
+            }
+
+            // no ending was removed by step2
+            return false;
+        }
+
+        /**
+         * Step 3: removes the suffix 'i' from CT when RV ends with 'i'
+         * and suffixPreceded reports that this 'i' is preceded by 'c'.
+         *
+         * Does nothing when RV is null.
+         */
+        private void step3()
+        {
+            bool endsWithCi = RV != null
+                && suffix(RV, "i")
+                && suffixPreceded(RV, "i", "c");
+
+            if (!endsWithCi)
+            {
+                return;
+            }
+
+            CT = removeSuffix(CT, "i");
+        }
+
+        /**
+         * Residual suffix.
+         *
+         * Tests RV against the endings "os", "a", "i" and "o", in that
+         * order, and removes the first one that matches from CT. At most
+         * one ending is removed per call; nothing happens when RV is null.
+         *
+         * NOTE(review): the algorithm description also lists the accented
+         * endings (á í ó). They are not tested here - presumably accents
+         * are already stripped before this step; confirm against the caller.
+         */
+        private void step4()
+        {
+            if (RV == null)
+            {
+                return;
+            }
+
+            // Candidates are checked in this order; the first match wins.
+            string[] residualEndings = { "os", "a", "i", "o" };
+            foreach (string ending in residualEndings)
+            {
+                if (suffix(RV, ending))
+                {
+                    CT = removeSuffix(CT, ending);
+                    return;
+                }
+            }
+        }
+
+        /**
+         * If RV ends with 'e', removes the suffix 'e' from CT. When that
+         * 'e' is preceded by "gu" (or otherwise by "ci") according to
+         * suffixPreceded, the suffix 'u' (or 'i') is removed from CT as well.
+         *
+         * Nothing happens when RV is null or does not end with 'e'.
+         *
+         * NOTE(review): the algorithm description also mentions the accented
+         * endings (é ê) and removing the cedilla from a final 'ç'. Neither
+         * is handled in this method - presumably accents are normalized
+         * earlier; confirm against the caller.
+         */
+        private void step5()
+        {
+            if (RV == null || !suffix(RV, "e"))
+            {
+                return;
+            }
+
+            // Decide which extra letter, if any, goes away together with the 'e'.
+            // "gu" takes precedence over "ci".
+            string extraLetter = null;
+            if (suffixPreceded(RV, "e", "gu"))
+            {
+                extraLetter = "u";
+            }
+            else if (suffixPreceded(RV, "e", "ci"))
+            {
+                extraLetter = "i";
+            }
+
+            CT = removeSuffix(CT, "e");
+            if (extraLetter != null)
+            {
+                CT = removeSuffix(CT, extraLetter);
+            }
+        }
+
+        /**
+         * For log and debug purposes.
+         *
+         * @return  a single string listing TERM, CT, RV, R1 and R2, each
+         *          rendered as " (NAME = value)"
+         */
+        public string Log()
+        {
+            // string.Concat renders null arguments as empty text, exactly as
+            // the '+' operator does.
+            return string.Concat(
+                " (TERM = ", TERM, ")",
+                " (CT = ", CT, ")",
+                " (RV = ", RV, ")",
+                " (R1 = ", R1, ")",
+                " (R2 = ", R2, ")");
+        }
+
+    }
+
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/Lucene.Net.Analyzers.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Analyzers/Lucene.Net.Analyzers/Lucene.Net.Analyzers.csproj?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/Lucene.Net.Analyzers.csproj (added)
+++ incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/Lucene.Net.Analyzers.csproj Thu Feb 10 21:17:43 2011
@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="3.5" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProductVersion>9.0.21022</ProductVersion>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{4286E961-9143-4821-B46D-3D39D3736386}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net.Analyzers</RootNamespace>
+    <AssemblyName>Lucene.Net.Analyzers</AssemblyName>
+    <TargetFrameworkVersion>v2.0</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="Lucene.Net, Version=2.9.2.2, Culture=neutral, processorArchitecture=MSIL">
+      <SpecificVersion>False</SpecificVersion>
+      <HintPath>..\Lucene.Net.dll</HintPath>
+    </Reference>
+    <Reference Include="System" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="AR\ArabicAnalyzer.cs" />
+    <Compile Include="AR\ArabicLetterTokenizer.cs" />
+    <Compile Include="AR\ArabicNormalizationFilter.cs" />
+    <Compile Include="AR\ArabicNormalizer.cs" />
+    <Compile Include="AR\ArabicStemFilter.cs" />
+    <Compile Include="AR\ArabicStemmer.cs" />
+    <Compile Include="BR\BrazilianAnalyzer.cs" />
+    <Compile Include="BR\BrazilianStemFilter.cs" />
+    <Compile Include="BR\BrazilianStemmer.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="AR\ArabicStopWords.txt" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/Lucene.Net.Analyzers.csproj.user
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Analyzers/Lucene.Net.Analyzers/Lucene.Net.Analyzers.csproj.user?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/Lucene.Net.Analyzers.csproj.user (added)
+++ incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/Lucene.Net.Analyzers.csproj.user Thu Feb 10 21:17:43 2011
@@ -0,0 +1 @@
+<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003" />
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/Properties/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Analyzers/Lucene.Net.Analyzers/Properties/AssemblyInfo.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/Properties/AssemblyInfo.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/Properties/AssemblyInfo.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Analyzers")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("The Apache Software Foundation")]
+[assembly: AssemblyProduct("Lucene.Net.Analyzers")]
+[assembly: AssemblyCopyright("Copyright 2006 - 2011 The Apache Software Foundation")]
+[assembly: AssemblyTrademark("Copyright 2006 - 2011 The Apache Software Foundation")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("36a962fb-a8be-4238-88c4-32568216e247")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version 
+//      Build Number
+//      Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers 
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("2.9.2.1")]
+[assembly: AssemblyFileVersion("2.9.2.1")]

Added: incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/AR/TestArabicAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Analyzers/Test/AR/TestArabicAnalyzer.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/AR/TestArabicAnalyzer.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/AR/TestArabicAnalyzer.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.AR
+{
+
+
+    /**
+     * Tests for the Arabic analyzer: default construction, conflation of
+     * some regular word forms, analyzer reuse, non-Arabic input and custom
+     * stopwords.
+     */
+    [TestFixture]
+    public class TestArabicAnalyzer : BaseTokenStreamTestCase
+    {
+
+        /**
+         * Constructs the analyzer with its default settings.
+         * Expected to fail with a null reference error when the stopwords
+         * resource is missing (TODO confirm the exact failure mode on .NET).
+         */
+        [Test]
+        public void TestResourcesAvailable()
+        {
+            new ArabicAnalyzer();
+        }
+
+        /**
+         * Simple cases showing how some regular forms are conflated to the
+         * same token by the analyzer.
+         */
+        [Test]
+        public void TestBasicFeatures()
+        {
+            ArabicAnalyzer analyzer = new ArabicAnalyzer();
+
+            // feminine marker
+            AssertAnalyzesTo(analyzer, "كبير", new string[] { "كبير" });
+            AssertAnalyzesTo(analyzer, "كبيرة", new string[] { "كبير" });
+
+            // plural -at
+            AssertAnalyzesTo(analyzer, "مشروب", new string[] { "مشروب" });
+            AssertAnalyzesTo(analyzer, "مشروبات", new string[] { "مشروب" });
+
+            // plural -in, then singular with bare alif
+            AssertAnalyzesTo(analyzer, "أمريكيين", new string[] { "امريك" });
+            AssertAnalyzesTo(analyzer, "امريكي", new string[] { "امريك" });
+
+            // definite article
+            AssertAnalyzesTo(analyzer, "كتاب", new string[] { "كتاب" });
+            AssertAnalyzesTo(analyzer, "الكتاب", new string[] { "كتاب" });
+
+            // stopwords
+            AssertAnalyzesTo(analyzer, "ما ملكت أيمانكم", new string[] { "ملكت", "ايمانكم" });
+            AssertAnalyzesTo(analyzer, "الذين ملكت أيمانكم", new string[] { "ملكت", "ايمانكم" });
+        }
+
+        /**
+         * Runs two inputs through the same analyzer instance via the
+         * reusable token stream path to show that state is reset correctly.
+         */
+        [Test]
+        public void TestReusableTokenStream()
+        {
+            ArabicAnalyzer analyzer = new ArabicAnalyzer();
+
+            // feminine marker
+            AssertAnalyzesToReuse(analyzer, "كبير", new string[] { "كبير" });
+            AssertAnalyzesToReuse(analyzer, "كبيرة", new string[] { "كبير" });
+        }
+
+        /**
+         * Non-Arabic text is expected to come out lower-cased and split on
+         * non-letters, in a similar way as SimpleAnalyzer would do it.
+         */
+        [Test]
+        public void TestEnglishInput()
+        {
+            ArabicAnalyzer analyzer = new ArabicAnalyzer();
+            AssertAnalyzesTo(analyzer, "English text.", new string[] { "english", "text" });
+        }
+
+        /**
+         * Custom stopwords are applied, and matching is not case-sensitive
+         * ("The" is dropped by the stopword "the").
+         */
+        [Test]
+        public void TestCustomStopwords()
+        {
+            string[] stopwords = new string[] { "the", "and", "a" };
+            ArabicAnalyzer analyzer = new ArabicAnalyzer(stopwords);
+            AssertAnalyzesTo(analyzer, "The quick brown fox.", new string[] { "quick", "brown", "fox" });
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/AR/TestArabicNormalizationFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Analyzers/Test/AR/TestArabicNormalizationFilter.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/AR/TestArabicNormalizationFilter.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/AR/TestArabicNormalizationFilter.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.AR
+{
+
+
+    /**
+     * Tests for the Arabic normalization filter. Each test feeds a single
+     * word through ArabicLetterTokenizer + ArabicNormalizationFilter and
+     * compares the one resulting token with the expected normalized form.
+     */
+    [TestFixture]
+    public class TestArabicNormalizationFilter : BaseTokenStreamTestCase
+    {
+
+        /** Alif with madda is expected to become bare alif. */
+        [Test]
+        public void TestAlifMadda()
+        {
+            Check("آجن", "اجن");
+        }
+
+        /** Alif with hamza above is expected to become bare alif. */
+        [Test]
+        public void TestAlifHamzaAbove()
+        {
+            Check("أحمد", "احمد");
+        }
+
+        /** Alif with hamza below is expected to become bare alif. */
+        [Test]
+        public void TestAlifHamzaBelow()
+        {
+            Check("إعاذ", "اعاذ");
+        }
+
+        /** Final alif maksura is expected to become yeh. */
+        [Test]
+        public void TestAlifMaksura()
+        {
+            Check("بنى", "بني");
+        }
+
+        /** Final teh marbuta is expected to become heh. */
+        [Test]
+        public void TestTehMarbuta()
+        {
+            Check("فاطمة", "فاطمه");
+        }
+
+        /** Tatweel (elongation) characters are expected to be removed. */
+        [Test]
+        public void TestTatweel()
+        {
+            Check("روبرـــــت", "روبرت");
+        }
+
+        /** The fatha mark is expected to be removed. */
+        [Test]
+        public void TestFatha()
+        {
+            Check("مَبنا", "مبنا");
+        }
+
+        /** The kasra mark is expected to be removed. */
+        [Test]
+        public void TestKasra()
+        {
+            Check("علِي", "علي");
+        }
+
+        /** The damma mark is expected to be removed. */
+        [Test]
+        public void TestDamma()
+        {
+            Check("بُوات", "بوات");
+        }
+
+        /** The fathatan mark is expected to be removed. */
+        [Test]
+        public void TestFathatan()
+        {
+            Check("ولداً", "ولدا");
+        }
+
+        /** The kasratan mark is expected to be removed. */
+        [Test]
+        public void TestKasratan()
+        {
+            Check("ولدٍ", "ولد");
+        }
+
+        /** The dammatan mark is expected to be removed. */
+        [Test]
+        public void TestDammatan()
+        {
+            Check("ولدٌ", "ولد");
+        }
+
+        /** The sukun mark is expected to be removed. */
+        [Test]
+        public void TestSukun()
+        {
+            Check("نلْسون", "نلسون");
+        }
+
+        /** The shaddah mark is expected to be removed. */
+        [Test]
+        public void TestShaddah()
+        {
+            Check("هتميّ", "هتمي");
+        }
+
+        /**
+         * Tokenizes input, applies the normalization filter and asserts that
+         * the stream yields exactly one token equal to expected.
+         */
+        private void Check(string input, string expected)
+        {
+            StringReader reader = new StringReader(input);
+            ArabicNormalizationFilter normalized =
+                new ArabicNormalizationFilter(new ArabicLetterTokenizer(reader));
+
+            AssertTokenStreamContents(normalized, new string[] { expected });
+        }
+
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/AR/TestArabicStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Analyzers/Test/AR/TestArabicStemFilter.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/AR/TestArabicStemFilter.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/AR/TestArabicStemFilter.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+using NUnit.Framework;
+
+
+namespace Lucene.Net.Analysis.AR
+{
+
+
+    /**
+     * Tests for the Arabic stem filter. Each test feeds a single word
+     * through ArabicLetterTokenizer + ArabicStemFilter and compares the one
+     * resulting token with the expected stem.
+     */
+    [TestFixture]
+    public class TestArabicStemFilter : BaseTokenStreamTestCase
+    {
+        /** Expects the "al" prefix to be removed. */
+        [Test]
+        public void TestAlPrefix()
+        {
+            Check("الحسن", "حسن");
+        }
+
+        /** Expects the "wal" prefix to be removed. */
+        [Test]
+        public void TestWalPrefix()
+        {
+            Check("والحسن", "حسن");
+        }
+
+        /** Expects the "bal" prefix to be removed. */
+        [Test]
+        public void TestBalPrefix()
+        {
+            Check("بالحسن", "حسن");
+        }
+
+        /** Expects the "kal" prefix to be removed. */
+        [Test]
+        public void TestKalPrefix()
+        {
+            Check("كالحسن", "حسن");
+        }
+
+        /** Expects the "fal" prefix to be removed. */
+        [Test]
+        public void TestFalPrefix()
+        {
+            Check("فالحسن", "حسن");
+        }
+
+        /** Expects the "ll" prefix to be removed. */
+        [Test]
+        public void TestLlPrefix()
+        {
+            Check("للاخر", "اخر");
+        }
+
+        /** Expects the "wa" prefix to be removed. */
+        [Test]
+        public void TestWaPrefix()
+        {
+            Check("وحسن", "حسن");
+        }
+
+        /** Expects the "ah" suffix to be removed. */
+        [Test]
+        public void TestAhSuffix()
+        {
+            Check("زوجها", "زوج");
+        }
+
+        /** Expects the "an" suffix to be removed. */
+        [Test]
+        public void TestAnSuffix()
+        {
+            Check("ساهدان", "ساهد");
+        }
+
+        /** Expects the "at" suffix to be removed. */
+        [Test]
+        public void TestAtSuffix()
+        {
+            Check("ساهدات", "ساهد");
+        }
+
+        /** Expects the "wn" suffix to be removed. */
+        [Test]
+        public void TestWnSuffix()
+        {
+            Check("ساهدون", "ساهد");
+        }
+
+        /** Expects the "yn" suffix to be removed. */
+        [Test]
+        public void TestYnSuffix()
+        {
+            Check("ساهدين", "ساهد");
+        }
+
+        /** Expects the "yh" suffix to be removed. */
+        [Test]
+        public void TestYhSuffix()
+        {
+            Check("ساهديه", "ساهد");
+        }
+
+        /** Expects the "yp" suffix to be removed. */
+        [Test]
+        public void TestYpSuffix()
+        {
+            Check("ساهدية", "ساهد");
+        }
+
+        /** Expects the "h" suffix to be removed. */
+        [Test]
+        public void TestHSuffix()
+        {
+            Check("ساهده", "ساهد");
+        }
+
+        /** Expects the "p" suffix to be removed. */
+        [Test]
+        public void TestPSuffix()
+        {
+            Check("ساهدة", "ساهد");
+        }
+
+        /** Expects the "y" suffix to be removed. */
+        [Test]
+        public void TestYSuffix()
+        {
+            Check("ساهدي", "ساهد");
+        }
+
+        /** Expects a prefix and a suffix to be removed from the same word. */
+        [Test]
+        public void TestComboPrefSuf()
+        {
+            Check("وساهدون", "ساهد");
+        }
+
+        /** Expects two stacked suffixes to be removed from the same word. */
+        [Test]
+        public void TestComboSuf()
+        {
+            Check("ساهدهات", "ساهد");
+        }
+
+        /** Expects this short word to be left unchanged. */
+        [Test]
+        public void TestShouldntStem()
+        {
+            Check("الو", "الو");
+        }
+
+        /** Expects non-Arabic text to pass through unchanged. */
+        [Test]
+        public void TestNonArabic()
+        {
+            Check("English", "English");
+        }
+
+        /**
+         * Tokenizes input, applies the stem filter and asserts that the
+         * stream yields exactly one token equal to expected.
+         */
+        private void Check(string input, string expected)
+        {
+            StringReader reader = new StringReader(input);
+            ArabicStemFilter stemmed =
+                new ArabicStemFilter(new ArabicLetterTokenizer(reader));
+
+            AssertTokenStreamContents(stemmed, new string[] { expected });
+        }
+
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/Properties/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Analyzers/Test/Properties/AssemblyInfo.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/Properties/AssemblyInfo.cs (added)
+++ incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/Properties/AssemblyInfo.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Analyzers.Test")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("The Apache Software Foundation")]
+[assembly: AssemblyProduct("Lucene.Net.Analyzers.Test")]
+[assembly: AssemblyCopyright("Copyright 2006 - 2011 The Apache Software Foundation")]
+[assembly: AssemblyTrademark("Copyright 2006 - 2011 The Apache Software Foundation")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("36a962fb-a8be-4238-88c4-32568216e247")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version 
+//      Build Number
+//      Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers 
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("2.9.2.1")]
+[assembly: AssemblyFileVersion("2.9.2.1")]

Added: incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/Test.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/contrib/Analyzers/Test/Test.csproj?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/Test.csproj (added)
+++ incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/Test.csproj Thu Feb 10 21:17:43 2011
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="3.5" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProductVersion>9.0.21022</ProductVersion>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{67D27628-F1D5-4499-9818-B669731925C8}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net.Analyzers</RootNamespace>
+    <AssemblyName>Lucene.Net.Analyzers.Test</AssemblyName>
+    <TargetFrameworkVersion>v2.0</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="Lucene.Net, Version=2.9.2.2, Culture=neutral, processorArchitecture=MSIL">
+      <SpecificVersion>False</SpecificVersion>
+      <HintPath>..\Lucene.Net.dll</HintPath>
+    </Reference>
+    <Reference Include="Lucene.Net.Test, Version=2.9.2.1, Culture=neutral, processorArchitecture=MSIL">
+      <SpecificVersion>False</SpecificVersion>
+      <HintPath>..\Lucene.Net.Test.dll</HintPath>
+    </Reference>
+    <Reference Include="nunit.framework, Version=2.5.2.9222, Culture=neutral, PublicKeyToken=96d09a1eb7f44a77, processorArchitecture=MSIL" />
+    <Reference Include="System" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="AR\TestArabicAnalyzer.cs" />
+    <Compile Include="AR\TestArabicNormalizationFilter.cs" />
+    <Compile Include="AR\TestArabicStemFilter.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\Lucene.Net.Analyzers\Lucene.Net.Analyzers.csproj">
+      <Project>{4286E961-9143-4821-B46D-3D39D3736386}</Project>
+      <Name>Lucene.Net.Analyzers</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file



Mime
View raw message