lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [29/62] [abbrv] lucenenet git commit: Deleted obsolete Contrib folder
Date Sat, 01 Apr 2017 01:09:22 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/En/KStemmer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/En/KStemmer.cs b/src/contrib/Analyzers/En/KStemmer.cs
deleted file mode 100644
index a180eaa..0000000
--- a/src/contrib/Analyzers/En/KStemmer.cs
+++ /dev/null
@@ -1,1759 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using Lucene.Net.Analysis.En;
-
-namespace Lucene.Net.Analysis.En
-{
-    public class KStemmer
-    {
-        // Upper bound on word length (50 characters). Not referenced in the visible
-        // part of this file; presumably longer terms are left unstemmed -- TODO confirm.
-        private const int MaxWordLen = 50;
-
-        // Words ending in `e' that are registered as exception entries:
-        // initializeDictHash() maps each word to itself with the exception flag set.
-        static private readonly String[] exceptionWords = {"aide", "bathe", "caste",
-      "cute", "dame", "dime", "doge", "done", "dune", "envelope", "gage",
-      "grille", "grippe", "lobe", "mane", "mare", "nape", "node", "pane",
-      "pate", "plane", "pope", "programme", "quite", "ripe", "rote", "rune",
-      "sage", "severe", "shoppe", "sine", "slime", "snipe", "steppe", "suite",
-      "swinge", "tare", "tine", "tope", "tripe", "twine"};
-
-        // Irregular forms mapped straight to a root. In each pair, element [0] is
-        // the dictionary key and element [1] is the root stored in its entry
-        // (see initializeDictHash()).
-        static private readonly String[][] directConflations = { new[] {"aging", "age"},
-      new[] {"going", "go"}, new[] {"goes", "go"}, new[] {"lying", "lie"}, new[] {"using", "use"},
-      new[] {"owing", "owe"}, new[] {"suing", "sue"}, new[] {"dying", "die"}, new[] {"tying", "tie"},
-      new[] {"vying", "vie"}, new[] {"aged", "age"}, new[] {"used", "use"},new[] {"vied", "vie"},
-      new[] {"cued", "cue"}, new[] {"died", "die"}, new[] {"eyed", "eye"}, new[] {"hued", "hue"},
-      new[] {"iced", "ice"}, new[] {"lied", "lie"}, new[] {"owed", "owe"}, new[] {"sued", "sue"},
-      new[] {"toed", "toe"}, new[] {"tied", "tie"}, new[] {"does", "do"}, new[] {"doing", "do"},
-      new[] {"aeronautical", "aeronautics"}, new[] {"mathematical", "mathematics"},
-      new[] {"political", "politics"}, new[] {"metaphysical", "metaphysics"},
-      new[] {"cylindrical", "cylinder"}, new[] {"nazism", "nazi"},
-      new[] {"ambiguity", "ambiguous"}, new[] {"barbarity", "barbarous"},
-      new[] {"credulity", "credulous"}, new[] {"generosity", "generous"},
-      new[] {"spontaneity", "spontaneous"}, new[] {"unanimity", "unanimous"},
-      new[] {"voracity", "voracious"}, new[] {"fled", "flee"}, new[] {"miscarriage", "miscarry"}};
-
-        // Pairs of related country / nationality words. Element [0] is the
-        // dictionary key and element [1] is the root stored in its entry
-        // (see initializeDictHash()). Most pairs map a nationality to its country,
-        // but a few go the other way (e.g. "holland" -> "dutch").
-        static private readonly String[][] countryNationality = {
-      new[] {"afghan", "afghanistan"}, new[] {"african", "africa"},
-      new[] {"albanian", "albania"}, new[] {"algerian", "algeria"},
-      new[] {"american", "america"}, new[] {"andorran", "andorra"}, new[] {"angolan", "angola"},
-      new[] {"arabian", "arabia"}, new[] {"argentine", "argentina"},
-      new[] {"armenian", "armenia"}, new[] {"asian", "asia"}, new[] {"australian", "australia"},
-      new[] {"austrian", "austria"}, new[] {"azerbaijani", "azerbaijan"},
-      new[] {"azeri", "azerbaijan"}, new[] {"bangladeshi", "bangladesh"},
-      new[] {"belgian", "belgium"}, new[] {"bermudan", "bermuda"}, new[] {"bolivian", "bolivia"},
-      new[] {"bosnian", "bosnia"}, new[] {"botswanan", "botswana"},
-      new[] {"brazilian", "brazil"}, new[] {"british", "britain"},
-      new[] {"bulgarian", "bulgaria"}, new[] {"burmese", "burma"},
-      new[] {"californian", "california"}, new[] {"cambodian", "cambodia"},
-      new[] {"canadian", "canada"}, new[] {"chadian", "chad"}, new[] {"chilean", "chile"},
-      new[] {"chinese", "china"}, new[] {"colombian", "colombia"}, new[] {"croat", "croatia"},
-      new[] {"croatian", "croatia"}, new[] {"cuban", "cuba"}, new[] {"cypriot", "cyprus"},
-      new[] {"czechoslovakian", "czechoslovakia"}, new[] {"danish", "denmark"},
-      new[] {"egyptian", "egypt"}, new[] {"equadorian", "equador"},
-      new[] {"eritrean", "eritrea"}, new[] {"estonian", "estonia"},
-      new[] {"ethiopian", "ethiopia"}, new[] {"european", "europe"}, new[] {"fijian", "fiji"},
-      new[] {"filipino", "philippines"}, new[] {"finnish", "finland"},
-      new[] {"french", "france"}, new[] {"gambian", "gambia"}, new[] {"georgian", "georgia"},
-      new[] {"german", "germany"}, new[] {"ghanian", "ghana"}, new[] {"greek", "greece"},
-      new[] {"grenadan", "grenada"}, new[] {"guamian", "guam"},
-      new[] {"guatemalan", "guatemala"}, new[] {"guinean", "guinea"},
-      new[] {"guyanan", "guyana"}, new[] {"haitian", "haiti"}, new[] {"hawaiian", "hawaii"},
-      new[] {"holland", "dutch"}, new[] {"honduran", "honduras"}, new[] {"hungarian", "hungary"},
-      new[] {"icelandic", "iceland"}, new[] {"indonesian", "indonesia"},
-      new[] {"iranian", "iran"}, new[] {"iraqi", "iraq"}, new[] {"iraqui", "iraq"},
-      new[] {"irish", "ireland"}, new[] {"israeli", "israel"},
-      new[] {"italian", "italy"},
-      new[] {"jamaican", "jamaica"},
-      new[] {"japanese", "japan"},
-      new[] {"jordanian", "jordan"},
-      new[] {"kampuchean", "cambodia"},
-      new[] {"kenyan", "kenya"},
-      new[] {"korean", "korea"},
-      new[] {"kuwaiti", "kuwait"},
-      new[] {"lankan", "lanka"},
-      new[] {"laotian", "laos"},
-      new[] {"latvian", "latvia"},
-      new[] {"lebanese", "lebanon"},
-      new[] {"liberian", "liberia"},
-      new[] {"libyan", "libya"},
-      new[] {"lithuanian", "lithuania"},
-      new[] {"macedonian", "macedonia"},
-      new[] {"madagascan", "madagascar"},
-      new[] {"malaysian", "malaysia"},
-      new[] {"maltese", "malta"},
-      new[] {"mauritanian", "mauritania"},
-      new[] {"mexican", "mexico"},
-      new[] {"micronesian", "micronesia"},
-      new[] {"moldovan", "moldova"},
-      new[] {"monacan", "monaco"},
-      new[] {"mongolian", "mongolia"},
-      new[] {"montenegran", "montenegro"},
-      new[] {"moroccan", "morocco"},
-      new[] {"myanmar", "burma"},
-      new[] {"namibian", "namibia"},
-      new[] {"nepalese", "nepal"},
-      //new[]  {"netherlands", "dutch"},
-      new[] {"nicaraguan", "nicaragua"}, new[] {"nigerian", "nigeria"},
-      new[] {"norwegian", "norway"}, new[] {"omani", "oman"}, new[] {"pakistani", "pakistan"},
-      new[] {"panamanian", "panama"}, new[] {"papuan", "papua"},
-      new[] {"paraguayan", "paraguay"}, new[] {"peruvian", "peru"},
-      new[] {"portuguese", "portugal"}, new[] {"romanian", "romania"},
-      new[] {"rumania", "romania"}, new[] {"rumanian", "romania"}, new[] {"russian", "russia"},
-      new[] {"rwandan", "rwanda"}, new[] {"samoan", "samoa"}, new[] {"scottish", "scotland"},
-      new[] {"serb", "serbia"}, new[] {"serbian", "serbia"}, new[] {"siam", "thailand"},
-      new[] {"siamese", "thailand"}, new[] {"slovakia", "slovak"}, new[] {"slovakian", "slovak"},
-      new[] {"slovenian", "slovenia"}, new[] {"somali", "somalia"},
-      new[] {"somalian", "somalia"}, new[] {"spanish", "spain"}, new[] {"swedish", "sweden"},
-      new[] {"swiss", "switzerland"}, new[] {"syrian", "syria"}, new[] {"taiwanese", "taiwan"},
-      new[] {"tanzanian", "tanzania"}, new[] {"texan", "texas"}, new[] {"thai", "thailand"},
-      new[] {"tunisian", "tunisia"}, new[] {"turkish", "turkey"}, new[] {"ugandan", "uganda"},
-      new[] {"ukrainian", "ukraine"}, new[] {"uruguayan", "uruguay"},
-      new[] {"uzbek", "uzbekistan"}, new[] {"venezuelan", "venezuela"},
-      new[] {"vietnamese", "viet"}, new[] {"virginian", "virginia"}, new[] {"yemeni", "yemen"},
-      new[] {"yugoslav", "yugoslavia"}, new[] {"yugoslavian", "yugoslavia"},
-      new[] {"zambian", "zambia"}, new[] {"zealander", "zealand"},
-      new[] {"zimbabwean", "zimbabwe"}};
-
-        // Extra words added to the dictionary by initializeDictHash() with the
-        // shared default entry (null root, not an exception).
-        static private readonly String[] supplementDict = {"aids", "applicator",
-      "capacitor", "digitize", "electromagnet", "ellipsoid", "exosphere",
-      "extensible", "ferromagnet", "graphics", "hydromagnet", "polygraph",
-      "toroid", "superconduct", "backscatter", "connectionism"};
-
-        // Lowercased proper nouns added to the dictionary by initializeDictHash()
-        // with the shared default entry (null root, not an exception).
-        // NOTE(review): presumably listed so that their trailing -s/-er/-ing is
-        // not stripped as a suffix -- confirm against the stemming entry point.
-        static private readonly String[] properNouns = {"abrams", "achilles",
-      "acropolis", "adams", "agnes", "aires", "alexander", "alexis", "alfred",
-      "algiers", "alps", "amadeus", "ames", "amos", "andes", "angeles",
-      "annapolis", "antilles", "aquarius", "archimedes", "arkansas", "asher",
-      "ashly", "athens", "atkins", "atlantis", "avis", "bahamas", "bangor",
-      "barbados", "barger", "bering", "brahms", "brandeis", "brussels",
-      "bruxelles", "cairns", "camoros", "camus", "carlos", "celts", "chalker",
-      "charles", "cheops", "ching", "christmas", "cocos", "collins",
-      "columbus", "confucius", "conners", "connolly", "copernicus", "cramer",
-      "cyclops", "cygnus", "cyprus", "dallas", "damascus", "daniels", "davies",
-      "davis", "decker", "denning", "dennis", "descartes", "dickens", "doris",
-      "douglas", "downs", "dreyfus", "dukakis", "dulles", "dumfries",
-      "ecclesiastes", "edwards", "emily", "erasmus", "euphrates", "evans",
-      "everglades", "fairbanks", "federales", "fisher", "fitzsimmons",
-      "fleming", "forbes", "fowler", "france", "francis", "goering",
-      "goodling", "goths", "grenadines", "guiness", "hades", "harding",
-      "harris", "hastings", "hawkes", "hawking", "hayes", "heights",
-      "hercules", "himalayas", "hippocrates", "hobbs", "holmes", "honduras",
-      "hopkins", "hughes", "humphreys", "illinois", "indianapolis",
-      "inverness", "iris", "iroquois", "irving", "isaacs", "italy", "james",
-      "jarvis", "jeffreys", "jesus", "jones", "josephus", "judas", "julius",
-      "kansas", "keynes", "kipling", "kiwanis", "lansing", "laos", "leeds",
-      "levis", "leviticus", "lewis", "louis", "maccabees", "madras",
-      "maimonides", "maldive", "massachusetts", "matthews", "mauritius",
-      "memphis", "mercedes", "midas", "mingus", "minneapolis", "mohammed",
-      "moines", "morris", "moses", "myers", "myknos", "nablus", "nanjing",
-      "nantes", "naples", "neal", "netherlands", "nevis", "nostradamus",
-      "oedipus", "olympus", "orleans", "orly", "papas", "paris", "parker",
-      "pauling", "peking", "pershing", "peter", "peters", "philippines",
-      "phineas", "pisces", "pryor", "pythagoras", "queens", "rabelais",
-      "ramses", "reynolds", "rhesus", "rhodes", "richards", "robins",
-      "rodgers", "rogers", "rubens", "sagittarius", "seychelles", "socrates",
-      "texas", "thames", "thomas", "tiberias", "tunis", "venus", "vilnius",
-      "wales", "warner", "wilkins", "williams", "wyoming", "xmas", "yonkers",
-      "zeus", "frances", "aarhus", "adonis", "andrews", "angus", "antares",
-      "aquinas", "arcturus", "ares", "artemis", "augustus", "ayers",
-      "barnabas", "barnes", "becker", "bejing", "biggs", "billings", "boeing",
-      "boris", "borroughs", "briggs", "buenos", "calais", "caracas", "cassius",
-      "cerberus", "ceres", "cervantes", "chantilly", "chartres", "chester",
-      "connally", "conner", "coors", "cummings", "curtis", "daedalus",
-      "dionysus", "dobbs", "dolores", "edmonds"};
-
-        /* Immutable value stored in the stemmer dictionary for a single word. */
-        private class DictEntry
-        {
-            // root form recorded for the word; null when no root was supplied
-            public readonly String root;
-            // copy of the isException constructor argument
-            public readonly bool exception;
-
-            public DictEntry(String root, bool isException)
-            {
-                exception = isException;
-                this.root = root;
-            }
-        }
-
-        // Shared word -> entry table, built once by initializeDictHash() when the
-        // static field initializers of this class run.
-        private static readonly IDictionary<string, DictEntry> dict_ht = initializeDictHash();
-
-        /***
-         * caching off private int maxCacheSize; private CharArrayMap<String> cache =
-         * null; private static final String SAME = "SAME"; // use if stemmed form is
-         * the same
-         ***/
-
-        // Buffer holding the word being stemmed; the methods below edit it in place
-        // and keep j and k in step with its contents.
-        private readonly OpenStringBuilder word = new OpenStringBuilder(); // OpenStringBuilder
-        private int j; /* index of final letter in stem (within word) */
-        private int k; /*
-                  * INDEX of final letter in word. You must add 1 to k to get
-                  * the current length of word. When you want the length of
-                  * word, use the method wordLength, which returns (k+1).
-                  */
-
-        /***
-         * private void initializeStemHash() { if (maxCacheSize > 0) cache = new
-         * CharArrayMap<String>(maxCacheSize,false); }
-         ***/
-
-        /* Last character of the current word, i.e. the one at index k. */
-        private char finalChar()
-        {
-            int lastIndex = k;
-            return word.charAt(lastIndex);
-        }
-
-        /* Character immediately before the final one, i.e. the one at index k - 1. */
-        private char penultChar()
-        {
-            int beforeLast = k - 1;
-            return word.charAt(beforeLast);
-        }
-
-        /* A position holds a vowel exactly when isCons() says it is not a consonant. */
-        private bool isVowel(int index)
-        {
-            if (isCons(index))
-            {
-                return false;
-            }
-            return true;
-        }
-
-        /*
-         * True when the character at index is a consonant. a/e/i/o/u never are.
-         * A 'y' is a consonant at the start of the word, and elsewhere only when
-         * the character before it is not a consonant. Everything else is a consonant.
-         */
-        private bool isCons(int index)
-        {
-            switch (word.charAt(index))
-            {
-                case 'a':
-                case 'e':
-                case 'i':
-                case 'o':
-                case 'u':
-                    return false;
-                case 'y':
-                    // the status of 'y' depends on its left neighbour
-                    return index == 0 || !isCons(index - 1);
-                default:
-                    return true;
-            }
-        }
-
-        /*
-         * Builds the static stemmer dictionary. Tables are added in a fixed order
-         * and the first table to define a word wins; a later duplicate is only
-         * reported through Debug and otherwise ignored. The number at the end of
-         * each warning identifies the table being added:
-         *   1  exceptionWords      root is the word itself, exception flag set
-         *   2  directConflations   {word, root} pairs
-         *   3  countryNationality  {word, root} pairs
-         *   4  KStemData1..8       plain words sharing one entry with a null root
-         *   5  supplementDict      plain words, same shared entry
-         *   6  properNouns         plain words, same shared entry
-         */
-        private static IDictionary<string, DictEntry> initializeDictHash()
-        {
-            IDictionary<string, DictEntry> d = new Dictionary<string, DictEntry>(1000);
-
-            foreach (String exceptionWord in exceptionWords)
-            {
-                addDictEntry(d, exceptionWord, new DictEntry(exceptionWord, true), 1);
-            }
-
-            addRootPairs(d, directConflations, 2);
-            addRootPairs(d, countryNationality, 3);
-
-            // every plain word shares this single entry (no root, not an exception)
-            DictEntry defaultEntry = new DictEntry(null, false);
-
-            String[][] wordLists =
-            {
-                KStemData1.data, KStemData2.data, KStemData3.data, KStemData4.data,
-                KStemData5.data, KStemData6.data, KStemData7.data, KStemData8.data
-            };
-            foreach (String[] wordList in wordLists)
-            {
-                addPlainWords(d, wordList, defaultEntry, 4);
-            }
-
-            addPlainWords(d, supplementDict, defaultEntry, 5);
-            addPlainWords(d, properNouns, defaultEntry, 6);
-
-            return d;
-        }
-
-        /* Adds each {word, root} pair as a non-exception entry keyed by the word. */
-        private static void addRootPairs(IDictionary<string, DictEntry> d, String[][] pairs, int tableNumber)
-        {
-            foreach (String[] pair in pairs)
-            {
-                addDictEntry(d, pair[0], new DictEntry(pair[1], false), tableNumber);
-            }
-        }
-
-        /* Adds every word of the list with the same shared entry. */
-        private static void addPlainWords(IDictionary<string, DictEntry> d, String[] words, DictEntry sharedEntry, int tableNumber)
-        {
-            foreach (String plainWord in words)
-            {
-                addDictEntry(d, plainWord, sharedEntry, tableNumber);
-            }
-        }
-
-        /*
-         * Adds key -> entry unless the key is already present; in that case the
-         * existing entry is kept and a warning is written to Debug on its own line.
-         */
-        private static void addDictEntry(IDictionary<string, DictEntry> d, String key, DictEntry entry, int tableNumber)
-        {
-            if (d.ContainsKey(key))
-            {
-                Debug.WriteLine("Warning: Entry [" + key + "] already in dictionary " + tableNumber);
-                return;
-            }
-            d.Add(key, entry);
-        }
-
-        /* True only for the lowercase ASCII letters 'a' through 'z'. */
-        private bool isAlpha(char ch)
-        {
-            // terms must be lowercased already, so upper-case letters are rejected
-            if (ch < 'a')
-            {
-                return false;
-            }
-            return ch <= 'z';
-        }
-
-        /* Number of characters in the stem: j is the stem's last index, so j + 1. */
-        private int stemLength()
-        {
-            int lastStemIndex = j;
-            return lastStemIndex + 1;
-        }
-
-        /*
-         * Tests whether the word ends with the characters of s. The suffix must be
-         * no longer than k, so at least one character remains in front of it.
-         * On a match j is set to the index of the character before the suffix;
-         * on a character mismatch j is left equal to k.
-         */
-        private bool endsIn(char[] s)
-        {
-            int suffixLength = s.Length;
-            if (suffixLength > k) return false;
-
-            int suffixStart = word.length() - suffixLength; /* length of word before this suffix */
-            j = k;
-
-            int offset = 0;
-            while (offset < suffixLength)
-            {
-                if (s[offset] != word.charAt(suffixStart + offset)) return false;
-                offset++;
-            }
-
-            j = suffixStart - 1; /* index of the character BEFORE the suffix */
-            return true;
-        }
-
-        /*
-         * Tests whether the word ends with the two characters a, b. Requires k >= 2,
-         * so at least one character precedes the suffix. On a match j is set to the
-         * index just before the suffix; otherwise j is left untouched.
-         */
-        private bool endsIn(char a, char b)
-        {
-            if (k < 2) return false;
-
-            // check left to right since the endings have often already matched
-            if (word.charAt(k - 1) != a || word.charAt(k) != b)
-            {
-                return false;
-            }
-
-            j = k - 2;
-            return true;
-        }
-
-        /*
-         * Tests whether the word ends with the three characters a, b, c. Requires
-         * k >= 3, so at least one character precedes the suffix. On a match j is set
-         * to the index just before the suffix; otherwise j is left untouched.
-         */
-        private bool endsIn(char a, char b, char c)
-        {
-            if (k < 3) return false;
-
-            if (word.charAt(k - 2) != a || word.charAt(k - 1) != b
-                || word.charAt(k) != c)
-            {
-                return false;
-            }
-
-            j = k - 3;
-            return true;
-        }
-
-        /*
-         * Tests whether the word ends with the four characters a, b, c, d. Requires
-         * k >= 4, so at least one character precedes the suffix. On a match j is set
-         * to the index just before the suffix; otherwise j is left untouched.
-         */
-        private bool endsIn(char a, char b, char c, char d)
-        {
-            if (k < 4) return false;
-
-            if (word.charAt(k - 3) != a || word.charAt(k - 2) != b
-                || word.charAt(k - 1) != c || word.charAt(k) != d)
-            {
-                return false;
-            }
-
-            j = k - 4;
-            return true;
-        }
-
-        /*
-         * Returns the dictionary entry for the current contents of word, or null
-         * when the word is not in the dictionary. If matchedEntry is already set
-         * it is returned without touching the dictionary. A freshly found entry is
-         * remembered in matchedEntry unless it is flagged as an exception.
-         */
-        private DictEntry wordInDict()
-        {
-            if (matchedEntry != null) return matchedEntry;
-
-            // one hash probe instead of ContainsKey + indexer;
-            // TryGetValue leaves e null when the key is absent
-            DictEntry e;
-            dict_ht.TryGetValue(word.toString(), out e);
-
-            if (e != null && !e.exception)
-            {
-                matchedEntry = e; // only cache if it's not an exception.
-            }
-            return e;
-        }
-
-        /*
-         * Convert plurals to singular form, and '-ies' to 'y'.
-         * Does nothing unless the word ends in 's'. The word buffer and k are
-         * edited in place; every lookup() call also records its result in
-         * matchedEntry, including the calls whose return value is ignored.
-         */
-        private void plural()
-        {
-            if (word.charAt(k) == 's')
-            {
-                if (endsIn('i', 'e', 's'))
-                {
-                    // drop only the final 's' and try the "-ie" form first
-                    word.setLength(j + 3);
-                    k--;
-                    if (lookup()) /* ensure calories -> calorie */
-                        return;
-                    // not found: restore the 's', then replace "ies" with "y"
-                    k++;
-                    word.unsafeWrite('s');
-                    setSuffix("y");
-                    lookup();
-                }
-                else if (endsIn('e', 's'))
-                {
-                    /* try just removing the "s" */
-                    word.setLength(j + 2);
-                    k--;
-
-                    /*
-                     * note: don't check for exceptions here. So, `aides' -> `aide', but
-                     * `aided' -> `aid'. The exception for double s is used to prevent
-                     * crosses -> crosse. This is actually correct if crosses is a plural
-                     * noun (a type of racket used in lacrosse), but the verb is much more
-                     * common
-                     */
-
-                    /****
-                     * YCS: this was the one place where lookup was not followed by return.
-                     * So restructure it. if ((j>0)&&(lookup(word.toString())) &&
-                     * !((word.charAt(j) == 's') && (word.charAt(j-1) == 's'))) return;
-                     *****/
-                    // tryE is false when the stem is a single character (j == 0)
-                    // or ends in a double 's'
-                    bool tryE = j > 0
-                        && !((word.charAt(j) == 's') && (word.charAt(j - 1) == 's'));
-                    if (tryE && lookup()) return;
-
-                    /* try removing the "es" */
-
-                    word.setLength(j + 1);
-                    k--;
-                    if (lookup()) return;
-
-                    /* the default is to retain the "e" */
-                    word.unsafeWrite('e');
-                    k++;
-
-                    if (!tryE) lookup(); // if we didn't try the "e" ending before
-                    return;
-                }
-                else
-                {
-                    if (word.length() > 3 && penultChar() != 's' && !endsIn('o', 'u', 's'))
-                    {
-                        /* unless the word ends in "ous" or a double "s", remove the final "s" */
-
-                        word.setLength(k);
-                        k--;
-                        lookup();
-                    }
-                }
-            }
-        }
-
-        /* Replaces the current suffix with the whole of s (see setSuff). */
-        private void setSuffix(String s)
-        {
-            int suffixLength = s.Length;
-            setSuff(s, suffixLength);
-        }
-
-        /*
-         * Replaces the old suffix with the first len characters of s: the word is
-         * cut back to the stem (indices 0..j), the characters are appended, and k is
-         * moved to the new final index.
-         */
-        private void setSuff(String s, int len)
-        {
-            word.setLength(j + 1);
-
-            int written = 0;
-            while (written < len)
-            {
-                word.unsafeWrite(s[written]);
-                written++;
-            }
-
-            k = j + len;
-        }
-
-        // almost all uses of lookup() return immediately and are
-        // followed by another lookup in the dict. Store the match
-        // to avoid this double lookup.
-        DictEntry matchedEntry = null;
-
-        /*
-         * Returns true if the word is found in the dictionary. The entry that was
-         * found (or null when there is none) is stored in matchedEntry, replacing
-         * whatever an earlier call left there.
-         */
-        private bool lookup()
-        {
-            // one hash probe instead of ContainsKey + indexer;
-            // TryGetValue leaves found null when the key is absent
-            DictEntry found;
-            dict_ht.TryGetValue(word.toString(), out found);
-
-            matchedEntry = found;
-            return matchedEntry != null;
-        }
-
-        // Set<String> lookups = new HashSet<String>();
-
-        /*
-         * convert past tense (-ed) to present, and `-ied' to `y'.
-         * Words of four letters or fewer are left alone. The word buffer, j and k
-         * are edited in place; candidates are tried in a fixed order and the first
-         * one found in the dictionary is kept.
-         */
-        private void pastTense()
-        {
-            /*
-             * Handle words less than 5 letters with a direct mapping This prevents
-             * (fled -> fl).
-             */
-            if (word.length() <= 4) return;
-
-            if (endsIn('i', 'e', 'd'))
-            {
-                // drop only the final 'd' and try the "-ie" form first
-                word.setLength(j + 3);
-                k--;
-                if (lookup()) /* we almost always want to convert -ied to -y, but */
-                    return; /* this isn't true for short words (died->die) */
-                k++; /* I don't know any long words that this applies to, */
-                word.unsafeWrite('d'); /* but just in case... */
-                setSuffix("y");
-                lookup();
-                return;
-            }
-
-            /* the vowelInStem() is necessary so we don't stem acronyms */
-            if (endsIn('e', 'd') && vowelInStem())
-            {
-                /* see if the root ends in `e' */
-                word.setLength(j + 2);
-                k = j + 1;
-
-                DictEntry entry = wordInDict();
-                if (entry != null) if (!entry.exception) /*
-                                                * if it's in the dictionary and
-                                                * not an exception
-                                                */
-                        return;
-
-                /* try removing the "ed" */
-                word.setLength(j + 1);
-                k = j;
-                if (lookup()) return;
-
-                /*
-                 * try removing a doubled consonant. if the root isn't found in the
-                 * dictionary, the default is to leave it doubled. This will correctly
-                 * capture `backfilled' -> `backfill' instead of `backfill' ->
-                 * `backfille', and seems correct most of the time
-                 */
-
-                if (doubleC(k))
-                {
-                    word.setLength(k);
-                    k--;
-                    if (lookup()) return;
-                    // re-append the consonant that was just dropped
-                    word.unsafeWrite(word.charAt(k));
-                    k++;
-                    lookup();
-                    return;
-                }
-
-                /* if we have a `un-' prefix, then leave the word alone */
-                /* (this will sometimes screw up with `under-', but we */
-                /* will take care of that later) */
-
-                if ((word.charAt(0) == 'u') && (word.charAt(1) == 'n'))
-                {
-                    // put the "ed" back so the word is unchanged
-                    word.unsafeWrite('e');
-                    word.unsafeWrite('d');
-                    k = k + 2;
-                    // nolookup()
-                    return;
-                }
-
-                /*
-                 * it wasn't found by just removing the `d' or the `ed', so prefer to end
-                 * with an `e' (e.g., `microcoded' -> `microcode').
-                 */
-
-                word.setLength(j + 1);
-                word.unsafeWrite('e');
-                k = j + 1;
-                // nolookup() - we already tried the "e" ending
-                return;
-            }
-        }
-
-        /*
-         * Returns true when the characters at i - 1 and i are the same consonant.
-         * Always false for i < 1, where there is no preceding character.
-         */
-        private bool doubleC(int i)
-        {
-            bool hasPredecessor = i >= 1;
-            return hasPredecessor
-                && word.charAt(i) == word.charAt(i - 1)
-                && isCons(i);
-        }
-
-        /* True when at least one position of the stem (indices 0..j) holds a vowel. */
-        private bool vowelInStem()
-        {
-            int pos = 0;
-            while (pos < stemLength())
-            {
-                if (isVowel(pos)) return true;
-                pos++;
-            }
-            return false;
-        }
-
-        /*
-         * handle `-ing' endings.
-         * Words of five letters or fewer are left alone. The word buffer, j and k
-         * are edited in place; candidates are tried in a fixed order and the first
-         * one found in the dictionary is kept.
-         */
-        private void aspect()
-        {
-            /*
-             * handle short words (aging -> age) via a direct mapping. This prevents
-             * (thing -> the) in the version of this routine that ignores inflectional
-             * variants that are mentioned in the dictionary (when the root is also
-             * present)
-             */
-
-            if (word.length() <= 5) return;
-
-            /* the vowelinstem() is necessary so we don't stem acronyms */
-            if (endsIn('i', 'n', 'g') && vowelInStem())
-            {
-
-                /* try adding an `e' to the stem and check against the dictionary */
-                // overwrite the 'i' of "ing" with 'e' and cut the word right after it
-                word.setCharAt(j + 1, 'e');
-                word.setLength(j + 2);
-                k = j + 1;
-
-                DictEntry entry = wordInDict();
-                if (entry != null)
-                {
-                    if (!entry.exception) /* if it's in the dictionary and not an exception */
-                        return;
-                }
-
-                /* adding on the `e' didn't work, so remove it */
-                word.setLength(k);
-                k--; /* note that `ing' has also been removed */
-
-                if (lookup()) return;
-
-                /* if I can remove a doubled consonant and get a word, then do so */
-                if (doubleC(k))
-                {
-                    k--;
-                    word.setLength(k + 1);
-                    if (lookup()) return;
-                    word.unsafeWrite(word.charAt(k)); /* restore the doubled consonant */
-
-                    /* the default is to leave the consonant doubled */
-                    /* (e.g.,`fingerspelling' -> `fingerspell'). Unfortunately */
-                    /* `bookselling' -> `booksell' and `mislabelling' -> `mislabell'). */
-                    /* Without making the algorithm significantly more complicated, this */
-                    /* is the best I can do */
-                    k++;
-                    lookup();
-                    return;
-                }
-
-                /*
-                 * the word wasn't in the dictionary after removing the stem, and then
-                 * checking with and without a final `e'. The default is to add an `e'
-                 * unless the word ends in two consonants, so `microcoding' ->
-                 * `microcode'. The two consonants restriction wouldn't normally be
-                 * necessary, but is needed because we don't try to deal with prefixes and
-                 * compounds, and most of the time it is correct (e.g., footstamping ->
-                 * footstamp, not footstampe; however, decoupled -> decoupl). We can
-                 * prevent almost all of the incorrect stems if we try to do some prefix
-                 * analysis first
-                 */
-
-                if ((j > 0) && isCons(j) && isCons(j - 1))
-                {
-                    // stem ends in two consonants: keep the bare stem
-                    k = j;
-                    word.setLength(k + 1);
-                    // nolookup() because we already did according to the comment
-                    return;
-                }
-
-                word.setLength(j + 1);
-                word.unsafeWrite('e');
-                k = j + 1;
-                // nolookup(); we already tried an 'e' ending
-                return;
-            }
-        }
-
-        /*
-         * this routine deals with -ity endings. It accepts -ability, -ibility,
-         * -ivity, and -ality, even without checking the dictionary because they
-         * are so productive. The first two are mapped to -ble, -ivity is mapped
-         * to -ive, and the -ity is removed for the latter.
-         *
-         * Order of attempts (a successful lookup() returns immediately):
-         *   1. remove -ity
-         *   2. remove -ity and add -e
-         *   3. if the stem ends in -il, -iv or -al: rewrite as described above and
-         *      return regardless of what lookup() reports
-         *   4. the original word, unchanged
-         *   5. default: remove -ity
-         *
-         * The word is edited in place in `word'; `k' is kept equal to the index of
-         * its last character. `j' is read after endsIn() succeeds and is used as
-         * the index of the last character before the suffix (presumably set by
-         * endsIn() -- confirm there).
-         */
-        private void ityEndings()
-        {
-            int old_k = k; /* saved so k can be put back when a trial form is undone */
-
-            if (endsIn('i', 't', 'y'))
-            {
-                word.setLength(j + 1); /* try just removing -ity */
-                k = j;
-                if (lookup()) return;
-                word.unsafeWrite('e'); /* try removing -ity and adding -e */
-                k = j + 1;
-                if (lookup()) return;
-                word.setCharAt(j + 1, 'i'); /* undo: overwrite the trial `e' with `i' ... */
-                word.append("ty"); /* ... and re-append `ty' so the word ends in -ity again */
-                k = old_k;
-                /*
-                 * the -ability and -ibility endings are highly productive, so just accept
-                 * them
-                 *
-                 * NOTE(review): the test only checks that the stem ends in `il', so
-                 * every -ility word takes this branch, not just -ability/-ibility.
-                 */
-                if ((j > 0) && (word.charAt(j - 1) == 'i') && (word.charAt(j) == 'l'))
-                {
-                    word.setLength(j - 1);
-                    word.append("le"); /* convert to -ble */
-                    k = j;
-                    lookup();
-                    return;
-                }
-
-                /* ditto for -ivity */
-                if ((j > 0) && (word.charAt(j - 1) == 'i') && (word.charAt(j) == 'v'))
-                {
-                    word.setLength(j + 1);
-                    word.unsafeWrite('e'); /* convert to -ive */
-                    k = j + 1;
-                    lookup();
-                    return;
-                }
-                /* ditto for -ality */
-                if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 'l'))
-                {
-                    word.setLength(j + 1); /* keep the -al, drop only -ity */
-                    k = j;
-                    lookup();
-                    return;
-                }
-
-                /*
-                 * if the root isn't in the dictionary, and the variant *is* there, then
-                 * use the variant. This allows `immunity'->`immune', but prevents
-                 * `capacity'->`capac'. If neither the variant nor the root form are in
-                 * the dictionary, then remove the ending as a default
-                 */
-
-                if (lookup()) return; /* the word is back in its original form here */
-
-                /* the default is to remove -ity altogether */
-                word.setLength(j + 1);
-                k = j;
-                // nolookup(), we already did it.
-                return;
-            }
-        }
-
-        /*
-         * handle -ence and -ance
-         *
-         * Only words whose character before -nce is `e' or `a' are touched. Tries,
-         * in order, -e/ance -> -e and then removing -e/ance entirely; a successful
-         * lookup() returns at once. If neither succeeds the original ending is
-         * written back and k is restored.
-         */
-        private void nceEndings()
-        {
-            int old_k = k; /* saved so k can be restored if nothing matches */
-            char word_char; /* the vowel (`e' or `a') in front of -nce */
-
-            if (endsIn('n', 'c', 'e'))
-            {
-                word_char = word.charAt(j);
-                if (!((word_char == 'e') || (word_char == 'a'))) return;
-                word.setLength(j);
-                word.unsafeWrite('e'); /* try converting -e/ance to -e (adherence/adhere) */
-                k = j;
-                if (lookup()) return;
-                word.setLength(j); /*
-                          * try removing -e/ance altogether
-                          * (disappearance/disappear)
-                          */
-                k = j - 1;
-                if (lookup()) return;
-                word.unsafeWrite(word_char); /* restore the original ending */
-                word.append("nce");
-                k = old_k;
-                // nolookup() because we restored the original ending
-            }
-            return;
-        }
-
-        /*
-         * handle -ness
-         *
-         * The suffix is always removed (it is treated as productive, so the
-         * result of lookup() is not consulted). A stem that then ends in `i' has
-         * that letter rewritten to `y'.
-         */
-        private void nessEndings()
-        {
-            if (!endsIn('n', 'e', 's', 's'))
-            {
-                return;
-            }
-
-            /* cut the word back to the stem; j becomes the last valid index */
-            word.setLength(j + 1);
-            k = j;
-
-            if (word.charAt(j) == 'i')
-            {
-                word.setCharAt(j, 'y');
-            }
-
-            lookup();
-        }
-
-        /*
-         * handle -ism
-         *
-         * The suffix is always removed (it is treated as productive); lookup() is
-         * called on the remaining stem but its result is not consulted here.
-         */
-        private void ismEndings()
-        {
-            if (!endsIn('i', 's', 'm'))
-            {
-                return;
-            }
-
-            /* cut the word back to the stem; j becomes the last valid index */
-            word.setLength(j + 1);
-            k = j;
-            lookup();
-        }
-
-        /*
-         * this routine deals with -ment endings: the suffix is removed when
-         * lookup() succeeds on the remaining stem; otherwise the word and k are
-         * put back exactly as they were.
-         */
-        private void mentEndings()
-        {
-            int savedK = k;
-
-            if (!endsIn('m', 'e', 'n', 't'))
-            {
-                return;
-            }
-
-            word.setLength(j + 1);
-            k = j;
-            if (!lookup())
-            {
-                /* stem not recognised: restore the original ending */
-                word.append("ment");
-                k = savedK;
-            }
-        }
-
-        /*
-         * this routine deals with -ize endings.
-         *
-         * Tries, in order: removing -ize; removing -ize together with one letter
-         * of a doubled consonant (only when doubleC(j) holds); removing -ize and
-         * adding -e. A successful lookup() returns at once. If none succeeds,
-         * -ize is written back and k is restored.
-         */
-        private void izeEndings()
-        {
-            int old_k = k; /* saved so k can be restored if nothing matches */
-
-            if (endsIn('i', 'z', 'e'))
-            {
-                word.setLength(j + 1); /* try removing -ize entirely */
-                k = j;
-                if (lookup()) return;
-                word.unsafeWrite('i'); /* put the `i' of -ize back (length is j + 2 again) */
-
-                if (doubleC(j))
-                { /* allow for a doubled consonant */
-                    word.setLength(j);
-                    k = j - 1;
-                    if (lookup()) return;
-                    word.unsafeWrite(word.charAt(j - 1)); /* re-append the consonant that was dropped */
-                }
-
-                word.setLength(j + 1);
-                word.unsafeWrite('e'); /* try removing -ize and adding -e */
-                k = j + 1;
-                if (lookup()) return;
-                word.setLength(j + 1);
-                word.append("ize"); /* restore the original ending */
-                k = old_k;
-                // nolookup()
-            }
-            return;
-        }
-
-        /*
-         * handle -ency and -ancy
-         *
-         * Only words whose character before -ncy is `e' or `a' are touched. Tries
-         * -ncy -> -nt first; if that lookup() fails the word is rewritten to -nce,
-         * which is kept as the default whatever the second lookup() reports.
-         */
-        private void ncyEndings()
-        {
-            if (endsIn('n', 'c', 'y'))
-            {
-                if (!((word.charAt(j) == 'e') || (word.charAt(j) == 'a'))) return;
-                word.setCharAt(j + 2, 't'); /* try converting -ncy to -nt */
-                word.setLength(j + 3);
-                k = j + 2;
-
-                if (lookup()) return;
-
-                word.setCharAt(j + 2, 'c'); /* the default is to convert it to -nce */
-                word.unsafeWrite('e');
-                k = j + 3;
-                lookup();
-            }
-            return;
-        }
-
-        /*
-         * handle -able and -ible
-         *
-         * Only words whose character before -ble is `a' or `i' are touched. Tries,
-         * in order: removing -a/ible; removing it together with one letter of a
-         * doubled consonant (only when doubleC(k) holds); replacing it with -e;
-         * replacing it with -ate. A successful lookup() returns at once. If none
-         * succeeds the original ending is written back and k is restored.
-         */
-        private void bleEndings()
-        {
-            int old_k = k; /* saved so k can be restored if nothing matches */
-            char word_char; /* the vowel (`a' or `i') in front of -ble */
-
-            if (endsIn('b', 'l', 'e'))
-            {
-                if (!((word.charAt(j) == 'a') || (word.charAt(j) == 'i'))) return;
-                word_char = word.charAt(j);
-                word.setLength(j); /* try just removing the ending */
-                k = j - 1;
-                if (lookup()) return;
-                if (doubleC(k))
-                { /* allow for a doubled consonant */
-                    word.setLength(k);
-                    k--;
-                    if (lookup()) return;
-                    k++;
-                    word.unsafeWrite(word.charAt(k - 1)); /* re-append the consonant that was dropped */
-                }
-                word.setLength(j);
-                word.unsafeWrite('e'); /* try removing -a/ible and adding -e */
-                k = j;
-                if (lookup()) return;
-                word.setLength(j);
-                word.append("ate"); /* try removing -able and adding -ate */
-                /* (e.g., compensable/compensate) */
-                k = j + 2;
-                if (lookup()) return;
-                word.setLength(j);
-                word.unsafeWrite(word_char); /* restore the original values */
-                word.append("ble");
-                k = old_k;
-                // nolookup()
-            }
-            return;
-        }
-
-        /*
-         * handle -ic endings. This is fairly straightforward, but this is also the
-         * only place we try *expanding* an ending, -ic -> -ical. This is to handle
-         * cases like `canonic' -> `canonical'
-         *
-         * Order of attempts (a successful lookup() returns at once): -ical, -y,
-         * -e, then the bare stem. If none succeeds, -ic is appended again and k
-         * is set to j + 2, the index of the final `c'.
-         */
-        private void icEndings()
-        {
-            if (endsIn('i', 'c'))
-            {
-                word.setLength(j + 3); /* length of stem + "ic"; `al' is appended right after it */
-                word.append("al"); /* try converting -ic to -ical */
-                k = j + 4;
-                if (lookup()) return;
-
-                word.setCharAt(j + 1, 'y'); /* try converting -ic to -y */
-                word.setLength(j + 2);
-                k = j + 1;
-                if (lookup()) return;
-
-                word.setCharAt(j + 1, 'e'); /* try converting -ic to -e */
-                if (lookup()) return; /* length and k are unchanged from the -y attempt */
-
-                word.setLength(j + 1); /* try removing -ic altogether */
-                k = j;
-                if (lookup()) return;
-                word.append("ic"); /* restore the original ending */
-                k = j + 2;
-                // nolookup()
-            }
-            return;
-        }
-
-        private static readonly char[] ization = "ization".ToCharArray(); /* suffixes passed to endsIn(char[]) by ionEndings() */
-        private static readonly char[] ition = "ition".ToCharArray();
-        private static readonly char[] ation = "ation".ToCharArray();
-        private static readonly char[] ication = "ication".ToCharArray();
-
-        /* handle some derivational endings */
-        /*
-         * this routine deals with -ion, -ition, -ation, -ization, and -ication. The
-         * -ization ending is always converted to -ize
-         *
-         * Flow:
-         *   - words not ending in -ion are left alone;
-         *   - -ization is rewritten to -ize and the routine returns;
-         *   - -ition is tried as -e; otherwise -ation is tried as -ate, then -e,
-         *     then as the bare stem;
-         *   - -ication is then tried as -y;
-         *   - finally plain -ion is tried as -e and then as the bare stem.
-         * Every failed attempt writes the original ending back and restores k, so
-         * the word is unchanged when the routine falls off the end.
-         */
-        private void ionEndings()
-        {
-            int old_k = k; /* saved so k can be restored after each failed attempt */
-            if (!endsIn('i', 'o', 'n'))
-            {
-                return;
-            }
-
-            if (endsIn(ization))
-            { /*
-                            * the -ize ending is very productive, so simply
-                            * accept it as the root
-                            */
-                word.setLength(j + 3); /* keep stem + "iz" ... */
-                word.unsafeWrite('e'); /* ... and add `e' to form -ize */
-                k = j + 3;
-                lookup();
-                return;
-            }
-
-            if (endsIn(ition))
-            {
-                word.setLength(j + 1);
-                word.unsafeWrite('e');
-                k = j + 1;
-                if (lookup()) /*
-                     * remove -ition and add `e', and check against the
-                     * dictionary
-                     */
-                    return; /* (e.g., definition->define, opposition->oppose) */
-
-                /* restore original values */
-                word.setLength(j + 1);
-                word.append("ition");
-                k = old_k;
-                // nolookup()
-            }
-            else if (endsIn(ation))
-            {
-                word.setLength(j + 3); /* keep stem + "at" */
-                word.unsafeWrite('e');
-                k = j + 3;
-                if (lookup()) /* remove -ion and add `e', and check against the dictionary */
-                    return; /* (elimination -> eliminate) */
-
-                word.setLength(j + 1);
-                word.unsafeWrite('e'); /*
-                              * remove -ation and add `e', and check against the
-                              * dictionary
-                              */
-                k = j + 1;
-                if (lookup()) return;
-
-                word.setLength(j + 1);/*
-                             * just remove -ation (resignation->resign) and
-                             * check dictionary
-                             */
-                k = j;
-                if (lookup()) return;
-
-                /* restore original values */
-                word.setLength(j + 1);
-                word.append("ation");
-                k = old_k;
-                // nolookup()
-
-            }
-
-            /*
-             * test -ication after -ation is attempted (e.g., `complication->complicate'
-             * rather than `complication->comply')
-             */
-
-            if (endsIn(ication))
-            {
-                word.setLength(j + 1);
-                word.unsafeWrite('y');
-                k = j + 1;
-                if (lookup()) /*
-                     * remove -ication and add `y', and check against the
-                     * dictionary
-                     */
-                    return; /* (e.g., amplification -> amplify) */
-
-                /* restore original values */
-                word.setLength(j + 1);
-                word.append("ication");
-                k = old_k;
-                // nolookup()
-            }
-
-            // if (endsIn(ion)) {
-            if (true)
-            { // we checked for this earlier... just need to set "j"
-                j = k - 3; // YCS: index of the last character before -ion
-
-                word.setLength(j + 1);
-                word.unsafeWrite('e');
-                k = j + 1;
-                if (lookup()) /* remove -ion and add `e', and check against the dictionary */
-                    return;
-
-                word.setLength(j + 1);
-                k = j;
-                if (lookup()) /* remove -ion, and if it's found, treat that as the root */
-                    return;
-
-                /* restore original values */
-                word.setLength(j + 1);
-                word.append("ion");
-                k = old_k;
-                // nolookup()
-            }
-
-            // nolookup(); all of the other paths restored original values
-            return;
-        }
-
-        /*
-         * this routine deals with -er, -or, -ier, and -eer. The -izer ending is
-         * always converted to -ize
-         *
-         * For -er/-or the attempts are, in order (a successful lookup() returns
-         * at once): dropping the ending plus one letter of a doubled consonant;
-         * -ier -> -y; dropping -eer; dropping only the final `r'; dropping
-         * -er/-or; dropping it and adding -e. If none succeeds the original
-         * ending is written back and k is restored.
-         *
-         * Several steps shorten the word with setLength() and later lengthen it
-         * again without rewriting the characters in between; this relies on the
-         * buffer keeping the characters that lie past the current length.
-         */
-        private void erAndOrEndings()
-        {
-            int old_k = k; /* saved so k can be restored if nothing matches */
-
-            if (word.charAt(k) != 'r') return; // YCS: quick reject, every ending handled here finishes in `r'
-
-            char word_char; /* so we can remember if it was -er or -or */
-
-            if (endsIn('i', 'z', 'e', 'r'))
-            { /*
-                                       * -ize is very productive, so accept it
-                                       * as the root
-                                       */
-                word.setLength(j + 4); /* drops only the final `r' */
-                k = j + 3;
-                lookup();
-                return;
-            }
-
-            if (endsIn('e', 'r') || endsIn('o', 'r'))
-            {
-                word_char = word.charAt(j + 1);
-                if (doubleC(j))
-                {
-                    word.setLength(j);
-                    k = j - 1;
-                    if (lookup()) return;
-                    word.unsafeWrite(word.charAt(j - 1)); /* restore the doubled consonant */
-                }
-
-                if (word.charAt(j) == 'i')
-                { /* do we have a -ier ending? */
-                    word.setCharAt(j, 'y');
-                    word.setLength(j + 1);
-                    k = j;
-                    if (lookup()) /* yes, so check against the dictionary */
-                        return;
-                    word.setCharAt(j, 'i'); /* restore the endings */
-                    word.unsafeWrite('e');
-                }
-
-                if (word.charAt(j) == 'e')
-                { /* handle -eer */
-                    word.setLength(j);
-                    k = j - 1;
-                    if (lookup()) return;
-                    word.unsafeWrite('e'); /* put the `e' at index j back */
-                }
-
-                word.setLength(j + 2); /* remove the -r ending (the vowel at j + 1 is still in the buffer) */
-                k = j + 1;
-                if (lookup()) return;
-                word.setLength(j + 1); /* try removing -er/-or */
-                k = j;
-                if (lookup()) return;
-                word.unsafeWrite('e'); /* try removing -or and adding -e */
-                k = j + 1;
-                if (lookup()) return;
-                word.setLength(j + 1);
-                word.unsafeWrite(word_char);
-                word.unsafeWrite('r'); /* restore the word to the way it was */
-                k = old_k;
-                // nolookup()
-            }
-
-        }
-
-        /*
-         * this routine deals with -ly endings. The -ally ending is always converted
-         * to -al. Sometimes this will temporarily leave us with a non-word (e.g.,
-         * heuristically maps to heuristical), but then the -al is removed in the next
-         * step.
-         *
-         * Order of attempts (a successful lookup() returns at once): -ly -> -le;
-         * removing -ly; -ally -> -al (accepted without a match); -ably -> -able
-         * (accepted without a further lookup); -ily -> -y. If nothing matches,
-         * the default is to remove -ly.
-         */
-        private void lyEndings()
-        {
-            int old_k = k; /* saved so k can be restored when a trial form is undone */
-
-            if (endsIn('l', 'y'))
-            {
-
-                word.setCharAt(j + 2, 'e'); /* try converting -ly to -le */
-
-                if (lookup()) return;
-                word.setCharAt(j + 2, 'y'); /* undo the -le attempt */
-
-                word.setLength(j + 1); /* try just removing the -ly */
-                k = j;
-
-                if (lookup()) return;
-
-                if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 'l')) /* always convert -ally to -al */
-                    return;
-                word.append("ly"); /* put -ly back before testing the remaining forms */
-                k = old_k;
-
-                if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 'b'))
-                { /* always convert -ably to -able */
-                    word.setCharAt(j + 2, 'e');
-                    k = j + 2;
-                    return;
-                }
-
-                if (word.charAt(j) == 'i')
-                { /* e.g., militarily -> military */
-                    word.setLength(j);
-                    word.unsafeWrite('y');
-                    k = j;
-                    if (lookup()) return;
-                    word.setLength(j);
-                    word.append("ily");
-                    k = old_k;
-                }
-
-                word.setLength(j + 1); /* the default is to remove -ly */
-
-                k = j;
-                // nolookup()... we already tried removing the "ly" variant
-            }
-            return;
-        }
-
-        /*
-         * this routine deals with -al endings. Some of the endings from the previous
-         * routine are finished up here.
-         *
-         * Words shorter than 4 characters are ignored. Order of attempts (a
-         * successful lookup() returns at once): removing -al; removing -al plus
-         * one letter of a doubled consonant; -al -> -e; -al -> -um. After those
-         * fail the word is restored, and then:
-         *   - for -ical: removing -ical, then -ical -> -y, and by default
-         *     -ical -> -ic;
-         *   - for -ial: removing -ial, otherwise the word is restored.
-         */
-        private void alEndings()
-        {
-            int old_k = k; /* saved so k can be restored when a trial form is undone */
-
-            if (word.length() < 4) return;
-            if (endsIn('a', 'l'))
-            {
-                word.setLength(j + 1);
-                k = j;
-                if (lookup()) /* try just removing the -al */
-                    return;
-
-                if (doubleC(j))
-                { /* allow for a doubled consonant */
-                    word.setLength(j);
-                    k = j - 1;
-                    if (lookup()) return;
-                    word.unsafeWrite(word.charAt(j - 1)); /* re-append the consonant that was dropped */
-                }
-
-                word.setLength(j + 1);
-                word.unsafeWrite('e'); /* try removing the -al and adding -e */
-                k = j + 1;
-                if (lookup()) return;
-
-                word.setLength(j + 1);
-                word.append("um"); /* try converting -al to -um */
-                /* (e.g., optimal - > optimum ) */
-                k = j + 2;
-                if (lookup()) return;
-
-                word.setLength(j + 1);
-                word.append("al"); /* restore the ending to the way it was */
-                k = old_k;
-
-                if ((j > 0) && (word.charAt(j - 1) == 'i') && (word.charAt(j) == 'c'))
-                {
-                    word.setLength(j - 1); /* try removing -ical */
-                    k = j - 2;
-                    if (lookup()) return;
-
-                    word.setLength(j - 1);
-                    word.unsafeWrite('y');/* try turning -ical to -y (e.g., bibliographical) */
-                    k = j - 1;
-                    if (lookup()) return;
-
-                    word.setLength(j - 1);
-                    word.append("ic"); /* the default is to convert -ical to -ic */
-                    k = j;
-                    // nolookup() ... converting ical to ic means removing "al" which we
-                    // already tried
-                    // ERROR (presumably: skipping the lookup was wrong, hence the call below)
-                    lookup();
-                    return;
-                }
-
-                if (word.charAt(j) == 'i')
-                { /* sometimes -ial endings should be removed */
-                    word.setLength(j); /* (sometimes it gets turned into -y, but we */
-                    k = j - 1; /* aren't dealing with that case for now) */
-                    if (lookup()) return;
-                    word.append("ial"); /* restore the original ending */
-                    k = old_k;
-                    lookup();
-                }
-
-            }
-            return;
-        }
-
-        /*
-         * this routine deals with -ive endings. It normalizes some of the -ative
-         * endings directly, and also maps some -ive endings to -ion.
-         *
-         * Order of attempts (a successful lookup() returns at once): removing
-         * -ive; -ive -> -e; for -ative, -ative -> -e and then removing -ative;
-         * finally -ive -> -ion. If nothing matches the original ending is written
-         * back and k is restored.
-         */
-        private void iveEndings()
-        {
-            int old_k = k; /* saved so k can be restored if nothing matches */
-
-            if (endsIn('i', 'v', 'e'))
-            {
-                word.setLength(j + 1); /* try removing -ive entirely */
-                k = j;
-                if (lookup()) return;
-
-                word.unsafeWrite('e'); /* try removing -ive and adding -e */
-                k = j + 1;
-                if (lookup()) return;
-                word.setLength(j + 1);
-                word.append("ive"); /* restore -ive; NOTE(review): k is left at j + 1 here, not old_k */
-                if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 't'))
-                {
-                    word.setCharAt(j - 1, 'e'); /* try removing -ative and adding -e */
-                    word.setLength(j); /* (e.g., determinative -> determine) */
-                    k = j - 1;
-                    if (lookup()) return;
-                    word.setLength(j - 1); /* try just removing -ative */
-                    if (lookup()) return; /* NOTE(review): k is still j - 1 although the word now ends at j - 2 */
-
-                    word.append("ative"); /* restore the original ending */
-                    k = old_k;
-                }
-
-                /* try mapping -ive to -ion (e.g., injunctive/injunction) */
-                word.setCharAt(j + 2, 'o');
-                word.setCharAt(j + 3, 'n');
-                if (lookup()) return;
-
-                word.setCharAt(j + 2, 'v'); /* restore the original values */
-                word.setCharAt(j + 3, 'e');
-                k = old_k;
-                // nolookup()
-            }
-            return;
-        }
-
-        /* Creates a stemmer. The constructor body is empty. */
-        public KStemmer()
-        {
-        }
-
-        /*
-         * Convenience overload: stems `term' and returns the stemmed text, or
-         * `term' itself when stem(String, int) reports that nothing changed.
-         */
-        public String stem(String term)
-        {
-            if (stem(term, term.Length))
-            {
-                return asString();
-            }
-            return term;
-        }
-
-        /**
-         * Returns the result of the stem (assuming the word was changed) as a
-         * String: the value of getString() when that is non-null, otherwise the
-         * current contents of the working buffer `word'.
-         */
-        public String asString()
-        {
-            String stored = getString();
-            return stored ?? word.toString();
-        }
-
-//          public CharSequence asCharSequence() {
-//            return result != null ? result : word;
-//          }
-
-        /* Returns the `result' field as last set by stem(String, int); may be null. */
-        private String getString()
-        {
-            return this.result;
-        }
-
-        /*
-         * Returns the array backing the working buffer `word' (not a copy). Use
-         * getLength() to know how many leading characters are valid.
-         */
-        private char[] getChars()
-        {
-            char[] backing = word.getArray();
-            return backing;
-        }
-
-        /* Returns the number of characters currently held in the working buffer `word'. */
-        private int getLength()
-        {
-            int current = word.length();
-            return current;
-        }
-
-        String result;
-
-        /*
-         * Reports whether matchedEntry currently holds an entry. Performs no
-         * lookup of its own.
-         */
-        private bool matched()
-        {
-            if (matchedEntry == null)
-            {
-                return false;
-            }
-            return true;
-        }
-
-        /**
-         * Stems the text in the token. Returns true if changed.
-         *
-         * term: the word to stem; per the note in the copy loop below it must
-         *       already have been lowercased by the caller.
-         * len:  ignored -- it is overwritten with term.Length on entry.
-         *
-         * Returns false when the term is too short or too long (index of its last
-         * character <= 1 or >= MaxWordLen - 1), when it contains a character
-         * rejected by isAlpha(), or when dict_ht holds an entry for it whose root
-         * is null. Returns true otherwise; the stem is then `result' when that is
-         * non-null, else the contents of `word' (see asString()).
-         *
-         * NOTE(review): once the suffix routines have run, this method returns
-         * true even if none of them altered the word.
-         */
-        public bool stem(String term, int len)
-        {
-            len = term.Length; // HACK for API compatibility
-            result = null;
-
-            k = len - 1; /* k is the index of the last character of the word */
-            if ((k <= 1) || (k >= MaxWordLen - 1))
-            {
-                return false; // don't stem
-            }
-
-            // first check the stemmer dictionaries, and avoid using the
-            // cache if it's in there.
-            DictEntry entry = dict_ht.ContainsKey(term) ? dict_ht[term] : null;
-            if (entry != null)
-            {
-                if (entry.root != null)
-                {
-                    result = entry.root;
-                    return true;
-                }
-                return false;
-            }
-
-            /***
-             * caching off is normally faster if (cache == null) initializeStemHash();
-             * 
-             * // now check the cache, before we copy chars to "word" if (cache != null)
-             * { String val = cache.get(term, 0, len); if (val != null) { if (val !=
-             * SAME) { result = val; return true; } return false; } }
-             ***/
-
-            word.reset();
-            // allocate enough space so that an expansion is never needed
-            word.reserve(len + 10);
-            for (int i = 0; i < len; i++)
-            {
-                char ch = term[i];
-                if (!isAlpha(ch)) return false; // don't stem
-                // don't lowercase... it's a requirement that lowercase filter be
-                // used before this stemmer.
-                word.unsafeWrite(ch);
-            }
-
-            matchedEntry = null;
-            /***
-             * lookups.clear(); lookups.add(word.toString());
-             ***/
-
-            /*
-             * This while loop will never be executed more than one time; it is here
-             * only to allow the break statement to be used to escape as soon as a word
-             * is recognized
-             */
-            while (true)
-            {
-                // YCS: extra lookup()s were inserted so we don't need to
-                // do an extra wordInDict() here.
-                plural();
-                if (matched()) break;
-                pastTense();
-                if (matched()) break;
-                aspect();
-                if (matched()) break;
-                ityEndings();
-                if (matched()) break;
-                nessEndings();
-                if (matched()) break;
-                ionEndings();
-                if (matched()) break;
-                erAndOrEndings();
-                if (matched()) break;
-                lyEndings();
-                if (matched()) break;
-                alEndings();
-                if (matched()) break;
-                entry = wordInDict(); /* NOTE(review): this value is overwritten by `entry = matchedEntry' below before it is read; presumably kept for a side effect of wordInDict() */
-                iveEndings();
-                if (matched()) break;
-                izeEndings();
-                if (matched()) break;
-                mentEndings();
-                if (matched()) break;
-                bleEndings();
-                if (matched()) break;
-                ismEndings();
-                if (matched()) break;
-                icEndings();
-                if (matched()) break;
-                ncyEndings();
-                if (matched()) break;
-                nceEndings();
-                matched(); /* return value unused; the loop is left on the next line either way */
-                break;
-            }
-
-            /*
-             * try for a direct mapping (allows for cases like `Italian'->`Italy' and
-             * `Italians'->`Italy')
-             */
-            entry = matchedEntry;
-            if (entry != null)
-            {
-                result = entry.root; // may be null, which means that "word" is the stem
-            }
-
-            /***
-             * caching off is normally faster if (cache != null && cache.size() <
-             * maxCacheSize) { char[] key = new char[len]; System.arraycopy(term, 0,
-             * key, 0, len); if (result != null) { cache.put(key, result); } else {
-             * cache.put(key, word.toString()); } }
-             ***/
-
-            /***
-             * if (entry == null) { if (!word.toString().equals(new String(term,0,len)))
-             * { System.out.println("CASE:" + word.toString() + "," + new
-             * String(term,0,len));
-             * 
-             * } }
-             ***/
-
-            // no entry matched means result is "word"
-            return true;
-        }
-    }
-
-    /*
-     * A small growable character buffer that exposes its backing array.
-     *
-     * Only the first length() characters of the array are logically part of the
-     * content. setLength() merely moves that boundary: it neither clears nor
-     * grows the array, so characters past the boundary stay readable through
-     * charAt() until they are overwritten. The unsafeWrite() overloads perform
-     * no capacity check; callers must reserve() space first, or use the
-     * write()/append() methods, which grow the array as needed.
-     */
-    internal class OpenStringBuilder
-    {
-        protected char[] buf; /* backing storage; may be longer than len */
-        protected int len; /* number of valid characters in buf */
-
-        /* Creates an empty builder with an initial capacity of 32 characters. */
-        public OpenStringBuilder()
-            : this(32)
-        {
-        }
-
-        /* Creates an empty builder with the given initial capacity. */
-        public OpenStringBuilder(int size)
-        {
-            buf = new char[size];
-        }
-
-        /* Wraps `arr' (without copying) and treats its first `len' characters as content. */
-        public OpenStringBuilder(char[] arr, int len)
-        {
-            set(arr, len);
-        }
-
-        /* Moves the logical end of the content; performs no bounds check and no clearing. */
-        public void setLength(int len) { this.len = len; }
-
-        /* Replaces the backing array with `arr' (not copied) and sets the length to `end'. */
-        public void set(char[] arr, int end)
-        {
-            this.buf = arr;
-            this.len = end;
-        }
-
-        /* Returns the backing array itself, not a copy. */
-        public char[] getArray() { return buf; }
-        public int size() { return len; }
-        public int length() { return len; }
-        public int capacity() { return buf.Length; }
-
-        /* Appends all of `csq', growing the array if necessary. */
-        public OpenStringBuilder append(String csq)
-        {
-            return append(csq, 0, csq.Length);
-        }
-
-        /* Appends the characters csq[start] .. csq[end - 1], growing the array if necessary. */
-        public OpenStringBuilder append(String csq, int start, int end)
-        {
-            reserve(end - start);
-            for (int i = start; i < end; i++)
-            {
-                unsafeWrite(csq[i]);
-            }
-            return this;
-        }
-
-        /* Appends a single character, growing the array if necessary. */
-        public OpenStringBuilder append(char c)
-        {
-            write(c);
-            return this;
-        }
-
-        /* Reads from the backing array directly; `index' is not checked against length(). */
-        public char charAt(int index)
-        {
-            return buf[index];
-        }
-
-        /* Writes into the backing array directly; the length is left unchanged. */
-        public void setCharAt(int index, char ch)
-        {
-            buf[index] = ch;
-        }
-
-        /* Appends without a capacity check; the caller must have reserved the space. */
-        public void unsafeWrite(char b)
-        {
-            buf[len++] = b;
-        }
-
-        public void unsafeWrite(int b) { unsafeWrite((char)b); }
-
-        /* Appends b[off] .. b[off + len - 1] without a capacity check. */
-        public void unsafeWrite(char[] b, int off, int len)
-        {
-            Array.Copy(b, off, buf, this.len, len);
-            this.len += len;
-        }
-
-        /* Grows the array to at least `len' characters (at least doubling it), keeping the content. */
-        protected void resize(int len)
-        {
-            char[] newbuf = new char[Math.Max(buf.Length << 1, len)];
-            Array.Copy(buf, newbuf, size());
-            buf = newbuf;
-        }
-
-        /* Makes sure `num' more characters can be appended without another resize. */
-        public void reserve(int num)
-        {
-            if (len + num > buf.Length) resize(len + num);
-        }
-
-        /* Appends a single character, growing the array if necessary. */
-        public void write(char b)
-        {
-            if (len >= buf.Length)
-            {
-                resize(len + 1);
-            }
-            unsafeWrite(b);
-        }
-
-        public void write(int b) { write((char)b); }
-
-        /* Appends the whole of `b', growing the array if necessary. */
-        public void write(char[] b)
-        {
-            write(b, 0, b.Length);
-        }
-
-        /* Appends b[off] .. b[off + len - 1], growing the array if necessary. */
-        public void write(char[] b, int off, int len)
-        {
-            reserve(len);
-            unsafeWrite(b, off, len);
-        }
-
-        /*
-         * Appends the content of another builder.
-         *
-         * The number of characters copied is the SOURCE builder's length. The
-         * previous code passed this builder's own length instead, which copied
-         * the wrong number of characters and could read past the source's
-         * valid content (or past the end of its array).
-         */
-        public void write(OpenStringBuilder arr)
-        {
-            write(arr.buf, 0, arr.len);
-        }
-
-        //  public void write(String s) {
-        //    reserve(s.Length);
-        //    s.GetChars(0,s.Length,buf, len);
-        //    len +=s.length();
-        //  }
-
-        /* No-op. */
-        public void flush()
-        {
-        }
-
-        /* Empties the builder by setting the length to 0; the array is kept and not cleared. */
-        public void reset()
-        {
-            len = 0;
-        }
-
-        /* Returns a copy of the content in a new array of exactly size() characters. */
-        public char[] toCharArray()
-        {
-            char[] newbuf = new char[size()];
-            Array.Copy(buf, newbuf, size());
-            return newbuf;
-        }
-
-        /* Returns the content as a new string. */
-        public String toString()
-        {
-            return new String(buf, 0, size());
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Fa/PersianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Fa/PersianAnalyzer.cs b/src/contrib/Analyzers/Fa/PersianAnalyzer.cs
deleted file mode 100644
index db6292d..0000000
--- a/src/contrib/Analyzers/Fa/PersianAnalyzer.cs
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using Lucene.Net.Analysis.AR;
-using Version = Lucene.Net.Util.Version;
-
-namespace Lucene.Net.Analysis.Fa
-{
-    /*
-     * {@link Analyzer} for Persian.
-     * <p>
-     * This Analyzer uses {@link ArabicLetterTokenizer} which implies tokenizing around
-     * zero-width non-joiner in addition to whitespace. Some persian-specific variant forms (such as farsi
-     * yeh and keheh) are standardized. "Stemming" is accomplished via stopwords.
-     * </p>
-     */
-    public sealed class PersianAnalyzer : Analyzer
-    {
-
-        /*
-         * File containing default Persian stopwords.
-         * 
-         * Default stopword list is from
-         * http://members.unine.ch/jacques.savoy/clef/index.html The stopword list is
-         * BSD-Licensed.
-         * 
-         */
-        public readonly static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
-
-        /*
-         * Contains the stopwords used with the StopFilter.
-         */
-        private readonly ISet<string> stoptable;
-
-        /*
-         * The comment character in the stopwords file. All lines prefixed with this
-         * will be ignored
-         */
-        public static readonly String STOPWORDS_COMMENT = "#";
-
-        /*
-         * Returns an unmodifiable instance of the default stop-words set.
-         * @return an unmodifiable instance of the default stop-words set.
-         */
-        public static ISet<string> getDefaultStopSet()
-        {
-            return DefaultSetHolder.DEFAULT_STOP_SET;
-        }
-
-        /*
-         * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
-         * accesses the static final set the first time.;
-         */
-        private static class DefaultSetHolder
-        {
-            internal static readonly ISet<string> DEFAULT_STOP_SET;
-
-            static DefaultSetHolder()
-            {
-                try
-                {
-                    DEFAULT_STOP_SET = LoadDefaultStopWordSet();
-                }
-                catch (IOException ex)
-                {
-                    // default set should always be present as it is part of the
-                    // distribution (JAR)
-                    throw new Exception("Unable to load default stopword set");
-                }
-            }
-
-            static ISet<String> LoadDefaultStopWordSet()
-            {
-
-                var stream = System.Reflection.Assembly.GetAssembly(typeof(PersianAnalyzer)).GetManifestResourceStream("Lucene.Net.Analyzers.Fa." + DEFAULT_STOPWORD_FILE);
-                try
-                {
-                    StreamReader reader = new StreamReader(stream, System.Text.Encoding.UTF8);
-                    // make sure it is unmodifiable as we expose it in the outer class
-                    return CharArraySet.UnmodifiableSet(new CharArraySet(WordlistLoader.GetWordSet(reader, STOPWORDS_COMMENT), true));
-                }
-                finally
-                {
-                    stream.Close();
-                }
-            }
-        }
-
-        private readonly Version matchVersion;
-
-        /*
-         * Builds an analyzer with the default stop words:
-         * {@link #DEFAULT_STOPWORD_FILE}.
-         */
-        public PersianAnalyzer(Version matchVersion)
-            : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
-        {
-
-        }
-
-        /*
-         * Builds an analyzer with the given stop words 
-         * 
-         * @param matchVersion
-         *          lucene compatibility version
-         * @param stopwords
-         *          a stopword set
-         */
-        public PersianAnalyzer(Version matchVersion, ISet<string> stopwords)
-        {
-            stoptable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
-            this.matchVersion = matchVersion;
-        }
-
-        /*
-         * Builds an analyzer with the given stop words.
-         * @deprecated use {@link #PersianAnalyzer(Version, Set)} instead
-         */
-        public PersianAnalyzer(Version matchVersion, params string[] stopwords)
-            : this(matchVersion, StopFilter.MakeStopSet(stopwords))
-        {
-
-        }
-
-        /*
-         * Builds an analyzer with the given stop words.
-         * @deprecated use {@link #PersianAnalyzer(Version, Set)} instead
-         */
-        public PersianAnalyzer(Version matchVersion, IDictionary<string, string> stopwords)
-            : this(matchVersion, stopwords.Keys.ToArray())
-        {
-
-        }
-
-        /*
-         * Builds an analyzer with the given stop words. Lines can be commented out
-         * using {@link #STOPWORDS_COMMENT}
-         * @deprecated use {@link #PersianAnalyzer(Version, Set)} instead
-         */
-        public PersianAnalyzer(Version matchVersion, FileInfo stopwords)
-            : this(matchVersion, WordlistLoader.GetWordSet(stopwords, STOPWORDS_COMMENT))
-        {
-
-        }
-
-        /*
-         * Creates a {@link TokenStream} which tokenizes all the text in the provided
-         * {@link Reader}.
-         * 
-         * @return A {@link TokenStream} built from a {@link ArabicLetterTokenizer}
-         *         filtered with {@link LowerCaseFilter}, 
-         *         {@link ArabicNormalizationFilter},
-         *         {@link PersianNormalizationFilter} and Persian Stop words
-         */
-        public override TokenStream TokenStream(String fieldName, TextReader reader)
-        {
-            TokenStream result = new ArabicLetterTokenizer(reader);
-            result = new LowerCaseFilter(result);
-            result = new ArabicNormalizationFilter(result);
-            /* additional persian-specific normalization */
-            result = new PersianNormalizationFilter(result);
-            /*
-             * the order here is important: the stopword list is normalized with the
-             * above!
-             */
-            result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
-                                    result, stoptable);
-            return result;
-        }
-
-        private class SavedStreams
-        {
-            protected internal Tokenizer source;
-            protected internal TokenStream result;
-        }
-
-        /*
-         * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text 
-         * in the provided {@link Reader}.
-         * 
-         * @return A {@link TokenStream} built from a {@link ArabicLetterTokenizer}
-         *         filtered with {@link LowerCaseFilter}, 
-         *         {@link ArabicNormalizationFilter},
-         *         {@link PersianNormalizationFilter} and Persian Stop words
-         */
-        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
-        {
-            SavedStreams streams = (SavedStreams)PreviousTokenStream;
-            if (streams == null)
-            {
-                streams = new SavedStreams();
-                streams.source = new ArabicLetterTokenizer(reader);
-                streams.result = new LowerCaseFilter(streams.source);
-                streams.result = new ArabicNormalizationFilter(streams.result);
-                /* additional persian-specific normalization */
-                streams.result = new PersianNormalizationFilter(streams.result);
-                /*
-                 * the order here is important: the stopword list is normalized with the
-                 * above!
-                 */
-                streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
-                                                streams.result, stoptable);
-                PreviousTokenStream = streams;
-            }
-            else
-            {
-                streams.source.Reset(reader);
-            }
-            return streams.result;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Fa/PersianNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Fa/PersianNormalizationFilter.cs b/src/contrib/Analyzers/Fa/PersianNormalizationFilter.cs
deleted file mode 100644
index 841e907..0000000
--- a/src/contrib/Analyzers/Fa/PersianNormalizationFilter.cs
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using Lucene.Net.Analysis.Tokenattributes;
-
-namespace Lucene.Net.Analysis.Fa
-{
-    /*
- * A {@link TokenFilter} that applies {@link PersianNormalizer} to normalize the
- * orthography.
- * 
- */
-
-public sealed class PersianNormalizationFilter : TokenFilter {
-
-  private readonly PersianNormalizer normalizer;
-  private readonly ITermAttribute termAtt;
-
-  public PersianNormalizationFilter(TokenStream input) 
-      :base(input)
-  {
-    normalizer = new PersianNormalizer();
-    termAtt = AddAttribute<ITermAttribute>();
-  }
-
-  public override bool IncrementToken()
-{
-    if (input.IncrementToken()) {
-      int newlen = normalizer.Normalize(termAtt.TermBuffer(), termAtt.TermLength());
-      termAtt.SetTermLength(newlen);
-      return true;
-    } 
-    return false;
-  }
-}
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Fa/PersianNormalizer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Fa/PersianNormalizer.cs b/src/contrib/Analyzers/Fa/PersianNormalizer.cs
deleted file mode 100644
index 4e0bbb1..0000000
--- a/src/contrib/Analyzers/Fa/PersianNormalizer.cs
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-
-namespace Lucene.Net.Analysis.Fa
-{
-/*
- * Normalizer for Persian.
- * <p>
- * Normalization is done in-place for efficiency, operating on a termbuffer.
- * <p>
- * Normalization is defined as:
- * <ul>
- * <li>Normalization of various heh + hamza forms and heh goal to heh.
- * <li>Normalization of farsi yeh and yeh barree to arabic yeh
- * <li>Normalization of persian keheh to arabic kaf
- * </ul>
- * 
- */
-public class PersianNormalizer {
-  public const char YEH = '\u064A';
-
-  public const char FARSI_YEH = '\u06CC';
-
-  public const char YEH_BARREE = '\u06D2';
-
-  public const char KEHEH = '\u06A9';
-
-  public const char KAF = '\u0643';
-
-  public const char HAMZA_ABOVE = '\u0654';
-
-  public const char HEH_YEH = '\u06C0';
-
-  public const char HEH_GOAL = '\u06C1';
-
-  public const char HEH = '\u0647';
-
-  /*
-   * Normalize an input buffer of Persian text
-   * 
-   * @param s input buffer
-   * @param len length of input buffer
-   * @return length of input buffer after normalization
-   */
-  public int Normalize(char[] s, int len) {
-
-    for (int i = 0; i < len; i++) {
-      switch (s[i]) {
-      case FARSI_YEH:
-      case YEH_BARREE:
-        s[i] = YEH;
-        break;
-      case KEHEH:
-        s[i] = KAF;
-        break;
-      case HEH_YEH:
-      case HEH_GOAL:
-        s[i] = HEH;
-        break;
-      case HAMZA_ABOVE: // necessary for HEH + HAMZA
-        len = Delete(s, i, len);
-        i--;
-        break;
-      default:
-        break;
-      }
-    }
-
-    return len;
-  }
-
-  /*
-   * Delete a character in-place
-   * 
-   * @param s Input Buffer
-   * @param pos Position of character to delete
-   * @param len length of input buffer
-   * @return length of input buffer after deletion
-   */
-  protected int Delete(char[] s, int pos, int len) {
-    if (pos < len)
-      Array.Copy(s, pos + 1, s, pos, len - pos - 1);
-    
-    return len - 1;
-  }
-
-}
-}


Mime
View raw message