lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [13/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.
Date Tue, 27 Jun 2017 20:33:58 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/PhoneticEngine.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/PhoneticEngine.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/PhoneticEngine.cs
new file mode 100644
index 0000000..3cf5c7a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/PhoneticEngine.cs
@@ -0,0 +1,578 @@
+// commons-codec version compatibility level: 1.9
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Converts words into potential phonetic representations.
+    /// </summary>
+    /// <remarks>
+    /// This is a two-stage process. Firstly, the word is converted into a phonetic representation that takes
+    /// into account the likely source language. Next, this phonetic representation is converted into a
+    /// pan-European 'average' representation, allowing comparison between different versions of essentially
+    /// the same word from different languages.
+    /// <para/>
+    /// This class is intentionally immutable and thread-safe.
+    /// If you wish to alter the settings for a PhoneticEngine, you
+    /// must make a new one with the updated settings.
+    /// <para/>
+    /// Ported from phoneticengine.php
+    /// <para/>
+    /// since 1.6
+    /// </remarks>
+    public class PhoneticEngine
+    {
+        // Splits the tidied input on runs of whitespace. Static and read-only so the compiled pattern is
+        // built once and shared, instead of being recompiled (and left reassignable) on every engine instance.
+        internal static readonly Regex WHITESPACE = new Regex("\\s+", RegexOptions.Compiled);
+
+        /// <summary>
+        /// Utility for manipulating a set of phonemes as they are being built up. Not intended for use outside
+        /// this package, and probably not outside the <see cref="PhoneticEngine"/> class.
+        /// <para/>
+        /// since 1.6
+        /// </summary>
+        internal sealed class PhonemeBuilder
+        {
+            /// <summary>
+            /// An empty builder where all phonemes must come from some set of languages. This will contain a single
+            /// phoneme of zero characters. This can then be appended to. This should be the only way to create a new
+            /// phoneme from scratch.
+            /// </summary>
+            /// <param name="languages">The set of languages.</param>
+            /// <returns>A new, empty phoneme builder.</returns>
+            public static PhonemeBuilder Empty(LanguageSet languages)
+            {
+                return new PhonemeBuilder(new Phoneme("", languages));
+            }
+
+            // Backing list; returned directly by the Phonemes property and modified in place by Append and Apply.
+            private readonly IList<Phoneme> phonemes;
+
+            /// <summary>
+            /// Creates a builder that holds exactly one phoneme.
+            /// </summary>
+            /// <param name="phoneme">The initial phoneme.</param>
+            private PhonemeBuilder(Phoneme phoneme)
+            {
+                // LUCENENET NOTE: LinkedHashSet cares about insertion order - in .NET, we can just use List<T> for that
+                this.phonemes = new List<Phoneme>();
+                this.phonemes.Add(phoneme);
+            }
+
+            /// <summary>
+            /// Creates a builder around an existing list. The list is stored as-is (not copied), so it must
+            /// support <c>Clear</c> and <c>Add</c> if <see cref="Apply(IPhonemeExpr, int)"/> is later called.
+            /// </summary>
+            /// <param name="phonemes">The phonemes to hold.</param>
+            internal PhonemeBuilder(IList<Phoneme> phonemes)
+            {
+                this.phonemes = phonemes;
+            }
+
+            /// <summary>
+            /// Appends <paramref name="str"/> to every phoneme held by this builder. No new builder is created;
+            /// this relies on <c>Phoneme.Append</c> updating each phoneme in place (any return value is ignored).
+            /// </summary>
+            /// <param name="str">The characters to append to the phonemes.</param>
+            public void Append(ICharSequence str)
+            {
+                foreach (Phoneme ph in this.phonemes)
+                {
+                    ph.Append(str.ToString());
+                }
+            }
+
+            /// <summary>
+            /// Appends <paramref name="str"/> to every phoneme held by this builder. No new builder is created;
+            /// this relies on <c>Phoneme.Append</c> updating each phoneme in place (any return value is ignored).
+            /// </summary>
+            /// <param name="str">The characters to append to the phonemes.</param>
+            // LUCENENET specific
+            public void Append(string str)
+            {
+                foreach (Phoneme ph in this.phonemes)
+                {
+                    ph.Append(str);
+                }
+            }
+
+            /// <summary>
+            /// Appends <paramref name="str"/> to every phoneme held by this builder. No new builder is created;
+            /// this relies on <c>Phoneme.Append</c> updating each phoneme in place (any return value is ignored).
+            /// </summary>
+            /// <param name="str">The characters to append to the phonemes.</param>
+            // LUCENENET specific
+            public void Append(StringBuilder str)
+            {
+                foreach (Phoneme ph in this.phonemes)
+                {
+                    ph.Append(str.ToString());
+                }
+            }
+
+            /// <summary>
+            /// Applies the given phoneme expression to all phonemes in this phoneme builder.
+            /// <para/>
+            /// This will lengthen phonemes that have compatible language sets to the expression, and drop those that are
+            /// incompatible. The builder's own list is replaced in place with the result.
+            /// </summary>
+            /// <param name="phonemeExpr">The expression to apply.</param>
+            /// <param name="maxPhonemes">The maximum number of phonemes to build up.</param>
+            public void Apply(IPhonemeExpr phonemeExpr, int maxPhonemes)
+            {
+                // LUCENENET NOTE: LinkedHashSet cares about insertion order - in .NET, we can just use List<T> for that
+                IList<Phoneme> newPhonemes = new List<Phoneme>(maxPhonemes);
+
+                //EXPR_continue:
+                foreach (Phoneme left in this.phonemes)
+                {
+                    foreach (Phoneme right in phonemeExpr.Phonemes)
+                    {
+                        // Only combine phonemes whose language sets still have something in common.
+                        LanguageSet languages = left.Languages.RestrictTo(right.Languages);
+                        if (!languages.IsEmpty)
+                        {
+                            Phoneme join = new Phoneme(left, right, languages);
+                            if (newPhonemes.Count < maxPhonemes)
+                            {
+                                newPhonemes.Add(join);
+                                if (newPhonemes.Count >= maxPhonemes)
+                                {
+                                    // Cap reached: stop both loops.
+                                    goto EXPR_break;
+                                }
+                            }
+                        }
+                    }
+                }
+                EXPR_break: { }
+
+                this.phonemes.Clear();
+                // LUCENENET: We need to filter out any duplicates, since we converted from LinkedHashSet
+                // to List. Each candidate is compared with the phonemes already kept and added immediately,
+                // so the de-duplication no longer depends on a lazily evaluated query observing a list that
+                // is being filled while it is enumerated.
+                foreach (Phoneme candidate in newPhonemes)
+                {
+                    if (!this.phonemes.Any(kept => kept.Equals(candidate)))
+                    {
+                        this.phonemes.Add(candidate);
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Gets underlying phoneme set. Please don't mutate.
+            /// </summary>
+            public IList<Phoneme> Phonemes
+            {
+                get { return this.phonemes; }
+            }
+
+            /// <summary>
+            /// Stringifies the phoneme set. This produces a single string of the strings of each phoneme,
+            /// joined with a pipe. This is explicitly provided in place of <see cref="object.ToString()"/> as it is a potentially
+            /// expensive operation, which should be avoided when debugging.
+            /// </summary>
+            /// <returns>The stringified phoneme set.</returns>
+            public string MakeString()
+            {
+                StringBuilder sb = new StringBuilder();
+
+                foreach (Phoneme ph in this.phonemes)
+                {
+                    // A separator is written only once something has already been appended.
+                    if (sb.Length > 0)
+                    {
+                        sb.Append("|");
+                    }
+                    sb.Append(ph.GetPhonemeText());
+                }
+
+                return sb.ToString();
+            }
+        }
+
+        /// <summary>
+        /// A function closure capturing the application of a list of rules to an input sequence at a particular offset.
+        /// After invocation, the values <c>i</c> and <c>found</c> are updated. <c>i</c> points to the
+        /// index of the next char in <c>input</c> that must be processed next (the input up to that index having been
+        /// processed already), and <c>found</c> indicates if a matching rule was found or not. In the case where a
+        /// matching rule was found, <c>phonemeBuilder</c> is replaced with a new builder containing the phonemes
+        /// updated by the matching rule.
+        /// <para/>
+        /// Although this class is not thread-safe (it has mutable unprotected fields), it is not shared between threads
+        /// as it is constructed as needed by the calling methods.
+        /// <para/>
+        /// since 1.6
+        /// </summary>
+        private sealed class RulesApplication
+        {
+            private readonly IDictionary<string, IList<Rule>> finalRules;
+            private readonly string input;
+
+            private PhonemeBuilder phonemeBuilder;
+            private int i;
+            private readonly int maxPhonemes;
+            private bool found;
+
+            /// <summary>
+            /// Captures everything needed to apply <paramref name="finalRules"/> to <paramref name="input"/>
+            /// at offset <paramref name="i"/>.
+            /// </summary>
+            /// <param name="finalRules">Rules keyed by the first character of their pattern (see <see cref="Invoke()"/>).</param>
+            /// <param name="input">The text being processed.</param>
+            /// <param name="phonemeBuilder">The builder that a matching rule is applied to.</param>
+            /// <param name="i">The index in <paramref name="input"/> to match at.</param>
+            /// <param name="maxPhonemes">The phoneme cap handed to <c>PhonemeBuilder.Apply</c>.</param>
+            /// <exception cref="ArgumentNullException"><paramref name="finalRules"/> is <c>null</c>.</exception>
+            public RulesApplication(IDictionary<string, IList<Rule>> finalRules, string input,
+                                    PhonemeBuilder phonemeBuilder, int i, int maxPhonemes)
+            {
+                if (finalRules == null)
+                {
+                    // The single-string ArgumentNullException constructor takes the parameter NAME, not a
+                    // message, so both are passed explicitly.
+                    throw new ArgumentNullException("finalRules", "The finalRules argument must not be null");
+                }
+                this.finalRules = finalRules;
+                this.phonemeBuilder = phonemeBuilder;
+                this.input = input;
+                this.i = i;
+                this.maxPhonemes = maxPhonemes;
+            }
+
+            /// <summary>
+            /// Gets the index of the next character of the input to process.
+            /// </summary>
+            public int I
+            {
+                get { return this.i; }
+            }
+
+            /// <summary>
+            /// Gets the phoneme builder this application operates on.
+            /// </summary>
+            public PhonemeBuilder PhonemeBuilder
+            {
+                get { return this.phonemeBuilder; }
+            }
+
+            /// <summary>
+            /// Invokes the rules. Loops over the rules list, stopping at the first one that has a matching context
+            /// and pattern. Then applies this rule to the phoneme builder to produce updated phonemes. If there was no
+            /// match, <c>i</c> is advanced one and the character is silently dropped from the phonetic spelling.
+            /// </summary>
+            /// <returns><c>this</c></returns>
+            public RulesApplication Invoke()
+            {
+                this.found = false;
+                int patternLength = 1;
+                IList<Rule> rules;
+                // Candidate rules are looked up by the single character at the current offset.
+                if (this.finalRules.TryGetValue(input.Substring(i, patternLength), out rules) && rules != null)
+                {
+                    foreach (Rule rule in rules)
+                    {
+                        string pattern = rule.Pattern;
+                        patternLength = pattern.Length;
+                        if (rule.PatternAndContextMatches(this.input, this.i))
+                        {
+                            this.phonemeBuilder.Apply(rule.Phoneme, maxPhonemes);
+                            this.found = true;
+                            break;
+                        }
+                    }
+                }
+
+                if (!this.found)
+                {
+                    // patternLength may hold the length of the last rule tried; without a match only one
+                    // character is consumed.
+                    patternLength = 1;
+                }
+
+                this.i += patternLength;
+                return this;
+            }
+
+            /// <summary>
+            /// Gets whether the last call to <see cref="Invoke()"/> found a matching rule.
+            /// </summary>
+            public bool IsFound
+            {
+                get { return this.found; }
+            }
+        }
+
+        private static readonly IDictionary<NameType, ISet<string>> NAME_PREFIXES = new Dictionary<NameType, ISet<string>>();
+
+        // Fills NAME_PREFIXES with one unmodifiable prefix set per name type. The element order inside each
+        // initializer is kept exactly as written, since Encode iterates the GENERIC set.
+        static PhoneticEngine()
+        {
+            HashSet<string> ashkenaziPrefixes = new HashSet<string>()
+            {
+                "bar", "ben", "da", "de", "van", "von"
+            };
+            HashSet<string> sephardicPrefixes = new HashSet<string>()
+            {
+                "al", "el", "da", "dal", "de", "del", "dela", "de la",
+                "della", "des", "di", "do", "dos", "du", "van", "von"
+            };
+            HashSet<string> genericPrefixes = new HashSet<string>()
+            {
+                "da", "dal", "de", "del", "dela", "de la", "della",
+                "des", "di", "do", "dos", "du", "van", "von"
+            };
+
+            NAME_PREFIXES[NameType.ASHKENAZI] = Collections.UnmodifiableSet(ashkenaziPrefixes);
+            NAME_PREFIXES[NameType.SEPHARDIC] = Collections.UnmodifiableSet(sephardicPrefixes);
+            NAME_PREFIXES[NameType.GENERIC] = Collections.UnmodifiableSet(genericPrefixes);
+        }
+
+        /// <summary>
+        /// Joins some strings with an internal separator.
+        /// </summary>
+        /// <param name="strings">Strings to join.</param>
+        /// <param name="sep">String to separate them with.</param>
+        /// <returns>A single string consisting of each element of <paramref name="strings"/> interleaved by <paramref name="sep"/>.</returns>
+        private static string Join(IEnumerable<string> strings, string sep)
+        {
+            // string.Join gives the same result as the former manual enumerator loop: no leading or trailing
+            // separator, and null elements (or a null separator) contribute nothing, just as
+            // StringBuilder.Append(null) did.
+            return string.Join(sep, strings);
+        }
+
+        private static readonly int DEFAULT_MAX_PHONEMES = 20; // cap used by the three-argument constructor
+
+        private readonly Lang lang; // obtained from Lang.GetInstance(nameType) in the constructor
+
+        private readonly NameType nameType; // selects the rule maps and the prefix handling in Encode
+
+        private readonly RuleType ruleType; // never RuleType.RULES; the constructor rejects that value
+
+        private readonly bool concat; // when true, Encode joins the filtered words with spaces and encodes them as one input
+
+        private readonly int maxPhonemes; // handed to every RulesApplication as the phoneme cap
+
+        /// <summary>
+        /// Generates a new, fully-configured phonetic engine that uses the default phoneme cap (<c>DEFAULT_MAX_PHONEMES</c>).
+        /// </summary>
+        /// <param name="nameType">The type of names it will use.</param>
+        /// <param name="ruleType">The type of rules it will apply; must not be <c>RuleType.RULES</c>.</param>
+        /// <param name="concat">If it will concatenate multiple encodings.</param>
+        public PhoneticEngine(NameType nameType, RuleType ruleType, bool concat)
+            : this(nameType, ruleType, concat, DEFAULT_MAX_PHONEMES) // delegates to the four-argument constructor
+        {
+        }
+
+        /// <summary>
+        /// Generates a new, fully-configured phonetic engine; throws <see cref="ArgumentException"/> when <paramref name="ruleType"/> is <c>RuleType.RULES</c>.
+        /// <para/>
+        /// since 1.7
+        /// </summary>
+        /// <param name="nameType">The type of names it will use.</param>
+        /// <param name="ruleType">The type of rules it will apply; must not be <c>RuleType.RULES</c>.</param>
+        /// <param name="concat">If it will concatenate multiple encodings.</param>
+        /// <param name="maxPhonemes">The maximum number of phonemes that will be handled.</param>
+        public PhoneticEngine(NameType nameType, RuleType ruleType, bool concat,
+                              int maxPhonemes)
+        {
+            if (ruleType == RuleType.RULES) // Encode always loads the RuleType.RULES map itself for its first pass
+            {
+                throw new ArgumentException("ruleType must not be " + RuleType.RULES);
+            }
+            this.nameType = nameType;
+            this.ruleType = ruleType;
+            this.concat = concat;
+            this.lang = Lang.GetInstance(nameType); // language guesser is chosen by name type
+            this.maxPhonemes = maxPhonemes; // stored as given; no range check is performed here
+        }
+
+        /// <summary>
+        /// Applies the final rules to convert from a language-specific phonetic representation to a
+        /// language-independent representation.
+        /// </summary>
+        /// <param name="phonemeBuilder">The current phonemes.</param>
+        /// <param name="finalRules">The final rules to apply.</param>
+        /// <returns>
+        /// The resulting phonemes; <paramref name="phonemeBuilder"/> itself when <paramref name="finalRules"/> is empty.
+        /// </returns>
+        /// <exception cref="ArgumentNullException"><paramref name="finalRules"/> is <c>null</c>.</exception>
+        private PhonemeBuilder ApplyFinalRules(PhonemeBuilder phonemeBuilder,
+                                               IDictionary<string, IList<Rule>> finalRules)
+        {
+            if (finalRules == null)
+            {
+                // The single-string ArgumentNullException constructor takes the parameter NAME, not a
+                // message, so both are passed explicitly.
+                throw new ArgumentNullException("finalRules", "finalRules can not be null");
+            }
+            if (finalRules.Count == 0)
+            {
+                return phonemeBuilder;
+            }
+
+            // Collected in the order defined by Phoneme.COMPARER.
+            ISet<Phoneme> phonemes = new SortedSet<Phoneme>(Phoneme.COMPARER);
+
+            foreach (Phoneme phoneme in phonemeBuilder.Phonemes)
+            {
+                // Each input phoneme is re-encoded from scratch, restricted to its own languages.
+                PhonemeBuilder subBuilder = PhonemeBuilder.Empty(phoneme.Languages);
+                string phonemeText = phoneme.GetPhonemeText();
+
+                // No increment clause: the rules application reports how far it advanced.
+                for (int i = 0; i < phonemeText.Length;)
+                {
+                    RulesApplication rulesApplication =
+                            new RulesApplication(finalRules, phonemeText, subBuilder, i, maxPhonemes).Invoke();
+                    bool found = rulesApplication.IsFound;
+                    subBuilder = rulesApplication.PhonemeBuilder;
+
+                    if (!found)
+                    {
+                        // not found, appending as-is
+                        subBuilder.Append(phonemeText.Substring(i, 1));
+                    }
+
+                    i = rulesApplication.I;
+                }
+
+                phonemes.UnionWith(subBuilder.Phonemes);
+            }
+
+            return new PhonemeBuilder(phonemes.ToList());
+        }
+
+        /// <summary>
+        /// Encodes a string to its phonetic representation.
+        /// </summary>
+        /// <param name="input">The string to encode.</param>
+        /// <returns>The encoding of the input.</returns>
+        public virtual string Encode(string input)
+        {
+            // Let the configured Lang guess the candidate languages, then hand off to the two-argument overload.
+            return Encode(input, this.lang.GuessLanguages(input));
+        }
+
+        /// <summary>
+        /// Encodes an input string into an output phonetic representation, given a set of possible origin languages.
+        /// </summary>
+        /// <param name="input">String to phoneticise; a string with dashes or spaces separating each word.</param>
+        /// <param name="languageSet">The possible origin languages; used to select the language-specific rule maps.</param>
+        /// <returns>
+        /// A phonetic representation of the input; a string containing '-'-separated phonetic representations of the input.
+        /// An empty string is returned when concatenation is off and every word was removed as a name prefix.
+        /// </returns>
+        public virtual string Encode(string input, LanguageSet languageSet)
+        {
+            IDictionary<string, IList<Rule>> rules = Rule.GetInstanceMap(this.nameType, RuleType.RULES, languageSet);
+            // rules common across many (all) languages
+            IDictionary<string, IList<Rule>> finalRules1 = Rule.GetInstanceMap(this.nameType, this.ruleType, "common");
+            // rules that apply to a specific language that may be ambiguous or wrong if applied to other languages
+            IDictionary<string, IList<Rule>> finalRules2 = Rule.GetInstanceMap(this.nameType, this.ruleType, languageSet);
+
+            // tidy the input
+            // lower case is a locale-dependent operation
+            input = input.ToLowerInvariant().Replace('-', ' ').Trim();
+
+            if (this.nameType == NameType.GENERIC)
+            {
+                if (input.StartsWith("d'", StringComparison.Ordinal))
+                { // check for d'
+                    string remainder = input.Substring(2);
+                    string combined = "d" + remainder;
+                    return "(" + Encode(remainder) + ")-(" + Encode(combined) + ")";
+                }
+                foreach (string l in NAME_PREFIXES[this.nameType])
+                {
+                    // handle generic prefixes
+                    if (input.StartsWith(l + " ", StringComparison.Ordinal))
+                    {
+                        // check for any prefix in the words list
+                        string remainder = input.Substring(l.Length + 1); // input without the prefix
+                        string combined = l + remainder; // input with prefix without space
+                        return "(" + Encode(remainder) + ")-(" + Encode(combined) + ")";
+                    }
+                }
+            }
+
+            IList<string> words = WHITESPACE.Split(input).ToList();
+            IList<string> words2 = new List<string>();
+
+            // special-case handling of word prefixes based upon the name type
+            switch (this.nameType)
+            {
+                case NameType.SEPHARDIC:
+                    foreach (string aWord in words)
+                    {
+                        string[] parts = aWord.Split(new char[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);
+                        // RemoveEmptyEntries yields an empty array for "" or a word made only of apostrophes;
+                        // fall back to the empty string rather than indexing past the end of the array.
+                        string lastPart = parts.Length > 0 ? parts[parts.Length - 1] : string.Empty;
+                        words2.Add(lastPart);
+                    }
+                    words2.RemoveAll(NAME_PREFIXES[this.nameType]);
+                    break;
+                case NameType.ASHKENAZI:
+                    words2.AddRange(words);
+                    words2.RemoveAll(NAME_PREFIXES[this.nameType]);
+                    break;
+                case NameType.GENERIC:
+                    words2.AddRange(words);
+                    break;
+                default:
+                    throw new InvalidOperationException("Unreachable case: " + this.nameType);
+            }
+
+            if (this.concat)
+            {
+                // concat mode enabled
+                input = Join(words2, " ");
+            }
+            else if (words2.Count == 1)
+            {
+                // not a multi-word name
+                //input = words.iterator().next();
+                // NOTE(review): this takes the first entry of words, not words2, so the prefix filtering above
+                // is not reflected here. The commented-out line suggests this mirrors the original Java;
+                // confirm against upstream before changing.
+                input = words.FirstOrDefault();
+            }
+            else
+            {
+                // encode each word in a multi-word name separately (normally used for approx matches)
+                StringBuilder result = new StringBuilder();
+                foreach (string word in words2)
+                {
+                    result.Append("-").Append(Encode(word));
+                }
+                if (result.Length == 0)
+                {
+                    // every word was removed as a name prefix, so there is no leading "-" to strip
+                    return string.Empty;
+                }
+                // return the result without the leading "-"
+                return result.ToString(1, result.Length - 1);
+            }
+
+            PhonemeBuilder phonemeBuilder = PhonemeBuilder.Empty(languageSet);
+
+            // loop over each char in the input - we will handle the increment manually
+            for (int i = 0; i < input.Length;)
+            {
+                RulesApplication rulesApplication =
+                        new RulesApplication(rules, input, phonemeBuilder, i, maxPhonemes).Invoke();
+                i = rulesApplication.I;
+                phonemeBuilder = rulesApplication.PhonemeBuilder;
+            }
+
+            // Apply the general rules
+            phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules1);
+            // Apply the language-specific rules
+            phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules2);
+
+            return phonemeBuilder.MakeString();
+        }
+
+        /// <summary>
+        /// Gets the <see cref="Bm.Lang"/> language guessing rules being used; the single-argument Encode overload calls its GuessLanguages.
+        /// </summary>
+        public virtual Lang Lang
+        {
+            get { return this.lang; }
+        }
+
+        /// <summary>
+        /// Gets the <see cref="Bm.NameType"/> being used, as passed to the constructor.
+        /// </summary>
+        public virtual NameType NameType
+        {
+            get { return this.nameType; }
+        }
+
+        /// <summary>
+        /// Gets the <see cref="Bm.RuleType"/> being used, as passed to the constructor.
+        /// </summary>
+        public virtual RuleType RuleType
+        {
+            get { return this.ruleType; }
+        }
+
+        /// <summary>
+        /// Gets if multiple phonetic encodings are concatenated or if just the first one is kept.
+        /// Returns <c>true</c> if multiple phonetic encodings are returned, <c>false</c> if just the first is.
+        /// </summary>
+        public virtual bool IsConcat
+        {
+            get { return this.concat; }
+        }
+
+        /// <summary>
+        /// Gets the maximum number of phonemes the engine will calculate for a given input.
+        /// <para/>
+        /// since 1.7
+        /// </summary>
+        public virtual int MaxPhonemes
+        {
+            get { return this.maxPhonemes; }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ResourceConstants.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ResourceConstants.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ResourceConstants.cs
new file mode 100644
index 0000000..c70d404
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ResourceConstants.cs
@@ -0,0 +1,37 @@
+// commons-codec version compatibility level: 1.9
+using System.Text;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Constants used to process resource files.
+    /// <para/>
+    /// This class is immutable and thread-safe.
+    /// <para/>
+    /// since 1.6
+    /// </summary>
+    internal class ResourceConstants
+    {
+        public static readonly string CMT = "//"; // presumably the single-line comment marker in resource files; confirm against the parsers
+        public static readonly Encoding ENCODING = Encoding.UTF8; // presumably the encoding used when reading resource files; confirm
+        public static readonly string EXT_CMT_END = "*/"; // presumably the token that closes a block comment
+        public static readonly string EXT_CMT_START = "/*"; // presumably the token that opens a block comment
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Rule.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Rule.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Rule.cs
new file mode 100644
index 0000000..52f3d9a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Rule.cs
@@ -0,0 +1,1069 @@
+// commons-codec version compatibility level: 1.9
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Reflection;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A phoneme rule.
+    /// </summary>
+    /// <remarks>
+    /// Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply
+    /// and a logical flag indicating if all languages must be in play. A rule matches if:
+    /// <list type="bullet">
+    ///     <item><description>the pattern matches at the current position</description></item>
+    ///     <item><description>the string up until the beginning of the pattern matches the left context</description></item>
+    ///     <item><description>the string from the end of the pattern matches the right context</description></item>
+    ///     <item><description>logical is ALL and all languages are in scope; or</description></item>
+    ///     <item><description>logical is any other value and at least one language is in scope</description></item>
+    /// </list>
+    /// <para/>
+    /// Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user
+    /// to explicitly construct their own.
+    /// <para/>
+    /// Rules are immutable and thread-safe.
+    /// <para/>
+    /// <b>Rules resources</b>
+    /// <para/>
+    /// Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically
+    /// named following the pattern:
+    /// <c>Lucene.Net.Analysis.Phonetic.Language.Bm.<see cref="NameType"/>_<see cref="RuleType"/>_[language].txt</c>
+    /// <para/>
+    /// The format of these resources is the following:
+    /// <list type="table">
+    ///     <item>
+    ///         <term>Rules:</term>
+    ///         <description>
+    ///             whitespace separated, double-quoted strings. There should be 4 columns to each row, and these
+    ///             will be interpreted as:
+    ///             <list type="number">
+    ///                 <item><description>pattern</description></item>
+    ///                 <item><description>left context</description></item>
+    ///                 <item><description>right context</description></item>
+    ///                 <item><description>phoneme</description></item>
+    ///             </list>
+    ///         </description>
+    ///     </item>
+    ///     <item>
+    ///         <term>End-of-line comments:</term>
+    ///         <description>Any occurrence of '//' will cause all text following on that line to be discarded as a comment.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Multi-line comments:</term>
+    ///         <description>Any line starting with '/*' will start multi-line commenting mode. This will skip all content until a line ending in '*' and '/' is found.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Blank lines:</term>
+    ///         <description>All blank lines will be skipped.</description>
+    ///     </item>
+    /// </list>
+    /// <para/>
+    /// since 1.6
+    /// </remarks>
+    public class Rule
+    {
+        // Pre-compiled splitters shared by the parsing helpers of this class. They are
+        // never reassigned, so they are declared readonly like the other static fields.
+
+        // Splits on a literal '|'.
+        private static readonly Regex PIPE = new Regex("[|]", RegexOptions.Compiled);
+        // Splits on runs of whitespace.
+        private static readonly Regex WHITESPACE = new Regex("\\s+", RegexOptions.Compiled);
+        // Splits on a literal '+'.
+        private static readonly Regex PLUS = new Regex("[+]", RegexOptions.Compiled);
+
+        /// <summary>
+        /// An <see cref="IRPattern"/> that accepts every input, whatever its type or content.
+        /// </summary>
+        private class AllStringsRMatcher : IRPattern
+        {
+            /// <summary>Always matches.</summary>
+            public bool IsMatch(string input)
+            {
+                return true;
+            }
+
+            /// <summary>Always matches.</summary>
+            public bool IsMatch(ICharSequence input)
+            {
+                return true;
+            }
+
+            /// <summary>Always matches.</summary>
+            public bool IsMatch(StringBuilder input)
+            {
+                return true;
+            }
+        }
+
+        /// <summary>
+        /// The literal <c>ALL</c>. Presumably the logical flag value mentioned in the class
+        /// remarks; its use is not visible in this part of the file.
+        /// </summary>
+        public static readonly string ALL = "ALL";
+
+        /// <summary>
+        /// Shared matcher that accepts every input.
+        /// </summary>
+        public static readonly IRPattern ALL_STRINGS_RMATCHER = new AllStringsRMatcher();
+
+        /// <summary>
+        /// Prefix that marks an include statement in a rules resource.
+        /// </summary>
+        private static readonly string HASH_INCLUDE = "#include";
+
+        /// <summary>
+        /// Quote character stripped from both ends of each rule column.
+        /// </summary>
+        private static readonly string DOUBLE_QUOTE = "\"";
+
+        /// <summary>
+        /// All parsed rules, filled once by the static constructor. The nesting is
+        /// name type, then rule type, then language, then pattern key (the first
+        /// character of the rule pattern), ending in the list of rules for that key.
+        /// </summary>
+        private static readonly IDictionary<NameType, IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>> RULES =
+                new Dictionary<NameType, IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>>();
+
+        /// <summary>
+        /// Loads and parses every rules resource up front. For each name type and rule
+        /// type, the rules of every language known for that name type are parsed; every
+        /// rule type except <c>RULES</c> also gets a "common" entry. The resulting
+        /// maps are wrapped as unmodifiable before being stored.
+        /// </summary>
+        static Rule()
+        {
+            foreach (NameType nameType in Enum.GetValues(typeof(NameType)))
+            {
+                IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>> byRuleType =
+                        new Dictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>();
+
+                foreach (RuleType ruleType in Enum.GetValues(typeof(RuleType)))
+                {
+                    IDictionary<string, IDictionary<string, IList<Rule>>> byLanguage =
+                            new Dictionary<string, IDictionary<string, IList<Rule>>>();
+
+                    Languages known = Languages.GetInstance(nameType);
+                    foreach (string language in known.GetLanguages())
+                    {
+                        try
+                        {
+                            TextReader scanner = CreateScanner(nameType, ruleType, language);
+                            string resourceName = CreateResourceName(nameType, ruleType, language);
+                            byLanguage[language] = ParseRules(scanner, resourceName);
+                        }
+                        catch (InvalidOperationException e)
+                        {
+                            // add the resource name so the failing file can be identified
+                            throw new InvalidOperationException("Problem processing " + CreateResourceName(nameType, ruleType, language), e);
+                        }
+                    }
+
+                    if (ruleType != RuleType.RULES)
+                    {
+                        TextReader commonScanner = CreateScanner(nameType, ruleType, "common");
+                        byLanguage["common"] = ParseRules(commonScanner, CreateResourceName(nameType, ruleType, "common"));
+                    }
+
+                    byRuleType[ruleType] = Collections.UnmodifiableMap(byLanguage);
+                }
+
+                RULES[nameType] = Collections.UnmodifiableMap(byRuleType);
+            }
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="input"/> occurs anywhere in <paramref name="chars"/>.
+        /// </summary>
+        /// <param name="chars">The characters to search.</param>
+        /// <param name="input">The character to look for.</param>
+        /// <returns><c>true</c> if the character is present; otherwise <c>false</c>.</returns>
+        private static bool Contains(ICharSequence chars, char input)
+        {
+            bool found = false;
+            int position = 0;
+            while (!found && position < chars.Length)
+            {
+                found = chars[position] == input;
+                position++;
+            }
+            return found;
+        }
+        /// <summary>
+        /// Tells whether <paramref name="input"/> occurs anywhere in <paramref name="chars"/>.
+        /// </summary>
+        /// <param name="chars">The characters to search.</param>
+        /// <param name="input">The character to look for.</param>
+        /// <returns><c>true</c> if the character is present; otherwise <c>false</c>.</returns>
+        private static bool Contains(string chars, char input)
+        {
+            // searching for a single char is an ordinal scan, like the hand-written loop
+            return chars.IndexOf(input) >= 0;
+        }
+        /// <summary>
+        /// Tells whether <paramref name="input"/> occurs anywhere in <paramref name="chars"/>.
+        /// </summary>
+        /// <param name="chars">The characters to search.</param>
+        /// <param name="input">The character to look for.</param>
+        /// <returns><c>true</c> if the character is present; otherwise <c>false</c>.</returns>
+        private static bool Contains(StringBuilder chars, char input)
+        {
+            bool found = false;
+            int position = 0;
+            while (!found && position < chars.Length)
+            {
+                found = chars[position] == input;
+                position++;
+            }
+            return found;
+        }
+
+        /// <summary>
+        /// Builds the resource file name <c>[name type]_[rule type]_[language].txt</c>.
+        /// </summary>
+        /// <param name="nameType">The name type whose name forms the first segment.</param>
+        /// <param name="rt">The rule type whose name forms the second segment.</param>
+        /// <param name="lang">The language forming the third segment.</param>
+        /// <returns>The resource file name.</returns>
+        private static string CreateResourceName(NameType nameType, RuleType rt, string lang)
+        {
+            var nameSegment = nameType.GetName();
+            var ruleSegment = rt.GetName();
+            return string.Format("{0}_{1}_{2}.txt", nameSegment, ruleSegment, lang);
+        }
+
+        /// <summary>
+        /// Opens a reader over the embedded rules resource for the given name type,
+        /// rule type and language.
+        /// </summary>
+        /// <param name="nameType">The name type of the resource.</param>
+        /// <param name="rt">The rule type of the resource.</param>
+        /// <param name="lang">The language of the resource.</param>
+        /// <returns>A reader over the resource, using <see cref="ResourceConstants.ENCODING"/>.</returns>
+        /// <exception cref="ArgumentException">The resource could not be found.</exception>
+        private static TextReader CreateScanner(NameType nameType, RuleType rt, string lang)
+        {
+            string resourceName = CreateResourceName(nameType, rt, lang);
+            Assembly owner = typeof(Languages).GetTypeInfo().Assembly;
+            Stream resourceStream = owner.FindAndGetManifestResourceStream(typeof(Languages), resourceName);
+
+            if (resourceStream != null)
+            {
+                return new StreamReader(resourceStream, ResourceConstants.ENCODING);
+            }
+
+            throw new ArgumentException("Unable to load resource: " + resourceName);
+        }
+
+        /// <summary>
+        /// Opens a reader over the embedded resource named <c>[lang].txt</c>.
+        /// </summary>
+        /// <param name="lang">The resource name without its <c>.txt</c> extension.</param>
+        /// <returns>A reader over the resource, using <see cref="ResourceConstants.ENCODING"/>.</returns>
+        /// <exception cref="ArgumentException">The resource could not be found.</exception>
+        private static TextReader CreateScanner(string lang)
+        {
+            string resourceName = string.Format("{0}.txt", lang);
+            Assembly owner = typeof(Languages).GetTypeInfo().Assembly;
+            Stream resourceStream = owner.FindAndGetManifestResourceStream(typeof(Languages), resourceName);
+
+            if (resourceStream != null)
+            {
+                return new StreamReader(resourceStream, ResourceConstants.ENCODING);
+            }
+
+            throw new ArgumentException("Unable to load resource: " + resourceName);
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="input"/> ends with <paramref name="suffix"/>,
+        /// comparing character by character.
+        /// </summary>
+        /// <param name="input">The text to inspect.</param>
+        /// <param name="suffix">The expected ending.</param>
+        /// <returns><c>true</c> if the last characters of the input equal the suffix.</returns>
+        private static bool EndsWith(ICharSequence input, string suffix)
+        {
+            // index in input at which the suffix would have to begin
+            int offset = input.Length - suffix.Length;
+            if (offset < 0)
+            {
+                return false;
+            }
+
+            int matched = 0;
+            while (matched < suffix.Length && input[offset + matched] == suffix[matched])
+            {
+                matched++;
+            }
+            return matched == suffix.Length;
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="input"/> ends with <paramref name="suffix"/>,
+        /// comparing character by character.
+        /// </summary>
+        /// <param name="input">The text to inspect.</param>
+        /// <param name="suffix">The expected ending.</param>
+        /// <returns><c>true</c> if the last characters of the input equal the suffix.</returns>
+        private static bool EndsWith(string input, string suffix)
+        {
+            // index in input at which the suffix would have to begin
+            int offset = input.Length - suffix.Length;
+            if (offset < 0)
+            {
+                return false;
+            }
+
+            int matched = 0;
+            while (matched < suffix.Length && input[offset + matched] == suffix[matched])
+            {
+                matched++;
+            }
+            return matched == suffix.Length;
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="input"/> ends with <paramref name="suffix"/>,
+        /// comparing character by character.
+        /// </summary>
+        /// <param name="input">The text to inspect.</param>
+        /// <param name="suffix">The expected ending.</param>
+        /// <returns><c>true</c> if the last characters of the input equal the suffix.</returns>
+        private static bool EndsWith(StringBuilder input, string suffix)
+        {
+            // index in input at which the suffix would have to begin
+            int offset = input.Length - suffix.Length;
+            if (offset < 0)
+            {
+                return false;
+            }
+
+            int matched = 0;
+            while (matched < suffix.Length && input[offset + matched] == suffix[matched])
+            {
+                matched++;
+            }
+            return matched == suffix.Length;
+        }
+
+        /// <summary>
+        /// Gets rules for a combination of name type, rule type and languages, as one
+        /// flat list built from every bucket of the corresponding rule map.
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
+        /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
+        /// <param name="langs">The set of languages to consider.</param>
+        /// <returns>A list of <see cref="Rule"/>s that apply.</returns>
+        public static IList<Rule> GetInstance(NameType nameType, RuleType rt,
+                                     LanguageSet langs)
+        {
+            List<Rule> merged = new List<Rule>();
+            foreach (IList<Rule> bucket in GetInstanceMap(nameType, rt, langs).Values)
+            {
+                merged.AddRange(bucket);
+            }
+            return merged;
+        }
+
+        /// <summary>
+        /// Gets rules for a combination of name type, rule type and a single language.
+        /// The language is wrapped in a one-element language set and passed to the
+        /// <see cref="LanguageSet"/> overload.
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
+        /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
+        /// <param name="lang">The language to consider.</param>
+        /// <returns>A list of <see cref="Rule"/>s that apply.</returns>
+        public static IList<Rule> GetInstance(NameType nameType, RuleType rt, string lang)
+        {
+            HashSet<string> single = new HashSet<string>();
+            single.Add(lang);
+            return GetInstance(nameType, rt, LanguageSet.From(single));
+        }
+
+        /// <summary>
+        /// Gets rules for a combination of name type, rule type and languages. A set
+        /// holding exactly one language selects that language's rules; any other set
+        /// selects the rules stored under <see cref="Languages.ANY"/>.
+        /// <para/>
+        /// since 1.9
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
+        /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
+        /// <param name="langs">The set of languages to consider.</param>
+        /// <returns>A map containing all <see cref="Rule"/>s that apply, grouped by the first character of the rule pattern.</returns>
+        public static IDictionary<string, IList<Rule>> GetInstanceMap(NameType nameType, RuleType rt,
+                                                             LanguageSet langs)
+        {
+            if (langs.IsSingleton)
+            {
+                return GetInstanceMap(nameType, rt, langs.GetAny());
+            }
+
+            return GetInstanceMap(nameType, rt, Languages.ANY);
+        }
+
+        /// <summary>
+        /// Gets rules for a combination of name type, rule type and a single language,
+        /// by walking the pre-loaded rule tables one level at a time.
+        /// <para/>
+        /// since 1.9
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
+        /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
+        /// <param name="lang">The language to consider.</param>
+        /// <returns>A map containing all <see cref="Rule"/>s that apply, grouped by the first character of the rule pattern.</returns>
+        /// <exception cref="ArgumentException">No rules are registered for the requested combination.</exception>
+        public static IDictionary<string, IList<Rule>> GetInstanceMap(NameType nameType, RuleType rt,
+                                                             string lang)
+        {
+            IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>> byRuleType;
+            IDictionary<string, IDictionary<string, IList<Rule>>> byLanguage;
+            IDictionary<string, IList<Rule>> byPatternKey = null;
+
+            // every level must exist and be non-null for the lookup to succeed
+            bool found = RULES.TryGetValue(nameType, out byRuleType) && byRuleType != null &&
+                         byRuleType.TryGetValue(rt, out byLanguage) && byLanguage != null &&
+                         byLanguage.TryGetValue(lang, out byPatternKey) && byPatternKey != null;
+
+            if (!found)
+            {
+                throw new ArgumentException(string.Format("No rules found for {0}, {1}, {2}.",
+                                                   nameType.GetName(), rt.GetName(), lang));
+            }
+
+            return byPatternKey;
+        }
+
+        /// <summary>
+        /// Parses a single phoneme, optionally followed by a bracketed list of
+        /// languages separated by '+', for example <c>text[lang1+lang2]</c>.
+        /// </summary>
+        /// <param name="ph">The phoneme expression to parse.</param>
+        /// <returns>
+        /// A <see cref="Phoneme"/> restricted to the listed languages, or one for
+        /// <see cref="Languages.ANY_LANGUAGE"/> when no bracket is present.
+        /// </returns>
+        /// <exception cref="ArgumentException">The expression contains '[' but does not end in ']'.</exception>
+        private static Phoneme ParsePhoneme(string ph)
+        {
+            // '[' is syntax, not natural-language text: search for the char itself
+            // (ordinal) rather than using the culture-sensitive string overload.
+            int open = ph.IndexOf('[');
+            if (open < 0)
+            {
+                return new Phoneme(ph, Languages.ANY_LANGUAGE);
+            }
+
+            if (!ph.EndsWith("]", StringComparison.Ordinal))
+            {
+                throw new ArgumentException("Phoneme expression contains a '[' but does not end in ']'");
+            }
+
+            string before = ph.Substring(0, open);
+            // text strictly between '[' and the closing ']'
+            string input = ph.Substring(open + 1, ph.Length - open - 2);
+            ISet<string> langs = new HashSet<string>(PLUS.Split(input));
+
+            return new Phoneme(before, LanguageSet.From(langs));
+        }
+
+        /// <summary>
+        /// Parses a phoneme expression: either a single phoneme, or a parenthesised
+        /// list of alternatives separated by '|'.
+        /// </summary>
+        /// <param name="ph">The phoneme expression to parse.</param>
+        /// <returns>A single phoneme, or a <see cref="PhonemeList"/> of the alternatives.</returns>
+        /// <exception cref="ArgumentException">The expression starts with '(' but does not end with ')'.</exception>
+        private static IPhonemeExpr ParsePhonemeExpr(string ph)
+        {
+            if (!ph.StartsWith("(", StringComparison.Ordinal))
+            {
+                // no bracketed list: a plain phoneme
+                return ParsePhoneme(ph);
+            }
+
+            if (!ph.EndsWith(")", StringComparison.Ordinal))
+            {
+                throw new ArgumentException("Phoneme starts with '(' so must end with ')'");
+            }
+
+            IList<Phoneme> options = new List<Phoneme>();
+            string inner = ph.Substring(1, ph.Length - 2);
+            string[] pieces = PIPE.Split(inner);
+            for (int i = 0; i < pieces.Length; i++)
+            {
+                options.Add(ParsePhoneme(pieces[i]));
+            }
+
+            // a leading or trailing '|' adds an explicit empty alternative
+            bool hasEdgePipe = inner.StartsWith("|", StringComparison.Ordinal) ||
+                               inner.EndsWith("|", StringComparison.Ordinal);
+            if (hasEdgePipe)
+            {
+                options.Add(new Phoneme("", Languages.ANY_LANGUAGE));
+            }
+
+            return new PhonemeList(options);
+        }
+
+        /// <summary>
+        /// A <see cref="Rule"/> that remembers the line and resource it was parsed
+        /// from and reports them from <see cref="ToString"/>.
+        /// </summary>
+        private class RuleAnonymousHelper : Rule
+        {
+            private readonly int lineNumber;
+            private readonly string resourceLocation;
+
+            /// <summary>
+            /// Creates the rule and records where it came from.
+            /// </summary>
+            /// <param name="pat">The pattern.</param>
+            /// <param name="lCon">The left context.</param>
+            /// <param name="rCon">The right context.</param>
+            /// <param name="ph">The resulting phoneme.</param>
+            /// <param name="cLine">The line number of the rule in its resource.</param>
+            /// <param name="location">The resource the rule was read from.</param>
+            public RuleAnonymousHelper(string pat, string lCon, string rCon, IPhonemeExpr ph, int cLine, string location)
+                : base(pat, lCon, rCon, ph)
+            {
+                this.lineNumber = cLine;
+                this.resourceLocation = location;
+            }
+
+            /// <summary>
+            /// Returns a description of the form <c>Rule{line=N, loc='location'}</c>.
+            /// </summary>
+            public override string ToString()
+            {
+                return new StringBuilder()
+                    .Append("Rule")
+                    .Append("{line=").Append(lineNumber)
+                    .Append(", loc='").Append(resourceLocation).Append('\'')
+                    .Append('}')
+                    .ToString();
+            }
+        }
+
+        /// <summary>
+        /// Parses a rules resource into rules grouped by the first character of their pattern.
+        /// </summary>
+        /// <remarks>
+        /// Lines are handled as follows:
+        /// <list type="bullet">
+        ///     <item><description>a line starting with the multi-line comment opener switches to comment mode, which lasts until a line ends with the closer;</description></item>
+        ///     <item><description>everything from the end-of-line comment marker onwards is dropped, then the line is trimmed;</description></item>
+        ///     <item><description>blank lines are skipped;</description></item>
+        ///     <item><description>a line starting with <c>#include</c> parses the named resource recursively and merges the result with <c>PutAll</c>;</description></item>
+        ///     <item><description>any other line must split on whitespace into exactly four columns: pattern, left context, right context and phoneme.</description></item>
+        /// </list>
+        /// All marker checks are ordinal, because the markers are file syntax rather than
+        /// natural-language text. The reader is disposed before this method returns,
+        /// whether or not parsing succeeds.
+        /// </remarks>
+        /// <param name="reader">The reader to consume; it is disposed by this method.</param>
+        /// <param name="location">A description of the resource, used in error messages and rule descriptions.</param>
+        /// <returns>The parsed rules, keyed by the first character of each rule's pattern.</returns>
+        /// <exception cref="ArgumentException">An include statement or a rule line is malformed.</exception>
+        /// <exception cref="InvalidOperationException">A rule line has four columns but a rule cannot be built from them.</exception>
+        private static IDictionary<string, IList<Rule>> ParseRules(TextReader reader, string location)
+        {
+            IDictionary<string, IList<Rule>> lines = new HashMap<string, IList<Rule>>();
+            int currentLine = 0;
+            bool inMultilineComment = false;
+
+            using (reader)
+            {
+                string rawLine;
+                while ((rawLine = reader.ReadLine()) != null)
+                {
+                    currentLine++;
+
+                    if (inMultilineComment)
+                    {
+                        // only a line that ends with the closer leaves comment mode
+                        if (rawLine.EndsWith(ResourceConstants.EXT_CMT_END, StringComparison.Ordinal))
+                        {
+                            inMultilineComment = false;
+                        }
+                        continue;
+                    }
+
+                    if (rawLine.StartsWith(ResourceConstants.EXT_CMT_START, StringComparison.Ordinal))
+                    {
+                        inMultilineComment = true;
+                        continue;
+                    }
+
+                    // discard comments
+                    string line = rawLine;
+                    int cmtI = line.IndexOf(ResourceConstants.CMT, StringComparison.Ordinal);
+                    if (cmtI >= 0)
+                    {
+                        line = line.Substring(0, cmtI);
+                    }
+
+                    // trim leading-trailing whitespace
+                    line = line.Trim();
+
+                    if (line.Length == 0)
+                    {
+                        continue; // empty lines can be safely skipped
+                    }
+
+                    if (line.StartsWith(HASH_INCLUDE, StringComparison.Ordinal))
+                    {
+                        // include statement
+                        string incl = line.Substring(HASH_INCLUDE.Length).Trim();
+                        if (incl.Contains(" "))
+                        {
+                            throw new ArgumentException("Malformed import statement '" + rawLine + "' in " +
+                                                               location);
+                        }
+
+                        lines.PutAll(ParseRules(CreateScanner(incl), location + "->" + incl));
+                        continue;
+                    }
+
+                    // rule
+                    string[] parts = WHITESPACE.Split(line);
+                    if (parts.Length != 4)
+                    {
+                        throw new ArgumentException("Malformed rule statement split into " + parts.Length +
+                                                           " parts: " + rawLine + " in " + location);
+                    }
+
+                    try
+                    {
+                        string pat = StripQuotes(parts[0]);
+                        string lCon = StripQuotes(parts[1]);
+                        string rCon = StripQuotes(parts[2]);
+                        IPhonemeExpr ph = ParsePhonemeExpr(StripQuotes(parts[3]));
+                        Rule r = new RuleAnonymousHelper(pat, lCon, rCon, ph, currentLine, location);
+
+                        // rules are bucketed by the first character of their pattern
+                        string patternKey = r.pattern.Substring(0, 1);
+                        IList<Rule> rules;
+                        if (!lines.TryGetValue(patternKey, out rules) || rules == null)
+                        {
+                            rules = new List<Rule>();
+                            lines[patternKey] = rules;
+                        }
+                        rules.Add(r);
+                    }
+                    catch (ArgumentException e)
+                    {
+                        throw new InvalidOperationException("Problem parsing line '" + currentLine + "' in " +
+                                                        location, e);
+                    }
+                }
+            }
+
+            return lines;
+        }
+
+        /// <summary>
+        /// An <see cref="IRPattern"/> built from three delegates, one for each
+        /// supported input type.
+        /// </summary>
+        private class RPatternHelper : IRPattern
+        {
+            private readonly Func<string, bool> stringMatcher;
+            private readonly Func<StringBuilder, bool> builderMatcher;
+            private readonly Func<ICharSequence, bool> sequenceMatcher;
+
+            /// <summary>
+            /// Creates a pattern that forwards each overload to its delegate.
+            /// </summary>
+            /// <param name="isMatchSB">The test applied to <see cref="StringBuilder"/> input.</param>
+            /// <param name="isMatchStr">The test applied to <see cref="string"/> input.</param>
+            /// <param name="isMatchCS">The test applied to <see cref="ICharSequence"/> input.</param>
+            public RPatternHelper(Func<StringBuilder, bool> isMatchSB, Func<string, bool> isMatchStr, Func<ICharSequence, bool> isMatchCS)
+            {
+                this.builderMatcher = isMatchSB;
+                this.stringMatcher = isMatchStr;
+                this.sequenceMatcher = isMatchCS;
+            }
+
+            /// <summary>Applies the string delegate.</summary>
+            public bool IsMatch(string input)
+            {
+                return stringMatcher(input);
+            }
+
+            /// <summary>Applies the <see cref="StringBuilder"/> delegate.</summary>
+            public bool IsMatch(StringBuilder input)
+            {
+                return builderMatcher(input);
+            }
+
+            /// <summary>Applies the <see cref="ICharSequence"/> delegate.</summary>
+            public bool IsMatch(ICharSequence input)
+            {
+                return sequenceMatcher(input);
+            }
+        }
+
+        /// <summary>
+        /// Attempts to compile the regex into direct string ops, falling back to <see cref="Regex"/> and <see cref="Match"/> in the worst case.
+        /// </summary>
+        /// <remarks>
+        /// The following shapes are handled without a regular expression:
+        /// <list type="bullet">
+        ///     <item><description><c>^$</c>: matches only the empty input;</description></item>
+        ///     <item><description><c>^text$</c>: matches input whose characters equal <c>text</c>;</description></item>
+        ///     <item><description><c>^</c> or <c>$</c> alone: matches every input;</description></item>
+        ///     <item><description><c>^text</c> / <c>text$</c>: prefix / suffix test;</description></item>
+        ///     <item><description><c>^[abc]$</c>, <c>^[abc]</c>, <c>[abc]$</c> and their negated <c>[^abc]</c> forms: a test of the only, first or last character.</description></item>
+        /// </list>
+        /// The exact-match case compares characters explicitly. Calling <c>Equals</c> with a
+        /// string on a <see cref="StringBuilder"/> can resolve to reference equality, which
+        /// would make an anchored literal never match.
+        /// </remarks>
+        /// <param name="regex">The regular expression to compile.</param>
+        /// <returns>An RPattern that will match this regex.</returns>
+        private static IRPattern GetPattern(string regex)
+        {
+            // '^' and '$' are regex syntax, so they are detected ordinally
+            bool startsWith = regex.StartsWith("^", StringComparison.Ordinal);
+            bool endsWith = regex.EndsWith("$", StringComparison.Ordinal);
+            int contentStart = startsWith ? 1 : 0;
+            int contentEnd = endsWith ? regex.Length - 1 : regex.Length;
+            string content = regex.Substring(contentStart, contentEnd - contentStart);
+            bool boxes = content.Contains("[");
+
+            if (!boxes)
+            {
+                if (startsWith && endsWith)
+                {
+                    // exact match
+                    if (content.Length == 0)
+                    {
+                        // empty
+                        return new RPatternHelper(
+                            isMatchSB: input => input.Length == 0,
+                            isMatchStr: input => input.Length == 0,
+                            isMatchCS: input => input.Length == 0);
+                    }
+
+                    // same length plus matching prefix means the same characters
+                    return new RPatternHelper(
+                        isMatchSB: input => input.Length == content.Length && StartsWith(input, content),
+                        isMatchStr: input => input.Length == content.Length && StartsWith(input, content),
+                        isMatchCS: input => input.Length == content.Length && StartsWith(input, content));
+                }
+                else if ((startsWith || endsWith) && content.Length == 0)
+                {
+                    // matches every string
+                    return ALL_STRINGS_RMATCHER;
+                }
+                else if (startsWith)
+                {
+                    // matches from start
+                    return new RPatternHelper(
+                        isMatchSB: input => StartsWith(input, content),
+                        isMatchStr: input => StartsWith(input, content),
+                        isMatchCS: input => StartsWith(input, content));
+                }
+                else if (endsWith)
+                {
+                    // matches from end
+                    return new RPatternHelper(
+                        isMatchSB: input => EndsWith(input, content),
+                        isMatchStr: input => EndsWith(input, content),
+                        isMatchCS: input => EndsWith(input, content));
+                }
+            }
+            else
+            {
+                bool startsWithBox = content.StartsWith("[", StringComparison.Ordinal);
+                bool endsWithBox = content.EndsWith("]", StringComparison.Ordinal);
+
+                if (startsWithBox && endsWithBox)
+                {
+                    string boxContent = content.Substring(1, content.Length - 2);
+                    if (!boxContent.Contains("["))
+                    {
+                        // box containing alternatives
+                        bool negate = boxContent.StartsWith("^", StringComparison.Ordinal);
+                        if (negate)
+                        {
+                            boxContent = boxContent.Substring(1);
+                        }
+                        string bContent = boxContent;
+                        bool shouldMatch = !negate;
+
+                        if (startsWith && endsWith)
+                        {
+                            // exact match: a single character from (or outside) the box
+                            return new RPatternHelper(
+                                isMatchSB: input => input.Length == 1 && Contains(bContent, input[0]) == shouldMatch,
+                                isMatchStr: input => input.Length == 1 && Contains(bContent, input[0]) == shouldMatch,
+                                isMatchCS: input => input.Length == 1 && Contains(bContent, input[0]) == shouldMatch);
+                        }
+                        else if (startsWith)
+                        {
+                            // first char
+                            return new RPatternHelper(
+                                isMatchSB: input => input.Length > 0 && Contains(bContent, input[0]) == shouldMatch,
+                                isMatchStr: input => input.Length > 0 && Contains(bContent, input[0]) == shouldMatch,
+                                isMatchCS: input => input.Length > 0 && Contains(bContent, input[0]) == shouldMatch);
+                        }
+                        else if (endsWith)
+                        {
+                            // last char
+                            return new RPatternHelper(
+                                isMatchSB: input => input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch,
+                                isMatchStr: input => input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch,
+                                isMatchCS: input => input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch);
+                        }
+                    }
+                }
+            }
+
+            // no shortcut applies: fall back to a real regular expression
+            Regex pattern = new Regex(regex, RegexOptions.Compiled);
+
+            return new RPatternHelper(
+                isMatchSB: input => pattern.Match(input.ToString()).Success,
+                isMatchStr: input => pattern.Match(input).Success,
+                isMatchCS: input => pattern.Match(input.ToString()).Success);
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="input"/> begins with <paramref name="prefix"/>,
+        /// comparing character by character.
+        /// </summary>
+        /// <param name="input">The text to inspect.</param>
+        /// <param name="prefix">The expected beginning.</param>
+        /// <returns><c>true</c> if the first characters of the input equal the prefix.</returns>
+        private static bool StartsWith(ICharSequence input, string prefix)
+        {
+            int needed = prefix.Length;
+            if (needed > input.Length)
+            {
+                return false;
+            }
+
+            int matched = 0;
+            while (matched < needed && input[matched] == prefix[matched])
+            {
+                matched++;
+            }
+            return matched == needed;
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="input"/> begins with <paramref name="prefix"/>,
+        /// comparing character by character.
+        /// </summary>
+        /// <param name="input">The text to inspect.</param>
+        /// <param name="prefix">The expected beginning.</param>
+        /// <returns><c>true</c> if the first characters of the input equal the prefix.</returns>
+        private static bool StartsWith(string input, string prefix)
+        {
+            int needed = prefix.Length;
+            if (needed > input.Length)
+            {
+                return false;
+            }
+
+            int matched = 0;
+            while (matched < needed && input[matched] == prefix[matched])
+            {
+                matched++;
+            }
+            return matched == needed;
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="input"/> begins with <paramref name="prefix"/>,
+        /// comparing character by character.
+        /// </summary>
+        /// <param name="input">The text to inspect.</param>
+        /// <param name="prefix">The expected beginning.</param>
+        /// <returns><c>true</c> if the first characters of the input equal the prefix.</returns>
+        private static bool StartsWith(StringBuilder input, string prefix)
+        {
+            int needed = prefix.Length;
+            if (needed > input.Length)
+            {
+                return false;
+            }
+
+            int matched = 0;
+            while (matched < needed && input[matched] == prefix[matched])
+            {
+                matched++;
+            }
+            return matched == needed;
+        }
+
+        /// <summary>
+        /// Removes one leading and one trailing double quote, each only if present.
+        /// The trailing check is made on the text left after the leading quote is removed.
+        /// </summary>
+        /// <param name="str">The text to unquote.</param>
+        /// <returns>The text without its surrounding double quotes.</returns>
+        private static string StripQuotes(string str)
+        {
+            string withoutLeading = str.StartsWith(DOUBLE_QUOTE, StringComparison.Ordinal)
+                ? str.Substring(1)
+                : str;
+
+            return withoutLeading.EndsWith(DOUBLE_QUOTE, StringComparison.Ordinal)
+                ? withoutLeading.Substring(0, withoutLeading.Length - 1)
+                : withoutLeading;
+        }
+
+        private readonly IRPattern lContext; // left-context matcher; the constructor builds it via GetPattern(lContext + "$")
+
+        private readonly string pattern; // literal text compared against the input by PatternAndContextMatches
+
+        private readonly IPhonemeExpr phoneme; // phoneme expression passed to the constructor, exposed through the Phoneme property
+
+        private readonly IRPattern rContext; // right-context matcher; the constructor builds it via GetPattern("^" + rContext)
+
+        /// <summary>
+        /// Initializes a rule from its literal pattern, its two context expressions and its phoneme.
+        /// The left context is anchored at its end (<c>$</c> appended) and the right context at its
+        /// start (<c>^</c> prepended) before being handed to <c>GetPattern</c>.
+        /// </summary>
+        /// <param name="pattern">The pattern.</param>
+        /// <param name="lContext">The left context.</param>
+        /// <param name="rContext">The right context.</param>
+        /// <param name="phoneme">The resulting phoneme.</param>
+        public Rule(string pattern, string lContext, string rContext, IPhonemeExpr phoneme)
+        {
+            string anchoredLeft = lContext + "$";
+            string anchoredRight = "^" + rContext;
+
+            this.pattern = pattern;
+            this.lContext = GetPattern(anchoredLeft);
+            this.rContext = GetPattern(anchoredRight);
+            this.phoneme = phoneme;
+        }
+
+        /// <summary>
+        /// Gets the matcher for the left context, i.e. the expression that has to match the input preceding the pattern.
+        /// </summary>
+        public virtual IRPattern LContext
+        {
+            get
+            {
+                return lContext;
+            }
+        }
+
+        /// <summary>
+        /// Gets the pattern: a string literal that has to match the input exactly.
+        /// </summary>
+        public virtual string Pattern
+        {
+            get
+            {
+                return pattern;
+            }
+        }
+
+        /// <summary>
+        /// Gets the phoneme expression associated with the pattern when the rule matches.
+        /// </summary>
+        public virtual IPhonemeExpr Phoneme
+        {
+            get
+            {
+                return phoneme;
+            }
+        }
+
+        /// <summary>
+        /// Gets the matcher for the right context, i.e. the expression that has to match the input following the pattern.
+        /// </summary>
+        public virtual IRPattern RContext
+        {
+            get
+            {
+                return rContext;
+            }
+        }
+
+        /// <summary>
+        /// Decides if the pattern and context match the input starting at a position. It is a match if the
+        /// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>, <see cref="Pattern"/> matches at <paramref name="i"/> and
+        /// <see cref="RContext"/> matches from the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
+        /// </summary>
+        /// <param name="input">The input <see cref="ICharSequence"/>.</param>
+        /// <param name="i">The int position within the input.</param>
+        /// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is negative.</exception>
+        public virtual bool PatternAndContextMatches(ICharSequence input, int i)
+        {
+            if (i < 0)
+            {
+                // Use the (paramName, message) overload: the single-string constructor
+                // would treat the text as the parameter name instead of the message.
+                throw new ArgumentOutOfRangeException("i", "Can not match pattern at negative indexes");
+            }
+
+            int patternLength = this.pattern.Length;
+
+            // Phrased as a subtraction so a very large i cannot overflow i + patternLength.
+            if (patternLength > input.Length - i)
+            {
+                // not enough room for the pattern to match
+                return false;
+            }
+
+            int ipl = i + patternLength;
+
+            // evaluate the pattern, left context and right context
+            // fail early if any of the evaluations is not successful
+            // NOTE(review): this relies on the sub-sequence's Equals accepting a string - confirm
+            // against the ICharSequence implementations in use.
+            if (!input.SubSequence(i, ipl).Equals(this.pattern))
+            {
+                return false;
+            }
+            else if (!this.rContext.IsMatch(input.SubSequence(ipl, input.Length)))
+            {
+                return false;
+            }
+            return this.lContext.IsMatch(input.SubSequence(0, i));
+        }
+
+        /// <summary>
+        /// Decides if the pattern and context match the input starting at a position. It is a match if the
+        /// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>, <see cref="Pattern"/> matches at <paramref name="i"/> and
+        /// <see cref="RContext"/> matches from the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
+        /// </summary>
+        /// <param name="input">The input <see cref="string"/>.</param>
+        /// <param name="i">The int position within the input.</param>
+        /// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is negative.</exception>
+        // LUCENENET specific
+        public virtual bool PatternAndContextMatches(string input, int i)
+        {
+            if (i < 0)
+            {
+                // Use the (paramName, message) overload: the single-string constructor
+                // would treat the text as the parameter name instead of the message.
+                throw new ArgumentOutOfRangeException("i", "Can not match pattern at negative indexes");
+            }
+
+            int patternLength = this.pattern.Length;
+
+            // Phrased as a subtraction so a very large i cannot overflow i + patternLength.
+            if (patternLength > input.Length - i)
+            {
+                // not enough room for the pattern to match
+                return false;
+            }
+
+            int ipl = i + patternLength;
+
+            // evaluate the pattern, left context and right context
+            // fail early if any of the evaluations is not successful
+            // The pattern is compared in place (ordinal), which avoids allocating a substring.
+            if (string.CompareOrdinal(input, i, this.pattern, 0, patternLength) != 0)
+            {
+                return false;
+            }
+            else if (!this.rContext.IsMatch(input.Substring(ipl, (input.Length - ipl))))
+            {
+                return false;
+            }
+            return this.lContext.IsMatch(input.Substring(0, i));
+        }
+
+        /// <summary>
+        /// Decides if the pattern and context match the input starting at a position. It is a match if the
+        /// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>, <see cref="Pattern"/> matches at <paramref name="i"/> and
+        /// <see cref="RContext"/> matches from the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
+        /// </summary>
+        /// <param name="input">The input <see cref="StringBuilder"/>.</param>
+        /// <param name="i">The int position within the input.</param>
+        /// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is negative.</exception>
+        // LUCENENET specific
+        public virtual bool PatternAndContextMatches(StringBuilder input, int i)
+        {
+            if (i < 0)
+            {
+                // Use the (paramName, message) overload: the single-string constructor
+                // would treat the text as the parameter name instead of the message.
+                throw new ArgumentOutOfRangeException("i", "Can not match pattern at negative indexes");
+            }
+
+            int patternLength = this.pattern.Length;
+
+            // Phrased as a subtraction so a very large i cannot overflow i + patternLength.
+            if (patternLength > input.Length - i)
+            {
+                // not enough room for the pattern to match
+                return false;
+            }
+
+            int ipl = i + patternLength;
+
+            // evaluate the pattern, left context and right context
+            // fail early if any of the evaluations is not successful
+            if (!input.ToString(i, patternLength).Equals(this.pattern))
+            {
+                return false;
+            }
+            else if (!this.rContext.IsMatch(input.ToString(ipl, (input.Length - ipl))))
+            {
+                return false;
+            }
+            return this.lContext.IsMatch(input.ToString(0, i));
+        }
+
+    }
+
+    /// <summary>
+    /// A piece of phoneme text paired with the <see cref="LanguageSet"/> it was created with.
+    /// The text is held in a private <see cref="StringBuilder"/> and can grow through <see cref="Append(string)"/>.
+    /// </summary>
+    public sealed class Phoneme : IPhonemeExpr
+    {
+        /// <summary>
+        /// Orders phonemes by their text: the first differing character decides (by <see cref="char"/> value);
+        /// when one text is a prefix of the other, the shorter one sorts first.
+        /// </summary>
+        private class PhonemeComparer : IComparer<Phoneme>
+        {
+            public int Compare(Phoneme o1, Phoneme o2)
+            {
+                StringBuilder left = o1.phonemeText;
+                StringBuilder right = o2.phonemeText;
+                int leftLength = left.Length;
+                int rightLength = right.Length;
+
+                // walk the part both texts have in common
+                for (int index = 0; index < leftLength && index < rightLength; index++)
+                {
+                    int difference = left[index] - right[index];
+                    if (difference != 0)
+                    {
+                        return difference;
+                    }
+                }
+
+                // one text is a prefix of the other (or they are equal): the length decides
+                if (leftLength > rightLength)
+                {
+                    return +1;
+                }
+
+                return leftLength < rightLength ? -1 : 0;
+            }
+        }
+
+        /// <summary>
+        /// Shared comparer that orders phonemes by their text.
+        /// </summary>
+        public static readonly IComparer<Phoneme> COMPARER = new PhonemeComparer();
+        private readonly StringBuilder phonemeText;
+        private readonly LanguageSet languages;
+
+        /// <summary>
+        /// Creates a phoneme from a string.
+        /// </summary>
+        /// <param name="phonemeText">The initial phoneme text.</param>
+        /// <param name="languages">The languages stored with this phoneme.</param>
+        public Phoneme(string phonemeText, LanguageSet languages)
+        {
+            this.languages = languages;
+            this.phonemeText = new StringBuilder(phonemeText);
+        }
+
+        /// <summary>
+        /// Creates a phoneme from a copy of the current content of a <see cref="StringBuilder"/>.
+        /// </summary>
+        /// <param name="phonemeText">The builder whose content is copied.</param>
+        /// <param name="languages">The languages stored with this phoneme.</param>
+        public Phoneme(StringBuilder phonemeText, LanguageSet languages)
+        {
+            string snapshot = phonemeText.ToString();
+            this.phonemeText = new StringBuilder(snapshot);
+            this.languages = languages;
+        }
+
+        /// <summary>
+        /// Creates a phoneme from the string form of an <see cref="ICharSequence"/>.
+        /// </summary>
+        /// <param name="phonemeText">The sequence whose string form is copied.</param>
+        /// <param name="languages">The languages stored with this phoneme.</param>
+        public Phoneme(ICharSequence phonemeText, LanguageSet languages)
+        {
+            string snapshot = phonemeText.ToString();
+            this.phonemeText = new StringBuilder(snapshot);
+            this.languages = languages;
+        }
+
+        /// <summary>
+        /// Creates a phoneme whose text is the left text followed by the right text,
+        /// carrying the languages of <paramref name="phonemeLeft"/>.
+        /// </summary>
+        /// <param name="phonemeLeft">Supplies the leading text and the languages.</param>
+        /// <param name="phonemeRight">Supplies the trailing text.</param>
+        public Phoneme(Phoneme phonemeLeft, Phoneme phonemeRight)
+            : this(phonemeLeft.phonemeText, phonemeLeft.languages)
+        {
+            phonemeText.Append(phonemeRight.phonemeText);
+        }
+
+        /// <summary>
+        /// Creates a phoneme whose text is the left text followed by the right text,
+        /// carrying the explicitly supplied <paramref name="languages"/>.
+        /// </summary>
+        /// <param name="phonemeLeft">Supplies the leading text.</param>
+        /// <param name="phonemeRight">Supplies the trailing text.</param>
+        /// <param name="languages">The languages stored with this phoneme.</param>
+        public Phoneme(Phoneme phonemeLeft, Phoneme phonemeRight, LanguageSet languages)
+            : this(phonemeLeft.phonemeText, languages)
+        {
+            phonemeText.Append(phonemeRight.phonemeText);
+        }
+
+        /// <summary>
+        /// Appends <paramref name="str"/> to this phoneme's text in place.
+        /// </summary>
+        /// <param name="str">The text to append.</param>
+        /// <returns>This same instance.</returns>
+        public Phoneme Append(string str)
+        {
+            phonemeText.Append(str);
+            return this;
+        }
+
+        /// <summary>
+        /// Gets the languages stored with this phoneme.
+        /// </summary>
+        public LanguageSet Languages
+        {
+            get
+            {
+                return languages;
+            }
+        }
+
+        /// <summary>
+        /// Gets a newly allocated one-element array containing this phoneme.
+        /// </summary>
+        public IList<Phoneme> Phonemes
+        {
+            get
+            {
+                return new[] { this };
+            }
+        }
+
+        /// <summary>
+        /// Gets the current phoneme text as a string.
+        /// </summary>
+        /// <returns>The phoneme text.</returns>
+        public string GetPhonemeText()
+        {
+            return phonemeText.ToString();
+        }
+
+        /// <summary>
+        /// Builds a new phoneme from this text followed by the text of <paramref name="right"/>, with this
+        /// phoneme's languages restricted to those of <paramref name="right"/>.
+        /// </summary>
+        /// <param name="right">The phoneme to join on the right.</param>
+        /// <returns>A new phoneme; neither operand is modified.</returns>
+        [Obsolete("since 1.9")]
+        public Phoneme Join(Phoneme right)
+        {
+            string joinedText = this.phonemeText.ToString() + right.phonemeText.ToString();
+            LanguageSet sharedLanguages = this.languages.RestrictTo(right.Languages);
+            return new Phoneme(joinedText, sharedLanguages);
+        }
+    }
+
+    public interface IPhonemeExpr // phoneme expression; implemented in this file by Phoneme and PhonemeList
+    {
+        IList<Phoneme> Phonemes { get; } // the phonemes this expression stands for
+    }
+
+    /// <summary>
+    /// An <see cref="IPhonemeExpr"/> backed by a list of phonemes.
+    /// The list passed to the constructor is stored as-is, not copied.
+    /// </summary>
+    public sealed class PhonemeList : IPhonemeExpr
+    {
+        private readonly IList<Phoneme> phonemes;
+
+        /// <summary>
+        /// Wraps the given list of phonemes.
+        /// </summary>
+        /// <param name="phonemes">The phonemes this expression stands for.</param>
+        public PhonemeList(IList<Phoneme> phonemes)
+        {
+            this.phonemes = phonemes;
+        }
+
+        /// <summary>
+        /// Gets the list that was supplied to the constructor.
+        /// </summary>
+        public IList<Phoneme> Phonemes
+        {
+            get
+            {
+                return phonemes;
+            }
+        }
+    }
+
+    /// <summary>
+    /// A minimal wrapper around the pattern matching that <see cref="Rule"/> uses for its left and right contexts, to allow for alternate implementations.
+    /// </summary>
+    public interface IRPattern
+    {
+        bool IsMatch(ICharSequence input); // true when the pattern matches the given character sequence
+        bool IsMatch(string input); // same check for a string
+        bool IsMatch(StringBuilder input); // same check for a StringBuilder
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/RuleType.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/RuleType.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/RuleType.cs
new file mode 100644
index 0000000..ff3af97
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/RuleType.cs
@@ -0,0 +1,68 @@
+// commons-codec version compatibility level: 1.9
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Types of rule. Each value has a lower-case name available through <see cref="RuleTypeExtensions.GetName(RuleType)"/>.
+    /// <para/>
+    /// since 1.6 (presumably the commons-codec version this was ported from)
+    /// </summary>
+    public enum RuleType
+    {
+        /// <summary>
+        /// Approximate rules, which will lead to the largest number of phonetic interpretations.
+        /// </summary>
+        APPROX, // name: "approx"
+
+        /// <summary>
+        /// Exact rules, which will lead to a minimum number of phonetic interpretations.
+        /// </summary>
+        EXACT, // name: "exact"
+
+        /// <summary>
+        /// For internal use only. Please use <see cref="APPROX"/> or <see cref="EXACT"/>.
+        /// </summary>
+        RULES // name: "rules"
+    }
+
+    /// <summary>
+    /// Extension methods for <see cref="RuleType"/>.
+    /// </summary>
+    public static class RuleTypeExtensions
+    {
+        /// <summary>
+        /// Gets the lower-case name of a <see cref="RuleType"/>.
+        /// </summary>
+        /// <param name="ruleType">The <see cref="RuleType"/>.</param>
+        /// <returns><c>"approx"</c>, <c>"exact"</c> or <c>"rules"</c>.</returns>
+        /// <exception cref="ArgumentException"><paramref name="ruleType"/> is not one of the declared values.</exception>
+        public static string GetName(this RuleType ruleType)
+        {
+            if (ruleType == RuleType.APPROX)
+            {
+                return "approx";
+            }
+
+            if (ruleType == RuleType.EXACT)
+            {
+                return "exact";
+            }
+
+            if (ruleType == RuleType.RULES)
+            {
+                return "rules";
+            }
+
+            // any other numeric value cast to RuleType ends up here
+            throw new ArgumentException("Invalid ruleType");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_any.txt
new file mode 100644
index 0000000..3f4f4c9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_any.txt
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// CONSONANTS
+"ph"    ""  ""  "f" // foreign
+"sh"    ""  ""  "S" // foreign
+"kh"    ""  ""  "x" // foreign
+
+"gli"   ""  ""  "(gli|l[italian])"
+"gni"   ""  ""  "(gni|ni[italian+french])"
+"gn"    ""  "[aeou]"    "(n[italian+french]|nj[italian+french]|gn)"
+"gh"    ""  ""  "g" // It + translit. from Arabic
+"dh"    ""  ""  "d" // translit. from Arabic
+"bh"    ""  ""  "d" // translit. from Arabic
+"th"    ""  ""  "t" // translit. from Arabic
+"lh"    ""  ""  "l" // Port
+"nh"    ""  ""  "nj" // Port
+
+"ig"    "[aeiou]"   ""  "(ig|tS[spanish])"
+"ix"    "[aeiou]"   ""  "S" // Sp
+"tx"    ""  ""  "tS" // Sp
+"tj"    ""  "$"  "tS" // Sp
+"tj"    ""  ""  "dZ" // Sp
+"tg"    ""  ""  "(tg|dZ[spanish])"
+
+"gi"    ""  "[aeou]"    "dZ" // Italian
+"g" ""  "y" "Z" // French
+"gg"    ""  "[ei]"  "(gZ[portuguese+french]|dZ[italian+spanish]|x[spanish])"
+"g" ""  "[ei]"  "(Z[portuguese+french]|dZ[italian+spanish]|x[spanish])"
+
+"guy"   ""  ""  "gi"
+"gue"   ""  "$" "(k[french]|ge)"
+"gu"    ""  "[ei]"  "(g|gv)" // not It
+"gu"    ""  "[ao]"  "gv" // not It
+
+"ñ" ""  ""  "(n|nj)"
+"ny"    ""  ""  "nj"
+
+"sc"    ""  "[ei]"  "(s|S[italian])"
+"sç"    ""  "[aeiou]"   "s" // not It
+"ss"    ""  ""  "s"
+"ç" ""  ""  "s"   // not It
+
+"ch"    ""  "[ei]"  "(k[italian]|S[portuguese+french]|tS[spanish]|dZ[spanish])"
+"ch"    ""  ""  "(S|tS[spanish]|dZ[spanish])"
+
+"ci"    ""  "[aeou]"    "(tS[italian]|si)"
+"cc"	""	"[eiyéèê]"	"(tS[italian]|ks[portuguese+french+spanish])"
+"c"	""	"[eiyéèê]"	"(tS[italian]|s[portuguese+french+spanish])"
+   //array("c"	""	"[aou]"	"(k|C[".($portuguese+$spanish)."])" // "C" means that the actual letter could be "ç" (cedille omitted)
+
+"s"	"^"	""	"s"
+"s"	"[aáuiíoóeéêy]"	"[aáuiíoóeéêy]"	"(s[spanish]|z[portuguese+french+italian])"
+"s"	""	"[dglmnrv]"	"(z|Z[portuguese])"
+
+"z"	""	"$"	"(s|ts[italian]|S[portuguese])" // ts It, s/S/Z Port, s in Sp, z Fr
+"z"	""	"[bdgv]"	"(z|dz[italian]|Z[portuguese])" // dz It, Z/z Port, z Sp & Fr
+"z"	""	"[ptckf]"	"(s|ts[italian]|S[portuguese])" // ts It, s/S/z Port, z/s Sp
+"z"	""	""	"(z|dz[italian]|ts[italian]|s[spanish])" // ts/dz It, z Port & Fr, z/s Sp
+
+"que"	""	"$"	"(k[french]|ke)"
+"qu"	""	"[eiu]"	"k"
+"qu"	""	"[ao]"	"(kv|k)" // k is It
+
+"ex"	""	"[aáuiíoóeéêy]"	"(ez[portuguese]|eS[portuguese]|eks|egz)"
+"ex"	""	"[cs]"	"(e[portuguese]|ek)"
+
+"m"	""	"[cdglnrst]"	"(m|n[portuguese])"
+"m"	""	"[bfpv]"	"(m|n[portuguese+spanish])"
+"m"	""	"$"	"(m|n[portuguese])"
+
+"b"	"^"	""	"(b|V[spanish])"
+"v"	"^"	""	"(v|B[spanish])"
+
+ // VOWELS
+"eau"	""	""	"o" // Fr
+
+"ouh"	""	"[aioe]"	"(v[french]|uh)"
+"uh"	""	"[aioe]"	"(v|uh)"
+"ou"	""	"[aioe]"	"v" // french
+"uo"	""	""	"(vo|o)"
+"u"	""	"[aie]"	"v"
+
+"i"	"[aáuoóeéê]"	""	"j"
+"i"	""	"[aeou]"	"j"
+"y"	"[aáuiíoóeéê]"	""	"j"
+"y"	""	"[aeiíou]"	"j"
+"e"	""	"$"	"(e|E[$french])"
+
+"ão"	""	""	"(au|an)" // Port
+"ãe"	""	""	"(aj|an)" // Port
+"ãi"	""	""	"(aj|an)" // Port
+"õe"	""	""	"(oj|on)" // Port
+"où"	""	""	"u" // Fr
+"ou"	""	""	"(ou|u[french])"
+
+"â"	""	""	"a" // Port & Fr
+"à"	""	""	"a" // Port
+"á"	""	""	"a" // Port & Sp
+"ã"	""	""	"(a|an)" // Port
+"é"	""	""	"e"
+"ê"	""	""	"e" // Port & Fr
+"è"	""	""	"e" // Sp & Fr & It
+"í"	""	""	"i" // Port & Sp
+"î"	""	""	"i" // Fr
+"ô"	""	""	"o" // Port & Fr
+"ó"	""	""	"o" // Port & Sp & It
+"õ"	""	""	"(o|on)" // Port
+"ò"	""	""	"o"  // Sp & It
+"ú"	""	""	"u" // Port & Sp
+"ü"	""	""	"u" // Port & Sp
+
+ // LATIN ALPHABET
+"a"	""	""	"a"
+"b"	""	""	"(b|v[spanish])"
+"c"	""	""	"k"
+"d"	""	""	"d"
+"e"	""	""	"e"
+"f"	""	""	"f"
+"g"	""	""	"g"
+"h"	""	""	"h"
+"i"	""	""	"i"
+"j"	""	""	"(x[spanish]|Z)" // not It
+"k"	""	""	"k"
+"l"	""	""	"l"
+"m"	""	""	"m"
+"n"	""	""	"n"
+"o"	""	""	"o"
+"p"	""	""	"p"
+"q"	""	""	"k"
+"r"	""	""	"r"
+"s"	""	""	"(s|S[portuguese])"
+"t"	""	""	"t"
+"u"	""	""	"u"
+"v"	""	""	"(v|b[spanish])"
+"w"	""	""	"v"    // foreign
+"x"	""	""	"(ks|gz|S[portuguese+spanish])"   // S/ks Port & Sp, gz Sp, It only ks
+"y"	""	""	"i"
+"z"	""	""	"z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_common.txt
new file mode 100644
index 0000000..e95a756
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_common.txt
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_exact_approx_common
+
+// REGRESSIVE ASSIMILATION OF CONSONANTS
+"n"	""	"[bp]"	"m" 
+
+// PECULIARITY OF "h" 
+"h"	""	""	"" 
+"H"	""	""	"(x|)" 
+
+// POLISH OGONEK IMPOSSIBLE
+"F" "" "[bdgkpstvzZ]h" "e"
+"F" "" "[bdgkpstvzZ]x" "e"
+"B" "" "[bdgkpstvzZ]h" "a"
+"B" "" "[bdgkpstvzZ]x" "a"
+
+// "e" and "i" ARE TO BE OMITTED BEFORE (SYLLABIC) n & l: Halperin=Halpern; Frankel = Frankl, Finkelstein = Finklstein
+"e" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"i" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"E" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"I" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"F" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"Q" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"Y" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+
+"e" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"i" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"E" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"I" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"F" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"Q" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"Y" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+
+"lEs"	""	""	"(lEs|lz)"  // Applebaum < Appelbaum (English + blend English-something forms as Finklestein)
+"lE"	"[bdfgkmnprStvzZ]"	""	"(lE|l)"  // Applebaum < Appelbaum (English + blend English-something forms as Finklestein)
+
+// SIMPLIFICATION: (TRIPHTHONGS & DIPHTHONGS) -> ONE GENERIC DIPHTHONG "D"
+"aue"	""	""	"D"
+"oue"	""	""	"D"
+    
+"AvE"	""	""	"(D|AvE)"
+"Ave"	""	""	"(D|Ave)"
+"avE"	""	""	"(D|avE)"
+"ave"	""	""	"(D|ave)"
+    
+"OvE"	""	""	"(D|OvE)"
+"Ove"	""	""	"(D|Ove)"
+"ovE"	""	""	"(D|ovE)"
+"ove"	""	""	"(D|ove)"
+    
+"ea"	""	""	"(D|ea)"
+"EA"	""	""	"(D|EA)"
+"Ea"	""	""	"(D|Ea)"
+"eA"	""	""	"(D|eA)"
+             
+"aji"	""	""	"D"
+"ajI"	""	""	"D"
+"aje"	""	""	"D"
+"ajE"	""	""	"D"
+    
+"Aji"	""	""	"D"
+"AjI"	""	""	"D"
+"Aje"	""	""	"D"
+"AjE"	""	""	"D"
+    
+"oji"	""	""	"D"
+"ojI"	""	""	"D"
+"oje"	""	""	"D"
+"ojE"	""	""	"D"
+    
+"Oji"	""	""	"D"
+"OjI"	""	""	"D"
+"Oje"	""	""	"D"
+"OjE"	""	""	"D"
+    
+"eji"	""	""	"D"
+"ejI"	""	""	"D"
+"eje"	""	""	"D"
+"ejE"	""	""	"D"
+    
+"Eji"	""	""	"D"
+"EjI"	""	""	"D"
+"Eje"	""	""	"D"
+"EjE"	""	""	"D"
+    
+"uji"	""	""	"D"
+"ujI"	""	""	"D"
+"uje"	""	""	"D"
+"ujE"	""	""	"D"
+    
+"Uji"	""	""	"D"
+"UjI"	""	""	"D"
+"Uje"	""	""	"D"
+"UjE"	""	""	"D"
+        
+"iji"	""	""	"D"
+"ijI"	""	""	"D"
+"ije"	""	""	"D"
+"ijE"	""	""	"D"
+    
+"Iji"	""	""	"D"
+"IjI"	""	""	"D"
+"Ije"	""	""	"D"
+"IjE"	""	""	"D"
+                         
+"aja"	""	""	"D"
+"ajA"	""	""	"D"
+"ajo"	""	""	"D"
+"ajO"	""	""	"D"
+"aju"	""	""	"D"
+"ajU"	""	""	"D"
+    
+"Aja"	""	""	"D"
+"AjA"	""	""	"D"
+"Ajo"	""	""	"D"
+"AjO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+    
+"oja"	""	""	"D"
+"ojA"	""	""	"D"
+"ojo"	""	""	"D"
+"ojO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+    
+"Oja"	""	""	"D"
+"OjA"	""	""	"D"
+"Ojo"	""	""	"D"
+"OjO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+    
+"eja"	""	""	"D"
+"ejA"	""	""	"D"
+"ejo"	""	""	"D"
+"ejO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+    
+"Eja"	""	""	"D"
+"EjA"	""	""	"D"
+"Ejo"	""	""	"D"
+"EjO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+    
+"uja"	""	""	"D"
+"ujA"	""	""	"D"
+"ujo"	""	""	"D"
+"ujO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+        
+"Uja"	""	""	"D"
+"UjA"	""	""	"D"
+"Ujo"	""	""	"D"
+"UjO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+        
+"ija"	""	""	"D"
+"ijA"	""	""	"D"
+"ijo"	""	""	"D"
+"ijO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+    
+"Ija"	""	""	"D"
+"IjA"	""	""	"D"
+"Ijo"	""	""	"D"
+"IjO"	""	""	"D"                         
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+                         
+"j"	""	""	"i"                         
+                         
+// lander = lender = länder 
+"lYndEr"	""	"$"	"lYnder" 
+"lander"	""	"$"	"lYnder" 
+"lAndEr"	""	"$"	"lYnder" 
+"lAnder"	""	"$"	"lYnder" 
+"landEr"	""	"$"	"lYnder" 
+"lender"	""	"$"	"lYnder" 
+"lEndEr"	""	"$"	"lYnder" 
+"lendEr"	""	"$"	"lYnder" 
+"lEnder"	""	"$"	"lYnder" 
+             
+// CONSONANTS {z & Z; s & S} are approximately interchangeable
+"s" "" "[rmnl]" "z"
+"S" "" "[rmnl]" "z"
+"s" "[rmnl]" "" "z"
+"S" "[rmnl]" "" "z"
+    
+"dS" "" "$" "S"
+"dZ" "" "$" "S"
+"Z" "" "$" "S"
+"S" "" "$" "(S|s)"
+"z" "" "$" "(S|s)"
+    
+"S" "" "" "s"
+"dZ" "" "" "z"
+"Z" "" "" "z"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_cyrillic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_cyrillic.txt
new file mode 100644
index 0000000..4210173
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_cyrillic.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_approx_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_english.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_english.txt
new file mode 100644
index 0000000..84d8174
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_english.txt
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// VOWELS
+"I" "" "[^aEIeiou]e" "(Q|i|D)" // like in "five"
+"I" "" "$" "i"
+"I" "[aEIeiou]" "" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "" "" "(i|Q)" 
+    
+"lE" "[bdfgkmnprsStvzZ]" "" "(il|li|lY)"  // Applebaum < Appelbaum
+         
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+        
+"E" "D[^aeiEIou]" "" "(i|)" // Weinberg, Shaneberg (shaneberg/shejneberg) --> shejnberg
+"e" "D[^aeiEIou]" "" "(i|)" 
+
+"e" "" "" "i"
+"E" "" "[fklmnprsStv]$" "i"
+"E" "" "ts$" "i"
+"E" "[DaoiEuQY]" "" "i"
+"E" "" "[aoQY]" "i"
+"E" "" "" "(Y|i)"
+      
+"a" "" "" "(a|o)"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_french.txt
new file mode 100644
index 0000000..fa8ee99
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_french.txt
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"I" "" "$" "i"
+"I" "[aEIeiou]" "" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "" "" "(i|Q)" 
+
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+      
+"a" "" "" "(a|o)" 
+"e" "" "" "i" 
+    
+"E" "" "[fklmnprsStv]$" "i"
+"E" "" "ts$" "i"
+"E" "[aoiuQ]" "" "i"
+"E" "" "[aoQ]" "i"
+"E" "" "" "(Y|i)"
\ No newline at end of file


Mime
View raw message