lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [11/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.
Date Tue, 27 Jun 2017 20:33:56 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_common.txt
new file mode 100644
index 0000000..fa0096a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_common.txt
@@ -0,0 +1,223 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERIC
+
+#include gen_exact_approx_common
+
+// DUTCH 
+"van" "^" "[bp]" "(vam|)" 
+"van" "^" "" "(van|)" 
+
+// REGRESSIVE ASSIMILATION OF CONSONANTS
+"n" "" "[bp]" "m" 
+        
+// PECULIARITY OF "h" 
+"h" "" "" "" 
+"H" "" "" "(x|)" 
+
+// "e" and "i" ARE TO BE OMITTED BEFORE (SYLLABIC) n & l: Halperin=Halpern; Frankel = Frankl, Finkelstein = Finklstein
+// but Andersen & Anderson should match
+"sen" "[rmnl]" "$" "(zn|zon)"
+"sen" "" "$" "(sn|son)"
+"sEn" "[rmnl]" "$" "(zn|zon)"
+"sEn" "" "$" "(sn|son)"
+            
+"e" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"i" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"E" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"I" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"Q" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"Y" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+
+"e" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"i" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"E" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"I" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"Q" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"Y" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+
+"lEs" "" "" "(lEs|lz)"  // Applebaum < Appelbaum (English + blended English-something forms such as Finklestein)
+"lE" "[bdfgkmnprStvzZ]" "" "(lE|l)"  // Applebaum < Appelbaum (English + blended English-something forms such as Finklestein)
+
+// SIMPLIFICATION: (TRIPHTHONGS & DIPHTHONGS) -> ONE GENERIC DIPHTHONG "D"
+"aue" "" "" "D"
+"oue" "" "" "D"
+    
+"AvE" "" "" "(D|AvE)"
+"Ave" "" "" "(D|Ave)"
+"avE" "" "" "(D|avE)"
+"ave" "" "" "(D|ave)"
+    
+"OvE" "" "" "(D|OvE)"
+"Ove" "" "" "(D|Ove)"
+"ovE" "" "" "(D|ovE)"
+"ove" "" "" "(D|ove)"
+    
+"ea" "" "" "(D|ea)"
+"EA" "" "" "(D|EA)"
+"Ea" "" "" "(D|Ea)"
+"eA" "" "" "(D|eA)"
+             
+"aji" "" "" "D"
+"ajI" "" "" "D"
+"aje" "" "" "D"
+"ajE" "" "" "D"
+    
+"Aji" "" "" "D"
+"AjI" "" "" "D"
+"Aje" "" "" "D"
+"AjE" "" "" "D"
+    
+"oji" "" "" "D"
+"ojI" "" "" "D"
+"oje" "" "" "D"
+"ojE" "" "" "D"
+    
+"Oji" "" "" "D"
+"OjI" "" "" "D"
+"Oje" "" "" "D"
+"OjE" "" "" "D"
+    
+"eji" "" "" "D"
+"ejI" "" "" "D"
+"eje" "" "" "D"
+"ejE" "" "" "D"
+    
+"Eji" "" "" "D"
+"EjI" "" "" "D"
+"Eje" "" "" "D"
+"EjE" "" "" "D"
+    
+"uji" "" "" "D"
+"ujI" "" "" "D"
+"uje" "" "" "D"
+"ujE" "" "" "D"
+    
+"Uji" "" "" "D"
+"UjI" "" "" "D"
+"Uje" "" "" "D"
+"UjE" "" "" "D"
+        
+"iji" "" "" "D"
+"ijI" "" "" "D"
+"ije" "" "" "D"
+"ijE" "" "" "D"
+    
+"Iji" "" "" "D"
+"IjI" "" "" "D"
+"Ije" "" "" "D"
+"IjE" "" "" "D"
+                         
+"aja" "" "" "D"
+"ajA" "" "" "D"
+"ajo" "" "" "D"
+"ajO" "" "" "D"
+"aju" "" "" "D"
+"ajU" "" "" "D"
+    
+"Aja" "" "" "D"
+"AjA" "" "" "D"
+"Ajo" "" "" "D"
+"AjO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+    
+"oja" "" "" "D"
+"ojA" "" "" "D"
+"ojo" "" "" "D"
+"ojO" "" "" "D"
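+"Aju" "" "" "D"  // NOTE(review): repeats the "Aju"/"AjU" rules of the "A" group above; "oju"/"ojU" presumably intended (the same pair recurs in each following group of this section) - confirm against upstream before changing
+"AjU" "" "" "D"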
+    
+"Oja" "" "" "D"
+"OjA" "" "" "D"
+"Ojo" "" "" "D"
+"OjO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+    
+"eja" "" "" "D"
+"ejA" "" "" "D"
+"ejo" "" "" "D"
+"ejO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+    
+"Eja" "" "" "D"
+"EjA" "" "" "D"
+"Ejo" "" "" "D"
+"EjO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+    
+"uja" "" "" "D"
+"ujA" "" "" "D"
+"ujo" "" "" "D"
+"ujO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+        
+"Uja" "" "" "D"
+"UjA" "" "" "D"
+"Ujo" "" "" "D"
+"UjO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+        
+"ija" "" "" "D"
+"ijA" "" "" "D"
+"ijo" "" "" "D"
+"ijO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+    
+"Ija" "" "" "D"
+"IjA" "" "" "D"
+"Ijo" "" "" "D"
+"IjO" "" "" "D"                         
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+                         
+"j" "" "" "i"                         
+                         
+// lander = lender = länder 
+"lYndEr" "" "$" "lYnder" 
+"lander" "" "$" "lYnder" 
+"lAndEr" "" "$" "lYnder" 
+"lAnder" "" "$" "lYnder" 
+"landEr" "" "$" "lYnder" 
+"lender" "" "$" "lYnder" 
+"lEndEr" "" "$" "lYnder" 
+"lendEr" "" "$" "lYnder" 
+"lEnder" "" "$" "lYnder" 
+             
+// CONSONANTS {z & Z; s & S} are approximately interchangeable
+"s" "" "[rmnl]" "z"
+"S" "" "[rmnl]" "z"
+"s" "[rmnl]" "" "z"
+"S" "[rmnl]" "" "z" 
+    
+"dS" "" "$" "S"
+"dZ" "" "$" "S"
+"Z" "" "$" "S"
+"S" "" "$" "(S|s)"
+"z" "" "$" "(S|s)"
+    
+"S" "" "" "s"
+"dZ" "" "" "z"
+"Z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_cyrillic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_cyrillic.txt
new file mode 100644
index 0000000..d470aa8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_cyrillic.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_czech.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_czech.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_czech.txt
new file mode 100644
index 0000000..b542861
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_czech.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_dutch.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_dutch.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_dutch.txt
new file mode 100644
index 0000000..b542861
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_dutch.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_english.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_english.txt
new file mode 100644
index 0000000..84d8174
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_english.txt
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// VOWELS
+"I" "" "[^aEIeiou]e" "(Q|i|D)" // as in "five"
+"I" "" "$" "i"
+"I" "[aEIeiou]" "" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "" "" "(i|Q)" 
+    
+"lE" "[bdfgkmnprsStvzZ]" "" "(il|li|lY)"  // Applebaum < Appelbaum
+         
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+        
+"E" "D[^aeiEIou]" "" "(i|)" // Weinberg, Shaneberg (shaneberg/shejneberg) --> shejnberg
+"e" "D[^aeiEIou]" "" "(i|)" 
+
+"e" "" "" "i"
+"E" "" "[fklmnprsStv]$" "i"
+"E" "" "ts$" "i"
+"E" "[DaoiEuQY]" "" "i"
+"E" "" "[aoQY]" "i"
+"E" "" "" "(Y|i)"
+      
+"a" "" "" "(a|o)"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_french.txt
new file mode 100644
index 0000000..93a4980
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_french.txt
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+
+"a" "" "" "(a|o)"
+"e" "" "" "i"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_german.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_german.txt
new file mode 100644
index 0000000..14a5db7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_german.txt
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+"I" "" "$" "i"
+"I" "[aeiAEIOUouQY]" "" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "" "" "(Q|i)" 
+    
+"AU" "" "" "(D|a|u)"
+"aU" "" "" "(D|a|u)"
+"Au" "" "" "(D|a|u)"
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"OU" "" "" "(D|o|u)"
+"oU" "" "" "(D|o|u)"
+"Ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"Ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"Oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+"Ui" "" "" "(D|u|i)"
+        
+"e" "" "" "i" 
+  
+"E" "" "[fklmnprst]$" "i"
+"E" "" "ts$" "i"
+"E" "" "$" "i"
+"E" "[DaoAOUiuQY]" "" "i"
+"E" "" "[aoAOQY]" "i"
+"E" "" "" "(Y|i)" 
+       
+"O" "" "$" "o"
+"O" "" "[fklmnprst]$" "o"
+"O" "" "ts$" "o"
+"O" "[aoAOUeiuQY]" "" "o"
+"O" "" "" "(o|Y)"
+    
+"a" "" "" "(a|o)" 
+  
+"A" "" "$" "(a|o)" 
+"A" "" "[fklmnprst]$" "(a|o)"
+"A" "" "ts$" "(a|o)"
+"A" "[aoeOUiuQY]" "" "(a|o)"
+"A" "" "" "(a|o|Y)" 
+
+"U" "" "$" "u"
+"U" "[DaoiuUQY]" "" "u"
+"U" "" "[^k]$" "u"
+"Uk" "[lr]" "$" "(uk|Qk)"
+"Uk" "" "$" "uk"
+"sUts" "" "$" "(suts|sQts)"
+"Uts" "" "$" "uts"
+"U" "" "" "(u|Q)" 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greek.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greek.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greek.txt
new file mode 100644
index 0000000..b542861
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greek.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greeklatin.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greeklatin.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greeklatin.txt
new file mode 100644
index 0000000..e492b97
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greeklatin.txt
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
+
+"N" "" "" ""

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hebrew.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hebrew.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hungarian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hungarian.txt
new file mode 100644
index 0000000..46ebf29
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hungarian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_italian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_italian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_italian.txt
new file mode 100644
index 0000000..46ebf29
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_italian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_polish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_polish.txt
new file mode 100644
index 0000000..ce577af
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_polish.txt
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+"aiB" "" "[bp]" "(D|Dm)"
+"oiB" "" "[bp]" "(D|Dm)" 
+"uiB" "" "[bp]" "(D|Dm)" 
+"eiB" "" "[bp]" "(D|Dm)"
+"EiB" "" "[bp]" "(D|Dm)"
+"iiB" "" "[bp]" "(D|Dm)"
+"IiB" "" "[bp]" "(D|Dm)"
+
+"aiB" "" "[dgkstvz]" "(D|Dn)"
+"oiB" "" "[dgkstvz]" "(D|Dn)" 
+"uiB" "" "[dgkstvz]" "(D|Dn)" 
+"eiB" "" "[dgkstvz]" "(D|Dn)"
+"EiB" "" "[dgkstvz]" "(D|Dn)"
+"iiB" "" "[dgkstvz]" "(D|Dn)"
+"IiB" "" "[dgkstvz]" "(D|Dn)"
+
+"B" "" "[bp]" "(o|om|im)" 
+"B" "" "[dgkstvz]" "(o|on|in)" 
+"B" "" "" "o"
+
+"aiF" "" "[bp]" "(D|Dm)"
+"oiF" "" "[bp]" "(D|Dm)" 
+"uiF" "" "[bp]" "(D|Dm)" 
+"eiF" "" "[bp]" "(D|Dm)"
+"EiF" "" "[bp]" "(D|Dm)"
+"iiF" "" "[bp]" "(D|Dm)"
+"IiF" "" "[bp]" "(D|Dm)"
+
+"aiF" "" "[dgkstvz]" "(D|Dn)"
+"oiF" "" "[dgkstvz]" "(D|Dn)" 
+"uiF" "" "[dgkstvz]" "(D|Dn)" 
+"eiF" "" "[dgkstvz]" "(D|Dn)"
+"EiF" "" "[dgkstvz]" "(D|Dn)"
+"iiF" "" "[dgkstvz]" "(D|Dn)"
+"IiF" "" "[dgkstvz]" "(D|Dn)"
+
+"F" "" "[bp]" "(i|im|om)"
+"F" "" "[dgkstvz]" "(i|in|on)"
+"F" "" "" "i" 
+
+"P" "" "" "(o|u)" 
+
+"I" "" "$" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "[aeiAEBFIou]" "" "i"
+"I" "" "" "(i|Q)" 
+
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+
+"a" "" "" "(a|o)" 
+"e" "" "" "i" 
+
+"E" "" "[fklmnprst]$" "i"
+"E" "" "ts$" "i"
+"E" "" "$" "i"
+"E" "[DaoiuQ]" "" "i"
+"E" "" "[aoQ]" "i"
+"E" "" "" "(Y|i)" 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_portuguese.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_portuguese.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_portuguese.txt
new file mode 100644
index 0000000..b542861
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_portuguese.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_romanian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_romanian.txt
new file mode 100644
index 0000000..f5c5894
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_romanian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_polish
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_russian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_russian.txt
new file mode 100644
index 0000000..9138487
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_russian.txt
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// VOWELS
+"I" "" "$" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "[aeiEIou]" "" "i"
+"I" "" "" "(i|Q)" 
+        
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+
+"om" "" "[bp]" "(om|im)" 
+"on" "" "[dgkstvz]" "(on|in)" 
+"em" "" "[bp]" "(im|om)" 
+"en" "" "[dgkstvz]" "(in|on)" 
+"Em" "" "[bp]" "(im|Ym|om)" 
+"En" "" "[dgkstvz]" "(in|Yn|on)" 
+                    
+"a" "" "" "(a|o)" 
+"e" "" "" "i" 
+    
+"E" "" "[fklmnprsStv]$" "i"
+"E" "" "ts$" "i"
+"E" "[DaoiuQ]" "" "i"
+"E" "" "[aoQ]" "i"
+"E" "" "" "(Y|i)" 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_spanish.txt
new file mode 100644
index 0000000..fb3e661
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_spanish.txt
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
+
+"B" "" "" "(b|v)"
+"V" "" "" "(b|v)"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_turkish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_turkish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_turkish.txt
new file mode 100644
index 0000000..b542861
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_turkish.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_any.txt
new file mode 100644
index 0000000..28fafb9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_any.txt
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+   // A, E, I, O, P, U should create variants, 
+   // EE = final "e" (english & french)
+   // V, B from Spanish
+   // but a, e, i, o, u should not create any new variant
+"EE" "" "$" "e"
+    
+"A" "" "" "a"
+"E" "" "" "e"
+"I" "" "" "i"
+"O" "" "" "o"
+"P" "" "" "o"
+"U" "" "" "u"
+    
+"B" "" "" "b"
+"V" "" "" "v"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_approx_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_approx_common.txt
new file mode 100644
index 0000000..1093912
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_approx_common.txt
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+"h" "" "$" ""
+
+// VOICED - UNVOICED CONSONANTS
+"b" "" "[fktSs]" "p"
+"b" "" "p" ""
+"b" "" "$" "p"
+"p" "" "[vgdZz]" "b" // Ashk: "v" excluded (everywhere)
+"p" "" "b" ""
+    
+"v" "" "[pktSs]" "f"
+"v" "" "f" ""
+"v" "" "$" "f"
+"f" "" "[vbgdZz]" "v"
+"f" "" "v" ""
+    
+"g" "" "[pftSs]" "k"
+"g" "" "k" ""
+"g" "" "$" "k"
+"k" "" "[vbdZz]" "g"
+"k" "" "g" ""
+    
+"d" "" "[pfkSs]" "t"
+"d" "" "t" ""
+"d" "" "$" "t"
+"t" "" "[vbgZz]" "d"
+"t" "" "d" ""
+    
+"s" "" "dZ" ""
+"s" "" "tS" ""
+    
+"z" "" "[pfkSt]" "s"
+"z" "" "[sSzZ]" ""
+"s" "" "[sSzZ]" ""
+"Z" "" "[sSzZ]" ""
+"S" "" "[sSzZ]" ""
+       
+// SIMPLIFICATION OF CONSONANT CLUSTERS
+"jnm" "" "" "jm"
+
+// DOUBLE --> SINGLE
+"ji" "^" "" "i"
+"jI" "^" "" "I"
+        
+"a" "" "[aA]" "" 
+"a" "A" "" "" 
+"A" "" "A" ""
+       
+"b" "" "b" ""
+"d" "" "d" ""
+"f" "" "f" ""
+"g" "" "g" ""
+"j" "" "j" ""
+"k" "" "k" ""
+"l" "" "l" ""
+"m" "" "m" ""
+"n" "" "n" ""
+"p" "" "p" ""
+"r" "" "r" ""
+"t" "" "t" ""
+"v" "" "v" ""
+"z" "" "z" ""

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_arabic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_arabic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_arabic.txt
new file mode 100644
index 0000000..4f2ead1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_arabic.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"l" "" "" ""
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_common.txt
new file mode 100644
index 0000000..742fc71
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_common.txt
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_exact_approx_common
+
+"H" "" "" ""
+ 
+// VOICED - UNVOICED CONSONANTS
+"s" "[^t]" "[bgZd]" "z"
+"Z" "" "[pfkst]" "S"
+"Z" "" "$" "S"
+"S" "" "[bgzd]" "Z"
+"z" "" "$" "s"
+    
+"ji" "[aAoOeEiIuU]" "" "j"
+"jI" "[aAoOeEiIuU]" "" "j"
+"je" "[aAoOeEiIuU]" "" "j"
+"jE" "[aAoOeEiIuU]" "" "j"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_cyrillic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_cyrillic.txt
new file mode 100644
index 0000000..474f61b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_cyrillic.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_exact_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_czech.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_czech.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_czech.txt
new file mode 100644
index 0000000..474f61b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_czech.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_exact_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_dutch.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_dutch.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_dutch.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_dutch.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_english.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_english.txt
new file mode 100644
index 0000000..474f61b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_english.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_exact_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_french.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_french.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_german.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_german.txt
new file mode 100644
index 0000000..7a648f2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_german.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_exact_any
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greek.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greek.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greek.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greek.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greeklatin.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greeklatin.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greeklatin.txt
new file mode 100644
index 0000000..325ff34
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greeklatin.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"N" "" "" "n"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hebrew.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hebrew.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hungarian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hungarian.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hungarian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_italian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_italian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_italian.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_italian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_polish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_polish.txt
new file mode 100644
index 0000000..babed2a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_polish.txt
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"B" "" "" "a"
+"F" "" "" "e"
+"P" "" "" "o"
+
+"E" "" "" "e"
+"I" "" "" "i"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_portuguese.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_portuguese.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_portuguese.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_portuguese.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_romanian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_romanian.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_romanian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_russian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_russian.txt
new file mode 100644
index 0000000..0a016e0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_russian.txt
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"E" "" "" "e"
+"I" "" "" "i"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_spanish.txt
new file mode 100644
index 0000000..e555114
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_spanish.txt
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"B" "" "" "b"
+"V" "" "" "v"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_turkish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_turkish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_turkish.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_turkish.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_hebrew_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_hebrew_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_hebrew_common.txt
new file mode 100644
index 0000000..2ae2d9d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_hebrew_common.txt
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_exact_approx_common
+
+"ts" "" "" "C" // to avoid confusing Gutes [=guts] with Guts [=guc]
+"tS" "" "" "C" // same reason
+"S" "" "" "s"
+"p" "" "" "f"   
+"b" "^" "" "b"    
+"b" "" "" "(b|v)"    
+        
+"ja" "" "" "i"
+"jA" "" "" "i"  
+"je" "" "" "i"
+"jE" "" "" "i"
+"aj" "" "" "i"
+"Aj" "" "" "i"
+"I" "" "" "i"
+"j" "" "" "i"
+    
+"a" "^" "" "1"
+"A" "^" "" "1"
+"e" "^" "" "1"
+"E" "^" "" "1"
+"Y" "^" "" "1"
+    
+"a" "" "$" "1"
+"A" "" "$" "1"
+"e" "" "$" "1"
+"E" "" "$" "1"
+"Y" "" "$" "1"
+    
+"a" "" "" ""
+"A" "" "" ""
+"e" "" "" ""
+"E" "" "" ""
+"Y" "" "" ""
+    
+"oj" "^" "" "(u|vi)"
+"Oj" "^" "" "(u|vi)"
+"uj" "^" "" "(u|vi)"
+"Uj" "^" "" "(u|vi)" 
+    
+"oj" "" "" "u"
+"Oj" "" "" "u"
+"uj" "" "" "u"
+"Uj" "" "" "u" 
+    
+"ou" "^" "" "(u|v|1)"
+"o" "^" "" "(u|v|1)"
+"O" "^" "" "(u|v|1)"
+"U" "^" "" "(u|v|1)"
+"u" "^" "" "(u|v|1)"
+    
+"o" "" "$" "(u|1)"
+"O" "" "$" "(u|1)"
+"u" "" "$" "(u|1)"
+"U" "" "$" "(u|1)"
+    
+"ou" "" "" "u"
+"o" "" "" "u"
+"O" "" "" "u"
+"U" "" "" "u"
+        
+"VV" "" "" "u" // alef/ayin + vov from ruleshebrew
+"V" "" "" "v" // tsvey-vov from ruleshebrew; only Ashkenazic
+"L" "^" "" "1" // alef/ayin from  ruleshebrew
+"L" "" "$" "1" // alef/ayin from  ruleshebrew
+"L" "" "" " " // alef/ayin from  ruleshebrew
+"WW" "^" "" "(vi|u)" // vav-yod from  ruleshebrew
+"WW" "" "" "u" // vav-yod from  ruleshebrew
+"W" "^" "" "(u|v)" // vav from  ruleshebrew
+"W" "" "" "u" // vav from  ruleshebrew
+    
+    //"g" "" "" "(g|Z)"
+    //"z" "" "" "(z|Z)"
+    //"d" "" "" "(d|dZ)"
+   
+"TB" "" "$" "(t|s)" // tav from ruleshebrew; only Ashkenazic
+"TB" "" "" "t" // tav from ruleshebrew; only Ashkenazic    
+"T" "" "" "t"   // tet from  ruleshebrew
+    
+   //"k" "" "" "(k|x)"
+   //"x" "" "" "(k|x)"
+"K" "" "" "k" // kof and initial kaf from ruleshebrew
+"X" "" "" "x" // khet and final kaf from ruleshebrew
+    
+"H" "^" "" "(x|1)"
+"H" "" "$" "(x|1)"
+"H" "" "" "(x|)"
+"h" "^" "" "1"
+"h" "" "" ""
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_languages.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_languages.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_languages.txt
new file mode 100644
index 0000000..50f1118
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_languages.txt
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+any
+arabic
+cyrillic
+czech
+dutch
+english
+french
+german
+greek
+greeklatin
+hebrew
+hungarian
+italian
+polish
+portuguese
+romanian
+russian
+spanish
+turkish

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_any.txt
new file mode 100644
index 0000000..57bb939
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_any.txt
@@ -0,0 +1,367 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+  // format of each entry rule in the table
+  //   (pattern, left context, right context, phonetic)
+  // where
+  //   pattern is a sequence of characters that might appear in the word to be transliterated
+  //   left context is the context that precedes the pattern
+  //   right context is the context that follows the pattern
+  //   phonetic is the result that this rule generates
+  //
+  // note that both left context and right context can be regular expressions
+  // ex: left context of ^ would mean start of word
+  //     left context of [aeiouy] means following a vowel
+  //     right context of [^aeiouy] means preceding a consonant
+  //     right context of e$ means preceding a final e
+
+//GENERIC
+
+// CONVERTING FEMININE TO MASCULINE
+"yna" "" "$" "(in[russian]|ina)" 
+"ina" "" "$" "(in[russian]|ina)" 
+"liova" "" "$" "(lova|lof[russian]|lef[russian])"
+"lova" "" "$" "(lova|lof[russian]|lef[russian]|l[czech]|el[czech])"   
+"kova" "" "$" "(kova|kof[russian]|k[czech]|ek[czech])"   
+"ova" "" "$" "(ova|of[russian]|[czech])"   
+"ová" "" "$" "(ova|[czech])"   
+"eva" "" "$" "(eva|ef[russian])"   
+"aia" "" "$" "(aja|i[russian])"
+"aja" "" "$" "(aja|i[russian])" 
+"aya" "" "$" "(aja|i[russian])" 
+    
+"lowa" "" "$" "(lova|lof[polish]|l[polish]|el[polish])"   
+"kowa" "" "$" "(kova|kof[polish]|k[polish]|ek[polish])"   
+"owa" "" "$" "(ova|of[polish]|)"   
+"lowna" "" "$" "(lovna|levna|l[polish]|el[polish])" 
+"kowna" "" "$" "(kovna|k[polish]|ek[polish])"  
+"owna" "" "$" "(ovna|[polish])"  
+"lówna" "" "$" "(l|el)"  // polish
+"kówna" "" "$" "(k|ek)"  // polish
+"ówna" "" "$" ""         // polish
+"á" "" "$" "(a|i[czech])" 
+"a" "" "$" "(a|i[polish+czech])" 
+    
+// CONSONANTS
+"pf" "" "" "(pf|p|f)" 
+"que" "" "$" "(k[french]|ke|kve)"
+"qu" "" "" "(kv|k)" 
+ 
+"m" "" "[bfpv]" "(m|n)" 
+"m" "[aeiouy]" "[aeiouy]" "m"  
+"m" "[aeiouy]" "" "(m|n[french+portuguese])"  // nasal
+ 
+"ly" "" "[au]" "l" 
+"li" "" "[au]" "l" 
+"lio" "" "" "(lo|le[russian])" 
+"lyo" "" "" "(lo|le[russian])" 
+  //array("ll" "" "" "(l|J[spanish])"  // Disabled Argentinian rule
+"lt" "u" "$" "(lt|[french])" 
+    
+"v" "^" "" "(v|f[german]|b[spanish])" 
+
+"ex" "" "[aáuiíoóeéêy]" "(ez[portuguese]|eS[portuguese]|eks|egz)" 
+"ex" "" "[cs]" "(e[portuguese]|ek)" 
+"x" "u" "$" "(ks|[french])" 
+   
+"ck" "" "" "(k|tsk[polish+czech])"
+"cz" "" "" "(tS|tsz[czech])" // Polish
+   
+    //Processing of "h" in various combinations
+"rh" "^" "" "r"
+"dh" "^" "" "d"
+"bh" "^" "" "b"
+     
+"ph" "" "" "(ph|f)"
+"kh" "" "" "(x[russian+english]|kh)"  
+  
+"lh" "" "" "(lh|l[portuguese])" 
+"nh" "" "" "(nh|nj[portuguese])" 
+        
+"ssch" "" "" "S"      // german
+"chsch" "" "" "xS"    // german
+"tsch" "" "" "tS"     // german 
+    
+    ///"desch" "^" "" "deS" 
+    ///"desh" "^" "" "(dES|de[french])" 
+    ///"des" "^" "[^aeiouy]" "(dEs|de[french])" 
+    
+"sch" "[aeiouy]" "[ei]" "(S|StS[russian]|sk[romanian+italian])" 
+"sch" "[aeiouy]" "" "(S|StS[russian])" 
+"sch" "" "[ei]" "(sk[romanian+italian]|S|StS[russian])"
+"sch" "" "" "(S|StS[russian])"
+"ssh" "" "" "S" 
+    
+"sh" "" "[äöü]" "sh"      // german 
+"sh" "" "[aeiou]" "(S[russian+english]|sh)"
+"sh" "" "" "S" 
+ 
+"zh" "" "" "(Z[english+russian]|zh|tsh[german])" 
+    
+"chs" "" "" "(ks[german]|xs|tSs[russian+english])" 
+"ch" "" "[ei]" "(x|tS[spanish+english+russian]|k[romanian+italian]|S[portuguese+french])" 
+"ch" "" "" "(x|tS[spanish+english+russian]|S[portuguese+french])"  
+ 
+"th" "^" "" "t"     // english+german+greeklatin
+"th" "" "[äöüaeiou]" "(t[english+german+greeklatin]|th)"
+"th" "" "" "t"  // english+german+greeklatin
+   
+"gh" "" "[ei]" "(g[romanian+italian+greeklatin]|gh)" 
+          
+"ouh" "" "[aioe]" "(v[french]|uh)"
+"uh" "" "[aioe]" "(v|uh)"
+"h" "." "$" "" // match h at the end of words, but not as a single letter
+"h" "[aeiouyäöü]" "" ""  // german
+"h" "^" "" "(h|x[romanian+greeklatin]|H[english+romanian+polish+french+portuguese+italian+spanish])" 
+         
+    //Processing of "ci" "ce" & "cy"
+"cia" "" "" "(tSa[polish]|tsa)"  // Polish
+"cią" "" "[bp]" "(tSom|tsom)"     // Polish
+"cią" "" "" "(tSon[polish]|tson)" // Polish
+"cię" "" "[bp]" "(tSem[polish]|tsem)" // Polish
+"cię" "" "" "(tSen[polish]|tsen)" // Polish
+"cie" "" "" "(tSe[polish]|tse)"  // Polish
+"cio" "" "" "(tSo[polish]|tso)"  // Polish
+"ciu" "" "" "(tSu[polish]|tsu)" // Polish
+
+"sci" "" "$" "(Si[italian]|stsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)" 
+"sc" "" "[ei]" "(S[italian]|sts[polish+czech]|dZ[turkish]|tS[polish+romanian]|s)" 
+"ci" "" "$" "(tsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)" 
+"cy" "" "" "(si|tsi[polish])" 
+"c" "" "[ei]" "(ts[polish+czech]|dZ[turkish]|tS[polish+romanian]|k[greeklatin]|s)" 
+      
+    //Processing of "s"      
+"sç" "" "[aeiou]" "(s|stS[turkish])"
+"ssz" "" "" "S" // polish
+"sz" "^" "" "(S|s[hungarian])" // polish
+"sz" "" "$" "(S|s[hungarian])" // polish
+"sz" "" "" "(S|s[hungarian]|sts[german])" // polish
+"ssp" "" "" "(Sp[german]|sp)"
+"sp" "" "" "(Sp[german]|sp)"
+"sst" "" "" "(St[german]|st)"
+"st" "" "" "(St[german]|st)" 
+"ss" "" "" "s"
+"sj" "^" "" "S" // dutch
+"sj" "" "$" "S" // dutch
+"sj" "" "" "(sj|S[dutch]|sx[spanish]|sZ[romanian+turkish])" 
+  
+"sia" "" "" "(Sa[polish]|sa[polish]|sja)" 
+"sią" "" "[bp]" "(Som[polish]|som)" // polish
+"sią" "" "" "(Son[polish]|son)" // polish
+"się" "" "[bp]" "(Sem[polish]|sem)" // polish
+"się" "" "" "(Sen[polish]|sen)" // polish
+"sie" "" "" "(se|sje|Se[polish]|zi[german])" 
+    
+"sio" "" "" "(So[polish]|so)" 
+"siu" "" "" "(Su[polish]|sju)" 
+     
+"si" "[äöëaáuiíoóeéêy]" "" "(Si[polish]|si|zi[portuguese+french+italian+german])"
+"si" "" "" "(Si[polish]|si|zi[german])"
+"s" "[aáuiíoóeéêy]" "[aáuíoóeéêy]" "(s|z[portuguese+french+italian+german])" 
+"s" "" "[aeouäöë]" "(s|z[german])"
+"s" "[aeiouy]" "[dglmnrv]" "(s|z|Z[portuguese]|[french])" // Groslot
+"s" "" "[dglmnrv]" "(s|z|Z[portuguese])" 
+                 
+    //Processing of "g"   
+"gue" "" "$" "(k[french]|gve)"  // portuguese+spanish
+"gu" "" "[ei]" "(g[french]|gv[portuguese+spanish])" // portuguese+spanish
+"gu" "" "[ao]" "gv"     // portuguese+spanish
+"guy" "" "" "gi"  // french
+    
+"gli" "" "" "(glI|l[italian])" 
+"gni" "" "" "(gnI|ni[italian+french])"
+"gn" "" "[aeou]" "(n[italian+french]|nj[italian+french]|gn)"
+    
+"ggie" "" "" "(je[greeklatin]|dZe)" // dZ is Italian
+"ggi" "" "[aou]" "(j[greeklatin]|dZ)" // dZ is Italian
+        
+"ggi" "[yaeiou]" "[aou]" "(gI|dZ[italian]|j[greeklatin])"  
+"gge" "[yaeiou]" "" "(gE|xe[spanish]|gZe[portuguese+french]|dZe[english+romanian+italian+spanish]|je[greeklatin])" 
+"ggi" "[yaeiou]" "" "(gI|xi[spanish]|gZi[portuguese+french]|dZi[english+romanian+italian+spanish]|i[greeklatin])" 
+"ggi" "" "[aou]" "(gI|dZ[italian]|j[greeklatin])" 
+    
+"gie" "" "$" "(ge|gi[german]|ji[french]|dZe[italian])" 
+"gie" "" "" "(ge|gi[german]|dZe[italian]|je[greeklatin])" 
+"gi" "" "[aou]" "(i[greeklatin]|dZ)" // dZ is Italian
+        
+"ge" "[yaeiou]" "" "(gE|xe[spanish]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])" 
+"gi" "[yaeiou]" "" "(gI|xi[spanish]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])" 
+"ge" "" "" "(gE|xe[spanish]|hE[russian]|je[greeklatin]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])" 
+"gi" "" "" "(gI|xi[spanish]|hI[russian]|i[greeklatin]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])" 
+"gy" "" "[aeouáéóúüöőű]" "(gi|dj[hungarian])"
+"gy" "" "" "(gi|d[hungarian])" 
+"g" "[yaeiou]" "[aouyei]" "g" 
+"g" "" "[aouei]" "(g|h[russian])" 
+    
+    //Processing of "j"        
+"ij" "" "" "(i|ej[dutch]|ix[spanish]|iZ[french+romanian+turkish+portuguese])" 
+"j" "" "[aoeiuy]" "(j|dZ[english]|x[spanish]|Z[french+romanian+turkish+portuguese])" 
+         
+    //Processing of "z"    
+"rz" "t" "" "(S[polish]|r)" // polish
+"rz" "" "" "(rz|rts[german]|Z[polish]|r[polish]|rZ[polish])" 
+        
+"tz" "" "$" "(ts|tS[english+german])" 
+"tz" "^" "" "(ts[english+german+russian]|tS[english+german])" 
+"tz" "" "" "(ts[english+german+russian]|tz)" 
+    
+"zia" "" "[bcdgkpstwzż]" "(Za[polish]|za[polish]|zja)" 
+"zia" "" "" "(Za[polish]|zja)" 
+"zią" "" "[bp]" "(Zom[polish]|zom)"  // polish
+"zią" "" "" "(Zon[polish]|zon)" // polish
+"zię" "" "[bp]" "(Zem[polish]|zem)" // polish
+"zię" "" "" "(Zen[polish]|zen)" // polish
+"zie" "" "[bcdgkpstwzż]" "(Ze[polish]|ze[polish]|ze|tsi[german])" 
+"zie" "" "" "(ze|Ze[polish]|tsi[german])" 
+"zio" "" "" "(Zo[polish]|zo)" 
+"ziu" "" "" "(Zu[polish]|zju)" 
+"zi" "" "" "(Zi[polish]|zi|tsi[german]|dzi[italian]|tsi[italian]|si[spanish])" 
+
+"z" "" "$" "(s|ts[german]|ts[italian]|S[portuguese])" // ts It, s/S/Z Port, s in Sp, z Fr
+"z" "" "[bdgv]" "(z|dz[italian]|Z[portuguese])" // dz It, Z/z Port, z Sp & Fr
+"z" "" "[ptckf]" "(s|ts[italian]|S[portuguese])" // ts It, s/S/z Port, z/s Sp
+              
+ // VOWELS  
+"aue" "" "" "aue" 
+"oue" "" "" "(oue|ve[french])" 
+"eau" "" "" "o" // French
+        
+"ae" "" "" "(Y[german]|aje[russian]|ae)" 
+"ai" "" "" "aj" 
+"au" "" "" "(au|o[french])" 
+"ay" "" "" "aj" 
+"ão" "" "" "(au|an)" // Port
+"ãe" "" "" "(aj|an)" // Port
+"ãi" "" "" "(aj|an)" // Port
+"ea" "" "" "(ea|ja[romanian])"
+"ee" "" "" "(i[english]|aje[russian]|e)" 
+"ei" "" "" "(aj|ej)"
+"eu" "" "" "(eu|Yj[german]|ej[german]|oj[german]|Y[dutch])"
+"ey" "" "" "(aj|ej)"
+"ia" "" "" "ja" 
+"ie" "" "" "(i[german]|e[polish]|ije[russian]|Q[dutch]|je)" 
+"ii" "" "$" "i" // russian
+"io" "" "" "(jo|e[russian])"
+"iu" "" "" "ju" 
+"iy" "" "$" "i" // russian
+"oe" "" "" "(Y[german]|oje[russian]|u[dutch]|oe)" 
+"oi" "" "" "oj" 
+"oo" "" "" "(u[english]|o)" 
+"ou" "" "" "(ou|u[french+greeklatin]|au[dutch])" 
+"où" "" "" "u" // french
+"oy" "" "" "oj" 
+"õe" "" "" "(oj|on)" // Port
+"ua" "" "" "va"
+"ue" "" "" "(Q[german]|uje[russian]|ve)" 
+"ui" "" "" "(uj|vi|Y[dutch])" 
+"uu" "" "" "(u|Q[dutch])" 
+"uo" "" "" "(vo|o)"
+"uy" "" "" "uj" 
+"ya" "" "" "ja" 
+"ye" "" "" "(je|ije[russian])"
+"yi" "^" "" "i"
+"yi" "" "$" "i" // russian
+"yo" "" "" "(jo|e[russian])"
+"yu" "" "" "ju" 
+"yy" "" "$" "i" // russian
+    
+"i" "[áóéê]" "" "j"
+"y" "[áóéê]" "" "j"
+         
+"e" "^" "" "(e|je[russian])" 
+"e" "" "$" "(e|EE[english+french])" 
+            
+// LANGUAGE SPECIFIC CHARACTERS 
+"ą" "" "[bp]" "om" // polish
+"ą" "" "" "on"  // polish
+"ä" "" "" "Y" 
+"á" "" "" "a" // Port & Sp
+"à" "" "" "a" 
+"â" "" "" "a" 
+"ã" "" "" "(a|an)" // Port
+"ă" "" "" "(e[romanian]|a)" // romanian
+"č" "" "" "tS" // czech
+"ć" "" "" "(tS[polish]|ts)"  // polish
+"ç" "" "" "(s|tS[turkish])"
+"ď" "" "" "(d|dj[czech])"
+"ę" "" "[bp]" "em" // polish
+"ę" "" "" "en" // polish
+"é" "" "" "e" 
+"è" "" "" "e" 
+"ê" "" "" "e" 
+"ě" "" "" "(e|je[czech])" 
+"ğ" "" "" "" // turkish
+"í" "" "" "i" 
+"î" "" "" "i" 
+"ı" "" "" "(i|e[turkish]|[turkish])" 
+"ł" "" "" "l" 
+"ń" "" "" "(n|nj[polish])" // polish
+"ñ" "" "" "(n|nj[spanish])" 
+"ó" "" "" "(u[polish]|o)"  
+"ô" "" "" "o" // Port & Fr
+"õ" "" "" "(o|on[portuguese]|Y[hungarian])" 
+"ò" "" "" "o"  // Sp & It
+"ö" "" "" "Y"
+"ř" "" "" "(r|rZ[czech])"
+"ś" "" "" "(S[polish]|s)" 
+"ş" "" "" "S" // romanian+turkish
+"š" "" "" "S" // czech
+"ţ" "" "" "ts"  // romanian
+"ť" "" "" "(t|tj[czech])"
+"ű" "" "" "Q" // hungarian
+"ü" "" "" "(Q|u[portuguese+spanish])"
+"ú" "" "" "u" 
+"ů" "" "" "u" // czech
+"ù" "" "" "u" // french
+"ý" "" "" "i"  // czech
+"ż" "" "" "Z" // polish
+"ź" "" "" "(Z[polish]|z)" 
+   
+"ß" "" "" "s" // german
+"'" "" "" "" // russian
+"\"" "" "" "" // russian
+ 
+"o" "" "[bcćdgklłmnńrsśtwzźż]" "(O|P[polish])"    
+    
+ // LATIN ALPHABET
+"a" "" "" "A"
+"b" "" "" "B" 
+"c" "" "" "(k|ts[polish+czech]|dZ[turkish])" 
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+   //array("g" "" "" "(g|x[dutch])" // Dutch sound disabled
+"g" "" "" "g"
+"h" "" "" "(h|x[romanian]|H[french+portuguese+italian+spanish])" 
+"i" "" "" "I"
+"j" "" "" "(j|x[spanish]|Z[french+romanian+turkish+portuguese])" 
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "O"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "(s|S[portuguese])" 
+"t" "" "" "t"
+"u" "" "" "U"
+"v" "" "" "V" 
+"w" "" "" "(v|w[english+dutch])"     
+"x" "" "" "(ks|gz|S[portuguese+spanish])"   // S/ks Port & Sp, gz Sp, It only ks
+"y" "" "" "i"
+"z" "" "" "(z|ts[german]|dz[italian]|ts[italian]|s[spanish])" // ts/dz It, z Port & Fr, z/s Sp

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_arabic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_arabic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_arabic.txt
new file mode 100644
index 0000000..00f85e8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_arabic.txt
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+"ا" "" "" "a" // alif isol & init 
+                
+"ب" "" "" "b1" // ba' isol
+        
+"ت" "" "" "t1" // ta' isol
+        
+"ث" "" "" "t1" // tha' isol
+
+"ج" "" "" "(dZ1|Z1)" // jim isol
+        
+"ح" "" "" "(h1|1)" // h.a' isol
+    
+"خ" "" "" "x1" // kha' isol
+    
+"د" "" "" "d1" // dal isol & init
+           
+"ذ" "" "" "d1" // dhal isol & init
+        
+"ر" "" "" "r1" // ra' isol & init
+    
+"ز" "" "" "z1" // za' isol & init
+        
+"س" "" "" "s1" // sin isol
+    
+"ش" "" "" "S1" // shin isol
+    
+"ص" "" "" "s1" // s.ad isol
+    
+"ض" "" "" "d1" // d.ad isol
+        
+"ط" "" "" "t1" // t.a' isol
+        
+"ظ" "" "" "z1" // z.a' isol
+        
+"ع" "" "" "(h1|1)" // ayin isol 
+    
+"غ" "" "" "g1" // ghayin isol
+    
+"ف" "" "" "f1" // fa' isol
+    
+"ق" "" "" "k1" // qaf isol
+    
+"ك" "" "" "k1" // kaf isol
+    
+"ل" "" "" "l1" // lam isol
+    
+"م" "" "" "m1" // mim isol
+    
+"ن" "" "" "n1" // nun isol
+    
+"ه" "" "" "(h1|1)" // h isol
+        
+"و" "" "" "(u|v1)" // waw, isol + init
+               
+    
+"ي‎" "" "" "(i|j1)" // ya' isol

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_cyrillic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_cyrillic.txt
new file mode 100644
index 0000000..6237de4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_cyrillic.txt
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+"ця" "" "" "tsa"
+"цю" "" "" "tsu"
+"циа" "" "" "tsa"
+"цие" "" "" "tse"
+"цио" "" "" "tso"
+"циу" "" "" "tsu"
+"сие" "" "" "se"
+"сио" "" "" "so"
+"зие" "" "" "ze"
+"зио" "" "" "zo"
+"с" "" "с" ""
+
+"гауз" "" "$" "haus"
+"гаус" "" "$" "haus"
+"гольц" "" "$" "holts"
+"геймер" "" "$" "(hejmer|hajmer)"
+"гейм" "" "$" "(hejm|hajm)"
+"гоф" "" "$" "hof"
+"гер" "" "$" "ger"
+"ген" "" "$" "gen"
+"гин" "" "$" "gin"
+"г" "(й|ё|я|ю|ы|а|е|о|и|у)" "(а|е|о|и|у)" "g"
+"г" "" "(а|е|о|и|у)" "(g|h)"
+
+"ля" "" "" "la"
+"лю" "" "" "lu"
+"лё" "" "" "(le|lo)"
+"лио" "" "" "(le|lo)"
+"ле" "" "" "(lE|lo)"
+
+"ийе" "" "" "je"
+"ие" "" "" "je"
+"ыйе" "" "" "je"
+"ые" "" "" "je"
+"ий" "" "(а|о|у)" "j"
+"ый" "" "(а|о|у)" "j"
+"ий" "" "$" "i"
+"ый" "" "$" "i"
+
+"ей" "^" "" "(jej|ej)"
+"е" "(а|е|о|у)" "" "je"
+"е" "^" "" "je"
+"эй" "" "" "ej"
+"ей" "" "" "ej"
+
+"ауе" "" "" "aue"
+"ауэ" "" "" "aue"
+
+"а" "" "" "a"
+"б" "" "" "b"
+"в" "" "" "v"
+"г" "" "" "g"
+"д" "" "" "d"
+"е" "" "" "E"
+"ё" "" "" "(e|jo)"
+"ж" "" "" "Z"
+"з" "" "" "z"
+"и" "" "" "I"
+"й" "" "" "j"
+"к" "" "" "k"
+"л" "" "" "l"
+"м" "" "" "m"
+"н" "" "" "n"
+"о" "" "" "o"
+"п" "" "" "p"
+"р" "" "" "r"
+"с" "" "" "s"
+"т" "" "" "t"
+"у" "" "" "u"
+"ф" "" "" "f"
+"х" "" "" "x"
+"ц" "" "" "ts"
+"ч" "" "" "tS"
+"ш" "" "" "S"
+"щ" "" "" "StS"
+"ъ" "" "" ""
+"ы" "" "" "I"
+"ь" "" "" ""
+"э" "" "" "E"
+"ю" "" "" "ju"
+"я" "" "" "ja"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_czech.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_czech.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_czech.txt
new file mode 100644
index 0000000..bc7a79c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_czech.txt
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"ch" "" "" "x"
+"qu" "" "" "(k|kv)"    
+"aue" "" "" "aue"
+"ei" "" "" "(ej|aj)"
+"i" "[aou]" "" "j"
+"i" "" "[aeou]" "j"
+
+"č" "" "" "tS"
+"š" "" "" "S"
+"ň" "" "" "n"
+"ť" "" "" "(t|tj)"
+"ď" "" "" "(d|dj)"
+"ř" "" "" "(r|rZ)"
+
+"á" "" "" "a"
+"é" "" "" "e"
+"í" "" "" "i"
+"ó" "" "" "o"
+"ú" "" "" "u"
+"ý" "" "" "i"
+"ě" "" "" "(e|je)"
+"ů" "" "" "u"
+
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "ts"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "(h|g)"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "(k|kv)"    
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    
+"x" "" "" "ks"    
+"y" "" "" "i"
+"z" "" "" "z" 


Mime
View raw message