commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ggreg...@apache.org
Subject svn commit: r1151311 [4/5] - in /commons/proper/codec/trunk: ./ src/java/org/apache/commons/codec/language/bm/ src/resources/ src/resources/org/ src/resources/org/apache/ src/resources/org/apache/commons/ src/resources/org/apache/commons/codec/ src/res...
Date Wed, 27 Jul 2011 02:29:35 GMT
Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,367 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+  // format of each entry rule in the table
+  //   (pattern, left context, right context, phonetic)
+  // where
+  //   pattern is a sequence of characters that might appear in the word to be transliterated
+  //   left context is the context that precedes the pattern
+  //   right context is the context that follows the pattern
+  //   phonetic is the result that this rule generates
+  //
+  // note that both left context and right context can be regular expressions
+  // ex: left context of ^ would mean start of word
+  //     left context of [aeiouy] means following a vowel
+  //     right context of [^aeiouy] means preceding a consonant
+  //     right context of e$ means preceding a final e
+
+//GENERIC
+
+// CONVERTING FEMININE TO MASCULINE
+"yna" "" "$" "(in[russian]|ina)" 
+"ina" "" "$" "(in[russian]|ina)" 
+"liova" "" "$" "(lova|lof[russian]|lef[russian])"
+"lova" "" "$" "(lova|lof[russian]|lef[russian]|l[czech]|el[czech])"   
+"kova" "" "$" "(kova|kof[russian]|k[czech]|ek[czech])"   
+"ova" "" "$" "(ova|of[russian]|[czech])"   
+"ová" "" "$" "(ova|[czech])"   
+"eva" "" "$" "(eva|ef[russian])"   
+"aia" "" "$" "(aja|i[russian])"
+"aja" "" "$" "(aja|i[russian])" 
+"aya" "" "$" "(aja|i[russian])" 
+    
+"lowa" "" "$" "(lova|lof[polish]|l[polish]|el[polish])"   
+"kowa" "" "$" "(kova|kof[polish]|k[polish]|ek[polish])"   
+"owa" "" "$" "(ova|of[polish]|)"   
+"lowna" "" "$" "(lovna|levna|l[polish]|el[polish])" 
+"kowna" "" "$" "(kovna|k[polish]|ek[polish])"  
+"owna" "" "$" "(ovna|[polish])"  
+"lówna" "" "$" "(l|el)"  // polish
+"kówna" "" "$" "(k|ek)"  // polish
+"ówna" "" "$" ""         // polish
+"á" "" "$" "(a|i[czech])" 
+"a" "" "$" "(a|i[polish+czech])" 
+    
+// CONSONANTS
+"pf" "" "" "(pf|p|f)" 
+"que" "" "$" "(k[french]|ke|kve)"
+"qu" "" "" "(kv|k)" 
+ 
+"m" "" "[bfpv]" "(m|n)" 
+"m" "[aeiouy]" "[aeiouy]" "m"  
+"m" "[aeiouy]" "" "(m|n[french+portuguese])"  // nasal
+ 
+"ly" "" "[au]" "l" 
+"li" "" "[au]" "l" 
+"lio" "" "" "(lo|le[russian])" 
+"lyo" "" "" "(lo|le[russian])" 
+  //array("ll" "" "" "(l|J[spanish])"  // Disabled Argentinian rule
+"lt" "u" "$" "(lt|[french])" 
+    
+"v" "^" "" "(v|f[german]|b[spanish])" 
+
+"ex" "" "[aáuiíoóeéêy]" "(ez[portuguese]|eS[portuguese]|eks|egz)" 
+"ex" "" "[cs]" "(e[portuguese]|ek)" 
+"x" "u" "$" "(ks|[french])" 
+   
+"ck" "" "" "(k|tsk[polish+czech])"
+"cz" "" "" "(tS|tsz[czech])" // Polish
+   
+    //Processing of "h" in various combinations         
+"rh" "^" "" "r"
+"dh" "^" "" "d"
+"bh" "^" "" "b"
+     
+"ph" "" "" "(ph|f)"
+"kh" "" "" "(x[russian+english]|kh)"  
+  
+"lh" "" "" "(lh|l[portuguese])" 
+"nh" "" "" "(nh|nj[portuguese])" 
+        
+"ssch" "" "" "S"      // german
+"chsch" "" "" "xS"    // german
+"tsch" "" "" "tS"     // german 
+    
+    ///"desch" "^" "" "deS" 
+    ///"desh" "^" "" "(dES|de[french])" 
+    ///"des" "^" "[^aeiouy]" "(dEs|de[french])" 
+    
+"sch" "[aeiouy]" "[ei]" "(S|StS[russian]|sk[romanian+italian])" 
+"sch" "[aeiouy]" "" "(S|StS[russian])" 
+"sch" "" "[ei]" "(sk[romanian+italian]|S|StS[russian])"
+"sch" "" "" "(S|StS[russian])"
+"ssh" "" "" "S" 
+    
+"sh" "" "[äöü]" "sh"      // german 
+"sh" "" "[aeiou]" "(S[russian+english]|sh)"
+"sh" "" "" "S" 
+ 
+"zh" "" "" "(Z[english+russian]|zh|tsh[german])" 
+    
+"chs" "" "" "(ks[german]|xs|tSs[russian+english])" 
+"ch" "" "[ei]" "(x|tS[spanish+english+russian]|k[romanian+italian]|S[portuguese+french])" 
+"ch" "" "" "(x|tS[spanish+english+russian]|S[portuguese+french])"  
+ 
+"th" "^" "" "t"     // english+german+greeklatin
+"th" "" "[äöüaeiou]" "(t[english+german+greeklatin]|th)"
+"th" "" "" "t"  // english+german+greeklatin
+   
+"gh" "" "[ei]" "(g[romanian+italian+greeklatin]|gh)" 
+          
+"ouh" "" "[aioe]" "(v[french]|uh)"
+"uh" "" "[aioe]" "(v|uh)" 
+"h" "" "$" "" 
+"h" "[aeiouyäöü]" "" ""  // german
+"h" "^" "" "(h|x[romanian+greeklatin]|H[english+romanian+polish+french+portuguese+italian+spanish])" 
+         
+    //Processing of "ci" "ce" & "cy"
+"cia" "" "" "(tSa[polish]|tsa)"  // Polish
+"cią" "" "[bp]" "(tSom|tsom)"     // Polish
+"cią" "" "" "(tSon[polish]|tson)" // Polish
+"cię" "" "[bp]" "(tSem[polish]|tsem)" // Polish
+"cię" "" "" "(tSen[polish]|tsen)" // Polish
+"cie" "" "" "(tSe[polish]|tse)"  // Polish
+"cio" "" "" "(tSo[polish]|tso)"  // Polish
+"ciu" "" "" "(tSu[polish]|tsu)" // Polish
+
+"sci" "" "$" "(Si[italian]|stsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)" 
+"sc" "" "[ei]" "(S[italian]|sts[polish+czech]|dZ[turkish]|tS[polish+romanian]|s)" 
+"ci" "" "$" "(tsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)" 
+"cy" "" "" "(si|tsi[polish])" 
+"c" "" "[ei]" "(ts[polish+czech]|dZ[turkish]|tS[polish+romanian]|k[greeklatin]|s)" 
+      
+    //Processing of "s"      
+"sç" "" "[aeiou]" "(s|stS[turkish])"
+"ssz" "" "" "S" // polish
+"sz" "^" "" "(S|s[hungarian])" // polish
+"sz" "" "$" "(S|s[hungarian])" // polish
+"sz" "" "" "(S|s[hungarian]|sts[german])" // polish
+"ssp" "" "" "(Sp[german]|sp)"
+"sp" "" "" "(Sp[german]|sp)"
+"sst" "" "" "(St[german]|st)"
+"st" "" "" "(St[german]|st)" 
+"ss" "" "" "s"
+"sj" "^" "" "S" // dutch
+"sj" "" "$" "S" // dutch
+"sj" "" "" "(sj|S[dutch]|sx[spanish]|sZ[romanian+turkish])" 
+  
+"sia" "" "" "(Sa[polish]|sa[polish]|sja)" 
+"sią" "" "[bp]" "(Som[polish]|som)" // polish
+"sią" "" "" "(Son[polish]|son)" // polish
+"się" "" "[bp]" "(Sem[polish]|sem)" // polish
+"się" "" "" "(Sen[polish]|sen)" // polish
+"sie" "" "" "(se|sje|Se[polish]|zi[german])" 
+    
+"sio" "" "" "(So[polish]|so)" 
+"siu" "" "" "(Su[polish]|sju)" 
+     
+"si" "[äöëaáuiíoóeéêy]" "" "(Si[polish]|si|zi[portuguese+french+italian+german])"
+"si" "" "" "(Si[polish]|si|zi[german])"
+"s" "[aáuiíoóeéêy]" "[aáuíoóeéêy]" "(s|z[portuguese+french+italian+german])" 
+"s" "" "[aeouäöë]" "(s|z[german])"
+"s" "[aeiouy]" "[dglmnrv]" "(s|z|Z[portuguese]|[french])" // Groslot
+"s" "" "[dglmnrv]" "(s|z|Z[portuguese])" 
+                 
+    //Processing of "g"   
+"gue" "" "$" "(k[french]|gve)"  // portuguese+spanish
+"gu" "" "[ei]" "(g[french]|gv[portuguese+spanish])" // portuguese+spanish
+"gu" "" "[ao]" "gv"     // portuguese+spanish
+"guy" "" "" "gi"  // french
+    
+"gli" "" "" "(glI|l[italian])" 
+"gni" "" "" "(gnI|ni[italian+french])"
+"gn" "" "[aeou]" "(n[italian+french]|nj[italian+french]|gn)" 
+    
+"ggie" "" "" "(je[greeklatin]|dZe)" // dZ is Italian
+"ggi" "" "[aou]" "(j[greeklatin]|dZ)" // dZ is Italian
+        
+"ggi" "[yaeiou]" "[aou]" "(gI|dZ[italian]|j[greeklatin])"  
+"gge" "[yaeiou]" "" "(gE|xe[spanish]|gZe[portuguese+french]|dZe[english+romanian+italian+spanish]|je[greeklatin])" 
+"ggi" "[yaeiou]" "" "(gI|xi[spanish]|gZi[portuguese+french]|dZi[english+romanian+italian+spanish]|i[greeklatin])" 
+"ggi" "" "[aou]" "(gI|dZ[italian]|j[greeklatin])" 
+    
+"gie" "" "$" "(ge|gi[german]|ji[french]|dZe[italian])" 
+"gie" "" "" "(ge|gi[german]|dZe[italian]|je[greeklatin])" 
+"gi" "" "[aou]" "(i[greeklatin]|dZ)" // dZ is Italian
+        
+"ge" "[yaeiou]" "" "(gE|xe[spanish]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])" 
+"gi" "[yaeiou]" "" "(gI|xi[spanish]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])" 
+"ge" "" "" "(gE|xe[spanish]|hE[russian]|je[greeklatin]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])" 
+"gi" "" "" "(gI|xi[spanish]|hI[russian]|i[greeklatin]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])" 
+"gy" "" "[aeouáéóúüöőű]" "(gi|dj[hungarian])"
+"gy" "" "" "(gi|d[hungarian])" 
+"g" "[yaeiou]" "[aouyei]" "g" 
+"g" "" "[aouei]" "(g|h[russian])" 
+    
+    //Processing of "j"        
+"ij" "" "" "(i|ej[dutch]|ix[spanish]|iZ[french+romanian+turkish+portuguese])" 
+"j" "" "[aoeiuy]" "(j|dZ[english]|x[spanish]|Z[french+romanian+turkish+portuguese])" 
+         
+    //Processing of "z"    
+"rz" "t" "" "(S[polish]|r)" // polish
+"rz" "" "" "(rz|rts[german]|Z[polish]|r[polish]|rZ[polish])" 
+        
+"tz" "" "$" "(ts|tS[english+german])" 
+"tz" "^" "" "(ts[english+german+russian]|tS[english+german])" 
+"tz" "" "" "(ts[english+german+russian]|tz)" 
+    
+"zia" "" "[bcdgkpstwzż]" "(Za[polish]|za[polish]|zja)" 
+"zia" "" "" "(Za[polish]|zja)" 
+"zią" "" "[bp]" "(Zom[polish]|zom)"  // polish
+"zią" "" "" "(Zon[polish]|zon)" // polish
+"zię" "" "[bp]" "(Zem[polish]|zem)" // polish
+"zię" "" "" "(Zen[polish]|zen)" // polish
+"zie" "" "[bcdgkpstwzż]" "(Ze[polish]|ze[polish]|ze|tsi[german])" 
+"zie" "" "" "(ze|Ze[polish]|tsi[german])" 
+"zio" "" "" "(Zo[polish]|zo)" 
+"ziu" "" "" "(Zu[polish]|zju)" 
+"zi" "" "" "(Zi[polish]|zi|tsi[german]|dzi[italian]|tsi[italian]|si[spanish])" 
+
+"z" "" "$" "(s|ts[german]|ts[italian]|S[portuguese])" // ts It, s/S/Z Port, s in Sp, z Fr
+"z" "" "[bdgv]" "(z|dz[italian]|Z[portuguese])" // dz It, Z/z Port, z Sp & Fr
+"z" "" "[ptckf]" "(s|ts[italian]|S[portuguese])" // ts It, s/S/z Port, z/s Sp
+              
+ // VOWELS  
+"aue" "" "" "aue" 
+"oue" "" "" "(oue|ve[french])" 
+"eau" "" "" "o" // French
+        
+"ae" "" "" "(Y[german]|aje[russian]|ae)" 
+"ai" "" "" "aj" 
+"au" "" "" "(au|o[french])" 
+"ay" "" "" "aj" 
+"ão" "" "" "(au|an)" // Port
+"ãe" "" "" "(aj|an)" // Port
+"ãi" "" "" "(aj|an)" // Port
+"ea" "" "" "(ea|ja[romanian])"
+"ee" "" "" "(i[english]|aje[russian]|e)" 
+"ei" "" "" "(aj|ej)"
+"eu" "" "" "(eu|Yj[german]|ej[german]|oj[german]|Y[dutch])"
+"ey" "" "" "(aj|ej)"
+"ia" "" "" "ja" 
+"ie" "" "" "(i[german]|e[polish]|ije[russian]|Q[dutch]|je)" 
+"ii" "" "$" "i" // russian
+"io" "" "" "(jo|e[russian])"
+"iu" "" "" "ju" 
+"iy" "" "$" "i" // russian
+"oe" "" "" "(Y[german]|oje[russian]|u[dutch]|oe)" 
+"oi" "" "" "oj" 
+"oo" "" "" "(u[english]|o)" 
+"ou" "" "" "(ou|u[french+greeklatin]|au[dutch])" 
+"où" "" "" "u" // french
+"oy" "" "" "oj" 
+"õe" "" "" "(oj|on)" // Port
+"ua" "" "" "va"
+"ue" "" "" "(Q[german]|uje[russian]|ve)" 
+"ui" "" "" "(uj|vi|Y[dutch])" 
+"uu" "" "" "(u|Q[dutch])" 
+"uo" "" "" "(vo|o)"
+"uy" "" "" "uj" 
+"ya" "" "" "ja" 
+"ye" "" "" "(je|ije[russian])"
+"yi" "^" "" "i"
+"yi" "" "$" "i" // russian
+"yo" "" "" "(jo|e[russian])"
+"yu" "" "" "ju" 
+"yy" "" "$" "i" // russian
+    
+"i" "[áóéê]" "" "j"
+"y" "[áóéê]" "" "j"
+         
+"e" "^" "" "(e|je[russian])" 
+"e" "" "$" "(e|EE[english+french])" 
+            
+// LANGUAGE SPECIFIC CHARACTERS 
+"ą" "" "[bp]" "om" // polish
+"ą" "" "" "on"  // polish
+"ä" "" "" "Y" 
+"á" "" "" "a" // Port & Sp
+"à" "" "" "a" 
+"â" "" "" "a" 
+"ã" "" "" "(a|an)" // Port
+"ă" "" "" "(e[romanian]|a)" // romanian
+"č" "" "" "tS" // czech
+"ć" "" "" "(tS[polish]|ts)"  // polish
+"ç" "" "" "(s|tS[turkish])"
+"ď" "" "" "(d|dj[czech])"
+"ę" "" "[bp]" "em" // polish
+"ę" "" "" "en" // polish
+"é" "" "" "e" 
+"è" "" "" "e" 
+"ê" "" "" "e" 
+"ě" "" "" "(e|je[czech])" 
+"ğ" "" "" "" // turkish
+"í" "" "" "i" 
+"î" "" "" "i" 
+"ı" "" "" "(i|e[turkish]|[turkish])" 
+"ł" "" "" "l" 
+"ń" "" "" "(n|nj[polish])" // polish
+"ñ" "" "" "(n|nj[spanish])" 
+"ó" "" "" "(u[polish]|o)"  
+"ô" "" "" "o" // Port & Fr
+"õ" "" "" "(o|on[portuguese]|Y[hungarian])" 
+"ò" "" "" "o"  // Sp & It
+"ö" "" "" "Y"
+"ř" "" "" "(r|rZ[czech])"
+"ś" "" "" "(S[polish]|s)" 
+"ş" "" "" "S" // romanian+turkish
+"š" "" "" "S" // czech
+"ţ" "" "" "ts"  // romanian
+"ť" "" "" "(t|tj[czech])"
+"ű" "" "" "Q" // hungarian
+"ü" "" "" "(Q|u[portuguese+spanish])"
+"ú" "" "" "u" 
+"ů" "" "" "u" // czech
+"ù" "" "" "u" // french
+"ý" "" "" "i"  // czech
+"ż" "" "" "Z" // polish
+"ź" "" "" "(Z[polish]|z)" 
+   
+"ß" "" "" "s" // german
+"'" "" "" "" // russian
+"\"" "" "" "" // russian
+ 
+"o" "" "[bcćdgklłmnńrsśtwzźż]" "(O|P[polish])"    
+    
+ // LATIN ALPHABET
+"a" "" "" "A"
+"b" "" "" "B" 
+"c" "" "" "(k|ts[polish+czech]|dZ[turkish])" 
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+   //array("g" "" "" "(g|x[dutch])" // Dutch sound disabled
+"g" "" "" "g"
+"h" "" "" "(h|x[romanian]|H[french+portuguese+italian+spanish])" 
+"i" "" "" "I"
+"j" "" "" "(j|x[spanish]|Z[french+romanian+turkish+portuguese])" 
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "O"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "(s|S[portuguese])" 
+"t" "" "" "t"
+"u" "" "" "U"
+"v" "" "" "V" 
+"w" "" "" "(v|w[english+dutch])"     
+"x" "" "" "(ks|gz|S[portuguese+spanish])"   // S/ks Port & Sp, gz Sp, It only ks
+"y" "" "" "i"
+"z" "" "" "(z|ts[german]|dz[italian]|ts[italian]|s[spanish])" // ts/dz It, z Port & Fr, z/s Sp

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_arabic.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_arabic.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_arabic.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_arabic.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+"ا" "" "" "a" // alif isol & init 
+                
+"ب" "" "" "b1" // ba' isol
+        
+"ت" "" "" "t1" // ta' isol
+        
+"ث" "" "" "t1" // tha' isol
+
+"ج" "" "" "(dZ1|Z1)" // jim isol
+        
+"ح" "" "" "(h1|1)" // h.a' isol
+    
+"خ" "" "" "x1" // kha' isol
+    
+"د" "" "" "d1" // dal isol & init
+           
+"ذ" "" "" "d1" // dhal isol & init
+        
+"ر" "" "" "r1" // ra' isol & init
+    
+"ز" "" "" "z1" // za' isol & init
+        
+"س" "" "" "s1" // sin isol
+    
+"ش" "" "" "S1" // shin isol
+    
+"ص" "" "" "s1" // s.ad isol
+    
+"ض" "" "" "d1" // d.ad isol
+        
+"ط" "" "" "t1" // t.a' isol
+        
+"ظ" "" "" "z1" // z.a' isol
+        
+"ع" "" "" "(h1|1)" // ayin isol 
+    
+"غ" "" "" "g1" // ghayin isol
+    
+"ف" "" "" "f1" // fa' isol
+    
+"ق" "" "" "k1" // qaf isol
+    
+"ك" "" "" "k1" // kaf isol
+    
+"ل" "" "" "l1" // lam isol
+    
+"م" "" "" "m1" // mim isol
+    
+"ن" "" "" "n1" // nun isol
+    
+"ه" "" "" "(h1|1)" // h isol
+        
+"و" "" "" "(u|v1)" // waw, isol + init
+               
+    
+"ي‎" "" "" "(i|j1)" // ya' isol

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_cyrillic.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_cyrillic.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_cyrillic.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_cyrillic.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+"ця" "" "" "tsa"
+"цю" "" "" "tsu"
+"циа" "" "" "tsa"
+"цие" "" "" "tse"
+"цио" "" "" "tso"
+"циу" "" "" "tsu"
+"сие" "" "" "se"
+"сио" "" "" "so"
+"зие" "" "" "ze"
+"зио" "" "" "zo"
+"с" "" "с" ""
+
+"гауз" "" "$" "haus"
+"гаус" "" "$" "haus"
+"гольц" "" "$" "holts"
+"геймер" "" "$" "(hejmer|hajmer)"
+"гейм" "" "$" "(hejm|hajm)"
+"гоф" "" "$" "hof"
+"гер" "" "$" "ger"
+"ген" "" "$" "gen"
+"гин" "" "$" "gin"
+"г" "(й|ё|я|ю|ы|а|е|о|и|у)" "(а|е|о|и|у)" "g"
+"г" "" "(а|е|о|и|у)" "(g|h)"
+
+"ля" "" "" "la"
+"лю" "" "" "lu"
+"лё" "" "" "(le|lo)"
+"лио" "" "" "(le|lo)"
+"ле" "" "" "(lE|lo)"
+
+"ийе" "" "" "je"
+"ие" "" "" "je"
+"ыйе" "" "" "je"
+"ые" "" "" "je"
+"ий" "" "(а|о|у)" "j"
+"ый" "" "(а|о|у)" "j"
+"ий" "" "$" "i"
+"ый" "" "$" "i"
+
+"ей" "^" "" "(jej|ej)"
+"е" "(а|е|о|у)" "" "je"
+"е" "^" "" "je"
+"эй" "" "" "ej"
+"ей" "" "" "ej"
+
+"ауе" "" "" "aue"
+"ауэ" "" "" "aue"
+
+"а" "" "" "a"
+"б" "" "" "b"
+"в" "" "" "v"
+"г" "" "" "g"
+"д" "" "" "d"
+"е" "" "" "E"
+"ё" "" "" "(e|jo)"
+"ж" "" "" "Z"
+"з" "" "" "z"
+"и" "" "" "I"
+"й" "" "" "j"
+"к" "" "" "k"
+"л" "" "" "l"
+"м" "" "" "m"
+"н" "" "" "n"
+"о" "" "" "o"
+"п" "" "" "p"
+"р" "" "" "r"
+"с" "" "" "s"
+"т" "" "" "t"
+"у" "" "" "u"
+"ф" "" "" "f"
+"х" "" "" "x"
+"ц" "" "" "ts"
+"ч" "" "" "tS"
+"ш" "" "" "S"
+"щ" "" "" "StS"
+"ъ" "" "" ""
+"ы" "" "" "I"
+"ь" "" "" ""
+"э" "" "" "E"
+"ю" "" "" "ju"
+"я" "" "" "ja"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_czech.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_czech.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_czech.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_czech.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"ch" "" "" "x"
+"qu" "" "" "(k|kv)"    
+"aue" "" "" "aue"
+"ei" "" "" "(ej|aj)"
+"i" "[aou]" "" "j"
+"i" "" "[aeou]" "j"
+
+"č" "" "" "tS"
+"š" "" "" "S"
+"ň" "" "" "n"
+"ť" "" "" "(t|tj)"
+"ď" "" "" "(d|dj)"
+"ř" "" "" "(r|rZ)"
+
+"á" "" "" "a"
+"é" "" "" "e"
+"í" "" "" "i"
+"ó" "" "" "o"
+"ú" "" "" "u"
+"ý" "" "" "i"
+"ě" "" "" "(e|je)"
+"ů" "" "" "u"
+
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "ts"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "(h|g)"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "(k|kv)"    
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    
+"x" "" "" "ks"    
+"y" "" "" "i"
+"z" "" "" "z" 

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_dutch.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_dutch.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_dutch.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_dutch.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// CONSONANTS
+"ssj" "" "" "S"
+"sj" "" "" "S"
+"ch" "" "" "x"
+"c" "" "[eiy]" "ts"   
+"ck" "" "" "k"     // German
+"pf" "" "" "(pf|p|f)" // German
+"ph" "" "" "(ph|f)"
+"qu" "" "" "kv"
+"th" "^" "" "t" // German
+"th" "" "[äöüaeiou]" "(t|th)" // German
+"th" "" "" "t" // German
+"ss" "" "" "s"
+"h" "[aeiouy]" "" ""
+
+// VOWELS
+"aue" "" "" "aue" 
+"ou" "" "" "au" 
+"ie" "" "" "(Q|i)" 
+"uu" "" "" "(Q|u)"   
+"ee" "" "" "e"   
+"eu" "" "" "(Y|Yj)" // Dutch Y  
+"aa" "" "" "a"   
+"oo" "" "" "o"   
+"oe" "" "" "u"   
+"ij" "" "" "ej"
+"ui" "" "" "(Y|uj)"
+"ei" "" "" "(ej|aj)" // Dutch ej
+
+"i" "" "[aou]" "j"
+"y" "" "[aeou]" "j"
+"i" "[aou]" "" "j"
+"y" "[aeou]" "" "j"
+
+// LATIN ALPHABET     
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "(g|x)"
+"h" "" "" "h"
+"i" "" "" "(i|Q)"   
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "(u|Q)"   
+"v" "" "" "v"
+"w" "" "" "(w|v)"
+"x" "" "" "ks"
+"y" "" "" "i"
+"z" "" "" "z"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_english.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_english.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_english.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_english.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+
+// CONSONANTS
+"’" "" "" "" // O’Neill
+"'" "" "" "" // O’Neill
+"mc" "^" "" "mak" // McDonald
+"tz" "" "" "ts" // Fitzgerald
+"tch" "" "" "tS"
+"ch" "" "" "(tS|x)"
+"ck" "" "" "k"
+"cc" "" "[iey]" "ks" // success, accent
+"c" "" "c" ""
+"c" "" "[iey]" "s" // circle 
+
+"gh" "^" "" "g" // ghost
+"gh" "" "" "(g|f|w)" // burgh | tough | bough
+"gn" "" "" "(gn|n)"
+"g" "" "[iey]" "(g|dZ)" // get, gem, giant, gigabyte
+// "th" "" "" "(6|8|t)"
+"th" "" "" "t"
+"kh" "" "" "x"
+"ph" "" "" "f"
+"sch" "" "" "(S|sk)"
+"sh" "" "" "S"
+"who" "^" "" "hu"
+"wh" "^" "" "w"
+
+"h" "" "$" "" // hard to find an example that isn't in a name
+"h" "" "[^aeiou]" "" // hard to find an example that isn't in a name
+"h" "^" "" "H"
+
+"kn" "^" "" "n" // knight
+"mb" "" "$" "m"
+"ng" "" "$" "(N|ng)"
+"pn" "^" "" "(pn|n)"
+"ps" "^" "" "(ps|s)"
+"qu" "" "" "kw"
+"tia" "" "" "(So|Sa)"
+"tio" "" "" "So"
+"wr" "^" "" "r"
+"x" "^" "" "z"
+
+// VOWELS
+"y" "^" "" "j"
+"y" "^" "[aeiouy]" "j"
+"yi" "^" "" "i"
+"aue" "" "" "aue" 
+"oue" "" "" "(aue|oue)" 
+"ai" "" "" "(aj|ej|e)" // rain | said
+"ay" "" "" "(aj|ej)" 
+"a" "" "[^aeiou]e" "ej" // plane 
+"ei" "" "" "(ej|aj|i)" // weigh | receive
+"ey" "" "" "(ej|aj|i)" // hey | barley
+"ear" "" "" "ia" // tear
+"ea" "" "" "(i|e)" // reason | treasure
+"ee" "" "" "i" // between
+"e" "" "[^aeiou]e" "i" // meter
+"e" "" "$" "(|E)" // blame, badge
+"ie" "" "" "i" // believe
+"i" "" "[^aeiou]e" "aj" // five
+"oa" "" "" "ou" // toad
+"oi" "" "" "oj" // join
+"oo" "" "" "u" // food
+"ou" "" "" "(u|ou)" // through | tough | could
+"oy" "" "" "oj" // boy
+"o" "" "[^aeiou]e" "ou" // rode
+"u" "" "[^aeiou]e" "(ju|u)" // cute | flute
+"u" "" "r" "(e|u)" // turn -- Morse disagrees, feels it should go to E
+
+// LATIN ALPHABET
+"a" "" "" "(e|o|a)" // hat | call | part
+"b" "" "" "b"
+"c" "" "" "k" // candy
+"d" "" "" "d"
+"e" "" "" "E" // bed
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h"    
+"i" "" "" "I" 
+"j" "" "" "dZ"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "(o|a)" // hot 
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "(u|a)" // put
+"v" "" "" "v"
+"w" "" "" "(w|v)" // the variant "v" is for spellings coming from German/Polish
+"x" "" "" "ks"
+"y" "" "" "i"
+"z" "" "" "z"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_french.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_french.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_french.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_french.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+
+// CONSONANTS
+"lt" "u" "$" "(lt|)" // Renault
+"c" "n" "$" "(k|)" // Tronc
+//"f" "" "" "(f|)" // Clef
+"d" "" "$" "(t|)" // Durand
+"g" "n" "$" "(k|)" // Gang
+"p" "" "$" "(p|)" // Trop, Champ
+"r" "e" "$" "(r|)" // Barbier
+"t" "" "$" "(t|)" // Murat, Constant
+"z" "" "$" "(s|)" 
+
+"ds" "" "$" "(ds|)" 
+"ps" "" "$" "(ps|)" // Champs
+"rs" "e" "$" "(rs|)" 
+"ts" "" "$" "(ts|)" 
+"s" "" "$" "(s|)" // Denis
+
+"x" "u" "$" "(ks|)" // Arnoux
+
+"s" "[aeéèêiou]" "[^aeéèêiou]" "(s|)" // Deschamps, Malesherbes, Groslot
+"t" "[aeéèêiou]" "[^aeéèêiou]" "(t|)" // Petitjean
+
+"kh" "" "" "x" // foreign
+"ph" "" "" "f"
+
+"ç" "" "" "s"
+"x" "" "" "ks"
+"ch" "" "" "S"
+"c" "" "[eiyéèê]" "s"
+
+"gn" "" "" "(n|gn)"
+"g" "" "[eiy]" "Z" 
+"gue" "" "$" "k"     
+"gu" "" "[eiy]" "g" 
+"aill" "" "e" "aj" // non Jewish
+"ll" "" "e" "(l|j)" // non Jewish
+"que" "" "$" "k"
+"qu" "" "" "k"
+"s" "[aeiouyéèê]" "[aeiouyéèê]" "z"
+"h" "[bdgt]" "" "" // translit from Arabic
+
+"m" "[aeiouy]" "[aeiouy]" "m"  
+"m" "[aeiouy]" "" "(m|n)"  // nasal
+
+"ou" "" "[aeio]" "v" 
+"u" "" "[aeio]" "v" 
+
+// VOWELS
+"aue" "" "" "aue" 
+"eau" "" "" "o" 
+"au" "" "" "(o|au)" // non Jewish
+"ai" "" "" "(e|aj)" // [e] is non Jewish
+"ay" "" "" "(e|aj)" // [e] is non Jewish
+"é" "" "" "e"
+"ê" "" "" "e"
+"è" "" "" "e"
+"à" "" "" "a"
+"â" "" "" "a"
+"où" "" "" "u"
+"ou" "" "" "u"
+"oi" "" "" "(oj|va)" // [va] (actually "ua") is non Jewish
+"ei" "" "" "(aj|ej|e)" // [e] is non Jewish
+"ey" "" "" "(aj|ej|e)" // [e] non Jewish
+"eu" "" "" "(ej|Y)" // non Jewish
+"y" "[ou]" "" "j"
+"e" "" "$" "(e|)"
+"i" "" "[aou]" "j"
+"y" "" "[aoeu]" "j"
+
+// LATIN ALPHABET      
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e" 
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i" 
+"j" "" "" "Z"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "(u|Q)"
+"v" "" "" "v"
+"w" "" "" "v"
+"y" "" "" "i"
+"z" "" "" "z"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_german.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_german.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_german.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_german.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERIC
+
+// CONSONANTS
+"ewitsch" "" "$" "evitS"
+"owitsch" "" "$" "ovitS"
+"evitsch" "" "$" "evitS"
+"ovitsch" "" "$" "ovitS"
+"witsch" "" "$" "vitS"
+"vitsch" "" "$" "vitS"
+"ssch" "" "" "S"
+"chsch" "" "" "xS"
+"sch" "" "" "S"
+
+"ziu" "" "" "tsu"
+"zia" "" "" "tsa"
+"zio" "" "" "tso"
+
+"chs" "" "" "ks"
+"ch" "" "" "x"
+"ck" "" "" "k"
+"c" "" "[eiy]" "ts"
+
+"sp" "^" "" "Sp"
+"st" "^" "" "St"
+"ssp" "" "" "(Sp|sp)"
+"sp" "" "" "(Sp|sp)"
+"sst" "" "" "(St|st)"
+"st" "" "" "(St|st)"
+"pf" "" "" "(pf|p|f)"
+"ph" "" "" "(ph|f)"
+"qu" "" "" "kv"
+
+"ewitz" "" "$" "(evits|evitS)"
+"ewiz" "" "$" "(evits|evitS)"
+"evitz" "" "$" "(evits|evitS)"
+"eviz" "" "$" "(evits|evitS)"
+"owitz" "" "$" "(ovits|ovitS)"
+"owiz" "" "$" "(ovits|ovitS)"
+"ovitz" "" "$" "(ovits|ovitS)"
+"oviz" "" "$" "(ovits|ovitS)"
+"witz" "" "$" "(vits|vitS)"
+"wiz" "" "$" "(vits|vitS)"
+"vitz" "" "$" "(vits|vitS)"
+"viz" "" "$" "(vits|vitS)"
+"tz" "" "" "ts"
+
+"thal" "" "$" "tal"
+"th" "^" "" "t"
+"th" "" "[äöüaeiou]" "(t|th)"
+"th" "" "" "t"
+"rh" "^" "" "r"
+"h" "[aeiouyäöü]" "" ""
+"h" "^" "" "H"
+
+"ss" "" "" "s"
+"s" "" "[äöüaeiouy]" "(z|s)"
+"s" "[aeiouyäöüj]" "[aeiouyäöü]" "z"
+"ß" "" "" "s"
+
+
+// VOWELS
+"ij" "" "$" "i"
+"aue" "" "" "aue"
+"ue" "" "" "Q"
+"ae" "" "" "Y"
+"oe" "" "" "Y"
+"ü" "" "" "Q"
+"ä" "" "" "Y"
+"ö" "" "" "Y"
+"ei" "" "" "(aj|ej)"
+"ey" "" "" "(aj|ej)"
+"eu" "" "" "(Yj|ej|aj|oj)"
+"i" "[aou]" "" "j"
+"y" "[aou]" "" "j"
+"ie" "" "" "I"
+"i" "" "[aou]" "j"
+"y" "" "[aoeu]" "j"
+
+// FOREIGN LETTERS
+"ñ" "" "" "n"
+"ã" "" "" "a"
+"ő" "" "" "o"
+"ű" "" "" "u"
+"ç" "" "" "s"
+
+// LATIN ALPHABET
+"a" "" "" "A"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "O"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "U"
+"v" "" "" "(f|v)"
+"w" "" "" "v"
+"x" "" "" "ks"
+"y" "" "" "i"
+"z" "" "" "ts"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_greek.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_greek.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_greek.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_greek.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"αυ" "" "$" "af"  // "av" before vowels and voiced consonants, "af" elsewhere
+"αυ" "" "(κ|π|σ|τ|φ|θ|χ|ψ)" "af" 
+"αυ" "" "" "av" 
+"ευ" "" "$" "ef" // "ev" before vowels and voiced consonants, "ef" elsewhere
+"ευ" "" "(κ|π|σ|τ|φ|θ|χ|ψ)" "ef" 
+"ευ" "" "" "ev" 
+"ηυ" "" "$" "if" // "iv" before vowels and voiced consonants, "if" elsewhere
+"ηυ" "" "(κ|π|σ|τ|φ|θ|χ|ψ)" "if" 
+"ηυ" "" "" "iv" 
+"ου" "" "" "u"  // [u:]
+
+"αι" "" "" "aj"  // modern [e]
+"ει" "" "" "ej" // modern [i]
+"οι" "" "" "oj" // modern [i]
+"ωι" "" "" "oj" 
+"ηι" "" "" "ej" 
+"υι" "" "" "i" // modern Greek "i"
+
+"γγ" "(ε|ι|η|α|ο|ω|υ)" "(ε|ι|η)" "(nj|j)"
+"γγ" "" "(ε|ι|η)" "j"
+"γγ" "(ε|ι|η|α|ο|ω|υ)" "" "(ng|g)"
+"γγ" "" "" "g" 
+"γκ" "^" "" "g"
+"γκ" "(ε|ι|η|α|ο|ω|υ)" "(ε|ι|η)" "(nj|j)"
+"γκ" "" "(ε|ι|η)" "j"
+"γκ" "(ε|ι|η|α|ο|ω|υ)" "" "(ng|g)"
+"γκ" "" "" "g" 
+"γι" "" "(α|ο|ω|υ)" "j"
+"γι" "" "" "(gi|i)"
+"γε" "" "(α|ο|ω|υ)" "j"
+"γε" "" "" "(ge|je)"
+
+"κζ" "" "" "gz"
+"τζ" "" "" "dz"
+"σ" "" "(β|γ|δ|μ|ν|ρ)" "z"
+
+"μβ" "" "" "(mb|b)"
+"μπ" "^" "" "b"
+"μπ" "(ε|ι|η|α|ο|ω|υ)" "" "mb"
+"μπ" "" "" "b" // after any consonant
+"ντ" "^" "" "d"
+"ντ" "(ε|ι|η|α|ο|ω|υ)" "" "(nd|nt)" // Greek is "nd" 
+"ντ" "" "" "(nt|d)" // Greek is "d" after any consonant
+
+"ά" "" "" "a"
+"έ" "" "" "e"
+"ή" "" "" "(i|e)" 
+"ί" "" "" "i"   
+"ό" "" "" "o"
+"ύ" "" "" "(Q|i|u)"
+"ώ" "" "" "o"
+"ΰ" "" "" "(Q|i|u)"
+"ϋ" "" "" "(Q|i|u)"
+"ϊ" "" "" "j"
+
+"α" "" "" "a"
+"β" "" "" "(v|b)" // modern "v", old "b"
+"γ" "" "" "g" 
+"δ" "" "" "d"    // modern like "th" in English "them", old "d"
+"ε" "" "" "e"
+"ζ" "" "" "z"
+"η" "" "" "(i|e)" // modern "i", old "e:"
+"ι" "" "" "i"
+"κ" "" "" "k"
+"λ" "" "" "l"
+"μ" "" "" "m"
+"ν" "" "" "n"
+"ξ" "" "" "ks"
+"ο" "" "" "o"
+"π" "" "" "p"
+"ρ" "" "" "r"
+"σ" "" "" "s"
+"ς" "" "" "s"
+"τ" "" "" "t" 
+"υ" "" "" "(Q|i|u)" // modern "i", old like German "ü"
+"φ" "" "" "f" 
+"θ" "" "" "t" // old Greek like "th" in English "theme"
+"χ" "" "" "x"
+"ψ" "" "" "ps"
+"ω" "" "" "o"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_greeklatin.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_greeklatin.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_greeklatin.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_greeklatin.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"au" "" "$" "af"
+"au" "" "[kpstfh]" "af"
+"au" "" "" "av"
+"eu" "" "$" "ef"
+"eu" "" "[kpstfh]" "ef"
+"eu" "" "" "ev"
+"ou" "" "" "u"
+
+"gge" "[aeiouy]" "" "(nje|je)" // aggelopoulos
+"ggi" "[aeiouy]" "[aou]" "(nj|j)" 
+"ggi" "[aeiouy]" "" "(ni|i)" 
+"gge" "" "" "je"
+"ggi" "" "" "i"
+"gg" "[aeiouy]" "" "(ng|g)"
+"gg" "" "" "g" 
+"gk" "^" "" "g"
+"gke" "[aeiouy]" "" "(nje|je)"
+"gki" "[aeiouy]" "" "(ni|i)"
+"gke" "" "" "je"
+"gki" "" "" "i"
+"gk" "[aeiouy]" "" "(ng|g)"
+"gk" "" "" "g" 
+"nghi" "" "[aouy]" "Nj"
+"nghi" "" "" "(Ngi|Ni)" 
+"nghe" "" "[aouy]" "Nj"
+"nghe" "" "" "(Nje|Nge)" 
+"ghi" "" "[aouy]" "j"
+"ghi" "" "" "(gi|i)" 
+"ghe" "" "[aouy]" "j"
+"ghe" "" "" "(je|ge)" 
+"ngh" "" "" "Ng"
+"gh" "" "" "g"
+"ngi" "" "[aouy]" "Nj" 
+"ngi" "" "" "(Ngi|Ni)" 
+"nge" "" "[aouy]" "Nj" 
+"nge" "" "" "(Nje|Nge)" 
+"gi" "" "[aouy]" "j" 
+"gi" "" "" "(gi|i)" // what about Pantazis = Pantagis ???
+"ge" "" "[aouy]" "j" 
+"ge" "" "" "(je|ge)" 
+"ng" "" "" "Ng" // fragakis = fraggakis = frangakis; angel = agel = aggel 
+
+"i" "" "[aeou]" "j"
+"i" "[aeou]" "" "j"  
+"y" "" "[aeou]" "j"
+"y" "[aeou]" "" "j"  
+"yi" "" "[aeou]" "j"
+"yi" "" "" "i"
+
+"ch" "" "" "x"
+"kh" "" "" "x"
+"dh" "" "" "d"  // actually as "th" in English "that"
+"dj" "" "" "dZ" // Turkish words
+"ph" "" "" "f"
+"th" "" "" "t"
+"kz" "" "" "gz"
+"tz" "" "" "dz" 
+"s" "" "[bgdmnr]" "z"
+
+"mb" "" "" "(mb|b)" // Liberis = Limperis = Limberis
+"mp" "^" "" "b"
+"mp" "[aeiouy]" "" "mp"
+"mp" "" "" "b"
+"nt" "^" "" "d"
+"nt" "[aeiouy]" "" "(nd|nt)" // Greek "nd"
+"nt" "" "" "(nt|d)" // Greek "d" after any consonant
+
+"á" "" "" "a"  
+"é" "" "" "e"  
+"í" "" "" "i"  
+"ó" "" "" "o"  
+"óu" "" "" "u"  
+"ú" "" "" "u" 
+"ý" "" "" "(i|Q|u)" // [ü]
+
+"a" "" "" "a"
+"b" "" "" "(b|v)" // beta: modern "v", old "b"
+"c" "" "" "k"
+"d" "" "" "d"    // modern like "th" in English "them", old "d"
+"e" "" "" "e"
+"f" "" "" "f" 
+"g" "" "" "g" 
+"h" "" "" "x"
+"i" "" "" "i"
+"j" "" "" "(j|Z)" // Panajotti = Panaiotti; Louijos = Louizos; Pantajis = Pantazis = Pantagis
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k" // foreign
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t" 
+"u" "" "" "u" 
+"v" "" "" "v" 
+"w" "" "" "v" // foreign
+"x" "" "" "ks"
+"y" "" "" "(i|Q|u)" // [ü] 
+"z" "" "" "z"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_hebrew.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_hebrew.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_hebrew.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_hebrew.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// General = Ashkenazic
+
+"אי" "" "" "i"
+"עי" "" "" "i"
+"עו" "" "" "VV"
+"או" "" "" "VV"
+
+"ג׳" "" "" "Z"
+"ד׳" "" "" "dZ"
+
+"א" "" "" "L"
+"ב" "" "" "b"
+"ג" "" "" "g"
+"ד" "" "" "d"
+
+"ה" "^" "" "1"
+"ה" "" "$" "1"
+"ה" "" "" ""
+
+"וו" "" "" "V"
+"וי" "" "" "WW"
+"ו" "" "" "W"
+"ז" "" "" "z"
+"ח" "" "" "X"
+"ט" "" "" "T"
+"יי" "" "" "i"
+"י" "" "" "i"
+"ך" "" "" "X"
+"כ" "^" "" "K"
+"כ" "" "" "k"
+"ל" "" "" "l"
+"ם" "" "" "m"
+"מ" "" "" "m"
+"ן" "" "" "n"
+"נ" "" "" "n"
+"ס" "" "" "s"
+"ע" "" "" "L"
+"ף" "" "" "f"
+"פ" "" "" "f"
+"ץ" "" "" "C"
+"צ" "" "" "C"
+"ק" "" "" "K"
+"ר" "" "" "r"
+"ש" "" "" "s"
+"ת" "" "" "TB" // only Ashkenazic

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_hungarian.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_hungarian.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_hungarian.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_hungarian.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+
+// CONSONANTS
+"sz" "" "" "s"
+"zs" "" "" "Z"
+"cs" "" "" "tS"
+
+"ay" "" "" "(oj|aj)"
+"ai" "" "" "(oj|aj)"
+"aj" "" "" "(oj|aj)"
+
+"ei" "" "" "(aj|ej)" // German element
+"ey" "" "" "(aj|ej)" // German element
+
+"y" "[áo]" "" "j"
+"i" "[áo]" "" "j"
+"ee" "" "" "(ej|e)" 
+"ely" "" "" "(ej|eli)"
+"ly" "" "" "(j|li)"
+"gy" "" "[aeouáéóúüöőű]" "dj"
+"gy" "" "" "(d|gi)"
+"ny" "" "[aeouáéóúüöőű]" "nj"
+"ny" "" "" "(n|ni)"
+"ty" "" "[aeouáéóúüöőű]" "tj"
+"ty" "" "" "(t|ti)"
+"qu" "" "" "(ku|kv)"
+"h" "" "$" ""
+
+// SPECIAL VOWELS
+"á" "" "" "a"
+"é" "" "" "e"
+"í" "" "" "i"
+"ó" "" "" "o"
+"ú" "" "" "u"
+"ö" "" "" "Y"
+"ő" "" "" "Y" 
+"ü" "" "" "Q"
+"ű" "" "" "Q"
+
+// LATIN ALPHABET      
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "ts" 
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "(S|s)" 
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v" 
+"w" "" "" "v" 
+"x" "" "" "ks"
+"y" "" "" "i" 
+"z" "" "" "z"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_italian.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_italian.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_italian.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_italian.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"kh" "" "" "x" // foreign
+
+"gli" "" "" "(l|gli)"
+"gn" "" "[aeou]" "(n|nj|gn)"
+"gni" "" "" "(ni|gni)"
+
+"gi" "" "[aeou]" "dZ"
+"gg" "" "[ei]" "dZ"
+"g" "" "[ei]" "dZ"
+"h" "[bdgt]" "" "g" // gh is It; others from Arabic translit
+"h" "" "$" "" // foreign
+
+"ci" "" "[aeou]" "tS"
+"ch" "" "[ei]" "k"
+"sc" "" "[ei]" "S" 
+"cc" "" "[ei]" "tS"
+"c" "" "[ei]" "tS"
+"s" "[aeiou]" "[aeiou]" "z"
+
+"i" "[aeou]" "" "j"
+"i" "" "[aeou]" "j"
+"y" "[aeou]" "" "j" // foreign
+"y" "" "[aeou]" "j" // foreign
+
+"qu" "" "" "k"    
+"uo" "" "" "(vo|o)"
+"u" "" "[aei]" "v" 
+
+"è" "" "" "e" 
+"é" "" "" "e" 
+"ò" "" "" "o"  
+"ó" "" "" "o" 
+
+// LATIN ALPHABET    
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "(Z|dZ|j)" // foreign
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"    
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    // foreign
+"x" "" "" "ks"    // foreign
+"y" "" "" "i"    // foreign
+"z" "" "" "(ts|dz)"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_polish.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_polish.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_polish.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_polish.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERIC
+
+// CONVERTING FEMININE TO MASCULINE
+"ska" "" "$" "ski"   
+"cka" "" "$" "tski"   
+"lowa" "" "$" "(lova|lof|l|el)"   
+"kowa" "" "$" "(kova|kof|k|ek)"   
+"owa" "" "$" "(ova|of|)"  
+"lowna" "" "$" "(lovna|levna|l|el)" 
+"kowna" "" "$" "(kovna|k|ek)"  
+"owna" "" "$" "(ovna|)"   
+"lówna" "" "$" "(l|el)"   
+"kówna" "" "$" "(k|ek)"   
+"ówna" "" "$" ""   
+"a" "" "$" "(a|i)"   
+
+// CONSONANTS
+"czy" "" "" "tSi"
+"cze" "" "[bcdgkpstwzż]" "(tSe|tSF)"
+"ciewicz" "" "" "(tsevitS|tSevitS)"
+"siewicz" "" "" "(sevitS|SevitS)"
+"ziewicz" "" "" "(zevitS|ZevitS)"
+"riewicz" "" "" "rjevitS" 
+"diewicz" "" "" "djevitS" 
+"tiewicz" "" "" "tjevitS" 
+"iewicz" "" "" "evitS"
+"ewicz" "" "" "evitS"
+"owicz" "" "" "ovitS"
+"icz" "" "" "itS"
+"cz" "" "" "tS"
+"ch" "" "" "x"
+
+"cia" "" "[bcdgkpstwzż]" "(tSB|tsB)"
+"cia" "" "" "(tSa|tsa)" 
+"cią" "" "[bp]" "(tSom|tsom)"
+"cią" "" "" "(tSon|tson)"
+"cię" "" "[bp]" "(tSem|tsem)"
+"cię" "" "" "(tSen|tsen)"
+"cie" "" "[bcdgkpstwzż]" "(tSF|tsF)" 
+"cie" "" "" "(tSe|tse)" 
+"cio" "" "" "(tSo|tso)" 
+"ciu" "" "" "(tSu|tsu)" 
+"ci" "" "" "(tSi|tsI)"
+"ć" "" "" "(tS|ts)"
+
+"ssz" "" "" "S"
+"sz" "" "" "S"
+"sia" "" "[bcdgkpstwzż]" "(SB|sB|sja)" 
+"sia" "" "" "(Sa|sja)" 
+"sią" "" "[bp]" "(Som|som)"
+"sią" "" "" "(Son|son)"
+"się" "" "[bp]" "(Sem|sem)"
+"się" "" "" "(Sen|sen)"
+"sie" "" "[bcdgkpstwzż]" "(SF|sF|se)" 
+"sie" "" "" "(Se|se)" 
+"sio" "" "" "(So|so)" 
+"siu" "" "" "(Su|sju)" 
+"si" "" "" "(Si|sI)"
+"ś" "" "" "(S|s)"
+
+"zia" "" "[bcdgkpstwzż]" "(ZB|zB|zja)" 
+"zia" "" "" "(Za|zja)" 
+"zią" "" "[bp]" "(Zom|zom)"
+"zią" "" "" "(Zon|zon)"
+"zię" "" "[bp]" "(Zem|zem)"
+"zię" "" "" "(Zen|zen)"
+"zie" "" "[bcdgkpstwzż]" "(ZF|zF)" 
+"zie" "" "" "(Ze|ze)" 
+"zio" "" "" "(Zo|zo)" 
+"ziu" "" "" "(Zu|zju)" 
+"zi" "" "" "(Zi|zI)"
+
+"że" "" "[bcdgkpstwzż]" "(Ze|ZF)"
+"że" "" "[bcdgkpstwzż]" "(Ze|ZF|ze|zF)"
+"że" "" "" "Ze"
+"źe" "" "" "(Ze|ze)"
+"ży" "" "" "Zi"
+"źi" "" "" "(Zi|zi)"
+"ż" "" "" "Z"
+"ź" "" "" "(Z|z)"
+
+"rze" "t" "" "(Se|re)"
+"rze" "" "" "(Ze|re|rZe)"
+"rzy" "t" "" "(Si|ri)"
+"rzy" "" "" "(Zi|ri|rZi)"
+"rz" "t" "" "(S|r)"
+"rz" "" "" "(Z|r|rZ)"
+
+"lio" "" "" "(lo|le)"
+"ł" "" "" "l"
+"ń" "" "" "n"
+"qu" "" "" "k"
+"s" "" "s" "" 
+
+// VOWELS   
+"ó" "" "" "(u|o)"
+"ą" "" "[bp]" "om"
+"ę" "" "[bp]" "em"
+"ą" "" "" "on"
+"ę" "" "" "en"
+
+"ije" "" "" "je"
+"yje" "" "" "je"
+"iie" "" "" "je"
+"yie" "" "" "je"
+"iye" "" "" "je"
+"yye" "" "" "je"
+
+"ij" "" "[aou]" "j"
+"yj" "" "[aou]" "j"
+"ii" "" "[aou]" "j"
+"yi" "" "[aou]" "j"
+"iy" "" "[aou]" "j"
+"yy" "" "[aou]" "j"
+
+"rie" "" "" "rje" 
+"die" "" "" "dje" 
+"tie" "" "" "tje" 
+"ie" "" "[bcdgkpstwzż]" "F" 
+"ie" "" "" "e"
+
+"aue" "" "" "aue"
+"au" "" "" "au"
+
+"ei" "" "" "aj"
+"ey" "" "" "aj"
+"ej" "" "" "aj"
+
+"ai" "" "" "aj"
+"ay" "" "" "aj"
+"aj" "" "" "aj"
+
+"i" "[aeou]" "" "j" 
+"y" "[aeou]" "" "j" 
+"i" "" "[aou]" "j"
+"y" "" "[aeou]" "j"
+
+"a" "" "[bcdgkpstwzż]" "B" 
+"e" "" "[bcdgkpstwzż]" "(E|F)" 
+"o" "" "[bcćdgklłmnńrsśtwzźż]" "P" 
+
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "ts"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "(h|x)"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"
+"x" "" "" "ks"
+"y" "" "" "I"
+"z" "" "" "z"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_portuguese.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_portuguese.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_portuguese.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_portuguese.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"kh" "" "" "x" // foreign
+"ch" "" "" "S"
+"ss" "" "" "s"
+"sc" "" "[ei]" "s"
+"sç" "" "[aou]" "s"
+"ç" "" "" "s"
+"c" "" "[ei]" "s"
+//  "c" "" "[aou]" "(k|C)"
+
+"s" "^" "" "s"
+"s" "[aáuiíoóeéêy]" "[aáuiíoóeéêy]" "z"
+"s" "" "[dglmnrv]" "(Z|S)" // Z is Brazil
+
+"z" "" "$" "(Z|s|S)" // s and S in Brazil
+"z" "" "[bdgv]" "(Z|z)" // Z in Brazil
+"z" "" "[ptckf]" "(s|S|z)" // s and S in Brazil
+
+"gu" "" "[eiu]" "g"    
+"gu" "" "[ao]" "gv"    
+"g" "" "[ei]" "Z"
+"qu" "" "[eiu]" "k"    
+"qu" "" "[ao]" "kv"    
+
+"uo" "" "" "(vo|o|u)"
+"u" "" "[aei]" "v" 
+
+"lh" "" "" "l"
+"nh" "" "" "nj"
+"h" "[bdgt]" "" "" // translit. from Arabic
+"h" "" "$" "" // foreign
+
+"ex" "" "[aáuiíoóeéêy]" "(ez|eS|eks)" // ez in Brazil
+"ex" "" "[cs]" "e" 
+
+"y" "[aáuiíoóeéê]" "" "j"
+"y" "" "[aeiíou]" "j"
+"m" "" "[bcdfglnprstv]" "(m|n)" // maybe add a rule for m/n before a consonant that disappears [preceding vowel becomes nasalized]
+"m" "" "$" "(m|n)" // maybe add a rule for final m/n that disappears [preceding vowel becomes nasalized]
+
+"ão" "" "" "(au|an|on)"
+"ãe" "" "" "(aj|an)"
+"ãi" "" "" "(aj|an)"
+"õe" "" "" "(oj|on)"
+"i" "[aáuoóeéê]" "" "j"
+"i" "" "[aeou]" "j"
+
+"â" "" "" "a"
+"à" "" "" "a"
+"á" "" "" "a"
+"ã" "" "" "(a|an|on)"
+"é" "" "" "e"
+"ê" "" "" "e"
+"í" "" "" "i"
+"ô" "" "" "o"
+"ó" "" "" "o"
+"õ" "" "" "(o|on)"
+"ú" "" "" "u"
+"ü" "" "" "u"
+
+"aue" "" "" "aue"
+
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "(e|i)"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "Z" 
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "(o|u)"
+"p" "" "" "p"
+"q" "" "" "k"    
+"r" "" "" "r"
+"s" "" "" "S"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    
+"x" "" "" "(S|ks)"   
+"y" "" "" "i"   
+"z" "" "" "z"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_romanian.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_romanian.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_romanian.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_romanian.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"ce" "" "" "tSe"
+"ci" "" "" "(tSi|tS)"
+"ch" "" "[ei]" "k"
+"ch" "" "" "x" // foreign
+
+"gi" "" "" "(dZi|dZ)"
+"g" "" "[ei]" "dZ"
+"gh" "" "" "g"
+
+"i" "[aeou]" "" "j"
+"i" "" "[aeou]" "j"
+"ţ" "" "" "ts"
+"ş" "" "" "S"
+"qu" "" "" "k"    
+
+"î" "" "" "i"
+"ea" "" "" "ja"
+"ă" "" "" "(e|a)"
+"aue" "" "" "aue"
+
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "(x|h)"
+"i" "" "" "I"
+"j" "" "" "Z"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"    
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    
+"x" "" "" "ks"    
+"y" "" "" "i"    
+"z" "" "" "z"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_russian.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_russian.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_russian.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_russian.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//GENERAL// CONVERTING FEMININE TO MASCULINE
+"yna" "" "$" "(in|ina)" 
+"ina" "" "$" "(in|ina)" 
+"liova" "" "$" "(lof|lef)" 
+"lova" "" "$" "(lof|lef|lova)" 
+"ova" "" "$" "(of|ova)" 
+"eva" "" "$" "(ef|ova)" 
+"aia" "" "$" "(aja|i)" 
+"aja" "" "$" "(aja|i)" 
+"aya" "" "$" "(aja|i)" 
+
+//SPECIAL CONSONANTS
+"tsya" "" "" "tsa" 
+"tsyu" "" "" "tsu" 
+"tsia" "" "" "tsa" 
+"tsie" "" "" "tse" 
+"tsio" "" "" "tso"   
+"tsye" "" "" "tse" 
+"tsyo" "" "" "tso" 
+"tsiu" "" "" "tsu" 
+"sie" "" "" "se" 
+"sio" "" "" "so"   
+"zie" "" "" "ze" 
+"zio" "" "" "zo"   
+"sye" "" "" "se" 
+"syo" "" "" "so"   
+"zye" "" "" "ze" 
+"zyo" "" "" "zo"   
+
+"ger" "" "$" "ger" 
+"gen" "" "$" "gen" 
+"gin" "" "$" "gin" 
+"gg" "" "" "g" 
+"g" "[jaeoiuy]" "[aeoiu]" "g" 
+"g" "" "[aeoiu]" "(g|h)" 
+
+"kh" "" "" "x"
+"ch" "" "" "(tS|x)" 
+"sch" "" "" "(StS|S)"
+"ssh" "" "" "S"
+"sh" "" "" "S"
+"zh" "" "" "Z" 
+"tz" "" "$" "ts" 
+"tz" "" "" "(ts|tz)" 
+"c" "" "[iey]" "s" 
+"qu" "" "" "(kv|k)" 
+"s" "" "s" ""
+
+//SPECIAL VOWELS
+"lya" "" "" "la" 
+"lyu" "" "" "lu"  
+"lia" "" "" "la" // not in DJSRE
+"liu" "" "" "lu"  // not in DJSRE
+"lja" "" "" "la" // not in DJSRE
+"lju" "" "" "lu"  // not in DJSRE
+"le" "" "" "(lo|lE)" //not in DJSRE
+"lyo" "" "" "(lo|le)" //not in DJSRE
+"lio" "" "" "(lo|le)" 
+
+"ije" "" "" "je"
+"ie" "" "" "je"
+"iye" "" "" "je"
+"iie" "" "" "je"
+"yje" "" "" "je"
+"ye" "" "" "je"
+"yye" "" "" "je"
+"yie" "" "" "je"
+
+"ij" "" "[aou]" "j"
+"iy" "" "[aou]" "j"
+"ii" "" "[aou]" "j"
+"yj" "" "[aou]" "j"
+"yy" "" "[aou]" "j"
+"yi" "" "[aou]" "j"
+
+"io" "" "" "(jo|e)" 
+"i" "" "[au]" "j" 
+"i" "[aeou]" "" "j" 
+"yo" "" "" "(jo|e)" 
+"y" "" "[au]" "j"
+"y" "[aeiou]" "" "j" 
+
+"ii" "" "$" "i" 
+"iy" "" "$" "i" 
+"yy" "" "$" "i" 
+"yi" "" "$" "i" 
+"yj" "" "$" "i"
+"ij" "" "$" "i"
+
+"e" "^" "" "(je|E)" 
+"ee" "" "" "(aje|i)" 
+"e" "[aou]" "" "je" 
+"oo" "" "" "(oo|u)" 
+"'" "" "" "" 
+"\"" "" "" ""
+
+"aue" "" "" "aue"
+
+// LATIN ALPHABET 
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k" 
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h" 
+"i" "" "" "I"
+"j" "" "" "j" 
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k" 
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v" 
+"x" "" "" "ks" 
+"y" "" "" "I"
+"z" "" "" "z"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_spanish.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_spanish.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_spanish.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_spanish.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+
+// Includes both Spanish (Castilian) & Catalan
+
+// CONSONANTS
+"ñ" "" "" "(n|nj)"
+"ny" "" "" "nj" // Catalan
+"ç" "" "" "s" // Catalan
+
+"ig" "[aeiou]" "" "(tS|ig)" // tS is Catalan
+"ix" "[aeiou]" "" "S" // Catalan
+"tx" "" "" "tS" // Catalan
+"tj" "" "$" "tS" // Catalan
+"tj" "" "" "dZ" // Catalan
+"tg" "" "" "(tg|dZ)" // dZ is Catalan
+"ch" "" "" "(tS|dZ)" // dZ is typical for Argentina
+"bh" "" "" "b" // translit. from Arabic
+"h" "[dgt]" "" "" // translit. from Arabic
+"h" "" "$" "" // foreign
+//"ll" "" "" "(l|Z)" // Z is typical for Argentina, only Ashkenazic
+"m" "" "[bpvf]" "(m|n)"
+"c" "" "[ei]" "s" 
+//  "c" "" "[aou]" "(k|C)"
+"gu" "" "[ei]" "(g|gv)" // "gv" because "u" can actually be "ü"
+"g" "" "[ei]" "(x|g|dZ)"  // "g" only for foreign words; dZ is Catalan
+"qu" "" "" "k"
+
+"uo" "" "" "(vo|o)"    
+"u" "" "[aei]" "v"
+
+// SPECIAL VOWELS
+"ü" "" "" "v"
+"á" "" "" "a"
+"é" "" "" "e"
+"í" "" "" "i"
+"ó" "" "" "o"
+"ú" "" "" "u"
+"à" "" "" "a"  // Catalan
+"è" "" "" "e" // Catalan
+"ò" "" "" "o"  // Catalan
+
+// LATIN ALPHABET      
+"a" "" "" "a"
+"b" "" "" "B"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "(x|Z)" // Z is Catalan
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "V"
+"w" "" "" "v" // foreign words
+"x" "" "" "(ks|gz|S)" // ks is Spanish, all are Catalan
+"y" "" "" "(i|j)"
+"z" "" "" "(z|s)" // as "c" before "e" or "i", in Spain it is like unvoiced English "th"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_turkish.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_turkish.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_turkish.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_turkish.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"ç" "" "" "tS"
+"ğ" "" "" "" // to show that previous vowel is long
+"ş" "" "" "S"
+"ü" "" "" "Q"
+"ö" "" "" "Y"
+"ı" "" "" "(e|i|)" // as "e" in English "label"
+
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "dZ"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "Z"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k" // foreign words
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v" // foreign words
+"x" "" "" "ks" // foreign words
+"y" "" "" "j"
+"z" "" "" "z" 

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/lang.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/lang.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/lang.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/lang.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,293 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// 1. following are rules to accept the language
+// 1.1 Special letter combinations
+^o’ english true
+^o' english true
+^mc english true
+^fitz english true
+ceau french+romanian true
+eau$ french true // mp: I've added this
+eaux$ french true // mp: I've added this
+ault$ french true
+oult$ french true
+eux$ french true
+eix$ french true
+glou$ greeklatin true
+uu dutch true
+tx spanish true
+witz german true
+tz$ german+russian+english true
+^tz russian+english true
+poulos$ greeklatin true
+pulos$ greeklatin true
+iou greeklatin true
+sj$ dutch true
+^sj dutch true
+güe spanish true
+güi spanish true
+ghe romanian+greeklatin true
+ghi romanian+greeklatin true
+escu$ romanian true
+esco$ romanian true
+vici$ romanian true
+schi$ romanian true
+ii$ russian true
+iy$ russian true
+yy$ russian true
+yi$ russian true
+^rz polish true
+rz$ polish+german true
+[bcdfgklmnpstwz]rz polish true
+rz[bcdfghklmnpstw] polish true
+etti$ italian true
+eti$ italian true
+ati$ italian true
+ato$ italian true
+[aoei]no$ italian true
+[aoei]ni$ italian true
+esi$ italian true
+oli$ italian true
+field$ english true
+cki$ polish true
+ska$ polish true
+cka$ polish true
+ae german+russian+english true
+oe german+french+russian+english+dutch true
+th$ german+english true
+^th german+english+greeklatin true
+mann german true
+cz polish true
+cy polish+greeklatin true
+niew polish true
+stein german true
+heim$ german true
+heimer$ german true
+thal german true
+zweig german true
+[aeou]h german true
+äh german true
+öh german true
+üh german true
+[ln]h[ao]$ portuguese true
+[ln]h[aou] portuguese+french+german+dutch+czech+spanish+turkish true
+chsch german true
+tsch german true
+sch$ german+russian true
+^sch german+russian true
+ck$ german+english true
+c$ polish+romanian+hungarian+czech+turkish true
+sz polish+hungarian true
+cs$ hungarian true
+^cs hungarian true
+dzs hungarian true
+zs$ hungarian true
+^zs hungarian true
+^wl polish true
+^wr polish+english+german+dutch true
+
+gy$ hungarian true
+gy[aeou] hungarian true
+gy hungarian+russian+french+greeklatin true
+guy french true
+gu[ei] spanish+french+portuguese true
+gu[ao] spanish+portuguese true
+gi[aou] italian+greeklatin true
+
+ly hungarian+russian+polish+greeklatin true
+ny hungarian+russian+polish+spanish+greeklatin true
+ty hungarian+russian+polish+greeklatin true
+
+// 1.2 special characters
+ć polish true
+ç french+spanish+portuguese+turkish true
+č czech true
+ď czech true
+ğ turkish true
+ł polish true
+ń polish true
+ñ spanish true
+ň czech true
+ř czech true
+ś polish true
+ş romanian+turkish true
+š czech true
+ţ romanian true
+ť czech true
+ź polish true
+ż polish true
+
+ß german true
+
+ä german true
+á hungarian+spanish+portuguese+czech+greeklatin true
+â romanian+french+portuguese true
+ă romanian true
+ą polish true
+à portuguese true
+ã portuguese true
+ę polish true
+é french+hungarian+czech+greeklatin true
+è french+spanish+italian true
+ê french true
+ě czech true
+ê french+portuguese true
+í hungarian+spanish+portuguese+czech+greeklatin true
+î romanian+french true
+ı turkish true
+ó polish+hungarian+spanish+italian+portuguese+czech+greeklatin true
+ö german+hungarian+turkish true
+ô french+portuguese true
+õ portuguese+hungarian true
+ò italian+spanish true
+ű hungarian true
+ú hungarian+spanish+portuguese+czech+greeklatin true
+ü german+hungarian+spanish+portuguese+turkish true
+ù french true
+ů czech true
+ý czech+greeklatin true
+
+// Every Cyrillic word has at least one Cyrillic vowel (аёеоиуыэюя)
+а cyrillic true
+ё cyrillic true
+о cyrillic true
+е cyrillic true
+и cyrillic true
+у cyrillic true
+ы cyrillic true
+э cyrillic true
+ю cyrillic true
+я cyrillic true
+
+// Every Greek word has at least one Greek vowel
+α greek true
+ε greek true
+η greek true
+ι greek true
+ο greek true
+υ greek true
+ω greek true
+
+// Arabic (only initial)
+ا arabic true // alif (isol + init)   
+ب arabic true // ba' 
+ت arabic true // ta' 
+ث arabic true // tha'
+ج arabic true // jim
+ح arabic true // h.a' 
+خ' arabic true // kha' 
+د arabic true // dal (isol + init)
+ذ arabic true // dhal (isol + init)
+ر arabic true // ra' (isol + init)
+ز arabic true // za' (isol + init)
+س arabic true // sin 
+ش arabic true // shin 
+ص arabic true // s.ad 
+ض arabic true // d.ad 
+ط arabic true // t.a' 
+ظ arabic true // z.a' 
+ع arabic true // 'ayn
+غ arabic true // ghayn 
+ف arabic true // fa' 
+ق arabic true // qaf 
+ك arabic true // kaf  
+ل arabic true // lam 
+م arabic true // mim 
+ن arabic true // nun 
+ه arabic true // ha' 
+و arabic true // waw (isol + init)
+ي arabic true // ya' 
+    
+آ arabic true // alif madda  
+إ arabic true // alif + diacritic  
+أ arabic true // alif + hamza
+ؤ arabic true //  waw + hamza
+ئ arabic true //  ya' + hamza
+
+
+// Hebrew
+א hebrew true
+ב hebrew true
+ג hebrew true
+ד hebrew true
+ה hebrew true
+ו hebrew true
+ז hebrew true
+ח hebrew true
+ט hebrew true
+י hebrew true
+כ hebrew true
+ל hebrew true
+מ hebrew true
+נ hebrew true
+ס hebrew true
+ע hebrew true
+פ hebrew true
+צ hebrew true
+ק hebrew true
+ר hebrew true
+ש hebrew true
+ת hebrew true
+
+// 2. following are rules to reject the language
+
+// Every Latin character word has at least one Latin vowel
+a cyrillic+hebrew+greek+arabic false
+o cyrillic+hebrew+greek+arabic false
+e cyrillic+hebrew+greek+arabic false
+i cyrillic+hebrew+greek+arabic false
+y cyrillic+hebrew+greek+arabic+romanian+dutch false
+u cyrillic+hebrew+greek+arabic false
+
+j italian false
+j[^aoeiuy] french+spanish+portuguese+greeklatin false
+g czech false
+k romanian+spanish+portuguese+french+italian false
+q hungarian+polish+russian+romanian+czech+dutch+turkish+greeklatin false
+v polish false
+w french+romanian+spanish+hungarian+russian+czech+turkish+greeklatin false
+x czech+hungarian+dutch+turkish false // polish excluded from the list
+
+dj spanish+turkish false
+v[^aoeiu] german false // in german, "v" can be found before a vowel only
+y[^aoeiu] german false  // in german, "y" usually appears only in the last position; sometimes before a vowel
+c[^aohk] german false
+dzi german+english+french+turkish false
+ou german false
+a[eiou] turkish false // no diphthongs in Turkish
+ö[eaio] turkish false
+ü[eaio] turkish false
+e[aiou] turkish false
+i[aeou] turkish false
+o[aieu] turkish false
+u[aieo] turkish false
+aj german+english+french+dutch false
+ej german+english+french+dutch false
+oj german+english+french+dutch false
+uj german+english+french+dutch false
+eu russian+polish false
+ky polish false
+kie french+spanish+greeklatin false
+gie portuguese+romanian+spanish+greeklatin false
+ch[aou] italian false
+ch turkish false
+son$ german false
+sc[ei] french false
+sch hungarian+polish+french+spanish false
+^h russian false
+etti$ greeklatin false

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_any.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_any.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_any.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_any.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// SEPHARDIC
+
+"E" "" "" ""
\ No newline at end of file

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_common.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_common.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_common.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_common.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_exact_approx_common
+
+"bens" "^" "" "(binz|s)" 
+"benS" "^" "" "(binz|s)" 
+"ben" "^" "" "(bin|)" 
+
+"abens" "^" "" "(abinz|binz|s)" 
+"abenS" "^" "" "(abinz|binz|s)" 
+"aben" "^" "" "(abin|bin|)"
+
+"els" "^" "" "(ilz|alz|s)" 
+"elS" "^" "" "(ilz|alz|s)" 
+"el" "^" "" "(il|al|)" 
+"als" "^" "" "(alz|s)" 
+"alS" "^" "" "(alz|s)" 
+"al" "^" "" "(al|)" 
+
+//"dels" "^" "" "(dilz|s)" 
+//"delS" "^" "" "(dilz|s)" 
+"del" "^" "" "(dil|)" 
+"dela" "^" "" "(dila|)" 
+//"delo" "^" "" "(dila|)" 
+"da" "^" "" "(da|)" 
+"de" "^" "" "(di|)" 
+//"des" "^" "" "(dis|dAs|)" 
+//"di" "^" "" "(di|)" 
+//"dos" "^" "" "(das|dus|)" 
+
+"oa" "" "" "(va|a|D)"
+"oe" "" "" "(vi|D)"
+"ae" "" "" "D"
+
+/// "s" "" "$" "(s|)" // Attia(s)
+/// "C" "" "" "s"  // "c" could actually be "ç"
+
+"n" "" "[bp]" "m"
+
+"h" "" "" "(|h|f)" // sound "h" (absent) can be expressed via /x/, Cojab in Spanish = Kohab ; Hakim = Fakim
+"x" "" "" "h"
+
+// DIPHTHONGS ARE APPROXIMATELY equivalent
+"aja" "^" "" "(Da|ia)"                         
+"aje" "^" "" "(Di|Da|i|ia)"                         
+"aji" "^" "" "(Di|i)"                         
+"ajo" "^" "" "(Du|Da|iu|ia)"                         
+"aju" "^" "" "(Du|iu)"                         
+
+"aj" "" "" "D"                         
+"ej" "" "" "D"                         
+"oj" "" "" "D"                         
+"uj" "" "" "D"                         
+"au" "" "" "D"                         
+"eu" "" "" "D"                         
+"ou" "" "" "D"                         
+
+"a" "^" "" "(a|)"  // Arabic
+
+"ja" "^" "" "ia"                         
+"je" "^" "" "i"                         
+"jo" "^" "" "(iu|ia)"                         
+"ju" "^" "" "iu"                         
+
+"ja" "" "" "a"                         
+"je" "" "" "i"                         
+"ji" "" "" "i"                         
+"jo" "" "" "u"                         
+"ju" "" "" "u"                         
+
+"j" "" "" "i"                         
+
+// CONSONANTS {z & Z & dZ; s & S} are approximately interchangeable
+"s" "" "[rmnl]" "z"
+"S" "" "[rmnl]" "z"
+"s" "[rmnl]" "" "z"
+"S" "[rmnl]" "" "z" 
+
+"dS" "" "$" "S"
+"dZ" "" "$" "S"
+"Z" "" "$" "S"
+"S" "" "$" "(S|s)"
+"z" "" "$" "(S|s)"
+
+"S" "" "" "s"
+"dZ" "" "" "z"
+"Z" "" "" "z"
+
+"i" "" "$" "(i|)" // often in Arabic
+"e" "" "" "i"
+
+"o" "" "$" "(a|u)"
+"o" "" "" "u"
+
+// special character to deal correctly in Hebrew match
+"B" "" "" "b" 
+"V" "" "" "v" 
+
+// Arabic
+"p" "^" "" "b"

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_french.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_french.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_french.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_french.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_hebrew.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_hebrew.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_hebrew.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_hebrew.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_italian.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_italian.txt?rev=1151311&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_italian.txt (added)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_italian.txt Wed Jul 27 02:29:11 2011
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_approx_french
\ No newline at end of file



Mime
View raw message