commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s...@apache.org
Subject svn commit: r1789764 - in /commons/proper/codec/trunk/src: changes/changes.xml main/java/org/apache/commons/codec/language/Soundex.java
Date Fri, 31 Mar 2017 23:57:48 GMT
Author: sebb
Date: Fri Mar 31 23:57:48 2017
New Revision: 1789764

URL: http://svn.apache.org/viewvc?rev=1789764&view=rev
Log:
CODEC-199 Bug in HW rule in Soundex
Revert to a fix which does not entail change to public API

Modified:
    commons/proper/codec/trunk/src/changes/changes.xml
    commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java

Modified: commons/proper/codec/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/changes/changes.xml?rev=1789764&r1=1789763&r2=1789764&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/changes/changes.xml (original)
+++ commons/proper/codec/trunk/src/changes/changes.xml Fri Mar 31 23:57:48 2017
@@ -59,7 +59,7 @@ The <action> type attribute can be add,u
       <action issue="CODEC-221" dev="sebb" type="update">HmacUtils.updateHmac calls reset() unnecessarily</action>
       <action issue="CODEC-200" dev="sebb" type="fix" due-to="Luciano Vernaschi">Base32.HEX_DECODE_TABLE contains the wrong value 32</action>
       <action issue="CODEC-207" dev="ggregory" type="fix" due-to="Gary Gregory">Charsets Javadoc breaks build when using Java 8</action>
-      <action issue="CODEC-199" dev="ggregory" type="fix" due-to="Yossi Tamari">Bug in HW rule in Soundex</action>
+      <action issue="CODEC-199" dev="ggregory/sebb" type="fix" due-to="Yossi Tamari">Bug in HW rule in Soundex</action>
       <action issue="CODEC-209" dev="ggregory" type="fix" due-to="Gary Gregory">Javadoc for SHA-224 DigestUtils methods should mention Java 1.8.0 restriction instead of 1.4.0.</action>
       <action issue="CODEC-219" dev="ggregory" type="fix" due-to="Gary Gregory, Sebb">Don't deprecate Charsets Charset constants in favor of Java 7's java.nio.charset.StandardCharsets</action>
       <action issue="CODEC-217" dev="ggregory" type="add" due-to="Gary Gregory">Add HmacAlgorithms.HMAC_SHA_224 (Java 8 only)</action>

Modified: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java?rev=1789764&r1=1789763&r2=1789764&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java (original)
+++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java Fri Mar 31 23:57:48 2017
@@ -41,7 +41,7 @@ public class Soundex implements StringEn
      *
      * @see #US_ENGLISH_MAPPING
      */
-    public static final String US_ENGLISH_MAPPING_STRING = "0123012#02245501262301#202";
+    public static final String US_ENGLISH_MAPPING_STRING = "01230120022455012623010202";
 
     /**
      * This is a default mapping of the 26 letters used in US English. A value of <code>0</code> for a letter position
@@ -179,15 +179,6 @@ public class Soundex implements StringEn
     }
 
     /**
-     * Returns the soundex mapping.
-     *
-     * @return soundexMapping.
-     */
-    private char[] getSoundexMapping() {
-        return this.soundexMapping;
-    }
-
-    /**
      * Maps the given upper-case character to its Soundex code.
      *
      * @param ch
@@ -198,10 +189,10 @@ public class Soundex implements StringEn
      */
     private char map(final char ch) {
         final int index = ch - 'A';
-        if (index < 0 || index >= this.getSoundexMapping().length) {
+        if (index < 0 || index >= this.soundexMapping.length) {
             throw new IllegalArgumentException("The character is not mapped: " + ch);
         }
-        return this.getSoundexMapping()[index];
+        return this.soundexMapping[index];
     }
 
     /**
@@ -234,19 +225,20 @@ public class Soundex implements StringEn
             return str;
         }
         final char out[] = {'0', '0', '0', '0'};
-        char last, mapped;
-        int incount = 1, count = 1;
-        out[0] = str.charAt(0);
-        // map() throws IllegalArgumentException
-        last = this.map(str.charAt(0));
-        while (incount < str.length() && count < out.length) {
-            mapped = this.map(str.charAt(incount++));
-            if (mapped == '0') {
-                last = mapped;
-            } else if (mapped != '#' && mapped != last) {
-                out[count++] = mapped;
-                last = mapped;
+        int count = 0;
+        final char first = str.charAt(0);
+        out[count++] = first;
+        char lastDigit = map(first); // previous digit
+        for(int i = 1; i < str.length() && count < out.length ; i++) {
+            char ch = str.charAt(i);
+            if (ch == 'H' || ch == 'W') { // these are ignored completely
+                continue;
+            }
+            char digit = map(ch);
+            if (digit != '0' && digit != lastDigit) { // don't store vowels or repeats
+                out[count++] = digit;
             }
+            lastDigit = digit;
         }
         return new String(out);
     }



Mime
View raw message