commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bay...@apache.org
Subject cvs commit: jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec Encoder.java EncoderComparator.java RefinedSoundex.java Metaphone.java Soundex.java SoundexComparator.java
Date Mon, 18 Nov 2002 08:46:02 GMT
bayard      2002/11/18 00:46:02

  Modified:    codec/src/java/org/apache/commons/codec Metaphone.java
                        Soundex.java
  Added:       codec/src/java/org/apache/commons/codec Encoder.java
                        EncoderComparator.java RefinedSoundex.java
  Removed:     codec/src/java/org/apache/commons/codec
                        SoundexComparator.java
  Log:
  Added a common interface (Encoder), a new algorithm (refined Soundex), and a generic comparator (EncoderComparator).
  Submitted-by: "O'brien, Tim" <tobrien@transolutions.net>
  
  Revision  Changes    Path
  1.3       +320 -293  jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Metaphone.java
  
  Index: Metaphone.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Metaphone.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- Metaphone.java	21 Oct 2002 19:05:19 -0000	1.2
  +++ Metaphone.java	18 Nov 2002 08:46:02 -0000	1.3
  @@ -1,293 +1,320 @@
  -// Permission given by wbrogden for code to be used anywhere.
  -package org.apache.commons.codec;
  -
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Commons" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -/*                                                     Metaphone.java
  - * A class to generate phonetic code and keep lists of objects
  - *  retrievable by a phonetic code.
  - * reference: Computer Language of Dec. 1990, p 39
  - *  "Hanging on the Metaphone" by Lawrence Philips
  - *
  - * This Java implementation, William B. Brogden.  December, 1997
  - *
  - * @author wbrogden@bga.com
  - */
  -
  -/*
  - * List functionality removed: 2001-06-21 bayard@generationjava.com
  - */
  -
  -/* 
  - * Notes:
  - * The static method metaphone converts an input String into a code.
  - *   All input is converted to upper case.
  - *   Limitations: Input format is expected to be a single ASCII word
  - *   with only characters in the A - Z range, no punctuation or numbers.
  - *
  - */
  -
  -class Metaphone {
  -
  -    static String vowels = "AEIOU" ;
  -    static String frontv = "EIY"   ;
  -    static String varson = "CSPTG" ;
  -
  -    static final int maxCodeLen = 4 ;
  -
  -    /**
  -     * Are the metaphones of two strings the same.
  -     */
  -    static public boolean isMetaphoneEqual(String str1, String str2) {
  -        return metaphone(str1).equals(metaphone(str2));
  -    }
  -
  -    /**
  -     * Find the metaphone value of a String. This is similar to the
  -     * soundex algorithm, but better at finding similar sounding words.
  -     */
  -    static public String metaphone( String txt ){
  -      int mtsz = 0  ;
  -      boolean hard = false ;
  -      if(( txt == null ) ||
  -         ( txt.length() == 0 )) return "" ;
  -      // single character is itself
  -      if( txt.length() == 1 ) return txt.toUpperCase() ;
  -      
  -      char[] inwd = txt.toUpperCase().toCharArray() ;
  -      
  -      String tmpS ;
  -      StringBuffer local = new StringBuffer( 40 ); // manipulate
  -      StringBuffer code = new StringBuffer( 10 ) ; //   output
  -      // handle initial 2 characters exceptions
  -      switch( inwd[0] ){
  -        case 'K': case 'G' : case 'P' : /* looking for KN, etc*/
  -          if( inwd[1] == 'N')local.append(inwd, 1, inwd.length - 1 );
  -          else local.append( inwd );
  -          break;
  -        case 'A': /* looking for AE */
  -          if( inwd[1] == 'E' )local.append(inwd, 1, inwd.length - 1 );
  -          else local.append( inwd );
  -          break;
  -        case 'W' : /* looking for WR or WH */
  -          if( inwd[1] == 'R' ){   // WR -> R
  -            local.append(inwd, 1, inwd.length - 1 ); break ;
  -          }
  -          if( inwd[1] == 'H'){
  -            local.append(inwd, 1, inwd.length - 1 );
  -            local.setCharAt( 0,'W'); // WH -> W
  -          }
  -          else local.append( inwd );
  -          break;
  -        case 'X' : /* initial X becomes S */
  -          inwd[0] = 'S' ;local.append( inwd );
  -          break ;
  -        default :
  -          local.append( inwd );
  -      } // now local has working string with initials fixed
  -      int wdsz = local.length();
  -      int n = 0 ;
  -      while((mtsz < maxCodeLen ) && // max code size of 4 works well
  -            (n < wdsz ) ){
  -        char symb = local.charAt(n) ;
  -        // remove duplicate letters except C
  -        if(( symb != 'C' ) &&
  -           (n > 0 ) && ( local.charAt(n - 1 ) == symb )) n++ ;
  -        else{ // not dup
  -          switch( symb ){
  -            case 'A' : case 'E' : case 'I' : case 'O' : case 'U' :
  -              if( n == 0 ) { code.append(symb );mtsz++;
  -              }
  -              break ; // only use vowel if leading char
  -            case 'B' :
  -              if( (n > 0 ) &&
  -                  !(n + 1 == wdsz ) && // not MB at end of word
  -                  ( local.charAt(n - 1) == 'M')) {
  -                    code.append(symb);
  -                  }
  -              else code.append(symb);
  -              mtsz++ ;
  -              break ;
  -            case 'C' : // lots of C special cases
  -              /* discard if SCI, SCE or SCY */
  -              if( ( n > 0 ) &&
  -                  ( local.charAt(n-1) == 'S' ) &&
  -                  ( n + 1 < wdsz ) &&
  -                  ( frontv.indexOf( local.charAt(n + 1)) >= 0 )){ break ;}
  -              tmpS = local.toString();
  -              if( tmpS.indexOf("CIA", n ) == n ) { // "CIA" -> X
  -                 code.append('X' ); mtsz++; break ;
  -              }
  -              if( ( n + 1 < wdsz ) &&
  -                  (frontv.indexOf( local.charAt(n+1) )>= 0 )){
  -                 code.append('S');mtsz++; break ; // CI,CE,CY -> S
  -              }
  -              if(( n > 0) &&
  -                 ( tmpS.indexOf("SCH",n-1 )== n-1 )){ // SCH->sk
  -                 code.append('K') ; mtsz++;break ;
  -              }
  -              if( tmpS.indexOf("CH", n ) == n ){ // detect CH
  -                if((n == 0 ) &&
  -                   (wdsz >= 3 ) &&    // CH consonant -> K consonant
  -                   (vowels.indexOf( local.charAt( 2) ) < 0 )){
  -                     code.append('K');
  -                }
  -                else { code.append('X'); // CHvowel -> X
  -                }
  -                mtsz++;
  -              }
  -              else { code.append('K' );mtsz++;
  -              }
  -              break ;
  -            case 'D' :
  -              if(( n + 2 < wdsz )&&  // DGE DGI DGY -> J
  -                 ( local.charAt(n+1) == 'G' )&&
  -                 (frontv.indexOf( local.charAt(n+2) )>= 0)){
  -                    code.append('J' ); n += 2 ;
  -              }
  -              else { code.append( 'T' );
  -              }
  -              mtsz++;
  -              break ;
  -            case 'G' : // GH silent at end or before consonant
  -              if(( n + 2 == wdsz )&&
  -                 (local.charAt(n+1) == 'H' )) break ;
  -              if(( n + 2 < wdsz ) &&
  -                 (local.charAt(n+1) == 'H' )&&
  -                 (vowels.indexOf( local.charAt(n+2)) < 0 )) break ;
  -              tmpS = local.toString();
  -              if((n > 0) &&
  -                 ( tmpS.indexOf("GN", n ) == n)||
  -                 ( tmpS.indexOf("GNED",n) == n )) break ; // silent G
  -              if(( n > 0 ) &&
  -                 (local.charAt(n-1) == 'G')) hard = true ;
  -              else hard = false ;
  -              if((n+1 < wdsz) &&
  -                 (frontv.indexOf( local.charAt(n+1) ) >= 0 )&&
  -                 (!hard) ) code.append( 'J' );
  -              else code.append('K');
  -              mtsz++;
  -              break ;
  -            case 'H':
  -              if( n + 1 == wdsz ) break ; // terminal H
  -              if((n > 0) &&
  -                 (varson.indexOf( local.charAt(n-1)) >= 0)) break ;
  -              if( vowels.indexOf( local.charAt(n+1)) >=0 ){
  -                  code.append('H') ; mtsz++;// Hvowel
  -              }
  -              break;
  -            case 'F': case 'J' : case 'L' :
  -            case 'M': case 'N' : case 'R' :
  -              code.append( symb ); mtsz++; break ;
  -            case 'K' :
  -              if( n > 0 ){ // not initial
  -                if( local.charAt( n -1) != 'C' ) {
  -                     code.append(symb );
  -                }
  -              }
  -              else   code.append( symb ); // initial K
  -              mtsz++ ;
  -              break ;
  -            case 'P' :
  -              if((n + 1 < wdsz) &&  // PH -> F
  -                 (local.charAt( n+1) == 'H'))code.append('F');
  -              else code.append( symb );
  -              mtsz++;
  -              break ;
  -            case 'Q' :
  -              code.append('K' );mtsz++; break ;
  -            case 'S' :
  -              tmpS = local.toString();
  -              if((tmpS.indexOf("SH", n )== n) ||
  -                 (tmpS.indexOf("SIO",n )== n) ||
  -                 (tmpS.indexOf("SIA",n )== n)) code.append('X');
  -              else code.append( 'S' );
  -              mtsz++ ;
  -              break ;
  -            case 'T' :
  -              tmpS = local.toString(); // TIA TIO -> X
  -              if((tmpS.indexOf("TIA",n )== n)||
  -                 (tmpS.indexOf("TIO",n )== n) ){
  -                    code.append('X'); mtsz++; break;
  -              }
  -              if( tmpS.indexOf("TCH",n )==n) break;
  -              // substitute numeral 0 for TH (resembles theta after all)
  -              if( tmpS.indexOf("TH", n )==n) code.append('0');
  -              else code.append( 'T' );
  -              mtsz++ ;
  -              break ;
  -            case 'V' :
  -              code.append('F'); mtsz++;break ;
  -            case 'W' : case 'Y' : // silent if not followed by vowel
  -              if((n+1 < wdsz) &&
  -                 (vowels.indexOf( local.charAt(n+1))>=0)){
  -                    code.append( symb );mtsz++;
  -              }
  -              break ;
  -            case 'X' :
  -              code.append('K'); code.append('S');mtsz += 2;
  -              break ;
  -            case 'Z' :
  -              code.append('S'); mtsz++; break ;
  -          } // end switch
  -          n++ ;
  -        } // end else from symb != 'C'
  -        if( mtsz > 4 )code.setLength( 4);
  -      }
  -      return code.toString();
  -    } // end static method metaPhone()
  -
  -}
  +// Permission given by wbrogden for code to be used anywhere.
  +package org.apache.commons.codec;
  +
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Commons" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Turbine", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +/*                                                     Metaphone.java
  + * A class to generate phonetic code and keep lists of objects
  + *  retrievable by a phonetic code.
  + * reference: Computer Language of Dec. 1990, p 39
  + *  "Hanging on the Metaphone" by Lawrence Philips
  + *
  + * This Java implementation, William B. Brogden.  December, 1997
  + *
  + * @author wbrogden@bga.com
  + * @author tobrien@transolutions.net
  + */
  +
  +/*
  + * List functionality removed: 2001-06-21 bayard@generationjava.com
  + */
  +
  +/* 
  + * Notes:
  + * The instance method metaphone converts an input String into a code.
  + *   All input is converted to upper case.
  + *   Limitations: Input format is expected to be a single ASCII word
  + *   with only characters in the A - Z range, no punctuation or numbers.
  + *
  + */
  +
  +public class Metaphone implements Encoder {
  +
  +    private String vowels = "AEIOU" ;
  +    private String frontv = "EIY"   ;
  +    private String varson = "CSPTG" ;
  +
  +    private int maxCodeLen = 4 ;
  +
  +    public Metaphone() {
  +        super();
  +    }
  +
  +
  +    /**
  +     * Find the metaphone value of a String. This is similar to the
  +     * soundex algorithm, but better at finding similar sounding words.
  +     */
  +    public String metaphone( String txt ){
  +      int mtsz = 0  ;
  +      boolean hard = false ;
  +      if(( txt == null ) ||
  +         ( txt.length() == 0 )) return "" ;
  +      // single character is itself
  +      if( txt.length() == 1 ) return txt.toUpperCase() ;
  +      
  +      char[] inwd = txt.toUpperCase().toCharArray() ;
  +      
  +      String tmpS ;
  +      StringBuffer local = new StringBuffer( 40 ); // manipulate
  +      StringBuffer code = new StringBuffer( 10 ) ; //   output
  +      // handle initial 2 characters exceptions
  +      switch( inwd[0] ){
  +        case 'K': case 'G' : case 'P' : /* looking for KN, etc*/
  +          if( inwd[1] == 'N')local.append(inwd, 1, inwd.length - 1 );
  +          else local.append( inwd );
  +          break;
  +        case 'A': /* looking for AE */
  +          if( inwd[1] == 'E' )local.append(inwd, 1, inwd.length - 1 );
  +          else local.append( inwd );
  +          break;
  +        case 'W' : /* looking for WR or WH */
  +          if( inwd[1] == 'R' ){   // WR -> R
  +            local.append(inwd, 1, inwd.length - 1 ); break ;
  +          }
  +          if( inwd[1] == 'H'){
  +            local.append(inwd, 1, inwd.length - 1 );
  +            local.setCharAt( 0,'W'); // WH -> W
  +          }
  +          else local.append( inwd );
  +          break;
  +        case 'X' : /* initial X becomes S */
  +          inwd[0] = 'S' ;local.append( inwd );
  +          break ;
  +        default :
  +          local.append( inwd );
  +      } // now local has working string with initials fixed
  +      int wdsz = local.length();
  +      int n = 0 ;
  +      while((mtsz < maxCodeLen ) && // max code size of 4 works well
  +            (n < wdsz ) ){
  +        char symb = local.charAt(n) ;
  +        // remove duplicate letters except C
  +        if(( symb != 'C' ) &&
  +           (n > 0 ) && ( local.charAt(n - 1 ) == symb )) n++ ;
  +        else{ // not dup
  +          switch( symb ){
  +            case 'A' : case 'E' : case 'I' : case 'O' : case 'U' :
  +              if( n == 0 ) { code.append(symb );mtsz++;
  +              }
  +              break ; // only use vowel if leading char
  +            case 'B' :
  +              if( (n > 0 ) &&
  +                  !(n + 1 == wdsz ) && // not MB at end of word
  +                  ( local.charAt(n - 1) == 'M')) {
  +                    code.append(symb);
  +                  }
  +              else code.append(symb);
  +              mtsz++ ;
  +              break ;
  +            case 'C' : // lots of C special cases
  +              /* discard if SCI, SCE or SCY */
  +              if( ( n > 0 ) &&
  +                  ( local.charAt(n-1) == 'S' ) &&
  +                  ( n + 1 < wdsz ) &&
  +                  ( frontv.indexOf( local.charAt(n + 1)) >= 0 )){ break ;}
  +              tmpS = local.toString();
  +              if( tmpS.indexOf("CIA", n ) == n ) { // "CIA" -> X
  +                 code.append('X' ); mtsz++; break ;
  +              }
  +              if( ( n + 1 < wdsz ) &&
  +                  (frontv.indexOf( local.charAt(n+1) )>= 0 )){
  +                 code.append('S');mtsz++; break ; // CI,CE,CY -> S
  +              }
  +              if(( n > 0) &&
  +                 ( tmpS.indexOf("SCH",n-1 )== n-1 )){ // SCH->sk
  +                 code.append('K') ; mtsz++;break ;
  +              }
  +              if( tmpS.indexOf("CH", n ) == n ){ // detect CH
  +                if((n == 0 ) &&
  +                   (wdsz >= 3 ) &&    // CH consonant -> K consonant
  +                   (vowels.indexOf( local.charAt( 2) ) < 0 )){
  +                     code.append('K');
  +                }
  +                else { code.append('X'); // CHvowel -> X
  +                }
  +                mtsz++;
  +              }
  +              else { code.append('K' );mtsz++;
  +              }
  +              break ;
  +            case 'D' :
  +              if(( n + 2 < wdsz )&&  // DGE DGI DGY -> J
  +                 ( local.charAt(n+1) == 'G' )&&
  +                 (frontv.indexOf( local.charAt(n+2) )>= 0)){
  +                    code.append('J' ); n += 2 ;
  +              }
  +              else { code.append( 'T' );
  +              }
  +              mtsz++;
  +              break ;
  +            case 'G' : // GH silent at end or before consonant
  +              if(( n + 2 == wdsz )&&
  +                 (local.charAt(n+1) == 'H' )) break ;
  +              if(( n + 2 < wdsz ) &&
  +                 (local.charAt(n+1) == 'H' )&&
  +                 (vowels.indexOf( local.charAt(n+2)) < 0 )) break ;
  +              tmpS = local.toString();
  +              if((n > 0) &&
  +                 ( tmpS.indexOf("GN", n ) == n)||
  +                 ( tmpS.indexOf("GNED",n) == n )) break ; // silent G
  +              if(( n > 0 ) &&
  +                 (local.charAt(n-1) == 'G')) hard = true ;
  +              else hard = false ;
  +              if((n+1 < wdsz) &&
  +                 (frontv.indexOf( local.charAt(n+1) ) >= 0 )&&
  +                 (!hard) ) code.append( 'J' );
  +              else code.append('K');
  +              mtsz++;
  +              break ;
  +            case 'H':
  +              if( n + 1 == wdsz ) break ; // terminal H
  +              if((n > 0) &&
  +                 (varson.indexOf( local.charAt(n-1)) >= 0)) break ;
  +              if( vowels.indexOf( local.charAt(n+1)) >=0 ){
  +                  code.append('H') ; mtsz++;// Hvowel
  +              }
  +              break;
  +            case 'F': case 'J' : case 'L' :
  +            case 'M': case 'N' : case 'R' :
  +              code.append( symb ); mtsz++; break ;
  +            case 'K' :
  +              if( n > 0 ){ // not initial
  +                if( local.charAt( n -1) != 'C' ) {
  +                     code.append(symb );
  +                }
  +              }
  +              else   code.append( symb ); // initial K
  +              mtsz++ ;
  +              break ;
  +            case 'P' :
  +              if((n + 1 < wdsz) &&  // PH -> F
  +                 (local.charAt( n+1) == 'H'))code.append('F');
  +              else code.append( symb );
  +              mtsz++;
  +              break ;
  +            case 'Q' :
  +              code.append('K' );mtsz++; break ;
  +            case 'S' :
  +              tmpS = local.toString();
  +              if((tmpS.indexOf("SH", n )== n) ||
  +                 (tmpS.indexOf("SIO",n )== n) ||
  +                 (tmpS.indexOf("SIA",n )== n)) code.append('X');
  +              else code.append( 'S' );
  +              mtsz++ ;
  +              break ;
  +            case 'T' :
  +              tmpS = local.toString(); // TIA TIO -> X
  +              if((tmpS.indexOf("TIA",n )== n)||
  +                 (tmpS.indexOf("TIO",n )== n) ){
  +                    code.append('X'); mtsz++; break;
  +              }
  +              if( tmpS.indexOf("TCH",n )==n) break;
  +              // substitute numeral 0 for TH (resembles theta after all)
  +              if( tmpS.indexOf("TH", n )==n) code.append('0');
  +              else code.append( 'T' );
  +              mtsz++ ;
  +              break ;
  +            case 'V' :
  +              code.append('F'); mtsz++;break ;
  +            case 'W' : case 'Y' : // silent if not followed by vowel
  +              if((n+1 < wdsz) &&
  +                 (vowels.indexOf( local.charAt(n+1))>=0)){
  +                    code.append( symb );mtsz++;
  +              }
  +              break ;
  +            case 'X' :
  +              code.append('K'); code.append('S');mtsz += 2;
  +              break ;
  +            case 'Z' :
  +              code.append('S'); mtsz++; break ;
  +          } // end switch
  +          n++ ;
  +        } // end else from symb != 'C'
  +        if( mtsz > 4 )code.setLength( 4);
  +      }
  +      return code.toString();
  +    } // end method metaphone()
  +    
  +    public String encode(String pString) {
  +        return( metaphone( pString ) );   
  +    }
  +
  +    /**
  +     * Are the metaphones of two strings the same.
  +     */
  +    public boolean isMetaphoneEqual(String str1, String str2) {
  +        return metaphone(str1).equals(metaphone(str2));
  +    }
  +
  +	/**
  +	 * Returns the maxCodeLen.
  +	 * @return int
  +	 */
  +	public int getMaxCodeLen() {
  +		return maxCodeLen;
  +	}
  +
  +	/**
  +	 * Sets the maxCodeLen.
  +	 * @param maxCodeLen The maxCodeLen to set
  +	 */
  +	public void setMaxCodeLen(int maxCodeLen) {
  +		this.maxCodeLen = maxCodeLen;
  +	}
  +
  +}
  +
  
  
  
  1.2       +143 -115  jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Soundex.java
  
  Index: Soundex.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Soundex.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- Soundex.java	22 Feb 2002 05:20:00 -0000	1.1
  +++ Soundex.java	18 Nov 2002 08:46:02 -0000	1.2
  @@ -1,115 +1,143 @@
  -package org.apache.commons.codec;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Commons" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -/**
  - * Find the Soundex of a string. Needs internationalisation in a 
  - * future release.
  - *
  - * @author bayard@generationjava.com
  - * @version $Id$
  - */
  -public class Soundex {
  -
  -    static public final char[] US_ENGLISH_MAPPING =
  -        "01230120022455012623010202".toCharArray();
  -
  -    static public final Soundex US_ENGLISH = new Soundex();
  -    
  -    private char[] soundexMapping;
  -
  -    public Soundex() {
  -        this(US_ENGLISH_MAPPING);
  -    }
  -
  -    public Soundex(char[] mapping) {
  -        this.soundexMapping = mapping;
  -    }
  -
  -    /**
  -     * Get the SoundEx value of a string.
  -     * This implementation is taken from the code-snippers on 
  -     * http://www.sourceforge.net/
  -     */
  -    public String soundex(String str) {
  -        char out[] = { '0', '0', '0', '0' };
  -        char last, mapped;
  -        int incount = 1, count = 1;
  -        out[0] = Character.toUpperCase( str.charAt(0) );
  -        last = getMappingCode( str.charAt(0) );
  -        while( (incount < str.length() ) && 
  -               (mapped = getMappingCode(str.charAt(incount++))) != 0 &&
  -               (count < 4) )
  -        {
  -            if( (mapped != '0') && (mapped != last) ) {
  -                out[count++] = mapped;
  -            }
  -            last = mapped;
  -        }
  -        return new String(out);
  -    }
  -
  -    /**
  -     * Used internally by the SoundEx algorithm.
  -     */
  -    private char getMappingCode(char c) {
  -        if( !Character.isLetter(c) ) {
  -            return 0;
  -        } else {
  -            return soundexMapping[Character.toUpperCase(c) - 'A'];
  -        }
  -    }
  -
  -}
  +package org.apache.commons.codec;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Commons" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Turbine", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +/**
  + * Encodes a string into a soundex value.  Soundex is an encoding used to
  + * relate similar names, and can also be used to find words with similar phonemes.
  + * "Soundex" method originally developed by Margaret Odell and 
  + *          Robert Russell
  + * 
  + * http://www.bluepoof.com/Soundex/info2.html
  + * 
  + * @todo Needs internationalisation in a future release.
  + *
  + * @author bayard@generationjava.com
  + * @author tobrien@transolutions.net
  + * @version $Id$
  + */
  +public class Soundex implements Encoder {
  +
  +    static public final char[] US_ENGLISH_MAPPING =
  +        "01230120022455012623010202".toCharArray();
  +
  +    static public final Soundex US_ENGLISH = new Soundex();
  +    
  +    private char[] soundexMapping;
  +    private int maxLength = 4;
  +
  +    public Soundex() {
  +        this(US_ENGLISH_MAPPING);
  +    }
  +
  +    public Soundex(char[] mapping) {
  +        this.soundexMapping = mapping;
  +    }
  +
  +    /**
  +     * Get the SoundEx value of a string.
  +     * This implementation is taken from the code-snippers on 
  +     * http://www.sourceforge.net/
  +     */
  +    public String soundex(String str) {
  +        char out[] = { '0', '0', '0', '0' };
  +        char last, mapped;
  +        int incount = 1, count = 1;
  +        out[0] = Character.toUpperCase( str.charAt(0) );
  +        last = getMappingCode( str.charAt(0) );
  +        while( (incount < str.length() ) && 
  +               (mapped = getMappingCode(str.charAt(incount++))) != 0 &&
  +               (count < maxLength) )
  +        {
  +            if( (mapped != '0') && (mapped != last) ) {
  +                out[count++] = mapped;
  +            }
  +            last = mapped;
  +        }
  +        return new String(out);
  +    }
  +
  +    public String encode(String pString) {
  +        return( soundex( pString ) );   
  +    }
  +
  +    /**
  +     * Used internally by the SoundEx algorithm.
  +     */
  +    private char getMappingCode(char c) {
  +        if( !Character.isLetter(c) ) {
  +            return 0;
  +        } else {
  +            return soundexMapping[Character.toUpperCase(c) - 'A'];
  +        }
  +    }
  +
  +	/**
  +	 * Returns the maxLength.  Standard Soundex
  +	 * @return int
  +	 */
  +	public int getMaxLength() {
  +		return maxLength;
  +	}
  +
  +	/**
  +	 * Sets the maxLength.
  +	 * @param maxLength The maxLength to set
  +	 */
  +	public void setMaxLength(int maxLength) {
  +		this.maxLength = maxLength;
  +	}
  +
  +}
  
  
  
  1.1                  jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Encoder.java
  
  Index: Encoder.java
  ===================================================================
  package org.apache.commons.codec;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Commons" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Turbine", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  /**
   * Common contract for the string encoders in this package, which is
   * implemented by Soundex, Metaphone, RefinedSoundex, etc.
   *
   * @author tobrien@transolutions.net
   * @version $Id: Encoder.java,v 1.1 2002/11/18 08:46:02 bayard Exp $
   */
  public interface Encoder {

      /**
       * Encodes the supplied string.
       *
       * @param str the string to encode
       * @return the encoded form of <code>str</code>
       */
      String encode(String str);

  }
      
  
  
  
  1.1                  jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/EncoderComparator.java
  
  Index: EncoderComparator.java
  ===================================================================
  package org.apache.commons.codec;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Commons" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Turbine", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  import java.util.Comparator;
  
  /**
   * Orders objects by the encoded form of their string representation,
   * where the encoding is produced by a pluggable Encoder.
   *
   * @author tobrien@transolutions.net
   * @version $Id: EncoderComparator.java,v 1.1 2002/11/18 08:46:02 bayard Exp $
   */
  public class EncoderComparator implements Comparator {

      /** Encoder applied to both operands before they are compared. */
      private final Encoder encoder;

      /**
       * Creates a comparator backed by RefinedSoundex.US_ENGLISH.
       */
      public EncoderComparator() {
          this(RefinedSoundex.US_ENGLISH);
      }

      /**
       * Creates a comparator backed by the given encoder.
       *
       * @param en the encoder used to encode each operand
       */
      public EncoderComparator(Encoder en) {
          encoder = en;
      }

      /**
       * Compares the encodings of the two objects' toString() values.
       *
       * @param o1 first object; its toString() value is encoded
       * @param o2 second object; its toString() value is encoded
       * @return the result of String.compareTo on the two encodings
       */
      public int compare(Object o1, Object o2) {
          return encoder.encode(o1.toString())
                        .compareTo(encoder.encode(o2.toString()));
      }

  }
  
  
  
  1.1                  jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/RefinedSoundex.java
  
  Index: RefinedSoundex.java
  ===================================================================
  package org.apache.commons.codec;
  
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Commons" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Turbine", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  /**
   * Encodes a string into a refined soundex value.
   * A refined soundex code is optimized for spell checking words.
   * More information may be found at: http://www.bluepoof.com/Soundex/info2.html
   * 
   * @todo Needs internationalisation in a future release.
   *
   * @author tobrien@transolutions.net
   * @version $Id: RefinedSoundex.java,v 1.1 2002/11/18 08:46:02 bayard Exp $
   */
  public class RefinedSoundex implements Encoder {

      /**
       * Code table for US English, indexed by letter ('A' is index 0,
       * 'Z' is index 25).
       */
      static public final char[] US_ENGLISH_MAPPING =
          "01360240043788015936020505".toCharArray();

      /** Shared instance that uses US_ENGLISH_MAPPING. */
      static public final RefinedSoundex US_ENGLISH = new RefinedSoundex();
      
      /** Code table used by this instance, indexed by letter offset from 'A'. */
      private char[] soundexMapping;

      /**
       * Creates an encoder that uses US_ENGLISH_MAPPING.
       */
      public RefinedSoundex() {
          this(US_ENGLISH_MAPPING);
      }

      /**
       * Creates an encoder that uses the given code table.
       *
       * @param mapping code for each letter, indexed by offset from 'A';
       *                the array is stored as given, not copied
       */
      public RefinedSoundex(char[] mapping) {
          this.soundexMapping = mapping;
      }

      /**
       * Get the refined soundex value of a string.
       * The result is the upper-cased first character followed by the code
       * of every mapped letter (the first letter included); a run of
       * consecutive letters that share a code contributes that code once.
       * Characters without a code are skipped and end the current run.
       * This implementation is taken from the code snippets on 
       * http://www.sourceforge.net/
       *
       * @param str the string to encode, may be null
       * @return the encoded value; null for null input and the empty
       *         string for empty input
       */
      public String soundex(String str) {
          if( str == null ) {
              return null;
          }
          if( str.length() == 0 ) {
              // nothing to encode; charAt(0) below would throw
              return str;
          }

          // A fixed locale keeps the result independent of the platform
          // default (e.g. the Turkish dotted capital I).
          str = str.toUpperCase( java.util.Locale.ENGLISH );

          StringBuffer sBuf = new StringBuffer();
          sBuf.append( str.charAt(0) );

          // '*' is never a mapping code, so the first letter is always emitted
          char last = '*';

          for( int i = 0; i < str.length(); i++ ) {
              char current = getMappingCode( str.charAt(i) );
              if( current == last ) {
                  continue;
              }
              if( current != 0 ) {
                  sBuf.append( current );
              }
              last = current;
          }

          return sBuf.toString();
      }

      /**
       * Encodes a string; delegates to soundex(String).
       *
       * @param pString the string to encode
       * @return the refined soundex value of pString
       */
      public String encode(String pString) {
          return( soundex( pString ) );   
      }

      /**
       * Used internally by the SoundEx algorithm.
       *
       * @param c the character to look up
       * @return the code for c, or 0 when c is not a letter or lies
       *         outside the range covered by the mapping table
       */
      private char getMappingCode(char c) {
          if( !Character.isLetter(c) ) {
              return 0;
          }
          int index = Character.toUpperCase(c) - 'A';
          // isLetter is also true for letters beyond 'Z' (accented, non-Latin);
          // those have no slot in the table and must not index it.
          if( index < 0 || index >= soundexMapping.length ) {
              return 0;
          }
          return soundexMapping[index];
      }

  }
  
  
  

--
To unsubscribe, e-mail:   <mailto:commons-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:commons-dev-help@jakarta.apache.org>


Mime
View raw message