commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tobr...@apache.org
Subject cvs commit: jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/language Metaphone.java RefinedSoundex.java Soundex.java DoubleMetaphone.java Nysiis.java
Date Tue, 04 Feb 2003 04:43:59 GMT
tobrien     2003/02/03 20:43:59

  Modified:    codec/src/java/org/apache/commons/codec/language
                        DoubleMetaphone.java Nysiis.java
  Added:       codec/src/java/org/apache/commons/codec/language
                        Metaphone.java RefinedSoundex.java Soundex.java
  Log:
  Moved language codecs to language package
  
  Revision  Changes    Path
  1.3       +2 -2      jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/language/DoubleMetaphone.java
  
  Index: DoubleMetaphone.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/language/DoubleMetaphone.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- DoubleMetaphone.java	3 Feb 2003 16:03:54 -0000	1.2
  +++ DoubleMetaphone.java	4 Feb 2003 04:43:58 -0000	1.3
  @@ -62,7 +62,7 @@
    * <b>PLEASE NOTE:</b> This implementation is not thread-safe.  Please 
    * see TODO list for [codec] - Tim O'Brien
    *
  - * @see http://www.cse.ucsc.edu/~eparrish/toolbox/search.html
  + * @see <a href="http://www.cse.ucsc.edu/~eparrish/toolbox/search.html">Ed Parrish,
Developing a Search Engine</a>
    *
    * @version $Revision$
    * @author Ed Parish
  
  
  
  1.3       +4 -8      jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/language/Nysiis.java
  
  Index: Nysiis.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/language/Nysiis.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- Nysiis.java	3 Feb 2003 16:02:16 -0000	1.2
  +++ Nysiis.java	4 Feb 2003 04:43:58 -0000	1.3
  @@ -61,13 +61,9 @@
    * the code from the Perl module available from CPAN, which derives from an
    * implementation by Ben Kennedy.
    *
  - * @see http://www.nist.gov/dads/HTML/nysiis.html
  - * @see http://search.cpan.org/search?query=nysiis&mode=all
  - *
  - * @see Atack, J., and F. Bateman. 1992 .
  - *   <i>"Matchmaker, matchmaker, make me a match"</i> : a general
  - *   computer-based matching program for historical researc.
  - *   Historical Methods 25: 53-65.
  + * @see <a href="http://www.nist.gov/dads/HTML/nysiis.html">NIST Nysiis definition</a>
  + * @see <a href="http://search.cpan.org/search?query=nysiis&mode=all">Nysiis
implementation at CPAN</a>
  + * @see <a href="http://www.ssha.org/mainsite/news/methodsad.html">Atack, J., and
F. Bateman. 1992 . <i>"Matchmaker, matchmaker, make me a match"</i> : a general
computer-based matching program for historical researc. Historical Methods 25: 53-65.</a>
    *
    * @version $Revision$
    * @author <a href="mortis@voicenet.com">Kyle R. Burton</a>
  
  
  
  1.1                  jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/language/Metaphone.java
  
  Index: Metaphone.java
  ===================================================================
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001-2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Commons" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Commons", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  package org.apache.commons.codec.language;
  
  import org.apache.commons.codec.Encoder;
  
  /**
   * A class to generate phonetic code.
   * The initial Java implementation, William B. Brogden.  December, 1997
   * Permission given by wbrogden for code to be used anywhere.
   * 
   *  "Hanging on the Metaphone" by Lawrence Philips
   *      <i>Computer Language</i> of Dec. 1990, p 39   
   * 
   * @version $Revision: 1.1 $ $Date: 2003/02/04 04:43:58 $
   * @author wbrogden@bga.com
   * @author bayard@generationjava.com
   * @author tobrien@transolutions.net
   */
  public class Metaphone implements Encoder {

      /** The five vowels; only a leading vowel is ever emitted. */
      private static final String VOWELS = "AEIOU";

      /** Letters that soften a preceding C or G (and DG). */
      private static final String FRONTV = "EIY";

      /** Letters after which an H is silent (CH, SH, PH, TH, GH). */
      private static final String VARSON = "CSPTG";

      /** Maximum number of characters in a generated code. */
      private int maxCodeLen = 4;

      /**
       * Creates an encoder that produces codes of at most four characters.
       */
      public Metaphone() {
          super();
      }

      /**
       * Find the metaphone value of a String. This is similar to the
       * soundex algorithm, but better at finding similar sounding words.
       * All input is converted to upper case (using English casing rules,
       * independent of the default locale).
       * Limitations: Input format is expected to be a single ASCII word
       * with only characters in the A - Z range, no punctuation or numbers.
       *
       * @param txt the word to encode; may be <code>null</code>
       * @return the metaphone code, at most {@link #getMaxCodeLen()}
       *         characters long; the empty string for <code>null</code> or
       *         empty input; the upper-cased input itself when it is a
       *         single character
       */
      public String metaphone(String txt) {
          if (txt == null || txt.length() == 0) {
              return "";
          }
          // A single character is its own code.
          if (txt.length() == 1) {
              return txt.toUpperCase(java.util.Locale.ENGLISH);
          }

          String word = fixInitialLetters(txt.toUpperCase(java.util.Locale.ENGLISH));
          int wdsz = word.length();
          StringBuffer code = new StringBuffer(10);
          int n = 0;

          // The length of the emitted code itself bounds the loop, so that
          // letters which turn out to be silent never use up code positions.
          while (code.length() < maxCodeLen && n < wdsz) {
              char symb = word.charAt(n);

              // Skip the second of two identical letters, except for C.
              if (symb != 'C' && n > 0 && word.charAt(n - 1) == symb) {
                  n++;
                  continue;
              }

              switch (symb) {
                  case 'A': case 'E': case 'I': case 'O': case 'U':
                      // A vowel is only kept when it is the leading character.
                      if (n == 0) {
                          code.append(symb);
                      }
                      break;
                  case 'B':
                      // B is silent in a word-final MB (as in THUMB).
                      if (n > 0 && n + 1 == wdsz && word.charAt(n - 1) == 'M') {
                          break;
                      }
                      code.append(symb);
                      break;
                  case 'C':
                      // Silent in SCI, SCE and SCY.
                      if (n > 0
                              && word.charAt(n - 1) == 'S'
                              && n + 1 < wdsz
                              && FRONTV.indexOf(word.charAt(n + 1)) >= 0) {
                          break;
                      }
                      if (word.startsWith("CIA", n)) { // CIA -> X
                          code.append('X');
                          break;
                      }
                      if (n + 1 < wdsz && FRONTV.indexOf(word.charAt(n + 1)) >= 0) {
                          code.append('S'); // CI, CE, CY -> S
                          break;
                      }
                      if (n > 0 && word.startsWith("SCH", n - 1)) { // SCH -> SK
                          code.append('K');
                          break;
                      }
                      if (word.startsWith("CH", n)) {
                          if (n == 0 && wdsz >= 3 && VOWELS.indexOf(word.charAt(2)) < 0) {
                              code.append('K'); // initial CH + consonant -> K
                          } else {
                              code.append('X'); // any other CH -> X
                          }
                      } else {
                          code.append('K');
                      }
                      break;
                  case 'D':
                      if (n + 2 < wdsz
                              && word.charAt(n + 1) == 'G'
                              && FRONTV.indexOf(word.charAt(n + 2)) >= 0) {
                          code.append('J'); // DGE, DGI, DGY -> J
                          n += 2;           // the G and the vowel are consumed too
                      } else {
                          code.append('T');
                      }
                      break;
                  case 'G':
                      // GH is silent at the end of a word or before a consonant.
                      if (n + 2 == wdsz && word.charAt(n + 1) == 'H') {
                          break;
                      }
                      if (n + 2 < wdsz
                              && word.charAt(n + 1) == 'H'
                              && VOWELS.indexOf(word.charAt(n + 2)) < 0) {
                          break;
                      }
                      // G is silent in a non-initial GN (which also covers GNED).
                      // An initial GN never reaches this point: its G has
                      // already been removed by fixInitialLetters.
                      if (n > 0 && word.startsWith("GN", n)) {
                          break;
                      }
                      // The second G of a GG pair is skipped as a duplicate
                      // above, so the preceding letter is never G here.
                      if (n + 1 < wdsz && FRONTV.indexOf(word.charAt(n + 1)) >= 0) {
                          code.append('J');
                      } else {
                          code.append('K');
                      }
                      break;
                  case 'H':
                      if (n + 1 == wdsz) {
                          break; // terminal H is silent
                      }
                      if (n > 0 && VARSON.indexOf(word.charAt(n - 1)) >= 0) {
                          break; // part of CH, SH, PH, TH or GH
                      }
                      if (VOWELS.indexOf(word.charAt(n + 1)) >= 0) {
                          code.append('H'); // H before a vowel
                      }
                      break;
                  case 'F': case 'J': case 'L':
                  case 'M': case 'N': case 'R':
                      code.append(symb);
                      break;
                  case 'K':
                      // K is silent after C (CK); otherwise it is kept.
                      if (n == 0 || word.charAt(n - 1) != 'C') {
                          code.append(symb);
                      }
                      break;
                  case 'P':
                      if (n + 1 < wdsz && word.charAt(n + 1) == 'H') {
                          code.append('F'); // PH -> F
                      } else {
                          code.append(symb);
                      }
                      break;
                  case 'Q':
                      code.append('K');
                      break;
                  case 'S':
                      if (word.startsWith("SH", n)
                              || word.startsWith("SIO", n)
                              || word.startsWith("SIA", n)) {
                          code.append('X');
                      } else {
                          code.append('S');
                      }
                      break;
                  case 'T':
                      if (word.startsWith("TIA", n) || word.startsWith("TIO", n)) {
                          code.append('X'); // TIA, TIO -> X
                          break;
                      }
                      if (word.startsWith("TCH", n)) {
                          break; // T is silent in TCH
                      }
                      // substitute numeral 0 for TH (resembles theta after all)
                      if (word.startsWith("TH", n)) {
                          code.append('0');
                      } else {
                          code.append('T');
                      }
                      break;
                  case 'V':
                      code.append('F');
                      break;
                  case 'W': case 'Y':
                      // Silent unless followed by a vowel.
                      if (n + 1 < wdsz && VOWELS.indexOf(word.charAt(n + 1)) >= 0) {
                          code.append(symb);
                      }
                      break;
                  case 'X':
                      code.append('K');
                      code.append('S');
                      break;
                  case 'Z':
                      code.append('S');
                      break;
                  default:
                      // Anything outside A - Z contributes nothing.
                      break;
              }
              n++;

              // X emits two characters and may overshoot the limit by one.
              if (code.length() > maxCodeLen) {
                  code.setLength(maxCodeLen);
              }
          }
          return code.toString();
      }

      /**
       * Applies the special rules for the first two letters of a word:
       * KN, GN, PN, AE and WR drop their first letter, WH becomes W and
       * an initial X becomes S.
       *
       * @param word the upper-cased word, at least two characters long
       * @return the word with its initial letters adjusted
       */
      private static String fixInitialLetters(String word) {
          char second = word.charAt(1);
          switch (word.charAt(0)) {
              case 'K': case 'G': case 'P': // KN, GN, PN -> N
                  return (second == 'N') ? word.substring(1) : word;
              case 'A': // AE -> E
                  return (second == 'E') ? word.substring(1) : word;
              case 'W':
                  if (second == 'R') { // WR -> R
                      return word.substring(1);
                  }
                  if (second == 'H') { // WH -> W
                      return "W" + word.substring(2);
                  }
                  return word;
              case 'X': // initial X becomes S
                  return "S" + word.substring(1);
              default:
                  return word;
          }
      }

      /**
       * Encodes a String using the metaphone algorithm.
       *
       * @param pString the word to encode
       * @return the result of {@link #metaphone(String)} for the word
       */
      public String encode(String pString) {
          return metaphone(pString);
      }

      /**
       * Are the metaphones of two strings the same.
       *
       * @param str1 the first word
       * @param str2 the second word
       * @return <code>true</code> if both words produce the same code
       */
      public boolean isMetaphoneEqual(String str1, String str2) {
          return metaphone(str1).equals(metaphone(str2));
      }

      /**
       * Returns the maxCodeLen.
       * @return the maximum number of characters in a generated code
       */
      public int getMaxCodeLen() {
          return maxCodeLen;
      }

      /**
       * Sets the maxCodeLen.
       * @param maxCodeLen the maximum number of characters in a generated code
       */
      public void setMaxCodeLen(int maxCodeLen) {
          this.maxCodeLen = maxCodeLen;
      }

  }
  
  
  
  1.1                  jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/language/RefinedSoundex.java
  
  Index: RefinedSoundex.java
  ===================================================================
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Commons" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Commons", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  package org.apache.commons.codec.language;
  
  import org.apache.commons.codec.Encoder;
  
  /**
   * Encodes a string into a refined soundex value.  Soundex is an encoding used to
   * relate similar names, but can also be used as a general purpose
   * scheme to find words with similar phonemes. 
   * More information may be found at: http://www.bluepoof.com/Soundex/info2.html
   * 
   * @author tobrien@transolutions.net
   * @version $Revision: 1.1 $ $Date: 2003/02/04 04:43:59 $
   */
  public class RefinedSoundex implements Encoder {

      /**
       * Code digit for each of the letters A to Z, in alphabetical order.
       */
      public static final char[] US_ENGLISH_MAPPING =
          "01360240043788015936020505".toCharArray();

      /**
       * A shared instance that uses {@link #US_ENGLISH_MAPPING}.
       */
      public static final RefinedSoundex US_ENGLISH = new RefinedSoundex();

      /** Code digit per letter; index 0 is 'A'. */
      private char[] soundexMapping;

      /**
       * Creates an encoder that uses {@link #US_ENGLISH_MAPPING}.
       */
      public RefinedSoundex() {
          this(US_ENGLISH_MAPPING);
      }

      /**
       * Creates an encoder that uses the given mapping.
       *
       * @param mapping the code digit for each letter, starting with 'A';
       *                the array is used as passed, it is not copied
       */
      public RefinedSoundex(char[] mapping) {
          this.soundexMapping = mapping;
      }

      /**
       * Get the refined soundex value of a string: the upper-cased first
       * character followed by the code digit of every letter (the first
       * one included), where a run of letters sharing one digit
       * contributes that digit only once. Characters that have no entry
       * in the mapping contribute nothing.
       *
       * @param str the string to encode
       * @return the encoded string, or <code>str</code> itself when it is
       *         <code>null</code> or empty
       */
      public String soundex(String str) {
          if (str == null || str.length() == 0) {
              return str;
          }

          // English casing rules keep the result independent of the
          // default locale (e.g. the Turkish dotted capital I).
          String upper = str.toUpperCase(java.util.Locale.ENGLISH);

          StringBuffer sBuf = new StringBuffer();
          sBuf.append(upper.charAt(0));

          char last = '*'; // never a valid code, so the first digit is kept
          for (int i = 0; i < upper.length(); i++) {
              char current = getMappingCode(upper.charAt(i));
              if (current == last) {
                  continue;
              }
              if (current != 0) {
                  sBuf.append(current);
              }
              last = current;
          }

          return sBuf.toString();
      }

      /**
       * Encodes a String using the refined soundex algorithm.
       *
       * @param pString the string to encode
       * @return the result of {@link #soundex(String)} for the string
       */
      public String encode(String pString) {
          return soundex(pString);
      }

      /**
       * Looks up the code digit of a character.
       *
       * @param c the character to look up
       * @return the mapped digit, or 0 when the character is not a letter
       *         or has no entry in the mapping (any letter outside A - Z)
       */
      private char getMappingCode(char c) {
          if (!Character.isLetter(c)) {
              return 0;
          }
          int index = Character.toUpperCase(c) - 'A';
          // Letters beyond the table (accented, non-Latin) have no code.
          if (index < 0 || index >= soundexMapping.length) {
              return 0;
          }
          return soundexMapping[index];
      }
  }
  
  
  
  1.1                  jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/language/Soundex.java
  
  Index: Soundex.java
  ===================================================================
  /* ====================================================================
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 2001-2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache" and "Apache Software Foundation" and
   *    "Apache Commons" must not be used to endorse or promote products
   *    derived from this software without prior written permission. For
   *    written permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    "Apache Commons", nor may "Apache" appear in their name, without
   *    prior written permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  package org.apache.commons.codec.language;
  
  import org.apache.commons.codec.Encoder;
  
  /**
   * Encodes a string into a soundex value.  
   * (A refined soundex code, optimized for spell checking words, is produced by {@link RefinedSoundex}.)
   * "Soundex" method originally developed by Margaret Odell and 
   *          Robert Russell
   * 
   * http://www.bluepoof.com/Soundex/info2.html
   * 
   * @author bayard@generationjava.com
   * @author tobrien@transolutions.net
   * @version $Revision: 1.1 $ $Date: 2003/02/04 04:43:59 $
   */
  public class Soundex implements Encoder {

      /**
       * Code digit for each of the letters A to Z, in alphabetical order.
       */
      public static final char[] US_ENGLISH_MAPPING =
          "01230120022455012623010202".toCharArray();

      /**
       * A shared instance that uses {@link #US_ENGLISH_MAPPING}.
       */
      public static final Soundex US_ENGLISH = new Soundex();

      /** Length of the classic soundex code; shorter codes are padded. */
      private static final int MIN_CODE_LENGTH = 4;

      /** Code digit per letter; index 0 is 'A'. */
      private char[] soundexMapping;

      /** Upper bound on the number of significant characters in a code. */
      private int maxLength = 4;

      /**
       * Creates an encoder that uses {@link #US_ENGLISH_MAPPING}.
       */
      public Soundex() {
          this(US_ENGLISH_MAPPING);
      }

      /**
       * Creates an encoder that uses the given mapping.
       *
       * @param mapping the code digit for each letter, starting with 'A';
       *                the array is used as passed, it is not copied
       */
      public Soundex(char[] mapping) {
          this.soundexMapping = mapping;
      }

      /**
       * Get the SoundEx value of a string: the upper-cased first character
       * followed by code digits, padded with '0'. Encoding stops at the
       * first character that has no entry in the mapping (any character
       * other than a letter A - Z) or once <code>maxLength</code>
       * characters have been produced. The result is never shorter than
       * four characters and grows with <code>maxLength</code> beyond that.
       *
       * @param str the string to encode
       * @return the encoded string, or <code>str</code> itself when it is
       *         <code>null</code> or empty
       */
      public String soundex(String str) {
          if (str == null || str.length() == 0) {
              return str;
          }

          // Size the buffer from maxLength so that a limit above four
          // cannot run past the end of the array.
          int size = (maxLength > MIN_CODE_LENGTH) ? maxLength : MIN_CODE_LENGTH;
          char[] out = new char[size];
          for (int i = 0; i < size; i++) {
              out[i] = '0';
          }

          out[0] = Character.toUpperCase(str.charAt(0));
          char last = getMappingCode(str.charAt(0));

          int incount = 1;
          int count = 1;
          while (incount < str.length() && count < maxLength) {
              char mapped = getMappingCode(str.charAt(incount++));
              if (mapped == 0) {
                  break; // no code for this character: stop encoding
              }
              // '0' marks letters that are dropped; repeats are collapsed.
              if (mapped != '0' && mapped != last) {
                  out[count++] = mapped;
              }
              last = mapped;
          }
          return new String(out);
      }

      /**
       * Encodes a String using the soundex algorithm.
       *
       * @param pString the string to encode
       * @return the result of {@link #soundex(String)} for the string
       */
      public String encode(String pString) {
          return soundex(pString);
      }

      /**
       * Looks up the code digit of a character.
       *
       * @param c the character to look up
       * @return the mapped digit, or 0 when the character is not a letter
       *         or has no entry in the mapping (any letter outside A - Z)
       */
      private char getMappingCode(char c) {
          if (!Character.isLetter(c)) {
              return 0;
          }
          int index = Character.toUpperCase(c) - 'A';
          // Letters beyond the table (accented, non-Latin) have no code.
          if (index < 0 || index >= soundexMapping.length) {
              return 0;
          }
          return soundexMapping[index];
      }

      /**
       * Returns the maxLength; the default is 4.
       * @return the maximum number of significant characters in a code
       */
      public int getMaxLength() {
          return maxLength;
      }

      /**
       * Sets the maxLength.
       * @param maxLength the maximum number of significant characters in a code
       */
      public void setMaxLength(int maxLength) {
          this.maxLength = maxLength;
      }

  }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org


Mime
View raw message