commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From brit...@apache.org
Subject svn commit: r930980 [17/18] - in /websites/production/commons/content/sandbox/commons-text: ./ apidocs/ apidocs/org/ apidocs/org/apache/ apidocs/org/apache/commons/ apidocs/org/apache/commons/text/ apidocs/org/apache/commons/text/similarity/ apidocs/or...
Date Sat, 29 Nov 2014 11:35:17 GMT
Added: websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/JaroWrinklerDistance.html
==============================================================================
--- websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/JaroWrinklerDistance.html (added)
+++ websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/JaroWrinklerDistance.html Sat Nov 29 11:35:10 2014
@@ -0,0 +1,386 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head><meta http-equiv="content-type" content="text/html; charset=UTF-8" />
+<title>JaroWrinklerDistance xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../../apidocs/org/apache/commons/text/similarity/JaroWrinklerDistance.html">View Javadoc</a></div><pre>
+<a class="jxr_linenumber" name="L1" href="#L1">1</a>   <em class="jxr_comment">/*</em>
+<a class="jxr_linenumber" name="L2" href="#L2">2</a>   <em class="jxr_comment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a class="jxr_linenumber" name="L3" href="#L3">3</a>   <em class="jxr_comment"> * contributor license agreements.  See the NOTICE file distributed with</em>
+<a class="jxr_linenumber" name="L4" href="#L4">4</a>   <em class="jxr_comment"> * this work for additional information regarding copyright ownership.</em>
+<a class="jxr_linenumber" name="L5" href="#L5">5</a>   <em class="jxr_comment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a class="jxr_linenumber" name="L6" href="#L6">6</a>   <em class="jxr_comment"> * (the "License"); you may not use this file except in compliance with</em>
+<a class="jxr_linenumber" name="L7" href="#L7">7</a>   <em class="jxr_comment"> * the License.  You may obtain a copy of the License at</em>
+<a class="jxr_linenumber" name="L8" href="#L8">8</a>   <em class="jxr_comment"> *</em>
+<a class="jxr_linenumber" name="L9" href="#L9">9</a>   <em class="jxr_comment"> *      <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a class="jxr_linenumber" name="L10" href="#L10">10</a>  <em class="jxr_comment"> *</em>
+<a class="jxr_linenumber" name="L11" href="#L11">11</a>  <em class="jxr_comment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a class="jxr_linenumber" name="L12" href="#L12">12</a>  <em class="jxr_comment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a class="jxr_linenumber" name="L13" href="#L13">13</a>  <em class="jxr_comment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a class="jxr_linenumber" name="L14" href="#L14">14</a>  <em class="jxr_comment"> * See the License for the specific language governing permissions and</em>
+<a class="jxr_linenumber" name="L15" href="#L15">15</a>  <em class="jxr_comment"> * limitations under the License.</em>
+<a class="jxr_linenumber" name="L16" href="#L16">16</a>  <em class="jxr_comment"> */</em>
+<a class="jxr_linenumber" name="L17" href="#L17">17</a>  <strong class="jxr_keyword">package</strong> org.apache.commons.text.similarity;
+<a class="jxr_linenumber" name="L18" href="#L18">18</a>  
+<a class="jxr_linenumber" name="L19" href="#L19">19</a>  <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L20" href="#L20">20</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L21" href="#L21">21</a>  <em class="jxr_javadoccomment"> * The Jaro measure is the weighted sum of percentage of matched characters</em>
+<a class="jxr_linenumber" name="L22" href="#L22">22</a>  <em class="jxr_javadoccomment"> * from each file and transposed characters. Winkler increased this measure</em>
+<a class="jxr_linenumber" name="L23" href="#L23">23</a>  <em class="jxr_javadoccomment"> * for matching initial characters.</em>
+<a class="jxr_linenumber" name="L24" href="#L24">24</a>  <em class="jxr_javadoccomment"> * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L25" href="#L25">25</a>  <em class="jxr_javadoccomment"> *</em>
+<a class="jxr_linenumber" name="L26" href="#L26">26</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L27" href="#L27">27</a>  <em class="jxr_javadoccomment"> * This implementation is based on the Jaro Winkler similarity algorithm</em>
+<a class="jxr_linenumber" name="L28" href="#L28">28</a>  <em class="jxr_javadoccomment"> * from &lt;a href="<a href="http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance" target="alexandria_uri">http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance</a>"&gt;</em>
+<a class="jxr_linenumber" name="L29" href="#L29">29</a>  <em class="jxr_javadoccomment"> * <a href="http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance" target="alexandria_uri">http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance</a>&lt;/a&gt;.</em>
+<a class="jxr_linenumber" name="L30" href="#L30">30</a>  <em class="jxr_javadoccomment"> * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L31" href="#L31">31</a>  <em class="jxr_javadoccomment"> *</em>
+<a class="jxr_linenumber" name="L32" href="#L32">32</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L33" href="#L33">33</a>  <em class="jxr_javadoccomment"> * This code has been adapted from Apache Commons Lang 3.3.</em>
+<a class="jxr_linenumber" name="L34" href="#L34">34</a>  <em class="jxr_javadoccomment"> * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L35" href="#L35">35</a>  <em class="jxr_javadoccomment"> *</em>
+<a class="jxr_linenumber" name="L36" href="#L36">36</a>  <em class="jxr_javadoccomment"> * @since 1.0</em>
+<a class="jxr_linenumber" name="L37" href="#L37">37</a>  <em class="jxr_javadoccomment"> */</em>
+<a class="jxr_linenumber" name="L38" href="#L38">38</a>  <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../../org/apache/commons/text/similarity/JaroWrinklerDistance.html">JaroWrinklerDistance</a> <strong class="jxr_keyword">implements</strong> StringMetric&lt;Double&gt; {
+<a class="jxr_linenumber" name="L39" href="#L39">39</a>  
+<a class="jxr_linenumber" name="L40" href="#L40">40</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L41" href="#L41">41</a>  <em class="jxr_javadoccomment">     * Represents a failed index search.</em>
+<a class="jxr_linenumber" name="L42" href="#L42">42</a>  <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L43" href="#L43">43</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> INDEX_NOT_FOUND = -1;
+<a class="jxr_linenumber" name="L44" href="#L44">44</a>  
+<a class="jxr_linenumber" name="L45" href="#L45">45</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L46" href="#L46">46</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L47" href="#L47">47</a>  <em class="jxr_javadoccomment">     * Find the Jaro Winkler Distance which indicates the similarity score</em>
+<a class="jxr_linenumber" name="L48" href="#L48">48</a>  <em class="jxr_javadoccomment">     * between two Strings.</em>
+<a class="jxr_linenumber" name="L49" href="#L49">49</a>  <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L50" href="#L50">50</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L51" href="#L51">51</a>  <em class="jxr_javadoccomment">     * &lt;pre&gt;</em>
+<a class="jxr_linenumber" name="L52" href="#L52">52</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance(null, null)          = IllegalArgumentException</em>
+<a class="jxr_linenumber" name="L53" href="#L53">53</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance("","")               = 0.0</em>
+<a class="jxr_linenumber" name="L54" href="#L54">54</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance("","a")              = 0.0</em>
+<a class="jxr_linenumber" name="L55" href="#L55">55</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance("aaapppp", "")       = 0.0</em>
+<a class="jxr_linenumber" name="L56" href="#L56">56</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance("frog", "fog")       = 0.93</em>
+<a class="jxr_linenumber" name="L57" href="#L57">57</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance("fly", "ant")        = 0.0</em>
+<a class="jxr_linenumber" name="L58" href="#L58">58</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance("elephant", "hippo") = 0.44</em>
+<a class="jxr_linenumber" name="L59" href="#L59">59</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance("hippo", "elephant") = 0.44</em>
+<a class="jxr_linenumber" name="L60" href="#L60">60</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance("hippo", "zzzzzzzz") = 0.0</em>
+<a class="jxr_linenumber" name="L61" href="#L61">61</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance("hello", "hallo")    = 0.88</em>
+<a class="jxr_linenumber" name="L62" href="#L62">62</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance("ABC Corporation", "ABC Corp") = 0.91</em>
+<a class="jxr_linenumber" name="L63" href="#L63">63</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance("D N H Enterprises Inc", "D &amp;amp; H Enterprises, Inc.") = 0.93</em>
+<a class="jxr_linenumber" name="L64" href="#L64">64</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance("My Gym Children's Fitness Center", "My Gym. Childrens Fitness") = 0.94</em>
+<a class="jxr_linenumber" name="L65" href="#L65">65</a>  <em class="jxr_javadoccomment">     * StringUtils.getJaroWinklerDistance("PENNSYLVANIA", "PENNCISYLVNIA")    = 0.9</em>
+<a class="jxr_linenumber" name="L66" href="#L66">66</a>  <em class="jxr_javadoccomment">     * &lt;/pre&gt;</em>
+<a class="jxr_linenumber" name="L67" href="#L67">67</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L68" href="#L68">68</a>  <em class="jxr_javadoccomment">     * @param left the first String, must not be null</em>
+<a class="jxr_linenumber" name="L69" href="#L69">69</a>  <em class="jxr_javadoccomment">     * @param right the second String, must not be null</em>
+<a class="jxr_linenumber" name="L70" href="#L70">70</a>  <em class="jxr_javadoccomment">     * @return result distance</em>
+<a class="jxr_linenumber" name="L71" href="#L71">71</a>  <em class="jxr_javadoccomment">     * @throws IllegalArgumentException if either String input is {@code null}</em>
+<a class="jxr_linenumber" name="L72" href="#L72">72</a>  <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L73" href="#L73">73</a>      @Override
+<a class="jxr_linenumber" name="L74" href="#L74">74</a>      <strong class="jxr_keyword">public</strong> Double compare(CharSequence left, CharSequence right) {
+<a class="jxr_linenumber" name="L75" href="#L75">75</a>          <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> DEFAULT_SCALING_FACTOR = 0.1;
+<a class="jxr_linenumber" name="L76" href="#L76">76</a>  
+<a class="jxr_linenumber" name="L77" href="#L77">77</a>          <strong class="jxr_keyword">if</strong> (left == <strong class="jxr_keyword">null</strong> || right == <strong class="jxr_keyword">null</strong>) {
+<a class="jxr_linenumber" name="L78" href="#L78">78</a>              <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> IllegalArgumentException(<span class="jxr_string">"Strings must not be null"</span>);
+<a class="jxr_linenumber" name="L79" href="#L79">79</a>          }
+<a class="jxr_linenumber" name="L80" href="#L80">80</a>  
+<a class="jxr_linenumber" name="L81" href="#L81">81</a>          <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> jaro = score(left, right);
+<a class="jxr_linenumber" name="L82" href="#L82">82</a>          <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> cl = commonPrefixLength(left, right);
+<a class="jxr_linenumber" name="L83" href="#L83">83</a>          <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> matchScore = Math.round((jaro + (DEFAULT_SCALING_FACTOR
+<a class="jxr_linenumber" name="L84" href="#L84">84</a>                  * cl * (1.0 - jaro))) *100.0)/100.0;
+<a class="jxr_linenumber" name="L85" href="#L85">85</a>  
+<a class="jxr_linenumber" name="L86" href="#L86">86</a>          <strong class="jxr_keyword">return</strong> matchScore;
+<a class="jxr_linenumber" name="L87" href="#L87">87</a>      }
+<a class="jxr_linenumber" name="L88" href="#L88">88</a>  
+<a class="jxr_linenumber" name="L89" href="#L89">89</a>      <em class="jxr_comment">// TODO: we can move these methods to a Util class, keep them here,</em>
+<a class="jxr_linenumber" name="L90" href="#L90">90</a>      <em class="jxr_comment">// create a common abstract class, shade lang-3.3...</em>
+<a class="jxr_linenumber" name="L91" href="#L91">91</a>  
+<a class="jxr_linenumber" name="L92" href="#L92">92</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L93" href="#L93">93</a>  <em class="jxr_javadoccomment">     * Calculates the number of characters from the beginning of the strings</em>
+<a class="jxr_linenumber" name="L94" href="#L94">94</a>  <em class="jxr_javadoccomment">     * that match exactly one-to-one, up to a maximum of four (4) characters.</em>
+<a class="jxr_linenumber" name="L95" href="#L95">95</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L96" href="#L96">96</a>  <em class="jxr_javadoccomment">     * @param first The first string.</em>
+<a class="jxr_linenumber" name="L97" href="#L97">97</a>  <em class="jxr_javadoccomment">     * @param second The second string.</em>
+<a class="jxr_linenumber" name="L98" href="#L98">98</a>  <em class="jxr_javadoccomment">     * @return A number between 0 and 4.</em>
+<a class="jxr_linenumber" name="L99" href="#L99">99</a>  <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L100" href="#L100">100</a>     <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong> commonPrefixLength(<strong class="jxr_keyword">final</strong> CharSequence first,
+<a class="jxr_linenumber" name="L101" href="#L101">101</a>             <strong class="jxr_keyword">final</strong> CharSequence second) {
+<a class="jxr_linenumber" name="L102" href="#L102">102</a>         <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> result = getCommonPrefix(first.toString(), second.toString())
+<a class="jxr_linenumber" name="L103" href="#L103">103</a>                 .length();
+<a class="jxr_linenumber" name="L104" href="#L104">104</a> 
+<a class="jxr_linenumber" name="L105" href="#L105">105</a>         <em class="jxr_comment">// Limit the result to 4.</em>
+<a class="jxr_linenumber" name="L106" href="#L106">106</a>         <strong class="jxr_keyword">return</strong> result &gt; 4 ? 4 : result;
+<a class="jxr_linenumber" name="L107" href="#L107">107</a>     }
+<a class="jxr_linenumber" name="L108" href="#L108">108</a> 
+<a class="jxr_linenumber" name="L109" href="#L109">109</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L110" href="#L110">110</a> <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L111" href="#L111">111</a> <em class="jxr_javadoccomment">     * Compares all Strings in an array and returns the initial sequence of</em>
+<a class="jxr_linenumber" name="L112" href="#L112">112</a> <em class="jxr_javadoccomment">     * characters that is common to all of them.</em>
+<a class="jxr_linenumber" name="L113" href="#L113">113</a> <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L114" href="#L114">114</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L115" href="#L115">115</a> <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L116" href="#L116">116</a> <em class="jxr_javadoccomment">     * For example,</em>
+<a class="jxr_linenumber" name="L117" href="#L117">117</a> <em class="jxr_javadoccomment">     * &lt;code&gt;getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) -&amp;gt; "i am a "&lt;/code&gt;</em>
+<a class="jxr_linenumber" name="L118" href="#L118">118</a> <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L119" href="#L119">119</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L120" href="#L120">120</a> <em class="jxr_javadoccomment">     * &lt;pre&gt;</em>
+<a class="jxr_linenumber" name="L121" href="#L121">121</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(null) = ""</em>
+<a class="jxr_linenumber" name="L122" href="#L122">122</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {}) = ""</em>
+<a class="jxr_linenumber" name="L123" href="#L123">123</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {"abc"}) = "abc"</em>
+<a class="jxr_linenumber" name="L124" href="#L124">124</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {null, null}) = ""</em>
+<a class="jxr_linenumber" name="L125" href="#L125">125</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {"", ""}) = ""</em>
+<a class="jxr_linenumber" name="L126" href="#L126">126</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {"", null}) = ""</em>
+<a class="jxr_linenumber" name="L127" href="#L127">127</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {"abc", null, null}) = ""</em>
+<a class="jxr_linenumber" name="L128" href="#L128">128</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {null, null, "abc"}) = ""</em>
+<a class="jxr_linenumber" name="L129" href="#L129">129</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {"", "abc"}) = ""</em>
+<a class="jxr_linenumber" name="L130" href="#L130">130</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {"abc", ""}) = ""</em>
+<a class="jxr_linenumber" name="L131" href="#L131">131</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {"abc", "abc"}) = "abc"</em>
+<a class="jxr_linenumber" name="L132" href="#L132">132</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {"abc", "a"}) = "a"</em>
+<a class="jxr_linenumber" name="L133" href="#L133">133</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {"ab", "abxyz"}) = "ab"</em>
+<a class="jxr_linenumber" name="L134" href="#L134">134</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {"abcde", "abxyz"}) = "ab"</em>
+<a class="jxr_linenumber" name="L135" href="#L135">135</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {"abcde", "xyz"}) = ""</em>
+<a class="jxr_linenumber" name="L136" href="#L136">136</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {"xyz", "abcde"}) = ""</em>
+<a class="jxr_linenumber" name="L137" href="#L137">137</a> <em class="jxr_javadoccomment">     * StringUtils.getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) = "i am a "</em>
+<a class="jxr_linenumber" name="L138" href="#L138">138</a> <em class="jxr_javadoccomment">     * &lt;/pre&gt;</em>
+<a class="jxr_linenumber" name="L139" href="#L139">139</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L140" href="#L140">140</a> <em class="jxr_javadoccomment">     * @param strs array of String objects, entries may be null</em>
+<a class="jxr_linenumber" name="L141" href="#L141">141</a> <em class="jxr_javadoccomment">     * @return the initial sequence of characters that are common to all Strings</em>
+<a class="jxr_linenumber" name="L142" href="#L142">142</a> <em class="jxr_javadoccomment">     *         in the array; empty String if the array is null, the elements are</em>
+<a class="jxr_linenumber" name="L143" href="#L143">143</a> <em class="jxr_javadoccomment">     *         all null or if there is no common prefix.</em>
+<a class="jxr_linenumber" name="L144" href="#L144">144</a> <em class="jxr_javadoccomment">     * @since 2.4</em>
+<a class="jxr_linenumber" name="L145" href="#L145">145</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L146" href="#L146">146</a>     <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">static</strong> String getCommonPrefix(<strong class="jxr_keyword">final</strong> String... strs) {
+<a class="jxr_linenumber" name="L147" href="#L147">147</a>         <strong class="jxr_keyword">if</strong> (strs == <strong class="jxr_keyword">null</strong> || strs.length == 0) {
+<a class="jxr_linenumber" name="L148" href="#L148">148</a>             <strong class="jxr_keyword">return</strong> <span class="jxr_string">""</span>;
+<a class="jxr_linenumber" name="L149" href="#L149">149</a>         }
+<a class="jxr_linenumber" name="L150" href="#L150">150</a>         <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> smallestIndexOfDiff = indexOfDifference(strs);
+<a class="jxr_linenumber" name="L151" href="#L151">151</a>         <strong class="jxr_keyword">if</strong> (smallestIndexOfDiff == INDEX_NOT_FOUND) {
+<a class="jxr_linenumber" name="L152" href="#L152">152</a>             <em class="jxr_comment">// all strings were identical</em>
+<a class="jxr_linenumber" name="L153" href="#L153">153</a>             <strong class="jxr_keyword">if</strong> (strs[0] == <strong class="jxr_keyword">null</strong>) {
+<a class="jxr_linenumber" name="L154" href="#L154">154</a>                 <strong class="jxr_keyword">return</strong> <span class="jxr_string">""</span>;
+<a class="jxr_linenumber" name="L155" href="#L155">155</a>             }
+<a class="jxr_linenumber" name="L156" href="#L156">156</a>             <strong class="jxr_keyword">return</strong> strs[0];
+<a class="jxr_linenumber" name="L157" href="#L157">157</a>         } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (smallestIndexOfDiff == 0) {
+<a class="jxr_linenumber" name="L158" href="#L158">158</a>             <em class="jxr_comment">// there were no common initial characters</em>
+<a class="jxr_linenumber" name="L159" href="#L159">159</a>             <strong class="jxr_keyword">return</strong> <span class="jxr_string">""</span>;
+<a class="jxr_linenumber" name="L160" href="#L160">160</a>         } <strong class="jxr_keyword">else</strong> {
+<a class="jxr_linenumber" name="L161" href="#L161">161</a>             <em class="jxr_comment">// we found a common initial character sequence</em>
+<a class="jxr_linenumber" name="L162" href="#L162">162</a>             <strong class="jxr_keyword">return</strong> strs[0].substring(0, smallestIndexOfDiff);
+<a class="jxr_linenumber" name="L163" href="#L163">163</a>         }
+<a class="jxr_linenumber" name="L164" href="#L164">164</a>     }
+<a class="jxr_linenumber" name="L165" href="#L165">165</a> 
+<a class="jxr_linenumber" name="L166" href="#L166">166</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L167" href="#L167">167</a> <em class="jxr_javadoccomment">     * This method returns the Jaro-Winkler score for string matching.</em>
+<a class="jxr_linenumber" name="L168" href="#L168">168</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L169" href="#L169">169</a> <em class="jxr_javadoccomment">     * @param first the first string to be matched</em>
+<a class="jxr_linenumber" name="L170" href="#L170">170</a> <em class="jxr_javadoccomment">     * @param second the second string to be matched</em>
+<a class="jxr_linenumber" name="L171" href="#L171">171</a> <em class="jxr_javadoccomment">     * @return matching score without scaling factor impact</em>
+<a class="jxr_linenumber" name="L172" href="#L172">172</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L173" href="#L173">173</a>     <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">double</strong> score(<strong class="jxr_keyword">final</strong> CharSequence first,
+<a class="jxr_linenumber" name="L174" href="#L174">174</a>             <strong class="jxr_keyword">final</strong> CharSequence second) {
+<a class="jxr_linenumber" name="L175" href="#L175">175</a>         String shorter;
+<a class="jxr_linenumber" name="L176" href="#L176">176</a>         String longer;
+<a class="jxr_linenumber" name="L177" href="#L177">177</a> 
+<a class="jxr_linenumber" name="L178" href="#L178">178</a>         <em class="jxr_comment">// Determine which String is longer.</em>
+<a class="jxr_linenumber" name="L179" href="#L179">179</a>         <strong class="jxr_keyword">if</strong> (first.length() &gt; second.length()) {
+<a class="jxr_linenumber" name="L180" href="#L180">180</a>             longer = first.toString().toLowerCase();
+<a class="jxr_linenumber" name="L181" href="#L181">181</a>             shorter = second.toString().toLowerCase();
+<a class="jxr_linenumber" name="L182" href="#L182">182</a>         } <strong class="jxr_keyword">else</strong> {
+<a class="jxr_linenumber" name="L183" href="#L183">183</a>             longer = second.toString().toLowerCase();
+<a class="jxr_linenumber" name="L184" href="#L184">184</a>             shorter = first.toString().toLowerCase();
+<a class="jxr_linenumber" name="L185" href="#L185">185</a>         }
+<a class="jxr_linenumber" name="L186" href="#L186">186</a> 
+<a class="jxr_linenumber" name="L187" href="#L187">187</a>         <em class="jxr_comment">// Calculate the half length() distance of the shorter String.</em>
+<a class="jxr_linenumber" name="L188" href="#L188">188</a>         <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> halflength = shorter.length() / 2 + 1;
+<a class="jxr_linenumber" name="L189" href="#L189">189</a> 
+<a class="jxr_linenumber" name="L190" href="#L190">190</a>         <em class="jxr_comment">// Find the set of matching characters between the shorter and longer</em>
+<a class="jxr_linenumber" name="L191" href="#L191">191</a>         <em class="jxr_comment">// strings. Note that</em>
+<a class="jxr_linenumber" name="L192" href="#L192">192</a>         <em class="jxr_comment">// the set of matching characters may be different depending on the</em>
+<a class="jxr_linenumber" name="L193" href="#L193">193</a>         <em class="jxr_comment">// order of the strings.</em>
+<a class="jxr_linenumber" name="L194" href="#L194">194</a>         <strong class="jxr_keyword">final</strong> String m1 = getSetOfMatchingCharacterWithin(shorter, longer,
+<a class="jxr_linenumber" name="L195" href="#L195">195</a>                 halflength);
+<a class="jxr_linenumber" name="L196" href="#L196">196</a>         <strong class="jxr_keyword">final</strong> String m2 = getSetOfMatchingCharacterWithin(longer, shorter,
+<a class="jxr_linenumber" name="L197" href="#L197">197</a>                 halflength);
+<a class="jxr_linenumber" name="L198" href="#L198">198</a> 
+<a class="jxr_linenumber" name="L199" href="#L199">199</a>         <em class="jxr_comment">// If one or both of the sets of common characters is empty, then</em>
+<a class="jxr_linenumber" name="L200" href="#L200">200</a>         <em class="jxr_comment">// there is no similarity between the two strings.</em>
+<a class="jxr_linenumber" name="L201" href="#L201">201</a>         <strong class="jxr_keyword">if</strong> (m1.length() == 0 || m2.length() == 0) {
+<a class="jxr_linenumber" name="L202" href="#L202">202</a>             <strong class="jxr_keyword">return</strong> 0.0;
+<a class="jxr_linenumber" name="L203" href="#L203">203</a>         }
+<a class="jxr_linenumber" name="L204" href="#L204">204</a> 
+<a class="jxr_linenumber" name="L205" href="#L205">205</a>         <em class="jxr_comment">// If the set of common characters is not the same size, then</em>
+<a class="jxr_linenumber" name="L206" href="#L206">206</a>         <em class="jxr_comment">// there is no similarity between the two strings, either.</em>
+<a class="jxr_linenumber" name="L207" href="#L207">207</a>         <strong class="jxr_keyword">if</strong> (m1.length() != m2.length()) {
+<a class="jxr_linenumber" name="L208" href="#L208">208</a>             <strong class="jxr_keyword">return</strong> 0.0;
+<a class="jxr_linenumber" name="L209" href="#L209">209</a>         }
+<a class="jxr_linenumber" name="L210" href="#L210">210</a> 
+<a class="jxr_linenumber" name="L211" href="#L211">211</a>         <em class="jxr_comment">// Calculate the number of transpositions between the two sets</em>
+<a class="jxr_linenumber" name="L212" href="#L212">212</a>         <em class="jxr_comment">// of common characters.</em>
+<a class="jxr_linenumber" name="L213" href="#L213">213</a>         <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> transpositions = transpositions(m1, m2);
+<a class="jxr_linenumber" name="L214" href="#L214">214</a> 
+<a class="jxr_linenumber" name="L215" href="#L215">215</a>         <em class="jxr_comment">// Calculate the distance.</em>
+<a class="jxr_linenumber" name="L216" href="#L216">216</a>         <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> dist = (m1.length() / ((<strong class="jxr_keyword">double</strong>) shorter.length())
+<a class="jxr_linenumber" name="L217" href="#L217">217</a>                 + m2.length() / ((<strong class="jxr_keyword">double</strong>) longer.length()) + (m1.length() - transpositions)
+<a class="jxr_linenumber" name="L218" href="#L218">218</a>                 / ((<strong class="jxr_keyword">double</strong>) m1.length())) / 3.0;
+<a class="jxr_linenumber" name="L219" href="#L219">219</a>         <strong class="jxr_keyword">return</strong> dist;
+<a class="jxr_linenumber" name="L220" href="#L220">220</a>     }
+<a class="jxr_linenumber" name="L221" href="#L221">221</a> 
+<a class="jxr_linenumber" name="L222" href="#L222">222</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L223" href="#L223">223</a> <em class="jxr_javadoccomment">     * Calculates the number of transpositions between two strings.</em>
+<a class="jxr_linenumber" name="L224" href="#L224">224</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L225" href="#L225">225</a> <em class="jxr_javadoccomment">     * @param first The first string.</em>
+<a class="jxr_linenumber" name="L226" href="#L226">226</a> <em class="jxr_javadoccomment">     * @param second The second string.</em>
+<a class="jxr_linenumber" name="L227" href="#L227">227</a> <em class="jxr_javadoccomment">     * @return The number of transpositions between the two strings.</em>
+<a class="jxr_linenumber" name="L228" href="#L228">228</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L229" href="#L229">229</a>     <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong> transpositions(<strong class="jxr_keyword">final</strong> CharSequence first,
+<a class="jxr_linenumber" name="L230" href="#L230">230</a>             <strong class="jxr_keyword">final</strong> CharSequence second) {
+<a class="jxr_linenumber" name="L231" href="#L231">231</a>         <strong class="jxr_keyword">int</strong> transpositions = 0;
+<a class="jxr_linenumber" name="L232" href="#L232">232</a>         <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i &lt; first.length(); i++) {
+<a class="jxr_linenumber" name="L233" href="#L233">233</a>             <strong class="jxr_keyword">if</strong> (first.charAt(i) != second.charAt(i)) {
+<a class="jxr_linenumber" name="L234" href="#L234">234</a>                 transpositions++;
+<a class="jxr_linenumber" name="L235" href="#L235">235</a>             }
+<a class="jxr_linenumber" name="L236" href="#L236">236</a>         }
+<a class="jxr_linenumber" name="L237" href="#L237">237</a>         <strong class="jxr_keyword">return</strong> transpositions / 2;
+<a class="jxr_linenumber" name="L238" href="#L238">238</a>     }
+<a class="jxr_linenumber" name="L239" href="#L239">239</a> 
+<a class="jxr_linenumber" name="L240" href="#L240">240</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L241" href="#L241">241</a> <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L242" href="#L242">242</a> <em class="jxr_javadoccomment">     * Compares all CharSequences in an array and returns the index at which the</em>
+<a class="jxr_linenumber" name="L243" href="#L243">243</a> <em class="jxr_javadoccomment">     * CharSequences begin to differ.</em>
+<a class="jxr_linenumber" name="L244" href="#L244">244</a> <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L245" href="#L245">245</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L246" href="#L246">246</a> <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L247" href="#L247">247</a> <em class="jxr_javadoccomment">     * For example,</em>
+<a class="jxr_linenumber" name="L248" href="#L248">248</a> <em class="jxr_javadoccomment">     * &lt;code&gt;indexOfDifference(new String[] {"i am a machine", "i am a robot"}) -&amp;gt; 7&lt;/code&gt;</em>
+<a class="jxr_linenumber" name="L249" href="#L249">249</a> <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L250" href="#L250">250</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L251" href="#L251">251</a> <em class="jxr_javadoccomment">     * &lt;pre&gt;</em>
+<a class="jxr_linenumber" name="L252" href="#L252">252</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(null) = -1</em>
+<a class="jxr_linenumber" name="L253" href="#L253">253</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {}) = -1</em>
+<a class="jxr_linenumber" name="L254" href="#L254">254</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {"abc"}) = -1</em>
+<a class="jxr_linenumber" name="L255" href="#L255">255</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {null, null}) = -1</em>
+<a class="jxr_linenumber" name="L256" href="#L256">256</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {"", ""}) = -1</em>
+<a class="jxr_linenumber" name="L257" href="#L257">257</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {"", null}) = 0</em>
+<a class="jxr_linenumber" name="L258" href="#L258">258</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {"abc", null, null}) = 0</em>
+<a class="jxr_linenumber" name="L259" href="#L259">259</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {null, null, "abc"}) = 0</em>
+<a class="jxr_linenumber" name="L260" href="#L260">260</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {"", "abc"}) = 0</em>
+<a class="jxr_linenumber" name="L261" href="#L261">261</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {"abc", ""}) = 0</em>
+<a class="jxr_linenumber" name="L262" href="#L262">262</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {"abc", "abc"}) = -1</em>
+<a class="jxr_linenumber" name="L263" href="#L263">263</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {"abc", "a"}) = 1</em>
+<a class="jxr_linenumber" name="L264" href="#L264">264</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {"ab", "abxyz"}) = 2</em>
+<a class="jxr_linenumber" name="L265" href="#L265">265</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {"abcde", "abxyz"}) = 2</em>
+<a class="jxr_linenumber" name="L266" href="#L266">266</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {"abcde", "xyz"}) = 0</em>
+<a class="jxr_linenumber" name="L267" href="#L267">267</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {"xyz", "abcde"}) = 0</em>
+<a class="jxr_linenumber" name="L268" href="#L268">268</a> <em class="jxr_javadoccomment">     * StringUtils.indexOfDifference(new String[] {"i am a machine", "i am a robot"}) = 7</em>
+<a class="jxr_linenumber" name="L269" href="#L269">269</a> <em class="jxr_javadoccomment">     * &lt;/pre&gt;</em>
+<a class="jxr_linenumber" name="L270" href="#L270">270</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L271" href="#L271">271</a> <em class="jxr_javadoccomment">     * @param css array of CharSequences, entries may be null</em>
+<a class="jxr_linenumber" name="L272" href="#L272">272</a> <em class="jxr_javadoccomment">     * @return the index where the strings begin to differ; -1 if they are all</em>
+<a class="jxr_linenumber" name="L273" href="#L273">273</a> <em class="jxr_javadoccomment">     *         equal</em>
+<a class="jxr_linenumber" name="L274" href="#L274">274</a> <em class="jxr_javadoccomment">     * @since 2.4</em>
+<a class="jxr_linenumber" name="L275" href="#L275">275</a> <em class="jxr_javadoccomment">     * @since 3.0 Changed signature from indexOfDifference(String...) to</em>
+<a class="jxr_linenumber" name="L276" href="#L276">276</a> <em class="jxr_javadoccomment">     *        indexOfDifference(CharSequence...)</em>
+<a class="jxr_linenumber" name="L277" href="#L277">277</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L278" href="#L278">278</a>     <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong> indexOfDifference(<strong class="jxr_keyword">final</strong> CharSequence... css) {
+<a class="jxr_linenumber" name="L279" href="#L279">279</a>         <strong class="jxr_keyword">if</strong> (css == <strong class="jxr_keyword">null</strong> || css.length &lt;= 1) {
+<a class="jxr_linenumber" name="L280" href="#L280">280</a>             <strong class="jxr_keyword">return</strong> INDEX_NOT_FOUND;
+<a class="jxr_linenumber" name="L281" href="#L281">281</a>         }
+<a class="jxr_linenumber" name="L282" href="#L282">282</a>         <strong class="jxr_keyword">boolean</strong> anyStringNull = false;
+<a class="jxr_linenumber" name="L283" href="#L283">283</a>         <strong class="jxr_keyword">boolean</strong> allStringsNull = <strong class="jxr_keyword">true</strong>;
+<a class="jxr_linenumber" name="L284" href="#L284">284</a>         <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> arrayLen = css.length;
+<a class="jxr_linenumber" name="L285" href="#L285">285</a>         <strong class="jxr_keyword">int</strong> shortestStrLen = Integer.MAX_VALUE;
+<a class="jxr_linenumber" name="L286" href="#L286">286</a>         <strong class="jxr_keyword">int</strong> longestStrLen = 0;
+<a class="jxr_linenumber" name="L287" href="#L287">287</a> 
+<a class="jxr_linenumber" name="L288" href="#L288">288</a>         <em class="jxr_comment">// find the min and max string lengths; this avoids checking to make</em>
+<a class="jxr_linenumber" name="L289" href="#L289">289</a>         <em class="jxr_comment">// sure we are not exceeding the length of the string each time through</em>
+<a class="jxr_linenumber" name="L290" href="#L290">290</a>         <em class="jxr_comment">// the bottom loop.</em>
+<a class="jxr_linenumber" name="L291" href="#L291">291</a>         <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i &lt; arrayLen; i++) {
+<a class="jxr_linenumber" name="L292" href="#L292">292</a>             <strong class="jxr_keyword">if</strong> (css[i] == <strong class="jxr_keyword">null</strong>) {
+<a class="jxr_linenumber" name="L293" href="#L293">293</a>                 anyStringNull = <strong class="jxr_keyword">true</strong>;
+<a class="jxr_linenumber" name="L294" href="#L294">294</a>                 shortestStrLen = 0;
+<a class="jxr_linenumber" name="L295" href="#L295">295</a>             } <strong class="jxr_keyword">else</strong> {
+<a class="jxr_linenumber" name="L296" href="#L296">296</a>                 allStringsNull = false;
+<a class="jxr_linenumber" name="L297" href="#L297">297</a>                 shortestStrLen = Math.min(css[i].length(), shortestStrLen);
+<a class="jxr_linenumber" name="L298" href="#L298">298</a>                 longestStrLen = Math.max(css[i].length(), longestStrLen);
+<a class="jxr_linenumber" name="L299" href="#L299">299</a>             }
+<a class="jxr_linenumber" name="L300" href="#L300">300</a>         }
+<a class="jxr_linenumber" name="L301" href="#L301">301</a> 
+<a class="jxr_linenumber" name="L302" href="#L302">302</a>         <em class="jxr_comment">// handle lists containing all nulls or all empty strings</em>
+<a class="jxr_linenumber" name="L303" href="#L303">303</a>         <strong class="jxr_keyword">if</strong> (allStringsNull || longestStrLen == 0 &amp;&amp; !anyStringNull) {
+<a class="jxr_linenumber" name="L304" href="#L304">304</a>             <strong class="jxr_keyword">return</strong> INDEX_NOT_FOUND;
+<a class="jxr_linenumber" name="L305" href="#L305">305</a>         }
+<a class="jxr_linenumber" name="L306" href="#L306">306</a> 
+<a class="jxr_linenumber" name="L307" href="#L307">307</a>         <em class="jxr_comment">// handle lists containing some nulls or some empty strings</em>
+<a class="jxr_linenumber" name="L308" href="#L308">308</a>         <strong class="jxr_keyword">if</strong> (shortestStrLen == 0) {
+<a class="jxr_linenumber" name="L309" href="#L309">309</a>             <strong class="jxr_keyword">return</strong> 0;
+<a class="jxr_linenumber" name="L310" href="#L310">310</a>         }
+<a class="jxr_linenumber" name="L311" href="#L311">311</a> 
+<a class="jxr_linenumber" name="L312" href="#L312">312</a>         <em class="jxr_comment">// find the position with the first difference across all strings</em>
+<a class="jxr_linenumber" name="L313" href="#L313">313</a>         <strong class="jxr_keyword">int</strong> firstDiff = -1;
+<a class="jxr_linenumber" name="L314" href="#L314">314</a>         <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> stringPos = 0; stringPos &lt; shortestStrLen; stringPos++) {
+<a class="jxr_linenumber" name="L315" href="#L315">315</a>             <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">char</strong> comparisonChar = css[0].charAt(stringPos);
+<a class="jxr_linenumber" name="L316" href="#L316">316</a>             <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> arrayPos = 1; arrayPos &lt; arrayLen; arrayPos++) {
+<a class="jxr_linenumber" name="L317" href="#L317">317</a>                 <strong class="jxr_keyword">if</strong> (css[arrayPos].charAt(stringPos) != comparisonChar) {
+<a class="jxr_linenumber" name="L318" href="#L318">318</a>                     firstDiff = stringPos;
+<a class="jxr_linenumber" name="L319" href="#L319">319</a>                     <strong class="jxr_keyword">break</strong>;
+<a class="jxr_linenumber" name="L320" href="#L320">320</a>                 }
+<a class="jxr_linenumber" name="L321" href="#L321">321</a>             }
+<a class="jxr_linenumber" name="L322" href="#L322">322</a>             <strong class="jxr_keyword">if</strong> (firstDiff != -1) {
+<a class="jxr_linenumber" name="L323" href="#L323">323</a>                 <strong class="jxr_keyword">break</strong>;
+<a class="jxr_linenumber" name="L324" href="#L324">324</a>             }
+<a class="jxr_linenumber" name="L325" href="#L325">325</a>         }
+<a class="jxr_linenumber" name="L326" href="#L326">326</a> 
+<a class="jxr_linenumber" name="L327" href="#L327">327</a>         <strong class="jxr_keyword">if</strong> (firstDiff == -1 &amp;&amp; shortestStrLen != longestStrLen) {
+<a class="jxr_linenumber" name="L328" href="#L328">328</a>             <em class="jxr_comment">// we compared all of the characters up to the length of the</em>
+<a class="jxr_linenumber" name="L329" href="#L329">329</a>             <em class="jxr_comment">// shortest string and didn't find a mismatch, but the string lengths</em>
+<a class="jxr_linenumber" name="L330" href="#L330">330</a>             <em class="jxr_comment">// vary, so return the length of the shortest string.</em>
+<a class="jxr_linenumber" name="L331" href="#L331">331</a>             <strong class="jxr_keyword">return</strong> shortestStrLen;
+<a class="jxr_linenumber" name="L332" href="#L332">332</a>         }
+<a class="jxr_linenumber" name="L333" href="#L333">333</a>         <strong class="jxr_keyword">return</strong> firstDiff;
+<a class="jxr_linenumber" name="L334" href="#L334">334</a>     }
+<a class="jxr_linenumber" name="L335" href="#L335">335</a> 
+<a class="jxr_linenumber" name="L336" href="#L336">336</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L337" href="#L337">337</a> <em class="jxr_javadoccomment">     * Gets a set of matching characters between two strings.</em>
+<a class="jxr_linenumber" name="L338" href="#L338">338</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L339" href="#L339">339</a> <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L340" href="#L340">340</a> <em class="jxr_javadoccomment">     * Two characters from the first string and the second string are</em>
+<a class="jxr_linenumber" name="L341" href="#L341">341</a> <em class="jxr_javadoccomment">     * considered matching if the characters' respective positions are no</em>
+<a class="jxr_linenumber" name="L342" href="#L342">342</a> <em class="jxr_javadoccomment">     * farther than the limit value.</em>
+<a class="jxr_linenumber" name="L343" href="#L343">343</a> <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L344" href="#L344">344</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L345" href="#L345">345</a> <em class="jxr_javadoccomment">     * @param first The first string.</em>
+<a class="jxr_linenumber" name="L346" href="#L346">346</a> <em class="jxr_javadoccomment">     * @param second The second string.</em>
+<a class="jxr_linenumber" name="L347" href="#L347">347</a> <em class="jxr_javadoccomment">     * @param limit The maximum distance to consider.</em>
+<a class="jxr_linenumber" name="L348" href="#L348">348</a> <em class="jxr_javadoccomment">     * @return A string containing the set of common characters.</em>
+<a class="jxr_linenumber" name="L349" href="#L349">349</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L350" href="#L350">350</a>     <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> String getSetOfMatchingCharacterWithin(
+<a class="jxr_linenumber" name="L351" href="#L351">351</a>             <strong class="jxr_keyword">final</strong> CharSequence first, <strong class="jxr_keyword">final</strong> CharSequence second, <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> limit) {
+<a class="jxr_linenumber" name="L352" href="#L352">352</a>         <strong class="jxr_keyword">final</strong> StringBuilder common = <strong class="jxr_keyword">new</strong> StringBuilder();
+<a class="jxr_linenumber" name="L353" href="#L353">353</a>         <strong class="jxr_keyword">final</strong> StringBuilder copy = <strong class="jxr_keyword">new</strong> StringBuilder(second);
+<a class="jxr_linenumber" name="L354" href="#L354">354</a> 
+<a class="jxr_linenumber" name="L355" href="#L355">355</a>         <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i &lt; first.length(); i++) {
+<a class="jxr_linenumber" name="L356" href="#L356">356</a>             <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">char</strong> ch = first.charAt(i);
+<a class="jxr_linenumber" name="L357" href="#L357">357</a>             <strong class="jxr_keyword">boolean</strong> found = false;
+<a class="jxr_linenumber" name="L358" href="#L358">358</a> 
+<a class="jxr_linenumber" name="L359" href="#L359">359</a>             <em class="jxr_comment">// See if the character is within the limit positions away from the</em>
+<a class="jxr_linenumber" name="L360" href="#L360">360</a>             <em class="jxr_comment">// original position of that character.</em>
+<a class="jxr_linenumber" name="L361" href="#L361">361</a>             <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> j = Math.max(0, i - limit); !found
+<a class="jxr_linenumber" name="L362" href="#L362">362</a>                     &amp;&amp; j &lt; Math.min(i + limit, second.length()); j++) {
+<a class="jxr_linenumber" name="L363" href="#L363">363</a>                 <strong class="jxr_keyword">if</strong> (copy.charAt(j) == ch) {
+<a class="jxr_linenumber" name="L364" href="#L364">364</a>                     found = <strong class="jxr_keyword">true</strong>;
+<a class="jxr_linenumber" name="L365" href="#L365">365</a>                     common.append(ch);
+<a class="jxr_linenumber" name="L366" href="#L366">366</a>                     copy.setCharAt(j, '*');
+<a class="jxr_linenumber" name="L367" href="#L367">367</a>                 }
+<a class="jxr_linenumber" name="L368" href="#L368">368</a>             }
+<a class="jxr_linenumber" name="L369" href="#L369">369</a>         }
+<a class="jxr_linenumber" name="L370" href="#L370">370</a>         <strong class="jxr_keyword">return</strong> common.toString();
+<a class="jxr_linenumber" name="L371" href="#L371">371</a>     }
+<a class="jxr_linenumber" name="L372" href="#L372">372</a> 
+<a class="jxr_linenumber" name="L373" href="#L373">373</a> }
+</pre>
+<hr/>
+<div id="footer">Copyright &#169; 2014 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</div>
+</body>
+</html>

Propchange: websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/JaroWrinklerDistance.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/LevenshteinDistance.html
==============================================================================
--- websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/LevenshteinDistance.html (added)
+++ websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/LevenshteinDistance.html Sat Nov 29 11:35:10 2014
@@ -0,0 +1,271 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head><meta http-equiv="content-type" content="text/html; charset=UTF-8" />
+<title>LevenshteinDistance xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../../apidocs/org/apache/commons/text/similarity/LevenshteinDistance.html">View Javadoc</a></div><pre>
+<a class="jxr_linenumber" name="L1" href="#L1">1</a>   <em class="jxr_comment">/*</em>
+<a class="jxr_linenumber" name="L2" href="#L2">2</a>   <em class="jxr_comment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a class="jxr_linenumber" name="L3" href="#L3">3</a>   <em class="jxr_comment"> * contributor license agreements.  See the NOTICE file distributed with</em>
+<a class="jxr_linenumber" name="L4" href="#L4">4</a>   <em class="jxr_comment"> * this work for additional information regarding copyright ownership.</em>
+<a class="jxr_linenumber" name="L5" href="#L5">5</a>   <em class="jxr_comment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a class="jxr_linenumber" name="L6" href="#L6">6</a>   <em class="jxr_comment"> * (the "License"); you may not use this file except in compliance with</em>
+<a class="jxr_linenumber" name="L7" href="#L7">7</a>   <em class="jxr_comment"> * the License.  You may obtain a copy of the License at</em>
+<a class="jxr_linenumber" name="L8" href="#L8">8</a>   <em class="jxr_comment"> *</em>
+<a class="jxr_linenumber" name="L9" href="#L9">9</a>   <em class="jxr_comment"> *      <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a class="jxr_linenumber" name="L10" href="#L10">10</a>  <em class="jxr_comment"> *</em>
+<a class="jxr_linenumber" name="L11" href="#L11">11</a>  <em class="jxr_comment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a class="jxr_linenumber" name="L12" href="#L12">12</a>  <em class="jxr_comment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a class="jxr_linenumber" name="L13" href="#L13">13</a>  <em class="jxr_comment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a class="jxr_linenumber" name="L14" href="#L14">14</a>  <em class="jxr_comment"> * See the License for the specific language governing permissions and</em>
+<a class="jxr_linenumber" name="L15" href="#L15">15</a>  <em class="jxr_comment"> * limitations under the License.</em>
+<a class="jxr_linenumber" name="L16" href="#L16">16</a>  <em class="jxr_comment"> */</em>
+<a class="jxr_linenumber" name="L17" href="#L17">17</a>  <strong class="jxr_keyword">package</strong> org.apache.commons.text.similarity;
+<a class="jxr_linenumber" name="L18" href="#L18">18</a>  
+<a class="jxr_linenumber" name="L19" href="#L19">19</a>  <strong class="jxr_keyword">import</strong> java.util.Arrays;
+<a class="jxr_linenumber" name="L20" href="#L20">20</a>  
+<a class="jxr_linenumber" name="L21" href="#L21">21</a>  <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L22" href="#L22">22</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L23" href="#L23">23</a>  <em class="jxr_javadoccomment"> * A string metric for measuring the difference between two sequences.</em>
+<a class="jxr_linenumber" name="L24" href="#L24">24</a>  <em class="jxr_javadoccomment"> * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L25" href="#L25">25</a>  <em class="jxr_javadoccomment"> *</em>
+<a class="jxr_linenumber" name="L26" href="#L26">26</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L27" href="#L27">27</a>  <em class="jxr_javadoccomment"> * This code has been adapted from Apache Commons Lang 3.3.</em>
+<a class="jxr_linenumber" name="L28" href="#L28">28</a>  <em class="jxr_javadoccomment"> * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L29" href="#L29">29</a>  <em class="jxr_javadoccomment"> *</em>
+<a class="jxr_linenumber" name="L30" href="#L30">30</a>  <em class="jxr_javadoccomment"> * @since 1.0</em>
+<a class="jxr_linenumber" name="L31" href="#L31">31</a>  <em class="jxr_javadoccomment"> */</em>
+<a class="jxr_linenumber" name="L32" href="#L32">32</a>  <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../../org/apache/commons/text/similarity/LevenshteinDistance.html">LevenshteinDistance</a> <strong class="jxr_keyword">implements</strong> StringMetric&lt;Integer&gt; {
+<a class="jxr_linenumber" name="L33" href="#L33">33</a>  
+<a class="jxr_linenumber" name="L34" href="#L34">34</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L35" href="#L35">35</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L36" href="#L36">36</a>  <em class="jxr_javadoccomment">     * Find the Levenshtein distance between two Strings.</em>
+<a class="jxr_linenumber" name="L37" href="#L37">37</a>  <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L38" href="#L38">38</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L39" href="#L39">39</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L40" href="#L40">40</a>  <em class="jxr_javadoccomment">     * This is the number of changes needed to change one String into another,</em>
+<a class="jxr_linenumber" name="L41" href="#L41">41</a>  <em class="jxr_javadoccomment">     * where each change is a single character modification (deletion, insertion</em>
+<a class="jxr_linenumber" name="L42" href="#L42">42</a>  <em class="jxr_javadoccomment">     * or substitution).</em>
+<a class="jxr_linenumber" name="L43" href="#L43">43</a>  <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L44" href="#L44">44</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L45" href="#L45">45</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L46" href="#L46">46</a>  <em class="jxr_javadoccomment">     * The previous implementation of the Levenshtein distance algorithm was</em>
+<a class="jxr_linenumber" name="L47" href="#L47">47</a>  <em class="jxr_javadoccomment">     * from &lt;a</em>
+<a class="jxr_linenumber" name="L48" href="#L48">48</a>  <em class="jxr_javadoccomment">     * href="<a href="http://www.merriampark.com/ld.htm" target="alexandria_uri">http://www.merriampark.com/ld.htm</a>"&gt;http://www.merriampark.com</em>
+<a class="jxr_linenumber" name="L49" href="#L49">49</a>  <em class="jxr_javadoccomment">     * /ld.htm&lt;/a&gt;</em>
+<a class="jxr_linenumber" name="L50" href="#L50">50</a>  <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L51" href="#L51">51</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L52" href="#L52">52</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L53" href="#L53">53</a>  <em class="jxr_javadoccomment">     * Chas Emerick has written an implementation in Java, which avoids an</em>
+<a class="jxr_linenumber" name="L54" href="#L54">54</a>  <em class="jxr_javadoccomment">     * OutOfMemoryError which can occur when my Java implementation is used with</em>
+<a class="jxr_linenumber" name="L55" href="#L55">55</a>  <em class="jxr_javadoccomment">     * very large strings.&lt;br&gt;</em>
+<a class="jxr_linenumber" name="L56" href="#L56">56</a>  <em class="jxr_javadoccomment">     * This implementation of the Levenshtein distance algorithm is from &lt;a</em>
+<a class="jxr_linenumber" name="L57" href="#L57">57</a>  <em class="jxr_javadoccomment">     * href="<a href="http://www.merriampark.com/ldjava.htm" target="alexandria_uri">http://www.merriampark.com/ldjava.htm</a>"&gt;http://www.merriampark.com/</em>
+<a class="jxr_linenumber" name="L58" href="#L58">58</a>  <em class="jxr_javadoccomment">     * ldjava.htm&lt;/a&gt;</em>
+<a class="jxr_linenumber" name="L59" href="#L59">59</a>  <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L60" href="#L60">60</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L61" href="#L61">61</a>  <em class="jxr_javadoccomment">     * &lt;pre&gt;</em>
+<a class="jxr_linenumber" name="L62" href="#L62">62</a>  <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance(null, *)             = IllegalArgumentException</em>
+<a class="jxr_linenumber" name="L63" href="#L63">63</a>  <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance(*, null)             = IllegalArgumentException</em>
+<a class="jxr_linenumber" name="L64" href="#L64">64</a>  <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("","")               = 0</em>
+<a class="jxr_linenumber" name="L65" href="#L65">65</a>  <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("","a")              = 1</em>
+<a class="jxr_linenumber" name="L66" href="#L66">66</a>  <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("aaapppp", "")       = 7</em>
+<a class="jxr_linenumber" name="L67" href="#L67">67</a>  <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("frog", "fog")       = 1</em>
+<a class="jxr_linenumber" name="L68" href="#L68">68</a>  <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("fly", "ant")        = 3</em>
+<a class="jxr_linenumber" name="L69" href="#L69">69</a>  <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("elephant", "hippo") = 7</em>
+<a class="jxr_linenumber" name="L70" href="#L70">70</a>  <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("hippo", "elephant") = 7</em>
+<a class="jxr_linenumber" name="L71" href="#L71">71</a>  <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("hippo", "zzzzzzzz") = 8</em>
+<a class="jxr_linenumber" name="L72" href="#L72">72</a>  <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("hello", "hallo")    = 1</em>
+<a class="jxr_linenumber" name="L73" href="#L73">73</a>  <em class="jxr_javadoccomment">     * &lt;/pre&gt;</em>
+<a class="jxr_linenumber" name="L74" href="#L74">74</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L75" href="#L75">75</a>  <em class="jxr_javadoccomment">     * @param left the first string, must not be null</em>
+<a class="jxr_linenumber" name="L76" href="#L76">76</a>  <em class="jxr_javadoccomment">     * @param right the second string, must not be null</em>
+<a class="jxr_linenumber" name="L77" href="#L77">77</a>  <em class="jxr_javadoccomment">     * @return result distance</em>
+<a class="jxr_linenumber" name="L78" href="#L78">78</a>  <em class="jxr_javadoccomment">     * @throws IllegalArgumentException if either String input is {@code null}</em>
+<a class="jxr_linenumber" name="L79" href="#L79">79</a>  <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L80" href="#L80">80</a>      @Override
+<a class="jxr_linenumber" name="L81" href="#L81">81</a>      <strong class="jxr_keyword">public</strong> Integer compare(CharSequence left, CharSequence right) {
+<a class="jxr_linenumber" name="L82" href="#L82">82</a>          <strong class="jxr_keyword">return</strong> compare(left, right, Integer.MAX_VALUE);
+<a class="jxr_linenumber" name="L83" href="#L83">83</a>      }
+<a class="jxr_linenumber" name="L84" href="#L84">84</a>  
+<a class="jxr_linenumber" name="L85" href="#L85">85</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L86" href="#L86">86</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L87" href="#L87">87</a>  <em class="jxr_javadoccomment">     * Find the Levenshtein distance between two Strings if it's less than or</em>
+<a class="jxr_linenumber" name="L88" href="#L88">88</a>  <em class="jxr_javadoccomment">     * equal to a given threshold.</em>
+<a class="jxr_linenumber" name="L89" href="#L89">89</a>  <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L90" href="#L90">90</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L91" href="#L91">91</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L92" href="#L92">92</a>  <em class="jxr_javadoccomment">     * This is the number of changes needed to change one String into another,</em>
+<a class="jxr_linenumber" name="L93" href="#L93">93</a>  <em class="jxr_javadoccomment">     * where each change is a single character modification (deletion, insertion</em>
+<a class="jxr_linenumber" name="L94" href="#L94">94</a>  <em class="jxr_javadoccomment">     * or substitution).</em>
+<a class="jxr_linenumber" name="L95" href="#L95">95</a>  <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L96" href="#L96">96</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L97" href="#L97">97</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L98" href="#L98">98</a>  <em class="jxr_javadoccomment">     * This implementation follows from Algorithms on Strings, Trees and</em>
+<a class="jxr_linenumber" name="L99" href="#L99">99</a>  <em class="jxr_javadoccomment">     * Sequences by Dan Gusfield and Chas Emerick's implementation of the</em>
+<a class="jxr_linenumber" name="L100" href="#L100">100</a> <em class="jxr_javadoccomment">     * Levenshtein distance algorithm from &lt;a</em>
+<a class="jxr_linenumber" name="L101" href="#L101">101</a> <em class="jxr_javadoccomment">     * href="<a href="http://www.merriampark.com/ld.htm" target="alexandria_uri">http://www.merriampark.com/ld.htm</a>"</em>
+<a class="jxr_linenumber" name="L102" href="#L102">102</a> <em class="jxr_javadoccomment">     * &gt;<a href="http://www.merriampark.com/ld.htm" target="alexandria_uri">http://www.merriampark.com/ld.htm</a>&lt;/a&gt;</em>
+<a class="jxr_linenumber" name="L103" href="#L103">103</a> <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L104" href="#L104">104</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L105" href="#L105">105</a> <em class="jxr_javadoccomment">     * &lt;pre&gt;</em>
+<a class="jxr_linenumber" name="L106" href="#L106">106</a> <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance(null, *, *)             = IllegalArgumentException</em>
+<a class="jxr_linenumber" name="L107" href="#L107">107</a> <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance(*, null, *)             = IllegalArgumentException</em>
+<a class="jxr_linenumber" name="L108" href="#L108">108</a> <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance(*, *, -1)               = IllegalArgumentException</em>
+<a class="jxr_linenumber" name="L109" href="#L109">109</a> <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("","", 0)               = 0</em>
+<a class="jxr_linenumber" name="L110" href="#L110">110</a> <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("aaapppp", "", 8)       = 7</em>
+<a class="jxr_linenumber" name="L111" href="#L111">111</a> <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("aaapppp", "", 7)       = 7</em>
+<a class="jxr_linenumber" name="L112" href="#L112">112</a> <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("aaapppp", "", 6)       = -1</em>
+<a class="jxr_linenumber" name="L113" href="#L113">113</a> <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("elephant", "hippo", 7) = 7</em>
+<a class="jxr_linenumber" name="L114" href="#L114">114</a> <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("elephant", "hippo", 6) = -1</em>
+<a class="jxr_linenumber" name="L115" href="#L115">115</a> <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("hippo", "elephant", 7) = 7</em>
+<a class="jxr_linenumber" name="L116" href="#L116">116</a> <em class="jxr_javadoccomment">     * StringUtils.getLevenshteinDistance("hippo", "elephant", 6) = -1</em>
+<a class="jxr_linenumber" name="L117" href="#L117">117</a> <em class="jxr_javadoccomment">     * &lt;/pre&gt;</em>
+<a class="jxr_linenumber" name="L118" href="#L118">118</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L119" href="#L119">119</a> <em class="jxr_javadoccomment">     * @param left the first string, must not be null</em>
+<a class="jxr_linenumber" name="L120" href="#L120">120</a> <em class="jxr_javadoccomment">     * @param right the second string, must not be null</em>
+<a class="jxr_linenumber" name="L121" href="#L121">121</a> <em class="jxr_javadoccomment">     * @param threshold the target threshold, must not be negative</em>
+<a class="jxr_linenumber" name="L122" href="#L122">122</a> <em class="jxr_javadoccomment">     * @return result distance</em>
+<a class="jxr_linenumber" name="L123" href="#L123">123</a> <em class="jxr_javadoccomment">     * @throws IllegalArgumentException if either String input is {@code null} or</em>
+<a class="jxr_linenumber" name="L124" href="#L124">124</a> <em class="jxr_javadoccomment">     *             the threshold is negative</em>
+<a class="jxr_linenumber" name="L125" href="#L125">125</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L126" href="#L126">126</a>     <strong class="jxr_keyword">public</strong> Integer compare(CharSequence left, CharSequence right, <strong class="jxr_keyword">int</strong> threshold) {
+<a class="jxr_linenumber" name="L127" href="#L127">127</a>         <strong class="jxr_keyword">if</strong> (left == <strong class="jxr_keyword">null</strong> || right == <strong class="jxr_keyword">null</strong>) {
+<a class="jxr_linenumber" name="L128" href="#L128">128</a>             <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> IllegalArgumentException(<span class="jxr_string">"Strings must not be null"</span>);
+<a class="jxr_linenumber" name="L129" href="#L129">129</a>         }
+<a class="jxr_linenumber" name="L130" href="#L130">130</a>         <strong class="jxr_keyword">if</strong> (threshold &lt; 0) {
+<a class="jxr_linenumber" name="L131" href="#L131">131</a>             <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> IllegalArgumentException(<span class="jxr_string">"Threshold must not be negative"</span>);
+<a class="jxr_linenumber" name="L132" href="#L132">132</a>         }
+<a class="jxr_linenumber" name="L133" href="#L133">133</a> 
+<a class="jxr_linenumber" name="L134" href="#L134">134</a>         <em class="jxr_comment">/*</em>
+<a class="jxr_linenumber" name="L135" href="#L135">135</a> <em class="jxr_comment">         * This implementation only computes the distance if it's less than or</em>
+<a class="jxr_linenumber" name="L136" href="#L136">136</a> <em class="jxr_comment">         * equal to the threshold value, returning -1 if it's greater. The</em>
+<a class="jxr_linenumber" name="L137" href="#L137">137</a> <em class="jxr_comment">         * advantage is performance: unbounded distance is O(nm), but a bound of</em>
+<a class="jxr_linenumber" name="L138" href="#L138">138</a> <em class="jxr_comment">         * k allows us to reduce it to O(km) time by only computing a diagonal</em>
+<a class="jxr_linenumber" name="L139" href="#L139">139</a> <em class="jxr_comment">         * stripe of width 2k + 1 of the cost table. It is also possible to use</em>
+<a class="jxr_linenumber" name="L140" href="#L140">140</a> <em class="jxr_comment">         * this to compute the unbounded Levenshtein distance by starting the</em>
+<a class="jxr_linenumber" name="L141" href="#L141">141</a> <em class="jxr_comment">         * threshold at 1 and doubling each time until the distance is found;</em>
+<a class="jxr_linenumber" name="L142" href="#L142">142</a> <em class="jxr_comment">         * this is O(dm), where d is the distance.</em>
+<a class="jxr_linenumber" name="L143" href="#L143">143</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L144" href="#L144">144</a> <em class="jxr_comment">         * One subtlety comes from needing to ignore entries on the border of</em>
+<a class="jxr_linenumber" name="L145" href="#L145">145</a> <em class="jxr_comment">         * our stripe, e.g. p[] = |#|#|#|* and d[] = *|#|#|#|. We must ignore the entry</em>
+<a class="jxr_linenumber" name="L146" href="#L146">146</a> <em class="jxr_comment">         * to the left of the leftmost member, and we must ignore the entry above the</em>
+<a class="jxr_linenumber" name="L147" href="#L147">147</a> <em class="jxr_comment">         * rightmost member.</em>
+<a class="jxr_linenumber" name="L148" href="#L148">148</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L149" href="#L149">149</a> <em class="jxr_comment">         * Another subtlety comes from our stripe running off the matrix if the</em>
+<a class="jxr_linenumber" name="L150" href="#L150">150</a> <em class="jxr_comment">         * strings aren't of the same size. Since string s is always swapped to</em>
+<a class="jxr_linenumber" name="L151" href="#L151">151</a> <em class="jxr_comment">         * be the shorter of the two, the stripe will always run off to the</em>
+<a class="jxr_linenumber" name="L152" href="#L152">152</a> <em class="jxr_comment">         * upper right instead of the lower left of the matrix.</em>
+<a class="jxr_linenumber" name="L153" href="#L153">153</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L154" href="#L154">154</a> <em class="jxr_comment">         * As a concrete example, suppose s is of length 5, t is of length 7,</em>
+<a class="jxr_linenumber" name="L155" href="#L155">155</a> <em class="jxr_comment">         * and our threshold is 1. In this case we're going to walk a stripe of</em>
+<a class="jxr_linenumber" name="L156" href="#L156">156</a> <em class="jxr_comment">         * length 3. The matrix would look like so:</em>
+<a class="jxr_linenumber" name="L157" href="#L157">157</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L158" href="#L158">158</a> <em class="jxr_comment">         * (columns 1-5) row 1: |#|#| | | |; row 2: |#|#|#| | |; row 3: | |#|#|#| |; row 4: | | |#|#|#|;</em>
+<a class="jxr_linenumber" name="L159" href="#L159">159</a> <em class="jxr_comment">         * row 5: | | | |#|#|; row 6: | | | | |#|; row 7: | | | | | |</em>
+<a class="jxr_linenumber" name="L160" href="#L160">160</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L161" href="#L161">161</a> <em class="jxr_comment">         * Note how the stripe leads off the table as there is no possible way</em>
+<a class="jxr_linenumber" name="L162" href="#L162">162</a> <em class="jxr_comment">         * to turn a string of length 5 into one of length 7 in edit distance of</em>
+<a class="jxr_linenumber" name="L163" href="#L163">163</a> <em class="jxr_comment">         * 1.</em>
+<a class="jxr_linenumber" name="L164" href="#L164">164</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L165" href="#L165">165</a> <em class="jxr_comment">         * Additionally, this implementation decreases memory usage by using two</em>
+<a class="jxr_linenumber" name="L166" href="#L166">166</a> <em class="jxr_comment">         * single-dimensional arrays and swapping them back and forth instead of</em>
+<a class="jxr_linenumber" name="L167" href="#L167">167</a> <em class="jxr_comment">         * allocating an entire n by m matrix. This requires a few minor</em>
+<a class="jxr_linenumber" name="L168" href="#L168">168</a> <em class="jxr_comment">         * changes, such as immediately returning when it's detected that the</em>
+<a class="jxr_linenumber" name="L169" href="#L169">169</a> <em class="jxr_comment">         * stripe has run off the matrix and initially filling the arrays with</em>
+<a class="jxr_linenumber" name="L170" href="#L170">170</a> <em class="jxr_comment">         * large values so that entries we don't compute are ignored.</em>
+<a class="jxr_linenumber" name="L171" href="#L171">171</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L172" href="#L172">172</a> <em class="jxr_comment">         * See Algorithms on Strings, Trees and Sequences by Dan Gusfield for</em>
+<a class="jxr_linenumber" name="L173" href="#L173">173</a> <em class="jxr_comment">         * some discussion.</em>
+<a class="jxr_linenumber" name="L174" href="#L174">174</a> <em class="jxr_comment">         */</em>
+<a class="jxr_linenumber" name="L175" href="#L175">175</a> 
+<a class="jxr_linenumber" name="L176" href="#L176">176</a>         <strong class="jxr_keyword">int</strong> n = left.length(); <em class="jxr_comment">// length of s</em>
+<a class="jxr_linenumber" name="L177" href="#L177">177</a>         <strong class="jxr_keyword">int</strong> m = right.length(); <em class="jxr_comment">// length of t</em>
+<a class="jxr_linenumber" name="L178" href="#L178">178</a> 
+<a class="jxr_linenumber" name="L179" href="#L179">179</a>         <em class="jxr_comment">// if one string is empty, the edit distance is necessarily the length</em>
+<a class="jxr_linenumber" name="L180" href="#L180">180</a>         <em class="jxr_comment">// of the other</em>
+<a class="jxr_linenumber" name="L181" href="#L181">181</a>         <strong class="jxr_keyword">if</strong> (n == 0) {
+<a class="jxr_linenumber" name="L182" href="#L182">182</a>             <strong class="jxr_keyword">return</strong> m &lt;= threshold ? m : -1;
+<a class="jxr_linenumber" name="L183" href="#L183">183</a>         } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (m == 0) {
+<a class="jxr_linenumber" name="L184" href="#L184">184</a>             <strong class="jxr_keyword">return</strong> n &lt;= threshold ? n : -1;
+<a class="jxr_linenumber" name="L185" href="#L185">185</a>         }
+<a class="jxr_linenumber" name="L186" href="#L186">186</a> 
+<a class="jxr_linenumber" name="L187" href="#L187">187</a>         <strong class="jxr_keyword">if</strong> (n &gt; m) {
+<a class="jxr_linenumber" name="L188" href="#L188">188</a>             <em class="jxr_comment">// swap the two strings to consume less memory</em>
+<a class="jxr_linenumber" name="L189" href="#L189">189</a>             <strong class="jxr_keyword">final</strong> CharSequence tmp = left;
+<a class="jxr_linenumber" name="L190" href="#L190">190</a>             left = right;
+<a class="jxr_linenumber" name="L191" href="#L191">191</a>             right = tmp;
+<a class="jxr_linenumber" name="L192" href="#L192">192</a>             n = m;
+<a class="jxr_linenumber" name="L193" href="#L193">193</a>             m = right.length();
+<a class="jxr_linenumber" name="L194" href="#L194">194</a>         }
+<a class="jxr_linenumber" name="L195" href="#L195">195</a> 
+<a class="jxr_linenumber" name="L196" href="#L196">196</a>         <strong class="jxr_keyword">int</strong>[] p = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">int</strong>[n + 1]; <em class="jxr_comment">// 'previous' cost array, horizontally</em>
+<a class="jxr_linenumber" name="L197" href="#L197">197</a>         <strong class="jxr_keyword">int</strong>[] d = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">int</strong>[n + 1]; <em class="jxr_comment">// cost array, horizontally</em>
+<a class="jxr_linenumber" name="L198" href="#L198">198</a>         <strong class="jxr_keyword">int</strong>[] _d; <em class="jxr_comment">// placeholder to assist in swapping p and d</em>
+<a class="jxr_linenumber" name="L199" href="#L199">199</a> 
+<a class="jxr_linenumber" name="L200" href="#L200">200</a>         <em class="jxr_comment">// fill in starting table values</em>
+<a class="jxr_linenumber" name="L201" href="#L201">201</a>         <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> boundary = Math.min(n, threshold) + 1;
+<a class="jxr_linenumber" name="L202" href="#L202">202</a>         <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i &lt; boundary; i++) {
+<a class="jxr_linenumber" name="L203" href="#L203">203</a>             p[i] = i;
+<a class="jxr_linenumber" name="L204" href="#L204">204</a>         }
+<a class="jxr_linenumber" name="L205" href="#L205">205</a>         <em class="jxr_comment">// these fills ensure that the value above the rightmost entry of our</em>
+<a class="jxr_linenumber" name="L206" href="#L206">206</a>         <em class="jxr_comment">// stripe will be ignored in following loop iterations</em>
+<a class="jxr_linenumber" name="L207" href="#L207">207</a>         Arrays.fill(p, boundary, p.length, Integer.MAX_VALUE);
+<a class="jxr_linenumber" name="L208" href="#L208">208</a>         Arrays.fill(d, Integer.MAX_VALUE);
+<a class="jxr_linenumber" name="L209" href="#L209">209</a> 
+<a class="jxr_linenumber" name="L210" href="#L210">210</a>         <em class="jxr_comment">// iterates through t</em>
+<a class="jxr_linenumber" name="L211" href="#L211">211</a>         <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> j = 1; j &lt;= m; j++) {
+<a class="jxr_linenumber" name="L212" href="#L212">212</a>             <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">char</strong> t_j = right.charAt(j - 1); <em class="jxr_comment">// jth character of t</em>
+<a class="jxr_linenumber" name="L213" href="#L213">213</a>             d[0] = j;
+<a class="jxr_linenumber" name="L214" href="#L214">214</a> 
+<a class="jxr_linenumber" name="L215" href="#L215">215</a>             <em class="jxr_comment">// compute stripe indices, constrain to array size</em>
+<a class="jxr_linenumber" name="L216" href="#L216">216</a>             <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> min = Math.max(1, j - threshold);
+<a class="jxr_linenumber" name="L217" href="#L217">217</a>             <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> max = j &gt; Integer.MAX_VALUE - threshold ? n : Math.min(
+<a class="jxr_linenumber" name="L218" href="#L218">218</a>                     n, j + threshold);
+<a class="jxr_linenumber" name="L219" href="#L219">219</a> 
+<a class="jxr_linenumber" name="L220" href="#L220">220</a>             <em class="jxr_comment">// the stripe may lead off of the table if s and t are of different</em>
+<a class="jxr_linenumber" name="L221" href="#L221">221</a>             <em class="jxr_comment">// sizes</em>
+<a class="jxr_linenumber" name="L222" href="#L222">222</a>             <strong class="jxr_keyword">if</strong> (min &gt; max) {
+<a class="jxr_linenumber" name="L223" href="#L223">223</a>                 <strong class="jxr_keyword">return</strong> -1;
+<a class="jxr_linenumber" name="L224" href="#L224">224</a>             }
+<a class="jxr_linenumber" name="L225" href="#L225">225</a> 
+<a class="jxr_linenumber" name="L226" href="#L226">226</a>             <em class="jxr_comment">// ignore entry left of leftmost</em>
+<a class="jxr_linenumber" name="L227" href="#L227">227</a>             <strong class="jxr_keyword">if</strong> (min &gt; 1) {
+<a class="jxr_linenumber" name="L228" href="#L228">228</a>                 d[min - 1] = Integer.MAX_VALUE;
+<a class="jxr_linenumber" name="L229" href="#L229">229</a>             }
+<a class="jxr_linenumber" name="L230" href="#L230">230</a> 
+<a class="jxr_linenumber" name="L231" href="#L231">231</a>             <em class="jxr_comment">// iterates through [min, max] in s</em>
+<a class="jxr_linenumber" name="L232" href="#L232">232</a>             <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = min; i &lt;= max; i++) {
+<a class="jxr_linenumber" name="L233" href="#L233">233</a>                 <strong class="jxr_keyword">if</strong> (left.charAt(i - 1) == t_j) {
+<a class="jxr_linenumber" name="L234" href="#L234">234</a>                     <em class="jxr_comment">// diagonally left and up</em>
+<a class="jxr_linenumber" name="L235" href="#L235">235</a>                     d[i] = p[i - 1];
+<a class="jxr_linenumber" name="L236" href="#L236">236</a>                 } <strong class="jxr_keyword">else</strong> {
+<a class="jxr_linenumber" name="L237" href="#L237">237</a>                     <em class="jxr_comment">// 1 + minimum of cell to the left, to the top, diagonally</em>
+<a class="jxr_linenumber" name="L238" href="#L238">238</a>                     <em class="jxr_comment">// left and up</em>
+<a class="jxr_linenumber" name="L239" href="#L239">239</a>                     d[i] = 1 + Math.min(Math.min(d[i - 1], p[i]), p[i - 1]);
+<a class="jxr_linenumber" name="L240" href="#L240">240</a>                 }
+<a class="jxr_linenumber" name="L241" href="#L241">241</a>             }
+<a class="jxr_linenumber" name="L242" href="#L242">242</a> 
+<a class="jxr_linenumber" name="L243" href="#L243">243</a>             <em class="jxr_comment">// copy current distance counts to 'previous row' distance counts</em>
+<a class="jxr_linenumber" name="L244" href="#L244">244</a>             _d = p;
+<a class="jxr_linenumber" name="L245" href="#L245">245</a>             p = d;
+<a class="jxr_linenumber" name="L246" href="#L246">246</a>             d = _d;
+<a class="jxr_linenumber" name="L247" href="#L247">247</a>         }
+<a class="jxr_linenumber" name="L248" href="#L248">248</a> 
+<a class="jxr_linenumber" name="L249" href="#L249">249</a>         <em class="jxr_comment">// if p[n] is greater than the threshold, there's no guarantee on it</em>
+<a class="jxr_linenumber" name="L250" href="#L250">250</a>         <em class="jxr_comment">// being the correct</em>
+<a class="jxr_linenumber" name="L251" href="#L251">251</a>         <em class="jxr_comment">// distance</em>
+<a class="jxr_linenumber" name="L252" href="#L252">252</a>         <strong class="jxr_keyword">if</strong> (p[n] &lt;= threshold) {
+<a class="jxr_linenumber" name="L253" href="#L253">253</a>             <strong class="jxr_keyword">return</strong> p[n];
+<a class="jxr_linenumber" name="L254" href="#L254">254</a>         }
+<a class="jxr_linenumber" name="L255" href="#L255">255</a>         <strong class="jxr_keyword">return</strong> -1;
+<a class="jxr_linenumber" name="L256" href="#L256">256</a>     }
+<a class="jxr_linenumber" name="L257" href="#L257">257</a> 
+<a class="jxr_linenumber" name="L258" href="#L258">258</a> }
+</pre>
+<hr/>
+<div id="footer">Copyright &#169; 2014 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</div>
+</body>
+</html>

Propchange: websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/LevenshteinDistance.html
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message