commons-notifications mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ki...@apache.org
Subject svn commit: r947984 [35/36] - in /websites/production/commons/content/sandbox/commons-text: ./ apidocs/ apidocs/org/apache/commons/text/diff/ apidocs/org/apache/commons/text/diff/class-use/ apidocs/org/apache/commons/text/names/ apidocs/org/apache/comm...
Date Fri, 17 Apr 2015 06:46:32 GMT
Modified: websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/LevenshteinDistance.html
==============================================================================
--- websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/LevenshteinDistance.html (original)
+++ websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/LevenshteinDistance.html Fri Apr 17 06:46:28 2015
@@ -38,222 +38,391 @@
 <a class="jxr_linenumber" name="L30" href="#L30">30</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
 <a class="jxr_linenumber" name="L31" href="#L31">31</a>  <em class="jxr_javadoccomment"> * This code has been adapted from Apache Commons Lang 3.3.</em>
 <a class="jxr_linenumber" name="L32" href="#L32">32</a>  <em class="jxr_javadoccomment"> * &lt;/p&gt;</em>
-<a class="jxr_linenumber" name="L33" href="#L33">33</a>  <em class="jxr_javadoccomment"> */</em>
-<a class="jxr_linenumber" name="L34" href="#L34">34</a>  <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../../org/apache/commons/text/similarity/LevenshteinDistance.html">LevenshteinDistance</a> <strong class="jxr_keyword">implements</strong> StringMetric&lt;Integer&gt; {
-<a class="jxr_linenumber" name="L35" href="#L35">35</a>  
-<a class="jxr_linenumber" name="L36" href="#L36">36</a>      <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="L37" href="#L37">37</a>  <em class="jxr_javadoccomment">     * Find the Levenshtein distance between two Strings.</em>
-<a class="jxr_linenumber" name="L38" href="#L38">38</a>  <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="L39" href="#L39">39</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;A higher score indicates a greater distance.&lt;/p&gt;</em>
-<a class="jxr_linenumber" name="L40" href="#L40">40</a>  <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="L41" href="#L41">41</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="L42" href="#L42">42</a>  <em class="jxr_javadoccomment">     * The previous implementation of the Levenshtein distance algorithm was</em>
-<a class="jxr_linenumber" name="L43" href="#L43">43</a>  <em class="jxr_javadoccomment">     * from &lt;a</em>
-<a class="jxr_linenumber" name="L44" href="#L44">44</a>  <em class="jxr_javadoccomment">     * href="<a href="http://www.merriampark.com/ld.htm" target="alexandria_uri">http://www.merriampark.com/ld.htm</a>"&gt;http://www.merriampark.com</em>
-<a class="jxr_linenumber" name="L45" href="#L45">45</a>  <em class="jxr_javadoccomment">     * /ld.htm&lt;/a&gt;</em>
-<a class="jxr_linenumber" name="L46" href="#L46">46</a>  <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
-<a class="jxr_linenumber" name="L47" href="#L47">47</a>  <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="L48" href="#L48">48</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="L49" href="#L49">49</a>  <em class="jxr_javadoccomment">     * Chas Emerick has written an implementation in Java, which avoids an</em>
-<a class="jxr_linenumber" name="L50" href="#L50">50</a>  <em class="jxr_javadoccomment">     * OutOfMemoryError which can occur when my Java implementation is used with</em>
-<a class="jxr_linenumber" name="L51" href="#L51">51</a>  <em class="jxr_javadoccomment">     * very large strings.&lt;br&gt;</em>
-<a class="jxr_linenumber" name="L52" href="#L52">52</a>  <em class="jxr_javadoccomment">     * This implementation of the Levenshtein distance algorithm is from &lt;a</em>
-<a class="jxr_linenumber" name="L53" href="#L53">53</a>  <em class="jxr_javadoccomment">     * href="<a href="http://www.merriampark.com/ldjava.htm" target="alexandria_uri">http://www.merriampark.com/ldjava.htm</a>"&gt;http://www.merriampark.com/</em>
-<a class="jxr_linenumber" name="L54" href="#L54">54</a>  <em class="jxr_javadoccomment">     * ldjava.htm&lt;/a&gt;</em>
-<a class="jxr_linenumber" name="L55" href="#L55">55</a>  <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
-<a class="jxr_linenumber" name="L56" href="#L56">56</a>  <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="L57" href="#L57">57</a>  <em class="jxr_javadoccomment">     * &lt;pre&gt;</em>
-<a class="jxr_linenumber" name="L58" href="#L58">58</a>  <em class="jxr_javadoccomment">     * distance.compare(null, *)             = IllegalArgumentException</em>
-<a class="jxr_linenumber" name="L59" href="#L59">59</a>  <em class="jxr_javadoccomment">     * distance.compare(*, null)             = IllegalArgumentException</em>
-<a class="jxr_linenumber" name="L60" href="#L60">60</a>  <em class="jxr_javadoccomment">     * distance.compare("","")               = 0</em>
-<a class="jxr_linenumber" name="L61" href="#L61">61</a>  <em class="jxr_javadoccomment">     * distance.compare("","a")              = 1</em>
-<a class="jxr_linenumber" name="L62" href="#L62">62</a>  <em class="jxr_javadoccomment">     * distance.compare("aaapppp", "")       = 7</em>
-<a class="jxr_linenumber" name="L63" href="#L63">63</a>  <em class="jxr_javadoccomment">     * distance.compare("frog", "fog")       = 1</em>
-<a class="jxr_linenumber" name="L64" href="#L64">64</a>  <em class="jxr_javadoccomment">     * distance.compare("fly", "ant")        = 3</em>
-<a class="jxr_linenumber" name="L65" href="#L65">65</a>  <em class="jxr_javadoccomment">     * distance.compare("elephant", "hippo") = 7</em>
-<a class="jxr_linenumber" name="L66" href="#L66">66</a>  <em class="jxr_javadoccomment">     * distance.compare("hippo", "elephant") = 7</em>
-<a class="jxr_linenumber" name="L67" href="#L67">67</a>  <em class="jxr_javadoccomment">     * distance.compare("hippo", "zzzzzzzz") = 8</em>
-<a class="jxr_linenumber" name="L68" href="#L68">68</a>  <em class="jxr_javadoccomment">     * distance.compare("hello", "hallo")    = 1</em>
-<a class="jxr_linenumber" name="L69" href="#L69">69</a>  <em class="jxr_javadoccomment">     * &lt;/pre&gt;</em>
-<a class="jxr_linenumber" name="L70" href="#L70">70</a>  <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="L71" href="#L71">71</a>  <em class="jxr_javadoccomment">     * @param left the first string, must not be null</em>
-<a class="jxr_linenumber" name="L72" href="#L72">72</a>  <em class="jxr_javadoccomment">     * @param right the second string, must not be null</em>
-<a class="jxr_linenumber" name="L73" href="#L73">73</a>  <em class="jxr_javadoccomment">     * @return result distance</em>
-<a class="jxr_linenumber" name="L74" href="#L74">74</a>  <em class="jxr_javadoccomment">     * @throws IllegalArgumentException if either String input {@code null}</em>
-<a class="jxr_linenumber" name="L75" href="#L75">75</a>  <em class="jxr_javadoccomment">     */</em>
-<a class="jxr_linenumber" name="L76" href="#L76">76</a>      @Override
-<a class="jxr_linenumber" name="L77" href="#L77">77</a>      <strong class="jxr_keyword">public</strong> Integer compare(CharSequence left, CharSequence right) {
-<a class="jxr_linenumber" name="L78" href="#L78">78</a>          <strong class="jxr_keyword">return</strong> compare(left, right, Integer.MAX_VALUE);
-<a class="jxr_linenumber" name="L79" href="#L79">79</a>      }
-<a class="jxr_linenumber" name="L80" href="#L80">80</a>  
-<a class="jxr_linenumber" name="L81" href="#L81">81</a>      <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="L82" href="#L82">82</a>  <em class="jxr_javadoccomment">     * Find the Levenshtein distance between two CharSequences if it's less than or</em>
-<a class="jxr_linenumber" name="L83" href="#L83">83</a>  <em class="jxr_javadoccomment">     * equal to a given threshold.</em>
+<a class="jxr_linenumber" name="L33" href="#L33">33</a>  <em class="jxr_javadoccomment"> *</em>
+<a class="jxr_linenumber" name="L34" href="#L34">34</a>  <em class="jxr_javadoccomment"> * @since 1.0</em>
+<a class="jxr_linenumber" name="L35" href="#L35">35</a>  <em class="jxr_javadoccomment"> */</em>
+<a class="jxr_linenumber" name="L36" href="#L36">36</a>  <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../../org/apache/commons/text/similarity/LevenshteinDistance.html">LevenshteinDistance</a> <strong class="jxr_keyword">implements</strong> EditDistance&lt;Integer&gt; {
+<a class="jxr_linenumber" name="L37" href="#L37">37</a>  
+<a class="jxr_linenumber" name="L38" href="#L38">38</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L39" href="#L39">39</a>  <em class="jxr_javadoccomment">     * Default instance.</em>
+<a class="jxr_linenumber" name="L40" href="#L40">40</a>  <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L41" href="#L41">41</a>      <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">final</strong> <a href="../../../../../org/apache/commons/text/similarity/LevenshteinDistance.html">LevenshteinDistance</a> DEFAULT_INSTANCE = <strong class="jxr_keyword">new</strong> <a href="../../../../../org/apache/commons/text/similarity/LevenshteinDistance.html">LevenshteinDistance</a>();
+<a class="jxr_linenumber" name="L42" href="#L42">42</a>  
+<a class="jxr_linenumber" name="L43" href="#L43">43</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L44" href="#L44">44</a>  <em class="jxr_javadoccomment">     * Threshold.</em>
+<a class="jxr_linenumber" name="L45" href="#L45">45</a>  <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L46" href="#L46">46</a>      <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">final</strong> Integer threshold;
+<a class="jxr_linenumber" name="L47" href="#L47">47</a>  
+<a class="jxr_linenumber" name="L48" href="#L48">48</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L49" href="#L49">49</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L50" href="#L50">50</a>  <em class="jxr_javadoccomment">     * This constructs an instance that uses a version</em>
+<a class="jxr_linenumber" name="L51" href="#L51">51</a>  <em class="jxr_javadoccomment">     * of the algorithm that does not use a threshold parameter.</em>
+<a class="jxr_linenumber" name="L52" href="#L52">52</a>  <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L53" href="#L53">53</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L54" href="#L54">54</a>  <em class="jxr_javadoccomment">     * @see LevenshteinDistance#getDefaultInstance()</em>
+<a class="jxr_linenumber" name="L55" href="#L55">55</a>  <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L56" href="#L56">56</a>      <strong class="jxr_keyword">public</strong> <a href="../../../../../org/apache/commons/text/similarity/LevenshteinDistance.html">LevenshteinDistance</a>() {
+<a class="jxr_linenumber" name="L57" href="#L57">57</a>          <strong class="jxr_keyword">this</strong>(<strong class="jxr_keyword">null</strong>);
+<a class="jxr_linenumber" name="L58" href="#L58">58</a>      }
+<a class="jxr_linenumber" name="L59" href="#L59">59</a>  
+<a class="jxr_linenumber" name="L60" href="#L60">60</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L61" href="#L61">61</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L62" href="#L62">62</a>  <em class="jxr_javadoccomment">     * If the threshold is not null, distance calculations will be limited to a maximum length.</em>
+<a class="jxr_linenumber" name="L63" href="#L63">63</a>  <em class="jxr_javadoccomment">     * If the threshold is null, the unlimited version of the algorithm will be used.</em>
+<a class="jxr_linenumber" name="L64" href="#L64">64</a>  <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L65" href="#L65">65</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L66" href="#L66">66</a>  <em class="jxr_javadoccomment">     * @param threshold</em>
+<a class="jxr_linenumber" name="L67" href="#L67">67</a>  <em class="jxr_javadoccomment">     *        If this is null then distance calculations will not be limited.</em>
+<a class="jxr_linenumber" name="L68" href="#L68">68</a>  <em class="jxr_javadoccomment">     *        This may not be negative.</em>
+<a class="jxr_linenumber" name="L69" href="#L69">69</a>  <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L70" href="#L70">70</a>      <strong class="jxr_keyword">public</strong> <a href="../../../../../org/apache/commons/text/similarity/LevenshteinDistance.html">LevenshteinDistance</a>(<strong class="jxr_keyword">final</strong> Integer threshold) {
+<a class="jxr_linenumber" name="L71" href="#L71">71</a>          <strong class="jxr_keyword">if</strong> (threshold != <strong class="jxr_keyword">null</strong> &amp;&amp; threshold &lt; 0) {
+<a class="jxr_linenumber" name="L72" href="#L72">72</a>              <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> IllegalArgumentException(<span class="jxr_string">"Threshold must not be negative"</span>);
+<a class="jxr_linenumber" name="L73" href="#L73">73</a>          }
+<a class="jxr_linenumber" name="L74" href="#L74">74</a>          <strong class="jxr_keyword">this</strong>.threshold = threshold;
+<a class="jxr_linenumber" name="L75" href="#L75">75</a>      }
+<a class="jxr_linenumber" name="L76" href="#L76">76</a>  
+<a class="jxr_linenumber" name="L77" href="#L77">77</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L78" href="#L78">78</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;Find the Levenshtein distance between two Strings.&lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L79" href="#L79">79</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L80" href="#L80">80</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;A higher score indicates a greater distance.&lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L81" href="#L81">81</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L82" href="#L82">82</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;The previous implementation of the Levenshtein distance algorithm</em>
+<a class="jxr_linenumber" name="L83" href="#L83">83</a>  <em class="jxr_javadoccomment">     * was from &lt;a href="<a href="http://www.merriampark.com/ld.htm" target="alexandria_uri">http://www.merriampark.com/ld.htm</a>"&gt;<a href="http://www.merriampark.com/ld.htm" target="alexandria_uri">http://www.merriampark.com/ld.htm</a>&lt;/a&gt;&lt;/p&gt;</em>
 <a class="jxr_linenumber" name="L84" href="#L84">84</a>  <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="L85" href="#L85">85</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="L86" href="#L86">86</a>  <em class="jxr_javadoccomment">     * This implementation follows from Algorithms on Strings, Trees and</em>
-<a class="jxr_linenumber" name="L87" href="#L87">87</a>  <em class="jxr_javadoccomment">     * Sequences by Dan Gusfield and Chas Emerick's implementation of the</em>
-<a class="jxr_linenumber" name="L88" href="#L88">88</a>  <em class="jxr_javadoccomment">     * Levenshtein distance algorithm from &lt;a</em>
-<a class="jxr_linenumber" name="L89" href="#L89">89</a>  <em class="jxr_javadoccomment">     * href="<a href="http://www.merriampark.com/ld.htm" target="alexandria_uri">http://www.merriampark.com/ld.htm</a>"</em>
-<a class="jxr_linenumber" name="L90" href="#L90">90</a>  <em class="jxr_javadoccomment">     * &gt;<a href="http://www.merriampark.com/ld.htm" target="alexandria_uri">http://www.merriampark.com/ld.htm</a>&lt;/a&gt;</em>
-<a class="jxr_linenumber" name="L91" href="#L91">91</a>  <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
-<a class="jxr_linenumber" name="L92" href="#L92">92</a>  <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="L93" href="#L93">93</a>  <em class="jxr_javadoccomment">     * &lt;pre&gt;</em>
-<a class="jxr_linenumber" name="L94" href="#L94">94</a>  <em class="jxr_javadoccomment">     * distance.compare(null, *, *)             = IllegalArgumentException</em>
-<a class="jxr_linenumber" name="L95" href="#L95">95</a>  <em class="jxr_javadoccomment">     * distance.compare(*, null, *)             = IllegalArgumentException</em>
-<a class="jxr_linenumber" name="L96" href="#L96">96</a>  <em class="jxr_javadoccomment">     * distance.compare(*, *, -1)               = IllegalArgumentException</em>
-<a class="jxr_linenumber" name="L97" href="#L97">97</a>  <em class="jxr_javadoccomment">     * distance.compare("","", 0)               = 0</em>
-<a class="jxr_linenumber" name="L98" href="#L98">98</a>  <em class="jxr_javadoccomment">     * distance.compare("aaapppp", "", 8)       = 7</em>
-<a class="jxr_linenumber" name="L99" href="#L99">99</a>  <em class="jxr_javadoccomment">     * distance.compare("aaapppp", "", 7)       = 7</em>
-<a class="jxr_linenumber" name="L100" href="#L100">100</a> <em class="jxr_javadoccomment">     * distance.compare("aaapppp", "", 6)       = -1</em>
-<a class="jxr_linenumber" name="L101" href="#L101">101</a> <em class="jxr_javadoccomment">     * distance.compare("elephant", "hippo", 7) = 7</em>
-<a class="jxr_linenumber" name="L102" href="#L102">102</a> <em class="jxr_javadoccomment">     * distance.compare("elephant", "hippo", 6) = -1</em>
-<a class="jxr_linenumber" name="L103" href="#L103">103</a> <em class="jxr_javadoccomment">     * distance.compare("hippo", "elephant", 7) = 7</em>
-<a class="jxr_linenumber" name="L104" href="#L104">104</a> <em class="jxr_javadoccomment">     * distance.compare("hippo", "elephant", 6) = -1</em>
-<a class="jxr_linenumber" name="L105" href="#L105">105</a> <em class="jxr_javadoccomment">     * &lt;/pre&gt;</em>
-<a class="jxr_linenumber" name="L106" href="#L106">106</a> <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="L107" href="#L107">107</a> <em class="jxr_javadoccomment">     * @param left the first string, must not be null</em>
-<a class="jxr_linenumber" name="L108" href="#L108">108</a> <em class="jxr_javadoccomment">     * @param right the second string, must not be null</em>
-<a class="jxr_linenumber" name="L109" href="#L109">109</a> <em class="jxr_javadoccomment">     * @param threshold the target threshold, must not be negative</em>
-<a class="jxr_linenumber" name="L110" href="#L110">110</a> <em class="jxr_javadoccomment">     * @return result distance</em>
-<a class="jxr_linenumber" name="L111" href="#L111">111</a> <em class="jxr_javadoccomment">     * @throws IllegalArgumentException if either String input {@code null} or</em>
-<a class="jxr_linenumber" name="L112" href="#L112">112</a> <em class="jxr_javadoccomment">     *             negative threshold</em>
-<a class="jxr_linenumber" name="L113" href="#L113">113</a> <em class="jxr_javadoccomment">     */</em>
-<a class="jxr_linenumber" name="L114" href="#L114">114</a>     <strong class="jxr_keyword">public</strong> Integer compare(CharSequence left, CharSequence right, <strong class="jxr_keyword">int</strong> threshold) {
-<a class="jxr_linenumber" name="L115" href="#L115">115</a>         <strong class="jxr_keyword">if</strong> (left == <strong class="jxr_keyword">null</strong> || right == <strong class="jxr_keyword">null</strong>) {
-<a class="jxr_linenumber" name="L116" href="#L116">116</a>             <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> IllegalArgumentException(<span class="jxr_string">"Strings must not be null"</span>);
-<a class="jxr_linenumber" name="L117" href="#L117">117</a>         }
-<a class="jxr_linenumber" name="L118" href="#L118">118</a>         <strong class="jxr_keyword">if</strong> (threshold &lt; 0) {
-<a class="jxr_linenumber" name="L119" href="#L119">119</a>             <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> IllegalArgumentException(<span class="jxr_string">"Threshold must not be negative"</span>);
-<a class="jxr_linenumber" name="L120" href="#L120">120</a>         }
-<a class="jxr_linenumber" name="L121" href="#L121">121</a> 
-<a class="jxr_linenumber" name="L122" href="#L122">122</a>         <em class="jxr_comment">/*</em>
-<a class="jxr_linenumber" name="L123" href="#L123">123</a> <em class="jxr_comment">         * This implementation only computes the distance if it's less than or</em>
-<a class="jxr_linenumber" name="L124" href="#L124">124</a> <em class="jxr_comment">         * equal to the threshold value, returning -1 if it's greater. The</em>
-<a class="jxr_linenumber" name="L125" href="#L125">125</a> <em class="jxr_comment">         * advantage is performance: unbounded distance is O(nm), but a bound of</em>
-<a class="jxr_linenumber" name="L126" href="#L126">126</a> <em class="jxr_comment">         * k allows us to reduce it to O(km) time by only computing a diagonal</em>
-<a class="jxr_linenumber" name="L127" href="#L127">127</a> <em class="jxr_comment">         * stripe of width 2k + 1 of the cost table. It is also possible to use</em>
-<a class="jxr_linenumber" name="L128" href="#L128">128</a> <em class="jxr_comment">         * this to compute the unbounded Levenshtein distance by starting the</em>
-<a class="jxr_linenumber" name="L129" href="#L129">129</a> <em class="jxr_comment">         * threshold at 1 and doubling each time until the distance is found;</em>
-<a class="jxr_linenumber" name="L130" href="#L130">130</a> <em class="jxr_comment">         * this is O(dm), where d is the distance.</em>
-<a class="jxr_linenumber" name="L131" href="#L131">131</a> <em class="jxr_comment">         *</em>
-<a class="jxr_linenumber" name="L132" href="#L132">132</a> <em class="jxr_comment">         * One subtlety comes from needing to ignore entries on the border of</em>
-<a class="jxr_linenumber" name="L133" href="#L133">133</a> <em class="jxr_comment">         * our stripe, e.g. p[] = |#|#|#|* d[] = *|#|#|#|. We must ignore the entry</em>
-<a class="jxr_linenumber" name="L134" href="#L134">134</a> <em class="jxr_comment">         * to the left of the leftmost member. We must ignore the entry above the</em>
-<a class="jxr_linenumber" name="L135" href="#L135">135</a> <em class="jxr_comment">         * rightmost member.</em>
-<a class="jxr_linenumber" name="L136" href="#L136">136</a> <em class="jxr_comment">         *</em>
-<a class="jxr_linenumber" name="L137" href="#L137">137</a> <em class="jxr_comment">         * Another subtlety comes from our stripe running off the matrix if the</em>
-<a class="jxr_linenumber" name="L138" href="#L138">138</a> <em class="jxr_comment">         * strings aren't of the same size. Since string s is always swapped to</em>
-<a class="jxr_linenumber" name="L139" href="#L139">139</a> <em class="jxr_comment">         * be the shorter of the two, the stripe will always run off to the</em>
-<a class="jxr_linenumber" name="L140" href="#L140">140</a> <em class="jxr_comment">         * upper right instead of the lower left of the matrix.</em>
-<a class="jxr_linenumber" name="L141" href="#L141">141</a> <em class="jxr_comment">         *</em>
-<a class="jxr_linenumber" name="L142" href="#L142">142</a> <em class="jxr_comment">         * As a concrete example, suppose s is of length 5, t is of length 7,</em>
-<a class="jxr_linenumber" name="L143" href="#L143">143</a> <em class="jxr_comment">         * and our threshold is 1. In this case we're going to walk a stripe of</em>
-<a class="jxr_linenumber" name="L144" href="#L144">144</a> <em class="jxr_comment">         * length 3. The matrix would look like so:</em>
-<a class="jxr_linenumber" name="L145" href="#L145">145</a> <em class="jxr_comment">         *</em>
-<a class="jxr_linenumber" name="L146" href="#L146">146</a> <em class="jxr_comment">         * 1 2 3 4 5 1 |#|#| | | | 2 |#|#|#| | | 3 | |#|#|#| | 4 | | |#|#|#| 5 |</em>
-<a class="jxr_linenumber" name="L147" href="#L147">147</a> <em class="jxr_comment">         * | | |#|#| 6 | | | | |#| 7 | | | | | |</em>
-<a class="jxr_linenumber" name="L148" href="#L148">148</a> <em class="jxr_comment">         *</em>
-<a class="jxr_linenumber" name="L149" href="#L149">149</a> <em class="jxr_comment">         * Note how the stripe leads off the table as there is no possible way</em>
-<a class="jxr_linenumber" name="L150" href="#L150">150</a> <em class="jxr_comment">         * to turn a string of length 5 into one of length 7 in edit distance of</em>
-<a class="jxr_linenumber" name="L151" href="#L151">151</a> <em class="jxr_comment">         * 1.</em>
-<a class="jxr_linenumber" name="L152" href="#L152">152</a> <em class="jxr_comment">         *</em>
-<a class="jxr_linenumber" name="L153" href="#L153">153</a> <em class="jxr_comment">         * Additionally, this implementation decreases memory usage by using two</em>
-<a class="jxr_linenumber" name="L154" href="#L154">154</a> <em class="jxr_comment">         * single-dimensional arrays and swapping them back and forth instead of</em>
-<a class="jxr_linenumber" name="L155" href="#L155">155</a> <em class="jxr_comment">         * allocating an entire n by m matrix. This requires a few minor</em>
-<a class="jxr_linenumber" name="L156" href="#L156">156</a> <em class="jxr_comment">         * changes, such as immediately returning when it's detected that the</em>
-<a class="jxr_linenumber" name="L157" href="#L157">157</a> <em class="jxr_comment">         * stripe has run off the matrix and initially filling the arrays with</em>
-<a class="jxr_linenumber" name="L158" href="#L158">158</a> <em class="jxr_comment">         * large values so that entries we don't compute are ignored.</em>
-<a class="jxr_linenumber" name="L159" href="#L159">159</a> <em class="jxr_comment">         *</em>
-<a class="jxr_linenumber" name="L160" href="#L160">160</a> <em class="jxr_comment">         * See Algorithms on Strings, Trees and Sequences by Dan Gusfield for</em>
-<a class="jxr_linenumber" name="L161" href="#L161">161</a> <em class="jxr_comment">         * some discussion.</em>
-<a class="jxr_linenumber" name="L162" href="#L162">162</a> <em class="jxr_comment">         */</em>
-<a class="jxr_linenumber" name="L163" href="#L163">163</a> 
-<a class="jxr_linenumber" name="L164" href="#L164">164</a>         <strong class="jxr_keyword">int</strong> n = left.length(); <em class="jxr_comment">// length of s</em>
-<a class="jxr_linenumber" name="L165" href="#L165">165</a>         <strong class="jxr_keyword">int</strong> m = right.length(); <em class="jxr_comment">// length of t</em>
-<a class="jxr_linenumber" name="L166" href="#L166">166</a> 
-<a class="jxr_linenumber" name="L167" href="#L167">167</a>         <em class="jxr_comment">// if one string is empty, the edit distance is necessarily the length</em>
-<a class="jxr_linenumber" name="L168" href="#L168">168</a>         <em class="jxr_comment">// of the other</em>
-<a class="jxr_linenumber" name="L169" href="#L169">169</a>         <strong class="jxr_keyword">if</strong> (n == 0) {
-<a class="jxr_linenumber" name="L170" href="#L170">170</a>             <strong class="jxr_keyword">return</strong> m &lt;= threshold ? m : -1;
-<a class="jxr_linenumber" name="L171" href="#L171">171</a>         } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (m == 0) {
-<a class="jxr_linenumber" name="L172" href="#L172">172</a>             <strong class="jxr_keyword">return</strong> n &lt;= threshold ? n : -1;
-<a class="jxr_linenumber" name="L173" href="#L173">173</a>         }
-<a class="jxr_linenumber" name="L174" href="#L174">174</a> 
-<a class="jxr_linenumber" name="L175" href="#L175">175</a>         <strong class="jxr_keyword">if</strong> (n &gt; m) {
-<a class="jxr_linenumber" name="L176" href="#L176">176</a>             <em class="jxr_comment">// swap the two strings to consume less memory</em>
-<a class="jxr_linenumber" name="L177" href="#L177">177</a>             <strong class="jxr_keyword">final</strong> CharSequence tmp = left;
-<a class="jxr_linenumber" name="L178" href="#L178">178</a>             left = right;
-<a class="jxr_linenumber" name="L179" href="#L179">179</a>             right = tmp;
-<a class="jxr_linenumber" name="L180" href="#L180">180</a>             n = m;
-<a class="jxr_linenumber" name="L181" href="#L181">181</a>             m = right.length();
-<a class="jxr_linenumber" name="L182" href="#L182">182</a>         }
-<a class="jxr_linenumber" name="L183" href="#L183">183</a> 
-<a class="jxr_linenumber" name="L184" href="#L184">184</a>         <strong class="jxr_keyword">int</strong>[] p = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">int</strong>[n + 1]; <em class="jxr_comment">// 'previous' cost array, horizontally</em>
-<a class="jxr_linenumber" name="L185" href="#L185">185</a>         <strong class="jxr_keyword">int</strong>[] d = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">int</strong>[n + 1]; <em class="jxr_comment">// cost array, horizontally</em>
-<a class="jxr_linenumber" name="L186" href="#L186">186</a>         <strong class="jxr_keyword">int</strong>[] _d; <em class="jxr_comment">// placeholder to assist in swapping p and d</em>
-<a class="jxr_linenumber" name="L187" href="#L187">187</a> 
-<a class="jxr_linenumber" name="L188" href="#L188">188</a>         <em class="jxr_comment">// fill in starting table values</em>
-<a class="jxr_linenumber" name="L189" href="#L189">189</a>         <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> boundary = Math.min(n, threshold) + 1;
-<a class="jxr_linenumber" name="L190" href="#L190">190</a>         <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i &lt; boundary; i++) {
-<a class="jxr_linenumber" name="L191" href="#L191">191</a>             p[i] = i;
-<a class="jxr_linenumber" name="L192" href="#L192">192</a>         }
-<a class="jxr_linenumber" name="L193" href="#L193">193</a>         <em class="jxr_comment">// these fills ensure that the value above the rightmost entry of our</em>
-<a class="jxr_linenumber" name="L194" href="#L194">194</a>         <em class="jxr_comment">// stripe will be ignored in following loop iterations</em>
-<a class="jxr_linenumber" name="L195" href="#L195">195</a>         Arrays.fill(p, boundary, p.length, Integer.MAX_VALUE);
-<a class="jxr_linenumber" name="L196" href="#L196">196</a>         Arrays.fill(d, Integer.MAX_VALUE);
-<a class="jxr_linenumber" name="L197" href="#L197">197</a> 
-<a class="jxr_linenumber" name="L198" href="#L198">198</a>         <em class="jxr_comment">// iterates through t</em>
-<a class="jxr_linenumber" name="L199" href="#L199">199</a>         <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> j = 1; j &lt;= m; j++) {
-<a class="jxr_linenumber" name="L200" href="#L200">200</a>             <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">char</strong> t_j = right.charAt(j - 1); <em class="jxr_comment">// jth character of t</em>
-<a class="jxr_linenumber" name="L201" href="#L201">201</a>             d[0] = j;
-<a class="jxr_linenumber" name="L202" href="#L202">202</a> 
-<a class="jxr_linenumber" name="L203" href="#L203">203</a>             <em class="jxr_comment">// compute stripe indices, constrain to array size</em>
-<a class="jxr_linenumber" name="L204" href="#L204">204</a>             <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> min = Math.max(1, j - threshold);
-<a class="jxr_linenumber" name="L205" href="#L205">205</a>             <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> max = j &gt; Integer.MAX_VALUE - threshold ? n : Math.min(
-<a class="jxr_linenumber" name="L206" href="#L206">206</a>                     n, j + threshold);
-<a class="jxr_linenumber" name="L207" href="#L207">207</a> 
-<a class="jxr_linenumber" name="L208" href="#L208">208</a>             <em class="jxr_comment">// the stripe may lead off of the table if s and t are of different</em>
-<a class="jxr_linenumber" name="L209" href="#L209">209</a>             <em class="jxr_comment">// sizes</em>
-<a class="jxr_linenumber" name="L210" href="#L210">210</a>             <strong class="jxr_keyword">if</strong> (min &gt; max) {
-<a class="jxr_linenumber" name="L211" href="#L211">211</a>                 <strong class="jxr_keyword">return</strong> -1;
-<a class="jxr_linenumber" name="L212" href="#L212">212</a>             }
-<a class="jxr_linenumber" name="L213" href="#L213">213</a> 
-<a class="jxr_linenumber" name="L214" href="#L214">214</a>             <em class="jxr_comment">// ignore entry left of leftmost</em>
-<a class="jxr_linenumber" name="L215" href="#L215">215</a>             <strong class="jxr_keyword">if</strong> (min &gt; 1) {
-<a class="jxr_linenumber" name="L216" href="#L216">216</a>                 d[min - 1] = Integer.MAX_VALUE;
-<a class="jxr_linenumber" name="L217" href="#L217">217</a>             }
-<a class="jxr_linenumber" name="L218" href="#L218">218</a> 
-<a class="jxr_linenumber" name="L219" href="#L219">219</a>             <em class="jxr_comment">// iterates through [min, max] in s</em>
-<a class="jxr_linenumber" name="L220" href="#L220">220</a>             <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = min; i &lt;= max; i++) {
-<a class="jxr_linenumber" name="L221" href="#L221">221</a>                 <strong class="jxr_keyword">if</strong> (left.charAt(i - 1) == t_j) {
-<a class="jxr_linenumber" name="L222" href="#L222">222</a>                     <em class="jxr_comment">// diagonally left and up</em>
-<a class="jxr_linenumber" name="L223" href="#L223">223</a>                     d[i] = p[i - 1];
-<a class="jxr_linenumber" name="L224" href="#L224">224</a>                 } <strong class="jxr_keyword">else</strong> {
-<a class="jxr_linenumber" name="L225" href="#L225">225</a>                     <em class="jxr_comment">// 1 + minimum of cell to the left, to the top, diagonally</em>
-<a class="jxr_linenumber" name="L226" href="#L226">226</a>                     <em class="jxr_comment">// left and up</em>
-<a class="jxr_linenumber" name="L227" href="#L227">227</a>                     d[i] = 1 + Math.min(Math.min(d[i - 1], p[i]), p[i - 1]);
-<a class="jxr_linenumber" name="L228" href="#L228">228</a>                 }
-<a class="jxr_linenumber" name="L229" href="#L229">229</a>             }
-<a class="jxr_linenumber" name="L230" href="#L230">230</a> 
-<a class="jxr_linenumber" name="L231" href="#L231">231</a>             <em class="jxr_comment">// copy current distance counts to 'previous row' distance counts</em>
-<a class="jxr_linenumber" name="L232" href="#L232">232</a>             _d = p;
-<a class="jxr_linenumber" name="L233" href="#L233">233</a>             p = d;
-<a class="jxr_linenumber" name="L234" href="#L234">234</a>             d = _d;
-<a class="jxr_linenumber" name="L235" href="#L235">235</a>         }
-<a class="jxr_linenumber" name="L236" href="#L236">236</a> 
-<a class="jxr_linenumber" name="L237" href="#L237">237</a>         <em class="jxr_comment">// if p[n] is greater than the threshold, there's no guarantee on it</em>
-<a class="jxr_linenumber" name="L238" href="#L238">238</a>         <em class="jxr_comment">// being the correct</em>
-<a class="jxr_linenumber" name="L239" href="#L239">239</a>         <em class="jxr_comment">// distance</em>
-<a class="jxr_linenumber" name="L240" href="#L240">240</a>         <strong class="jxr_keyword">if</strong> (p[n] &lt;= threshold) {
-<a class="jxr_linenumber" name="L241" href="#L241">241</a>             <strong class="jxr_keyword">return</strong> p[n];
+<a class="jxr_linenumber" name="L85" href="#L85">85</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;Chas Emerick has written an implementation in Java, which avoids an OutOfMemoryError</em>
+<a class="jxr_linenumber" name="L86" href="#L86">86</a>  <em class="jxr_javadoccomment">     * which can occur when my Java implementation is used with very large strings.&lt;br&gt;</em>
+<a class="jxr_linenumber" name="L87" href="#L87">87</a>  <em class="jxr_javadoccomment">     * This implementation of the Levenshtein distance algorithm</em>
+<a class="jxr_linenumber" name="L88" href="#L88">88</a>  <em class="jxr_javadoccomment">     * is from &lt;a href="<a href="http://www.merriampark.com/ldjava.htm" target="alexandria_uri">http://www.merriampark.com/ldjava.htm</a>"&gt;<a href="http://www.merriampark.com/ldjava.htm" target="alexandria_uri">http://www.merriampark.com/ldjava.htm</a>&lt;/a&gt;&lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L89" href="#L89">89</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L90" href="#L90">90</a>  <em class="jxr_javadoccomment">     * &lt;pre&gt;</em>
+<a class="jxr_linenumber" name="L91" href="#L91">91</a>  <em class="jxr_javadoccomment">     * distance.apply(null, *)             = IllegalArgumentException</em>
+<a class="jxr_linenumber" name="L92" href="#L92">92</a>  <em class="jxr_javadoccomment">     * distance.apply(*, null)             = IllegalArgumentException</em>
+<a class="jxr_linenumber" name="L93" href="#L93">93</a>  <em class="jxr_javadoccomment">     * distance.apply("","")               = 0</em>
+<a class="jxr_linenumber" name="L94" href="#L94">94</a>  <em class="jxr_javadoccomment">     * distance.apply("","a")              = 1</em>
+<a class="jxr_linenumber" name="L95" href="#L95">95</a>  <em class="jxr_javadoccomment">     * distance.apply("aaapppp", "")       = 7</em>
+<a class="jxr_linenumber" name="L96" href="#L96">96</a>  <em class="jxr_javadoccomment">     * distance.apply("frog", "fog")       = 1</em>
+<a class="jxr_linenumber" name="L97" href="#L97">97</a>  <em class="jxr_javadoccomment">     * distance.apply("fly", "ant")        = 3</em>
+<a class="jxr_linenumber" name="L98" href="#L98">98</a>  <em class="jxr_javadoccomment">     * distance.apply("elephant", "hippo") = 7</em>
+<a class="jxr_linenumber" name="L99" href="#L99">99</a>  <em class="jxr_javadoccomment">     * distance.apply("hippo", "elephant") = 7</em>
+<a class="jxr_linenumber" name="L100" href="#L100">100</a> <em class="jxr_javadoccomment">     * distance.apply("hippo", "zzzzzzzz") = 8</em>
+<a class="jxr_linenumber" name="L101" href="#L101">101</a> <em class="jxr_javadoccomment">     * distance.apply("hello", "hallo")    = 1</em>
+<a class="jxr_linenumber" name="L102" href="#L102">102</a> <em class="jxr_javadoccomment">     * &lt;/pre&gt;</em>
+<a class="jxr_linenumber" name="L103" href="#L103">103</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L104" href="#L104">104</a> <em class="jxr_javadoccomment">     * @param left the first string, must not be null</em>
+<a class="jxr_linenumber" name="L105" href="#L105">105</a> <em class="jxr_javadoccomment">     * @param right the second string, must not be null</em>
+<a class="jxr_linenumber" name="L106" href="#L106">106</a> <em class="jxr_javadoccomment">     * @return result distance, or -1</em>
+<a class="jxr_linenumber" name="L107" href="#L107">107</a> <em class="jxr_javadoccomment">     * @throws IllegalArgumentException if either String input is {@code null}</em>
+<a class="jxr_linenumber" name="L108" href="#L108">108</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L109" href="#L109">109</a>     <strong class="jxr_keyword">public</strong> Integer apply(CharSequence left, CharSequence right) {
+<a class="jxr_linenumber" name="L110" href="#L110">110</a>         <strong class="jxr_keyword">if</strong> (threshold != <strong class="jxr_keyword">null</strong>) {
+<a class="jxr_linenumber" name="L111" href="#L111">111</a>             <strong class="jxr_keyword">return</strong> limitedCompare(left, right, threshold);
+<a class="jxr_linenumber" name="L112" href="#L112">112</a>         } <strong class="jxr_keyword">else</strong> {
+<a class="jxr_linenumber" name="L113" href="#L113">113</a>             <strong class="jxr_keyword">return</strong> unlimitedCompare(left, right);
+<a class="jxr_linenumber" name="L114" href="#L114">114</a>         }
+<a class="jxr_linenumber" name="L115" href="#L115">115</a>     }
+<a class="jxr_linenumber" name="L116" href="#L116">116</a> 
+<a class="jxr_linenumber" name="L117" href="#L117">117</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L118" href="#L118">118</a> <em class="jxr_javadoccomment">     * Gets the default instance.</em>
+<a class="jxr_linenumber" name="L119" href="#L119">119</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L120" href="#L120">120</a> <em class="jxr_javadoccomment">     * @return the default instance</em>
+<a class="jxr_linenumber" name="L121" href="#L121">121</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L122" href="#L122">122</a>     <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">static</strong> <a href="../../../../../org/apache/commons/text/similarity/LevenshteinDistance.html">LevenshteinDistance</a> getDefaultInstance() {
+<a class="jxr_linenumber" name="L123" href="#L123">123</a>         <strong class="jxr_keyword">return</strong> DEFAULT_INSTANCE;
+<a class="jxr_linenumber" name="L124" href="#L124">124</a>     }
+<a class="jxr_linenumber" name="L125" href="#L125">125</a> 
+<a class="jxr_linenumber" name="L126" href="#L126">126</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L127" href="#L127">127</a> <em class="jxr_javadoccomment">     * Gets the distance threshold.</em>
+<a class="jxr_linenumber" name="L128" href="#L128">128</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L129" href="#L129">129</a> <em class="jxr_javadoccomment">     * @return the distance threshold</em>
+<a class="jxr_linenumber" name="L130" href="#L130">130</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L131" href="#L131">131</a>     <strong class="jxr_keyword">public</strong> Integer getThreshold() {
+<a class="jxr_linenumber" name="L132" href="#L132">132</a>         <strong class="jxr_keyword">return</strong> threshold;
+<a class="jxr_linenumber" name="L133" href="#L133">133</a>     }
+<a class="jxr_linenumber" name="L134" href="#L134">134</a> 
+<a class="jxr_linenumber" name="L135" href="#L135">135</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L136" href="#L136">136</a> <em class="jxr_javadoccomment">     * Find the Levenshtein distance between two CharSequences if it's less than or</em>
+<a class="jxr_linenumber" name="L137" href="#L137">137</a> <em class="jxr_javadoccomment">     * equal to a given threshold.</em>
+<a class="jxr_linenumber" name="L138" href="#L138">138</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L139" href="#L139">139</a> <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="L140" href="#L140">140</a> <em class="jxr_javadoccomment">     * This implementation follows from Algorithms on Strings, Trees and</em>
+<a class="jxr_linenumber" name="L141" href="#L141">141</a> <em class="jxr_javadoccomment">     * Sequences by Dan Gusfield and Chas Emerick's implementation of the</em>
+<a class="jxr_linenumber" name="L142" href="#L142">142</a> <em class="jxr_javadoccomment">     * Levenshtein distance algorithm from &lt;a</em>
+<a class="jxr_linenumber" name="L143" href="#L143">143</a> <em class="jxr_javadoccomment">     * href="<a href="http://www.merriampark.com/ld.htm" target="alexandria_uri">http://www.merriampark.com/ld.htm</a>"</em>
+<a class="jxr_linenumber" name="L144" href="#L144">144</a> <em class="jxr_javadoccomment">     * &gt;<a href="http://www.merriampark.com/ld.htm" target="alexandria_uri">http://www.merriampark.com/ld.htm</a>&lt;/a&gt;</em>
+<a class="jxr_linenumber" name="L145" href="#L145">145</a> <em class="jxr_javadoccomment">     * &lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L146" href="#L146">146</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L147" href="#L147">147</a> <em class="jxr_javadoccomment">     * &lt;pre&gt;</em>
+<a class="jxr_linenumber" name="L148" href="#L148">148</a> <em class="jxr_javadoccomment">     * limitedCompare(null, *, *)             = IllegalArgumentException</em>
+<a class="jxr_linenumber" name="L149" href="#L149">149</a> <em class="jxr_javadoccomment">     * limitedCompare(*, null, *)             = IllegalArgumentException</em>
+<a class="jxr_linenumber" name="L150" href="#L150">150</a> <em class="jxr_javadoccomment">     * limitedCompare(*, *, -1)               = IllegalArgumentException</em>
+<a class="jxr_linenumber" name="L151" href="#L151">151</a> <em class="jxr_javadoccomment">     * limitedCompare("","", 0)               = 0</em>
+<a class="jxr_linenumber" name="L152" href="#L152">152</a> <em class="jxr_javadoccomment">     * limitedCompare("aaapppp", "", 8)       = 7</em>
+<a class="jxr_linenumber" name="L153" href="#L153">153</a> <em class="jxr_javadoccomment">     * limitedCompare("aaapppp", "", 7)       = 7</em>
+<a class="jxr_linenumber" name="L154" href="#L154">154</a> <em class="jxr_javadoccomment">     * limitedCompare("aaapppp", "", 6)       = -1</em>
+<a class="jxr_linenumber" name="L155" href="#L155">155</a> <em class="jxr_javadoccomment">     * limitedCompare("elephant", "hippo", 7) = 7</em>
+<a class="jxr_linenumber" name="L156" href="#L156">156</a> <em class="jxr_javadoccomment">     * limitedCompare("elephant", "hippo", 6) = -1</em>
+<a class="jxr_linenumber" name="L157" href="#L157">157</a> <em class="jxr_javadoccomment">     * limitedCompare("hippo", "elephant", 7) = 7</em>
+<a class="jxr_linenumber" name="L158" href="#L158">158</a> <em class="jxr_javadoccomment">     * limitedCompare("hippo", "elephant", 6) = -1</em>
+<a class="jxr_linenumber" name="L159" href="#L159">159</a> <em class="jxr_javadoccomment">     * &lt;/pre&gt;</em>
+<a class="jxr_linenumber" name="L160" href="#L160">160</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L161" href="#L161">161</a> <em class="jxr_javadoccomment">     * @param left the first string, must not be null</em>
+<a class="jxr_linenumber" name="L162" href="#L162">162</a> <em class="jxr_javadoccomment">     * @param right the second string, must not be null</em>
+<a class="jxr_linenumber" name="L163" href="#L163">163</a> <em class="jxr_javadoccomment">     * @param threshold the target threshold, must not be negative</em>
+<a class="jxr_linenumber" name="L164" href="#L164">164</a> <em class="jxr_javadoccomment">     * @return result distance, or -1</em>
+<a class="jxr_linenumber" name="L165" href="#L165">165</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L166" href="#L166">166</a>     <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong> limitedCompare(CharSequence left, CharSequence right, <strong class="jxr_keyword">int</strong> threshold) {
+<a class="jxr_linenumber" name="L167" href="#L167">167</a>         <strong class="jxr_keyword">if</strong> (left == <strong class="jxr_keyword">null</strong> || right == <strong class="jxr_keyword">null</strong>) {
+<a class="jxr_linenumber" name="L168" href="#L168">168</a>             <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> IllegalArgumentException(<span class="jxr_string">"Strings must not be null"</span>);
+<a class="jxr_linenumber" name="L169" href="#L169">169</a>         }
+<a class="jxr_linenumber" name="L170" href="#L170">170</a>         <strong class="jxr_keyword">if</strong> (threshold &lt; 0) {
+<a class="jxr_linenumber" name="L171" href="#L171">171</a>             <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> IllegalArgumentException(<span class="jxr_string">"Threshold must not be negative"</span>);
+<a class="jxr_linenumber" name="L172" href="#L172">172</a>         }
+<a class="jxr_linenumber" name="L173" href="#L173">173</a> 
+<a class="jxr_linenumber" name="L174" href="#L174">174</a>         <em class="jxr_comment">/*</em>
+<a class="jxr_linenumber" name="L175" href="#L175">175</a> <em class="jxr_comment">         * This implementation only computes the distance if it's less than or</em>
+<a class="jxr_linenumber" name="L176" href="#L176">176</a> <em class="jxr_comment">         * equal to the threshold value, returning -1 if it's greater. The</em>
+<a class="jxr_linenumber" name="L177" href="#L177">177</a> <em class="jxr_comment">         * advantage is performance: unbounded distance is O(nm), but a bound of</em>
+<a class="jxr_linenumber" name="L178" href="#L178">178</a> <em class="jxr_comment">         * k allows us to reduce it to O(km) time by only computing a diagonal</em>
+<a class="jxr_linenumber" name="L179" href="#L179">179</a> <em class="jxr_comment">         * stripe of width 2k + 1 of the cost table. It is also possible to use</em>
+<a class="jxr_linenumber" name="L180" href="#L180">180</a> <em class="jxr_comment">         * this to compute the unbounded Levenshtein distance by starting the</em>
+<a class="jxr_linenumber" name="L181" href="#L181">181</a> <em class="jxr_comment">         * threshold at 1 and doubling each time until the distance is found;</em>
+<a class="jxr_linenumber" name="L182" href="#L182">182</a> <em class="jxr_comment">         * this is O(dm), where d is the distance.</em>
+<a class="jxr_linenumber" name="L183" href="#L183">183</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L184" href="#L184">184</a> <em class="jxr_comment">         * One subtlety comes from needing to ignore entries on the border of</em>
+<a class="jxr_linenumber" name="L185" href="#L185">185</a> <em class="jxr_comment">         * our stripe eg. p[] = |#|#|#|* d[] = *|#|#|#| We must ignore the entry</em>
+<a class="jxr_linenumber" name="L186" href="#L186">186</a> <em class="jxr_comment">         * to the left of the leftmost member We must ignore the entry above the</em>
+<a class="jxr_linenumber" name="L187" href="#L187">187</a> <em class="jxr_comment">         * rightmost member</em>
+<a class="jxr_linenumber" name="L188" href="#L188">188</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L189" href="#L189">189</a> <em class="jxr_comment">         * Another subtlety comes from our stripe running off the matrix if the</em>
+<a class="jxr_linenumber" name="L190" href="#L190">190</a> <em class="jxr_comment">         * strings aren't of the same size. Since string s is always swapped to</em>
+<a class="jxr_linenumber" name="L191" href="#L191">191</a> <em class="jxr_comment">         * be the shorter of the two, the stripe will always run off to the</em>
+<a class="jxr_linenumber" name="L192" href="#L192">192</a> <em class="jxr_comment">         * upper right instead of the lower left of the matrix.</em>
+<a class="jxr_linenumber" name="L193" href="#L193">193</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L194" href="#L194">194</a> <em class="jxr_comment">         * As a concrete example, suppose s is of length 5, t is of length 7,</em>
+<a class="jxr_linenumber" name="L195" href="#L195">195</a> <em class="jxr_comment">         * and our threshold is 1. In this case we're going to walk a stripe of</em>
+<a class="jxr_linenumber" name="L196" href="#L196">196</a> <em class="jxr_comment">         * length 3. The matrix would look like so:</em>
+<a class="jxr_linenumber" name="L197" href="#L197">197</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L198" href="#L198">198</a> <em class="jxr_comment">         * &lt;pre&gt;</em>
+<a class="jxr_linenumber" name="L199" href="#L199">199</a> <em class="jxr_comment">         *    1 2 3 4 5</em>
+<a class="jxr_linenumber" name="L200" href="#L200">200</a> <em class="jxr_comment">         * 1 |#|#| | | |</em>
+<a class="jxr_linenumber" name="L201" href="#L201">201</a> <em class="jxr_comment">         * 2 |#|#|#| | |</em>
+<a class="jxr_linenumber" name="L202" href="#L202">202</a> <em class="jxr_comment">         * 3 | |#|#|#| |</em>
+<a class="jxr_linenumber" name="L203" href="#L203">203</a> <em class="jxr_comment">         * 4 | | |#|#|#|</em>
+<a class="jxr_linenumber" name="L204" href="#L204">204</a> <em class="jxr_comment">         * 5 | | | |#|#|</em>
+<a class="jxr_linenumber" name="L205" href="#L205">205</a> <em class="jxr_comment">         * 6 | | | | |#|</em>
+<a class="jxr_linenumber" name="L206" href="#L206">206</a> <em class="jxr_comment">         * 7 | | | | | |</em>
+<a class="jxr_linenumber" name="L207" href="#L207">207</a> <em class="jxr_comment">         * &lt;/pre&gt;</em>
+<a class="jxr_linenumber" name="L208" href="#L208">208</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L209" href="#L209">209</a> <em class="jxr_comment">         * Note how the stripe leads off the table as there is no possible way</em>
+<a class="jxr_linenumber" name="L210" href="#L210">210</a> <em class="jxr_comment">         * to turn a string of length 5 into one of length 7 in edit distance of</em>
+<a class="jxr_linenumber" name="L211" href="#L211">211</a> <em class="jxr_comment">         * 1.</em>
+<a class="jxr_linenumber" name="L212" href="#L212">212</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L213" href="#L213">213</a> <em class="jxr_comment">         * Additionally, this implementation decreases memory usage by using two</em>
+<a class="jxr_linenumber" name="L214" href="#L214">214</a> <em class="jxr_comment">         * single-dimensional arrays and swapping them back and forth instead of</em>
+<a class="jxr_linenumber" name="L215" href="#L215">215</a> <em class="jxr_comment">         * allocating an entire n by m matrix. This requires a few minor</em>
+<a class="jxr_linenumber" name="L216" href="#L216">216</a> <em class="jxr_comment">         * changes, such as immediately returning when it's detected that the</em>
+<a class="jxr_linenumber" name="L217" href="#L217">217</a> <em class="jxr_comment">         * stripe has run off the matrix and initially filling the arrays with</em>
+<a class="jxr_linenumber" name="L218" href="#L218">218</a> <em class="jxr_comment">         * large values so that entries we don't compute are ignored.</em>
+<a class="jxr_linenumber" name="L219" href="#L219">219</a> <em class="jxr_comment">         *</em>
+<a class="jxr_linenumber" name="L220" href="#L220">220</a> <em class="jxr_comment">         * See Algorithms on Strings, Trees and Sequences by Dan Gusfield for</em>
+<a class="jxr_linenumber" name="L221" href="#L221">221</a> <em class="jxr_comment">         * some discussion.</em>
+<a class="jxr_linenumber" name="L222" href="#L222">222</a> <em class="jxr_comment">         */</em>
+<a class="jxr_linenumber" name="L223" href="#L223">223</a> 
+<a class="jxr_linenumber" name="L224" href="#L224">224</a>         <strong class="jxr_keyword">int</strong> n = left.length(); <em class="jxr_comment">// length of left</em>
+<a class="jxr_linenumber" name="L225" href="#L225">225</a>         <strong class="jxr_keyword">int</strong> m = right.length(); <em class="jxr_comment">// length of right</em>
+<a class="jxr_linenumber" name="L226" href="#L226">226</a> 
+<a class="jxr_linenumber" name="L227" href="#L227">227</a>         <em class="jxr_comment">// if one string is empty, the edit distance is necessarily the length</em>
+<a class="jxr_linenumber" name="L228" href="#L228">228</a>         <em class="jxr_comment">// of the other</em>
+<a class="jxr_linenumber" name="L229" href="#L229">229</a>         <strong class="jxr_keyword">if</strong> (n == 0) {
+<a class="jxr_linenumber" name="L230" href="#L230">230</a>             <strong class="jxr_keyword">return</strong> m &lt;= threshold ? m : -1;
+<a class="jxr_linenumber" name="L231" href="#L231">231</a>         } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (m == 0) {
+<a class="jxr_linenumber" name="L232" href="#L232">232</a>             <strong class="jxr_keyword">return</strong> n &lt;= threshold ? n : -1;
+<a class="jxr_linenumber" name="L233" href="#L233">233</a>         }
+<a class="jxr_linenumber" name="L234" href="#L234">234</a> 
+<a class="jxr_linenumber" name="L235" href="#L235">235</a>         <strong class="jxr_keyword">if</strong> (n &gt; m) {
+<a class="jxr_linenumber" name="L236" href="#L236">236</a>             <em class="jxr_comment">// swap the two strings to consume less memory</em>
+<a class="jxr_linenumber" name="L237" href="#L237">237</a>             <strong class="jxr_keyword">final</strong> CharSequence tmp = left;
+<a class="jxr_linenumber" name="L238" href="#L238">238</a>             left = right;
+<a class="jxr_linenumber" name="L239" href="#L239">239</a>             right = tmp;
+<a class="jxr_linenumber" name="L240" href="#L240">240</a>             n = m;
+<a class="jxr_linenumber" name="L241" href="#L241">241</a>             m = right.length();
 <a class="jxr_linenumber" name="L242" href="#L242">242</a>         }
-<a class="jxr_linenumber" name="L243" href="#L243">243</a>         <strong class="jxr_keyword">return</strong> -1;
-<a class="jxr_linenumber" name="L244" href="#L244">244</a>     }
-<a class="jxr_linenumber" name="L245" href="#L245">245</a> 
-<a class="jxr_linenumber" name="L246" href="#L246">246</a> }
+<a class="jxr_linenumber" name="L243" href="#L243">243</a> 
+<a class="jxr_linenumber" name="L244" href="#L244">244</a>         <strong class="jxr_keyword">int</strong>[] p = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">int</strong>[n + 1]; <em class="jxr_comment">// 'previous' cost array, horizontally</em>
+<a class="jxr_linenumber" name="L245" href="#L245">245</a>         <strong class="jxr_keyword">int</strong>[] d = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">int</strong>[n + 1]; <em class="jxr_comment">// cost array, horizontally</em>
+<a class="jxr_linenumber" name="L246" href="#L246">246</a>         <strong class="jxr_keyword">int</strong>[] tempD; <em class="jxr_comment">// placeholder to assist in swapping p and d</em>
+<a class="jxr_linenumber" name="L247" href="#L247">247</a> 
+<a class="jxr_linenumber" name="L248" href="#L248">248</a>         <em class="jxr_comment">// fill in starting table values</em>
+<a class="jxr_linenumber" name="L249" href="#L249">249</a>         <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> boundary = Math.min(n, threshold) + 1;
+<a class="jxr_linenumber" name="L250" href="#L250">250</a>         <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i &lt; boundary; i++) {
+<a class="jxr_linenumber" name="L251" href="#L251">251</a>             p[i] = i;
+<a class="jxr_linenumber" name="L252" href="#L252">252</a>         }
+<a class="jxr_linenumber" name="L253" href="#L253">253</a>         <em class="jxr_comment">// these fills ensure that the value above the rightmost entry of our</em>
+<a class="jxr_linenumber" name="L254" href="#L254">254</a>         <em class="jxr_comment">// stripe will be ignored in following loop iterations</em>
+<a class="jxr_linenumber" name="L255" href="#L255">255</a>         Arrays.fill(p, boundary, p.length, Integer.MAX_VALUE);
+<a class="jxr_linenumber" name="L256" href="#L256">256</a>         Arrays.fill(d, Integer.MAX_VALUE);
+<a class="jxr_linenumber" name="L257" href="#L257">257</a> 
+<a class="jxr_linenumber" name="L258" href="#L258">258</a>         <em class="jxr_comment">// iterates through t</em>
+<a class="jxr_linenumber" name="L259" href="#L259">259</a>         <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> j = 1; j &lt;= m; j++) {
+<a class="jxr_linenumber" name="L260" href="#L260">260</a>             <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">char</strong> rightJ = right.charAt(j - 1); <em class="jxr_comment">// jth character of right</em>
+<a class="jxr_linenumber" name="L261" href="#L261">261</a>             d[0] = j;
+<a class="jxr_linenumber" name="L262" href="#L262">262</a> 
+<a class="jxr_linenumber" name="L263" href="#L263">263</a>             <em class="jxr_comment">// compute stripe indices, constrain to array size</em>
+<a class="jxr_linenumber" name="L264" href="#L264">264</a>             <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> min = Math.max(1, j - threshold);
+<a class="jxr_linenumber" name="L265" href="#L265">265</a>             <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> max = j &gt; Integer.MAX_VALUE - threshold ? n : Math.min(
+<a class="jxr_linenumber" name="L266" href="#L266">266</a>                     n, j + threshold);
+<a class="jxr_linenumber" name="L267" href="#L267">267</a> 
+<a class="jxr_linenumber" name="L268" href="#L268">268</a>             <em class="jxr_comment">// the stripe may lead off of the table if s and t are of different</em>
+<a class="jxr_linenumber" name="L269" href="#L269">269</a>             <em class="jxr_comment">// sizes</em>
+<a class="jxr_linenumber" name="L270" href="#L270">270</a>             <strong class="jxr_keyword">if</strong> (min &gt; max) {
+<a class="jxr_linenumber" name="L271" href="#L271">271</a>                 <strong class="jxr_keyword">return</strong> -1;
+<a class="jxr_linenumber" name="L272" href="#L272">272</a>             }
+<a class="jxr_linenumber" name="L273" href="#L273">273</a> 
+<a class="jxr_linenumber" name="L274" href="#L274">274</a>             <em class="jxr_comment">// ignore entry left of leftmost</em>
+<a class="jxr_linenumber" name="L275" href="#L275">275</a>             <strong class="jxr_keyword">if</strong> (min &gt; 1) {
+<a class="jxr_linenumber" name="L276" href="#L276">276</a>                 d[min - 1] = Integer.MAX_VALUE;
+<a class="jxr_linenumber" name="L277" href="#L277">277</a>             }
+<a class="jxr_linenumber" name="L278" href="#L278">278</a> 
+<a class="jxr_linenumber" name="L279" href="#L279">279</a>             <em class="jxr_comment">// iterates through [min, max] in s</em>
+<a class="jxr_linenumber" name="L280" href="#L280">280</a>             <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = min; i &lt;= max; i++) {
+<a class="jxr_linenumber" name="L281" href="#L281">281</a>                 <strong class="jxr_keyword">if</strong> (left.charAt(i - 1) == rightJ) {
+<a class="jxr_linenumber" name="L282" href="#L282">282</a>                     <em class="jxr_comment">// diagonally left and up</em>
+<a class="jxr_linenumber" name="L283" href="#L283">283</a>                     d[i] = p[i - 1];
+<a class="jxr_linenumber" name="L284" href="#L284">284</a>                 } <strong class="jxr_keyword">else</strong> {
+<a class="jxr_linenumber" name="L285" href="#L285">285</a>                     <em class="jxr_comment">// 1 + minimum of cell to the left, to the top, diagonally</em>
+<a class="jxr_linenumber" name="L286" href="#L286">286</a>                     <em class="jxr_comment">// left and up</em>
+<a class="jxr_linenumber" name="L287" href="#L287">287</a>                     d[i] = 1 + Math.min(Math.min(d[i - 1], p[i]), p[i - 1]);
+<a class="jxr_linenumber" name="L288" href="#L288">288</a>                 }
+<a class="jxr_linenumber" name="L289" href="#L289">289</a>             }
+<a class="jxr_linenumber" name="L290" href="#L290">290</a> 
+<a class="jxr_linenumber" name="L291" href="#L291">291</a>             <em class="jxr_comment">// copy current distance counts to 'previous row' distance counts</em>
+<a class="jxr_linenumber" name="L292" href="#L292">292</a>             tempD = p;
+<a class="jxr_linenumber" name="L293" href="#L293">293</a>             p = d;
+<a class="jxr_linenumber" name="L294" href="#L294">294</a>             d = tempD;
+<a class="jxr_linenumber" name="L295" href="#L295">295</a>         }
+<a class="jxr_linenumber" name="L296" href="#L296">296</a> 
+<a class="jxr_linenumber" name="L297" href="#L297">297</a>         <em class="jxr_comment">// if p[n] is greater than the threshold, there's no guarantee on it</em>
+<a class="jxr_linenumber" name="L298" href="#L298">298</a>         <em class="jxr_comment">// being the correct</em>
+<a class="jxr_linenumber" name="L299" href="#L299">299</a>         <em class="jxr_comment">// distance</em>
+<a class="jxr_linenumber" name="L300" href="#L300">300</a>         <strong class="jxr_keyword">if</strong> (p[n] &lt;= threshold) {
+<a class="jxr_linenumber" name="L301" href="#L301">301</a>             <strong class="jxr_keyword">return</strong> p[n];
+<a class="jxr_linenumber" name="L302" href="#L302">302</a>         }
+<a class="jxr_linenumber" name="L303" href="#L303">303</a>         <strong class="jxr_keyword">return</strong> -1;
+<a class="jxr_linenumber" name="L304" href="#L304">304</a>     }
+<a class="jxr_linenumber" name="L305" href="#L305">305</a> 
+<a class="jxr_linenumber" name="L306" href="#L306">306</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L307" href="#L307">307</a> <em class="jxr_javadoccomment">     * &lt;p&gt;Find the Levenshtein distance between two Strings.&lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L308" href="#L308">308</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L309" href="#L309">309</a> <em class="jxr_javadoccomment">     * &lt;p&gt;A higher score indicates a greater distance.&lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L310" href="#L310">310</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L311" href="#L311">311</a> <em class="jxr_javadoccomment">     * &lt;p&gt;The previous implementation of the Levenshtein distance algorithm</em>
+<a class="jxr_linenumber" name="L312" href="#L312">312</a> <em class="jxr_javadoccomment">     * was from &lt;a href="<a href="http://www.merriampark.com/ld.htm" target="alexandria_uri">http://www.merriampark.com/ld.htm</a>"&gt;<a href="http://www.merriampark.com/ld.htm" target="alexandria_uri">http://www.merriampark.com/ld.htm</a>&lt;/a&gt;&lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L313" href="#L313">313</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L314" href="#L314">314</a> <em class="jxr_javadoccomment">     * &lt;p&gt;Chas Emerick has written an implementation in Java, which avoids an OutOfMemoryError</em>
+<a class="jxr_linenumber" name="L315" href="#L315">315</a> <em class="jxr_javadoccomment">     * which can occur when the previous implementation is used with very large strings.&lt;br&gt;</em>
+<a class="jxr_linenumber" name="L316" href="#L316">316</a> <em class="jxr_javadoccomment">     * This implementation of the Levenshtein distance algorithm</em>
+<a class="jxr_linenumber" name="L317" href="#L317">317</a> <em class="jxr_javadoccomment">     * is from &lt;a href="<a href="http://www.merriampark.com/ldjava.htm" target="alexandria_uri">http://www.merriampark.com/ldjava.htm</a>"&gt;<a href="http://www.merriampark.com/ldjava.htm" target="alexandria_uri">http://www.merriampark.com/ldjava.htm</a>&lt;/a&gt;&lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L318" href="#L318">318</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L319" href="#L319">319</a> <em class="jxr_javadoccomment">     * &lt;pre&gt;</em>
+<a class="jxr_linenumber" name="L320" href="#L320">320</a> <em class="jxr_javadoccomment">     * unlimitedCompare(null, *)             = IllegalArgumentException</em>
+<a class="jxr_linenumber" name="L321" href="#L321">321</a> <em class="jxr_javadoccomment">     * unlimitedCompare(*, null)             = IllegalArgumentException</em>
+<a class="jxr_linenumber" name="L322" href="#L322">322</a> <em class="jxr_javadoccomment">     * unlimitedCompare("","")               = 0</em>
+<a class="jxr_linenumber" name="L323" href="#L323">323</a> <em class="jxr_javadoccomment">     * unlimitedCompare("","a")              = 1</em>
+<a class="jxr_linenumber" name="L324" href="#L324">324</a> <em class="jxr_javadoccomment">     * unlimitedCompare("aaapppp", "")       = 7</em>
+<a class="jxr_linenumber" name="L325" href="#L325">325</a> <em class="jxr_javadoccomment">     * unlimitedCompare("frog", "fog")       = 1</em>
+<a class="jxr_linenumber" name="L326" href="#L326">326</a> <em class="jxr_javadoccomment">     * unlimitedCompare("fly", "ant")        = 3</em>
+<a class="jxr_linenumber" name="L327" href="#L327">327</a> <em class="jxr_javadoccomment">     * unlimitedCompare("elephant", "hippo") = 7</em>
+<a class="jxr_linenumber" name="L328" href="#L328">328</a> <em class="jxr_javadoccomment">     * unlimitedCompare("hippo", "elephant") = 7</em>
+<a class="jxr_linenumber" name="L329" href="#L329">329</a> <em class="jxr_javadoccomment">     * unlimitedCompare("hippo", "zzzzzzzz") = 8</em>
+<a class="jxr_linenumber" name="L330" href="#L330">330</a> <em class="jxr_javadoccomment">     * unlimitedCompare("hello", "hallo")    = 1</em>
+<a class="jxr_linenumber" name="L331" href="#L331">331</a> <em class="jxr_javadoccomment">     * &lt;/pre&gt;</em>
+<a class="jxr_linenumber" name="L332" href="#L332">332</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L333" href="#L333">333</a> <em class="jxr_javadoccomment">     * @param left the first String, must not be null</em>
+<a class="jxr_linenumber" name="L334" href="#L334">334</a> <em class="jxr_javadoccomment">     * @param right the second String, must not be null</em>
+<a class="jxr_linenumber" name="L335" href="#L335">335</a> <em class="jxr_javadoccomment">     * @return result distance</em>
+<a class="jxr_linenumber" name="L336" href="#L336">336</a> <em class="jxr_javadoccomment">     * @throws IllegalArgumentException if either String input is {@code null}</em>
+<a class="jxr_linenumber" name="L337" href="#L337">337</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L338" href="#L338">338</a>     <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong> unlimitedCompare(CharSequence left, CharSequence right) {
+<a class="jxr_linenumber" name="L339" href="#L339">339</a>         <strong class="jxr_keyword">if</strong> (left == <strong class="jxr_keyword">null</strong> || right == <strong class="jxr_keyword">null</strong>) {
+<a class="jxr_linenumber" name="L340" href="#L340">340</a>             <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> IllegalArgumentException(<span class="jxr_string">"Strings must not be null"</span>);
+<a class="jxr_linenumber" name="L341" href="#L341">341</a>         }
+<a class="jxr_linenumber" name="L342" href="#L342">342</a> 
+<a class="jxr_linenumber" name="L343" href="#L343">343</a>         <em class="jxr_comment">/*</em>
+<a class="jxr_linenumber" name="L344" href="#L344">344</a> <em class="jxr_comment">           The difference between this impl. and the previous is that, rather</em>
+<a class="jxr_linenumber" name="L345" href="#L345">345</a> <em class="jxr_comment">           than creating and retaining a matrix of size s.length() + 1 by t.length() + 1,</em>
+<a class="jxr_linenumber" name="L346" href="#L346">346</a> <em class="jxr_comment">           we maintain two single-dimensional arrays of length s.length() + 1.  The first, d,</em>
+<a class="jxr_linenumber" name="L347" href="#L347">347</a> <em class="jxr_comment">           is the 'current working' distance array that maintains the newest distance cost</em>
+<a class="jxr_linenumber" name="L348" href="#L348">348</a> <em class="jxr_comment">           counts as we iterate through the characters of String s.  Each time we increment</em>
+<a class="jxr_linenumber" name="L349" href="#L349">349</a> <em class="jxr_comment">           the index of String t we are comparing, d is copied to p, the second int[].  Doing so</em>
+<a class="jxr_linenumber" name="L350" href="#L350">350</a> <em class="jxr_comment">           allows us to retain the previous cost counts as required by the algorithm (taking</em>
+<a class="jxr_linenumber" name="L351" href="#L351">351</a> <em class="jxr_comment">           the minimum of the cost count to the left, up one, and diagonally up and to the left</em>
+<a class="jxr_linenumber" name="L352" href="#L352">352</a> <em class="jxr_comment">           of the current cost count being calculated).  (Note that the arrays aren't really</em>
+<a class="jxr_linenumber" name="L353" href="#L353">353</a> <em class="jxr_comment">           copied anymore, just switched...this is clearly much better than cloning an array</em>
+<a class="jxr_linenumber" name="L354" href="#L354">354</a> <em class="jxr_comment">           or doing a System.arraycopy() each time  through the outer loop.)</em>
+<a class="jxr_linenumber" name="L355" href="#L355">355</a> 
+<a class="jxr_linenumber" name="L356" href="#L356">356</a> <em class="jxr_comment">           Effectively, the difference between the two implementations is this one does not</em>
+<a class="jxr_linenumber" name="L357" href="#L357">357</a> <em class="jxr_comment">           cause an out of memory condition when calculating the LD over two very large strings.</em>
+<a class="jxr_linenumber" name="L358" href="#L358">358</a> <em class="jxr_comment">         */</em>
+<a class="jxr_linenumber" name="L359" href="#L359">359</a> 
+<a class="jxr_linenumber" name="L360" href="#L360">360</a>         <strong class="jxr_keyword">int</strong> n = left.length(); <em class="jxr_comment">// length of left</em>
+<a class="jxr_linenumber" name="L361" href="#L361">361</a>         <strong class="jxr_keyword">int</strong> m = right.length(); <em class="jxr_comment">// length of right</em>
+<a class="jxr_linenumber" name="L362" href="#L362">362</a> 
+<a class="jxr_linenumber" name="L363" href="#L363">363</a>         <strong class="jxr_keyword">if</strong> (n == 0) {
+<a class="jxr_linenumber" name="L364" href="#L364">364</a>             <strong class="jxr_keyword">return</strong> m;
+<a class="jxr_linenumber" name="L365" href="#L365">365</a>         } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (m == 0) {
+<a class="jxr_linenumber" name="L366" href="#L366">366</a>             <strong class="jxr_keyword">return</strong> n;
+<a class="jxr_linenumber" name="L367" href="#L367">367</a>         }
+<a class="jxr_linenumber" name="L368" href="#L368">368</a> 
+<a class="jxr_linenumber" name="L369" href="#L369">369</a>         <strong class="jxr_keyword">if</strong> (n &gt; m) {
+<a class="jxr_linenumber" name="L370" href="#L370">370</a>             <em class="jxr_comment">// swap the input strings to consume less memory</em>
+<a class="jxr_linenumber" name="L371" href="#L371">371</a>             <strong class="jxr_keyword">final</strong> CharSequence tmp = left;
+<a class="jxr_linenumber" name="L372" href="#L372">372</a>             left = right;
+<a class="jxr_linenumber" name="L373" href="#L373">373</a>             right = tmp;
+<a class="jxr_linenumber" name="L374" href="#L374">374</a>             n = m;
+<a class="jxr_linenumber" name="L375" href="#L375">375</a>             m = right.length();
+<a class="jxr_linenumber" name="L376" href="#L376">376</a>         }
+<a class="jxr_linenumber" name="L377" href="#L377">377</a> 
+<a class="jxr_linenumber" name="L378" href="#L378">378</a>         <strong class="jxr_keyword">int</strong>[] p = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">int</strong>[n + 1]; <em class="jxr_comment">//'previous' cost array, horizontally</em>
+<a class="jxr_linenumber" name="L379" href="#L379">379</a>         <strong class="jxr_keyword">int</strong>[] d = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">int</strong>[n + 1]; <em class="jxr_comment">// cost array, horizontally</em>
+<a class="jxr_linenumber" name="L380" href="#L380">380</a>         <strong class="jxr_keyword">int</strong>[] tempD; <em class="jxr_comment">//placeholder to assist in swapping p and d</em>
+<a class="jxr_linenumber" name="L381" href="#L381">381</a> 
+<a class="jxr_linenumber" name="L382" href="#L382">382</a>         <em class="jxr_comment">// indexes into strings left and right</em>
+<a class="jxr_linenumber" name="L383" href="#L383">383</a>         <strong class="jxr_keyword">int</strong> i; <em class="jxr_comment">// iterates through left</em>
+<a class="jxr_linenumber" name="L384" href="#L384">384</a>         <strong class="jxr_keyword">int</strong> j; <em class="jxr_comment">// iterates through right</em>
+<a class="jxr_linenumber" name="L385" href="#L385">385</a> 
+<a class="jxr_linenumber" name="L386" href="#L386">386</a>         <strong class="jxr_keyword">char</strong> rightJ; <em class="jxr_comment">// jth character of right</em>
+<a class="jxr_linenumber" name="L387" href="#L387">387</a> 
+<a class="jxr_linenumber" name="L388" href="#L388">388</a>         <strong class="jxr_keyword">int</strong> cost; <em class="jxr_comment">// cost</em>
+<a class="jxr_linenumber" name="L389" href="#L389">389</a> 
+<a class="jxr_linenumber" name="L390" href="#L390">390</a>         <strong class="jxr_keyword">for</strong> (i = 0; i &lt;= n; i++) {
+<a class="jxr_linenumber" name="L391" href="#L391">391</a>             p[i] = i;
+<a class="jxr_linenumber" name="L392" href="#L392">392</a>         }
+<a class="jxr_linenumber" name="L393" href="#L393">393</a> 
+<a class="jxr_linenumber" name="L394" href="#L394">394</a>         <strong class="jxr_keyword">for</strong> (j = 1; j &lt;= m; j++) {
+<a class="jxr_linenumber" name="L395" href="#L395">395</a>             rightJ = right.charAt(j - 1);
+<a class="jxr_linenumber" name="L396" href="#L396">396</a>             d[0] = j;
+<a class="jxr_linenumber" name="L397" href="#L397">397</a> 
+<a class="jxr_linenumber" name="L398" href="#L398">398</a>             <strong class="jxr_keyword">for</strong> (i = 1; i &lt;= n; i++) {
+<a class="jxr_linenumber" name="L399" href="#L399">399</a>                 cost = left.charAt(i - 1) == rightJ ? 0 : 1;
+<a class="jxr_linenumber" name="L400" href="#L400">400</a>                 <em class="jxr_comment">// minimum of cell to the left+1, to the top+1, diagonally left and up +cost</em>
+<a class="jxr_linenumber" name="L401" href="#L401">401</a>                 d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost);
+<a class="jxr_linenumber" name="L402" href="#L402">402</a>             }
+<a class="jxr_linenumber" name="L403" href="#L403">403</a> 
+<a class="jxr_linenumber" name="L404" href="#L404">404</a>             <em class="jxr_comment">// copy current distance counts to 'previous row' distance counts</em>
+<a class="jxr_linenumber" name="L405" href="#L405">405</a>             tempD = p;
+<a class="jxr_linenumber" name="L406" href="#L406">406</a>             p = d;
+<a class="jxr_linenumber" name="L407" href="#L407">407</a>             d = tempD;
+<a class="jxr_linenumber" name="L408" href="#L408">408</a>         }
+<a class="jxr_linenumber" name="L409" href="#L409">409</a> 
+<a class="jxr_linenumber" name="L410" href="#L410">410</a>         <em class="jxr_comment">// our last action in the above loop was to switch d and p, so p now</em>
+<a class="jxr_linenumber" name="L411" href="#L411">411</a>         <em class="jxr_comment">// actually has the most recent cost counts</em>
+<a class="jxr_linenumber" name="L412" href="#L412">412</a>         <strong class="jxr_keyword">return</strong> p[n];
+<a class="jxr_linenumber" name="L413" href="#L413">413</a>     }
+<a class="jxr_linenumber" name="L414" href="#L414">414</a> 
+<a class="jxr_linenumber" name="L415" href="#L415">415</a> }
 </pre>
 <hr/>
 <div id="footer">Copyright &#169; 2014&#x2013;2015 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</div>
 </body>
-</html>
\ No newline at end of file
+</html>

Added: websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/internal/Counter.html
==============================================================================
--- websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/internal/Counter.html (added)
+++ websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/internal/Counter.html Fri Apr 17 06:46:28 2015
@@ -0,0 +1,74 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head><meta http-equiv="content-type" content="text/html; charset=UTF-8" />
+<title>Counter xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../../../apidocs/org/apache/commons/text/similarity/internal/Counter.html">View Javadoc</a></div><pre>
+<a class="jxr_linenumber" name="L1" href="#L1">1</a>   <em class="jxr_comment">/*</em>
+<a class="jxr_linenumber" name="L2" href="#L2">2</a>   <em class="jxr_comment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a class="jxr_linenumber" name="L3" href="#L3">3</a>   <em class="jxr_comment"> * contributor license agreements.  See the NOTICE file distributed with</em>
+<a class="jxr_linenumber" name="L4" href="#L4">4</a>   <em class="jxr_comment"> * this work for additional information regarding copyright ownership.</em>
+<a class="jxr_linenumber" name="L5" href="#L5">5</a>   <em class="jxr_comment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a class="jxr_linenumber" name="L6" href="#L6">6</a>   <em class="jxr_comment"> * (the "License"); you may not use this file except in compliance with</em>
+<a class="jxr_linenumber" name="L7" href="#L7">7</a>   <em class="jxr_comment"> * the License.  You may obtain a copy of the License at</em>
+<a class="jxr_linenumber" name="L8" href="#L8">8</a>   <em class="jxr_comment"> *</em>
+<a class="jxr_linenumber" name="L9" href="#L9">9</a>   <em class="jxr_comment"> *      <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a class="jxr_linenumber" name="L10" href="#L10">10</a>  <em class="jxr_comment"> *</em>
+<a class="jxr_linenumber" name="L11" href="#L11">11</a>  <em class="jxr_comment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a class="jxr_linenumber" name="L12" href="#L12">12</a>  <em class="jxr_comment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a class="jxr_linenumber" name="L13" href="#L13">13</a>  <em class="jxr_comment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a class="jxr_linenumber" name="L14" href="#L14">14</a>  <em class="jxr_comment"> * See the License for the specific language governing permissions and</em>
+<a class="jxr_linenumber" name="L15" href="#L15">15</a>  <em class="jxr_comment"> * limitations under the License.</em>
+<a class="jxr_linenumber" name="L16" href="#L16">16</a>  <em class="jxr_comment"> */</em>
+<a class="jxr_linenumber" name="L17" href="#L17">17</a>  <strong class="jxr_keyword">package</strong> org.apache.commons.text.similarity.internal;
+<a class="jxr_linenumber" name="L18" href="#L18">18</a>  
+<a class="jxr_linenumber" name="L19" href="#L19">19</a>  <strong class="jxr_keyword">import</strong> java.util.HashMap;
+<a class="jxr_linenumber" name="L20" href="#L20">20</a>  <strong class="jxr_keyword">import</strong> java.util.Map;
+<a class="jxr_linenumber" name="L21" href="#L21">21</a>  
+<a class="jxr_linenumber" name="L22" href="#L22">22</a>  <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L23" href="#L23">23</a>  <em class="jxr_javadoccomment"> * Java implementation of Python's collections.Counter class.</em>
+<a class="jxr_linenumber" name="L24" href="#L24">24</a>  <em class="jxr_javadoccomment"> *</em>
+<a class="jxr_linenumber" name="L25" href="#L25">25</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;It counts how many times each element provided occurred in an array and</em>
+<a class="jxr_linenumber" name="L26" href="#L26">26</a>  <em class="jxr_javadoccomment"> * returns a dict with the element as key and the count as value.&lt;/p&gt;</em>
+<a class="jxr_linenumber" name="L27" href="#L27">27</a>  <em class="jxr_javadoccomment"> *</em>
+<a class="jxr_linenumber" name="L28" href="#L28">28</a>  <em class="jxr_javadoccomment"> * @see &lt;a href="https://docs.python.org/dev/library/collections.html#collections.Counter"&gt;</em>
+<a class="jxr_linenumber" name="L29" href="#L29">29</a>  <em class="jxr_javadoccomment"> * https://docs.python.org/dev/library/collections.html#collections.Counter&lt;/a&gt;</em>
+<a class="jxr_linenumber" name="L30" href="#L30">30</a>  <em class="jxr_javadoccomment"> * @since 0.1</em>
+<a class="jxr_linenumber" name="L31" href="#L31">31</a>  <em class="jxr_javadoccomment"> */</em>
+<a class="jxr_linenumber" name="L32" href="#L32">32</a>  <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../../../org/apache/commons/text/similarity/internal/Counter.html">Counter</a> {
+<a class="jxr_linenumber" name="L33" href="#L33">33</a>  
+<a class="jxr_linenumber" name="L34" href="#L34">34</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L35" href="#L35">35</a>  <em class="jxr_javadoccomment">     * Hidden constructor.</em>
+<a class="jxr_linenumber" name="L36" href="#L36">36</a>  <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L37" href="#L37">37</a>      <strong class="jxr_keyword">private</strong> <a href="../../../../../../org/apache/commons/text/similarity/internal/Counter.html">Counter</a>() {
+<a class="jxr_linenumber" name="L38" href="#L38">38</a>          <strong class="jxr_keyword">super</strong>();
+<a class="jxr_linenumber" name="L39" href="#L39">39</a>      }
+<a class="jxr_linenumber" name="L40" href="#L40">40</a>  
+<a class="jxr_linenumber" name="L41" href="#L41">41</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L42" href="#L42">42</a>  <em class="jxr_javadoccomment">     * It counts how many times each element provided occurred in an array and</em>
+<a class="jxr_linenumber" name="L43" href="#L43">43</a>  <em class="jxr_javadoccomment">     * returns a dict with the element as key and the count as value.</em>
+<a class="jxr_linenumber" name="L44" href="#L44">44</a>  <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="L45" href="#L45">45</a>  <em class="jxr_javadoccomment">     * @param tokens array of tokens</em>
+<a class="jxr_linenumber" name="L46" href="#L46">46</a>  <em class="jxr_javadoccomment">     * @return dict, where the elements are key, and the count the value</em>
+<a class="jxr_linenumber" name="L47" href="#L47">47</a>  <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="L48" href="#L48">48</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">static</strong> Map&lt;CharSequence, Integer&gt; of(CharSequence[] tokens) {
+<a class="jxr_linenumber" name="L49" href="#L49">49</a>          <strong class="jxr_keyword">final</strong> Map&lt;CharSequence, Integer&gt; innerCounter = <strong class="jxr_keyword">new</strong> HashMap&lt;CharSequence, Integer&gt;();
+<a class="jxr_linenumber" name="L50" href="#L50">50</a>          <strong class="jxr_keyword">for</strong> (CharSequence token : tokens) {
+<a class="jxr_linenumber" name="L51" href="#L51">51</a>              <strong class="jxr_keyword">if</strong> (innerCounter.containsKey(token)) {
+<a class="jxr_linenumber" name="L52" href="#L52">52</a>                  <strong class="jxr_keyword">int</strong> value = innerCounter.get(token);
+<a class="jxr_linenumber" name="L53" href="#L53">53</a>                  innerCounter.put(token, ++value);
+<a class="jxr_linenumber" name="L54" href="#L54">54</a>              } <strong class="jxr_keyword">else</strong> {
+<a class="jxr_linenumber" name="L55" href="#L55">55</a>                  innerCounter.put(token, 1);
+<a class="jxr_linenumber" name="L56" href="#L56">56</a>              }
+<a class="jxr_linenumber" name="L57" href="#L57">57</a>          }
+<a class="jxr_linenumber" name="L58" href="#L58">58</a>          <strong class="jxr_keyword">return</strong> innerCounter;
+<a class="jxr_linenumber" name="L59" href="#L59">59</a>      }
+<a class="jxr_linenumber" name="L60" href="#L60">60</a>  
+<a class="jxr_linenumber" name="L61" href="#L61">61</a>  }
+</pre>
+<hr/>
+<div id="footer">Copyright &#169; 2014&#x2013;2015 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</div>
+</body>
+</html>



Mime
View raw message