commons-notifications mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chtom...@apache.org
Subject svn commit: r1010276 [7/7] - in /websites/production/commons/content/proper/commons-text: ./ apidocs/org/apache/commons/text/similarity/ apidocs/src-html/org/apache/commons/text/similarity/ jacoco-aggregate/ jacoco/ jacoco/org.apache.commons.text.simil...
Date Wed, 12 Apr 2017 00:35:38 GMT
Modified: websites/production/commons/content/proper/commons-text/xref/org/apache/commons/text/similarity/JaroWinklerDistance.html
==============================================================================
--- websites/production/commons/content/proper/commons-text/xref/org/apache/commons/text/similarity/JaroWinklerDistance.html
(original)
+++ websites/production/commons/content/proper/commons-text/xref/org/apache/commons/text/similarity/JaroWinklerDistance.html
Wed Apr 12 00:35:37 2017
@@ -83,86 +83,85 @@
 <a class="jxr_linenumber" name="L75" href="#L75">75</a>      @Override
 <a class="jxr_linenumber" name="L76" href="#L76">76</a>      <strong class="jxr_keyword">public</strong>
Double apply(<strong class="jxr_keyword">final</strong> CharSequence left, <strong
class="jxr_keyword">final</strong> CharSequence right) {
 <a class="jxr_linenumber" name="L77" href="#L77">77</a>          <strong class="jxr_keyword">final</strong>
<strong class="jxr_keyword">double</strong> defaultScalingFactor = 0.1;
-<a class="jxr_linenumber" name="L78" href="#L78">78</a>          <strong class="jxr_keyword">final</strong>
<strong class="jxr_keyword">double</strong> percentageRoundValue = 100.0;
-<a class="jxr_linenumber" name="L79" href="#L79">79</a>  
-<a class="jxr_linenumber" name="L80" href="#L80">80</a>          <strong class="jxr_keyword">if</strong>
(left == <strong class="jxr_keyword">null</strong> || right == <strong class="jxr_keyword">null</strong>)
{
-<a class="jxr_linenumber" name="L81" href="#L81">81</a>              <strong
class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong>
IllegalArgumentException(<span class="jxr_string">"Strings must not be null"</span>);
-<a class="jxr_linenumber" name="L82" href="#L82">82</a>          }
-<a class="jxr_linenumber" name="L83" href="#L83">83</a>  
-<a class="jxr_linenumber" name="L84" href="#L84">84</a>          <strong class="jxr_keyword">int</strong>[]
mtp = matches(left, right);
-<a class="jxr_linenumber" name="L85" href="#L85">85</a>          <strong class="jxr_keyword">double</strong>
m = mtp[0];
-<a class="jxr_linenumber" name="L86" href="#L86">86</a>          <strong class="jxr_keyword">if</strong>
(m == 0) {
-<a class="jxr_linenumber" name="L87" href="#L87">87</a>              <strong
class="jxr_keyword">return</strong> 0D;
-<a class="jxr_linenumber" name="L88" href="#L88">88</a>          }
-<a class="jxr_linenumber" name="L89" href="#L89">89</a>          <strong class="jxr_keyword">double</strong>
j = ((m / left.length() + m / right.length() + (m - mtp[1]) / m)) / 3;
-<a class="jxr_linenumber" name="L90" href="#L90">90</a>          <strong class="jxr_keyword">double</strong>
jw = j &lt; 0.7D ? j : j + Math.min(defaultScalingFactor, 1D / mtp[3]) * mtp[2] * (1D
- j);
-<a class="jxr_linenumber" name="L91" href="#L91">91</a>          <strong class="jxr_keyword">return</strong>
Math.round(jw * percentageRoundValue) / percentageRoundValue;
-<a class="jxr_linenumber" name="L92" href="#L92">92</a>      }
-<a class="jxr_linenumber" name="L93" href="#L93">93</a>  
-<a class="jxr_linenumber" name="L94" href="#L94">94</a>      <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="L95" href="#L95">95</a>  <em class="jxr_javadoccomment">
    * This method returns the Jaro-Winkler string matches, transpositions, prefix, max array.</em>
-<a class="jxr_linenumber" name="L96" href="#L96">96</a>  <em class="jxr_javadoccomment">
    *</em>
-<a class="jxr_linenumber" name="L97" href="#L97">97</a>  <em class="jxr_javadoccomment">
    * @param first the first string to be matched</em>
-<a class="jxr_linenumber" name="L98" href="#L98">98</a>  <em class="jxr_javadoccomment">
    * @param second the second string to be machted</em>
-<a class="jxr_linenumber" name="L99" href="#L99">99</a>  <em class="jxr_javadoccomment">
    * @return mtp array containing: matches, transpositions, prefix, and max length</em>
-<a class="jxr_linenumber" name="L100" href="#L100">100</a> <em class="jxr_javadoccomment">
    */</em>
-<a class="jxr_linenumber" name="L101" href="#L101">101</a>     <strong class="jxr_keyword">protected</strong>
<strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong>[]
matches(<strong class="jxr_keyword">final</strong> CharSequence first, <strong
class="jxr_keyword">final</strong> CharSequence second) {
-<a class="jxr_linenumber" name="L102" href="#L102">102</a>         CharSequence
max, min;
-<a class="jxr_linenumber" name="L103" href="#L103">103</a>         <strong
class="jxr_keyword">if</strong> (first.length() &gt; second.length()) {
-<a class="jxr_linenumber" name="L104" href="#L104">104</a>             max =
first;
-<a class="jxr_linenumber" name="L105" href="#L105">105</a>             min =
second;
-<a class="jxr_linenumber" name="L106" href="#L106">106</a>         } <strong
class="jxr_keyword">else</strong> {
-<a class="jxr_linenumber" name="L107" href="#L107">107</a>             max =
second;
-<a class="jxr_linenumber" name="L108" href="#L108">108</a>             min =
first;
-<a class="jxr_linenumber" name="L109" href="#L109">109</a>         }
-<a class="jxr_linenumber" name="L110" href="#L110">110</a>         <strong
class="jxr_keyword">int</strong> range = Math.max(max.length() / 2 - 1, 0);
-<a class="jxr_linenumber" name="L111" href="#L111">111</a>         <strong
class="jxr_keyword">int</strong>[] matchIndexes = <strong class="jxr_keyword">new</strong>
<strong class="jxr_keyword">int</strong>[min.length()];
-<a class="jxr_linenumber" name="L112" href="#L112">112</a>         Arrays.fill(matchIndexes,
-1);
-<a class="jxr_linenumber" name="L113" href="#L113">113</a>         <strong
class="jxr_keyword">boolean</strong>[] matchFlags = <strong class="jxr_keyword">new</strong>
<strong class="jxr_keyword">boolean</strong>[max.length()];
-<a class="jxr_linenumber" name="L114" href="#L114">114</a>         <strong
class="jxr_keyword">int</strong> matches = 0;
-<a class="jxr_linenumber" name="L115" href="#L115">115</a>         <strong
class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong>
mi = 0; mi &lt; min.length(); mi++) {
-<a class="jxr_linenumber" name="L116" href="#L116">116</a>             <strong
class="jxr_keyword">char</strong> c1 = min.charAt(mi);
-<a class="jxr_linenumber" name="L117" href="#L117">117</a>             <strong
class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong>
xi = Math.max(mi - range, 0), xn = Math.min(mi + range + 1, max.length()); xi &lt; xn;
xi++) {
-<a class="jxr_linenumber" name="L118" href="#L118">118</a>                 <strong
class="jxr_keyword">if</strong> (!matchFlags[xi] &amp;&amp; c1 == max.charAt(xi))
{
-<a class="jxr_linenumber" name="L119" href="#L119">119</a>                  
  matchIndexes[mi] = xi;
-<a class="jxr_linenumber" name="L120" href="#L120">120</a>                  
  matchFlags[xi] = <strong class="jxr_keyword">true</strong>;
-<a class="jxr_linenumber" name="L121" href="#L121">121</a>                  
  matches++;
-<a class="jxr_linenumber" name="L122" href="#L122">122</a>                  
  <strong class="jxr_keyword">break</strong>;
-<a class="jxr_linenumber" name="L123" href="#L123">123</a>                 }
-<a class="jxr_linenumber" name="L124" href="#L124">124</a>             }
-<a class="jxr_linenumber" name="L125" href="#L125">125</a>         }
-<a class="jxr_linenumber" name="L126" href="#L126">126</a>         <strong
class="jxr_keyword">char</strong>[] ms1 = <strong class="jxr_keyword">new</strong>
<strong class="jxr_keyword">char</strong>[matches];
-<a class="jxr_linenumber" name="L127" href="#L127">127</a>         <strong
class="jxr_keyword">char</strong>[] ms2 = <strong class="jxr_keyword">new</strong>
<strong class="jxr_keyword">char</strong>[matches];
-<a class="jxr_linenumber" name="L128" href="#L128">128</a>         <strong
class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong>
i = 0, si = 0; i &lt; min.length(); i++) {
-<a class="jxr_linenumber" name="L129" href="#L129">129</a>             <strong
class="jxr_keyword">if</strong> (matchIndexes[i] != -1) {
-<a class="jxr_linenumber" name="L130" href="#L130">130</a>                 ms1[si]
= min.charAt(i);
-<a class="jxr_linenumber" name="L131" href="#L131">131</a>                 si++;
-<a class="jxr_linenumber" name="L132" href="#L132">132</a>             }
-<a class="jxr_linenumber" name="L133" href="#L133">133</a>         }
-<a class="jxr_linenumber" name="L134" href="#L134">134</a>         <strong
class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong>
i = 0, si = 0; i &lt; max.length(); i++) {
-<a class="jxr_linenumber" name="L135" href="#L135">135</a>             <strong
class="jxr_keyword">if</strong> (matchFlags[i]) {
-<a class="jxr_linenumber" name="L136" href="#L136">136</a>                 ms2[si]
= max.charAt(i);
-<a class="jxr_linenumber" name="L137" href="#L137">137</a>                 si++;
-<a class="jxr_linenumber" name="L138" href="#L138">138</a>             }
-<a class="jxr_linenumber" name="L139" href="#L139">139</a>         }
-<a class="jxr_linenumber" name="L140" href="#L140">140</a>         <strong
class="jxr_keyword">int</strong> transpositions = 0;
-<a class="jxr_linenumber" name="L141" href="#L141">141</a>         <strong
class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong>
mi = 0; mi &lt; ms1.length; mi++) {
-<a class="jxr_linenumber" name="L142" href="#L142">142</a>             <strong
class="jxr_keyword">if</strong> (ms1[mi] != ms2[mi]) {
-<a class="jxr_linenumber" name="L143" href="#L143">143</a>                 transpositions++;
-<a class="jxr_linenumber" name="L144" href="#L144">144</a>             }
-<a class="jxr_linenumber" name="L145" href="#L145">145</a>         }
-<a class="jxr_linenumber" name="L146" href="#L146">146</a>         <strong
class="jxr_keyword">int</strong> prefix = 0;
-<a class="jxr_linenumber" name="L147" href="#L147">147</a>         <strong
class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong>
mi = 0; mi &lt; min.length(); mi++) {
-<a class="jxr_linenumber" name="L148" href="#L148">148</a>             <strong
class="jxr_keyword">if</strong> (first.charAt(mi) == second.charAt(mi)) {
-<a class="jxr_linenumber" name="L149" href="#L149">149</a>                 prefix++;
-<a class="jxr_linenumber" name="L150" href="#L150">150</a>             } <strong
class="jxr_keyword">else</strong> {
-<a class="jxr_linenumber" name="L151" href="#L151">151</a>                 <strong
class="jxr_keyword">break</strong>;
-<a class="jxr_linenumber" name="L152" href="#L152">152</a>             }
-<a class="jxr_linenumber" name="L153" href="#L153">153</a>         }
-<a class="jxr_linenumber" name="L154" href="#L154">154</a>         <strong
class="jxr_keyword">return</strong> <strong class="jxr_keyword">new</strong>
<strong class="jxr_keyword">int</strong>[] { matches, transpositions / 2, prefix,
max.length() };
-<a class="jxr_linenumber" name="L155" href="#L155">155</a>     }
-<a class="jxr_linenumber" name="L156" href="#L156">156</a> 
-<a class="jxr_linenumber" name="L157" href="#L157">157</a> }
+<a class="jxr_linenumber" name="L78" href="#L78">78</a>  
+<a class="jxr_linenumber" name="L79" href="#L79">79</a>          <strong class="jxr_keyword">if</strong>
(left == <strong class="jxr_keyword">null</strong> || right == <strong class="jxr_keyword">null</strong>)
{
+<a class="jxr_linenumber" name="L80" href="#L80">80</a>              <strong
class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong>
IllegalArgumentException(<span class="jxr_string">"Strings must not be null"</span>);
+<a class="jxr_linenumber" name="L81" href="#L81">81</a>          }
+<a class="jxr_linenumber" name="L82" href="#L82">82</a>  
+<a class="jxr_linenumber" name="L83" href="#L83">83</a>          <strong class="jxr_keyword">int</strong>[]
mtp = matches(left, right);
+<a class="jxr_linenumber" name="L84" href="#L84">84</a>          <strong class="jxr_keyword">double</strong>
m = mtp[0];
+<a class="jxr_linenumber" name="L85" href="#L85">85</a>          <strong class="jxr_keyword">if</strong>
(m == 0) {
+<a class="jxr_linenumber" name="L86" href="#L86">86</a>              <strong
class="jxr_keyword">return</strong> 0D;
+<a class="jxr_linenumber" name="L87" href="#L87">87</a>          }
+<a class="jxr_linenumber" name="L88" href="#L88">88</a>          <strong class="jxr_keyword">double</strong>
j = ((m / left.length() + m / right.length() + (m - mtp[1]) / m)) / 3;
+<a class="jxr_linenumber" name="L89" href="#L89">89</a>          <strong class="jxr_keyword">double</strong>
jw = j &lt; 0.7D ? j : j + Math.min(defaultScalingFactor, 1D / mtp[3]) * mtp[2] * (1D
- j);
+<a class="jxr_linenumber" name="L90" href="#L90">90</a>          <strong class="jxr_keyword">return</strong>
jw;
+<a class="jxr_linenumber" name="L91" href="#L91">91</a>      }
+<a class="jxr_linenumber" name="L92" href="#L92">92</a>  
+<a class="jxr_linenumber" name="L93" href="#L93">93</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L94" href="#L94">94</a>  <em class="jxr_javadoccomment">
    * This method returns the Jaro-Winkler string matches, transpositions, prefix, max array.</em>
+<a class="jxr_linenumber" name="L95" href="#L95">95</a>  <em class="jxr_javadoccomment">
    *</em>
+<a class="jxr_linenumber" name="L96" href="#L96">96</a>  <em class="jxr_javadoccomment">
    * @param first the first string to be matched</em>
+<a class="jxr_linenumber" name="L97" href="#L97">97</a>  <em class="jxr_javadoccomment">
    * @param second the second string to be machted</em>
+<a class="jxr_linenumber" name="L98" href="#L98">98</a>  <em class="jxr_javadoccomment">
    * @return mtp array containing: matches, transpositions, prefix, and max length</em>
+<a class="jxr_linenumber" name="L99" href="#L99">99</a>  <em class="jxr_javadoccomment">
    */</em>
+<a class="jxr_linenumber" name="L100" href="#L100">100</a>     <strong class="jxr_keyword">protected</strong>
<strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong>[]
matches(<strong class="jxr_keyword">final</strong> CharSequence first, <strong
class="jxr_keyword">final</strong> CharSequence second) {
+<a class="jxr_linenumber" name="L101" href="#L101">101</a>         CharSequence
max, min;
+<a class="jxr_linenumber" name="L102" href="#L102">102</a>         <strong
class="jxr_keyword">if</strong> (first.length() &gt; second.length()) {
+<a class="jxr_linenumber" name="L103" href="#L103">103</a>             max =
first;
+<a class="jxr_linenumber" name="L104" href="#L104">104</a>             min =
second;
+<a class="jxr_linenumber" name="L105" href="#L105">105</a>         } <strong
class="jxr_keyword">else</strong> {
+<a class="jxr_linenumber" name="L106" href="#L106">106</a>             max =
second;
+<a class="jxr_linenumber" name="L107" href="#L107">107</a>             min =
first;
+<a class="jxr_linenumber" name="L108" href="#L108">108</a>         }
+<a class="jxr_linenumber" name="L109" href="#L109">109</a>         <strong
class="jxr_keyword">int</strong> range = Math.max(max.length() / 2 - 1, 0);
+<a class="jxr_linenumber" name="L110" href="#L110">110</a>         <strong
class="jxr_keyword">int</strong>[] matchIndexes = <strong class="jxr_keyword">new</strong>
<strong class="jxr_keyword">int</strong>[min.length()];
+<a class="jxr_linenumber" name="L111" href="#L111">111</a>         Arrays.fill(matchIndexes,
-1);
+<a class="jxr_linenumber" name="L112" href="#L112">112</a>         <strong
class="jxr_keyword">boolean</strong>[] matchFlags = <strong class="jxr_keyword">new</strong>
<strong class="jxr_keyword">boolean</strong>[max.length()];
+<a class="jxr_linenumber" name="L113" href="#L113">113</a>         <strong
class="jxr_keyword">int</strong> matches = 0;
+<a class="jxr_linenumber" name="L114" href="#L114">114</a>         <strong
class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong>
mi = 0; mi &lt; min.length(); mi++) {
+<a class="jxr_linenumber" name="L115" href="#L115">115</a>             <strong
class="jxr_keyword">char</strong> c1 = min.charAt(mi);
+<a class="jxr_linenumber" name="L116" href="#L116">116</a>             <strong
class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong>
xi = Math.max(mi - range, 0), xn = Math.min(mi + range + 1, max.length()); xi &lt; xn;
xi++) {
+<a class="jxr_linenumber" name="L117" href="#L117">117</a>                 <strong
class="jxr_keyword">if</strong> (!matchFlags[xi] &amp;&amp; c1 == max.charAt(xi))
{
+<a class="jxr_linenumber" name="L118" href="#L118">118</a>                  
  matchIndexes[mi] = xi;
+<a class="jxr_linenumber" name="L119" href="#L119">119</a>                  
  matchFlags[xi] = <strong class="jxr_keyword">true</strong>;
+<a class="jxr_linenumber" name="L120" href="#L120">120</a>                  
  matches++;
+<a class="jxr_linenumber" name="L121" href="#L121">121</a>                  
  <strong class="jxr_keyword">break</strong>;
+<a class="jxr_linenumber" name="L122" href="#L122">122</a>                 }
+<a class="jxr_linenumber" name="L123" href="#L123">123</a>             }
+<a class="jxr_linenumber" name="L124" href="#L124">124</a>         }
+<a class="jxr_linenumber" name="L125" href="#L125">125</a>         <strong
class="jxr_keyword">char</strong>[] ms1 = <strong class="jxr_keyword">new</strong>
<strong class="jxr_keyword">char</strong>[matches];
+<a class="jxr_linenumber" name="L126" href="#L126">126</a>         <strong
class="jxr_keyword">char</strong>[] ms2 = <strong class="jxr_keyword">new</strong>
<strong class="jxr_keyword">char</strong>[matches];
+<a class="jxr_linenumber" name="L127" href="#L127">127</a>         <strong
class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong>
i = 0, si = 0; i &lt; min.length(); i++) {
+<a class="jxr_linenumber" name="L128" href="#L128">128</a>             <strong
class="jxr_keyword">if</strong> (matchIndexes[i] != -1) {
+<a class="jxr_linenumber" name="L129" href="#L129">129</a>                 ms1[si]
= min.charAt(i);
+<a class="jxr_linenumber" name="L130" href="#L130">130</a>                 si++;
+<a class="jxr_linenumber" name="L131" href="#L131">131</a>             }
+<a class="jxr_linenumber" name="L132" href="#L132">132</a>         }
+<a class="jxr_linenumber" name="L133" href="#L133">133</a>         <strong
class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong>
i = 0, si = 0; i &lt; max.length(); i++) {
+<a class="jxr_linenumber" name="L134" href="#L134">134</a>             <strong
class="jxr_keyword">if</strong> (matchFlags[i]) {
+<a class="jxr_linenumber" name="L135" href="#L135">135</a>                 ms2[si]
= max.charAt(i);
+<a class="jxr_linenumber" name="L136" href="#L136">136</a>                 si++;
+<a class="jxr_linenumber" name="L137" href="#L137">137</a>             }
+<a class="jxr_linenumber" name="L138" href="#L138">138</a>         }
+<a class="jxr_linenumber" name="L139" href="#L139">139</a>         <strong
class="jxr_keyword">int</strong> transpositions = 0;
+<a class="jxr_linenumber" name="L140" href="#L140">140</a>         <strong
class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong>
mi = 0; mi &lt; ms1.length; mi++) {
+<a class="jxr_linenumber" name="L141" href="#L141">141</a>             <strong
class="jxr_keyword">if</strong> (ms1[mi] != ms2[mi]) {
+<a class="jxr_linenumber" name="L142" href="#L142">142</a>                 transpositions++;
+<a class="jxr_linenumber" name="L143" href="#L143">143</a>             }
+<a class="jxr_linenumber" name="L144" href="#L144">144</a>         }
+<a class="jxr_linenumber" name="L145" href="#L145">145</a>         <strong
class="jxr_keyword">int</strong> prefix = 0;
+<a class="jxr_linenumber" name="L146" href="#L146">146</a>         <strong
class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong>
mi = 0; mi &lt; min.length(); mi++) {
+<a class="jxr_linenumber" name="L147" href="#L147">147</a>             <strong
class="jxr_keyword">if</strong> (first.charAt(mi) == second.charAt(mi)) {
+<a class="jxr_linenumber" name="L148" href="#L148">148</a>                 prefix++;
+<a class="jxr_linenumber" name="L149" href="#L149">149</a>             } <strong
class="jxr_keyword">else</strong> {
+<a class="jxr_linenumber" name="L150" href="#L150">150</a>                 <strong
class="jxr_keyword">break</strong>;
+<a class="jxr_linenumber" name="L151" href="#L151">151</a>             }
+<a class="jxr_linenumber" name="L152" href="#L152">152</a>         }
+<a class="jxr_linenumber" name="L153" href="#L153">153</a>         <strong
class="jxr_keyword">return</strong> <strong class="jxr_keyword">new</strong>
<strong class="jxr_keyword">int</strong>[] { matches, transpositions / 2, prefix,
max.length() };
+<a class="jxr_linenumber" name="L154" href="#L154">154</a>     }
+<a class="jxr_linenumber" name="L155" href="#L155">155</a> 
+<a class="jxr_linenumber" name="L156" href="#L156">156</a> }
 </pre>
 <hr/>
 <div id="footer">Copyright &#169; 2014&#x2013;2017 <a href="https://www.apache.org/">The
Apache Software Foundation</a>. All rights reserved.</div>



Mime
View raw message