commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chtom...@apache.org
Subject [1/4] [text] TEXT-21: Javadoc fixes and adding SimilarityScore
Date Thu, 17 Nov 2016 12:14:12 GMT
Repository: commons-text
Updated Branches:
  refs/heads/master 594aa29b8 -> 5888d4945


TEXT-21: Javadoc fixes and adding SimilarityScore


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/c5f724c1
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/c5f724c1
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/c5f724c1

Branch: refs/heads/master
Commit: c5f724c1a38ed738d2143c950ad3a5f745ad4dba
Parents: 594aa29
Author: Rob Tompkins <chtompki@gmail.com>
Authored: Wed Nov 16 07:28:23 2016 -0500
Committer: Rob Tompkins <chtompki@gmail.com>
Committed: Wed Nov 16 07:28:23 2016 -0500

----------------------------------------------------------------------
 .../org/apache/commons/text/StrBuilder.java     |  2 +-
 .../commons/text/similarity/EditDistance.java   | 16 +++++--
 .../text/similarity/SimilarityScore.java        | 45 ++++++++++++++++++++
 3 files changed, 59 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/c5f724c1/src/main/java/org/apache/commons/text/StrBuilder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/StrBuilder.java b/src/main/java/org/apache/commons/text/StrBuilder.java
index 5aa4a31..8221e07 100644
--- a/src/main/java/org/apache/commons/text/StrBuilder.java
+++ b/src/main/java/org/apache/commons/text/StrBuilder.java
@@ -62,7 +62,7 @@ import java.util.Objects;
  * with invalid indices or null input, have been altered - see individual methods.
  * The biggest of these changes is that by default, null will not output the text
  * 'null'. This can be controlled by a property, {@link #setNullText(String)}.
- * <p>
+ * </p>
  *
  */
 public class StrBuilder implements CharSequence, Appendable, Serializable, Builder<String> {

http://git-wip-us.apache.org/repos/asf/commons-text/blob/c5f724c1/src/main/java/org/apache/commons/text/similarity/EditDistance.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/EditDistance.java b/src/main/java/org/apache/commons/text/similarity/EditDistance.java
index 8407601..fd6a024 100644
--- a/src/main/java/org/apache/commons/text/similarity/EditDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/EditDistance.java
@@ -20,9 +20,19 @@ package org.apache.commons.text.similarity;
  * Interface for <a href="http://en.wikipedia.org/wiki/Edit_distance">Edit Distances</a>.
  *
  * <p>
- * A edit distance measures the similarity between two character sequences. Closer strings
- * have shorter distances, and vice-versa.
+ * An edit distance is a formal metric on the Kleene closure (<code>X<sup>*</sup></code>) over an
+ * alphabet (<code>X</code>). Note, that a <a href="https://en.wikipedia.org/wiki/Metric_(mathematics)">metric</a>
+ * on a set <code>S</code> is a function <code>d: [S * S] -&gt; [0, INFINITY)</code> such
+ * that the following hold for <code>x,y,z</code> in
+ * the set <code>S</code>:
  * </p>
+ * <ul>
+ *     <li><code>d(x,y) &gt;= 0</code>, non-negativity or separation axiom</li>
+ *     <li><code>d(x,y) == 0</code>, if and only if, <code>x == y</code></li>
+ *     <li><code>d(x,y) == d(y,x)</code>, symmetry, and</li>
+ *     <li><code>d(x,z) &lt;=  d(x,y) + d(y,z)</code>, the triangle inequality</li>
+ * </ul>
+ *
  *
  * <p>
  * This is a BiFunction&lt;CharSequence, CharSequence, R&gt;.
@@ -33,7 +43,7 @@ package org.apache.commons.text.similarity;
  *
  * @param <R> The type of similarity score unit used by this EditDistance.
  */
-public interface EditDistance<R> {
+public interface EditDistance<R> extends SimilarityScore<R> {
 
     /**
      * Compares two CharSequences.

http://git-wip-us.apache.org/repos/asf/commons-text/blob/c5f724c1/src/main/java/org/apache/commons/text/similarity/SimilarityScore.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/SimilarityScore.java b/src/main/java/org/apache/commons/text/similarity/SimilarityScore.java
new file mode 100644
index 0000000..c19645c
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/SimilarityScore.java
@@ -0,0 +1,45 @@
+package org.apache.commons.text.similarity;
+
+/**
+ * Interface for the concept of a string similarity score.
+ *
+ * <p>
+ * A string similarity score is intended to have <i>some</i> of the properties of a metric, yet
+ * allowing for exceptions, namely the Jaro-Winkler similarity score.
+ * </p>
+ * <p>
+ * We Define a SimilarityScore to be a function <code>d: [X * X] -&gt; [0, INFINITY)</code> with the
+ * following properties:
+ * </p>
+ * <ul>
+ *     <li><code>d(x,y) &gt;= 0</code>, non-negativity or separation axiom</li>
+ *     <li><code>d(x,y) == d(y,x)</code>, symmetry.</li>
+ * </ul>
+ * <p>
+ * Notice, these are two of the properties that contribute to d being a metric.
+ * </p>
+ *
+ *
+ * <p>
+ * Further, this intended to be BiFunction&lt;CharSequence, CharSequence, R&gt;.
+ * The <code>apply</code> method
+ * accepts a pair of {@link CharSequence} parameters
+ * and returns an <code>R</code> type similarity score. We have ommitted the explicit
+ * statement of extending BiFunction due to it only being implemented in Java 1.8, and we
+ * wish to maintain Java 1.7 compatibility.
+ * </p>
+ *
+ * @param <R> The type of similarity score unit used by this EditDistance.
+ */
+public interface SimilarityScore<R> {
+
+    /**
+     * Compares two CharSequences.
+     *
+     * @param left the first CharSequence
+     * @param right the second CharSequence
+     * @return the similarity score between two CharSequences
+     */
+    R apply(CharSequence left, CharSequence right);
+
+}


Mime
View raw message