commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chtom...@apache.org
Subject [3/4] [text] TEXT-21: Adding "SimilarityScoreFrom", fixing unit tests
Date Thu, 17 Nov 2016 12:14:14 GMT
TEXT-21: Adding "SimilarityScoreFrom", fixing unit tests


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/7909652f
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/7909652f
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/7909652f

Branch: refs/heads/master
Commit: 7909652f51344c1206699518ae4cd510b8532765
Parents: 7af39fb
Author: Rob Tompkins <chtompki@gmail.com>
Authored: Wed Nov 16 13:52:47 2016 -0500
Committer: Rob Tompkins <chtompki@gmail.com>
Committed: Wed Nov 16 13:52:47 2016 -0500

----------------------------------------------------------------------
 .../text/similarity/SimilarityScoreFrom.java    | 111 +++++++++++++++++++
 .../ParameterizedEditDistanceFromTest.java      |   4 -
 .../ParameterizedSimilarityScoreFromTest.java   |  64 +++++++++++
 3 files changed, 175 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/7909652f/src/main/java/org/apache/commons/text/similarity/SimilarityScoreFrom.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/SimilarityScoreFrom.java b/src/main/java/org/apache/commons/text/similarity/SimilarityScoreFrom.java
new file mode 100644
index 0000000..475e0ed
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/SimilarityScoreFrom.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+/**
+ * <p>
+ * This stores a {@link SimilarityScore} implementation and a {@link CharSequence} "left" string.
+ * The {@link #apply(CharSequence right)} method accepts the "right" string and invokes the
+ * comparison function for the pair of strings.
+ * </p>
+ *
+ * <p>
+ * The following is an example which finds the most similar string:
+ * </p>
+ * <pre>
+ * SimilarityScore&lt;Integer&gt; similarityScore = new LevenshteinDistance();
+ * String target = "Apache";
+ * SimilarityScoreFrom&lt;Integer&gt; similarityScoreFrom =
+ *     new SimilarityScoreFrom&lt;Integer&gt;(similarityScore, target);
+ * String mostSimilar = null;
+ * Integer shortestDistance = null;
+ *
+ * for (String test : new String[] { "Appaloosa", "a patchy", "apple" }) {
+ *     Integer distance = similarityScoreFrom.apply(test);
+ *     if (shortestDistance == null || distance &lt; shortestDistance) {
+ *         shortestDistance = distance;
+ *         mostSimilar = test;
+ *     }
+ * }
+ *
+ * System.out.println("The string most similar to \"" + target + "\" "
+ *     + "is \"" + mostSimilar + "\" because "
+ *     + "its distance is only " + shortestDistance + ".");
+ * </pre>
+ *
+ * @param <R> This is the type of similarity score used by the SimilarityScore function.
+ */
+public class SimilarityScoreFrom<R> {
+
+    /**
+     * Similarity score.
+     */
+    private final SimilarityScore<R> similarityScore;
+    /**
+     * Left parameter used in distance function.
+     */
+    private final CharSequence left;
+
+    /**
+     * <p>This accepts the similarity score implementation and the "left" string.</p>
+     *
+     * @param similarityScore This may not be null.
+     * @param left This may be null here,
+     *             but the SimilarityScore#compare(CharSequence left, CharSequence right)
+     *             implementation may not accept nulls.
+     */
+    public SimilarityScoreFrom(final SimilarityScore<R> similarityScore, final CharSequence left) {
+        if (similarityScore == null) {
+            throw new IllegalArgumentException("The edit distance may not be null.");
+        }
+
+        this.similarityScore = similarityScore;
+        this.left = left;
+    }
+
+    /**
+     * <p>
+     * This compares "left" field against the "right" parameter
+     * using the "similarity score" implementation.
+     * </p>
+     *
+     * @param right the second CharSequence
+     * @return the similarity score between two CharSequences
+     */
+    public R apply(CharSequence right) {
+        return similarityScore.apply(left, right);
+    }
+
+    /**
+     * Gets the left parameter.
+     *
+     * @return the left parameter
+     */
+    public CharSequence getLeft() {
+        return left;
+    }
+
+    /**
+     * Gets the edit distance.
+     *
+     * @return the edit distance
+     */
+    public SimilarityScore<R> getSimilarityScore() {
+        return similarityScore;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/7909652f/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java b/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java
index 1068749..891be27 100644
--- a/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java
@@ -58,10 +58,6 @@ public class ParameterizedEditDistanceFromTest<R> {
             { new HammingDistance(), "Japtheth, Ham, Shem", "Japtheth, HAM, Shem", 2 },
             { new HammingDistance(), "Hamming", "Hamming", 0 },
 
-            { new JaroWinklerDistance(), "elephant", "hippo", 0.44 },
-            { new JaroWinklerDistance(), "hippo", "elephant",  0.44 },
-            { new JaroWinklerDistance(), "hippo", "zzzzzzzz", 0.0 },
-
             { new LevenshteinDistance(), "Apache", "a patchy", 4 },
             { new LevenshteinDistance(), "go", "no go", 3 },
             { new LevenshteinDistance(), "go", "go", 0 },

http://git-wip-us.apache.org/repos/asf/commons-text/blob/7909652f/src/test/java/org/apache/commons/text/similarity/ParameterizedSimilarityScoreFromTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/ParameterizedSimilarityScoreFromTest.java b/src/test/java/org/apache/commons/text/similarity/ParameterizedSimilarityScoreFromTest.java
new file mode 100644
index 0000000..a5fc915
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/similarity/ParameterizedSimilarityScoreFromTest.java
@@ -0,0 +1,64 @@
+package org.apache.commons.text.similarity;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import java.util.Arrays;
+
+import static org.hamcrest.core.IsEqual.equalTo;
+import static org.junit.Assert.assertThat;
+
+/**
+ * Unit tests for {@link org.apache.commons.text.similarity.SimilarityScoreFrom}.
+ *
+ * @param <R> The {@link SimilarityScore} return type.
+ */
+@RunWith(Parameterized.class)
+public class ParameterizedSimilarityScoreFromTest<R> {
+
+    private final SimilarityScore<R> similarityScore;
+    private final CharSequence left;
+    private final CharSequence right;
+    private final R distance;
+
+    public ParameterizedSimilarityScoreFromTest(
+            final SimilarityScore<R> similarityScore,
+            final CharSequence left, final CharSequence right,
+            final R distance) {
+
+        this.similarityScore = similarityScore;
+        this.left = left;
+        this.right = right;
+        this.distance = distance;
+    }
+
+    @Parameters
+    public static Iterable<Object[]> parameters() {
+        return Arrays.asList( new Object[][] {
+
+                { new JaroWinklerDistance(), "elephant", "hippo", 0.44 },
+                { new JaroWinklerDistance(), "hippo", "elephant",  0.44 },
+                { new JaroWinklerDistance(), "hippo", "zzzzzzzz", 0.0 },
+
+                {
+                        new SimilarityScore<Boolean>() {
+                            public Boolean apply(CharSequence left, CharSequence right) {
+                                return left == right || (left != null && left.equals(right));
+                            }
+                        },
+                        "Bob's your uncle.",
+                        "Every good boy does fine.",
+                        false
+                }
+
+        } );
+    }
+
+    @Test
+    public void test() {
+        SimilarityScoreFrom<R> similarityScoreFrom = new SimilarityScoreFrom<R>(similarityScore, left);
+        assertThat(similarityScoreFrom.apply(right), equalTo(distance));
+    }
+}


Mime
View raw message