Return-Path: X-Original-To: apmail-commons-commits-archive@minotaur.apache.org Delivered-To: apmail-commons-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 927BA17F4B for ; Thu, 16 Apr 2015 11:25:41 +0000 (UTC) Received: (qmail 79892 invoked by uid 500); 16 Apr 2015 11:25:41 -0000 Delivered-To: apmail-commons-commits-archive@commons.apache.org Received: (qmail 79835 invoked by uid 500); 16 Apr 2015 11:25:41 -0000 Mailing-List: contact commits-help@commons.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@commons.apache.org Delivered-To: mailing list commits@commons.apache.org Received: (qmail 79826 invoked by uid 99); 16 Apr 2015 11:25:41 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 16 Apr 2015 11:25:41 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 0C6D1E103A; Thu, 16 Apr 2015 11:25:41 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: kinow@apache.org To: commits@commons.apache.org Message-Id: <310c2d4b79124d78b77f335fbebe6eb1@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [text] SANDBOX-488 Use an interface for EditDistance, and leave separate classes as utility objects Date: Thu, 16 Apr 2015 11:25:41 +0000 (UTC) Repository: commons-text Updated Branches: refs/heads/master 67ab6de8a -> b0b9d358c SANDBOX-488 Use an interface for EditDistance, and leave separate classes as utility objects Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/b0b9d358 Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/b0b9d358 Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/b0b9d358 Branch: 
refs/heads/master Commit: b0b9d358cc44e8e52d0675abf49eaefac003a112 Parents: 67ab6de Author: Bruno P. Kinoshita Authored: Wed Apr 15 21:52:30 2015 +1200 Committer: Bruno P. Kinoshita Committed: Wed Apr 15 21:52:30 2015 +1200 ---------------------------------------------------------------------- .../commons/text/similarity/CosineDistance.java | 4 +- .../text/similarity/CosineSimilarity.java | 2 +- .../commons/text/similarity/EditDistance.java | 48 ++++++++ .../text/similarity/EditDistanceFrom.java | 112 +++++++++++++++++++ .../commons/text/similarity/FuzzyScore.java | 23 ++-- .../text/similarity/HammingDistance.java | 4 +- .../text/similarity/JaroWrinklerDistance.java | 8 +- .../text/similarity/LevenshteinDistance.java | 4 +- .../commons/text/similarity/StringMetric.java | 47 -------- .../text/similarity/StringMetricFrom.java | 111 ------------------ .../commons/text/similarity/FuzzyScoreTest.java | 20 ++-- .../ParameterizedEditDistanceFromTest.java | 92 +++++++++++++++ .../ParameterizedStringMetricFromTest.java | 92 --------------- .../text/similarity/StringMetricFromTest.java | 14 +-- 14 files changed, 295 insertions(+), 286 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/CosineDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java index f9fcf39..98ef49e 100644 --- a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java @@ -28,9 +28,9 @@ import org.apache.commons.text.similarity.internal.Tokenizer; *

It utilizes the CosineSimilarity to compute the distance. Character sequences * are converted into vectors through a simple tokenizer that works with

* - * @since 0.1 + * @since 1.0 */ -public class CosineDistance implements StringMetric { +public class CosineDistance implements EditDistance { /** * Tokenizer used to convert the character sequence into a vector. */ http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java b/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java index 4b29a04..cf21186 100644 --- a/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java +++ b/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java @@ -29,7 +29,7 @@ import java.util.Set; * http://en.wikipedia.org/wiki/Cosine_similarity. *

* - * @since 0.1 + * @since 1.0 */ public class CosineSimilarity { http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/EditDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/EditDistance.java b/src/main/java/org/apache/commons/text/similarity/EditDistance.java new file mode 100644 index 0000000..824522a --- /dev/null +++ b/src/main/java/org/apache/commons/text/similarity/EditDistance.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.similarity; + +/** + * Interface for Edit Distances. + * + *

+ * An edit distance measures the similarity between two character sequences. Closer strings + * have shorter distances, and vice versa. + *

+ * + *

+ * This is a BiFunction<CharSequence, CharSequence, R>. + * The apply method + * accepts a pair of {@link CharSequence} parameters + * and returns an R type similarity score. + *

+ * + * @param The type of similarity score unit used by this EditDistance. + * @since 1.0 + */ +public interface EditDistance { + + /** + * Compares two CharSequences. + * + * @param left the first CharSequence + * @param right the second CharSequence + * @return the similarity score between two CharSequences + */ + R apply(CharSequence left, CharSequence right); + +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java b/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java new file mode 100644 index 0000000..710eace --- /dev/null +++ b/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.similarity; + +/** + *

+ * This stores a {@link EditDistance} implementation and a {@link CharSequence} "left" string. + * The {@link #apply(CharSequence right)} method accepts the "right" string and invokes the + * comparison function for the pair of strings. + *

+ * + *

+ * The following is an example which finds the most similar string: + *

+ *
+ * EditDistance<Integer> editDistance = new LevenshteinDistance();
+ * String target = "Apache";
+ * EditDistanceFrom<Integer> editDistanceFrom =
+ *     new EditDistanceFrom<Integer>(editDistance, target);
+ * String mostSimilar = null;
+ * Integer shortestDistance = null;
+ *
+ * for (String test : new String[] { "Appaloosa", "a patchy", "apple" }) {
+ *     Integer distance = editDistanceFrom.apply(test);
+ *     if (shortestDistance == null || distance < shortestDistance) {
+ *         shortestDistance = distance;
+ *         mostSimilar = test;
+ *     }
+ * }
+ *
+ * System.out.println("The string most similar to \"" + target + "\" "
+ *     + "is \"" + mostSimilar + "\" because "
+ *     + "its distance is only " + shortestDistance + ".");
+ * 
+ * + * @param This is the type of similarity score used by the EditDistance function. + * @since 1.0 + */ +public class EditDistanceFrom { + + /** + * Edit distance. + */ + private final EditDistance editDistance; + /** + * Left parameter used in distance function. + */ + private final CharSequence left; + + /** + *

This accepts the edit distance implementation and the "left" string.

+ * + * @param editDistance This may not be null. + * @param left This may be null here, + * but the EditDistance#compare(CharSequence left, CharSequence right) + * implementation may not accept nulls. + */ + public EditDistanceFrom(final EditDistance editDistance, final CharSequence left) { + if (editDistance == null) { + throw new IllegalArgumentException("The edit distance may not be null."); + } + + this.editDistance = editDistance; + this.left = left; + } + + /** + *

+ * This compares "left" field against the "right" parameter + * using the "edit distance" implementation. + *

+ * + * @param right the second CharSequence + * @return the similarity score between two CharSequences + */ + public R apply(CharSequence right) { + return editDistance.apply(left, right); + } + + /** + * Gets the left parameter. + * + * @return the left parameter + */ + public CharSequence getLeft() { + return left; + } + + /** + * Gets the edit distance. + * + * @return the edit distance + */ + public EditDistance getEditDistance() { + return editDistance; + } + +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java index 73b282a..32b557a 100644 --- a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java +++ b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java @@ -30,8 +30,10 @@ import java.util.Locale; *

* This code has been adapted from Apache Commons Lang 3.3. *

+ * + * @since 1.0 */ -public class FuzzyScore implements StringMetric { +public class FuzzyScore { /** * Locale used to change the case of text. @@ -61,14 +63,14 @@ public class FuzzyScore implements StringMetric { *

* *
-     * score.apply(null, null, null)                                    = IllegalArgumentException
-     * score.apply("", "", Locale.ENGLISH)                              = 0
-     * score.apply("Workshop", "b", Locale.ENGLISH)                     = 0
-     * score.apply("Room", "o", Locale.ENGLISH)                         = 1
-     * score.apply("Workshop", "w", Locale.ENGLISH)                     = 1
-     * score.apply("Workshop", "ws", Locale.ENGLISH)                    = 2
-     * score.apply("Workshop", "wo", Locale.ENGLISH)                    = 4
-     * score.apply("Apache Software Foundation", "asf", Locale.ENGLISH) = 3
+     * score.fuzzyScore(null, null, null)                                    = IllegalArgumentException
+     * score.fuzzyScore("", "", Locale.ENGLISH)                              = 0
+     * score.fuzzyScore("Workshop", "b", Locale.ENGLISH)                     = 0
+     * score.fuzzyScore("Room", "o", Locale.ENGLISH)                         = 1
+     * score.fuzzyScore("Workshop", "w", Locale.ENGLISH)                     = 1
+     * score.fuzzyScore("Workshop", "ws", Locale.ENGLISH)                    = 2
+     * score.fuzzyScore("Workshop", "wo", Locale.ENGLISH)                    = 4
+     * score.fuzzyScore("Apache Software Foundation", "asf", Locale.ENGLISH) = 3
      * 
* * @param term a full term that should be matched against, must not be null @@ -78,8 +80,7 @@ public class FuzzyScore implements StringMetric { * @throws IllegalArgumentException if either String input {@code null} or * Locale input {@code null} */ - @Override - public Integer apply(CharSequence term, CharSequence query) { + public Integer fuzzyScore(CharSequence term, CharSequence query) { if (term == null || query == null) { throw new IllegalArgumentException("Strings must not be null"); } http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/HammingDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java index 94d0aad..a62cfa5 100644 --- a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java @@ -24,8 +24,10 @@ package org.apache.commons.text.similarity; * For further explanation about the Hamming Distance, take a look at its * Wikipedia page at http://en.wikipedia.org/wiki/Hamming_distance. *

+ * + * @since 1.0 */ -public class HammingDistance implements StringMetric { +public class HammingDistance implements EditDistance { /** * Find the Hamming Distance between two strings with the same http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java index b96b83b..df9d6b2 100644 --- a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java @@ -34,8 +34,10 @@ package org.apache.commons.text.similarity; *

* This code has been adapted from Apache Commons Lang 3.3. *

+ * + * @since 1.0 */ -public class JaroWrinklerDistance implements StringMetric { +public class JaroWrinklerDistance implements EditDistance { /** * The default prefix length limit set to four. @@ -83,8 +85,8 @@ public class JaroWrinklerDistance implements StringMetric { final double jaro = score(left, right); final int cl = commonPrefixLength(left, right); - final double matchScore = Math.round((jaro + (defaultScalingFactor - * cl * (1.0 - jaro))) * percentageRoundValue) / percentageRoundValue; + final double matchScore = Math.round((jaro + defaultScalingFactor + * cl * (1.0 - jaro)) * percentageRoundValue) / percentageRoundValue; return matchScore; } http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java index f776cce..d94fa47 100644 --- a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java @@ -30,8 +30,10 @@ import java.util.Arrays; *

* This code has been adapted from Apache Commons Lang 3.3. *

+ * + * @since 1.0 */ -public class LevenshteinDistance implements StringMetric { +public class LevenshteinDistance implements EditDistance { /** * Default instance. http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/StringMetric.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/StringMetric.java b/src/main/java/org/apache/commons/text/similarity/StringMetric.java deleted file mode 100644 index 2d1adfa..0000000 --- a/src/main/java/org/apache/commons/text/similarity/StringMetric.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.text.similarity; - -/** - * Interface for String Metrics. - * - *

- * A string metric measures the similarity between two character sequences. Depending on - * the algorithm, higher values can mean closer strings, or more distant strings. - *

- * - *

- * This is a BiFunction<CharSequence, CharSequence, R>. - * The apply method - * accepts a pair of {@link CharSequence} parameters - * and returns an R type similarity score. - *

- * - * @param The type of similarity score unit used by this StringMetric. - */ -public interface StringMetric { - - /** - * Compares two CharSequences. - * - * @param left the first CharSequence - * @param right the second CharSequence - * @return the similarity score between two CharSequences - */ - R apply(CharSequence left, CharSequence right); - -} http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java b/src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java deleted file mode 100644 index 3b2a871..0000000 --- a/src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.text.similarity; - -/** - *

- * This stores a {@link StringMetric} implementation and a {@link CharSequence} "left" string. - * The {@link #apply(CharSequence right)} method accepts the "right" string and invokes the - * comparison function for the pair of strings. - *

- * - *

- * The following is an example which finds the most similar string: - *

- *
- * StringMetric<Integer> metric = new LevenshteinDistance();
- * String target = "Apache";
- * StringMetricFrom<Integer> metricFrom =
- *     new StringMetricFrom<Integer>(metric, target);
- * String mostSimilar = null;
- * Integer shortestDistance = null;
- *
- * for (String test : new String[] { "Appaloosa", "a patchy", "apple" }) {
- *     Integer distance = metricFrom.apply(test);
- *     if (shortestDistance == null || distance < shortestDistance) {
- *         shortestDistance = distance;
- *         mostSimilar = test;
- *     }
- * }
- *
- * System.out.println("The string most similar to \"" + target + "\" "
- *     + "is \"" + mostSimilar + "\" because "
- *     + "its distance is only " + shortestDistance + ".");
- * 
- * - * @param This is the type of similarity score used by the StringMetric function. - */ -public class StringMetricFrom { - - /** - * String metric. - */ - private final StringMetric metric; - /** - * Left parameter used in distance function. - */ - private final CharSequence left; - - /** - *

This accepts the metric implementation and the "left" string.

- * - * @param metric This may not be null. - * @param left This may be null here, - * but the StringMetric#compare(CharSequence left, CharSequence right) - * implementation may not accept nulls. - */ - public StringMetricFrom(final StringMetric metric, final CharSequence left) { - if (metric == null) { - throw new IllegalArgumentException("The metric may not be null."); - } - - this.metric = metric; - this.left = left; - } - - /** - *

- * This compares "left" field against the "right" parameter - * using the "metric" implementation. - *

- * - * @param right the second CharSequence - * @return the similarity score between two CharSequences - */ - public R apply(CharSequence right) { - return metric.apply(left, right); - } - - /** - * Gets the left parameter. - * - * @return the left parameter - */ - public CharSequence getLeft() { - return left; - } - - /** - * Gets the right parameter. - * - * @return the right parameter - */ - public StringMetric getMetric() { - return metric; - } - -} http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java index 44c2eeb..60bc802 100644 --- a/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java +++ b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java @@ -31,29 +31,29 @@ public class FuzzyScoreTest { @Test public void testGetFuzzyScore() throws Exception { - assertEquals(0, (int) ENGLISH_SCORE.apply("", "")); - assertEquals(0, (int) ENGLISH_SCORE.apply("Workshop", "b")); - assertEquals(1, (int) ENGLISH_SCORE.apply("Room", "o")); - assertEquals(1, (int) ENGLISH_SCORE.apply("Workshop", "w")); - assertEquals(2, (int) ENGLISH_SCORE.apply("Workshop", "ws")); - assertEquals(4, (int) ENGLISH_SCORE.apply("Workshop", "wo")); - assertEquals(3, (int) ENGLISH_SCORE.apply( + assertEquals(0, (int) ENGLISH_SCORE.fuzzyScore("", "")); + assertEquals(0, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "b")); + assertEquals(1, (int) ENGLISH_SCORE.fuzzyScore("Room", "o")); + assertEquals(1, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "w")); + assertEquals(2, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "ws")); + assertEquals(4, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "wo")); + assertEquals(3, (int) ENGLISH_SCORE.fuzzyScore( "Apache Software Foundation", 
"asf")); } @Test(expected = IllegalArgumentException.class) public void testGetFuzzyScore_StringNullLocale() throws Exception { - ENGLISH_SCORE.apply("not null", null); + ENGLISH_SCORE.fuzzyScore("not null", null); } @Test(expected = IllegalArgumentException.class) public void testGetFuzzyScore_NullStringLocale() throws Exception { - ENGLISH_SCORE.apply(null, "not null"); + ENGLISH_SCORE.fuzzyScore(null, "not null"); } @Test(expected = IllegalArgumentException.class) public void testGetFuzzyScore_NullNullLocale() throws Exception { - ENGLISH_SCORE.apply(null, null); + ENGLISH_SCORE.fuzzyScore(null, null); } @Test(expected = IllegalArgumentException.class) http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java b/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java new file mode 100644 index 0000000..5a4d6d1 --- /dev/null +++ b/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.similarity; + +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.Assert.assertThat; + +import java.util.Arrays; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Unit tests for {@link org.apache.commons.text.similarity.EditDistanceFrom}. + * + * @param The {@link EditDistance} return type. + */ +@RunWith(Parameterized.class) +public class ParameterizedEditDistanceFromTest { + + private final EditDistance editDistance; + private final CharSequence left; + private final CharSequence right; + private final R distance; + + public ParameterizedEditDistanceFromTest( + final EditDistance editDistance, + final CharSequence left, final CharSequence right, + final R distance) { + + this.editDistance = editDistance; + this.left = left; + this.right = right; + this.distance = distance; + } + + @Parameters + public static Iterable parameters() { + return Arrays.asList( new Object[][] { + + /* TODO: When SANDBOX-491 is ready, add a few FuzzyScore tests. */ + + { new HammingDistance(), "Sam I am.", "Ham I am.", 1 }, + { new HammingDistance(), "Japtheth, Ham, Shem", "Japtheth, HAM, Shem", 2 }, + { new HammingDistance(), "Hamming", "Hamming", 0 }, + + { new JaroWrinklerDistance(), "elephant", "hippo", 0.44 }, + { new JaroWrinklerDistance(), "hippo", "elephant", 0.44 }, + { new JaroWrinklerDistance(), "hippo", "zzzzzzzz", 0.0 }, + + /* TODO: When SANDBOX-491 is ready, add a few limited/threshold tests. 
*/ + { new LevenshteinDistance(), "Apache", "a patchy", 4 }, + { new LevenshteinDistance(), "go", "no go", 3 }, + { new LevenshteinDistance(), "go", "go", 0 }, + + { + new EditDistance() { + public Boolean apply(CharSequence left, CharSequence right) { + return left == right || (left != null && left.equals(right)); + } + }, + "Bob's your uncle.", + "Every good boy does fine.", + false + } + + } ); + } + + @Test + public void test() { + EditDistanceFrom editDistanceFrom = new EditDistanceFrom(editDistance, left); + assertThat(editDistanceFrom.apply(right), equalTo(distance)); + } + +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java b/src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java deleted file mode 100644 index 36c03bb..0000000 --- a/src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.commons.text.similarity; - -import static org.hamcrest.core.IsEqual.equalTo; -import static org.junit.Assert.assertThat; - -import java.util.Arrays; - -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -/** - * Unit tests for {@link org.apache.commons.text.similarity.StringMetricFrom}. - * - * @param The {@link StringMetric} return type. - */ -@RunWith(Parameterized.class) -public class ParameterizedStringMetricFromTest { - - private final StringMetric metric; - private final CharSequence left; - private final CharSequence right; - private final R distance; - - public ParameterizedStringMetricFromTest( - final StringMetric metric, - final CharSequence left, final CharSequence right, - final R distance) { - - this.metric = metric; - this.left = left; - this.right = right; - this.distance = distance; - } - - @Parameters - public static Iterable parameters() { - return Arrays.asList( new Object[][] { - - /* TODO: When SANDBOX-491 is ready, add a few FuzzyScore tests. */ - - { new HammingDistance(), "Sam I am.", "Ham I am.", 1 }, - { new HammingDistance(), "Japtheth, Ham, Shem", "Japtheth, HAM, Shem", 2 }, - { new HammingDistance(), "Hamming", "Hamming", 0 }, - - { new JaroWrinklerDistance(), "elephant", "hippo", 0.44 }, - { new JaroWrinklerDistance(), "hippo", "elephant", 0.44 }, - { new JaroWrinklerDistance(), "hippo", "zzzzzzzz", 0.0 }, - - /* TODO: When SANDBOX-491 is ready, add a few limited/threshold tests. 
*/ - { new LevenshteinDistance(), "Apache", "a patchy", 4 }, - { new LevenshteinDistance(), "go", "no go", 3 }, - { new LevenshteinDistance(), "go", "go", 0 }, - - { - new StringMetric() { - public Boolean apply(CharSequence left, CharSequence right) { - return left == right || (left != null && left.equals(right)); - } - }, - "Bob's your uncle.", - "Every good boy does fine.", - false - } - - } ); - } - - @Test - public void test() { - StringMetricFrom metricFrom = new StringMetricFrom(metric, left); - assertThat(metricFrom.apply(right), equalTo(distance)); - } - -} http://git-wip-us.apache.org/repos/asf/commons-text/blob/b0b9d358/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java b/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java index e268366..de59452 100644 --- a/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java +++ b/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java @@ -22,17 +22,17 @@ import static org.junit.Assert.assertThat; import org.junit.Test; /** - * Unit tests for {@link org.apache.commons.text.similarity.StringMetricFrom}. + * Unit tests for {@link org.apache.commons.text.similarity.EditDistanceFrom}. 
*/ public class StringMetricFromTest { @Test public void testEquivalence() { - StringMetric metric = new LevenshteinDistance(); + EditDistance metric = new LevenshteinDistance(); String left = "Apache"; String right = "a patchy"; Integer distance = 4; - StringMetricFrom metricFrom = new StringMetricFrom(metric, left); + EditDistanceFrom metricFrom = new EditDistanceFrom(metric, left); assertThat(metricFrom.apply(right), equalTo(distance)); assertThat(metricFrom.apply(right), equalTo(metric.apply(left, right))); @@ -40,10 +40,10 @@ public class StringMetricFromTest { @Test public void testJavadocExample() { - StringMetric metric = new LevenshteinDistance(); + EditDistance metric = new LevenshteinDistance(); String target = "Apache"; - StringMetricFrom metricFrom = - new StringMetricFrom(metric, target); + EditDistanceFrom metricFrom = + new EditDistanceFrom(metric, target); String mostSimilar = null; Integer shortestDistance = null; @@ -65,7 +65,7 @@ public class StringMetricFromTest { @Test(expected = IllegalArgumentException.class) public void testMissingMetric() { - new StringMetricFrom(null, "no go"); + new EditDistanceFrom(null, "no go"); } }