commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chtom...@apache.org
Subject [3/8] [text] TEXT-32: LCS distance calculation
Date Fri, 23 Dec 2016 14:11:59 GMT
TEXT-32: LCS distance calculation


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/288781ef
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/288781ef
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/288781ef

Branch: refs/heads/master
Commit: 288781eff3725fd55c1ddece95be9f22fddfd324
Parents: 46bb20a
Author: Rob Tompkins <chtompki@gmail.com>
Authored: Thu Dec 22 15:55:27 2016 -0500
Committer: Rob Tompkins <chtompki@gmail.com>
Committed: Thu Dec 22 15:55:27 2016 -0500

----------------------------------------------------------------------
 .../LongestCommonSubsequenceDistance.java       | 61 ++++++++++++++++++
 .../LongestCommonSubsequenceDistanceTest.java   | 68 ++++++++++++++++++++
 .../LongestCommonSubsequenceTest.java           | 26 ++++++--
 3 files changed, 150 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/288781ef/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequenceDistance.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequenceDistance.java
b/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequenceDistance.java
new file mode 100644
index 0000000..bac02de
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequenceDistance.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+/**
+ * An edit distance algorithm based on the length of the longest common subsequence between
two strings.
+ *
+ * <p>
+ * This code is directly based upon the implementation in {@link LongestCommonSubsequence}.
+ * </p>
+ *
+ * <p>
+ * For reference see: <a href="https://en.wikipedia.org/wiki/Longest_common_subsequence_problem">
+ * https://en.wikipedia.org/wiki/Longest_common_subsequence_problem</a>.
+ * </p>
+ *
+ * <p>For further reading see:</p>
+ *
+ * <p>Lothaire, M. <i>Applied combinatorics on words</i>. New York: Cambridge
U Press, 2005. <b>12-13</b></p>
+ *
+ * @since 1.0
+ */
+public class LongestCommonSubsequenceDistance implements EditDistance<Integer> {
+
+    private final LongestCommonSubsequence longestCommonSubsequence = new LongestCommonSubsequence();
+
+    /**
+     * Calculates an edit distance between two <code>CharSequence</code>'s <code>left</code>
and
+     * <code>right</code> as: <code>left.length() + right.length() - 2
* LCS(left, right)</code>, where
+     * <code>LCS</code> is given in {@link LongestCommonSubsequence#apply(CharSequence,
CharSequence)}.
+     *
+     * @param left first character sequence
+     * @param right second character sequence
+     * @return distance
+     * @throws IllegalArgumentException
+     *             if either String input {@code null}
+     */
+    @Override
+    public Integer apply(final CharSequence left, final CharSequence right) {
+        // Quick return for invalid inputs
+        if (left == null || right == null) {
+            throw new IllegalArgumentException("Inputs must not be null");
+        }
+        return left.length() + right.length() - 2 * longestCommonSubsequence.apply(left,
right);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/288781ef/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceDistanceTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceDistanceTest.java
b/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceDistanceTest.java
new file mode 100644
index 0000000..a107b47
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceDistanceTest.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Unit tests for {@link org.apache.commons.text.similarity.LongestCommonSubsequenceDistance}.
+ */
+public class LongestCommonSubsequenceDistanceTest {
+
+    private static LongestCommonSubsequenceDistance subject;
+    
+    @BeforeClass
+    public static void setup() {
+        subject = new LongestCommonSubsequenceDistance();
+    }
+
+    @Test
+    public void testGettingLogestCommonSubsequenceDistacne() {
+        assertEquals(Integer.valueOf(0), subject.apply("", ""));
+        assertEquals(Integer.valueOf(4), subject.apply("left", ""));
+        assertEquals(Integer.valueOf(5), subject.apply("", "right"));
+        assertEquals(Integer.valueOf(1), subject.apply("frog", "fog"));
+        assertEquals(Integer.valueOf(6), subject.apply("fly", "ant"));
+        assertEquals(Integer.valueOf(11), subject.apply("elephant", "hippo"));
+        assertEquals(Integer.valueOf(7), subject.apply("ABC Corporation", "ABC Corp"));
+        assertEquals(Integer.valueOf(4), subject.apply("D N H Enterprises Inc", "D &
H Enterprises, Inc."));
+        assertEquals(Integer.valueOf(9), subject.apply("My Gym Children's Fitness Center",
"My Gym. Childrens Fitness"));
+        assertEquals(Integer.valueOf(3), subject.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
+        assertEquals(Integer.valueOf(7), subject.apply("left", "right"));
+        assertEquals(Integer.valueOf(9), subject.apply("leettteft", "ritttght"));
+        assertEquals(Integer.valueOf(0), subject.apply("the same string", "the same string"));
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGettingLongestCommonSubsequenceDistanceNullNull() throws Exception {
+        subject.apply(null, null);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGettingLongestCommonSubsequenceDistanceStringNull() throws Exception
{
+        subject.apply(" ", null);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGettingLongestCommonSubsequenceDistanceNullString() throws Exception
{
+        subject.apply(null, "right");
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/288781ef/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java
b/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java
index e852884..c5f43ba 100644
--- a/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java
@@ -1,19 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.commons.text.similarity;
 
-import org.junit.Before;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 import static org.junit.Assert.assertEquals;
 
 /**
- * Created by tompkicr on 12/20/16.
+ * Unit tests for {@link org.apache.commons.text.similarity.LongestCommonSubsequence}.
  */
 public class LongestCommonSubsequenceTest {
 
-    private LongestCommonSubsequence subject;
+    private static LongestCommonSubsequence subject;
 
-    @Before
-    public void setup() {
+    @BeforeClass
+    public static void setup() {
         subject = new LongestCommonSubsequence();
     }
 


Mime
View raw message