commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chtom...@apache.org
Subject [1/8] [text] TEXT-32: finishing LCS similarity
Date Fri, 23 Dec 2016 14:11:57 GMT
Repository: commons-text
Updated Branches:
  refs/heads/master 4c29a73b0 -> 8bda46197


TEXT-32: finishing LCS similarity


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/46bb20ab
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/46bb20ab
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/46bb20ab

Branch: refs/heads/master
Commit: 46bb20abc4bd2dd2afbf92eb5005158a281f0ba7
Parents: 676a1a7
Author: Rob Tompkins <chtompki@gmail.com>
Authored: Thu Dec 22 15:30:49 2016 -0500
Committer: Rob Tompkins <chtompki@gmail.com>
Committed: Thu Dec 22 15:34:14 2016 -0500

----------------------------------------------------------------------
 .../similarity/LongestCommonSubsequence.java    | 48 ++++-------
 .../LongestCommonSubsequenceTest.java           | 89 +++++++++++++++-----
 2 files changed, 83 insertions(+), 54 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/46bb20ab/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
b/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
index 4e809c2..fd2b5bb 100644
--- a/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
+++ b/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
@@ -42,12 +42,23 @@ package org.apache.commons.text.similarity;
  */
 public class LongestCommonSubsequence implements SimilarityScore<Integer> {
 
+    /**
+     * Calculates longestCommonSubsequence similarity scoreof two set character sequence
passed as
+     * input.
+     *
+     * @param left first character sequence
+     * @param right second character sequence
+     * @return longestCommonSubsequenceLength
+     * @throws IllegalArgumentException
+     *             if either String input {@code null}
+     */
+    @Override
     public Integer apply(final CharSequence left, final CharSequence right) {
         // Quick return for invalid inputs
         if (left == null || right == null) {
             throw new IllegalArgumentException("Inputs must not be null");
         }
-        return longestCommonSubstringLengthArray(left,right)[left.length()][right.length()];
+        return logestCommonSubsequence(left, right).length();
     }
 
     /**
@@ -70,6 +81,8 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer>
{
      * @param left
      * @param right
      * @return lcsLengthArray
+     * @throws IllegalArgumentException
+     *             if either String input {@code null}
      */
     public CharSequence logestCommonSubsequence(final CharSequence left, final CharSequence
right) {
         // Quick return
@@ -93,7 +106,7 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer>
{
                 j = j - 1;
             }
         }
-        return longestCommonSubstringArray;
+        return longestCommonSubstringArray.reverse().toString();
     }
 
     /**
@@ -126,35 +139,4 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer>
{
         return lcsLengthArray;
     }
 
-
-    public int wikiLcs(CharSequence left, CharSequence right) {
-        int m = left.length();
-        int n = right.length();
-        char[] x = left.toString().toCharArray();
-        char[] y = right.toString().toCharArray();
-
-        int[][] c = new int[m + 1][n + 1];
-
-        for (int i = 0; i <= m; i++) {
-            c[i][0] = 0;
-        }
-
-        for (int j = 0; j <= n; j++) {
-            c[0][j] = 0;
-        }
-
-        for (int i = 1; i <= m; i++) {
-            for (int j = 1; j <= n; j++) {
-                if (x[i - 1] == y[j - 1]) {
-                    c[i][j] = c[i - 1][j - 1] + 1;
-
-                } else {
-                    c[i][j] = Math.max(c[i][j - 1], c[i - 1][j]);
-                }
-            }
-        }
-
-        return c[m][n];
-    }
-
 }

http://git-wip-us.apache.org/repos/asf/commons-text/blob/46bb20ab/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java
b/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java
index 9c50412..e852884 100644
--- a/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java
@@ -1,36 +1,83 @@
 package org.apache.commons.text.similarity;
 
+import org.junit.Before;
 import org.junit.Test;
 
+import static org.junit.Assert.assertEquals;
+
 /**
  * Created by tompkicr on 12/20/16.
  */
 public class LongestCommonSubsequenceTest {
 
+    private LongestCommonSubsequence subject;
+
+    @Before
+    public void setup() {
+        subject = new LongestCommonSubsequence();
+    }
+
     @Test
-    public void testLongestCommonSubsequence() {
-        LongestCommonSubsequence lcs = new LongestCommonSubsequence();
-        CharSequence retVal = lcs.logestCommonSubsequence("abab","abba");
-        System.out.println("testLongestCommonSubstring:");
-        System.out.println("retVal: " + retVal);
-        for (int n=0; n < retVal.length(); n++){
-            System.out.print("isISOControl: " + Character.isISOControl(retVal.charAt(n))
+ " ");
-            System.out.println(retVal.charAt(n));
-        }
+    public void testLongestCommonSubsequenceApply() {
+        assertEquals(Integer.valueOf(0), subject.apply("", ""));
+        assertEquals(Integer.valueOf(0), subject.apply("left", ""));
+        assertEquals(Integer.valueOf(0), subject.apply("", "right"));
+        assertEquals(Integer.valueOf(3), subject.apply("frog", "fog"));
+        assertEquals(Integer.valueOf(0), subject.apply("fly", "ant"));
+        assertEquals(Integer.valueOf(1), subject.apply("elephant", "hippo"));
+        assertEquals(Integer.valueOf(8), subject.apply("ABC Corporation", "ABC Corp"));
+        assertEquals(Integer.valueOf(20), subject.apply("D N H Enterprises Inc", "D &
H Enterprises, Inc."));
+        assertEquals(Integer.valueOf(24), subject.apply("My Gym Children's Fitness Center",
"My Gym. Childrens Fitness"));
+        assertEquals(Integer.valueOf(11), subject.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
+        assertEquals(Integer.valueOf(1), subject.apply("left", "right"));
+        assertEquals(Integer.valueOf(4), subject.apply("leettteft", "ritttght"));
+        assertEquals(Integer.valueOf(15), subject.apply("the same string", "the same string"));
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGettingLongestCommonSubsequenceApplyNullNull() throws Exception {
+        subject.apply(null, null);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGettingLongestCommonSubsequenceApplyStringNull() throws Exception {
+        subject.apply(" ", null);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGettingLongestCommonSubsequenceApplyNullString() throws Exception {
+        subject.apply(null, "right");
     }
 
     @Test
-    public void testLcsLengthArray() {
-        LongestCommonSubsequence lcs = new LongestCommonSubsequence();
-        String left = "abab";
-        String right = "abba";
-        int[][] retVal = lcs.longestCommonSubstringLengthArray(left, right);
-        System.out.println("testLcsLengthArray:");
-        for (int i=0; i <= left.length(); i++){
-            for (int j=0; j <= right.length(); j++){
-                System.out.print(retVal[i][j] + " ");
-            }
-            System.out.print("\n");
-        }
+    public void testLongestCommonSubsequence() {
+        assertEquals("", subject.logestCommonSubsequence("", ""));
+        assertEquals("", subject.logestCommonSubsequence("left", ""));
+        assertEquals("", subject.logestCommonSubsequence("", "right"));
+        assertEquals("fog", subject.logestCommonSubsequence("frog", "fog"));
+        assertEquals("", subject.logestCommonSubsequence("fly", "ant"));
+        assertEquals("h", subject.logestCommonSubsequence("elephant", "hippo"));
+        assertEquals("ABC Corp", subject.logestCommonSubsequence("ABC Corporation", "ABC
Corp"));
+        assertEquals("D  H Enterprises Inc", subject.logestCommonSubsequence("D N H Enterprises
Inc", "D & H Enterprises, Inc."));
+        assertEquals("My Gym Childrens Fitness", subject.logestCommonSubsequence("My Gym
Children's Fitness Center", "My Gym. Childrens Fitness"));
+        assertEquals("PENNSYLVNIA", subject.logestCommonSubsequence("PENNSYLVANIA", "PENNCISYLVNIA"));
+        assertEquals("t", subject.logestCommonSubsequence("left", "right"));
+        assertEquals("tttt", subject.logestCommonSubsequence("leettteft", "ritttght"));
+        assertEquals("the same string", subject.logestCommonSubsequence("the same string",
"the same string"));
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGettingLongestCommonSubsequenceNullNull() throws Exception {
+        subject.logestCommonSubsequence(null, null);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGettingLongestCommonSubsequenceStringNull() throws Exception {
+        subject.logestCommonSubsequence(" ", null);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGettingLongestCommonSubsequenceNullString() throws Exception {
+        subject.logestCommonSubsequence(null, "right");
     }
 }


Mime
View raw message