commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aherb...@apache.org
Subject [commons-text] 02/03: TEXT-157: Use expected=(intersect/union) in Jaccard tests
Date Fri, 08 Mar 2019 16:12:35 GMT
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-text.git

commit eacfa3672ad6bb3a65c9268f9cd3c8b36be12149
Author: aherbert <a.herbert@sussex.ac.uk>
AuthorDate: Fri Mar 8 12:32:16 2019 +0000

    TEXT-157: Use expected=(intersect/union) in Jaccard tests
---
 .../commons/text/similarity/JaccardDistanceTest.java  | 19 +++++++++----------
 .../text/similarity/JaccardSimilarityTest.java        | 19 +++++++++----------
 2 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/src/test/java/org/apache/commons/text/similarity/JaccardDistanceTest.java b/src/test/java/org/apache/commons/text/similarity/JaccardDistanceTest.java
index 595c83b..979354f 100644
--- a/src/test/java/org/apache/commons/text/similarity/JaccardDistanceTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/JaccardDistanceTest.java
@@ -36,22 +36,21 @@ public class JaccardDistanceTest {
 
     @Test
     public void testGettingJaccardDistance() {
-        // Results generated using the python distance library using:
-        // distance.jaccard(seq1, seq2)
+        // Expected Jaccard distance = 1.0 - (intersect / union)
         assertEquals(1.0, classBeingTested.apply("", ""));
         assertEquals(1.0, classBeingTested.apply("left", ""));
         assertEquals(1.0, classBeingTested.apply("", "right"));
-        assertEquals(0.25, classBeingTested.apply("frog", "fog"));
+        assertEquals(1.0 - (3.0 / 4), classBeingTested.apply("frog", "fog"));
         assertEquals(1.0, classBeingTested.apply("fly", "ant"));
-        assertEquals(0.7777777777777778, classBeingTested.apply("elephant", "hippo"));
-        assertEquals(0.36363636363636365, classBeingTested.apply("ABC Corporation", "ABC Corp"));
-        assertEquals(0.23529411764705888,
+        assertEquals(1.0 - (2.0 / 9), classBeingTested.apply("elephant", "hippo"));
+        assertEquals(1.0 - (7.0 / 11), classBeingTested.apply("ABC Corporation", "ABC Corp"));
+        assertEquals(1.0 - (13.0 / 17),
                 classBeingTested.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."));
-        assertEquals(0.11111111111111116,
+        assertEquals(1.0 - (16.0 / 18),
                 classBeingTested.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"));
-        assertEquals(0.09999999999999998, classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
-        assertEquals(0.875, classBeingTested.apply("left", "right"));
-        assertEquals(0.875, classBeingTested.apply("leettteft", "ritttght"));
+        assertEquals(1.0 - (9.0 / 10), classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
+        assertEquals(1.0 - (1.0 / 8), classBeingTested.apply("left", "right"));
+        assertEquals(1.0 - (1.0 / 8), classBeingTested.apply("leettteft", "ritttght"));
         assertEquals(0.0, classBeingTested.apply("the same string", "the same string"));
     }
 
diff --git a/src/test/java/org/apache/commons/text/similarity/JaccardSimilarityTest.java b/src/test/java/org/apache/commons/text/similarity/JaccardSimilarityTest.java
index 96e0908..bb46122 100644
--- a/src/test/java/org/apache/commons/text/similarity/JaccardSimilarityTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/JaccardSimilarityTest.java
@@ -36,22 +36,21 @@ public class JaccardSimilarityTest {
 
     @Test
     public void testGettingJaccardSimilarity() {
-        // Results generated using the python distance library using:
-        // 1 - distance.jaccard(seq1, seq2)
+        // Expected Jaccard similarity = (intersect / union)
         assertEquals(0.0, classBeingTested.apply("", ""));
         assertEquals(0.0, classBeingTested.apply("left", ""));
         assertEquals(0.0, classBeingTested.apply("", "right"));
-        assertEquals(0.75, classBeingTested.apply("frog", "fog"));
+        assertEquals(3.0 / 4, classBeingTested.apply("frog", "fog"));
         assertEquals(0.0, classBeingTested.apply("fly", "ant"));
-        assertEquals(0.2222222222222222, classBeingTested.apply("elephant", "hippo"));
-        assertEquals(0.6363636363636364, classBeingTested.apply("ABC Corporation", "ABC Corp"));
-        assertEquals(0.7647058823529411,
+        assertEquals(2.0 / 9, classBeingTested.apply("elephant", "hippo"));
+        assertEquals(7.0 / 11, classBeingTested.apply("ABC Corporation", "ABC Corp"));
+        assertEquals(13.0 / 17,
                 classBeingTested.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."));
-        assertEquals(0.8888888888888888,
+        assertEquals(16.0 / 18,
                 classBeingTested.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"));
-        assertEquals(0.9, classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
-        assertEquals(0.125, classBeingTested.apply("left", "right"));
-        assertEquals(0.125, classBeingTested.apply("leettteft", "ritttght"));
+        assertEquals(9.0 / 10, classBeingTested.apply("PENNSYLVANIA", "PENNCISYLVNIA"));
+        assertEquals(1.0 / 8, classBeingTested.apply("left", "right"));
+        assertEquals(1.0 / 8, classBeingTested.apply("leettteft", "ritttght"));
         assertEquals(1.0, classBeingTested.apply("the same string", "the same string"));
     }
 


Mime
View raw message