commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pste...@apache.org
Subject [math] Fixed error in computing discrete distribution of D statistics for small-sample 2-sample Kolmogorov-Smirnov tests. Error was causing incorrect p-values returned by exactP and monteCarloP methods (used by default for small, mid-size samples).
Date Fri, 10 Jul 2015 19:31:46 GMT
Repository: commons-math
Updated Branches:
  refs/heads/MATH_3_X 759fed8a7 -> 7a6aa92c8


Fixed error in computing discrete distribution of D statistics for small-sample
2-sample Kolmogorov-Smirnov tests. Error was causing incorrect p-values returned
by exactP and monteCarloP methods (used by default for small, mid-size samples).

JIRA: MATH-1245


Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/7a6aa92c
Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/7a6aa92c
Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/7a6aa92c

Branch: refs/heads/MATH_3_X
Commit: 7a6aa92c8ac46059f7ca9d76d7da6b710df901aa
Parents: 759fed8
Author: Phil Steitz <phil.steitz@gmail.com>
Authored: Fri Jul 10 12:31:36 2015 -0700
Committer: Phil Steitz <phil.steitz@gmail.com>
Committed: Fri Jul 10 12:31:36 2015 -0700

----------------------------------------------------------------------
 src/changes/changes.xml                         |  5 ++
 .../stat/inference/KolmogorovSmirnovTest.java   | 11 ++--
 .../inference/KolmogorovSmirnovTestTest.java    | 55 +++++++++++++++++++-
 3 files changed, 65 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-math/blob/7a6aa92c/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 5d47406..c5cdb11 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -51,6 +51,11 @@ If the output is not quite correct, check for invisible trailing spaces!
   </properties>
   <body>
     <release version="3.6" date="XXXX-XX-XX" description="">
+      <action dev="psteitz" type="fix" issue="MATH-1245">
+        Fixed error in computing discrete distribution of D statistics for small-sample
+        2-sample Kolmogorov-Smirnov tests. Error was causing incorrect p-values returned
+        by exactP and monteCarloP methods (used by default for small, mid-size samples).
+      </action>
       <action dev="tn" type="fix" issue="MATH-1240">
         "KolmogorovSmirnovTest#ksSum(...)" returned wrong result in case the provided
         t-parameters was zero. This affected the calculation of "approximateP(...)" for

http://git-wip-us.apache.org/repos/asf/commons-math/blob/7a6aa92c/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java b/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java
index e0f5c7d..f32dbf3 100644
--- a/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java
+++ b/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java
@@ -21,6 +21,7 @@ import java.math.BigDecimal;
 import java.util.Arrays;
 import java.util.Iterator;
 
+import org.apache.commons.math3.util.Precision;
 import org.apache.commons.math3.distribution.RealDistribution;
 import org.apache.commons.math3.exception.InsufficientDataException;
 import org.apache.commons.math3.exception.MathArithmeticException;
@@ -885,6 +886,7 @@ public class KolmogorovSmirnovTest {
         long tail = 0;
         final double[] nSet = new double[n];
         final double[] mSet = new double[m];
+        final double tol = 1e-12;  // d-values within tol of one another are considered equal
         while (combinationsIterator.hasNext()) {
             // Generate an n-set
             final int[] nSetI = combinationsIterator.next();
@@ -899,9 +901,8 @@ public class KolmogorovSmirnovTest {
                 }
             }
             final double curD = kolmogorovSmirnovStatistic(nSet, mSet);
-            if (curD > d) {
-                tail++;
-            } else if (curD == d && !strict) {
+            final int order = Precision.compareTo(curD, d, tol);
+            if (order > 0 || (order == 0 && !strict)) {
                 tail++;
             }
         }
@@ -957,6 +958,7 @@ public class KolmogorovSmirnovTest {
         final int nn = FastMath.max(n, m);
         final int mm = FastMath.min(n, m);
         final int sum = nn + mm;
+        final double tol = 1e-12;  // d-values within tol of one another are considered equal
 
         int tail = 0;
         final boolean b[] = new boolean[sum];
@@ -978,7 +980,8 @@ public class KolmogorovSmirnovTest {
                     final double cdf_n = rankN / (double) nn;
                     final double cdf_m = rankM / (double) mm;
                     final double curD = FastMath.abs(cdf_n - cdf_m);
-                    if (curD > d || (curD == d && !strict)) {
+                    final int order = Precision.compareTo(curD, d, tol);
+                    if (order > 0 || (order == 0 && !strict)) {
                         tail++;
                         break;
                     }

http://git-wip-us.apache.org/repos/asf/commons-math/blob/7a6aa92c/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java b/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java
index 3d90e31..9d0d669 100644
--- a/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java
+++ b/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java
@@ -323,7 +323,7 @@ public class KolmogorovSmirnovTestTest {
      */
     // @Test
     public void testTwoSampleMonteCarloPerformance() {
-        int numIterations = 100_000;
+        int numIterations = 100000;
         int N = (int)Math.sqrt(KolmogorovSmirnovTest.LARGE_SAMPLE_PRODUCT);
         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(new Well19937c(1000));
         for (int n = 2; n <= N; ++n) {
@@ -400,7 +400,7 @@ public class KolmogorovSmirnovTestTest {
 
     @Test
     public void testTwoSamplesAllEqual() {
-        int iterations = 10_000;
+        int iterations = 10000;
         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
         for (int i = 2; i < 30; ++i) {
             // testing values with ties
@@ -427,6 +427,57 @@ public class KolmogorovSmirnovTestTest {
             Assert.assertEquals(1.0, test.approximateP(0, values.length, values.length), 0.);
         }
     }
+    
+    /**
+     * JIRA: MATH-1245
+     * 
+     * Verify that D-values are not viewed as distinct when they are mathematically equal
+     * when computing p-statistics for small sample tests. Reference values are from R 3.2.0.
+     */
+    @Test
+    public void testDRounding() {
+        final double tol = 1e-12;
+        final double[] x = {0, 2, 3, 4, 5, 6, 7, 8, 9, 12};
+        final double[] y = {1, 10, 11, 13, 14, 15, 16, 17, 18};
+        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
+        Assert.assertEquals(0.0027495724090154106, test.kolmogorovSmirnovTest(x, y,false), tol);
+        
+        final double[] x1 = {2, 4, 6, 8, 9, 10, 11, 12, 13};
+        final double[] y1 = {0, 1, 3, 5, 7};
+        Assert.assertEquals(0.085914085914085896, test.kolmogorovSmirnovTest(x1, y1, false), tol);
+        
+        final double[] x2 = {4, 6, 7, 8, 9, 10, 11};
+        final double[] y2 = {0, 1, 2, 3, 5};
+        Assert.assertEquals(0.015151515151515027, test.kolmogorovSmirnovTest(x2, y2, false), tol); 
+    }
+    
+    /**
+     * JIRA: MATH-1245
+     * 
+     * Verify that D-values are not viewed as distinct when they are mathematically equal
+     * when computing p-statistics for small sample tests. Reference values are from R 3.2.0.
+     */
+    @Test
+    public void testDRoundingMonteCarlo() {
+        final double tol = 1e-2;
+        final int iterations = 1000000;
+        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(new Well19937c(1000));
+        
+        final double[] x = {0, 2, 3, 4, 5, 6, 7, 8, 9, 12};
+        final double[] y = {1, 10, 11, 13, 14, 15, 16, 17, 18};
+        double d = test.kolmogorovSmirnovStatistic(x, y);
+        Assert.assertEquals(0.0027495724090154106, test.monteCarloP(d, x.length, y.length, false, iterations), tol);
+        
+        final double[] x1 = {2, 4, 6, 8, 9, 10, 11, 12, 13};
+        final double[] y1 = {0, 1, 3, 5, 7};
+        d = test.kolmogorovSmirnovStatistic(x1, y1);
+        Assert.assertEquals(0.085914085914085896, test.monteCarloP(d, x1.length, y1.length, false, iterations), tol);
+        
+        final double[] x2 = {4, 6, 7, 8, 9, 10, 11};
+        final double[] y2 = {0, 1, 2, 3, 5};
+        d = test.kolmogorovSmirnovStatistic(x2, y2);
+        Assert.assertEquals(0.015151515151515027, test.monteCarloP(d, x2.length, y2.length, false, iterations), tol);
+    }
 
     /**
      * Verifies the inequality exactP(criticalValue, n, m, true) < alpha < exactP(criticalValue,
n,


Mime
View raw message