commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From oe...@apache.org
Subject [2/2] [math] [MATH-1242] fixed shuffle algorithm used by the Monte Carlo KS statistic calculation method, moved shuffle algorithm to static package-private method that is now explicitly tested by a unit test
Date Fri, 17 Jul 2015 19:32:00 GMT
[MATH-1242] fixed shuffle algorithm used by the Monte Carlo KS statistic
calculation method, moved shuffle algorithm to static package-private
method that is now explicitly tested by a unit test

Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/cf4416a8
Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/cf4416a8
Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/cf4416a8

Branch: refs/heads/master
Commit: cf4416a84203ff9d360d4398da3b166e6d0c72b9
Parents: 6913170
Author: Otmar Ertl <otmar.ertl@gmail.com>
Authored: Fri Jul 17 20:46:53 2015 +0200
Committer: Otmar Ertl <otmar.ertl@gmail.com>
Committed: Fri Jul 17 20:46:53 2015 +0200

----------------------------------------------------------------------
 .../stat/inference/KolmogorovSmirnovTest.java   | 30 +++++++----
 .../inference/KolmogorovSmirnovTestTest.java    | 53 ++++++++++++++++++++
 2 files changed, 74 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-math/blob/cf4416a8/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java
b/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java
index c0e7e51..26020d5 100644
--- a/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java
+++ b/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java
@@ -934,6 +934,26 @@ public class KolmogorovSmirnovTest {
     }
 
     /**
+     * Fills a boolean array randomly with a fixed number of {@code true} values. 
+     * The method uses a simplified version of the Fisher-Yates shuffle algorithm. 
+     * By processing first the {@code true} values followed by the remaining {@code false}
values
+     * less random numbers need to be generated. The method is optimized for the case 
+     * that the number of {@code true} values is larger than or equal to the number of
+     * {@code false} values.
+     *
+     * @param b boolean array
+     * @param numberOfTrueValues number of {@code true} values the boolean array should finally
have
+     * @param rng random data generator
+     */
+    static void fillBooleanArrayRandomlyWithFixedNumberTrueValues(final boolean[] b, final
int numberOfTrueValues, final RandomGenerator rng) {
+        Arrays.fill(b, true);
+        for (int k = numberOfTrueValues; k < b.length; k++) {
+            final int r = rng.nextInt(k + 1);
+            b[(b[r]) ? r : k] = false;
+        }
+    }
+
+    /**
      * Uses Monte Carlo simulation to approximate \(P(D_{n,m} > d)\) where \(D_{n,m}\)
is the
      * 2-sample Kolmogorov-Smirnov statistic. See
      * {@link #kolmogorovSmirnovStatistic(double[], double[])} for the definition of \(D_{n,m}\).
@@ -964,15 +984,7 @@ public class KolmogorovSmirnovTest {
         int tail = 0;
         final boolean b[] = new boolean[sum];
         for (int i = 0; i < iterations; i++) {
-            // Shuffle n true values and m false values using Fisher-Yates shuffle algorithm.
-            // By processing first the n true values followed by the m false values the
-            // shuffle algorithm can be simplified annd requires fewer random numbers.
-            Arrays.fill(b, true);
-            for (int k = nn; k < sum; k++) {
-                int r = rng.nextInt(k);
-                b[(b[r]) ? r : k] = false;
-            }
-
+            fillBooleanArrayRandomlyWithFixedNumberTrueValues(b, nn, rng);
             int rankN = b[0] ? 1 : 0;
             int rankM = b[0] ? 0 : 1;
             boolean previous = b[0];

http://git-wip-us.apache.org/repos/asf/commons-math/blob/cf4416a8/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java
b/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java
index 3735721..e5c03fe 100644
--- a/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java
+++ b/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java
@@ -19,10 +19,13 @@ package org.apache.commons.math4.stat.inference;
 
 import java.util.Arrays;
 
+import org.apache.commons.math4.TestUtils;
 import org.apache.commons.math4.distribution.NormalDistribution;
 import org.apache.commons.math4.distribution.UniformRealDistribution;
+import org.apache.commons.math4.random.RandomGenerator;
 import org.apache.commons.math4.random.Well19937c;
 import org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest;
+import org.apache.commons.math4.util.CombinatoricsUtils;
 import org.apache.commons.math4.util.FastMath;
 import org.junit.Assert;
 import org.junit.Test;
@@ -479,6 +482,56 @@ public class KolmogorovSmirnovTestTest {
         Assert.assertEquals(0.015151515151515027, test.monteCarloP(d, x2.length, y2.length,
false, iterations), tol);
     }
 
+    @Test
+    public void testFillBooleanArrayRandomlyWithFixedNumberTrueValues() {
+
+        final int[][] parameters = {{5, 1}, {5, 2}, {5, 3}, {5, 4}, {8, 1}, {8, 2}, {8, 3},
{8, 4}, {8, 5}, {8, 6}, {8, 7}};
+
+        final double alpha = 0.001;
+        final int numIterations = 1000000;
+
+        final RandomGenerator rng = new Well19937c(0);
+
+        for (final int[] parameter : parameters) {
+
+            final int arraySize = parameter[0];
+            final int numberOfTrueValues = parameter[1];
+
+            final boolean[] b = new boolean[arraySize];
+            final long[] counts = new long[1 << arraySize];
+
+            for (int i = 0; i < numIterations; ++i) {
+                KolmogorovSmirnovTest.fillBooleanArrayRandomlyWithFixedNumberTrueValues(b,
numberOfTrueValues, rng);
+                int x = 0;
+                for (int j = 0; j < arraySize; ++j) {
+                    x = ((x << 1) | ((b[j])?1:0));
+                }
+                counts[x] += 1;
+            }
+
+            final int numCombinations = (int) CombinatoricsUtils.binomialCoefficient(arraySize,
numberOfTrueValues);
+
+            final long[] observed = new long[numCombinations];
+            final double[] expected = new double[numCombinations];
+            Arrays.fill(expected, numIterations / (double) numCombinations);
+
+            int observedIdx = 0;
+
+            for (int i = 0; i < (1 << arraySize); ++i) {
+                if (Integer.bitCount(i) == numberOfTrueValues) {
+                    observed[observedIdx] = counts[i];
+                    observedIdx += 1;
+                }
+                else {
+                    Assert.assertEquals(0, counts[i]);
+                }
+            }
+
+            Assert.assertEquals(numCombinations, observedIdx);
+            TestUtils.assertChiSquareAccept(expected, observed, alpha);
+        }
+    }
+
     /**
      * Verifies the inequality exactP(criticalValue, n, m, true) < alpha < exactP(criticalValue,
n,
      * m, false).


Mime
View raw message