Return-Path: X-Original-To: apmail-commons-commits-archive@minotaur.apache.org Delivered-To: apmail-commons-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 60BAB186C5 for ; Sun, 21 Jun 2015 17:39:41 +0000 (UTC) Received: (qmail 85884 invoked by uid 500); 21 Jun 2015 17:39:41 -0000 Delivered-To: apmail-commons-commits-archive@commons.apache.org Received: (qmail 85810 invoked by uid 500); 21 Jun 2015 17:39:41 -0000 Mailing-List: contact commits-help@commons.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@commons.apache.org Delivered-To: mailing list commits@commons.apache.org Received: (qmail 85792 invoked by uid 99); 21 Jun 2015 17:39:41 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 21 Jun 2015 17:39:41 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 0913FE0216; Sun, 21 Jun 2015 17:39:41 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: tn@apache.org To: commits@commons.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: [math] [MATH-1236] Improve performance of calculating the two-sample Kolmogorov-Smirnov test statistic. Thanks to Otmar Ertl. Date: Sun, 21 Jun 2015 17:39:41 +0000 (UTC) Repository: commons-math Updated Branches: refs/heads/master 75c2b24c6 -> 276e22858 [MATH-1236] Improve performance of calculating the two-sample Kolmogorov-Smirnov test statistic. Thanks to Otmar Ertl. Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/276e2285 Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/276e2285 Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/276e2285 Branch: refs/heads/master Commit: 276e22858cdd56f5725e0c2f7a3522df99967973 Parents: 75c2b24 Author: Thomas Neidhart Authored: Sun Jun 21 19:39:23 2015 +0200 Committer: Thomas Neidhart Committed: Sun Jun 21 19:39:23 2015 +0200 ---------------------------------------------------------------------- src/changes/changes.xml | 4 ++ .../stat/inference/KolmogorovSmirnovTest.java | 52 +++++--------------- .../inference/KolmogorovSmirnovTestTest.java | 12 +++++ 3 files changed, 28 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-math/blob/276e2285/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 13193e5..cb25351 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -54,6 +54,10 @@ If the output is not quite correct, check for invisible trailing spaces! + + Improved performance of calculating the two-sample Kolmogorov-Smirnov + test statistic. + Lifted unnecessary restriction on constructor's argument of "MicrosphereInterpolator" (package "o.a.c.m.analysis.interpolation"). http://git-wip-us.apache.org/repos/asf/commons-math/blob/276e2285/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java b/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java index 6144ff7..a664cda 100644 --- a/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java +++ b/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java @@ -294,58 +294,30 @@ public class KolmogorovSmirnovTest { final int n = sx.length; final int m = sy.length; + int rankX = 0; + int rankY = 0; + // Find the max difference between cdf_x and cdf_y double supD = 0d; - // First walk x points - for (int i = 0; i < n; i++) { - final double x_i = sx[i]; - // ties can be safely ignored - if (i > 0 && x_i == sx[i-1]) { - continue; + do { + double z = Double.compare(sx[rankX], sy[rankY]) <= 0 ? sx[rankX] : sy[rankY]; + while(rankX < n && Double.compare(sx[rankX], z) == 0) { + rankX += 1; } - final double cdf_x = edf(x_i, sx); - final double cdf_y = edf(x_i, sy); - final double curD = FastMath.abs(cdf_x - cdf_y); - if (curD > supD) { - supD = curD; + while(rankY < m && Double.compare(sy[rankY], z) == 0) { + rankY += 1; } - } - // Now look at y - for (int i = 0; i < m; i++) { - final double y_i = sy[i]; - // ties can be safely ignored - if (i > 0 && y_i == sy[i-1]) { - continue; - } - final double cdf_x = edf(y_i, sx); - final double cdf_y = edf(y_i, sy); + final double cdf_x = rankX / (double) n; + final double cdf_y = rankY / (double) m; final double curD = FastMath.abs(cdf_x - cdf_y); if (curD > supD) { supD = curD; } - } + } while(rankX < n && rankY < m); return supD; } /** - * Computes the empirical distribution function. - * - * @param x the given x - * @param samples the observations - * @return the empirical distribution function \(F_n(x)\) - */ - private double edf(final double x, final double[] samples) { - final int n = samples.length; - int index = Arrays.binarySearch(samples, x); - if (index >= 0) { - while(index < (n - 1) && samples[index+1] == x) { - ++index; - } - } - return index >= 0 ? (index + 1d) / n : (-index - 1d) / n; - } - - /** * Computes the p-value, or observed significance level, of a one-sample Kolmogorov-Smirnov test * evaluating the null hypothesis that {@code data} conforms to {@code distribution}. http://git-wip-us.apache.org/repos/asf/commons-math/blob/276e2285/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java b/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java index 0a73139..3b1c10b 100644 --- a/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java +++ b/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java @@ -17,6 +17,8 @@ package org.apache.commons.math4.stat.inference; +import java.util.Arrays; + import org.apache.commons.math4.distribution.NormalDistribution; import org.apache.commons.math4.distribution.UniformRealDistribution; import org.apache.commons.math4.random.Well19937c; @@ -365,6 +367,16 @@ public class KolmogorovSmirnovTestTest { } + @Test + public void testTwoSamplesAllEqual() { + final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(); + for (int i = 2; i < 30; ++i) { + double[] values = new double[i]; + Arrays.fill(values, i); + Assert.assertEquals(0., test.kolmogorovSmirnovStatistic(values, values), 0.); + } + } + /** * Verifies the inequality exactP(criticalValue, n, m, true) < alpha < exactP(criticalValue, n, * m, false).