Return-Path:
X-Original-To: apmail-commons-commits-archive@minotaur.apache.org
Delivered-To: apmail-commons-commits-archive@minotaur.apache.org
Received: from mail.apache.org (hermes.apache.org [140.211.11.3])
by minotaur.apache.org (Postfix) with SMTP id CC5B8E765
for ;
Fri, 15 Mar 2013 13:57:35 +0000 (UTC)
Received: (qmail 3147 invoked by uid 500); 15 Mar 2013 13:57:35 -0000
Delivered-To: apmail-commons-commits-archive@commons.apache.org
Received: (qmail 3085 invoked by uid 500); 15 Mar 2013 13:57:35 -0000
Mailing-List: contact commits-help@commons.apache.org; run by ezmlm
Precedence: bulk
List-Help:
List-Unsubscribe:
List-Post:
List-Id:
Reply-To: dev@commons.apache.org
Delivered-To: mailing list commits@commons.apache.org
Received: (qmail 3074 invoked by uid 99); 15 Mar 2013 13:57:35 -0000
Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136)
by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 15 Mar 2013 13:57:35 +0000
X-ASF-Spam-Status: No, hits=-2000.0 required=5.0
tests=ALL_TRUSTED
X-Spam-Check-By: apache.org
Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4)
by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 15 Mar 2013 13:57:33 +0000
Received: from eris.apache.org (localhost [127.0.0.1])
by eris.apache.org (Postfix) with ESMTP id 659E92388962;
Fri, 15 Mar 2013 13:55:28 +0000 (UTC)
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: svn commit: r1456958 - in /commons/proper/math/trunk: pom.xml
src/changes/changes.xml
src/main/java/org/apache/commons/math3/stat/inference/OneWayAnova.java
src/test/java/org/apache/commons/math3/stat/inference/OneWayAnovaTest.java
Date: Fri, 15 Mar 2013 13:55:28 -0000
To: commits@commons.apache.org
From: luc@apache.org
X-Mailer: svnmailer-1.0.8-patched
Message-Id: <20130315135528.659E92388962@eris.apache.org>
X-Virus-Checked: Checked by ClamAV on apache.org
Author: luc
Date: Fri Mar 15 13:55:27 2013
New Revision: 1456958
URL: http://svn.apache.org/r1456958
Log:
Allow direct use of SummaryStatistics in one-way ANOVA.
Patch provided by Peter Andrews.
JIRA: MATH-877
Modified:
commons/proper/math/trunk/pom.xml
commons/proper/math/trunk/src/changes/changes.xml
commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/OneWayAnova.java
commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/OneWayAnovaTest.java
Modified: commons/proper/math/trunk/pom.xml
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/pom.xml?rev=1456958&r1=1456957&r2=1456958&view=diff
==============================================================================
--- commons/proper/math/trunk/pom.xml (original)
+++ commons/proper/math/trunk/pom.xml Fri Mar 15 13:55:27 2013
@@ -139,6 +139,9 @@
Mark Anderson
+ Peter Andrews
+
+
Rémi Arntzen
Modified: commons/proper/math/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/changes/changes.xml?rev=1456958&r1=1456957&r2=1456958&view=diff
==============================================================================
--- commons/proper/math/trunk/src/changes/changes.xml (original)
+++ commons/proper/math/trunk/src/changes/changes.xml Fri Mar 15 13:55:27 2013
@@ -55,6 +55,9 @@ This is a minor release: It combines bug
Changes to existing features were made in a backwards-compatible
way such as to allow drop-in replacement of the v3.1[.1] JAR file.
">
+
+ Allow direct use of SummaryStatistics in one-way ANOVA.
+
Fixed infinite loop when NaN occurs in singular value decomposition.
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/OneWayAnova.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/OneWayAnova.java?rev=1456958&r1=1456957&r2=1456958&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/OneWayAnova.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/OneWayAnova.java Fri Mar 15 13:55:27 2013
@@ -16,6 +16,9 @@
*/
package org.apache.commons.math3.stat.inference;
+import java.util.ArrayList;
+import java.util.Collection;
+
import org.apache.commons.math3.distribution.FDistribution;
import org.apache.commons.math3.exception.ConvergenceException;
import org.apache.commons.math3.exception.DimensionMismatchException;
@@ -23,10 +26,8 @@ import org.apache.commons.math3.exceptio
import org.apache.commons.math3.exception.NullArgumentException;
import org.apache.commons.math3.exception.OutOfRangeException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
-import org.apache.commons.math3.stat.descriptive.summary.Sum;
-import org.apache.commons.math3.stat.descriptive.summary.SumOfSquares;
-
-import java.util.Collection;
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
+import org.apache.commons.math3.util.MathUtils;
/**
* Implements one-way ANOVA (analysis of variance) statistics.
@@ -132,6 +133,82 @@ public class OneWayAnova {
}
/**
+ * Computes the ANOVA P-value for a collection of {@link SummaryStatistics}.
+ *
+ * Preconditions:
+ * - The categoryData Collection must contain
+ * {@link SummaryStatistics}.
+ * - There must be at least two {@link SummaryStatistics} in the
+ * categoryData collection and each of these statistics must
+ * contain at least two values.
+ * This implementation uses the
+ * {@link org.apache.commons.math3.distribution.FDistribution
+ * commons-math F Distribution implementation} to estimate the exact
+ * p-value, using the formula
+ * p = 1 - cumulativeProbability(F)
+ * where F is the F value and cumulativeProbability
+ * is the commons-math implementation of the F distribution.
+ *
+ * @param categoryData Collection of {@link SummaryStatistics}
+ * each containing data for one category
+ * @param allowOneElementData if true, allow computation for one category
+ * only or for one data element per category
+ * @return Pvalue
+ * @throws NullArgumentException if categoryData is null
+ * @throws DimensionMismatchException if the length of the categoryData
+ * array is less than 2 or a contained {@link SummaryStatistics} does not have
+ * at least two values
+ * @throws ConvergenceException if the p-value can not be computed due to a convergence error
+ * @throws MaxCountExceededException if the maximum number of iterations is exceeded
+ */
+ public double anovaPValue(final Collection categoryData,
+ final boolean allowOneElementData)
+ throws NullArgumentException, DimensionMismatchException,
+ ConvergenceException, MaxCountExceededException {
+
+ final AnovaStats a = anovaStats(categoryData, allowOneElementData);
+ final FDistribution fdist = new FDistribution(a.dfbg, a.dfwg);
+ return 1.0 - fdist.cumulativeProbability(a.F);
+
+ }
+
+ /**
+ * This method calls the method that actually does the calculations (except
+ * P-value).
+ *
+ * @param categoryData
+ * Collection
of double[]
arrays each
+ * containing data for one category
+ * @return computed AnovaStats
+ * @throws NullArgumentException
+ * if categoryData
is null
+ * @throws DimensionMismatchException
+ * if the length of the categoryData
array is less
+ * than 2 or a contained double[]
array does not
+ * contain at least two values
+ */
+ private AnovaStats anovaStats(final Collection categoryData)
+ throws NullArgumentException, DimensionMismatchException {
+
+ MathUtils.checkNotNull(categoryData);
+
+ final Collection categoryDataSummaryStatistics =
+ new ArrayList(categoryData.size());
+
+ // convert arrays to SummaryStatistics
+ for (final double[] data : categoryData) {
+ final SummaryStatistics dataSummaryStatistics = new SummaryStatistics();
+ categoryDataSummaryStatistics.add(dataSummaryStatistics);
+ for (final double val : data) {
+ dataSummaryStatistics.addValue(val);
+ }
+ }
+
+ return anovaStats(categoryDataSummaryStatistics, false);
+
+ }
+
+ /**
* Performs an ANOVA test, evaluating the null hypothesis that there
* is no difference among the means of the data categories.
*
@@ -184,73 +261,65 @@ public class OneWayAnova {
*
* @param categoryData Collection
of double[]
* arrays each containing data for one category
+ * @param allowOneElementData if true, allow computation for one category
+ * only or for one data element per category
* @return computed AnovaStats
* @throws NullArgumentException if categoryData
is null
- * @throws DimensionMismatchException if the length of the categoryData
- * array is less than 2 or a contained double[]
array does not contain
+ * @throws DimensionMismatchException if allowOneElementData is false and the number of
+ * categories is less than 2 or a contained SummaryStatistics does not contain
* at least two values
*/
- private AnovaStats anovaStats(final Collection categoryData)
+ private AnovaStats anovaStats(final Collection categoryData,
+ final boolean allowOneElementData)
throws NullArgumentException, DimensionMismatchException {
- if (categoryData == null) {
- throw new NullArgumentException();
- }
+ MathUtils.checkNotNull(categoryData);
- // check if we have enough categories
- if (categoryData.size() < 2) {
- throw new DimensionMismatchException(
- LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
- categoryData.size(), 2);
- }
+ if (!allowOneElementData) {
+ // check if we have enough categories
+ if (categoryData.size() < 2) {
+ throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
+ categoryData.size(), 2);
+ }
- // check if each category has enough data and all is double[]
- for (double[] array : categoryData) {
- if (array.length <= 1) {
- throw new DimensionMismatchException(
- LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
- array.length, 2);
+ // check if each category has enough data
+ for (final SummaryStatistics array : categoryData) {
+ if (array.getN() <= 1) {
+ throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
+ (int) array.getN(), 2);
+ }
}
}
int dfwg = 0;
double sswg = 0;
- Sum totsum = new Sum();
- SumOfSquares totsumsq = new SumOfSquares();
+ double totsum = 0;
+ double totsumsq = 0;
int totnum = 0;
- for (double[] data : categoryData) {
+ for (final SummaryStatistics data : categoryData) {
+
+ final double sum = data.getSum();
+ final double sumsq = data.getSumsq();
+ final int num = (int) data.getN();
+ totnum += num;
+ totsum += sum;
+ totsumsq += sumsq;
- Sum sum = new Sum();
- SumOfSquares sumsq = new SumOfSquares();
- int num = 0;
-
- for (int i = 0; i < data.length; i++) {
- double val = data[i];
-
- // within category
- num++;
- sum.increment(val);
- sumsq.increment(val);
-
- // for all categories
- totnum++;
- totsum.increment(val);
- totsumsq.increment(val);
- }
dfwg += num - 1;
- double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num;
+ final double ss = sumsq - ((sum * sum) / num);
sswg += ss;
}
- double sst = totsumsq.getResult() - totsum.getResult() *
- totsum.getResult()/totnum;
- double ssbg = sst - sswg;
- int dfbg = categoryData.size() - 1;
- double msbg = ssbg/dfbg;
- double mswg = sswg/dfwg;
- double F = msbg/mswg;
+
+ final double sst = totsumsq - ((totsum * totsum) / totnum);
+ final double ssbg = sst - sswg;
+ final int dfbg = categoryData.size() - 1;
+ final double msbg = ssbg / dfbg;
+ final double mswg = sswg / dfwg;
+ final double F = msbg / mswg;
return new AnovaStats(dfbg, dfwg, F);
+
}
/**
Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/OneWayAnovaTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/OneWayAnovaTest.java?rev=1456958&r1=1456957&r2=1456958&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/OneWayAnovaTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/OneWayAnovaTest.java Fri Mar 15 13:55:27 2013
@@ -20,6 +20,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.junit.Assert;
import org.junit.Test;
@@ -103,6 +104,38 @@ public class OneWayAnovaTest {
}
@Test
+ public void testAnovaPValueSummaryStatistics() {
+ // Target comparison values computed using R version 2.6.0 (Linux version)
+ List threeClasses = new ArrayList();
+ SummaryStatistics statsA = new SummaryStatistics();
+ for (double a : classA) {
+ statsA.addValue(a);
+ }
+ threeClasses.add(statsA);
+ SummaryStatistics statsB = new SummaryStatistics();
+ for (double b : classB) {
+ statsB.addValue(b);
+ }
+ threeClasses.add(statsB);
+ SummaryStatistics statsC = new SummaryStatistics();
+ for (double c : classC) {
+ statsC.addValue(c);
+ }
+ threeClasses.add(statsC);
+
+ Assert.assertEquals("ANOVA P-value", 6.959446E-06,
+ testStatistic.anovaPValue(threeClasses, true), 1E-12);
+
+ List twoClasses = new ArrayList();
+ twoClasses.add(statsA);
+ twoClasses.add(statsB);
+
+ Assert.assertEquals("ANOVA P-value", 0.904212960464,
+ testStatistic.anovaPValue(twoClasses, false), 1E-12);
+
+ }
+
+ @Test
public void testAnovaTest() {
// Target comparison values computed using R version 2.3.1 (Linux version)
List threeClasses = new ArrayList();