Return-Path: X-Original-To: apmail-commons-commits-archive@minotaur.apache.org Delivered-To: apmail-commons-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id CC5B8E765 for ; Fri, 15 Mar 2013 13:57:35 +0000 (UTC) Received: (qmail 3147 invoked by uid 500); 15 Mar 2013 13:57:35 -0000 Delivered-To: apmail-commons-commits-archive@commons.apache.org Received: (qmail 3085 invoked by uid 500); 15 Mar 2013 13:57:35 -0000 Mailing-List: contact commits-help@commons.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@commons.apache.org Delivered-To: mailing list commits@commons.apache.org Received: (qmail 3074 invoked by uid 99); 15 Mar 2013 13:57:35 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 15 Mar 2013 13:57:35 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 15 Mar 2013 13:57:33 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 659E92388962; Fri, 15 Mar 2013 13:55:28 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1456958 - in /commons/proper/math/trunk: pom.xml src/changes/changes.xml src/main/java/org/apache/commons/math3/stat/inference/OneWayAnova.java src/test/java/org/apache/commons/math3/stat/inference/OneWayAnovaTest.java Date: Fri, 15 Mar 2013 13:55:28 -0000 To: commits@commons.apache.org From: luc@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130315135528.659E92388962@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: luc Date: Fri Mar 15 13:55:27 2013 New Revision: 1456958 URL: 
http://svn.apache.org/r1456958 Log: Allow direct use of SummaryStatistics in one-way ANOVA. Patch provided by Peter Andrews. JIRA: MATH-877 Modified: commons/proper/math/trunk/pom.xml commons/proper/math/trunk/src/changes/changes.xml commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/OneWayAnova.java commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/OneWayAnovaTest.java Modified: commons/proper/math/trunk/pom.xml URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/pom.xml?rev=1456958&r1=1456957&r2=1456958&view=diff ============================================================================== --- commons/proper/math/trunk/pom.xml (original) +++ commons/proper/math/trunk/pom.xml Fri Mar 15 13:55:27 2013 @@ -139,6 +139,9 @@ Mark Anderson + Peter Andrews + + Rémi Arntzen Modified: commons/proper/math/trunk/src/changes/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/changes/changes.xml?rev=1456958&r1=1456957&r2=1456958&view=diff ============================================================================== --- commons/proper/math/trunk/src/changes/changes.xml (original) +++ commons/proper/math/trunk/src/changes/changes.xml Fri Mar 15 13:55:27 2013 @@ -55,6 +55,9 @@ This is a minor release: It combines bug Changes to existing features were made in a backwards-compatible way such as to allow drop-in replacement of the v3.1[.1] JAR file. "> + + Allow direct use of SummaryStatistics in one-way ANOVA. + Fixed infinite loop when NaN occurs in singular value decomposition. 
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/OneWayAnova.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/OneWayAnova.java?rev=1456958&r1=1456957&r2=1456958&view=diff ============================================================================== --- commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/OneWayAnova.java (original) +++ commons/proper/math/trunk/src/main/java/org/apache/commons/math3/stat/inference/OneWayAnova.java Fri Mar 15 13:55:27 2013 @@ -16,6 +16,9 @@ */ package org.apache.commons.math3.stat.inference; +import java.util.ArrayList; +import java.util.Collection; + import org.apache.commons.math3.distribution.FDistribution; import org.apache.commons.math3.exception.ConvergenceException; import org.apache.commons.math3.exception.DimensionMismatchException; @@ -23,10 +26,8 @@ import org.apache.commons.math3.exceptio import org.apache.commons.math3.exception.NullArgumentException; import org.apache.commons.math3.exception.OutOfRangeException; import org.apache.commons.math3.exception.util.LocalizedFormats; -import org.apache.commons.math3.stat.descriptive.summary.Sum; -import org.apache.commons.math3.stat.descriptive.summary.SumOfSquares; - -import java.util.Collection; +import org.apache.commons.math3.stat.descriptive.SummaryStatistics; +import org.apache.commons.math3.util.MathUtils; /** * Implements one-way ANOVA (analysis of variance) statistics. @@ -132,6 +133,82 @@ public class OneWayAnova { } /** + * Computes the ANOVA P-value for a collection of {@link SummaryStatistics}. + * + *

<p><strong>Preconditions</strong>: <ul>
+ * <li>The categoryData <code>Collection</code> must contain + * {@link SummaryStatistics}.</li>
+ * <li> There must be at least two {@link SummaryStatistics} in the + * <code>categoryData</code> collection and each of these statistics must + * contain at least two values.</li></ul></p><p>
+ * This implementation uses the + * {@link org.apache.commons.math3.distribution.FDistribution + * commons-math F Distribution implementation} to estimate the exact + * p-value, using the formula<pre>
+ *   p = 1 - cumulativeProbability(F)</pre>
+ * where <code>F</code> is the F value and <code>cumulativeProbability</code> + * is the commons-math implementation of the F distribution.</p>
+ * + * @param categoryData Collection of {@link SummaryStatistics} + * each containing data for one category + * @param allowOneElementData if true, allow computation for one catagory + * only or for one data element per category + * @return Pvalue + * @throws NullArgumentException if categoryData is null + * @throws DimensionMismatchException if the length of the categoryData + * array is less than 2 or a contained {@link SummaryStatistics} does not have + * at least two values + * @throws ConvergenceException if the p-value can not be computed due to a convergence error + * @throws MaxCountExceededException if the maximum number of iterations is exceeded + */ + public double anovaPValue(final Collection categoryData, + final boolean allowOneElementData) + throws NullArgumentException, DimensionMismatchException, + ConvergenceException, MaxCountExceededException { + + final AnovaStats a = anovaStats(categoryData, allowOneElementData); + final FDistribution fdist = new FDistribution(a.dfbg, a.dfwg); + return 1.0 - fdist.cumulativeProbability(a.F); + + } + + /** + * This method calls the method that actually does the calculations (except + * P-value). 
+ * + * @param categoryData + * Collection of double[] arrays each + * containing data for one category + * @return computed AnovaStats + * @throws NullArgumentException + * if categoryData is null + * @throws DimensionMismatchException + * if the length of the categoryData array is less + * than 2 or a contained double[] array does not + * contain at least two values + */ + private AnovaStats anovaStats(final Collection categoryData) + throws NullArgumentException, DimensionMismatchException { + + MathUtils.checkNotNull(categoryData); + + final Collection categoryDataSummaryStatistics = + new ArrayList(categoryData.size()); + + // convert arrays to SummaryStatistics + for (final double[] data : categoryData) { + final SummaryStatistics dataSummaryStatistics = new SummaryStatistics(); + categoryDataSummaryStatistics.add(dataSummaryStatistics); + for (final double val : data) { + dataSummaryStatistics.addValue(val); + } + } + + return anovaStats(categoryDataSummaryStatistics, false); + + } + + /** * Performs an ANOVA test, evaluating the null hypothesis that there * is no difference among the means of the data categories. 
* @@ -184,73 +261,65 @@ public class OneWayAnova { * * @param categoryData Collection of double[] * arrays each containing data for one category + * @param allowOneElementData if true, allow computation for one catagory + * only or for one data element per category * @return computed AnovaStats * @throws NullArgumentException if categoryData is null - * @throws DimensionMismatchException if the length of the categoryData - * array is less than 2 or a contained double[] array does not contain + * @throws DimensionMismatchException if allowOneElementData is false and the number of + * categories is less than 2 or a contained SummaryStatistics does not contain * at least two values */ - private AnovaStats anovaStats(final Collection categoryData) + private AnovaStats anovaStats(final Collection categoryData, + final boolean allowOneElementData) throws NullArgumentException, DimensionMismatchException { - if (categoryData == null) { - throw new NullArgumentException(); - } + MathUtils.checkNotNull(categoryData); - // check if we have enough categories - if (categoryData.size() < 2) { - throw new DimensionMismatchException( - LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED, - categoryData.size(), 2); - } + if (!allowOneElementData) { + // check if we have enough categories + if (categoryData.size() < 2) { + throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED, + categoryData.size(), 2); + } - // check if each category has enough data and all is double[] - for (double[] array : categoryData) { - if (array.length <= 1) { - throw new DimensionMismatchException( - LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED, - array.length, 2); + // check if each category has enough data + for (final SummaryStatistics array : categoryData) { + if (array.getN() <= 1) { + throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED, + (int) array.getN(), 2); + } } } int dfwg = 0; double sswg = 0; - Sum totsum = 
new Sum(); - SumOfSquares totsumsq = new SumOfSquares(); + double totsum = 0; + double totsumsq = 0; int totnum = 0; - for (double[] data : categoryData) { + for (final SummaryStatistics data : categoryData) { + + final double sum = data.getSum(); + final double sumsq = data.getSumsq(); + final int num = (int) data.getN(); + totnum += num; + totsum += sum; + totsumsq += sumsq; - Sum sum = new Sum(); - SumOfSquares sumsq = new SumOfSquares(); - int num = 0; - - for (int i = 0; i < data.length; i++) { - double val = data[i]; - - // within category - num++; - sum.increment(val); - sumsq.increment(val); - - // for all categories - totnum++; - totsum.increment(val); - totsumsq.increment(val); - } dfwg += num - 1; - double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num; + final double ss = sumsq - ((sum * sum) / num); sswg += ss; } - double sst = totsumsq.getResult() - totsum.getResult() * - totsum.getResult()/totnum; - double ssbg = sst - sswg; - int dfbg = categoryData.size() - 1; - double msbg = ssbg/dfbg; - double mswg = sswg/dfwg; - double F = msbg/mswg; + + final double sst = totsumsq - ((totsum * totsum) / totnum); + final double ssbg = sst - sswg; + final int dfbg = categoryData.size() - 1; + final double msbg = ssbg / dfbg; + final double mswg = sswg / dfwg; + final double F = msbg / mswg; return new AnovaStats(dfbg, dfwg, F); + } /** Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/OneWayAnovaTest.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/OneWayAnovaTest.java?rev=1456958&r1=1456957&r2=1456958&view=diff ============================================================================== --- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/OneWayAnovaTest.java (original) +++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/stat/inference/OneWayAnovaTest.java Fri Mar 15 13:55:27 2013 @@ 
-20,6 +20,7 @@ import java.util.ArrayList; import java.util.List; import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.stat.descriptive.SummaryStatistics; import org.junit.Assert; import org.junit.Test; @@ -103,6 +104,38 @@ public class OneWayAnovaTest { } @Test + public void testAnovaPValueSummaryStatistics() { + // Target comparison values computed using R version 2.6.0 (Linux version) + List threeClasses = new ArrayList(); + SummaryStatistics statsA = new SummaryStatistics(); + for (double a : classA) { + statsA.addValue(a); + } + threeClasses.add(statsA); + SummaryStatistics statsB = new SummaryStatistics(); + for (double b : classB) { + statsB.addValue(b); + } + threeClasses.add(statsB); + SummaryStatistics statsC = new SummaryStatistics(); + for (double c : classC) { + statsC.addValue(c); + } + threeClasses.add(statsC); + + Assert.assertEquals("ANOVA P-value", 6.959446E-06, + testStatistic.anovaPValue(threeClasses, true), 1E-12); + + List twoClasses = new ArrayList(); + twoClasses.add(statsA); + twoClasses.add(statsB); + + Assert.assertEquals("ANOVA P-value", 0.904212960464, + testStatistic.anovaPValue(twoClasses, false), 1E-12); + + } + + @Test public void testAnovaTest() { // Target comparison values computed using R version 2.3.1 (Linux version) List threeClasses = new ArrayList();