commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pste...@apache.org
Subject svn commit: r602306 - in /commons/proper/math/trunk: src/java/org/apache/commons/math/stat/descriptive/moment/Mean.java src/test/org/apache/commons/math/stat/CertifiedDataTest.java xdocs/changes.xml
Date Sat, 08 Dec 2007 02:59:54 GMT
Author: psteitz
Date: Fri Dec  7 18:59:53 2007
New Revision: 602306

URL: http://svn.apache.org/viewvc?rev=602306&view=rev
Log:
Changed Mean.evaluate() to use a two-pass algorithm, improving accuracy
by exploiting the fact that this method has access to the full
array of data values.

Modified:
    commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/moment/Mean.java
    commons/proper/math/trunk/src/test/org/apache/commons/math/stat/CertifiedDataTest.java
    commons/proper/math/trunk/xdocs/changes.xml

Modified: commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/moment/Mean.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/moment/Mean.java?rev=602306&r1=602305&r2=602306&view=diff
==============================================================================
--- commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/moment/Mean.java (original)
+++ commons/proper/math/trunk/src/java/org/apache/commons/math/stat/descriptive/moment/Mean.java Fri Dec  7 18:59:53 2007
@@ -22,24 +22,32 @@
 import org.apache.commons.math.stat.descriptive.summary.Sum;
 
 /**
- * Returns the arithmetic mean of the available values. Uses the definitional 
- * formula:
+ * <p>Computes the arithmetic mean of a set of values. Uses the definitional 
+ * formula:</p>
  * <p>
  * mean = sum(x_i) / n
- * <p>
- * where <code>n</code> is the number of observations.
- * <p>
- * The value of the statistic is computed using the following recursive
- * updating algorithm:
- * <p>
+ * </p>
+ * <p>where <code>n</code> is the number of observations.
+ * </p>
+ * <p>When {@link #increment(double)} is used to add data incrementally from a
+ * stream of (unstored) values, the value of the statistic that 
+ * {@link #getResult()} returns is computed using the following recursive
+ * updating algorithm: </p>
  * <ol>
  * <li>Initialize <code>m = </code> the first value</li>
  * <li>For each additional value, update using <br>
  *   <code>m = m + (new value - m) / (number of observations)</code></li>
  * </ol>
+ * <p> If {@link #evaluate(double[])} is used to compute the mean of an array
+ * of stored values, a two-pass, corrected algorithm is used, starting with
+ * the definitional formula computed using the array of stored values and then
+ * correcting this by adding the mean deviation of the data values from the
+ * arithmetic mean. See, e.g. "Comparison of Several Algorithms for Computing
+ * Sample Means and Variances," Robert F. Ling, Journal of the American
+ * Statistical Association, Vol. 69, No. 348 (Dec., 1974), pp. 859-866. </p>
  * <p>
  *  Returns <code>Double.NaN</code> if the dataset is empty.
- * <p>
+ * </p>
  * <strong>Note that this implementation is not synchronized.</strong> If 
  * multiple threads access an instance of this class concurrently, and at least
  * one of the threads invokes the <code>increment()</code> or 
@@ -131,7 +139,17 @@
     public double evaluate(final double[] values,final int begin, final int length) {
         if (test(values, begin, length)) {
             Sum sum = new Sum();
-            return sum.evaluate(values, begin, length) / ((double) length);
+            double sampleSize = (double) length;
+            
+            // Compute initial estimate using definitional formula
+            double xbar = sum.evaluate(values, begin, length) / sampleSize;
+            
+            // Compute correction factor in second pass
+            double correction = 0;
+            for (int i = begin; i < begin + length; i++) {
+                correction += (values[i] - xbar);
+            }
+            return xbar + (correction/sampleSize);
         }
         return Double.NaN;
     }

Modified: commons/proper/math/trunk/src/test/org/apache/commons/math/stat/CertifiedDataTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/org/apache/commons/math/stat/CertifiedDataTest.java?rev=602306&r1=602305&r2=602306&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/org/apache/commons/math/stat/CertifiedDataTest.java (original)
+++ commons/proper/math/trunk/src/test/org/apache/commons/math/stat/CertifiedDataTest.java Fri Dec  7 18:59:53 2007
@@ -61,57 +61,59 @@
     }
 
     /**
-     * Test StorelessDescriptiveStatistics
+     * Test SummaryStatistics - implementations that do not store the data
+     * and use single pass algorithms to compute statistics
     */
-    public void testUnivariateImpl() throws Exception {
+    public void testSummaryStatistics() throws Exception {
         SummaryStatistics u = SummaryStatistics.newInstance(SummaryStatisticsImpl.class);
         loadStats("data/PiDigits.txt", u);
-        assertEquals("PiDigits: std", std, u.getStandardDeviation(), .0000000000001);
-        assertEquals("PiDigits: mean", mean, u.getMean(), .0000000000001);  
+        assertEquals("PiDigits: std", std, u.getStandardDeviation(), 1E-13);
+        assertEquals("PiDigits: mean", mean, u.getMean(), 1E-13);  
 
         loadStats("data/Mavro.txt", u);
-        assertEquals("Mavro: std", std, u.getStandardDeviation(), .00000000000001);
-        assertEquals("Mavro: mean", mean, u.getMean(), .00000000000001);
+        assertEquals("Mavro: std", std, u.getStandardDeviation(), 1E-14);
+        assertEquals("Mavro: mean", mean, u.getMean(), 1E-14);
         
-        //loadStats("data/Michelso.txt");
-        //assertEquals("Michelso: std", std, u.getStandardDeviation(), .00000000000001);
-        //assertEquals("Michelso: mean", mean, u.getMean(), .00000000000001);   
+        loadStats("data/Michelso.txt", u);
+        assertEquals("Michelso: std", std, u.getStandardDeviation(), 1E-13);
+        assertEquals("Michelso: mean", mean, u.getMean(), 1E-13);   
                                         
         loadStats("data/NumAcc1.txt", u);
-        assertEquals("NumAcc1: std", std, u.getStandardDeviation(), .00000000000001);
-        assertEquals("NumAcc1: mean", mean, u.getMean(), .00000000000001);
+        assertEquals("NumAcc1: std", std, u.getStandardDeviation(), 1E-14);
+        assertEquals("NumAcc1: mean", mean, u.getMean(), 1E-14);
         
-        //loadStats("data/NumAcc2.txt");
-        //assertEquals("NumAcc2: std", std, u.getStandardDeviation(), .000000001);
-        //assertEquals("NumAcc2: mean", mean, u.getMean(), .00000000000001);
+        loadStats("data/NumAcc2.txt", u);
+        assertEquals("NumAcc2: std", std, u.getStandardDeviation(), 1E-14);
+        assertEquals("NumAcc2: mean", mean, u.getMean(), 1E-14);
     }
 
     /**
-     * Test StorelessDescriptiveStatistics
+     * Test DescriptiveStatistics - implementations that store full array of
+     * values and execute multi-pass algorithms
      */
-    public void testStoredUnivariateImpl() throws Exception {
+    public void testDescriptiveStatistics() throws Exception {
 
         DescriptiveStatistics u = DescriptiveStatistics.newInstance();
         
         loadStats("data/PiDigits.txt", u);
-        assertEquals("PiDigits: std", std, u.getStandardDeviation(), .0000000000001);
-        assertEquals("PiDigits: mean", mean, u.getMean(), .0000000000001);
+        assertEquals("PiDigits: std", std, u.getStandardDeviation(), 1E-14);
+        assertEquals("PiDigits: mean", mean, u.getMean(), 1E-14);
         
         loadStats("data/Mavro.txt", u);
-        assertEquals("Mavro: std", std, u.getStandardDeviation(), .00000000000001);
-        assertEquals("Mavro: mean", mean, u.getMean(), .00000000000001);        
+        assertEquals("Mavro: std", std, u.getStandardDeviation(), 1E-14);
+        assertEquals("Mavro: mean", mean, u.getMean(), 1E-14);        
         
-        //loadStats("data/Michelso.txt");
-        //assertEquals("Michelso: std", std, u.getStandardDeviation(), .00000000000001);
-        //assertEquals("Michelso: mean", mean, u.getMean(), .00000000000001);   
+        loadStats("data/Michelso.txt", u);
+        assertEquals("Michelso: std", std, u.getStandardDeviation(), 1E-14);
+        assertEquals("Michelso: mean", mean, u.getMean(), 1E-14);   
 
         loadStats("data/NumAcc1.txt", u);
-        assertEquals("NumAcc1: std", std, u.getStandardDeviation(), .00000000000001);
-        assertEquals("NumAcc1: mean", mean, u.getMean(), .00000000000001);
+        assertEquals("NumAcc1: std", std, u.getStandardDeviation(), 1E-14);
+        assertEquals("NumAcc1: mean", mean, u.getMean(), 1E-14);
         
-        //loadStats("data/NumAcc2.txt");
-        //assertEquals("NumAcc2: std", std, u.getStandardDeviation(), .000000001);
-        //assertEquals("NumAcc2: mean", mean, u.getMean(), .00000000000001);
+        loadStats("data/NumAcc2.txt", u);
+        assertEquals("NumAcc2: std", std, u.getStandardDeviation(), 1E-14);
+        assertEquals("NumAcc2: mean", mean, u.getMean(), 1E-14);
     }
 
     /**

Modified: commons/proper/math/trunk/xdocs/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/xdocs/changes.xml?rev=602306&r1=602305&r2=602306&view=diff
==============================================================================
--- commons/proper/math/trunk/xdocs/changes.xml (original)
+++ commons/proper/math/trunk/xdocs/changes.xml Fri Dec  7 18:59:53 2007
@@ -111,7 +111,12 @@
         and SummaryStatistics concrete classes. Pushed implementations up
         from DescriptiveStatisticsImpl, SummaryStatisticsImpl. Made
         implementations of statistics configurable via setters.
-      </action>    
+      </action>
+      <action dev="psteitz" type="fix" issue="MATH-174">
+        Changed Mean.evaluate() to use a two-pass algorithm, improving accuracy
+        by exploiting the fact that this method has access to the full
+        array of data values.
+      </action>     
     </release>
     <release version="1.1" date="2005-12-17"  
  description="This is a maintenance release containing bug fixes and enhancements.



Mime
View raw message