commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pste...@apache.org
Subject cvs commit: jakarta-commons/math/src/test/org/apache/commons/math/stat/multivariate BivariateRegressionTest.java
Date Sun, 11 Apr 2004 21:52:29 GMT
psteitz     2004/04/11 14:52:28

  Added:       math/src/java/org/apache/commons/math/stat/multivariate
                        BivariateRegression.java
               math/src/test/org/apache/commons/math/stat/multivariate
                        BivariateRegressionTest.java
  Removed:     math/src/java/org/apache/commons/math/stat
                        BivariateRegression.java
               math/src/test/org/apache/commons/math/stat
                        BivariateRegressionTest.java
  Log:
  Moved BivariateRegression to multivariate subpackage.
  
  Revision  Changes    Path
  1.1                  jakarta-commons/math/src/java/org/apache/commons/math/stat/multivariate/BivariateRegression.java
  
  Index: BivariateRegression.java
  ===================================================================
  /*
   * Copyright 2003-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.math.stat.multivariate;
  import java.io.Serializable;
  
  import org.apache.commons.math.MathException;
  import org.apache.commons.math.distribution.DistributionFactory;
  import org.apache.commons.math.distribution.TDistribution;
  
  /**
   * Estimates an ordinary least squares regression model
   * with one independent variable.
   * <p>
   * <code> y = intercept + slope * x  </code>
   * <p>
   * Standard errors for <code>intercept</code> and <code>slope</code> are
   * available as well as ANOVA, r-square and Pearson's r statistics.
   * <p>
   * Observations (x,y pairs) can be added to the model one at a time or they
   * can be provided in a 2-dimensional array.  The observations are not stored
   * in memory, so there is no limit to the number of observations that can be
   * added to the model.
   * <p>
   * <strong>Usage Notes</strong>: <ul>
   * <li> When there are fewer than two observations in the model, or when
   * there is no variation in the x values (i.e. all x values are the same)
   * the regression statistics return <code>NaN</code>. At least two
   * observations with different x coordinates are required to estimate a
   * bivariate regression model.
   * </li>
   * <li> getters for the statistics always compute values based on the current
   * set of observations -- i.e., you can get statistics, then add more data
   * and get updated statistics without using a new instance.  There is no
   * "compute" method that updates all statistics.  Each of the getters performs
   * the necessary computations to return the requested statistic.</li>
   * </ul>
   *
   * @version $Revision: 1.1 $ $Date: 2004/04/11 21:52:28 $
   */
  public class BivariateRegression implements Serializable {

      static final long serialVersionUID = -3004689053607543335L;

      /** sum of x values */
      private double sumX = 0d;

      /** total variation in x (sum of squared deviations from xbar) */
      private double sumXX = 0d;

      /** sum of y values */
      private double sumY = 0d;

      /** total variation in y (sum of squared deviations from ybar) */
      private double sumYY = 0d;

      /** sum of products of deviations of x and y from their means */
      private double sumXY = 0d;

      /** number of observations */
      private long n = 0;

      /** mean of accumulated x values, used in updating formulas */
      private double xbar = 0;

      /** mean of accumulated y values, used in updating formulas */
      private double ybar = 0;

      // ---------------------Public methods--------------------------------------

      /**
       * Adds the observation (x,y) to the regression data set.
       * <p>
       * Uses updating formulas for means and sums of squares defined in
       * "Algorithms for Computing the Sample Variance: Analysis and
       * Recommendations", Chan, T.F., Golub, G.H., and LeVeque, R.J.
       * 1983, American Statistician, vol. 37, pp. 242-247, referenced in
       * Weisberg, S. "Applied Linear Regression". 2nd Ed. 1985
       *
       * @param x independent variable value
       * @param y dependent variable value
       */
      public void addData(double x, double y) {
          if (n == 0) {
              // The first observation only seeds the running means.
              xbar = x;
              ybar = y;
          } else {
              double dx = x - xbar;
              double dy = y - ybar;
              double newCount = n + 1.0;
              // Evaluation order (multiply by n, then divide) is kept as is
              // so that accumulated values are numerically unchanged.
              sumXX += dx * dx * n / newCount;
              sumYY += dy * dy * n / newCount;
              sumXY += dx * dy * n / newCount;
              xbar += dx / newCount;
              ybar += dy / newCount;
          }
          sumX += x;
          sumY += y;
          n++;
      }

      /**
       * Adds the observations represented by the elements in
       * <code>data</code>.
       * <p>
       * <code>(data[0][0],data[0][1])</code> will be the first observation, then
       * <code>(data[1][0],data[1][1])</code>, etc.
       * <p>
       * This method does not replace data that has already been added.  The
       * observations represented by <code>data</code> are added to the existing
       * dataset.
       * <p>
       * To replace all data, use <code>clear()</code> before adding the new
       * data.
       *
       * @param data array of observations to be added; each row must hold at
       * least two elements (x first, then y)
       */
      public void addData(double[][] data) {
          for (int i = 0; i < data.length; i++) {
              addData(data[i][0], data[i][1]);
          }
      }

      /**
       * Clears all data from the model, including the running means used by
       * the updating formulas.
       */
      public void clear() {
          sumX = 0d;
          sumXX = 0d;
          sumY = 0d;
          sumYY = 0d;
          sumXY = 0d;
          xbar = 0d;
          ybar = 0d;
          n = 0;
      }

      /**
       * Returns the number of observations that have been added to the model.
       *
       * @return n number of observations that have been added.
       */
      public long getN() {
          return n;
      }

      /**
       * Returns the "predicted" <code>y</code> value associated with the
       * supplied <code>x</code> value,  based on the data that has been
       * added to the model when this method is activated.
       * <p>
       * <code> predict(x) = intercept + slope * x </code>
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>At least two observations (with at least two different x values)
       * must have been added before invoking this method. If this method is
       * invoked before a model can be estimated, <code>Double.NaN</code> is
       * returned.
       * </li></ul>
       *
       * @param x input <code>x</code> value
       * @return predicted <code>y</code> value
       */
      public double predict(double x) {
          // Compute the slope once and reuse it for the intercept.
          double b1 = getSlope();
          return getIntercept(b1) + b1 * x;
      }

      /**
       * Returns the intercept of the estimated regression line.
       * <p>
       * The least squares estimate of the intercept is computed using the
       * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.
       * The intercept is sometimes denoted b0.
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>At least two observations (with at least two different x values)
       * must have been added before invoking this method. If this method is
       * invoked before a model can be estimated, <code>Double.NaN</code> is
       * returned.
       * </li></ul>
       *
       * @return the intercept of the regression line
       */
      public double getIntercept() {
          return getIntercept(getSlope());
      }

      /**
       * Returns the slope of the estimated regression line.
       * <p>
       * The least squares estimate of the slope is computed using the
       * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.
       * The slope is sometimes denoted b1.
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>At least two observations (with at least two different x values)
       * must have been added before invoking this method. If this method is
       * invoked before a model can be estimated, <code>Double.NaN</code> is
       * returned.
       * </li></ul>
       *
       * @return the slope of the regression line
       */
      public double getSlope() {
          if (n < 2) {
              return Double.NaN; //not enough data
          }
          if (Math.abs(sumXX) < 10 * Double.MIN_VALUE) {
              return Double.NaN; //not enough variation in x
          }
          return sumXY / sumXX;
      }

      /**
       * Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
       * sum of squared errors</a> (SSE) associated with the regression
       * model.
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>At least two observations (with at least two different x values)
       * must have been added before invoking this method. If this method is
       * invoked before a model can be estimated, <code>Double.NaN</code> is
       * returned.
       * </li></ul>
       *
       * @return sum of squared errors associated with the regression model
       */
      public double getSumSquaredErrors() {
          return getSumSquaredErrors(getSlope());
      }

      /**
       * Returns the sum of squared deviations of the y values about their mean.
       * <p>
       * This is defined as SSTO
       * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.
       * <p>
       * If <code>n < 2</code>, this returns <code>Double.NaN</code>.
       *
       * @return sum of squared deviations of y values
       */
      public double getTotalSumSquares() {
          if (n < 2) {
              return Double.NaN;
          }
          return sumYY;
      }

      /**
       * Returns the sum of squared deviations of the predicted y values about
       * their mean (which equals the mean of y).
       * <p>
       * This is usually abbreviated SSR or SSM.  It is defined as SSM
       * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>At least two observations (with at least two different x values)
       * must have been added before invoking this method. If this method is
       * invoked before a model can be estimated, <code>Double.NaN</code> is
       * returned.
       * </li></ul>
       *
       * @return sum of squared deviations of predicted y values
       */
      public double getRegressionSumSquares() {
          return getRegressionSumSquares(getSlope());
      }

      /**
       * Returns the sum of squared errors divided by the degrees of freedom,
       * usually abbreviated MSE.
       * <p>
       * If there are fewer than <strong>three</strong> data pairs in the model,
       * or if there is no variation in <code>x</code>, this returns
       * <code>Double.NaN</code>.
       *
       * @return mean square error (SSE divided by <code>n - 2</code>)
       */
      public double getMeanSquareError() {
          if (n < 3) {
              return Double.NaN;
          }
          return getSumSquaredErrors() / (double) (n - 2);
      }

      /**
       * Returns <a href="http://mathworld.wolfram.com/CorrelationCoefficient.html">
       * Pearson's product moment correlation coefficient</a>,
       * usually denoted r.
       * <p>
       * The magnitude is the square root of r-square; the sign is taken from
       * the slope estimate.
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>At least two observations (with at least two different x values)
       * must have been added before invoking this method. If this method is
       * invoked before a model can be estimated, <code>Double.NaN</code> is
       * returned.
       * </li></ul>
       *
       * @return Pearson's r
       */
      public double getR() {
          double b1 = getSlope();
          double result = Math.sqrt(getRSquare(b1));
          if (b1 < 0) {
              result = -result;
          }
          return result;
      }

      /**
       * Returns the <a href="http://www.xycoon.com/coefficient1.htm">
       * coefficient of determination</a>,
       * usually denoted r-square.
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>At least two observations (with at least two different x values)
       * must have been added before invoking this method. If this method is
       * invoked before a model can be estimated, <code>Double.NaN</code> is
       * returned.
       * </li></ul>
       *
       * @return r-square
       */
      public double getRSquare() {
          return getRSquare(getSlope());
      }

      /**
       * Returns the <a href="http://www.xycoon.com/standarderrorb0.htm">
       * standard error of the intercept estimate</a>,
       * usually denoted s(b0).
       * <p>
       * If there are fewer than <strong>three</strong> observations in the
       * model, or if there is no variation in x, this returns
       * <code>Double.NaN</code>.
       *
       * @return standard error associated with intercept estimate
       */
      public double getInterceptStdErr() {
          return Math.sqrt(
              getMeanSquareError() * ((1d / (double) n) + (xbar * xbar) / sumXX));
      }

      /**
       * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
       * error of the slope estimate</a>,
       * usually denoted s(b1).
       * <p>
       * If there are fewer than <strong>three</strong> data pairs in the model,
       * or if there is no variation in x, this returns <code>Double.NaN</code>.
       *
       * @return standard error associated with slope estimate
       */
      public double getSlopeStdErr() {
          return Math.sqrt(getMeanSquareError() / sumXX);
      }

      /**
       * Returns the half-width of a 95% confidence interval for the slope
       * estimate.
       * <p>
       * The 95% confidence interval is
       * <p>
       * <code>(getSlope() - getSlopeConfidenceInterval(),
       * getSlope() + getSlopeConfidenceInterval())</code>
       * <p>
       * If there are fewer than <strong>three</strong> observations in the
       * model, or if there is no variation in x, this returns
       * <code>Double.NaN</code>.
       * <p>
       * <strong>Usage Note</strong>:<br>
       * The validity of this statistic depends on the assumption that the
       * observations included in the model are drawn from a
       * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
       * Bivariate Normal Distribution</a>.
       *
       * @return half-width of 95% confidence interval for the slope estimate
       * @throws MathException if the underlying t distribution computation fails
       */
      public double getSlopeConfidenceInterval() throws MathException {
          return getSlopeConfidenceInterval(0.05d);
      }

      /**
       * Returns the half-width of a (100-100*alpha)% confidence interval for
       * the slope estimate.
       * <p>
       * The (100-100*alpha)% confidence interval is
       * <p>
       * <code>(getSlope() - getSlopeConfidenceInterval(),
       * getSlope() + getSlopeConfidenceInterval())</code>
       * <p>
       * To request, for example, a 99% confidence interval, use
       * <code>alpha = .01</code>
       * <p>
       * <strong>Usage Note</strong>:<br>
       * The validity of this statistic depends on the assumption that the
       * observations included in the model are drawn from a
       * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
       * Bivariate Normal Distribution</a>.
       * <p>
       * <strong> Preconditions:</strong><ul>
       * <li>If there are fewer than <strong>three</strong> observations in the
       * model, or if there is no variation in x, this returns
       * <code>Double.NaN</code>.
       * </li>
       * <li><code>(0 < alpha < 1)</code>; otherwise an
       * <code>IllegalArgumentException</code> is thrown. A <code>NaN</code>
       * alpha is rejected the same way.
       * </li></ul>
       *
       * @param alpha the desired significance level
       * @return half-width of the (100-100*alpha)% confidence interval for the
       * slope estimate
       * @throws IllegalArgumentException if alpha is not strictly between 0 and 1
       * @throws MathException if the underlying t distribution computation fails
       */
      public double getSlopeConfidenceInterval(double alpha)
          throws MathException {
          // Negated range test: every comparison with NaN is false, so this
          // form rejects NaN where "alpha >= 1 || alpha <= 0" would not.
          if (!(alpha > 0d && alpha < 1d)) {
              throw new IllegalArgumentException(
                  "alpha must be strictly between 0 and 1, got " + alpha);
          }
          if (n < 3) {
              // n - 2 would not be a positive number of degrees of freedom,
              // so return the documented NaN without asking for a distribution.
              return Double.NaN;
          }
          return getSlopeStdErr()
              * getTDistribution().inverseCumulativeProbability(1d - alpha / 2d);
      }

      /**
       * Returns the significance level of the slope (equiv) correlation.
       * <p>
       * Specifically, the returned value is intended to be the smallest
       * <code>alpha</code> such that the slope confidence interval with
       * significance level equal to <code>alpha</code> does not include
       * <code>0</code>.
       * On regression output, this is often denoted <code>Prob(|t| > 0)</code>
       * <p>
       * NOTE(review): the value computed here is the single-tail probability
       * <code>1 - P(T <= |slope| / s(b1))</code>.  The interval returned by
       * <code>getSlopeConfidenceInterval(alpha)</code> uses the
       * <code>1 - alpha/2</code> quantile, so the smallest alpha that excludes
       * zero would be twice this value -- confirm which definition is intended
       * before treating the result as a two-sided p-value.
       * <p>
       * <strong>Usage Note</strong>:<br>
       * The validity of this statistic depends on the assumption that the
       * observations included in the model are drawn from a
       * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
       * Bivariate Normal Distribution</a>.
       * <p>
       * If there are fewer than <strong>three</strong> observations in the
       * model, or if there is no variation in x, this returns
       * <code>Double.NaN</code>.
       *
       * @return significance level for slope/correlation
       * @throws MathException if the underlying t distribution computation fails
       */
      public double getSignificance() throws MathException {
          if (n < 3) {
              // n - 2 would not be a positive number of degrees of freedom,
              // so return the documented NaN without asking for a distribution.
              return Double.NaN;
          }
          double tStatistic = Math.abs(getSlope()) / getSlopeStdErr();
          return 1d - getTDistribution().cumulativeProbability(tStatistic);
      }

      // ---------------------Private methods-----------------------------------

      /**
       * Returns the intercept of the estimated regression line, given the slope.
       * <p>
       * Will return <code>NaN</code> if slope is <code>NaN</code>.
       *
       * @param slope current slope
       * @return the intercept of the regression line
       */
      private double getIntercept(double slope) {
          return (sumY - slope * sumX) / ((double) n);
      }

      /**
       * Returns the sum of squared errors associated with the regression
       * model, using the slope of the regression line.
       * <p>
       * Returns NaN if the slope is NaN.
       *
       * @param b1 current slope; only inspected to detect that no model can
       * be estimated
       * @return sum of squared errors associated with the regression model
       */
      private double getSumSquaredErrors(double b1) {
          if (Double.isNaN(b1)) {
              // No model could be estimated; do not rely on 0/0 producing NaN.
              return Double.NaN;
          }
          return sumYY - sumXY * sumXY / sumXX;
      }

      /**
       * Computes r-square from the slope.
       * <p>
       * Will return NaN if slope is NaN.
       *
       * @param b1 current slope
       * @return r-square
       */
      private double getRSquare(double b1) {
          double ssto = getTotalSumSquares();
          return (ssto - getSumSquaredErrors(b1)) / ssto;
      }

      /**
       * Computes SSR from b1.
       *
       * @param slope regression slope estimate
       * @return sum of squared deviations of predicted y values
       */
      private double getRegressionSumSquares(double slope) {
          return slope * slope * sumXX;
      }

      /**
       * Uses distribution framework to get a t distribution instance
       * with df = n - 2
       *
       * @return t distribution with df = n - 2
       */
      private TDistribution getTDistribution() {
          return DistributionFactory.newInstance().createTDistribution(n - 2);
      }
  }
  
  
  
  1.1                  jakarta-commons/math/src/test/org/apache/commons/math/stat/multivariate/BivariateRegressionTest.java
  
  Index: BivariateRegressionTest.java
  ===================================================================
  /*
   * Copyright 2003-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  package org.apache.commons.math.stat.multivariate;
  
  import java.util.Random;
  
  import org.apache.commons.math.MathException;
  
  import junit.framework.Test;
  import junit.framework.TestCase;
  import junit.framework.TestSuite;
  /**
   * Test cases for the BivariateRegression class.
   *
   * @version $Revision: 1.1 $ $Date: 2004/04/11 21:52:28 $
   */

  public final class BivariateRegressionTest extends TestCase {

      /*
       * NIST "Norris" reference data set from
       * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
       * Strangely, order is {y,x}
       */
      private double[][] data = {
          { 0.1, 0.2 },
          { 338.8, 337.4 },
          { 118.1, 118.2 },
          { 888.0, 884.6 },
          { 9.2, 10.1 },
          { 228.1, 226.5 },
          { 668.5, 666.3 },
          { 998.5, 996.3 },
          { 449.1, 448.6 },
          { 778.9, 777.0 },
          { 559.2, 558.2 },
          { 0.3, 0.4 },
          { 0.1, 0.6 },
          { 778.1, 775.5 },
          { 668.8, 666.9 },
          { 339.3, 338.0 },
          { 448.9, 447.5 },
          { 10.8, 11.6 },
          { 557.7, 556.0 },
          { 228.3, 228.1 },
          { 998.0, 995.8 },
          { 888.8, 887.6 },
          { 119.6, 120.2 },
          { 0.3, 0.3 },
          { 0.6, 0.3 },
          { 557.6, 556.8 },
          { 339.3, 339.1 },
          { 888.0, 887.2 },
          { 998.5, 999.0 },
          { 778.9, 779.0 },
          { 10.2, 11.1 },
          { 117.6, 118.3 },
          { 228.9, 229.2 },
          { 668.4, 669.1 },
          { 449.2, 448.9 },
          { 0.2, 0.5 }
      };

      /*
       * Correlation example from
       * http://www.xycoon.com/correlation.htm
       */
      private double[][] corrData = {
          { 101.0, 99.2 },
          { 100.1, 99.0 },
          { 100.0, 100.0 },
          { 90.6, 111.6 },
          { 86.5, 122.2 },
          { 89.7, 117.6 },
          { 90.6, 121.1 },
          { 82.8, 136.0 },
          { 70.1, 154.2 },
          { 65.4, 153.6 },
          { 61.3, 158.5 },
          { 62.5, 140.6 },
          { 63.6, 136.2 },
          { 52.6, 168.0 },
          { 59.7, 154.3 },
          { 59.5, 149.0 },
          { 61.3, 165.5 }
      };

      /*
       * From Moore and McCabe, "Introduction to the Practice of Statistics"
       * Example 10.3
       */
      private double[][] infData = {
          { 15.6, 5.2 },
          { 26.8, 6.1 },
          { 37.8, 8.7 },
          { 36.4, 8.5 },
          { 35.5, 8.8 },
          { 18.6, 4.9 },
          { 15.3, 4.5 },
          { 7.9, 2.5 },
          { 0.0, 1.1 }
      };

      /*
       * From http://www.xycoon.com/simple_linear_regression.htm
       */
      private double[][] infData2 = {
          { 1, 3 },
          { 2, 5 },
          { 3, 7 },
          { 4, 14 },
          { 5, 11 }
      };

      public BivariateRegressionTest(String name) {
          super(name);
      }

      public void setUp() {
      }

      public static Test suite() {
          TestSuite suite = new TestSuite(BivariateRegressionTest.class);
          suite.setName("BivariateRegression Tests");
          return suite;
      }

      /**
       * Checks estimates against the certified values of the NIST Norris
       * data set.  The data rows are stored as {y,x}, hence the swapped
       * indices when adding observations.
       */
      public void testNorris() {
          BivariateRegression regression = new BivariateRegression();
          for (int i = 0; i < data.length; i++) {
              regression.addData(data[i][1], data[i][0]);
          }
          assertEquals("slope", 1.00211681802045, regression.getSlope(), 10E-12);
          assertEquals(
              "slope std err",
              0.429796848199937E-03,
              regression.getSlopeStdErr(),
              10E-12);
          assertEquals("number of observations", 36, regression.getN());
          assertEquals(
              "intercept",
              -0.262323073774029,
              regression.getIntercept(),
              10E-12);
          assertEquals(
              "std err intercept",
              0.232818234301152,
              regression.getInterceptStdErr(),
              10E-12);
          assertEquals(
              "r-square",
              0.999993745883712,
              regression.getRSquare(),
              10E-12);
          assertEquals(
              "SSR",
              4255954.13232369,
              regression.getRegressionSumSquares(),
              10E-9);
          assertEquals(
              "MSE",
              0.782864662630069,
              regression.getMeanSquareError(),
              10E-10);
          assertEquals(
              "SSE",
              26.6173985294224,
              regression.getSumSquaredErrors(),
              10E-9);
          assertEquals(
              "predict(0)",
              -0.262323073774029,
              regression.predict(0),
              10E-12);
          assertEquals(
              "predict(1)",
              1.00211681802045 - 0.262323073774029,
              regression.predict(1),
              10E-12);
      }

      /** Checks r and r-square against the xycoon correlation example. */
      public void testCorr() {
          BivariateRegression regression = new BivariateRegression();
          regression.addData(corrData);
          assertEquals("number of observations", 17, regression.getN());
          assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
          assertEquals("r", -.946638, regression.getR(), 10E-6);
      }

      /**
       * Checks which statistics are NaN for an empty model, for a model with
       * no variation in x, for exactly two usable observations, and for three.
       */
      public void testNaNs() {

          BivariateRegression regression = new BivariateRegression();

          // Empty model: nothing can be estimated
          assertTrue(
              "intercept not NaN",
              Double.isNaN(regression.getIntercept()));
          assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
          assertTrue(
              "slope std err not NaN",
              Double.isNaN(regression.getSlopeStdErr()));
          assertTrue(
              "intercept std err not NaN",
              Double.isNaN(regression.getInterceptStdErr()));
          assertTrue(
              "MSE not NaN",
              Double.isNaN(regression.getMeanSquareError()));
          assertTrue("e not NaN", Double.isNaN(regression.getR()));
          assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
          assertTrue(
              "RSS not NaN",
              Double.isNaN(regression.getRegressionSumSquares()));
          assertTrue(
              "SSE not NaN",
              Double.isNaN(regression.getSumSquaredErrors()));
          assertTrue(
              "SSTO not NaN",
              Double.isNaN(regression.getTotalSumSquares()));
          assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));

          regression.addData(1, 2);
          regression.addData(1, 3);

          // No x variation, so these should still blow...
          assertTrue(
              "intercept not NaN",
              Double.isNaN(regression.getIntercept()));
          assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
          assertTrue(
              "slope std err not NaN",
              Double.isNaN(regression.getSlopeStdErr()));
          assertTrue(
              "intercept std err not NaN",
              Double.isNaN(regression.getInterceptStdErr()));
          assertTrue(
              "MSE not NaN",
              Double.isNaN(regression.getMeanSquareError()));
          assertTrue("e not NaN", Double.isNaN(regression.getR()));
          assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
          assertTrue(
              "RSS not NaN",
              Double.isNaN(regression.getRegressionSumSquares()));
          assertTrue(
              "SSE not NaN",
              Double.isNaN(regression.getSumSquaredErrors()));
          assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));

          // but SSTO should be OK
          assertTrue("SSTO NaN", !Double.isNaN(regression.getTotalSumSquares()));

          regression = new BivariateRegression();

          regression.addData(1, 2);
          regression.addData(3, 3);

          // All should be OK except MSE, s(b0), s(b1) which need one more df
          assertTrue("interceptNaN", !Double.isNaN(regression.getIntercept()));
          assertTrue("slope NaN", !Double.isNaN(regression.getSlope()));
          assertTrue(
              "slope std err not NaN",
              Double.isNaN(regression.getSlopeStdErr()));
          assertTrue(
              "intercept std err not NaN",
              Double.isNaN(regression.getInterceptStdErr()));
          assertTrue(
              "MSE not NaN",
              Double.isNaN(regression.getMeanSquareError()));
          assertTrue("r NaN", !Double.isNaN(regression.getR()));
          assertTrue("r-square NaN", !Double.isNaN(regression.getRSquare()));
          assertTrue(
              "RSS NaN",
              !Double.isNaN(regression.getRegressionSumSquares()));
          assertTrue("SSE NaN", !Double.isNaN(regression.getSumSquaredErrors()));
          assertTrue("SSTO NaN", !Double.isNaN(regression.getTotalSumSquares()));
          assertTrue("predict NaN", !Double.isNaN(regression.predict(0)));

          regression.addData(1, 4);

          // MSE, s(b0), s(b1) should all be OK now
          assertTrue("MSE NaN", !Double.isNaN(regression.getMeanSquareError()));
          assertTrue(
              "slope std err NaN",
              !Double.isNaN(regression.getSlopeStdErr()));
          assertTrue(
              "intercept std err NaN",
              !Double.isNaN(regression.getInterceptStdErr()));
      }

      /** Checks that clear() empties the model and that it can be reused. */
      public void testClear() {
          BivariateRegression regression = new BivariateRegression();
          regression.addData(corrData);
          assertEquals("number of observations", 17, regression.getN());
          regression.clear();
          assertEquals("number of observations", 0, regression.getN());
          regression.addData(corrData);
          assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
          regression.addData(data);
          assertEquals("number of observations", 53, regression.getN());
      }

      /**
       * Checks the inference statistics.  A MathException is declared rather
       * than caught so that a failing computation is reported by the test
       * runner instead of being printed and ignored.
       */
      public void testInference() throws MathException {

          BivariateRegression regression = new BivariateRegression();
          regression.addData(infData);

          assertEquals(
              "slope confidence interval",
              0.0271,
              regression.getSlopeConfidenceInterval(),
              0.0001);
          assertEquals(
              "slope std err",
              0.01146,
              regression.getSlopeStdErr(),
              0.0001);

          regression = new BivariateRegression();
          regression.addData(infData2);
          assertEquals(
              "significance",
              0.023331,
              regression.getSignificance(),
              0.0001);

          //FIXME: get a real example to test against with alpha = .01
          assertTrue(
              "tighter means wider",
              regression.getSlopeConfidenceInterval()
                  < regression.getSlopeConfidenceInterval(0.01));

          try {
              regression.getSlopeConfidenceInterval(1);
              fail("expecting IllegalArgumentException for alpha = 1");
          } catch (IllegalArgumentException ex) {
              // expected: alpha must lie strictly between 0 and 1
          }

      }

      /** A perfect positive linear relation must be maximally significant. */
      public void testPerfect() throws MathException {
          BivariateRegression regression = new BivariateRegression();
          int n = 100;
          for (int i = 0; i < n; i++) {
              regression.addData(((double) i) / (n - 1), i);
          }

          assertEquals(0.0, regression.getSignificance(), 1.0e-5);
          assertTrue(regression.getSlope() > 0.0);
      }

      /** A perfect negative linear relation must be maximally significant. */
      public void testPerfectNegative() throws MathException {
          BivariateRegression regression = new BivariateRegression();
          int n = 100;
          for (int i = 0; i < n; i++) {
              regression.addData(- ((double) i) / (n - 1), i);
          }

          assertEquals(0.0, regression.getSignificance(), 1.0e-5);
          assertTrue(regression.getSlope() < 0.0);
      }

      /** Seeded random y values must give a significance strictly in (0, 1). */
      public void testRandom() throws MathException {
          BivariateRegression regression = new BivariateRegression();
          Random random = new Random(1);
          int n = 100;
          for (int i = 0; i < n; i++) {
              regression.addData(((double) i) / (n - 1), random.nextDouble());
          }

          double significance = regression.getSignificance();
          assertTrue(0.0 < significance && significance < 1.0);
      }
  }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org


Mime
View raw message