commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pste...@apache.org
Subject cvs commit: jakarta-commons/math/src/java/org/apache/commons/math/stat/inference TestStatistic.java TestStatisticImpl.java
Date Sun, 11 Apr 2004 20:19:14 GMT
psteitz     2004/04/11 13:19:14

  Added:       math/src/java/org/apache/commons/math/stat/inference
                        TestStatistic.java TestStatisticImpl.java
  Removed:     math/src/java/org/apache/commons/math/stat
                        TestStatistic.java TestStatisticImpl.java
  Log:
  Created inference package, moved statistical tests there.
  
  Revision  Changes    Path
  1.1                  jakarta-commons/math/src/java/org/apache/commons/math/stat/inference/TestStatistic.java
  
  Index: TestStatistic.java
  ===================================================================
  /*
   * Copyright 2003-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  package org.apache.commons.math.stat;
  
  import org.apache.commons.math.MathException;
  
  /**
   * A collection of commonly used test statistics and statistical tests.
   * <p>
   * Declares chi-square goodness of fit statistics and tests, and one- and
   * two-sample t statistics and t-tests that operate either on raw
   * <code>double[]</code> samples or on {@link StatisticalSummary} instances.
   * 
   * @version $Revision: 1.1 $ $Date: 2004/04/11 20:19:14 $ 
   */
  public interface TestStatistic {
      
      /**
       * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
       * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
       * frequency counts.
       * <p>
       * This statistic can be used to perform Chi-Square tests.
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>Expected counts must all be positive.
       * </li>
       * <li>Observed counts must all be &gt;= 0.
       * </li>
       * <li>The observed and expected arrays must have the same length and
       * their common length must be at least 2.
       * </li></ul><p>
       * If any of the preconditions are not met, an
       * <code>IllegalArgumentException</code> is thrown.
       *
       * @param expected array of expected frequency counts
       * @param observed array of observed frequency counts
       * @return chiSquare statistic
       * @throws IllegalArgumentException if preconditions are not met
       */
      double chiSquare(double[] expected, double[] observed) 
          throws IllegalArgumentException;
      
      /**
       * Returns the <i>observed significance level</i>, or <a href=
       * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
       * p-value</a>, associated with a
       * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
       * Chi-square goodness of fit test</a> comparing the <code>observed</code>
       * frequency counts to those in the <code>expected</code> array.
       * <p>
       * The number returned is the smallest significance level at which one can reject
       * the null hypothesis that the observed counts conform to the frequency distribution
       * described by the expected counts.
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>Expected counts must all be positive.
       * </li>
       * <li>Observed counts must all be &gt;= 0.
       * </li>
       * <li>The observed and expected arrays must have the same length and
       * their common length must be at least 2.
       * </li></ul><p>
       * If any of the preconditions are not met, an
       * <code>IllegalArgumentException</code> is thrown.
       *
       * @param expected array of expected frequency counts
       * @param observed array of observed frequency counts
       * @return p-value
       * @throws IllegalArgumentException if preconditions are not met
       * @throws MathException if an error occurs computing the p-value
       */
      double chiSquareTest(double[] expected, double[] observed) 
          throws IllegalArgumentException, MathException;
      
      /**
       * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
       * Chi-square goodness of fit test</a> evaluating the null hypothesis that the
       * observed counts conform to the frequency distribution described by the
       * expected counts, with significance level <code>alpha</code>.
       * <p>
       * <strong>Example:</strong><br>
       * To test the hypothesis that <code>observed</code> follows
       * <code>expected</code> at the 99% level, use <p>
       * <code>chiSquareTest(expected, observed, 0.01) </code>
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>Expected counts must all be positive.
       * </li>
       * <li>Observed counts must all be &gt;= 0.
       * </li>
       * <li>The observed and expected arrays must have the same length and
       * their common length must be at least 2.
       * </li>
       * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
       * </li></ul><p>
       * If any of the preconditions are not met, an
       * <code>IllegalArgumentException</code> is thrown.
       *
       * @param expected array of expected frequency counts
       * @param observed array of observed frequency counts
       * @param alpha significance level of the test
       * @return true iff null hypothesis can be rejected with confidence
       * 1 - alpha
       * @throws IllegalArgumentException if preconditions are not met
       * @throws MathException if an error occurs performing the test
       */
      boolean chiSquareTest(double[] expected, double[] observed, double alpha) 
          throws IllegalArgumentException, MathException;
      
      /**
       * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
       * t statistic </a> given observed values and a comparison constant.
       * <p>
       * This statistic can be used to perform a one sample t-test for the mean.
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>The observed array length must be at least 2.
       * </li></ul>
       *
       * @param mu comparison constant
       * @param observed array of values
       * @return t statistic
       * @throws IllegalArgumentException if input array length is less than 2
       */
      double t(double mu, double[] observed) 
          throws IllegalArgumentException;
      
      /**
       * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
       * 2-sample t statistic </a>, without the assumption of equal sample variances.
       * <p>
       * This statistic can be used to perform a two-sample t-test to compare
       * sample means.
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>The observed array lengths must both be at least 5.
       * </li></ul>
       *
       * @param sample1 array of sample data values
       * @param sample2 array of sample data values
       * @return t statistic
       * @throws IllegalArgumentException if the precondition is not met
       * @throws MathException if the statistic cannot be computed due to a
       *         convergence or other numerical error.
       */
      double t(double[] sample1, double[] sample2) 
          throws IllegalArgumentException, MathException;
      
      /**
       * Returns the <i>observed significance level</i>, or
       * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
       * p-value</a>, associated with a two-sample, two-tailed t-test
       * comparing the means of the input arrays.
       * <p>
       * The number returned is the smallest significance level
       * at which one can reject the null hypothesis that the two means are
       * equal in favor of the two-sided alternative that they are different.
       * For a one-sided test, divide the returned value by 2.
       * <p>
       * The test does not assume that the underlying population variances are
       * equal and it uses approximated degrees of freedom computed from the
       * sample data as described
       * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">here</a>
       * <p>
       * <strong>Usage Note:</strong><br>
       * The validity of the p-value depends on the assumptions of the parametric
       * t-test procedure, as discussed
       * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>The observed array lengths must both be at least 5.
       * </li></ul>
       *
       * @param sample1 array of sample data values
       * @param sample2 array of sample data values
       * @return p-value for t-test
       * @throws IllegalArgumentException if the precondition is not met
       * @throws MathException if an error occurs computing the p-value
       */
      double tTest(double[] sample1, double[] sample2)
          throws IllegalArgumentException, MathException;
      
      /**
       * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
       * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
       * and <code>sample2</code> are drawn from populations with the same mean,
       * with significance level <code>alpha</code>.
       * <p>
       * Returns <code>true</code> iff the null hypothesis that the means are
       * equal can be rejected with confidence <code>1 - alpha</code>.  Because
       * the two-sided p-value is twice the one-sided p-value, to
       * perform a 1-sided test, use <code>alpha * 2</code>
       * <p>
       * <strong>Examples:</strong><br><ol>
       * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
       * the 95% level, use <br><code>tTest(sample1, sample2, 0.05) </code>
       * </li>
       * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
       * at the 99% level, first verify that the measured mean of
       * <code>sample 1</code> is less than the mean of <code>sample 2</code>
       * and then use <br><code>tTest(sample1, sample2, 0.02) </code>
       * </li></ol>
       * <p>
       * The test does not assume that the underlying population variances are
       * equal and it uses approximated degrees of freedom computed from the
       * sample data as described
       * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">here</a>
       * <p>
       * <strong>Usage Note:</strong><br>
       * The validity of the test depends on the assumptions of the parametric
       * t-test procedure, as discussed
       * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>The observed array lengths must both be at least 5.
       * </li>
       * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
       * </li></ul>
       *
       * @param sample1 array of sample data values
       * @param sample2 array of sample data values
       * @param alpha significance level of the test
       * @return true if the null hypothesis can be rejected with
       * confidence 1 - alpha
       * @throws IllegalArgumentException if the preconditions are not met
       * @throws MathException if an error occurs performing the test
       */
      boolean tTest(double[] sample1, double[] sample2, double alpha)
          throws IllegalArgumentException, MathException;
      
      /**
       * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
       * two-sided t-test</a> evaluating the null hypothesis that the mean of the
       * population from which <code>sample</code> is drawn equals <code>mu</code>.
       * <p>
       * Returns <code>true</code> iff the null hypothesis can be
       * rejected with confidence <code>1 - alpha</code>.  Because
       * the two-sided p-value is twice the one-sided p-value, to
       * perform a 1-sided test, use <code>alpha * 2</code>
       * <p>
       * <strong>Examples:</strong><br><ol>
       * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
       * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
       * </li>
       * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
       * at the 99% level, first verify that the measured sample mean is less
       * than <code>mu</code> and then use
       * <br><code>tTest(mu, sample, 0.02) </code>
       * </li></ol>
       * <p>
       * <strong>Usage Note:</strong><br>
       * The validity of the test depends on the assumptions of the one-sample
       * parametric t-test procedure, as discussed
       * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>The observed array length must be at least 5.
       * </li></ul>
       *
       * @param mu constant value to compare sample mean against
       * @param sample array of sample data values
       * @param alpha significance level of the test
       * @return true if the null hypothesis can be rejected with
       * confidence 1 - alpha
       * @throws IllegalArgumentException if the precondition is not met
       * @throws MathException if an error occurs performing the test
       */
      boolean tTest(double mu, double[] sample, double alpha)
          throws IllegalArgumentException, MathException;
      
      /**
       * Returns the <i>observed significance level</i>, or
       * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
       * p-value</a>, associated with a one-sample, two-tailed t-test
       * comparing the mean of the input array with the constant <code>mu</code>.
       * <p>
       * The number returned is the smallest significance level
       * at which one can reject the null hypothesis that the mean equals
       * <code>mu</code> in favor of the two-sided alternative that the mean
       * is different from <code>mu</code>. For a one-sided test, divide the
       * returned value by 2.
       * <p>
       * <strong>Usage Note:</strong><br>
       * The validity of the test depends on the assumptions of the parametric
       * t-test procedure, as discussed
       * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>The observed array length must be at least 5.
       * </li></ul>
       *
       * @param mu constant value to compare sample mean against
       * @param sample array of sample data values
       * @return p-value
       * @throws IllegalArgumentException if the precondition is not met
       * @throws MathException if an error occurs computing the p-value
       */
      double tTest(double mu, double[] sample)
          throws IllegalArgumentException, MathException;
      
      /**
       * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
       * t statistic </a> to use in comparing the dataset described by <code>sampleStats</code>
       *  to <code>mu</code>.
       * <p>
       * This statistic can be used to perform a one sample t-test for the mean.
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li><code>sampleStats.getN() &gt;= 2</code>.
       * </li></ul>
       *
       * @param mu comparison constant
       * @param sampleStats StatisticalSummary holding sample summary statistics
       * @return t statistic
       * @throws IllegalArgumentException if the precondition is not met
       */
      double t(double mu, StatisticalSummary sampleStats) 
          throws IllegalArgumentException;
      
      /**
       * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
       * 2-sample t statistic </a>, comparing the means of the datasets described
       * by two {@link StatisticalSummary} instances without the assumption of
       * equal sample variances.
       * <p>
       * This statistic can be used to perform a two-sample t-test to compare
       * sample means.
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>The datasets described by the two StatisticalSummary instances must
       * each contain at least 5 observations.
       * </li></ul>
       *
       * @param sampleStats1 StatisticalSummary describing data from the first sample
       * @param sampleStats2 StatisticalSummary describing data from the second sample
       * @return t statistic
       * @throws IllegalArgumentException if the precondition is not met
       */
      double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2) 
          throws IllegalArgumentException;
      
      /**
       * Returns the <i>observed significance level</i>, or
       * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
       * p-value</a>, associated with a two-sample, two-tailed t-test
       * comparing the means of the datasets described by two
       * StatisticalSummary instances.
       * <p>
       * The number returned is the smallest significance level
       * at which one can reject the null hypothesis that the two means are
       * equal in favor of the two-sided alternative that they are different.
       * For a one-sided test, divide the returned value by 2.
       * <p>
       * The test does not assume that the underlying population variances are
       * equal and it uses approximated degrees of freedom computed from the
       * sample data as described
       * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">here</a>
       * <p>
       * <strong>Usage Note:</strong><br>
       * The validity of the p-value depends on the assumptions of the parametric
       * t-test procedure, as discussed
       * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>The datasets described by the two StatisticalSummary instances must
       * each contain at least 5 observations.
       * </li></ul>
       *
       * @param sampleStats1 StatisticalSummary describing data from the first sample
       * @param sampleStats2 StatisticalSummary describing data from the second sample
       * @return p-value for t-test
       * @throws IllegalArgumentException if the precondition is not met
       * @throws MathException if an error occurs computing the p-value
       */
      double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
          throws IllegalArgumentException, MathException;
      
      /**
       * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
       * two-sided t-test</a> evaluating the null hypothesis that <code>sampleStats1</code>
       * and <code>sampleStats2</code> describe datasets drawn from populations
       * with the same mean, with significance level <code>alpha</code>.
       * <p>
       * Returns <code>true</code> iff the null hypothesis that the means are
       * equal can be rejected with confidence <code>1 - alpha</code>.  Because
       * the two-sided p-value is twice the one-sided p-value, to
       * perform a 1-sided test, use <code>alpha * 2</code>
       * <p>
       * <strong>Examples:</strong><br><ol>
       * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
       * the 95% level, use
       * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
       * </li>
       * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
       * at the 99% level, first verify that the measured mean of
       * <code>sample 1</code> is less than the mean of <code>sample 2</code>
       * and then use <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
       * </li></ol>
       * <p>
       * The test does not assume that the underlying population variances are
       * equal and it uses approximated degrees of freedom computed from the
       * sample data as described
       * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">here</a>
       * <p>
       * <strong>Usage Note:</strong><br>
       * The validity of the test depends on the assumptions of the parametric
       * t-test procedure, as discussed
       * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>The datasets described by the two StatisticalSummary instances must
       * each contain at least 5 observations.
       * </li>
       * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
       * </li></ul>
       *
       * @param sampleStats1 StatisticalSummary describing sample data values
       * @param sampleStats2 StatisticalSummary describing sample data values
       * @param alpha significance level of the test
       * @return true if the null hypothesis can be rejected with
       * confidence 1 - alpha
       * @throws IllegalArgumentException if the preconditions are not met
       * @throws MathException if an error occurs performing the test
       */
      boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2, 
          double alpha)
          throws IllegalArgumentException, MathException;
      
      /**
       * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
       * two-sided t-test</a> evaluating the null hypothesis that the mean of the
       * population from which the dataset described by <code>sampleStats</code>
       * is drawn equals <code>mu</code>.
       * <p>
       * Returns <code>true</code> iff the null hypothesis can be
       * rejected with confidence <code>1 - alpha</code>.  Because
       * the two-sided p-value is twice the one-sided p-value, to
       * perform a 1-sided test, use <code>alpha * 2</code>
       * <p>
       * <strong>Examples:</strong><br><ol>
       * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
       * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
       * </li>
       * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
       * at the 99% level, first verify that the measured sample mean is less
       * than <code>mu</code> and then use
       * <br><code>tTest(mu, sampleStats, 0.02) </code>
       * </li></ol>
       * <p>
       * <strong>Usage Note:</strong><br>
       * The validity of the test depends on the assumptions of the one-sample
       * parametric t-test procedure, as discussed
       * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>The sample must include at least 5 observations.
       * </li></ul>
       *
       * @param mu constant value to compare sample mean against
       * @param sampleStats StatisticalSummary describing sample data values
       * @param alpha significance level of the test
       * @return true if the null hypothesis can be rejected with
       * confidence 1 - alpha
       * @throws IllegalArgumentException if the precondition is not met
       * @throws MathException if an error occurs performing the test
       */
      boolean tTest(double mu, StatisticalSummary sampleStats, double alpha)
          throws IllegalArgumentException, MathException;
      
      /**
       * Returns the <i>observed significance level</i>, or
       * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
       * p-value</a>, associated with a one-sample, two-tailed t-test
       * comparing the mean of the dataset described by <code>sampleStats</code>
       * with the constant <code>mu</code>.
       * <p>
       * The number returned is the smallest significance level
       * at which one can reject the null hypothesis that the mean equals
       * <code>mu</code> in favor of the two-sided alternative that the mean
       * is different from <code>mu</code>. For a one-sided test, divide the
       * returned value by 2.
       * <p>
       * <strong>Usage Note:</strong><br>
       * The validity of the test depends on the assumptions of the parametric
       * t-test procedure, as discussed
       * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
       * <p>
       * <strong>Preconditions</strong>: <ul>
       * <li>The sample must contain at least 5 observations.
       * </li></ul>
       *
       * @param mu constant value to compare sample mean against
       * @param sampleStats StatisticalSummary describing sample data
       * @return p-value
       * @throws IllegalArgumentException if the precondition is not met
       * @throws MathException if an error occurs computing the p-value
       */
      double tTest(double mu, StatisticalSummary sampleStats)
          throws IllegalArgumentException, MathException;
  }
  
  
  
  
  1.1                  jakarta-commons/math/src/java/org/apache/commons/math/stat/inference/TestStatisticImpl.java
  
  Index: TestStatisticImpl.java
  ===================================================================
  /*
   * Copyright 2003-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.math.stat;
  
  import java.io.Serializable;
  
  import org.apache.commons.math.MathException;
  import org.apache.commons.math.distribution.DistributionFactory;
  import org.apache.commons.math.distribution.TDistribution;
  import org.apache.commons.math.distribution.ChiSquaredDistribution;
  
  /**
   * Implements test statistics defined in the TestStatistic interface.
   *
   * @version $Revision: 1.1 $ $Date: 2004/04/11 20:19:14 $
   */
  public class TestStatisticImpl implements TestStatistic, Serializable {
  
      /** Serialization version identifier for this Serializable class. */
      static final long serialVersionUID = 3357444126133491679L;
  
      /**
       * Creates a new instance. The constructor performs no initialization
       * of its own.
       */
      public TestStatisticImpl() {
          super();
      }
  
      /**
       * Computes the chi-square statistic, i.e. the sum over all cells of
       * <code>(observed[i] - expected[i])^2 / expected[i]</code>.
       *
       * @param expected array of expected frequency counts; every entry must
       * be positive
       * @param observed array of observed frequency counts; every entry must
       * be non-negative
       * @return chi-square test statistic
       * @throws IllegalArgumentException if either array is null, if the
       * arrays differ in length, if their common length is less than 2, or if
       * the sign constraints on the counts are violated
       */
      public double chiSquare(double[] expected, double[] observed)
          throws IllegalArgumentException {
          // Reject null explicitly so callers get the documented
          // IllegalArgumentException instead of a NullPointerException.
          if ((expected == null) || (observed == null)) {
              throw new IllegalArgumentException("observed, expected arrays must not be null");
          }
          if ((expected.length < 2) || (expected.length != observed.length)) {
              throw new IllegalArgumentException("observed, expected array lengths incorrect");
          }
          // Checking the minimum of each array is enough to enforce the sign constraints.
          if ((StatUtils.min(expected) <= 0) || (StatUtils.min(observed) < 0)) {
              throw new IllegalArgumentException( "observed counts must be non-negative,"
                      + " expected counts must be positive");
          }
          double sumSq = 0.0d;
          for (int i = 0; i < observed.length; i++) {
              double dev = observed[i] - expected[i];
              sumSq += dev * dev / expected[i];
          }
          return sumSq;
      }
  
      /**
       * Returns the p-value of a chi-square goodness of fit test, computed as
       * one minus the cumulative probability of the chi-square statistic under
       * a chi-squared distribution created with parameter
       * <code>expected.length - 1</code> (used as the degrees of freedom).
       *
       * @param expected array of expected frequency counts
       * @param observed array of observed frequency counts
       * @return p-value
       * @throws IllegalArgumentException if preconditions are not met
       * @throws MathException if an error occurs computing the p-value
       */
      public double chiSquareTest(double[] expected, double[] observed)
          throws IllegalArgumentException, MathException {
          // Compute the statistic first: chiSquare() validates the arrays, so
          // invalid input (e.g. a length-1 array, which would give 0 degrees of
          // freedom) is rejected before the distribution is built.
          double statistic = chiSquare(expected, observed);
          ChiSquaredDistribution chiSquaredDistribution =
              DistributionFactory.newInstance().createChiSquareDistribution(
                  (double) expected.length - 1);
          return 1 - chiSquaredDistribution.cumulativeProbability(statistic);
      }
  
      /**
       * Performs a chi-square goodness of fit test at significance level
       * <code>alpha</code> by comparing the p-value returned by
       * {@link #chiSquareTest(double[], double[])} with <code>alpha</code>.
       *
       * @param expected array of expected frequency counts
       * @param observed array of observed frequency counts
       * @param alpha significance level of the test; values &lt;= 0 or
       * &gt; 0.5 are rejected
       * @return true iff the p-value is strictly less than alpha, i.e. the
       * null hypothesis can be rejected with confidence 1 - alpha
       * @throws IllegalArgumentException if alpha is out of range or the
       * array preconditions are not met
       * @throws MathException if an error occurs performing the test
       */
      public boolean chiSquareTest(double[] expected, double[] observed, double alpha)
          throws IllegalArgumentException, MathException {
          final boolean alphaOutOfRange = (alpha > 0.5) || (alpha <= 0);
          if (alphaOutOfRange) {
              throw new IllegalArgumentException("bad significance level: " + alpha);
          }
          final double pValue = chiSquareTest(expected, observed);
          return pValue < alpha;
      }
  
      /**
       * Computes a one-sample t statistic from the mean and variance of
       * <code>observed</code>, delegating to the summary-value overload of
       * <code>t</code> declared elsewhere in this class.
       * <p>
       * Note that this implementation requires at least 5 observations, which
       * is stricter than the minimum of 2 stated in the
       * <code>TestStatistic</code> documentation.
       *
       * @param mu comparison constant
       * @param observed array of values
       * @return t statistic
       * @throws IllegalArgumentException if the input array is null or its
       * length is less than 5
       */
      public double t(double mu, double[] observed)
          throws IllegalArgumentException {
          final boolean enoughData = (observed != null) && (observed.length >= 5);
          if (!enoughData) {
              throw new IllegalArgumentException("insufficient data for t statistic");
          }
          final double sampleMean = StatUtils.mean(observed);
          final double sampleVariance = StatUtils.variance(observed);
          return t(sampleMean, mu, sampleVariance, observed.length);
      }
  
      /**
       * Performs a one-sample t-test at significance level <code>alpha</code>
       * by comparing the p-value returned by {@link #tTest(double, double[])}
       * with <code>alpha</code>.
       *
       * @param mu constant value to compare sample mean against
       * @param sample array of sample data values
       * @param alpha significance level of the test; values &lt;= 0 or
       * &gt; 0.5 are rejected
       * @return true iff the p-value is strictly less than alpha, i.e. the
       * null hypothesis can be rejected with confidence 1 - alpha
       * @throws IllegalArgumentException if alpha is out of range or the
       * sample precondition is not met
       * @throws MathException if an error occurs computing the p-value
       */
      public boolean tTest(double mu, double[] sample, double alpha)
          throws IllegalArgumentException, MathException {
          final boolean alphaOutOfRange = (alpha > 0.5) || (alpha <= 0);
          if (alphaOutOfRange) {
              throw new IllegalArgumentException("bad significance level: " + alpha);
          }
          final double pValue = tTest(mu, sample);
          return pValue < alpha;
      }
  
      /**
       * Computes a two-sample t statistic from the means, variances and sizes
       * of the two samples, delegating to the summary-value overload of
       * <code>t</code> declared elsewhere in this class.
       *
       * @param sample1 array of sample data values
       * @param sample2 array of sample data values
       * @return t-statistic
       * @throws IllegalArgumentException if either array is null or contains
       * fewer than 5 values
       */
      public double t(double[] sample1, double[] sample2)
          throws IllegalArgumentException {
          if ((sample1 == null) || (sample2 == null)) {
              throw new IllegalArgumentException("insufficient data for t statistic");
          }
          if ((sample1.length < 5) || (sample2.length < 5)) {
              throw new IllegalArgumentException("insufficient data for t statistic");
          }
          final double mean1 = StatUtils.mean(sample1);
          final double mean2 = StatUtils.mean(sample2);
          final double variance1 = StatUtils.variance(sample1);
          final double variance2 = StatUtils.variance(sample2);
          final double n1 = (double) sample1.length;
          final double n2 = (double) sample2.length;
          return t(mean1, mean2, variance1, variance2, n1, n2);
      }
  
      /**
       * Returns the p-value of a two-sample t-test computed from the means,
       * variances and sizes of the two samples, delegating to the
       * summary-value overload of <code>tTest</code> declared elsewhere in
       * this class.
       *
       * @param sample1 array of sample data values
       * @param sample2 array of sample data values
       * @return tTest p-value
       * @throws IllegalArgumentException if either array is null or contains
       * fewer than 5 values
       * @throws MathException if an error occurs computing the p-value
       */
      public double tTest(double[] sample1, double[] sample2)
          throws IllegalArgumentException, MathException {
          if ((sample1 == null) || (sample2 == null)) {
              throw new IllegalArgumentException("insufficient data");
          }
          if ((sample1.length < 5) || (sample2.length < 5)) {
              throw new IllegalArgumentException("insufficient data");
          }
          final double mean1 = StatUtils.mean(sample1);
          final double mean2 = StatUtils.mean(sample2);
          final double variance1 = StatUtils.variance(sample1);
          final double variance2 = StatUtils.variance(sample2);
          final double n1 = (double) sample1.length;
          final double n2 = (double) sample2.length;
          return tTest(mean1, mean2, variance1, variance2, n1, n2);
      }
  
      /**
       * @param sample1 array of sample data values
       * @param sample2 array of sample data values
       * @param alpha significance level
       * @return true if the null hypothesis can be rejected with 
       *     confidence 1 - alpha
       * @throws IllegalArgumentException if the preconditions are not met
       * @throws MathException if an error occurs performing the test
       */
      public boolean tTest(double[] sample1, double[] sample2, double alpha)
          throws IllegalArgumentException, MathException {
          // The significance level must lie in (0, 0.5].  NaN is rejected explicitly:
          // every ordered comparison against NaN evaluates to false, so the plain
          // range check alone would let NaN through and the method would then
          // quietly answer "false" instead of reporting the bad argument.
          if (Double.isNaN(alpha) || (alpha <= 0) || (alpha > 0.5)) {
              throw new IllegalArgumentException("bad significance level: " + alpha);
          }
          // Reject the null hypothesis when the two-sample p-value falls below alpha.
          return (tTest(sample1, sample2) < alpha);
      }
  
      /**
       * @param mu constant value to compare sample mean against
       * @param sample array of sample data values
       * @return p-value
       * @throws IllegalArgumentException if the precondition is not met
       * @throws MathException if an error occurs computing the p-value
       */
      public double tTest(double mu, double[] sample)
          throws IllegalArgumentException, MathException {
          // A sample must be supplied and must hold at least five values.
          final boolean unusable = sample == null || sample.length < 5;
          if (unusable) {
              throw new IllegalArgumentException("insufficient data for t statistic");
          }

          // Summarise the sample, then delegate to the 1-sample p-value helper.
          final double sampleMean = StatUtils.mean(sample);
          final double sampleVariance = StatUtils.variance(sample);
          final double sampleSize = sample.length;
          return tTest(sampleMean, mu, sampleVariance, sampleSize);
      }
  
      /**
       * @param mu comparison constant
       * @param sampleStats StatisticalSummary holding sample summary statistics
       * @return t statistic
       * @throws IllegalArgumentException if the precondition is not met
       */
      public double t(double mu, StatisticalSummary sampleStats)
          throws IllegalArgumentException {
          // Guard clauses: a summary must be supplied and describe at least five values.
          if (sampleStats == null) {
              throw new IllegalArgumentException("insufficient data for t statistic");
          }
          if (sampleStats.getN() < 5) {
              throw new IllegalArgumentException("insufficient data for t statistic");
          }

          // Pull the summary values out and delegate to the 1-sample helper.
          final double sampleMean = sampleStats.getMean();
          final double sampleVariance = sampleStats.getVariance();
          final double sampleSize = sampleStats.getN();
          return t(sampleMean, mu, sampleVariance, sampleSize);
      }
  
      /**
       * @param sampleStats1 StatisticalSummary describing data from the first sample
       * @param sampleStats2 StatisticalSummary describing data from the second sample
       * @return t statistic
       * @throws IllegalArgumentException if the precondition is not met
       */
      public double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
          throws IllegalArgumentException {
          // Guard clauses: both summaries must be supplied ...
          if (sampleStats1 == null || sampleStats2 == null) {
              throw new IllegalArgumentException("insufficient data for t statistic");
          }
          // ... and the smaller of the two samples must hold at least five values.
          if (Math.min(sampleStats1.getN(), sampleStats2.getN()) < 5) {
              throw new IllegalArgumentException("insufficient data for t statistic");
          }

          // Pull the summary values out and delegate to the 2-sample helper.
          final double mean1 = sampleStats1.getMean();
          final double mean2 = sampleStats2.getMean();
          final double variance1 = sampleStats1.getVariance();
          final double variance2 = sampleStats2.getVariance();
          final double size1 = sampleStats1.getN();
          final double size2 = sampleStats2.getN();
          return t(mean1, mean2, variance1, variance2, size1, size2);
      }
  
      /**
       * @param sampleStats1 StatisticalSummary describing data from the first sample
       * @param sampleStats2 StatisticalSummary describing data from the second sample
       * @return p-value for t-test
       * @throws IllegalArgumentException if the precondition is not met
       * @throws MathException if an error occurs computing the p-value
       */
      public double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
          throws IllegalArgumentException, MathException {
          // Guard clauses: both summaries must be supplied ...
          if (sampleStats1 == null || sampleStats2 == null) {
              throw new IllegalArgumentException("insufficient data for t statistic");
          }
          // ... and the smaller of the two samples must hold at least five values.
          if (Math.min(sampleStats1.getN(), sampleStats2.getN()) < 5) {
              throw new IllegalArgumentException("insufficient data for t statistic");
          }

          // Pull the summary values out and delegate to the 2-sample p-value helper.
          final double mean1 = sampleStats1.getMean();
          final double mean2 = sampleStats2.getMean();
          final double variance1 = sampleStats1.getVariance();
          final double variance2 = sampleStats2.getVariance();
          final double size1 = sampleStats1.getN();
          final double size2 = sampleStats2.getN();
          return tTest(mean1, mean2, variance1, variance2, size1, size2);
      }
  
      /**
       * @param sampleStats1 StatisticalSummary describing sample data values
       * @param sampleStats2 StatisticalSummary describing sample data values
       * @param alpha significance level of the test
       * @return true if the null hypothesis can be rejected with 
       *     confidence 1 - alpha
       * @throws IllegalArgumentException if the preconditions are not met
       * @throws MathException if an error occurs performing the test
       */
      public boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2,
          double alpha)
          throws IllegalArgumentException, MathException {
          // The significance level must lie in (0, 0.5].  NaN is rejected explicitly:
          // every ordered comparison against NaN evaluates to false, so the plain
          // range check alone would let NaN through and the method would then
          // quietly answer "false" instead of reporting the bad argument.
          if (Double.isNaN(alpha) || (alpha <= 0) || (alpha > 0.5)) {
              throw new IllegalArgumentException("bad significance level: " + alpha);
          }
          // Reject the null hypothesis when the two-sample p-value falls below alpha.
          return (tTest(sampleStats1, sampleStats2) < alpha);
      }
  
      /**
       * @param mu constant value to compare sample mean against
       * @param sampleStats StatisticalSummary describing sample data values
       * @param alpha significance level of the test
       * @return true if the null hypothesis can be rejected with confidence 1 - alpha
       * @throws IllegalArgumentException if the precondition is not met
       * @throws MathException if an error occurs computing the p-value
       */
      public boolean tTest(double mu, StatisticalSummary sampleStats, double alpha)
          throws IllegalArgumentException, MathException {
          // The significance level must lie in (0, 0.5].  NaN is rejected explicitly:
          // every ordered comparison against NaN evaluates to false, so the plain
          // range check alone would let NaN through and the method would then
          // quietly answer "false" instead of reporting the bad argument.
          if (Double.isNaN(alpha) || (alpha <= 0) || (alpha > 0.5)) {
              throw new IllegalArgumentException("bad significance level: " + alpha);
          }
          // Reject the null hypothesis when the one-sample p-value falls below alpha.
          return (tTest(mu, sampleStats) < alpha);
      }
  
      /**
       * @param mu constant value to compare sample mean against
       * @param sampleStats StatisticalSummary describing sample data
       * @return p-value
       * @throws IllegalArgumentException if the precondition is not met
       * @throws MathException if an error occurs computing the p-value
       */
      public double tTest(double mu, StatisticalSummary sampleStats)
          throws IllegalArgumentException, MathException {
          // Guard clauses: a summary must be supplied and describe at least five values.
          if (sampleStats == null) {
              throw new IllegalArgumentException("insufficient data for t statistic");
          }
          if (sampleStats.getN() < 5) {
              throw new IllegalArgumentException("insufficient data for t statistic");
          }

          // Pull the summary values out and delegate to the 1-sample p-value helper.
          final double sampleMean = sampleStats.getMean();
          final double sampleVariance = sampleStats.getVariance();
          final double sampleSize = sampleStats.getN();
          return tTest(sampleMean, mu, sampleVariance, sampleSize);
      }
  
      //----------------------------------------------- Private methods 
  
      /**
       * Computes approximate degrees of freedom for 2-sample t-test.
       * 
       * @param v1 first sample variance
       * @param v2 second sample variance
       * @param n1 first sample n
       * @param n2 second sample n
       * @return approximate degrees of freedom
       */
      private double df(double v1, double v2, double n1, double n2) {
          // Sum of the per-sample "variance over n" terms; squared, it is the numerator.
          final double pooled = (v1 / n1) + (v2 / n2);
          final double numerator = pooled * pooled;

          // Each sample contributes v^2 / (n^2 * (n - 1)) to the denominator.
          final double term1 = (v1 * v1) / (n1 * n1 * (n1 - 1d));
          final double term2 = (v2 * v2) / (n2 * n2 * (n2 - 1d));

          return numerator / (term1 + term2);
      }
  
      /**
      * Computes t test statistic for 2-sample t-test.
      * 
      * @param m1 first sample mean
      * @param m2 second sample mean
      * @param v1 first sample variance
      * @param v2 second sample variance
      * @param n1 first sample n
      * @param n2 second sample n
      * @return t test statistic
      */
      private double t(double m1, double m2, double v1, double v2, double n1, double n2) {
          // Difference of the sample means divided by the square root of the
          // summed "variance over n" terms of the two samples.
          final double meanDifference = m1 - m2;
          final double scale = Math.sqrt((v1 / n1) + (v2 / n2));
          return meanDifference / scale;
      }
  
      /**
       * Computes t test statistic for 1-sample t-test.
       * 
       * @param m sample mean
       * @param mu constant to test against
       * @param v sample variance
       * @param n sample n
       * @return t test statistic
       */
      private double t(double m, double mu, double v, double n) {
          // Distance of the sample mean from mu divided by sqrt(variance / n).
          final double meanDifference = m - mu;
          final double scale = Math.sqrt(v / n);
          return meanDifference / scale;
      }
  
      /**
       * Computes p-value for 2-sided, 2-sample t-test.
       * 
       * @param m1 first sample mean
       * @param m2 second sample mean
       * @param v1 first sample variance
       * @param v2 second sample variance
       * @param n1 first sample n
       * @param n2 second sample n
       * @return p-value
       * @throws MathException if an error occurs computing the p-value
       */
      private double tTest(double m1, double m2, double v1, double v2, double n1, double n2)
          throws MathException {
          // Only the magnitude of the statistic matters for the 2-sided p-value.
          final double magnitude = Math.abs(t(m1, m2, v1, v2, n1, n2));

          // t distribution with the approximate degrees of freedom from df(...).
          final DistributionFactory factory = DistributionFactory.newInstance();
          final TDistribution distribution =
              factory.createTDistribution(df(v1, v2, n1, n2));

          // p-value = probability mass lying outside [-|t|, |t|].
          final double central = distribution.cumulativeProbability(-magnitude, magnitude);
          return 1.0 - central;
      }
  
      /**
       * Computes p-value for 2-sided, 1-sample t-test.
       * 
       * @param m sample mean
       * @param mu constant to test against
       * @param v sample variance
       * @param n sample n
       * @return p-value
       * @throws MathException if an error occurs computing the p-value
       */
      private double tTest(double m, double mu, double v, double n)
          throws MathException {
          // Only the magnitude of the statistic matters for the 2-sided p-value.
          final double magnitude = Math.abs(t(m, mu, v, n));

          // t distribution with n - 1 degrees of freedom.
          final DistributionFactory factory = DistributionFactory.newInstance();
          final TDistribution distribution = factory.createTDistribution(n - 1);

          // p-value = probability mass lying outside [-|t|, |t|].
          final double central = distribution.cumulativeProbability(-magnitude, magnitude);
          return 1.0 - central;
      }
  }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org


Mime
View raw message