commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pste...@apache.org
Subject svn commit: r1144986 - in /commons/proper/math/trunk: pom.xml src/main/java/org/apache/commons/math/stat/regression/RegressionResults.java src/main/java/org/apache/commons/math/stat/regression/UpdatingMultipleLinearRegression.java
Date Sun, 10 Jul 2011 23:45:44 GMT
Author: psteitz
Date: Sun Jul 10 23:45:43 2011
New Revision: 1144986

URL: http://svn.apache.org/viewvc?rev=1144986&view=rev
Log:
Added interface and reporting class for updating regression.  JIRA: MATH-607.  Contributed
by Greg Sterijevski.

Added:
    commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/RegressionResults.java
  (with props)
    commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/UpdatingMultipleLinearRegression.java
  (with props)
Modified:
    commons/proper/math/trunk/pom.xml

Modified: commons/proper/math/trunk/pom.xml
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/pom.xml?rev=1144986&r1=1144985&r2=1144986&view=diff
==============================================================================
--- commons/proper/math/trunk/pom.xml (original)
+++ commons/proper/math/trunk/pom.xml Sun Jul 10 23:45:43 2011
@@ -220,6 +220,9 @@
       <name>David Stefka</name>
     </contributor>
     <contributor>
+      <name>Greg Sterijevski</name>
+    </contributor>
+    <contributor>
       <name>Mauro Talevi</name>
     </contributor>
     <contributor>

Added: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/RegressionResults.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/RegressionResults.java?rev=1144986&view=auto
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/RegressionResults.java (added)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/RegressionResults.java Sun Jul 10 23:45:43 2011
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.regression;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * Results of a Multiple Linear Regression model fit.
+ *
+ * @version $Id$
+ * @since 3.0
+ */
+public class RegressionResults implements Serializable {
+
+    private static final int SSE_IDX = 0;
+    private static final int SST_IDX = 1;
+    private static final int RSQ_IDX = 2;
+    private static final int MSE_IDX = 3;
+    private static final int ADJRSQ_IDX = 4;
+    private static final long serialVersionUID = 1l;
+    private final double[] parameters;
+    private final double[][] varCovData;
+    private final boolean isSymmetricVCD;
+    private final int rank;
+    private final long nobs;
+    private final boolean containsConstant;
+    private final double[] globalFitInfo;
+
+    /**
+     *  Set the default constructor to private access
+     *  to prevent inadvertent instantiation
+     */
+    @SuppressWarnings("unused")
+    private RegressionResults() {
+        this.parameters = null;
+        this.varCovData = null;
+        this.rank = -1;
+        this.nobs = -1;
+        this.containsConstant = false;
+        this.isSymmetricVCD = false;
+        this.globalFitInfo = null;
+    }
+
+    public RegressionResults(
+            final double[] parameters, final double[][] varcov,
+            final boolean isSymmetricCompressed,
+            final long nobs, final int rank,
+            final double sumy, final double sumysq, final double sse,
+            final boolean containsConstant,
+            final boolean copyData) {
+        if (copyData) {
+            this.parameters = Arrays.copyOf(parameters, parameters.length);
+            this.varCovData = new double[varcov.length][];
+            for (int i = 0; i < varcov.length; i++) {
+                this.varCovData[i] = Arrays.copyOf(varcov[i], varcov[i].length);
+            }
+        } else {
+            this.parameters = parameters;
+            this.varCovData = varcov;
+        }
+        this.isSymmetricVCD = isSymmetricCompressed;
+        this.nobs = nobs;
+        this.rank = rank;
+        this.containsConstant = containsConstant;
+        this.globalFitInfo = new double[5];
+        Arrays.fill(this.globalFitInfo, Double.NaN);
+
+        if (rank > 2) {
+            this.globalFitInfo[SST_IDX] = containsConstant ?
+                    (sumysq - sumy * sumy / ((double) nobs)) : sumysq;
+        }
+        this.globalFitInfo[SSE_IDX] = sse;
+        this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] /
+                ((double) (nobs - rank));
+        this.globalFitInfo[RSQ_IDX] = 1.0 -
+                this.globalFitInfo[SSE_IDX] /
+                this.globalFitInfo[SST_IDX];
+
+        if (!containsConstant) {
+            this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (1.0 - this.globalFitInfo[RSQ_IDX]) *
+                    (nobs / (nobs - rank));
+        } else {
+            this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) /
+                    (globalFitInfo[SST_IDX] * (nobs - rank));
+        }
+    }
+
+    /**
+     * <p>Returns the parameter estimate for the regressor at the given index.</p>
+     *
+     * <p>A redundant regressor will have its redundancy flag set, as well as
+     *  a parameters estimated equal to {@code Double.NaN}</p>
+     *
+     * @param index an integer index which must be in the range [0, numberOfParameters-1]
+     * @return parameters estimated for regressor at index
+     * @throws IndexOutOfBoundsException thrown if the index >= numberOfParameters
+     */
+    public double getParameterEstimate(int index) throws IndexOutOfBoundsException {
+        if (parameters == null) {
+            return Double.NaN;
+        }
+        if (index < 0 || index >= this.parameters.length) {
+            throw new IndexOutOfBoundsException("Index is outside of the 0 to number of variables
- 1 range");
+        }
+        return this.parameters[index];
+    }
+
+    /**
+     * <p>Returns a copy of the regression parameters estimates.</p>
+     *
+     * <p>The parameter estimates are returned in the natural order of the data.</p>
+     *
+     * <p>A redundant regressor will have its redundancy flag set, as will
+     *  a parameter estimate equal to {@code Double.NaN}.</p>
+     *
+     * @return array of parameter estimates, null if no estimation occurred
+     */
+    public double[] getParameterEstimates() {
+        if (this.parameters == null) {
+            return null;
+        }
+        return Arrays.copyOf(parameters, parameters.length);
+    }
+
+    /**
+     * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
+     * error of the parameter estimate at index</a>,
+     * usually denoted s(b<sub>index</sub>).
+     *
+     * @param index an integer index which must be in the range [0, numberOfParameters-1]
+     * @return standard errors associated with parameters estimated at index
+     * @throws IndexOutOfBoundsException thrown if the index >= numberOfParameters
+     */
+    public double getStdErrorOfEstimate(int index) throws IndexOutOfBoundsException {
+        if (parameters == null) {
+            return Double.NaN;
+        }
+        if (index < 0 || index >= this.parameters.length) {
+            throw new IndexOutOfBoundsException("Index is outside of the 0 to number of variables
- 1 range");
+        }
+        double var = this.getVcvElement(index, index);
+        if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
+            return FastMath.sqrt(rank);
+        }
+        return Double.NaN;
+    }
+
+    /**
+     * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
+     * error of the parameter estimates</a>,
+     * usually denoted s(b<sub>i</sub>).</p>
+     *
+     * <p>If there are problems with an ill conditioned design matrix then the regressor
+     * which is redundant will be assigned <code>Double.NaN</code>. </p>
+     *
+     * @return an array standard errors associated with parameters estimates,
+     *  null if no estimation occurred
+     */
+    public double[] getStdErrorOfEstimates() {
+        if (parameters == null) {
+            return null;
+        }
+        double[] se = new double[this.parameters.length];
+        for (int i = 0; i < this.parameters.length; i++) {
+            double var = this.getVcvElement(i, i);
+            if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
+                se[i] = FastMath.sqrt(rank);
+                continue;
+            }
+            se[i] = Double.NaN;
+        }
+        return se;
+    }
+
+    /**
+     * <p>Returns the covariance between regression parameters i and j.</p>
+     *
+     * <p>If there are problems with an ill conditioned design matrix then the covariance
+     * which involves redundant columns will be assigned {@code Double.NaN}. </p>
+     *
+     * @param i - the ith regression parameter
+     * @param j - the jth regression parameter
+     * @return the covariance of the parameter estimates
+     */
+    public double getCovarianceOfParameters(int i, int j) throws IndexOutOfBoundsException
{
+        if (parameters == null) {
+            return Double.NaN;
+        }
+        if (i < 0 || i >= this.parameters.length) {
+            throw new IndexOutOfBoundsException(" Row index is outside of the 0 " +
+                    "to number of variables - 1 range");
+        }
+        if (j < 0 || j >= this.parameters.length) {
+            throw new IndexOutOfBoundsException(" Column index is outside of the 0" +
+                    " to number of variables - 1 range");
+        }
+        return this.getVcvElement(i, j);
+    }
+
+    /**
+     * <p>Returns the number of parameters estimated in the model.</p>
+     *
+     * <p>This is the maximum number of regressors, some techniques may drop
+     * redundant parameters</p>
+     *
+     * @return number of regressors, -1 if not estimated
+     */
+    public int getNumberOfParameters() {
+        if (this.parameters == null) {
+            return -1;
+        }
+        return this.parameters.length;
+    }
+
+    /**
+     * Returns the number of observations added to the regression model.
+     *
+     * @return Number of observations, -1 if an error condition prevents estimation
+     */
+    public long getN() {
+        return this.nobs;
+    }
+
+    /**
+     * <p>Returns the sum of squared deviations of the y values about their mean.</p>
+     *
+     * <p>This is defined as SSTO
+     * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
+     *
+     * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p>
+     *
+     * @return sum of squared deviations of y values
+     */
+    public double getTotalSumSquares() {
+        return this.globalFitInfo[SST_IDX];
+    }
+
+    /**
+     * <p>Returns the sum of squared deviations of the predicted y values about
+     * their mean (which equals the mean of y).</p>
+     *
+     * <p>This is usually abbreviated SSR or SSM.  It is defined as SSM
+     * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
+     *
+     * <p><strong>Preconditions</strong>: <ul>
+     * <li>At least two observations (with at least two different x values)
+     * must have been added before invoking this method. If this method is
+     * invoked before a model can be estimated, <code>Double.NaN</code> is
+     * returned.
+     * </li></ul></p>
+     *
+     * @return sum of squared deviations of predicted y values
+     */
+    public double getRegressionSumSquares() {
+        return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX];
+    }
+
+    /**
+     * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
+     * sum of squared errors</a> (SSE) associated with the regression
+     * model.</p>
+     *
+     * <p>The return value is constrained to be non-negative - i.e., if due to
+     * rounding errors the computational formula returns a negative result,
+     * 0 is returned.</p>
+     *
+     * <p><strong>Preconditions</strong>: <ul>
+     * <li>numberOfParameters data pairs
+     * must have been added before invoking this method. If this method is
+     * invoked before a model can be estimated, <code>Double,NaN</code> is
+     * returned.
+     * </li></ul></p>
+     *
+     * @return sum of squared errors associated with the regression model
+     */
+    public double getErrorSumSquares() {
+        return this.globalFitInfo[ SSE_IDX];
+    }
+
+    /**
+     * <p>Returns the sum of squared errors divided by the degrees of freedom,
+     * usually abbreviated MSE.</p>
+     *
+     * <p>If there are fewer than <strong>numberOfParameters + 1</strong>
data pairs in the model,
+     * or if there is no variation in <code>x</code>, this returns
+     * <code>Double.NaN</code>.</p>
+     *
+     * @return sum of squared deviations of y values
+     */
+    public double getMeanSquareError() {
+        return this.globalFitInfo[ MSE_IDX];
+    }
+
+    /**
+     * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm">
+     * coefficient of multiple determination</a>,
+     * usually denoted r-square.</p>
+     *
+     * <p><strong>Preconditions</strong>: <ul>
+     * <li>At least numberOfParameters observations (with at least numberOfParameters
different x values)
+     * must have been added before invoking this method. If this method is
+     * invoked before a model can be estimated, {@code Double,NaN} is
+     * returned.
+     * </li></ul></p>
+     *
+     * @return r-square, a double in the interval [0, 1]
+     */
+    public double getRSquared() {
+        return this.globalFitInfo[ RSQ_IDX];
+    }
+
+    /**
+     * <p>Returns the adjusted R-squared statistic, defined by the formula <pre>
+     * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n
- p)]
+     * </pre>
+     * where SSR is the sum of squared residuals},
+     * SSTO is the total sum of squares}, n is the number
+     * of observations and p is the number of parameters estimated (including the intercept).</p>
+     *
+     * <p>If the regression is estimated without an intercept term, what is returned
is <pre>
+     * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code>
+     * </pre></p>
+     *
+     * @return adjusted R-Squared statistic
+     */
+    public double getAdjustedRSquared() {
+        return this.globalFitInfo[ ADJRSQ_IDX];
+    }
+
+    /**
+     * Returns true if the regression model has been computed including an intercept.
+     * In this case, the coefficient of the intercept is the first element of the
+     * {@link #getParameterEstimates() parameter estimates}.
+     * @return true if the model has an intercept term
+     */
+    public boolean hasIntercept() {
+        return this.containsConstant;
+    }
+
+    /**
+     * Gets the i-jth element of the variance-covariance matrix.
+     *
+     * @param i first variable index
+     * @param j second variable index
+     * @return the requested variance-covariance matrix entry
+     */
+    private double getVcvElement(int i, int j) {
+        if (this.isSymmetricVCD) {
+            if (this.varCovData.length > 1) {
+                //could be stored in upper or lower triangular
+                if (i == j) {
+                    return varCovData[i][i];
+                } else if (i >= varCovData[j].length) {
+                    return varCovData[i][j];
+                } else {
+                    return varCovData[j][i];
+                }
+            } else {//could be in single array
+                if (i > j) {
+                    return varCovData[0][(i + 1) * i / 2 + j];
+                } else {
+                    return varCovData[0][(j + 1) * j / 2 + i];
+                }
+            }
+        } else {
+            return this.varCovData[i][j];
+        }
+    }
+}

Propchange: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/RegressionResults.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/RegressionResults.java
------------------------------------------------------------------------------
    svn:keywords = Date Revision Id

Propchange: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/RegressionResults.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/UpdatingMultipleLinearRegression.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/UpdatingMultipleLinearRegression.java?rev=1144986&view=auto
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/UpdatingMultipleLinearRegression.java (added)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/UpdatingMultipleLinearRegression.java Sun Jul 10 23:45:43 2011
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.regression;
+
+import org.apache.commons.math.MathException;
+
+/**
+ * Contract for regression models whose data can be supplied incrementally.
+ * The full data set does not have to be held in memory: observations are
+ * added to the model as they become available, and updated regression
+ * statistics can be computed from what has been added so far.
+ *
+ * @version $Id$
+ * @since 3.0
+ */
+public interface UpdatingMultipleLinearRegression {
+
+    /**
+     * Indicates whether the model includes a constant term.
+     *
+     * @return true if a constant has been included, false otherwise
+     */
+    boolean hasIntercept();
+
+    /**
+     * Gets the count of observations that have been added to the model.
+     *
+     * @return Number of observations
+     */
+    long getN();
+
+    /**
+     * Adds a single observation to the regression model.
+     *
+     * @param x values of the independent variables, forming one row of the
+     * design matrix
+     * @param y value of the dependent (response) variable
+     */
+    void addObservation(double[] x, double y);
+
+    /**
+     * Adds several observations to the regression model in one call.
+     * {@code x} must be rectangular and must have the same length as {@code y}.
+     *
+     * @param x a series of observations on the independent variables
+     * @param y a series of observations on the dependent variable;
+     * its length must equal the length of {@code x}
+     */
+    void addObservations(double[][] x, double[] y);
+
+    /**
+     * Resets the regression model and clears its internal buffers, so that
+     * all data and derived values are re-initialized.
+     */
+    void clear();
+
+
+    /**
+     * Runs a regression on the data currently held in the buffers.
+     *
+     * @return a RegressionResults object acting as a container of the regression output
+     * @throws MathException a wide variety of exception cases are possible, check message
+     */
+    RegressionResults regress() throws MathException;
+
+    /**
+     * Runs a regression on the data currently held in the buffers, using
+     * only the regressors whose indices appear in {@code variablesToInclude}.
+     *
+     * @param variablesToInclude an array of indices of regressors to include
+     * @return a RegressionResults object acting as a container of the regression output
+     * @throws MathException a wide variety of exception cases are possible, check message
+     */
+    RegressionResults regress(int[] variablesToInclude) throws MathException;
+}

Propchange: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/UpdatingMultipleLinearRegression.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/UpdatingMultipleLinearRegression.java
------------------------------------------------------------------------------
    svn:keywords = Date Revision Id

Propchange: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/UpdatingMultipleLinearRegression.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message