Return-Path: X-Original-To: apmail-commons-commits-archive@minotaur.apache.org Delivered-To: apmail-commons-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id CE4456FCC for ; Tue, 19 Jul 2011 21:45:08 +0000 (UTC) Received: (qmail 79931 invoked by uid 500); 19 Jul 2011 21:45:08 -0000 Delivered-To: apmail-commons-commits-archive@commons.apache.org Received: (qmail 79847 invoked by uid 500); 19 Jul 2011 21:45:07 -0000 Mailing-List: contact commits-help@commons.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@commons.apache.org Delivered-To: mailing list commits@commons.apache.org Received: (qmail 79838 invoked by uid 99); 19 Jul 2011 21:45:07 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 19 Jul 2011 21:45:07 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 19 Jul 2011 21:45:02 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 4ADE9238890D for ; Tue, 19 Jul 2011 21:44:42 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1148557 - in /commons/proper/math/trunk/src: main/java/org/apache/commons/math/stat/regression/MillerUpdatingRegression.java test/java/org/apache/commons/math/stat/regression/MillerUpdatingRegressionTest.java Date: Tue, 19 Jul 2011 21:44:42 -0000 To: commits@commons.apache.org From: psteitz@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110719214442.4ADE9238890D@eris.apache.org> Author: psteitz Date: Tue Jul 19 21:44:41 2011 New Revision: 1148557 URL: http://svn.apache.org/viewvc?rev=1148557&view=rev Log: Added Miller updating regression implementation. JIRA: MATH-607. Contributed by Greg Sterijevski. Added: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/MillerUpdatingRegression.java (with props) commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/MillerUpdatingRegressionTest.java (with props) Added: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/MillerUpdatingRegression.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/MillerUpdatingRegression.java?rev=1148557&view=auto ============================================================================== --- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/MillerUpdatingRegression.java (added) +++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/MillerUpdatingRegression.java Tue Jul 19 21:44:41 2011 @@ -0,0 +1,1014 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math.stat.regression; + +import java.util.Arrays; +import org.apache.commons.math.MathException; +import org.apache.commons.math.exception.util.DummyLocalizable; +import org.apache.commons.math.exception.util.Localizable; +import org.apache.commons.math.util.FastMath; +import org.apache.commons.math.util.MathUtils; + +/** + *

This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.

+ * + *

The algorithm is described in:

+ * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
+ * Author(s): Alan J. Miller
+ * Source: Journal of the Royal Statistical Society.
+ * Series C (Applied Statistics), Vol. 41, No. 2
+ * (1992), pp. 458-478
+ * Published by: Blackwell Publishing for the Royal Statistical Society
+ * Stable URL: http://www.jstor.org/stable/2347583 

+ * + *

This method for multiple regression forms the solution to the OLS problem + * by updating the QR decomposition as described by Gentleman.

+ * + * @version $Id$ + * @since 3.0 + */ +public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression { + + private final int nvars; + private final double[] d; + private final double[] rhs; + private final double[] r; + private final double[] tol; + private final double[] rss; + private final int[] vorder; + private final double[] work_tolset; + private long nobs = 0; + private double sserr = 0.0; + private boolean rss_set = false; + private boolean tol_set = false; + private final boolean[] lindep; + private final double[] x_sing; + private final double[] work_sing; + private double sumy = 0.0; + private double sumsqy = 0.0; + private boolean hasIntercept; + private final double epsilon; + + /** + * Set the default constructor to private access + * to prevent inadvertent instantiation + */ + @SuppressWarnings("unused") + private MillerUpdatingRegression() { + this.d = null; + this.hasIntercept = false; + this.lindep = null; + this.nobs = -1; + this.nvars = -1; + this.r = null; + this.rhs = null; + this.rss = null; + this.rss_set = false; + this.sserr = Double.NaN; + this.sumsqy = Double.NaN; + this.sumy = Double.NaN; + this.tol = null; + this.tol_set = false; + this.vorder = null; + this.work_sing = null; + this.work_tolset = null; + this.x_sing = null; + this.epsilon = Double.NaN; + } + + public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance) { + if (numberOfVariables < 1) { + throw new IllegalArgumentException("NumberOfVariables must be greater than or equal to one"); + } + if (includeConstant) { + this.nvars = numberOfVariables + 1; + } else { + this.nvars = numberOfVariables; + } + this.hasIntercept = includeConstant; + this.nobs = 0; + this.d = new double[this.nvars]; + this.rhs = new double[this.nvars]; + this.r = new double[this.nvars * (this.nvars - 1) / 2]; + this.tol = new double[this.nvars]; + this.rss = new double[this.nvars]; + this.vorder = new int[this.nvars]; + this.x_sing = new double[this.nvars]; + this.work_sing = new double[this.nvars]; + this.work_tolset = new double[this.nvars]; + this.lindep = new boolean[this.nvars]; + for (int i = 0; i < this.nvars; i++) { + vorder[i] = i; + } + if (errorTolerance > 0) { + this.epsilon = errorTolerance; + } else { + this.epsilon = -errorTolerance; + } + return; + } + + public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant) { + this(numberOfVariables, includeConstant, MathUtils.EPSILON); + } + + public boolean hasIntercept() { + return this.hasIntercept; + } + + public long getN() { + return this.nobs; + } + + public void addObservation(final double[] x, final double y) { + + if ((!this.hasIntercept && x.length != nvars) || + (this.hasIntercept && x.length + 1 != nvars)) { + throw new IllegalArgumentException("Length of regressor list is less that numberOfVariables"); + } + if (!this.hasIntercept) { + include(Arrays.copyOf(x, x.length), 1.0, y); + } else { + double[] tmp = new double[x.length + 1]; + System.arraycopy(x, 0, tmp, 1, x.length); + tmp[0] = 1.0; + include(tmp, 1.0, y); + } + ++nobs; + return; + + } + + public void addObservations(double[][] x, double[] y) { + if (x.length != y.length) { + throw new IllegalArgumentException("Lengths of x and y matrices must be equal"); + } + for (int i = 0; i < x.length; i++) { + this.addObservation(x[i], y[i]); + } + return; + } + + /* + * The include method is where the QR decomposition occurs. This statement forms all + * intermediate data which will be used for all derivative measures. + * According to the miller paper, note that in the original implementation the x vector + * is overwritten. In this implementation, the include method is passed a copy of the + * original data vector so that there is no contamination of the data. Additionally, + * this method differs slighlty from gentleman's method, in that the assumption is + * of dense design matrices, there is some advantage in using the original gentleman algorithm + * on sparse matrices. + */ + private void include(final double[] x, final double wi, final double yi) { + int nextr = 0; + double w = wi; + double y = yi; + double xi; + double di; + double wxi; + double dpi; + double xk; + double _w; + this.rss_set = false; + sumy = smartAdd(yi, sumy); + sumsqy = smartAdd(sumsqy, yi * yi); + for (int i = 0; i < x.length; i++) { + if (w == 0.0) { + return; + } + xi = x[i]; + + if (xi == 0.0) { + nextr += nvars - i - 1; + continue; + } + di = d[i]; + wxi = w * xi; + _w = w; + if (di != 0.0) { + dpi = smartAdd(di, wxi * xi); + double tmp = wxi * xi / di; + if (FastMath.abs(tmp) > MathUtils.EPSILON) { + w = (di * w) / dpi; + } + } else { + dpi = wxi * xi; + w = 0.0; + } + d[i] = dpi; + for (int k = i + 1; k < nvars; k++) { + xk = x[k]; + + x[k] = smartAdd(xk, -xi * r[nextr]); + if (di != 0.0) { + r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi; + } else { + r[nextr] = xk / xi; + } + ++nextr; + } + xk = y; + y = smartAdd(xk, -xi * rhs[i]); + if (di != 0.0) { + rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi; + } else { + rhs[i] = xk / xi; + } + } + sserr = smartAdd(sserr, w * y * y); + return; + } + + private double smartAdd(double a, double b) { + double _a = FastMath.abs(a); + double _b = FastMath.abs(b); + if (_a > _b) { + double eps = _a * MathUtils.EPSILON; + if (_b > eps) { + return a + b; + } + return a; + } else { + double eps = _b * MathUtils.EPSILON; + if (_a > eps) { + return a + b; + } + return b; + } + } + + /* + * As the name suggest clear, wipes the internals and reoders everything in the + * canonical order. + */ + public void clear() { + Arrays.fill(d, 0.0); + Arrays.fill(rhs, 0.0); + Arrays.fill(r, 0.0); + Arrays.fill(tol, 0.0); + Arrays.fill(rss, 0.0); + Arrays.fill(this.work_tolset, 0.0); + Arrays.fill(this.work_sing, 0.0); + Arrays.fill(this.x_sing, 0.0); + Arrays.fill(lindep, false); + for (int i = 0; i < nvars; i++) { + vorder[i] = i; + } + + nobs = 0; + sserr = 0.0; + sumy = 0.0; + sumsqy = 0.0; + rss_set = false; + tol_set = false; + return; + } + + /* + * This sets up tolerances for singularity testing + */ + private void tolset() { + int pos; + double total; + final double eps = this.epsilon; + for (int i = 0; i < nvars; i++) { + this.work_tolset[i] = Math.sqrt(d[i]); + } + tol[0] = eps * this.work_tolset[0]; + for (int col = 1; col < nvars; col++) { + pos = col - 1; + total = work_tolset[col]; + for (int row = 0; row < col; row++) { + total += Math.abs(r[pos]) * work_tolset[row]; + pos += nvars - row - 2; + } + tol[col] = eps * total; + } + tol_set = true; + return; + } + + /* + * The regcf methods conducts the linear regression and extracts the + * parameter vector. Notice that the algorithm can do subset regression + * with no alteration. + */ + private double[] regcf(int nreq) { + int nextr; + if (nreq < 1 || nreq > this.nvars) { + throw new IllegalArgumentException("Number of regressors not correct"); + } + if (!this.tol_set) { + tolset(); + } + double[] ret = new double[nreq]; + boolean rankProblem = false; + for (int i = nreq - 1; i > -1; i--) { + if (Math.sqrt(d[i]) < tol[i]) { + ret[i] = 0.0; + d[i] = 0.0; + rankProblem = true; + } else { + ret[i] = rhs[i]; + nextr = i * (nvars + nvars - i - 1) / 2; + for (int j = i + 1; j < nreq; j++) { + + ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]); + + ++nextr; + } + } + } + if (rankProblem) { + for (int i = 0; i < nreq; i++) { + if (this.lindep[i]) { + ret[i] = Double.NaN; + } + } + } + return ret; + } + + /* + * The method which checks for singularities and then eliminates the offending + * columns + */ + private void singcheck() { + double temp; + double y; + double weight; + int pos; + + for (int i = 0; i < nvars; i++) { + work_sing[i] = Math.sqrt(d[i]); + } + + for (int col = 0; col < nvars; col++) { + // Set elements within R to zero if they are less than tol(col) in + // absolute value after being scaled by the square root of their row + // multiplier + temp = tol[col]; + pos = col - 1; + for (int row = 0; row < col - 1; row++) { + if (Math.abs(r[pos]) * work_sing[row] < temp) { + r[pos] = 0.0; + } + pos += nvars - row - 2; + } + // If diagonal element is near zero, set it to zero, set appropriate + // element of LINDEP, and use INCLUD to augment the projections in + // the lower rows of the orthogonalization. + lindep[col] = false; + if (work_sing[col] < temp) { + lindep[col] = true; + if (col < nvars - 1) { + Arrays.fill(x_sing, 0.0); + int _pi = col * (nvars + nvars - col - 1) / 2; + for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) { + x_sing[_xi] = r[_pi]; + r[_pi] = 0.0; + } + y = rhs[col]; + weight = d[col]; + d[col] = 0.0; + rhs[col] = 0.0; + this.include(x_sing, weight, y); + } else { + sserr += d[col] * rhs[col] * rhs[col]; + } + } + } + return; + } + + /* + * Calculates the sum of squared errors for the full regression + * and all subsets in the following manner: + * rss[] ={ + * ResidualSumOfSquares_allNvars, + * ResidualSumOfSquares_FirstNvars-1, + * ResidualSumOfSquares_FirstNvars-2, + * ..., ResidualSumOfSquares_FirstVariable} + */ + private void ss() { + double total = sserr; + rss[nvars - 1] = sserr; + for (int i = nvars - 1; i > 0; i--) { + total += d[i] * rhs[i] * rhs[i]; + rss[i - 1] = total; + } + rss_set = true; + return; + } + + /* + * Calculates the cov matrix assuming only the first nreq variables are + * included in the calculation. The returned array contains a symmetric + * matrix stored in lower triangular form. The matrix will have + * ( nreq + 1 ) * nreq / 2 elements. For illustration + * cov = + * { + * cov_00, + * cov_10, cov_11, + * cov_20, cov_21, cov22, + * ... + * } + */ + private double[] cov(int nreq) { + if (this.nobs <= nreq) { + return null; + } + double rnk = 0.0; + for (int i = 0; i < nreq; i++) { + if (!this.lindep[i]) { + rnk += 1.0; + } + } + double var = rss[nreq - 1] / (nobs - rnk); + double[] rinv = new double[nreq * (nreq - 1) / 2]; + inverse(rinv, nreq); + double[] covmat = new double[nreq * (nreq + 1) / 2]; + Arrays.fill(covmat, Double.NaN); + int pos2; + int pos1; + int start = 0; + double total = 0; + for (int row = 0; row < nreq; row++) { + pos2 = start; + if (!this.lindep[row]) { + for (int col = row; col < nreq; col++) { + if (!this.lindep[col]) { + pos1 = start + col - row; + if (row == col) { + total = 1.0 / d[col]; + } else { + total = rinv[pos1 - 1] / d[col]; + } + for (int k = col + 1; k < nreq; k++) { + if (!this.lindep[k]) { + total += rinv[pos1] * rinv[pos2] / d[k]; + } + ++pos1; + ++pos2; + } + covmat[ (col + 1) * col / 2 + row] = total * var; + } else { + pos2 += nreq - col - 1; + } + } + } + start += nreq - row - 1; + } + return covmat; + } + + /* + * This internal method calculates the inverse of the upper-triangular portion + * of the R matrix. + */ + private void inverse(double[] rinv, int nreq) { + int pos = nreq * (nreq - 1) / 2 - 1; + int pos1 = -1; + int pos2 = -1; + double total = 0.0; + int start; + Arrays.fill(rinv, Double.NaN); + for (int row = nreq - 1; row > 0; --row) { + if (!this.lindep[row]) { + start = (row - 1) * (nvars + nvars - row) / 2; + for (int col = nreq; col > row; --col) { + pos1 = start; + pos2 = pos; + total = 0.0; + for (int k = row; k < col - 1; k++) { + pos2 += nreq - k - 1; + if (!this.lindep[k]) { + total += -r[pos1] * rinv[pos2]; + } + ++pos1; + } + rinv[pos] = total - r[pos1]; + --pos; + } + } else { + pos -= nreq - row; + } + } + return; + } + + /* + * In the original algorithm only the partial correlations of the regressors + * is returned to the user. In this implementation, we have + * corr = + * { + * corrxx - lower triangular + * corrxy - bottom row of the matrix + * } + */ + public double[] getPartialCorrelations(int in) { + /* + Replaces subroutines PCORR and COR of: + ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 + + Calculate partial correlations after the variables in rows + 1, 2, ..., IN have been forced into the regression. + If IN = 1, and the first row of R represents a constant in the + model, then the usual simple correlations are returned. + + If IN = 0, the value returned in array CORMAT for the correlation + of variables Xi & Xj is: + sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) ) + + On return, array CORMAT contains the upper triangle of the matrix of + partial correlations stored by rows, excluding the 1's on the diagonal. + e.g. if IN = 2, the consecutive elements returned are: + (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc. + Array YCORR stores the partial correlations with the Y-variable + starting with YCORR(IN+1) = partial correlation with the variable in + position (IN+1). + + --------------------------------------------------------------------------*/ + double[] output = new double[(nvars - in + 1) * (nvars - in) / 2]; + int base_pos; + int pos; + int pos1; + int pos2; + int rms_off = -in; + int wrk_off = -(in + 1); + double[] rms = new double[nvars - in]; + double[] work = new double[nvars - in - 1]; + double sumxx; + double sumxy; + double sumyy; + int offXX = (nvars - in) * (nvars - in - 1) / 2; + if (in < -1 || in >= nvars) { + return null; + } + int nvm = nvars - 1; + base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2; + if (d[in] > 0.0) { + rms[in + rms_off] = 1.0 / Math.sqrt(d[in]); + } + for (int col = in + 1; col < nvars; col++) { + pos = base_pos + col - 1 - in; + sumxx = d[col]; + for (int row = in; row < col; row++) { + sumxx += d[row] * r[pos] * r[pos]; + pos += nvars - row - 2; + } + if (sumxx > 0.0) { + rms[col + rms_off] = 1.0 / Math.sqrt(sumxx); + } else { + rms[col + rms_off] = 0.0; + } + } + sumyy = sserr; + for (int row = in; row < nvars; row++) { + sumyy += d[row] * rhs[row] * rhs[row]; + } + if (sumyy > 0.0) { + sumyy = 1.0 / Math.sqrt(sumyy); + } + pos = 0; + for (int col1 = in; col1 < nvars; col1++) { + sumxy = 0.0; + Arrays.fill(work, 0.0); + pos1 = base_pos + col1 - in - 1; + for (int row = in; row < col1; row++) { + pos2 = pos1 + 1; + for (int col2 = col1 + 1; col2 < nvars; col2++) { + work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2]; + pos2++; + } + sumxy += d[row] * r[pos1] * rhs[row]; + pos1 += nvars - row - 2; + } + pos2 = pos1 + 1; + for (int col2 = col1 + 1; col2 < nvars; col2++) { + work[col2 + wrk_off] += d[col1] * r[pos2]; + ++pos2; + output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] = + work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off]; + ++pos; + } + sumxy += d[col1] * rhs[col1]; + output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy; + } + + return output; + } + + /** + * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 + * Move variable from position FROM to position TO in an + * orthogonal reduction produced by AS75.1. + * + * @param from initial position + * @param to destination + */ + private void vmove(int from, int to) { + double d1; + double d2; + double X; + double d1new; + double d2new; + double cbar; + double sbar; + double Y; + int first; + int inc; + int m1; + int m2; + int mp1; + int pos; + boolean bSkipTo40 = false; + if (from == to) { + return; + } + if (!this.rss_set) { + ss(); + } + int count = 0; + if (from < to) { + first = from; + inc = 1; + count = to - from; + } else { + first = from - 1; + inc = -1; + count = from - to; + } + + int m = first; + int idx = 0; + while (idx < count) { + m1 = m * (nvars + nvars - m - 1) / 2; + m2 = m1 + nvars - m - 1; + mp1 = m + 1; + + d1 = d[m]; + d2 = d[mp1]; + // Special cases. + if (d1 > this.epsilon || d2 > this.epsilon) { + X = r[m1]; + if (Math.abs(X) * Math.sqrt(d1) < tol[mp1]) { + X = 0.0; + } + if (d1 < this.epsilon || Math.abs(X) < this.epsilon) { + d[m] = d2; + d[mp1] = d1; + r[m1] = 0.0; + for (int col = m + 2; col < nvars; col++) { + ++m1; + X = r[m1]; + r[m1] = r[m2]; + r[m2] = X; + ++m2; + } + X = rhs[m]; + rhs[m] = rhs[mp1]; + rhs[mp1] = X; + bSkipTo40 = true; + break; + } else if (d2 < this.epsilon) { + d[m] = d1 * X * X; + r[m1] = 1.0 / X; + for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) { + r[_i] /= X; + } + rhs[m] = rhs[m] / X; + bSkipTo40 = true; + break; + } + if (!bSkipTo40) { + d1new = d2 + d1 * X * X; + cbar = d2 / d1new; + sbar = X * d1 / d1new; + d2new = d1 * cbar; + d[m] = d1new; + d[mp1] = d2new; + r[m1] = sbar; + for (int col = m + 2; col < nvars; col++) { + ++m1; + Y = r[m1]; + r[m1] = cbar * r[m2] + sbar * Y; + r[m2] = Y - X * r[m2]; + ++m2; + } + Y = rhs[m]; + rhs[m] = cbar * rhs[mp1] + sbar * Y; + rhs[mp1] = Y - X * rhs[mp1]; + } + } + if (m > 0) { + pos = m; + for (int row = 0; row < m; row++) { + X = r[pos]; + r[pos] = r[pos - 1]; + r[pos - 1] = X; + pos += nvars - row - 2; + } + } + // Adjust variable order (VORDER), the tolerances (TOL) and + // the vector of residual sums of squares (RSS). + m1 = vorder[m]; + vorder[m] = vorder[mp1]; + vorder[mp1] = m1; + X = tol[m]; + tol[m] = tol[mp1]; + tol[mp1] = X; + rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1]; + + m += inc; + ++idx; + } + } + + private int reorderRegressors(int[] list, int pos1) { + +// ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 + +// Re-order the variables in an orthogonal reduction produced by +// AS75.1 so that the N variables in LIST start at position POS1, +// though will not necessarily be in the same order as in LIST. +// Any variables in VORDER before position POS1 are not moved. + +// Auxiliary routine called: VMOVE +// +//-------------------------------------------------------------------------- + + int next; + int i; + int l; + if (list.length < 1 || list.length > nvars + 1 - pos1) { + return -1; + } + next = pos1; + i = pos1; + while (i < nvars) { + l = vorder[i]; + for (int j = 0; j < list.length; j++) { + if (l == list[j]) { + if (i > next) { + this.vmove(i, next); + ++next; + if (next >= list.length + pos1) { + return 0; + } else { + break; + } + } + } + } + ++i; + } + return 0; + } + + /* + * Gets the diagonal of the Hat matrix also known as the leverage matrix + * + * + * @returns the diagonal element of the hatmatrix + */ + public double getDiagonalOfHatMatrix(double[] row_data) { + double[] wk = new double[this.nvars]; + int pos; + double total; + + if (row_data.length > nvars) { + return Double.NaN; + } + double[] xrow; + if (this.hasIntercept) { + xrow = new double[row_data.length + 1]; + xrow[0] = 1.0; + System.arraycopy(row_data, 0, xrow, 1, row_data.length); + } else { + xrow = row_data; + } + double hii = 0.0; + for (int col = 0; col < xrow.length; col++) { + if (Math.sqrt(d[col]) < tol[col]) { + wk[col] = 0.0; + } else { + pos = col - 1; + total = xrow[col]; + for (int row = 0; row < col; row++) { + total = smartAdd(total, -wk[row] * r[pos]); + pos += nvars - row - 2; + } + wk[col] = total; + hii = smartAdd(hii, (total * total) / d[col]); + } + } + return hii; + } + + /* + * Gets the order of the regressors, useful if sometype of reording + * has been called. Calling regress with int[]{} args will trigger + * a reordering + * @returns int[] with the current order of the regressors + */ + public int[] getOrderOfRegressors() { + return MathUtils.copyOf(vorder); + } + + + public RegressionResults regress() throws MathException { + return regress(this.nvars); + } + + public RegressionResults regress(int numberOfRegressors) throws MathException { + if (this.nobs <= numberOfRegressors) { + Localizable outMsg = new DummyLocalizable("Number of observations not " + + "greater than the number of number of variables"); + throw new MathException(outMsg, (Object) null); + } + if( numberOfRegressors > this.nvars ){ + Localizable outMsg = new DummyLocalizable("Number of variables requested " + + "in regression greater than the number of number of variables"); + throw new MathException(outMsg, (Object) null); + } + this.tolset(); + + this.singcheck(); + + double[] beta = this.regcf(numberOfRegressors); + + this.ss(); + + double[] cov = this.cov(numberOfRegressors); + + int rnk = 0; + for (int i = 0; i < this.lindep.length; i++) { + if (!this.lindep[i]) { + ++rnk; + } + } + + boolean needsReorder = false; + for (int i = 0; i < numberOfRegressors; i++) { + if (this.vorder[i] != i) { + needsReorder = true; + break; + } + } + if (!needsReorder) { + return new RegressionResults( + beta, new double[][]{cov}, true, this.nobs, rnk, + this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false); + } else { + double[] betaNew = new double[beta.length]; + double[] covNew = new double[cov.length]; + + int[] newIndices = new int[beta.length]; + for (int i = 0; i < nvars; i++) { + for (int j = 0; j < numberOfRegressors; j++) { + if (this.vorder[j] == i) { + betaNew[i] = beta[ j]; + newIndices[i] = j; + } + } + } + + int idx1 = 0; + int idx2; + int _i; + int _j; + for (int i = 0; i < beta.length; i++) { + _i = newIndices[i]; + for (int j = 0; j <= i; j++, idx1++) { + _j = newIndices[j]; + if (_i > _j) { + idx2 = _i * (_i + 1) / 2 + _j; + } else { + idx2 = _j * (_j + 1) / 2 + _i; + } + covNew[idx1] = cov[idx2]; + } + } + return new RegressionResults( + betaNew, new double[][]{covNew}, true, this.nobs, rnk, + this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false); + } + } + + public RegressionResults regress(int[] variablesToInclude) throws MathException { + if (variablesToInclude.length > this.nvars) { + Localizable outMsg = new DummyLocalizable("Number of variables in included list " + + "greater than the number of number of variables"); + throw new MathException(outMsg, (Object) null); + } + if (this.nobs <= this.nvars) { + Localizable outMsg = new DummyLocalizable("Number of observations not " + + "greater than the number of number of variables"); + throw new MathException(outMsg, (Object) null); + } + Arrays.sort(variablesToInclude); + int iExclude = 0; + for (int i = 0; i < variablesToInclude.length; i++) { + if (i >= this.nvars) { + Localizable outMsg = new DummyLocalizable("Requesting variable for inclusion " + + "which does not exist in data supplied"); + throw new MathException(outMsg, (Object) null); + } + if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) { + variablesToInclude[i] = -1; + ++iExclude; + } + } + int[] series; + if (iExclude > 0) { + int j = 0; + series = new int[variablesToInclude.length - iExclude]; + for (int i = 0; i < variablesToInclude.length; i++) { + if (variablesToInclude[i] > -1) { + series[j] = variablesToInclude[i]; + ++j; + } + } + } else { + series = variablesToInclude; + } + + this.reorderRegressors(series, 0); + + this.tolset(); + + this.singcheck(); + + double[] beta = this.regcf(series.length); + + this.ss(); + + double[] cov = this.cov(series.length); + + int rnk = 0; + for (int i = 0; i < this.lindep.length; i++) { + if (!this.lindep[i]) { + ++rnk; + } + } + + boolean needsReorder = false; + for (int i = 0; i < this.nvars; i++) { + if (this.vorder[i] != series[i]) { + needsReorder = true; + break; + } + } + if (!needsReorder) { + return new RegressionResults( + beta, new double[][]{cov}, true, this.nobs, rnk, + this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false); + } else { + double[] betaNew = new double[beta.length]; + int[] newIndices = new int[beta.length]; + for (int i = 0; i < series.length; i++) { + for (int j = 0; j < this.vorder.length; j++) { + if (this.vorder[j] == series[i]) { + betaNew[i] = beta[ j]; + newIndices[i] = j; + } + } + } + double[] covNew = new double[cov.length]; + int idx1 = 0; + int idx2; + int _i; + int _j; + for (int i = 0; i < beta.length; i++) { + _i = newIndices[i]; + for (int j = 0; j <= i; j++, idx1++) { + _j = newIndices[j]; + if (_i > _j) { + idx2 = _i * (_i + 1) / 2 + _j; + } else { + idx2 = _j * (_j + 1) / 2 + _i; + } + covNew[idx1] = cov[idx2]; + } + } + return new RegressionResults( + betaNew, new double[][]{covNew}, true, this.nobs, rnk, + this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false); + } + } +} Propchange: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/MillerUpdatingRegression.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/MillerUpdatingRegression.java ------------------------------------------------------------------------------ svn:keywords = Date Revision Id Propchange: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/MillerUpdatingRegression.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/MillerUpdatingRegressionTest.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/MillerUpdatingRegressionTest.java?rev=1148557&view=auto ============================================================================== --- commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/MillerUpdatingRegressionTest.java (added) +++ commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/MillerUpdatingRegressionTest.java Tue Jul 19 21:44:41 2011 @@ -0,0 +1,1098 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math.stat.regression; + +import org.apache.commons.math.linear.RealMatrix; +import org.apache.commons.math.stat.correlation.PearsonsCorrelation; +import org.apache.commons.math.MathException; +import org.apache.commons.math.TestUtils; +import org.apache.commons.math.util.FastMath; +import org.junit.Test; +import static org.junit.Assert.*; + +/** + * MillerUpdatingRegression tests. + */ +public class MillerUpdatingRegressionTest { + + public MillerUpdatingRegressionTest() { + } + /* This is the Greene Airline Cost data. + * The data can be downloaded from http://www.indiana.edu/~statmath/stat/all/panel/airline.csv + */ + private final static double[][] airdata = { + /*"I",*/new double[]{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6}, + /*"T",*/ new double[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + /*"C",*/ new double[]{1140640, 1215690, 1309570, 1511530, 1676730, 1823740, 2022890, 2314760, 2639160, 3247620, 3787750, 3867750, 3996020, 4282880, 4748320, 569292, 640614, 777655, 999294, 1203970, 1358100, 1501350, 1709270, 2025400, 2548370, 3137740, 3557700, 3717740, 3962370, 4209390, 286298, 309290, 342056, 374595, 450037, 510412, 575347, 669331, 783799, 913883, 1041520, 1125800, 1096070, 1198930, 1170470, 145167, 170192, 247506, 309391, 354338, 373941, 420915, 474017, 532590, 676771, 880438, 1052020, 1193680, 1303390, 1436970, 91361, 95428, 98187, 115967, 138382, 156228, 183169, 210212, 274024, 356915, 432344, 524294, 530924, 581447, 610257, 68978, 74904, 83829, 98148, 118449, 133161, 145062, 170711, 199775, 276797, 381478, 506969, 633388, 804388, 1009500}, + /*"Q",*/ new double[]{0.952757, 0.986757, 1.09198, 1.17578, 1.16017, 1.17376, 1.29051, 1.39067, 1.61273, 1.82544, 1.54604, 1.5279, 1.6602, 1.82231, 1.93646, 0.520635, 0.534627, 0.655192, 0.791575, 0.842945, 0.852892, 0.922843, 1, 1.19845, 1.34067, 1.32624, 1.24852, 1.25432, 1.37177, 1.38974, 0.262424, 0.266433, 0.306043, 0.325586, 0.345706, 0.367517, 0.409937, 0.448023, 0.539595, 0.539382, 0.467967, 0.450544, 0.468793, 0.494397, 0.493317, 0.086393, 0.09674, 0.1415, 0.169715, 0.173805, 0.164272, 0.170906, 0.17784, 0.192248, 0.242469, 0.256505, 0.249657, 0.273923, 0.371131, 0.421411, 0.051028, 0.052646, 0.056348, 0.066953, 0.070308, 0.073961, 0.084946, 0.095474, 0.119814, 0.150046, 0.144014, 0.1693, 0.172761, 0.18667, 0.213279, 0.037682, 0.039784, 0.044331, 0.050245, 0.055046, 0.052462, 0.056977, 0.06149, 0.069027, 0.092749, 0.11264, 0.154154, 0.186461, 0.246847, 0.304013}, + /*"PF",*/ new double[]{106650, 110307, 110574, 121974, 196606, 265609, 263451, 316411, 384110, 569251, 871636, 997239, 938002, 859572, 823411, 103795, 111477, 118664, 114797, 215322, 281704, 304818, 348609, 374579, 544109, 853356, 1003200, 941977, 856533, 821361, 118788, 123798, 122882, 131274, 222037, 278721, 306564, 356073, 378311, 555267, 850322, 1015610, 954508, 886999, 844079, 114987, 120501, 121908, 127220, 209405, 263148, 316724, 363598, 389436, 547376, 850418, 1011170, 951934, 881323, 831374, 118222, 116223, 115853, 129372, 243266, 277930, 317273, 358794, 397667, 566672, 848393, 1005740, 958231, 872924, 844622, 117112, 119420, 116087, 122997, 194309, 307923, 323595, 363081, 386422, 564867, 874818, 1013170, 930477, 851676, 819476}, + /*"LF",*/ new double[]{0.534487, 0.532328, 0.547736, 0.540846, 0.591167, 0.575417, 0.594495, 0.597409, 0.638522, 0.676287, 0.605735, 0.61436, 0.633366, 0.650117, 0.625603, 0.490851, 0.473449, 0.503013, 0.512501, 0.566782, 0.558133, 0.558799, 0.57207, 0.624763, 0.628706, 0.58915, 0.532612, 0.526652, 0.540163, 0.528775, 0.524334, 0.537185, 0.582119, 0.579489, 0.606592, 0.60727, 0.582425, 0.573972, 0.654256, 0.631055, 0.56924, 0.589682, 0.587953, 0.565388, 0.577078, 0.432066, 0.439669, 0.488932, 0.484181, 0.529925, 0.532723, 0.549067, 0.55714, 0.611377, 0.645319, 0.611734, 0.580884, 0.572047, 0.59457, 0.585525, 0.442875, 0.462473, 0.519118, 0.529331, 0.557797, 0.556181, 0.569327, 0.583465, 0.631818, 0.604723, 0.587921, 0.616159, 0.605868, 0.594688, 0.635545, 0.448539, 0.475889, 0.500562, 0.500344, 0.528897, 0.495361, 0.510342, 0.518296, 0.546723, 0.554276, 0.517766, 0.580049, 0.556024, 0.537791, 0.525775} + }; + + /** + * Test of hasIntercept method, of class MillerUpdatingRegression. + */ + @Test + public void testHasIntercept() { + MillerUpdatingRegression instance = new MillerUpdatingRegression(10, false); + if (instance.hasIntercept()) { + fail("Should not have intercept"); + } + instance = new MillerUpdatingRegression(10, true); + if (!instance.hasIntercept()) { + fail("Should have intercept"); + } + } + + /** + * Test of getN method, of class MillerUpdatingRegression. + */ + @Test + public void testAddObsGetNClear() { + System.out.println("getN - test add observation - clear"); + MillerUpdatingRegression instance = new MillerUpdatingRegression(3, true); + double[][] xAll = new double[airdata[0].length][]; + double[] y = new double[airdata[0].length]; + for (int i = 0; i < airdata[0].length; i++) { + xAll[i] = new double[3]; + xAll[i][0] = Math.log(airdata[3][i]); + xAll[i][1] = Math.log(airdata[4][i]); + xAll[i][2] = airdata[5][i]; + y[i] = Math.log(airdata[2][i]); + } + instance.addObservations(xAll, y); + if (instance.getN() != xAll.length) { + fail("Number of observations not correct in bulk addition"); + } + instance.clear(); + for (int i = 0; i < xAll.length; i++) { + instance.addObservation(xAll[i], y[i]); + } + if (instance.getN() != xAll.length) { + fail("Number of observations not correct in drip addition"); + } + return; + } + + @Test + public void testNegativeTestAddObs() { + System.out.println("Test Add obs should fail if number of vars changes"); + MillerUpdatingRegression instance = new MillerUpdatingRegression(3, true); + try { + instance.addObservation(new double[]{1.0}, 0.0); + fail("Should throw IllegalArgumentException"); + } catch (IllegalArgumentException iae) { + } catch (Exception e) { + fail("Should throw IllegalArgumentException"); + } + try { + instance.addObservation(new double[]{1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}, 0.0); + fail("Should throw IllegalArgumentException"); + } catch (IllegalArgumentException iae) { + } catch (Exception e) { + fail("Should throw IllegalArgumentException"); + } + try { + instance.addObservation(new double[]{1.0, 1.0, 1.0}, 0.0); + } catch (Exception e) { + fail("Should throw IllegalArgumentException"); + } + + //now we try it without an intercept + instance = new MillerUpdatingRegression(3, false); + try { + instance.addObservation(new double[]{1.0}, 0.0); + fail("Should throw IllegalArgumentException [NOINTERCEPT]"); + } catch (IllegalArgumentException iae) { + } catch (Exception e) { + fail("Should throw IllegalArgumentException [NOINTERCEPT]"); + } + try { + instance.addObservation(new double[]{1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}, 0.0); + fail("Should throw IllegalArgumentException [NOINTERCEPT]"); + } catch (IllegalArgumentException iae) { + } catch (Exception e) { + fail("Should throw IllegalArgumentException [NOINTERCEPT]"); + } + try { + instance.addObservation(new double[]{1.0, 1.0, 1.0}, 0.0); + } catch (Exception e) { + fail("Should throw IllegalArgumentException [NOINTERCEPT]"); + } + } + + @Test + public void testNegativeTestAddMultipleObs() { + System.out.println("Test Add Multiple obs should fail if length of arrays is not same"); + MillerUpdatingRegression instance = new MillerUpdatingRegression(3, true); + try { + double[][] tst = {{1.0, 1.0, 1.0}, {1.20, 2.0, 2.1}}; + double[] y = {1.0}; + instance.addObservations(tst, y); + + fail("Should throw IllegalArgumentException"); + } catch (IllegalArgumentException iae) { + } catch (Exception e) { + fail("Should throw IllegalArgumentException"); + } + + try { + double[][] tst = {{1.0, 1.0, 1.0}, {1.20, 2.0, 2.1}}; + double[] y = {1.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; + instance.addObservations(tst, y); + + fail("Should throw IllegalArgumentException"); + } catch (IllegalArgumentException iae) { + } catch (Exception e) { + fail("Should throw IllegalArgumentException"); + } + } + + /* Results can be found at http://www.indiana.edu/~statmath/stat/all/panel/panel4.html + * This test concerns a known data set + */ + @Test + public void testRegressAirlineConstantExternal() { + MillerUpdatingRegression instance = new MillerUpdatingRegression(4, false); + double[][] x = new double[airdata[0].length][]; + double[] y = new double[airdata[0].length]; + for (int i = 0; i < airdata[0].length; i++) { + x[i] = new double[4]; + x[i][0] = 1.0; + x[i][1] = Math.log(airdata[3][i]); + x[i][2] = Math.log(airdata[4][i]); + x[i][3] = airdata[5][i]; + y[i] = Math.log(airdata[2][i]); + } + + instance.addObservations(x, y); + try { + RegressionResults result = instance.regress(); + if (result == null) { + fail("The test case is a prototype."); + } + TestUtils.assertEquals( + new double[]{9.5169, 0.8827, 0.4540, -1.6275}, + result.getParameterEstimates(), 1e-4); + + + TestUtils.assertEquals( + new double[]{.2292445, .0132545, .0203042, .345302}, + result.getStdErrorOfEstimates(), 1.0e-4); + + TestUtils.assertEquals(0.01552839, result.getMeanSquareError(), 1.0e-8); + } catch (Exception e) { + fail("Should not throw exception but does"); + } + } + + @Test + public void testRegressAirlineConstantInternal() { + MillerUpdatingRegression instance = new MillerUpdatingRegression(3, true); + double[][] x = new double[airdata[0].length][]; + double[] y = new double[airdata[0].length]; + for (int i = 0; i < airdata[0].length; i++) { + x[i] = new double[3]; + x[i][0] = Math.log(airdata[3][i]); + x[i][1] = Math.log(airdata[4][i]); + x[i][2] = airdata[5][i]; + y[i] = Math.log(airdata[2][i]); + } + + instance.addObservations(x, y); + try { + RegressionResults result = instance.regress(); + if (result == null) { + fail("The test case is a prototype."); + } + TestUtils.assertEquals( + new double[]{9.5169, 0.8827, 0.4540, -1.6275}, + result.getParameterEstimates(), 1e-4); + + + TestUtils.assertEquals( + new double[]{.2292445, .0132545, .0203042, .345302}, + result.getStdErrorOfEstimates(), 1.0e-4); + + TestUtils.assertEquals(0.9883, result.getRSquared(), 1.0e-4); + TestUtils.assertEquals(0.01552839, result.getMeanSquareError(), 1.0e-8); + } catch (Exception e) { + fail("Should not throw exception but does"); + } + } + + @Test + public void testFilippelli() throws MathException { + double[] data = new double[]{ + 0.8116, -6.860120914, + 0.9072, -4.324130045, + 0.9052, -4.358625055, + 0.9039, -4.358426747, + 0.8053, -6.955852379, + 0.8377, -6.661145254, + 0.8667, -6.355462942, + 0.8809, -6.118102026, + 0.7975, -7.115148017, + 0.8162, -6.815308569, + 0.8515, -6.519993057, + 0.8766, -6.204119983, + 0.8885, -5.853871964, + 0.8859, -6.109523091, + 0.8959, -5.79832982, + 0.8913, -5.482672118, + 0.8959, -5.171791386, + 0.8971, -4.851705903, + 0.9021, -4.517126416, + 0.909, -4.143573228, + 0.9139, -3.709075441, + 0.9199, -3.499489089, + 0.8692, -6.300769497, + 0.8872, -5.953504836, + 0.89, -5.642065153, + 0.891, -5.031376979, + 0.8977, -4.680685696, + 0.9035, -4.329846955, + 0.9078, -3.928486195, + 0.7675, -8.56735134, + 0.7705, -8.363211311, + 0.7713, -8.107682739, + 0.7736, -7.823908741, + 0.7775, -7.522878745, + 0.7841, -7.218819279, + 0.7971, -6.920818754, + 0.8329, -6.628932138, + 0.8641, -6.323946875, + 0.8804, -5.991399828, + 0.7668, -8.781464495, + 0.7633, -8.663140179, + 0.7678, -8.473531488, + 0.7697, -8.247337057, + 0.77, -7.971428747, + 0.7749, -7.676129393, + 0.7796, -7.352812702, + 0.7897, -7.072065318, + 0.8131, -6.774174009, + 0.8498, -6.478861916, + 0.8741, -6.159517513, + 0.8061, -6.835647144, + 0.846, -6.53165267, + 0.8751, -6.224098421, + 0.8856, -5.910094889, + 0.8919, -5.598599459, + 0.8934, -5.290645224, + 0.894, -4.974284616, + 0.8957, -4.64454848, + 0.9047, -4.290560426, + 0.9129, -3.885055584, + 0.9209, -3.408378962, + 0.9219, -3.13200249, + 0.7739, -8.726767166, + 0.7681, -8.66695597, + 0.7665, -8.511026475, + 0.7703, -8.165388579, + 0.7702, -7.886056648, + 0.7761, -7.588043762, + 0.7809, -7.283412422, + 0.7961, -6.995678626, + 0.8253, -6.691862621, + 0.8602, -6.392544977, + 0.8809, -6.067374056, + 0.8301, -6.684029655, + 0.8664, -6.378719832, + 0.8834, -6.065855188, + 0.8898, -5.752272167, + 0.8964, -5.132414673, + 0.8963, -4.811352704, + 0.9074, -4.098269308, + 0.9119, -3.66174277, + 0.9228, -3.2644011 + }; + MillerUpdatingRegression model = new MillerUpdatingRegression(10, true); + int off = 0; + double[] tmp = new double[10]; + int nobs = 82; + for (int i = 0; i < nobs; i++) { + tmp[0] = data[off + 1]; +// tmp[1] = tmp[0] * tmp[0]; +// tmp[2] = tmp[0] * tmp[1]; //^3 +// tmp[3] = tmp[1] * tmp[1]; //^4 +// tmp[4] = tmp[2] * tmp[1]; //^5 +// tmp[5] = tmp[2] * tmp[2]; //^6 +// tmp[6] = tmp[2] * tmp[3]; //^7 +// tmp[7] = tmp[3] * tmp[3]; //^8 +// tmp[8] = tmp[4] * tmp[3]; //^9 +// tmp[9] = tmp[4] * tmp[4]; //^10 + tmp[1] = tmp[0] * tmp[0]; + tmp[2] = tmp[0] * tmp[1]; + tmp[3] = tmp[0] * tmp[2]; + tmp[4] = tmp[0] * tmp[3]; + tmp[5] = tmp[0] * tmp[4]; + tmp[6] = tmp[0] * tmp[5]; + tmp[7] = tmp[0] * tmp[6]; + tmp[8] = tmp[0] * tmp[7]; + tmp[9] = tmp[0] * tmp[8]; + model.addObservation(tmp, data[off]); + off += 2; + } + RegressionResults result = model.regress(); + double[] betaHat = result.getParameterEstimates(); + TestUtils.assertEquals(betaHat, + new double[]{ + -1467.48961422980, + -2772.17959193342, + -2316.37108160893, + -1127.97394098372, + -354.478233703349, + -75.1242017393757, + -10.8753180355343, + -1.06221498588947, + -0.670191154593408E-01, + -0.246781078275479E-02, + -0.402962525080404E-04 + }, 1E-5); // +// + double[] se = result.getStdErrorOfEstimates(); + TestUtils.assertEquals(se, + new double[]{ + 298.084530995537, + 559.779865474950, + 466.477572127796, + 227.204274477751, + 71.6478660875927, + 15.2897178747400, + 2.23691159816033, + 0.221624321934227, + 0.142363763154724E-01, + 0.535617408889821E-03, + 0.896632837373868E-05 + }, 1E-5); // + + TestUtils.assertEquals(0.996727416185620, result.getRSquared(), 1.0e-8); + TestUtils.assertEquals(0.112091743968020E-04, result.getMeanSquareError(), 1.0e-10); + TestUtils.assertEquals(0.795851382172941E-03, result.getErrorSumSquares(), 1.0e-10); + + } + + @Test + public void testWampler1() throws MathException { + double[] data = new double[]{ + 1, 0, + 6, 1, + 63, 2, + 364, 3, + 1365, 4, + 3906, 5, + 9331, 6, + 19608, 7, + 37449, 8, + 66430, 9, + 111111, 10, + 177156, 11, + 271453, 12, + 402234, 13, + 579195, 14, + 813616, 15, + 1118481, 16, + 1508598, 17, + 2000719, 18, + 2613660, 19, + 3368421, 20}; + + MillerUpdatingRegression model = new MillerUpdatingRegression(5, true); + int off = 0; + double[] tmp = new double[5]; + int nobs = 21; + for (int i = 0; i < nobs; i++) { + tmp[0] = data[off + 1]; + tmp[1] = tmp[0] * tmp[0]; + tmp[2] = tmp[0] * tmp[1]; + tmp[3] = tmp[0] * tmp[2]; + tmp[4] = tmp[0] * tmp[3]; + model.addObservation(tmp, data[off]); + off += 2; + } + RegressionResults result = model.regress(); + double[] betaHat = result.getParameterEstimates(); + TestUtils.assertEquals(betaHat, + new double[]{1.0, + 1.0, 1.0, + 1.0, 1.0, + 1.0}, 1E-8); // +// + double[] se = result.getStdErrorOfEstimates(); + TestUtils.assertEquals(se, + new double[]{0.0, + 0.0, 0.0, + 0.0, 0.0, + 0.0}, 1E-8); // + + TestUtils.assertEquals(1.0, result.getRSquared(), 1.0e-10); + TestUtils.assertEquals(0, result.getMeanSquareError(), 1.0e-7); + TestUtils.assertEquals(0.00, result.getErrorSumSquares(), 1.0e-6); + + return; + } + + @Test + public void testWampler2() throws MathException { + double[] data = new double[]{ + 1.00000, 0, + 1.11111, 1, + 1.24992, 2, + 1.42753, 3, + 1.65984, 4, + 1.96875, 5, + 2.38336, 6, + 2.94117, 7, + 3.68928, 8, + 4.68559, 9, + 6.00000, 10, + 7.71561, 11, + 9.92992, 12, + 12.75603, 13, + 16.32384, 14, + 20.78125, 15, + 26.29536, 16, + 33.05367, 17, + 41.26528, 18, + 51.16209, 19, + 63.00000, 20}; + + MillerUpdatingRegression model = new MillerUpdatingRegression(5, true); + int off = 0; + double[] tmp = new double[5]; + int nobs = 21; + for (int i = 0; i < nobs; i++) { + tmp[0] = data[off + 1]; + tmp[1] = tmp[0] * tmp[0]; + tmp[2] = tmp[0] * tmp[1]; + tmp[3] = tmp[0] * tmp[2]; + tmp[4] = tmp[0] * tmp[3]; + model.addObservation(tmp, data[off]); + off += 2; + } + RegressionResults result = model.regress(); + double[] betaHat = result.getParameterEstimates(); + TestUtils.assertEquals(betaHat, + new double[]{1.0, + 1.0e-1, 1.0e-2, + 1.0e-3, 1.0e-4, + 1.0e-5}, 1E-8); // +// + double[] se = result.getStdErrorOfEstimates(); + TestUtils.assertEquals(se, + new double[]{0.0, + 0.0, 0.0, + 0.0, 0.0, + 0.0}, 1E-8); // + + TestUtils.assertEquals(1.0, result.getRSquared(), 1.0e-10); + TestUtils.assertEquals(0, result.getMeanSquareError(), 1.0e-7); + TestUtils.assertEquals(0.00, result.getErrorSumSquares(), 1.0e-6); + return; + } + + @Test + public void testWampler3() throws MathException { + double[] data = new double[]{ + 760, 0, + -2042, 1, + 2111, 2, + -1684, 3, + 3888, 4, + 1858, 5, + 11379, 6, + 17560, 7, + 39287, 8, + 64382, 9, + 113159, 10, + 175108, 11, + 273291, 12, + 400186, 13, + 581243, 14, + 811568, 15, + 1121004, 16, + 1506550, 17, + 2002767, 18, + 2611612, 19, + 3369180, 20}; + MillerUpdatingRegression model = new MillerUpdatingRegression(5, true); + int off = 0; + double[] tmp = new double[5]; + int nobs = 21; + for (int i = 0; i < nobs; i++) { + tmp[0] = data[off + 1]; + tmp[1] = tmp[0] * tmp[0]; + tmp[2] = tmp[0] * tmp[1]; + tmp[3] = tmp[0] * tmp[2]; + tmp[4] = tmp[0] * tmp[3]; + model.addObservation(tmp, data[off]); + off += 2; + } + RegressionResults result = model.regress(); + double[] betaHat = result.getParameterEstimates(); + TestUtils.assertEquals(betaHat, + new double[]{1.0, + 1.0, 1.0, + 1.0, 1.0, + 1.0}, 1E-8); // + double[] se = result.getStdErrorOfEstimates(); + TestUtils.assertEquals(se, + new double[]{2152.32624678170, + 2363.55173469681, 779.343524331583, + 101.475507550350, 5.64566512170752, + 0.112324854679312}, 1E-8); // + + TestUtils.assertEquals(.999995559025820, result.getRSquared(), 1.0e-10); + TestUtils.assertEquals(5570284.53333333, result.getMeanSquareError(), 1.0e-7); + TestUtils.assertEquals(83554268.0000000, result.getErrorSumSquares(), 1.0e-6); + return; + } + + //@Test + public void testWampler4() throws MathException { + double[] data = new double[]{ + 75901, 0, + -204794, 1, + 204863, 2, + -204436, 3, + 253665, 4, + -200894, 5, + 214131, 6, + -185192, 7, + 221249, 8, + -138370, 9, + 315911, 10, + -27644, 11, + 455253, 12, + 197434, 13, + 783995, 14, + 608816, 15, + 1370781, 16, + 1303798, 17, + 2205519, 18, + 2408860, 19, + 3444321, 20}; + MillerUpdatingRegression model = new MillerUpdatingRegression(5, true); + int off = 0; + double[] tmp = new double[5]; + int nobs = 21; + for (int i = 0; i < nobs; i++) { + tmp[0] = data[off + 1]; + tmp[1] = tmp[0] * tmp[0]; + tmp[2] = tmp[0] * tmp[1]; + tmp[3] = tmp[0] * tmp[2]; + tmp[4] = tmp[0] * tmp[3]; + model.addObservation(tmp, data[off]); + off += 2; + } + RegressionResults result = model.regress(); + double[] betaHat = result.getParameterEstimates(); + TestUtils.assertEquals(betaHat, + new double[]{1.0, + 1.0, 1.0, + 1.0, 1.0, + 1.0}, 1E-8); // +// + double[] se = result.getStdErrorOfEstimates(); + TestUtils.assertEquals(se, + new double[]{215232.624678170, + 236355.173469681, 77934.3524331583, + 10147.5507550350, 564.566512170752, + 11.2324854679312}, 1E-8); // + + TestUtils.assertEquals(.957478440825662, result.getRSquared(), 1.0e-10); + TestUtils.assertEquals(55702845333.3333, result.getMeanSquareError(), 1.0e-4); + TestUtils.assertEquals(835542680000.000, result.getErrorSumSquares(), 1.0e-3); + + return; + } + + /** + * Test Longley dataset against certified values provided by NIST. + * Data Source: J. Longley (1967) "An Appraisal of Least Squares + * Programs for the Electronic Computer from the Point of View of the User" + * Journal of the American Statistical Association, vol. 62. September, + * pp. 819-841. + * + * Certified values (and data) are from NIST: + * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Longley.dat + */ + @Test + public void testLongly() throws Exception { + // Y values are first, then independent vars + // Each row is one observation + double[] design = new double[]{ + 60323, 83.0, 234289, 2356, 1590, 107608, 1947, + 61122, 88.5, 259426, 2325, 1456, 108632, 1948, + 60171, 88.2, 258054, 3682, 1616, 109773, 1949, + 61187, 89.5, 284599, 3351, 1650, 110929, 1950, + 63221, 96.2, 328975, 2099, 3099, 112075, 1951, + 63639, 98.1, 346999, 1932, 3594, 113270, 1952, + 64989, 99.0, 365385, 1870, 3547, 115094, 1953, + 63761, 100.0, 363112, 3578, 3350, 116219, 1954, + 66019, 101.2, 397469, 2904, 3048, 117388, 1955, + 67857, 104.6, 419180, 2822, 2857, 118734, 1956, + 68169, 108.4, 442769, 2936, 2798, 120445, 1957, + 66513, 110.8, 444546, 4681, 2637, 121950, 1958, + 68655, 112.6, 482704, 3813, 2552, 123366, 1959, + 69564, 114.2, 502601, 3931, 2514, 125368, 1960, + 69331, 115.7, 518173, 4806, 2572, 127852, 1961, + 70551, 116.9, 554894, 4007, 2827, 130081, 1962 + }; + + final int nobs = 16; + final int nvars = 6; + + // Estimate the model + MillerUpdatingRegression model = new MillerUpdatingRegression(6, true); + int off = 0; + double[] tmp = new double[6]; + for (int i = 0; i < nobs; i++) { + System.arraycopy(design, off + 1, tmp, 0, nvars); + model.addObservation(tmp, design[off]); + off += nvars + 1; + } + + // Check expected beta values from NIST + RegressionResults result = model.regress(); + double[] betaHat = result.getParameterEstimates(); + TestUtils.assertEquals(betaHat, + new double[]{-3482258.63459582, 15.0618722713733, + -0.358191792925910E-01, -2.02022980381683, + -1.03322686717359, -0.511041056535807E-01, + 1829.15146461355}, 1E-8); // + + // Check standard errors from NIST + double[] errors = result.getStdErrorOfEstimates(); + TestUtils.assertEquals(new double[]{890420.383607373, + 84.9149257747669, + 0.334910077722432E-01, + 0.488399681651699, + 0.214274163161675, + 0.226073200069370, + 455.478499142212}, errors, 1E-6); +// + // Check R-Square statistics against R + TestUtils.assertEquals(0.995479004577296, result.getRSquared(), 1E-12); + TestUtils.assertEquals(0.992465007628826, result.getAdjustedRSquared(), 1E-12); +// +// +// // Estimate model without intercept + model = new MillerUpdatingRegression(6, false); + off = 0; + for (int i = 0; i < nobs; i++) { + System.arraycopy(design, off + 1, tmp, 0, nvars); + model.addObservation(tmp, design[off]); + off += nvars + 1; + } + // Check expected beta values from R + result = model.regress(); + betaHat = result.getParameterEstimates(); + TestUtils.assertEquals(betaHat, + new double[]{-52.99357013868291, 0.07107319907358, + -0.42346585566399, -0.57256866841929, + -0.41420358884978, 48.41786562001326}, 1E-11); +// + // Check standard errors from R + errors = result.getStdErrorOfEstimates(); + TestUtils.assertEquals(new double[]{129.54486693117232, 0.03016640003786, + 0.41773654056612, 0.27899087467676, 0.32128496193363, + 17.68948737819961}, errors, 1E-11); +// + +// // Check R-Square statistics against R + TestUtils.assertEquals(0.9999670130706, result.getRSquared(), 1E-12); + TestUtils.assertEquals(0.999947220913, result.getAdjustedRSquared(), 1E-12); + + } + +// @Test +// public void testRegressReorder() throws MathException { +// // System.out.println("testRegressReorder"); +// MillerUpdatingRegression instance = new MillerUpdatingRegression(4, false); +// double[][] x = new double[airdata[0].length][]; +// double[] y = new double[airdata[0].length]; +// for (int i = 0; i < airdata[0].length; i++) { +// x[i] = new double[4]; +// x[i][0] = 1.0; +// x[i][1] = Math.log(airdata[3][i]); +// x[i][2] = Math.log(airdata[4][i]); +// x[i][3] = airdata[5][i]; +// y[i] = Math.log(airdata[2][i]); +// } +// +// instance.addObservations(x, y); +// RegressionResults result = instance.regress(); +// if (result == null) { +// fail("Null result...."); +// } +// +// instance.reorderRegressors(new int[]{3, 2}, 0); +// RegressionResults resultInverse = instance.regress(); +// +// double[] beta = result.getParameterEstimates(); +// double[] betar = resultInverse.getParameterEstimates(); +// if (Math.abs(beta[0] - betar[0]) > 1.0e-14) { +// fail("Parameters not correct after reorder (0,3)"); +// } +// if (Math.abs(beta[1] - betar[1]) > 1.0e-14) { +// fail("Parameters not correct after reorder (1,2)"); +// } +// if (Math.abs(beta[2] - betar[2]) > 1.0e-14) { +// fail("Parameters not correct after reorder (2,1)"); +// } +// if (Math.abs(beta[3] - betar[3]) > 1.0e-14) { +// fail("Parameters not correct after reorder (3,0)"); +// } +// } + + @Test + public void testOneRedundantColumn() throws MathException { + MillerUpdatingRegression instance = new MillerUpdatingRegression(4, false); + MillerUpdatingRegression instance2 = new MillerUpdatingRegression(5, false); + double[][] x = new double[airdata[0].length][]; + double[][] x2 = new double[airdata[0].length][]; + double[] y = new double[airdata[0].length]; + for (int i = 0; i < airdata[0].length; i++) { + x[i] = new double[4]; + x2[i] = new double[5]; + x[i][0] = 1.0; + x[i][1] = Math.log(airdata[3][i]); + x[i][2] = Math.log(airdata[4][i]); + x[i][3] = airdata[5][i]; + + x2[i][0] = x[i][0]; + x2[i][1] = x[i][1]; + x2[i][2] = x[i][2]; + x2[i][3] = x[i][3]; + x2[i][4] = x[i][3]; + + y[i] = Math.log(airdata[2][i]); + } + + instance.addObservations(x, y); + RegressionResults result = instance.regress(); + if (result == null) { + fail("Could not estimate initial regression"); + } + + instance2.addObservations(x2, y); + RegressionResults resultRedundant = instance2.regress(); + if (resultRedundant == null) { + fail("Could not estimate redundant regression"); + } + double[] beta = result.getParameterEstimates(); + double[] betar = resultRedundant.getParameterEstimates(); + double[] se = result.getStdErrorOfEstimates(); + double[] ser = resultRedundant.getStdErrorOfEstimates(); + + for (int i = 0; i < beta.length; i++) { + if (Math.abs(beta[i] - betar[i]) > 1.0e-8) { + fail("Parameters not correctly estimated"); + } + if (Math.abs(se[i] - ser[i]) > 1.0e-8) { + fail("Standard errors not correctly estimated"); + } + for (int j = 0; j < i; j++) { + if (Math.abs(result.getCovarianceOfParameters(i, j) + - resultRedundant.getCovarianceOfParameters(i, j)) > 1.0e-8) { + fail("Variance Covariance not correct"); + } + } + } + + + TestUtils.assertEquals(result.getAdjustedRSquared(), resultRedundant.getAdjustedRSquared(), 1.0e-8); + TestUtils.assertEquals(result.getErrorSumSquares(), resultRedundant.getErrorSumSquares(), 1.0e-8); + TestUtils.assertEquals(result.getMeanSquareError(), resultRedundant.getMeanSquareError(), 1.0e-8); + TestUtils.assertEquals(result.getRSquared(), resultRedundant.getRSquared(), 1.0e-8); + return; + } + + @Test + public void testThreeRedundantColumn() throws MathException { + + MillerUpdatingRegression instance = new MillerUpdatingRegression(4, false); + MillerUpdatingRegression instance2 = new MillerUpdatingRegression(7, false); + double[][] x = new double[airdata[0].length][]; + double[][] x2 = new double[airdata[0].length][]; + double[] y = new double[airdata[0].length]; + for (int i = 0; i < airdata[0].length; i++) { + x[i] = new double[4]; + x2[i] = new double[7]; + x[i][0] = 1.0; + x[i][1] = Math.log(airdata[3][i]); + x[i][2] = Math.log(airdata[4][i]); + x[i][3] = airdata[5][i]; + + x2[i][0] = x[i][0]; + x2[i][1] = x[i][0]; + x2[i][2] = x[i][1]; + x2[i][3] = x[i][2]; + x2[i][4] = x[i][1]; + x2[i][5] = x[i][3]; + x2[i][6] = x[i][2]; + + y[i] = Math.log(airdata[2][i]); + } + + instance.addObservations(x, y); + RegressionResults result = instance.regress(); + if (result == null) { + fail("Could not estimate initial regression"); + } + + instance2.addObservations(x2, y); + RegressionResults resultRedundant = instance2.regress(); + if (resultRedundant == null) { + fail("Could not estimate redundant regression"); + } + double[] beta = result.getParameterEstimates(); + double[] betar = resultRedundant.getParameterEstimates(); + double[] se = result.getStdErrorOfEstimates(); + double[] ser = resultRedundant.getStdErrorOfEstimates(); + + if (Math.abs(beta[0] - betar[0]) > 1.0e-8) { + fail("Parameters not correct after reorder (0,3)"); + } + if (Math.abs(beta[1] - betar[2]) > 1.0e-8) { + fail("Parameters not correct after reorder (1,2)"); + } + if (Math.abs(beta[2] - betar[3]) > 1.0e-8) { + fail("Parameters not correct after reorder (2,1)"); + } + if (Math.abs(beta[3] - betar[5]) > 1.0e-8) { + fail("Parameters not correct after reorder (3,0)"); + } + + if (Math.abs(se[0] - ser[0]) > 1.0e-8) { + fail("Se not correct after reorder (0,3)"); + } + if (Math.abs(se[1] - ser[2]) > 1.0e-8) { + fail("Se not correct after reorder (1,2)"); + } + if (Math.abs(se[2] - ser[3]) > 1.0e-8) { + fail("Se not correct after reorder (2,1)"); + } + if (Math.abs(se[3] - ser[5]) > 1.0e-8) { + fail("Se not correct after reorder (3,0)"); + } + + if (Math.abs(result.getCovarianceOfParameters(0, 0) + - resultRedundant.getCovarianceOfParameters(0, 0)) > 1.0e-8) { + fail("VCV not correct after reorder (0,0)"); + } + if (Math.abs(result.getCovarianceOfParameters(0, 1) + - resultRedundant.getCovarianceOfParameters(0, 2)) > 1.0e-8) { + fail("VCV not correct after reorder (0,1)<->(0,2)"); + } + if (Math.abs(result.getCovarianceOfParameters(0, 2) + - resultRedundant.getCovarianceOfParameters(0, 3)) > 1.0e-8) { + fail("VCV not correct after reorder (0,2)<->(0,1)"); + } + if (Math.abs(result.getCovarianceOfParameters(0, 3) + - resultRedundant.getCovarianceOfParameters(0, 5)) > 1.0e-8) { + fail("VCV not correct after reorder (0,3)<->(0,3)"); + } + if (Math.abs(result.getCovarianceOfParameters(1, 0) + - resultRedundant.getCovarianceOfParameters(2, 0)) > 1.0e-8) { + fail("VCV not correct after reorder (1,0)<->(2,0)"); + } + if (Math.abs(result.getCovarianceOfParameters(1, 1) + - resultRedundant.getCovarianceOfParameters(2, 2)) > 1.0e-8) { + fail("VCV not correct (1,1)<->(2,1)"); + } + if (Math.abs(result.getCovarianceOfParameters(1, 2) + - resultRedundant.getCovarianceOfParameters(2, 3)) > 1.0e-8) { + fail("VCV not correct (1,2)<->(2,2)"); + } + + if (Math.abs(result.getCovarianceOfParameters(2, 0) + - resultRedundant.getCovarianceOfParameters(3, 0)) > 1.0e-8) { + fail("VCV not correct (2,0)<->(1,0)"); + } + if (Math.abs(result.getCovarianceOfParameters(2, 1) + - resultRedundant.getCovarianceOfParameters(3, 2)) > 1.0e-8) { + fail("VCV not correct (2,1)<->(1,2)"); + } + + if (Math.abs(result.getCovarianceOfParameters(3, 3) + - resultRedundant.getCovarianceOfParameters(5, 5)) > 1.0e-8) { + fail("VCV not correct (3,3)<->(3,2)"); + } + + TestUtils.assertEquals(result.getAdjustedRSquared(), resultRedundant.getAdjustedRSquared(), 1.0e-8); + TestUtils.assertEquals(result.getErrorSumSquares(), resultRedundant.getErrorSumSquares(), 1.0e-8); + TestUtils.assertEquals(result.getMeanSquareError(), resultRedundant.getMeanSquareError(), 1.0e-8); + TestUtils.assertEquals(result.getRSquared(), resultRedundant.getRSquared(), 1.0e-8); + return; + } + + @Test + public void testPCorr() { + MillerUpdatingRegression instance = new MillerUpdatingRegression(4, false); + double[][] x = new double[airdata[0].length][]; + double[] y = new double[airdata[0].length]; + double[] cp = new double[10]; + double[] yxcorr = new double[4]; + double[] diag = new double[4]; + double sumysq = 0.0; + int off = 0; + for (int i = 0; i < airdata[0].length; i++) { + x[i] = new double[4]; + x[i][0] = 1.0; + x[i][1] = Math.log(airdata[3][i]); + x[i][2] = Math.log(airdata[4][i]); + x[i][3] = airdata[5][i]; + y[i] = Math.log(airdata[2][i]); + off = 0; + for (int j = 0; j < 4; j++) { + double tmp = x[i][j]; + for (int k = 0; k <= j; k++, off++) { + cp[off] += tmp * x[i][k]; + } + yxcorr[j] += tmp * y[i]; + } + sumysq += y[i] * y[i]; + } + PearsonsCorrelation pearson = new PearsonsCorrelation(x); + RealMatrix corr = pearson.getCorrelationMatrix(); + off = 0; + for (int i = 0; i < 4; i++, off += (i + 1)) { + diag[i] = FastMath.sqrt(cp[off]); + } + + instance.addObservations(x, y); + double[] pc = instance.getPartialCorrelations(0); + int idx = 0; + off = 0; + int off2 = 6; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < i; j++) { + if (Math.abs(pc[idx] - cp[off] / (diag[i] * diag[j])) > 1.0e-8) { + fail("Failed cross products... i = " + i + " j = " + j); + } + ++idx; + ++off; + } + ++off; + if (Math.abs(pc[i+off2] - yxcorr[ i] / (FastMath.sqrt(sumysq) * diag[i])) > 1.0e-8) { + fail("failed cross product i = " + i + " y"); + } + } + double[] pc2 = instance.getPartialCorrelations(1); + + idx = 0; + + for (int i = 1; i < 4; i++) { + for (int j = 1; j < i; j++) { + if (Math.abs(pc2[idx] - corr.getEntry(j, i)) > 1.0e-8) { + fail("Failed cross products... i = " + i + " j = " + j); + } + ++idx; + } + } + double[] pc3 = instance.getPartialCorrelations(2); + if (pc3 == null) { + fail("Should not be null"); + } + return; + } + + @Test + public void testHdiag() { + MillerUpdatingRegression instance = new MillerUpdatingRegression(4, false); + double[][] x = new double[airdata[0].length][]; + double[] y = new double[airdata[0].length]; + for (int i = 0; i < airdata[0].length; i++) { + x[i] = new double[4]; + x[i][0] = 1.0; + x[i][1] = Math.log(airdata[3][i]); + x[i][2] = Math.log(airdata[4][i]); + x[i][3] = airdata[5][i]; + y[i] = Math.log(airdata[2][i]); + } + instance.addObservations(x, y); + OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); + ols.setNoIntercept(true); + ols.newSampleData(y, x); + + RealMatrix rm = ols.calculateHat(); + for (int i = 0; i < x.length; i++) { + TestUtils.assertEquals(instance.getDiagonalOfHatMatrix(x[i]), rm.getEntry(i, i), 1.0e-8); + } + return; + } + @Test + public void testHdiagConstant() { + MillerUpdatingRegression instance = new MillerUpdatingRegression(3, true); + double[][] x = new double[airdata[0].length][]; + double[] y = new double[airdata[0].length]; + for (int i = 0; i < airdata[0].length; i++) { + x[i] = new double[3]; + x[i][0] = Math.log(airdata[3][i]); + x[i][1] = Math.log(airdata[4][i]); + x[i][2] = airdata[5][i]; + y[i] = Math.log(airdata[2][i]); + } + instance.addObservations(x, y); + OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); + ols.setNoIntercept(false); + ols.newSampleData(y, x); + + RealMatrix rm = ols.calculateHat(); + for (int i = 0; i < x.length; i++) { + TestUtils.assertEquals(instance.getDiagonalOfHatMatrix(x[i]), rm.getEntry(i, i), 1.0e-8); + } + return; + } + + + @Test + public void testSubsetRegression() throws MathException { + + MillerUpdatingRegression instance = new MillerUpdatingRegression(3, true); + MillerUpdatingRegression redRegression = new MillerUpdatingRegression(2, true); + double[][] x = new double[airdata[0].length][]; + double[][] xReduced = new double[airdata[0].length][]; + double[] y = new double[airdata[0].length]; + for (int i = 0; i < airdata[0].length; i++) { + x[i] = new double[3]; + x[i][0] = Math.log(airdata[3][i]); + x[i][1] = Math.log(airdata[4][i]); + x[i][2] = airdata[5][i]; + + xReduced[i] = new double[2]; + xReduced[i][0] = Math.log(airdata[3][i]); + xReduced[i][1] = Math.log(airdata[4][i]); + + y[i] = Math.log(airdata[2][i]); + } + + instance.addObservations(x, y); + redRegression.addObservations(xReduced, y); + + RegressionResults resultsInstance = instance.regress( new int[]{0,1,2} ); + RegressionResults resultsReduced = redRegression.regress(); + + TestUtils.assertEquals(resultsInstance.getParameterEstimates(), resultsReduced.getParameterEstimates(), 1.0e-12); + TestUtils.assertEquals(resultsInstance.getStdErrorOfEstimates(), resultsReduced.getStdErrorOfEstimates(), 1.0e-12); + } + + +} Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/MillerUpdatingRegressionTest.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/MillerUpdatingRegressionTest.java ------------------------------------------------------------------------------ svn:keywords = Date Revision Id Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/MillerUpdatingRegressionTest.java ------------------------------------------------------------------------------ svn:mime-type = text/plain