commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pste...@apache.org
Subject svn commit: r744802 - in /commons/proper/math/trunk/src: java/org/apache/commons/math/stat/correlation/ site/xdoc/ test/R/ test/org/apache/commons/math/stat/correlation/
Date Mon, 16 Feb 2009 05:09:50 GMT
Author: psteitz
Date: Mon Feb 16 05:09:49 2009
New Revision: 744802

URL: http://svn.apache.org/viewvc?rev=744802&view=rev
Log:
Added Pearsons correlation implemendation. JIRA: MATH-114

Added:
    commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java
  (with props)
    commons/proper/math/trunk/src/test/R/correlationTestCases
    commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/PearsonsCorrelationTest.java
  (with props)
Modified:
    commons/proper/math/trunk/src/site/xdoc/changes.xml
    commons/proper/math/trunk/src/test/R/testAll

Added: commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java?rev=744802&view=auto
==============================================================================
--- commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java
(added)
+++ commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java
Mon Feb 16 05:09:49 2009
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.correlation;
+
+import org.apache.commons.math.MathException;
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.distribution.TDistribution;
+import org.apache.commons.math.distribution.TDistributionImpl;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.linear.DenseRealMatrix;
+import org.apache.commons.math.stat.regression.SimpleRegression;
+
+/**
+ * Computes Pearson's product-moment correlation coefficients for pairs of arrays
+ * or columns of a matrix.
+ * 
+ * <p>The constructors that take <code>RealMatrix</code> or 
+ * <code>double[][]</code> arguments generate correlation matrices.  The
+ * columns of the input matrices are assumed to represent variable values.
+ * Correlations are given by the formula</p>
+ * <code>cor(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub>
- E(Y))] / [(n - 1)s(X)s(Y)]</code>
+ * where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code>
+ * is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations.
+ * 
+ * @version $Revision$ $Date$
+ * @since 2.0
+ */
+public class PearsonsCorrelation {
+    
+    /** correlation matrix */
+    private final RealMatrix correlationMatrix;
+    
+    /** number of observations */
+    private final int nObs;
+    
+    /**
+     * Create a PearsonsCorrelation instance without data
+     */
+    public PearsonsCorrelation() {
+        super();
+        correlationMatrix = null;
+        nObs = 0;
+    }
+    
+    /**
+     * Create a PearsonsCorrelation from a rectangular array
+     * whose columns represent values of variables to be correlated.
+     * 
+     * @param data rectangular array with columns representing variables
+     * @throws IllegalArgumentException if the input data array is not
+     * rectangular with at least two rows and two columns.
+     */
+    public PearsonsCorrelation(double[][] data) {
+        this(new DenseRealMatrix(data));
+    }
+    
+    /**
+     * Create a PearsonsCorrelation from a RealMatrix whose columns
+     * represent variables to be correlated.
+     * 
+     * @param matrix matrix with columns representing variables to correlate
+     */
+    public PearsonsCorrelation(RealMatrix matrix) {
+        checkSufficientData(matrix);
+        nObs = matrix.getRowDimension();
+        correlationMatrix = computeCorrelation(matrix);
+    }
+    
+    /**
+     * Create a PearsonsCorrelation from a {@link Covariance}.  The correlation
+     * matrix is computed by scaling the Covariance's covariance matrix.
+     * The Covariance instance must have been created from a data matrix with
+     * columns representing variable values.
+     * 
+     * @param covariance Covariance instance
+     */
+    public PearsonsCorrelation(Covariance covariance) {
+        RealMatrix covarianceMatrix = covariance.getCovarianceMatrix();
+        if (covarianceMatrix == null) {
+            throw MathRuntimeException.createIllegalArgumentException(
+                    "Covariance matrix is null", null);
+        }
+        nObs = covariance.getN();
+        correlationMatrix = covarianceToCorrelation(covarianceMatrix);
+    }
+    
+    /**
+     * Create a PearsonsCorrelation from a covariance matrix.  The correlation
+     * matrix is computed by scaling the covariance matrix.
+     * 
+     * @param covarianceMatrix covariance matrix
+     * @param numberOfObservations the number of observations in the dataset used to compute
+     * the covariance matrix
+     */
+    public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) {
+        nObs = numberOfObservations;
+        correlationMatrix = covarianceToCorrelation(covarianceMatrix);
+        
+    }
+    
+    /**
+     * Returns the correlation matrix
+     * 
+     * @return correlation matrix
+     */
+    public RealMatrix getCorrelationMatrix() {
+        return correlationMatrix;  
+    }
+    
+    /**
+     * Returns a matrix of standard errors associated with the estimates
+     * in the correlation matrix.<br/>
+     * <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard
+     * error associated with <code>getCorrelationMatrix.getEntry(i,j)</code>
+     * <p>The formula used to compute the standard error is <br/>
+     * <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code>
+     * where <code>r</code> is the estimated correlation coefficient and 
+     * <code>n</code> is the number of observations in the source dataset.</p>
+     * 
+     * @return matrix of correlation standard errors
+     */
+    public RealMatrix getCorrelationStandardErrors() {
+        int nVars = correlationMatrix.getColumnDimension();
+        double[][] out = new double[nVars][nVars];
+        for (int i = 0; i < nVars; i++) {
+            for (int j = 0; j < nVars; j++) {
+                double r = correlationMatrix.getEntry(i, j);
+                out[i][j] = Math.sqrt((1 - r * r) /(nObs - 2));
+            }
+        }
+        return new DenseRealMatrix(out);
+    }
+
+    /**
+     * Returns a matrix of p-values associated with the (two-sided) null
+     * hypothesis that the corresponding correlation coefficient is zero.
+     * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability
+     * that a random variable distributed as <code>t<sub>n-2</sub></code>
takes
+     * a value with absolute value greater than or equal to <br>
+     * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p>
+     * <p>The values in the matrix are sometimes referred to as the 
+     * <i>significance</i> of the corresponding correlation coefficients.</p>
+     * 
+     * @return matrix of p-values
+     * @throws MathException if an error occurs estimating probabilities
+     */
+    public RealMatrix getCorrelationPValues() throws MathException {
+        TDistribution tDistribution = new TDistributionImpl(nObs - 2);
+        int nVars = correlationMatrix.getColumnDimension();
+        double[][] out = new double[nVars][nVars];
+        for (int i = 0; i < nVars; i++) {
+            for (int j = 0; j < nVars; j++) {
+                if (i == j) {
+                    out[i][j] = 0d;
+                } else {
+                    double r = correlationMatrix.getEntry(i, j);
+                    double t = Math.abs(r * Math.sqrt((nObs - 2)/(1 - r * r)));
+                    out[i][j] = 2 * (1 - tDistribution.cumulativeProbability(t));
+                }
+            }
+        }
+        return new DenseRealMatrix(out);
+    }
+    
+    
+    /**
+     * Computes the correlation matrix for the columns of the
+     * input matrix.
+     * 
+     * @param matrix matrix with columns representing variables to correlate
+     * @return correlation matrix
+     */
+    public RealMatrix computeCorrelation(RealMatrix matrix) {
+        int nVars = matrix.getColumnDimension();
+        RealMatrix outMatrix = new DenseRealMatrix(nVars, nVars);
+        for (int i = 0; i < nVars; i++) {
+            for (int j = 0; j < i; j++) {
+              double corr = correlation(matrix.getColumn(i), matrix.getColumn(j));
+              outMatrix.setEntry(i, j, corr);
+              outMatrix.setEntry(j, i, corr);
+            }
+            outMatrix.setEntry(i, i, 1d);
+        }
+        return outMatrix;
+    }
+    
+    /**
+     * Computes the Pearson's product-moment correlation coefficient between the two arrays.
+     * 
+     * </p>Throws IllegalArgumentException if the arrays do not have the same length
+     * or their common length is less than 2</p>
+     *
+     * @param xArray first data array
+     * @param yArray second data array
+     * @return Returns Pearson's correlation coefficient for the two arrays 
+     * @throws  IllegalArgumentException if the arrays lengths do not match or
+     * there is insufficient data
+     */
+    public double correlation(final double[] xArray, final double[] yArray) throws IllegalArgumentException
{
+        SimpleRegression regression = new SimpleRegression();
+        if(xArray.length == yArray.length && xArray.length > 1) {
+            for(int i=0; i<xArray.length; i++) {
+                regression.addData(xArray[i], yArray[i]);
+            }
+            return regression.getR();
+        }
+        else {
+            throw MathRuntimeException.createIllegalArgumentException(
+                    "Invalid array dimensions. xArray has size {0}; yArray has {1} elements",
+                    new Object[] {xArray.length, yArray.length});
+        }
+    }
+    
+    /**
+     * Derives a correlation matrix from a covariance matrix.
+     * 
+     * <p>Uses the formula <br/>
+     * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where 
+     * <code>r(&middot,&middot;)</code> is the correlation coefficient
and
+     * <code>s(&middot;)</code> means standard deviation.</p>
+     * 
+     * @param covarianceMatrix the covariance matrix
+     * @return correlation matrix
+     */
+    public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) {
+        int nVars = covarianceMatrix.getColumnDimension();
+        RealMatrix outMatrix = new DenseRealMatrix(nVars, nVars);
+        for (int i = 0; i < nVars; i++) {
+            double sigma = Math.sqrt(covarianceMatrix.getEntry(i, i));
+            outMatrix.setEntry(i, i, 1d);
+            for (int j = 0; j < i; j++) {
+                double entry = covarianceMatrix.getEntry(i, j) / 
+                       (sigma * Math.sqrt(covarianceMatrix.getEntry(j, j)));
+                outMatrix.setEntry(i, j, entry);
+                outMatrix.setEntry(j, i, entry);
+            }
+        }
+        return outMatrix;
+    }
+    
+    /**
+     * Throws IllegalArgumentException of the matrix does not have at least
+     * two columns and two rows
+     * 
+     * @param matrix matrix to check for sufficiency
+     */
+    private void checkSufficientData(final RealMatrix matrix) {
+        int nRows = matrix.getRowDimension();
+        int nCols = matrix.getColumnDimension();
+        if (nRows < 2 || nCols < 2) {
+            throw MathRuntimeException.createIllegalArgumentException(
+                    "Insufficient data: only {0} rows and {1} columns.",
+                    new Object[]{nRows, nCols});
+        }
+    }
+}

Propchange: commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: commons/proper/math/trunk/src/site/xdoc/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/site/xdoc/changes.xml?rev=744802&r1=744801&r2=744802&view=diff
==============================================================================
--- commons/proper/math/trunk/src/site/xdoc/changes.xml (original)
+++ commons/proper/math/trunk/src/site/xdoc/changes.xml Mon Feb 16 05:09:49 2009
@@ -39,6 +39,10 @@
   </properties>
   <body>
     <release version="2.0" date="TBD" description="TBD">
+      <action dev="psteitz" type="add" issue="MATH-114" due-to="John Gant">
+        Added PearsonsCorrelation class to compute correlation matrices, standard
+        errors and p-values for correlation coefficients.
+      </action>
       <action dev="psteitz" type="add" issue="MATH-114">
         Added Covariance class to compute variance-covariance matrices in new
         correlation package.

Added: commons/proper/math/trunk/src/test/R/correlationTestCases
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/R/correlationTestCases?rev=744802&view=auto
==============================================================================
--- commons/proper/math/trunk/src/test/R/correlationTestCases (added)
+++ commons/proper/math/trunk/src/test/R/correlationTestCases Mon Feb 16 05:09:49 2009
@@ -0,0 +1,185 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#------------------------------------------------------------------------------
+# R source file to validate Pearson's correlation tests in
+# org.apache.commons.math.stat.correlation.PearsonsCorrelationTest
+#
+# To run the test, install R, put this file and testFunctions
+# into the same directory, launch R from this directory and then enter
+# source("<name-of-this-file>")
+#
+#------------------------------------------------------------------------------
+tol <- 1E-9                       # error tolerance for tests
+#------------------------------------------------------------------------------
+# Function definitions
+
+source("testFunctions")           # utility test functions
+options(digits=16)	              # override number of digits displayed
+
+# function to verify correlation computations
+verifyCorrelation <- function(matrix, expectedCorrelation, name) {
+    correlation <- cor(matrix)
+    output <- c("Correlation matrix test dataset = ", name)
+    if (assertEquals(expectedCorrelation, correlation,tol,"Correlations")) {
+        displayPadded(output, SUCCEEDED, WIDTH)
+    } else {
+        displayPadded(output, FAILED, WIDTH)
+    }  
+}
+
+# function to verify p-values
+verifyPValues <- function(matrix, pValues, name) {
+	dimension <- dim(matrix)[2]
+	corValues <- matrix(nrow=dimension,ncol=dimension)
+	expectedValues <- matrix(nrow=dimension,ncol=dimension)
+	for (i in 2:dimension) {
+		for (j in 1:(i-1)) {
+			corValues[i,j]<-cor.test(matrix[,i], matrix[,j])$p.value
+			corValues[j,i]<-corValues[i,j]
+		}
+	}
+	for (i in 1:dimension) {
+		corValues[i,i] <- 1
+		expectedValues[i,i] <- 1
+	}
+	ptr <- 1
+	for (i in 2:dimension) {
+		for (j in 1:(i-1)) {
+			expectedValues[i,j] <- pValues[ptr]
+			expectedValues[j,i] <- expectedValues[i,j]
+			ptr <- ptr + 1
+		}
+    }
+    output <- c("Correlation p-values test dataset = ", name)
+    if (assertEquals(expectedValues, corValues,tol,"p-values")) {
+        displayPadded(output, SUCCEEDED, WIDTH)
+    } else {
+        displayPadded(output, FAILED, WIDTH)
+    }
+ }  	
+
+#--------------------------------------------------------------------------
+cat("Correlation test cases\n")
+
+# Longley
+
+longley <- matrix(c(60323,83.0,234289,2356,1590,107608,1947,
+                    61122,88.5,259426,2325,1456,108632,1948,
+                    60171,88.2,258054,3682,1616,109773,1949,
+                    61187,89.5,284599,3351,1650,110929,1950,
+                    63221,96.2,328975,2099,3099,112075,1951,
+                    63639,98.1,346999,1932,3594,113270,1952,
+                    64989,99.0,365385,1870,3547,115094,1953,
+                    63761,100.0,363112,3578,3350,116219,1954,
+                    66019,101.2,397469,2904,3048,117388,1955,
+                    67857,104.6,419180,2822,2857,118734,1956,
+                    68169,108.4,442769,2936,2798,120445,1957,
+                    66513,110.8,444546,4681,2637,121950,1958,
+                    68655,112.6,482704,3813,2552,123366,1959,
+                    69564,114.2,502601,3931,2514,125368,1960,
+                    69331,115.7,518173,4806,2572,127852,1961,
+                    70551,116.9,554894,4007,2827,130081,1962),
+                    nrow = 16, ncol = 7, byrow = TRUE)
+
+expectedCorrelation <- matrix(c(
+         1.000000000000000, 0.9708985250610560, 0.9835516111796693, 0.5024980838759942,
+         0.4573073999764817, 0.960390571594376, 0.9713294591921188,
+         0.970898525061056, 1.0000000000000000, 0.9915891780247822, 0.6206333925590966,
+         0.4647441876006747, 0.979163432977498, 0.9911491900672053,
+         0.983551611179669, 0.9915891780247822, 1.0000000000000000, 0.6042609398895580,
+         0.4464367918926265, 0.991090069458478, 0.9952734837647849,
+         0.502498083875994, 0.6206333925590966, 0.6042609398895580, 1.0000000000000000,
+         -0.1774206295018783, 0.686551516365312, 0.6682566045621746,
+          0.457307399976482, 0.4647441876006747, 0.4464367918926265, -0.1774206295018783,
+          1.0000000000000000, 0.364416267189032, 0.4172451498349454,
+          0.960390571594376, 0.9791634329774981, 0.9910900694584777, 0.6865515163653120,
+          0.3644162671890320, 1.000000000000000, 0.9939528462329257,
+          0.971329459192119, 0.9911491900672053, 0.9952734837647849, 0.6682566045621746,
+          0.4172451498349454, 0.993952846232926, 1.0000000000000000),
+          nrow = 7, ncol = 7, byrow = TRUE)
+ verifyCorrelation(longley, expectedCorrelation, "longley")
+ 
+ expectedPValues <- c(
+          4.38904690369668e-10,
+          8.36353208910623e-12, 7.8159700933611e-14,
+          0.0472894097790304, 0.01030636128354301, 0.01316878049026582, 
+          0.0749178049642416, 0.06971758330341182, 0.0830166169296545, 0.510948586323452,
+          3.693245043123738e-09, 4.327782576751815e-11, 1.167954621905665e-13, 0.00331028281967516,
0.1652293725106684, 
+          3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15, 0.00466039138541463,
0.1078477071581498, 7.771561172376096e-15)
+ verifyPValues(longley, expectedPValues, "longley")
+ 
+ # Swiss Fertility
+ 
+ fertility <- matrix(c(80.2,17.0,15,12,9.96,
+  83.1,45.1,6,9,84.84,
+  92.5,39.7,5,5,93.40,
+  85.8,36.5,12,7,33.77,
+  76.9,43.5,17,15,5.16,
+  76.1,35.3,9,7,90.57,
+  83.8,70.2,16,7,92.85,
+  92.4,67.8,14,8,97.16,
+  82.4,53.3,12,7,97.67,
+  82.9,45.2,16,13,91.38,
+  87.1,64.5,14,6,98.61,
+  64.1,62.0,21,12,8.52,
+  66.9,67.5,14,7,2.27,
+  68.9,60.7,19,12,4.43,
+  61.7,69.3,22,5,2.82,
+  68.3,72.6,18,2,24.20,
+  71.7,34.0,17,8,3.30,
+  55.7,19.4,26,28,12.11,
+  54.3,15.2,31,20,2.15,
+  65.1,73.0,19,9,2.84,
+  65.5,59.8,22,10,5.23,
+  65.0,55.1,14,3,4.52,
+  56.6,50.9,22,12,15.14,
+  57.4,54.1,20,6,4.20,
+  72.5,71.2,12,1,2.40,
+  74.2,58.1,14,8,5.23,
+  72.0,63.5,6,3,2.56,
+  60.5,60.8,16,10,7.72,
+  58.3,26.8,25,19,18.46,
+  65.4,49.5,15,8,6.10,
+  75.5,85.9,3,2,99.71,
+  69.3,84.9,7,6,99.68,
+  77.3,89.7,5,2,100.00,
+  70.5,78.2,12,6,98.96,
+  79.4,64.9,7,3,98.22,
+  65.0,75.9,9,9,99.06,
+  92.2,84.6,3,3,99.46,
+  79.3,63.1,13,13,96.83,
+  70.4,38.4,26,12,5.62,
+  65.7,7.7,29,11,13.79,
+  72.7,16.7,22,13,11.22,
+  64.4,17.6,35,32,16.92,
+  77.6,37.6,15,7,4.97,
+  67.6,18.7,25,7,8.65,
+  35.0,1.2,37,53,42.34,
+  44.7,46.6,16,29,50.43,
+  42.8,27.7,22,29,58.33),
+  nrow = 47, ncol = 5, byrow = TRUE)
+   
+ expectedCorrelation <- matrix(c(
+         1.0000000000000000, 0.3530791836199747, -0.6458827064572875, -0.6637888570350691,
 0.4636847006517939,
+         0.3530791836199747, 1.0000000000000000,-0.6865422086171366, -0.6395225189483201,
0.4010950530487398,
+         -0.6458827064572875, -0.6865422086171366, 1.0000000000000000, 0.6984152962884830,
-0.5727418060641666,
+         -0.6637888570350691, -0.6395225189483201, 0.6984152962884830, 1.0000000000000000,
-0.1538589170909148,
+          0.4636847006517939, 0.4010950530487398, -0.5727418060641666, -0.1538589170909148,
1.0000000000000000),
+          nrow = 5, ncol = 5, byrow = TRUE)
+      
+verifyCorrelation(fertility, expectedCorrelation, "swiss fertility")
+
+displayDashes(WIDTH)
\ No newline at end of file

Modified: commons/proper/math/trunk/src/test/R/testAll
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/R/testAll?rev=744802&r1=744801&r2=744802&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/R/testAll (original)
+++ commons/proper/math/trunk/src/test/R/testAll Mon Feb 16 05:09:49 2009
@@ -50,6 +50,9 @@
 # covariance
 source("covarianceTestCases")
 
+# correlation
+source("correlationTestCases")
+
 #------------------------------------------------------------------------------
 # if output has been diverted, change it back
 if (sink.number()) {

Added: commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/PearsonsCorrelationTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/PearsonsCorrelationTest.java?rev=744802&view=auto
==============================================================================
--- commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/PearsonsCorrelationTest.java
(added)
+++ commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/PearsonsCorrelationTest.java
Mon Feb 16 05:09:49 2009
@@ -0,0 +1,274 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.correlation;
+
+import org.apache.commons.math.TestUtils;
+import org.apache.commons.math.distribution.TDistribution;
+import org.apache.commons.math.distribution.TDistributionImpl;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.linear.DenseRealMatrix;
+
+import junit.framework.TestCase;
+
+public class PearsonsCorrelationTest extends TestCase {
+    
+    protected final double[] longleyData = new double[] {
+            60323,83.0,234289,2356,1590,107608,1947,
+            61122,88.5,259426,2325,1456,108632,1948,
+            60171,88.2,258054,3682,1616,109773,1949,
+            61187,89.5,284599,3351,1650,110929,1950,
+            63221,96.2,328975,2099,3099,112075,1951,
+            63639,98.1,346999,1932,3594,113270,1952,
+            64989,99.0,365385,1870,3547,115094,1953,
+            63761,100.0,363112,3578,3350,116219,1954,
+            66019,101.2,397469,2904,3048,117388,1955,
+            67857,104.6,419180,2822,2857,118734,1956,
+            68169,108.4,442769,2936,2798,120445,1957,
+            66513,110.8,444546,4681,2637,121950,1958,
+            68655,112.6,482704,3813,2552,123366,1959,
+            69564,114.2,502601,3931,2514,125368,1960,
+            69331,115.7,518173,4806,2572,127852,1961,
+            70551,116.9,554894,4007,2827,130081,1962
+        };
+    
+    protected final double[] swissData = new double[] {
+            80.2,17.0,15,12,9.96,
+            83.1,45.1,6,9,84.84,
+            92.5,39.7,5,5,93.40,
+            85.8,36.5,12,7,33.77,
+            76.9,43.5,17,15,5.16,
+            76.1,35.3,9,7,90.57,
+            83.8,70.2,16,7,92.85,
+            92.4,67.8,14,8,97.16,
+            82.4,53.3,12,7,97.67,
+            82.9,45.2,16,13,91.38,
+            87.1,64.5,14,6,98.61,
+            64.1,62.0,21,12,8.52,
+            66.9,67.5,14,7,2.27,
+            68.9,60.7,19,12,4.43,
+            61.7,69.3,22,5,2.82,
+            68.3,72.6,18,2,24.20,
+            71.7,34.0,17,8,3.30,
+            55.7,19.4,26,28,12.11,
+            54.3,15.2,31,20,2.15,
+            65.1,73.0,19,9,2.84,
+            65.5,59.8,22,10,5.23,
+            65.0,55.1,14,3,4.52,
+            56.6,50.9,22,12,15.14,
+            57.4,54.1,20,6,4.20,
+            72.5,71.2,12,1,2.40,
+            74.2,58.1,14,8,5.23,
+            72.0,63.5,6,3,2.56,
+            60.5,60.8,16,10,7.72,
+            58.3,26.8,25,19,18.46,
+            65.4,49.5,15,8,6.10,
+            75.5,85.9,3,2,99.71,
+            69.3,84.9,7,6,99.68,
+            77.3,89.7,5,2,100.00,
+            70.5,78.2,12,6,98.96,
+            79.4,64.9,7,3,98.22,
+            65.0,75.9,9,9,99.06,
+            92.2,84.6,3,3,99.46,
+            79.3,63.1,13,13,96.83,
+            70.4,38.4,26,12,5.62,
+            65.7,7.7,29,11,13.79,
+            72.7,16.7,22,13,11.22,
+            64.4,17.6,35,32,16.92,
+            77.6,37.6,15,7,4.97,
+            67.6,18.7,25,7,8.65,
+            35.0,1.2,37,53,42.34,
+            44.7,46.6,16,29,50.43,
+            42.8,27.7,22,29,58.33
+        };
+ 
+    
+    /**
+     * Test Longley dataset against R.
+     */
+    public void testLongly() throws Exception {  
+        RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
+        PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
+        RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
+        double[] rData = new double[] {
+                1.000000000000000, 0.9708985250610560, 0.9835516111796693, 0.5024980838759942,
+                0.4573073999764817, 0.960390571594376, 0.9713294591921188,
+                0.970898525061056, 1.0000000000000000, 0.9915891780247822, 0.6206333925590966,
+                0.4647441876006747, 0.979163432977498, 0.9911491900672053,
+                0.983551611179669, 0.9915891780247822, 1.0000000000000000, 0.6042609398895580,
+                0.4464367918926265, 0.991090069458478, 0.9952734837647849,
+                0.502498083875994, 0.6206333925590966, 0.6042609398895580, 1.0000000000000000,
+                -0.1774206295018783, 0.686551516365312, 0.6682566045621746,
+                0.457307399976482, 0.4647441876006747, 0.4464367918926265, -0.1774206295018783,
+                1.0000000000000000, 0.364416267189032, 0.4172451498349454,
+                0.960390571594376, 0.9791634329774981, 0.9910900694584777, 0.6865515163653120,
+                0.3644162671890320, 1.000000000000000, 0.9939528462329257,
+                0.971329459192119, 0.9911491900672053, 0.9952734837647849, 0.6682566045621746,
+                0.4172451498349454, 0.993952846232926, 1.0000000000000000
+        }; 
+        TestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 7, 7), correlationMatrix,
10E-15);
+        
+        double[] rPvalues = new double[] {
+                4.38904690369668e-10,
+                8.36353208910623e-12, 7.8159700933611e-14,
+                0.0472894097790304, 0.01030636128354301, 0.01316878049026582, 
+                0.0749178049642416, 0.06971758330341182, 0.0830166169296545, 0.510948586323452,
+                3.693245043123738e-09, 4.327782576751815e-11, 1.167954621905665e-13, 0.00331028281967516,
0.1652293725106684, 
+                3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15, 0.00466039138541463,
0.1078477071581498, 7.771561172376096e-15
+        };
+        RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 7);
+        fillUpper(rPMatrix, 0d);
+        TestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(),
10E-15);
+    }
+    
+    /**
+     * Test R Swiss fertility dataset against R.
+     */
+    public void testSwissFertility() throws Exception {
+         RealMatrix matrix = createRealMatrix(swissData, 47, 5);
+         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
+         RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
+         double[] rData = new double[] {
+               1.0000000000000000, 0.3530791836199747, -0.6458827064572875, -0.6637888570350691,
 0.4636847006517939,
+                 0.3530791836199747, 1.0000000000000000,-0.6865422086171366, -0.6395225189483201,
0.4010950530487398,
+                -0.6458827064572875, -0.6865422086171366, 1.0000000000000000, 0.6984152962884830,
-0.5727418060641666,
+                -0.6637888570350691, -0.6395225189483201, 0.6984152962884830, 1.0000000000000000,
-0.1538589170909148,
+                 0.4636847006517939, 0.4010950530487398, -0.5727418060641666, -0.1538589170909148,
1.0000000000000000
+         };
+         TestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 5, 5), correlationMatrix,
10E-15);
+         
+         double[] rPvalues = new double[] {
+                 0.01491720061472623,
+                 9.45043734069043e-07, 9.95151527133974e-08,
+                 3.658616965962355e-07, 1.304590105694471e-06, 4.811397236181847e-08,
+                 0.001028523190118147, 0.005204433539191644, 2.588307925380906e-05, 0.301807756132683
+         };
+         RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 5);
+         fillUpper(rPMatrix, 0d);
+         TestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(),
10E-15);
+    }
+    
+    /**
+     * Constant column
+     */
+    public void testConstant() {
+        double[] noVariance = new double[] {1, 1, 1, 1};
+        double[] values = new double[] {1, 2, 3, 4};
+        assertTrue(Double.isNaN(new PearsonsCorrelation().correlation(noVariance, values)));
+        assertTrue(Double.isNaN(new PearsonsCorrelation().correlation(noVariance, values)));
+    }
+    
+    
+    /**
+     * Insufficient data
+     */
+     
+    public void testInsufficientData() {
+        double[] one = new double[] {1};
+        double[] two = new double[] {2};
+        try {
+            new PearsonsCorrelation().correlation(one, two);
+            fail("Expecting IllegalArgumentException");
+        } catch (IllegalArgumentException ex) {
+            // Expected
+        }
+        RealMatrix matrix = new DenseRealMatrix(new double[][] {{0},{1}});
+        try {
+            new PearsonsCorrelation(matrix);
+            fail("Expecting IllegalArgumentException");
+        } catch (IllegalArgumentException ex) {
+            // Expected
+        }
+    }
+    
+    /**
+     * Verify that direct t-tests using standard error estimates are consistent
+     * with reported p-values
+     */
+    public void testStdErrorConsistency() throws Exception {
+        TDistribution tDistribution = new TDistributionImpl(45);
+        RealMatrix matrix = createRealMatrix(swissData, 47, 5);
+        PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
+        RealMatrix rValues = corrInstance.getCorrelationMatrix();
+        RealMatrix pValues = corrInstance.getCorrelationPValues();
+        RealMatrix stdErrors = corrInstance.getCorrelationStandardErrors();
+        for (int i = 0; i < 5; i++) {
+            for (int j = 0; j < i; j++) {
+                double t = Math.abs(rValues.getEntry(i, j)) / stdErrors.getEntry(i, j);
+                double p = 2 * (1 - tDistribution.cumulativeProbability(t));
+                assertEquals(p, pValues.getEntry(i, j), 10E-15);
+            }
+        }
+    }
+    
+    /**
+     * Verify that creating correlation from covariance gives same results as
+     * direct computation from the original matrix
+     */
+    public void testCovarianceConsistency() throws Exception {
+        RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
+        PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
+        Covariance covInstance = new Covariance(matrix);
+        PearsonsCorrelation corrFromCovInstance = new PearsonsCorrelation(covInstance);
+        TestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(),
+                corrFromCovInstance.getCorrelationMatrix(), 10E-15);
+        TestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(),
+                corrFromCovInstance.getCorrelationPValues(), 10E-15);
+        TestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(),
+                corrFromCovInstance.getCorrelationStandardErrors(), 10E-15);
+        
+        PearsonsCorrelation corrFromCovInstance2 = 
+            new PearsonsCorrelation(covInstance.getCovarianceMatrix(), 16);
+        TestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(),
+                corrFromCovInstance2.getCorrelationMatrix(), 10E-15);
+        TestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(),
+                corrFromCovInstance2.getCorrelationPValues(), 10E-15);
+        TestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(),
+                corrFromCovInstance2.getCorrelationStandardErrors(), 10E-15);
+    }
+    
+    protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) {
+        double[][] matrixData = new double[nRows][nCols];
+        int ptr = 0;
+        for (int i = 0; i < nRows; i++) {
+            System.arraycopy(data, ptr, matrixData[i], 0, nCols);
+            ptr += nCols;
+        }
+        return new DenseRealMatrix(matrixData); 
+    }
+    
+    protected RealMatrix createLowerTriangularRealMatrix(double[] data, int dimension) {
+        int ptr = 0;
+        RealMatrix result = new DenseRealMatrix(dimension, dimension);
+        for (int i = 1; i < dimension; i++) {
+            for (int j = 0; j < i; j++) {
+                result.setEntry(i, j, data[ptr]);
+                ptr++;
+            }
+        }
+        return result;
+    }
+    
+    protected void fillUpper(RealMatrix matrix, double diagonalValue) {
+        int dimension = matrix.getColumnDimension();
+        for (int i = 0; i < dimension; i++) {
+            matrix.setEntry(i, i, diagonalValue);
+            for (int j = i+1; j < dimension; j++) {
+                matrix.setEntry(i, j, matrix.getEntry(j, i));
+            }
+        }  
+    }
+}

Propchange: commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/PearsonsCorrelationTest.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message