Return-Path: Delivered-To: apmail-mahout-commits-archive@www.apache.org Received: (qmail 5400 invoked from network); 23 Jul 2010 13:10:04 -0000 Received: from unknown (HELO mail.apache.org) (140.211.11.3) by 140.211.11.9 with SMTP; 23 Jul 2010 13:10:04 -0000 Received: (qmail 60772 invoked by uid 500); 23 Jul 2010 13:10:04 -0000 Delivered-To: apmail-mahout-commits-archive@mahout.apache.org Received: (qmail 60454 invoked by uid 500); 23 Jul 2010 13:10:01 -0000 Mailing-List: contact commits-help@mahout.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@mahout.apache.org Delivered-To: mailing list commits@mahout.apache.org Received: (qmail 60447 invoked by uid 99); 23 Jul 2010 13:10:01 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 23 Jul 2010 13:10:01 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 23 Jul 2010 13:09:55 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id F316723889DA; Fri, 23 Jul 2010 13:09:00 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: svn commit: r967084 - in /mahout/trunk: core/src/main/java/org/apache/mahout/common/distance/ core/src/test/java/org/apache/mahout/common/distance/ math/src/main/java/org/apache/mahout/math/ math/src/test/java/org/apache/mahout/math/ Date: Fri, 23 Jul 2010 13:09:00 -0000 To: commits@mahout.apache.org From: drew@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20100723130900.F316723889DA@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: drew Date: Fri Jul 23 13:09:00 2010 New Revision: 967084 URL: http://svn.apache.org/viewvc?rev=967084&view=rev Log: MAHOUT-446: Mahalanobis Distance + SVD contributed by Nicolas Maillot Added: mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMahalanobisDistanceMeasure.java mahout/trunk/math/src/main/java/org/apache/mahout/math/Algebra.java mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java Added: mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java?rev=967084&view=auto ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java (added) +++ mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java Fri Jul 23 13:09:00 2010 @@ -0,0 +1,208 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.common.distance; + +import java.io.DataInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.mahout.common.parameters.ClassParameter; +import org.apache.mahout.common.parameters.Parameter; +import org.apache.mahout.common.parameters.PathParameter; +import org.apache.mahout.math.CardinalityException; +import org.apache.mahout.math.DenseMatrix; +import org.apache.mahout.math.DenseVector; +import org.apache.mahout.math.Vector; +import org.apache.mahout.math.Matrix; +import org.apache.mahout.math.Algebra; +import org.apache.mahout.math.SingularValueDecomposition; +import org.apache.mahout.math.VectorWritable; +import org.apache.mahout.math.MatrixWritable; + + +//See http://en.wikipedia.org/wiki/Mahalanobis_distance for details +public class MahalanobisDistanceMeasure implements DistanceMeasure { + + private Matrix inverseCovarianceMatrix=null; + private Vector meanVector=null; + + private ClassParameter vectorClass; + private ClassParameter matrixClass; + private List> parameters; + private Parameter inverseCovarianceFile; + private Parameter meanVectorFile; + + /*public MahalanobisDistanceMeasure(Vector meanVector,Matrix inputMatrix, boolean inversionNeeded) + { + this.meanVector=meanVector; + if(inversionNeeded) + setCovarianceMatrix(inputMatrix); + else + setInverseCovarianceMatrix(inputMatrix); + }*/ + + @Override + public void configure(Configuration jobConf) { + if (parameters == null) { + ParameteredGeneralizations.configureParameters(this, jobConf); + } + try { + if (inverseCovarianceFile.get() != null) { + FileSystem fs = FileSystem.get(inverseCovarianceFile.get().toUri(), jobConf); + MatrixWritable inverseCovarianceMatrix = (MatrixWritable) vectorClass.get().newInstance(); + if (!fs.exists(inverseCovarianceFile.get())) { + throw new FileNotFoundException(inverseCovarianceFile.get().toString()); + } + DataInputStream in = fs.open(inverseCovarianceFile.get()); + try { + inverseCovarianceMatrix.readFields(in); + } finally { + in.close(); + } + this.inverseCovarianceMatrix = inverseCovarianceMatrix.get(); + } + + if (meanVectorFile.get() != null) { + FileSystem fs = FileSystem.get(meanVectorFile.get().toUri(), jobConf); + VectorWritable meanVector = (VectorWritable) vectorClass.get().newInstance(); + if (!fs.exists(meanVectorFile.get())) { + throw new FileNotFoundException(meanVectorFile.get().toString()); + } + DataInputStream in = fs.open(meanVectorFile.get()); + try { + meanVector.readFields(in); + } finally { + in.close(); + } + this.meanVector = meanVector.get(); + } + + } catch (IOException e) { + throw new IllegalStateException(e); + } catch (IllegalAccessException e) { + throw new IllegalStateException(e); + } catch (InstantiationException e) { + throw new IllegalStateException(e); + } + } + + @Override + public Collection> getParameters() { + return parameters; + } + + @Override + public void createParameters(String prefix, Configuration jobConf) { + parameters = new ArrayList>(); + inverseCovarianceFile = new PathParameter(prefix, "inverseCovarianceFile", jobConf, null, + "Path on DFS to a file containing the inverse covariance matrix."); + parameters.add(inverseCovarianceFile); + + matrixClass = new ClassParameter(prefix, "maxtrixClass", jobConf, DenseMatrix.class, + "Class file specified in parameter inverseCovarianceFile has been serialized with."); + parameters.add(matrixClass); + + meanVectorFile=new PathParameter(prefix, "meanVectorFile", jobConf, null, + "Path on DFS to a file containing the mean Vector."); + parameters.add(meanVectorFile); + + vectorClass = new ClassParameter(prefix, "vectorClass", jobConf, DenseVector.class, + "Class file specified in parameter meanVectorFile has been serialized with."); + parameters.add(vectorClass); + } + + /** + * @param v + * @return Mahalanobis distance of a multivariate vector + */ + public double distance(Vector v) + { + if (meanVector==null || inverseCovarianceMatrix==null) { + throw new IllegalArgumentException("meanVector or inverseCovarianceMatrix not initialized"); + } + + return Math.sqrt(v.minus(meanVector).dot(Algebra.mult(inverseCovarianceMatrix, v.minus(meanVector)))); + } + + @Override + public double distance(Vector v1, Vector v2) { + if (v1.size() != v2.size()) { + throw new CardinalityException(v1.size(), v2.size()); + } + + if (meanVector==null || inverseCovarianceMatrix==null) { + throw new IllegalArgumentException("meanVector or inverseCovarianceMatrix not initialized"); + } + + return Math.sqrt(v1.minus(v2).dot(Algebra.mult(inverseCovarianceMatrix, v1.minus(v2)))); + } + + @Override + public double distance(double centroidLengthSquare, Vector centroid, Vector v) { + return distance(centroid, v); // TODO + } + + public void setInverseCovarianceMatrix(Matrix inverseCovarianceMatrix) { + this.inverseCovarianceMatrix = inverseCovarianceMatrix; + } + + + /** + * Computes the inverse covariance from the input covariance matrix given in input. + * @param m + * A covariance matrix. + * @throws IllegalArgumentException + * if eigen values equal to 0 found. + */ + public void setCovarianceMatrix(Matrix m) { + if (m.numRows() != m.numCols()) { + throw new CardinalityException(m.numRows(), m.numCols()); + } + //see http://www.mlahanas.de/Math/svd.htm for details, which specifically details the case of covariance matrix inversion + //Complexity: O(min(nm2,mn2)) + SingularValueDecomposition svd=new SingularValueDecomposition(m); + Matrix SInv=svd.getS(); + //Inverse Diagonal Elems + for(int i=0;i0.0) + SInv.set(i,i,1/diagElem); + else + throw new IllegalArgumentException("Eigen Value equals to 0 found."); + } + inverseCovarianceMatrix=svd.getU().times(SInv.times(svd.getU().transpose())); + } + + public Matrix getInverseCovarianceMatrix() { + return inverseCovarianceMatrix; + } + + public void setMeanVector(Vector meanVector) { + this.meanVector = meanVector; + } + + public Vector getMeanVector() { + return meanVector; + } +} Added: mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMahalanobisDistanceMeasure.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMahalanobisDistanceMeasure.java?rev=967084&view=auto ============================================================================== --- mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMahalanobisDistanceMeasure.java (added) +++ mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMahalanobisDistanceMeasure.java Fri Jul 23 13:09:00 2010 @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.common.distance; + +import org.apache.mahout.common.MahoutTestCase; +import org.apache.mahout.math.Matrix; +import org.apache.mahout.math.DenseMatrix; +import org.apache.mahout.math.DenseVector; + + +//To launch this test only : mvn test -Dtest=org.apache.mahout.common.distance.TestMahalanobisDistanceMeasure +public class TestMahalanobisDistanceMeasure extends MahoutTestCase { + + public void testMeasure() { + double tolerance=10E-5; + double[][] invCovValues = { { 2.2, 0.4 }, { 0.4, 2.8 } }; + double[] meanValues = { -2.3, -0.9 }; + DenseMatrix invCov = new DenseMatrix(invCovValues); + DenseVector meanVector = new DenseVector(meanValues); + MahalanobisDistanceMeasure distanceMeasure = new MahalanobisDistanceMeasure(); + distanceMeasure.setInverseCovarianceMatrix(invCov); + distanceMeasure.setMeanVector(meanVector); + double[] v1 = { -1.9, -2.3 }; + double[] v2 = { -2.9, -1.3 }; + double dist=distanceMeasure.distance(new DenseVector(v1),new DenseVector(v2)); + assertEquals(2.0493901531919194,dist,tolerance); + //now set the covariance Matrix + distanceMeasure.setCovarianceMatrix(invCov); + //check the inverse covariance times covariance equals identity + Matrix identity=distanceMeasure.getInverseCovarianceMatrix().times(invCov); + assertEquals(1,identity.get(0,0),tolerance); + assertEquals(1,identity.get(1,1),tolerance); + assertEquals(0,identity.get(1,0),tolerance); + assertEquals(0,identity.get(0,1),tolerance); + } + +} Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/Algebra.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/Algebra.java?rev=967084&view=auto ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/Algebra.java (added) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/Algebra.java Fri Jul 23 13:09:00 2010 @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; +import org.apache.mahout.math.Matrix; +import org.apache.mahout.math.Vector; + +public class Algebra { + + public static Vector mult(Matrix m, Vector v) { + if (m.numRows() != v.size()) { + throw new CardinalityException(m.numRows(), v.size()); + } + // Use a Dense Vector for the moment, + Vector result = new DenseVector(m.numRows()); + + for (int i = 0; i < m.numRows(); i++) { + result.set(i, m.getRow(i).dot(v)); + } + + return result; + } + + /** Returns sqrt(a^2 + b^2) without under/overflow. */ + public static double hypot(double a, double b) { + double r; + if (Math.abs(a) > Math.abs(b)) { + r = b / a; + r = Math.abs(a) * Math.sqrt(1 + r * r); + } else if (b != 0) { + r = a / b; + r = Math.abs(b) * Math.sqrt(1 + r * r); + } else { + r = 0.0; + } + return r; + } + + /** + * Compute Maximum Absolute Row Sum Norm of input Matrix m + * http://mathworld.wolfram.com/MaximumAbsoluteRowSumNorm.html + */ + public static double getNorm(Matrix m) { + double max=0.0; + Vector cv; + for(int i=0;imax) max=sum; + } + return max; + } + +} Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java?rev=967084&view=auto ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java (added) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java Fri Jul 23 13:09:00 2010 @@ -0,0 +1,657 @@ +/* +Copyright � 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. + */ +package org.apache.mahout.math; + +//import org.apache.mahout.math.matrix.DoubleFactory2D; +//import org.apache.mahout.math.matrix.DoubleMatrix2D; +import org.apache.mahout.math.Matrix; +import org.apache.mahout.math.Algebra; + + +public class SingularValueDecomposition implements java.io.Serializable { + + /** Arrays for internal storage of U and V. */ + private final double[][] U; + private final double[][] V; + + /** Array for internal storage of singular values. */ + private final double[] s; + + /** Row and column dimensions. */ + private final int m; + private final int n; + + /**To handle the case where numRows() < numCols() and to use the fact that SVD(A')=VSU'=> SVD(A')'=SVD(A)**/ + private boolean transpositionNeeded=false; + + /** + * Constructs and returns a new singular value decomposition object; The + * decomposed matrices can be retrieved via instance methods of the returned + * decomposition object. + * + * @param Arg + * A rectangular matrix. + * @return A decomposition object to access U, S and + * V. + * @throws IllegalArgumentException + * if A.rows() < A.columns(). + */ + public SingularValueDecomposition(Matrix Arg) { + if (Arg.numRows() < Arg.numCols()) { + transpositionNeeded=true; + } + + // Derived from LINPACK code. + // Initialize. + double[][] A; + if(!transpositionNeeded) + { + m = Arg.numRows(); + n = Arg.numCols(); + A = new double[m][n]; + for(int i=0;i= 0; k--) { + if (s[k] != 0.0) { + for (int j = k + 1; j < nu; j++) { + double t = 0; + for (int i = k; i < m; i++) { + t += U[i][k] * U[i][j]; + } + t = -t / U[k][k]; + for (int i = k; i < m; i++) { + U[i][j] += t * U[i][k]; + } + } + for (int i = k; i < m; i++) { + U[i][k] = -U[i][k]; + } + U[k][k] = 1.0 + U[k][k]; + for (int i = 0; i < k - 1; i++) { + U[i][k] = 0.0; + } + } else { + for (int i = 0; i < m; i++) { + U[i][k] = 0.0; + } + U[k][k] = 1.0; + } + } + } + + // If required, generate V. + + if (wantv) { + for (int k = n - 1; k >= 0; k--) { + if ((k < nrt) && (e[k] != 0.0)) { + for (int j = k + 1; j < nu; j++) { + double t = 0; + for (int i = k + 1; i < n; i++) { + t += V[i][k] * V[i][j]; + } + t = -t / V[k + 1][k]; + for (int i = k + 1; i < n; i++) { + V[i][j] += t * V[i][k]; + } + } + } + for (int i = 0; i < n; i++) { + V[i][k] = 0.0; + } + V[k][k] = 1.0; + } + } + + // Main iteration loop for the singular values. + + int pp = p - 1; + int iter = 0; + double eps = Math.pow(2.0, -52.0); + while (p > 0) { + int k; + + // Here is where a test for too many iterations would go. + + // This section of the program inspects for + // negligible elements in the s and e arrays. On + // completion the variables kase and k are set as follows. + + // kase = 1 if s(p) and e[k-1] are negligible and k

= -1; k--) { + if (k == -1) { + break; + } + if (Math.abs(e[k]) <= eps * (Math.abs(s[k]) + Math.abs(s[k + 1]))) { + e[k] = 0.0; + break; + } + } + int kase; + if (k == p - 2) { + kase = 4; + } else { + int ks; + for (ks = p - 1; ks >= k; ks--) { + if (ks == k) { + break; + } + double t = (ks == p ? 0.0 : Math.abs(e[ks])) + + (ks == k + 1 ? 0.0 : Math.abs(e[ks - 1])); + if (Math.abs(s[ks]) <= eps * t) { + s[ks] = 0.0; + break; + } + } + if (ks == k) { + kase = 3; + } else if (ks == p - 1) { + kase = 1; + } else { + kase = 2; + k = ks; + } + } + k++; + + // Perform the task indicated by kase. + + switch (kase) { + + // Deflate negligible s(p). + + case 1: { + double f = e[p - 2]; + e[p - 2] = 0.0; + for (int j = p - 2; j >= k; j--) { + double t = Algebra.hypot(s[j], f); + double cs = s[j] / t; + double sn = f / t; + s[j] = t; + if (j != k) { + f = -sn * e[j - 1]; + e[j - 1] = cs * e[j - 1]; + } + if (wantv) { + for (int i = 0; i < n; i++) { + t = cs * V[i][j] + sn * V[i][p - 1]; + V[i][p - 1] = -sn * V[i][j] + cs * V[i][p - 1]; + V[i][j] = t; + } + } + } + } + break; + + // Split at negligible s(k). + + case 2: { + double f = e[k - 1]; + e[k - 1] = 0.0; + for (int j = k; j < p; j++) { + double t = Algebra.hypot(s[j], f); + double cs = s[j] / t; + double sn = f / t; + s[j] = t; + f = -sn * e[j]; + e[j] = cs * e[j]; + if (wantu) { + for (int i = 0; i < m; i++) { + t = cs * U[i][j] + sn * U[i][k - 1]; + U[i][k - 1] = -sn * U[i][j] + cs * U[i][k - 1]; + U[i][j] = t; + } + } + } + } + break; + + // Perform one qr step. + + case 3: { + + // Calculate the shift. + + double scale = Math.max(Math.max(Math.max(Math.max( + Math.abs(s[p - 1]), Math.abs(s[p - 2])), Math.abs(e[p - 2])), + Math.abs(s[k])), Math.abs(e[k])); + double sp = s[p - 1] / scale; + double spm1 = s[p - 2] / scale; + double epm1 = e[p - 2] / scale; + double sk = s[k] / scale; + double ek = e[k] / scale; + double b = ((spm1 + sp) * (spm1 - sp) + epm1 * epm1) / 2.0; + double c = (sp * epm1) * (sp * epm1); + double shift = 0.0; + if ((b != 0.0) || (c != 0.0)) { + shift = Math.sqrt(b * b + c); + if (b < 0.0) { + shift = -shift; + } + shift = c / (b + shift); + } + double f = (sk + sp) * (sk - sp) + shift; + double g = sk * ek; + + // Chase zeros. + + for (int j = k; j < p - 1; j++) { + double t = Algebra.hypot(f, g); + double cs = f / t; + double sn = g / t; + if (j != k) { + e[j - 1] = t; + } + f = cs * s[j] + sn * e[j]; + e[j] = cs * e[j] - sn * s[j]; + g = sn * s[j + 1]; + s[j + 1] = cs * s[j + 1]; + if (wantv) { + for (int i = 0; i < n; i++) { + t = cs * V[i][j] + sn * V[i][j + 1]; + V[i][j + 1] = -sn * V[i][j] + cs * V[i][j + 1]; + V[i][j] = t; + } + } + t = Algebra.hypot(f, g); + cs = f / t; + sn = g / t; + s[j] = t; + f = cs * e[j] + sn * s[j + 1]; + s[j + 1] = -sn * e[j] + cs * s[j + 1]; + g = sn * e[j + 1]; + e[j + 1] = cs * e[j + 1]; + if (wantu && (j < m - 1)) { + for (int i = 0; i < m; i++) { + t = cs * U[i][j] + sn * U[i][j + 1]; + U[i][j + 1] = -sn * U[i][j] + cs * U[i][j + 1]; + U[i][j] = t; + } + } + } + e[p - 2] = f; + iter += 1; + } + break; + + // Convergence. + + case 4: { + + // Make the singular values positive. + + if (s[k] <= 0.0) { + s[k] = (s[k] < 0.0 ? -s[k] : 0.0); + if (wantv) { + for (int i = 0; i <= pp; i++) { + V[i][k] = -V[i][k]; + } + } + } + + // Order the singular values. + + while (k < pp) { + if (s[k] >= s[k + 1]) { + break; + } + double t = s[k]; + s[k] = s[k + 1]; + s[k + 1] = t; + if (wantv && (k < n - 1)) { + for (int i = 0; i < n; i++) { + t = V[i][k + 1]; + V[i][k + 1] = V[i][k]; + V[i][k] = t; + } + } + if (wantu && (k < m - 1)) { + for (int i = 0; i < m; i++) { + t = U[i][k + 1]; + U[i][k + 1] = U[i][k]; + U[i][k] = t; + } + } + k++; + } + iter = 0; + p--; + } + break; + } + } + } + + /** + * Returns the two norm condition number, which is max(S) / min(S). + */ + public double cond() { + return s[0] / s[Math.min(m, n) - 1]; + } + + /** + * Returns the diagonal matrix of singular values. + * + * @return S + */ + public Matrix getS() { + double[][] S = new double[n][n]; + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + S[i][j] = 0.0; + } + S[i][i] = this.s[i]; + } + + return new DenseMatrix(S); + } + + /** + * Returns the diagonal of S, which is a one-dimensional array of + * singular values + * + * @return diagonal of S. + */ + public double[] getSingularValues() { + return s; + } + + /** + * Returns the left singular vectors U. + * + * @return U + */ + public Matrix getU() { + if (!transpositionNeeded) { + int numCols = Math.min(m + 1, n); + Matrix r = new DenseMatrix(m, numCols); + for (int i = 0; i < m; i++) + for (int j = 0; j < numCols; j++) + r.set(i, j, U[i][j]); + + return r; + } + else { //case numRows() < numCols() + return new DenseMatrix(V); + } + } + + /** + * Returns the right singular vectors V. + * + * @return V + */ + public Matrix getV() { + if (!transpositionNeeded) { + return new DenseMatrix(V); + } + else { //case numRows() < numCols() + int numCols = Math.min(m + 1, n); + Matrix r = new DenseMatrix(m, numCols); + for (int i = 0; i < m; i++) + for (int j = 0; j < numCols; j++) + r.set(i, j, U[i][j]); + + return r; + } + } + + /** Returns the two norm, which is max(S). */ + public double norm2() { + return s[0]; + } + + /** + * Returns the effective numerical matrix rank, which is the number of + * nonnegligible singular values. + */ + public int rank() { + double eps = Math.pow(2.0, -52.0); + double tol = Math.max(m, n) * s[0] * eps; + int r = 0; + for (double value : s) { + if (value > tol) { + r++; + } + } + return r; + } + + /** + * + * @return Returns the n × n covariance matrix. + * The covariance matrix is V × J × Vt where J is the diagonal matrix of the inverse of the squares of the singular values. + * @parameter minSingularValue + * minSingularValue - value below which singular values are ignored (a 0 or negative value implies all singular value will be used) + */ + Matrix getCovariance(double minSingularValue) + { + DenseMatrix J=new DenseMatrix(s.length,s.length); + Matrix VMat=new DenseMatrix(this.V); + for(int i=0;i=minSingularValue)?1/(s[i]*s[i]):0.0); + return VMat.times(J).times(VMat.transpose()); + } + + /** + * Returns a String with (propertyName, propertyValue) pairs. Useful for + * debugging or to quickly get the rough picture. For example, + * + *

+   * rank          : 3
+   * trace         : 0
+   * 
+ */ + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("---------------------------------------------------------------------\n"); + buf.append("SingularValueDecomposition(A) --> cond(A), rank(A), norm2(A), U, S, V\n"); + buf.append("---------------------------------------------------------------------\n"); + + buf.append("cond = "); + String unknown = "Illegal operation or error: "; + try { + buf.append(String.valueOf(this.cond())); + } catch (IllegalArgumentException exc) { + buf.append(unknown).append(exc.getMessage()); + } + + buf.append("\nrank = "); + try { + buf.append(String.valueOf(this.rank())); + } catch (IllegalArgumentException exc) { + buf.append(unknown).append(exc.getMessage()); + } + + buf.append("\nnorm2 = "); + try { + buf.append(String.valueOf(this.norm2())); + } catch (IllegalArgumentException exc) { + buf.append(unknown).append(exc.getMessage()); + } + + buf.append("\n\nU = "); + try { + buf.append(String.valueOf(this.getU())); + } catch (IllegalArgumentException exc) { + buf.append(unknown).append(exc.getMessage()); + } + + buf.append("\n\nS = "); + try { + buf.append(String.valueOf(this.getS())); + } catch (IllegalArgumentException exc) { + buf.append(unknown).append(exc.getMessage()); + } + + buf.append("\n\nV = "); + try { + buf.append(String.valueOf(this.getV())); + } catch (IllegalArgumentException exc) { + buf.append(unknown).append(exc.getMessage()); + } + + return buf.toString(); + } +} Added: mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java?rev=967084&view=auto ============================================================================== --- mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java (added) +++ mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java Fri Jul 23 13:09:00 2010 @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import java.util.Random; + +import org.apache.mahout.math.Algebra; +import org.apache.mahout.math.DenseMatrix; + + +//To launch this test only : mvn test -Dtest=org.apache.mahout.math.TestSingularValueDecomposition +public class TestSingularValueDecomposition extends MahoutTestCase { + + private double[][] testSquare = { + { 24.0 / 25.0, 43.0 / 25.0 }, + { 57.0 / 25.0, 24.0 / 25.0 } + }; + + private double[][] testNonSquare = { + { -540.0 / 625.0, 963.0 / 625.0, -216.0 / 625.0 }, + { -1730.0 / 625.0, -744.0 / 625.0, 1008.0 / 625.0 }, + { -720.0 / 625.0, 1284.0 / 625.0, -288.0 / 625.0 }, + { -360.0 / 625.0, 192.0 / 625.0, 1756.0 / 625.0 }, + }; + + private static final double normTolerance = 10e-14; + + + public void testMoreRows() { + final double[] singularValues = { 123.456, 2.3, 1.001, 0.999 }; + final int rows = singularValues.length + 2; + final int columns = singularValues.length; + Random r = new Random(15338437322523l); + SingularValueDecomposition svd = + new SingularValueDecomposition(createTestMatrix(r, rows, columns, singularValues)); + double[] computedSV = svd.getSingularValues(); + assertEquals(singularValues.length, computedSV.length); + for (int i = 0; i < singularValues.length; ++i) { + assertEquals(singularValues[i], computedSV[i], 1.0e-10); + } + } + + + public void testMoreColumns() { + final double[] singularValues = { 123.456, 2.3, 1.001, 0.999 }; + final int rows = singularValues.length; + final int columns = singularValues.length + 2; + Random r = new Random(732763225836210l); + SingularValueDecomposition svd = + new SingularValueDecomposition(createTestMatrix(r, rows, columns, singularValues)); + double[] computedSV = svd.getSingularValues(); + assertEquals(singularValues.length, computedSV.length); + for (int i = 0; i < singularValues.length; ++i) { + assertEquals(singularValues[i], computedSV[i], 1.0e-10); + } + } + + /** test dimensions */ + public void testDimensions() { + Matrix matrix = new DenseMatrix(testSquare); + final int m = matrix.numRows(); + final int n = matrix.numCols(); + SingularValueDecomposition svd = new SingularValueDecomposition(matrix); + assertEquals(m, svd.getU().numRows()); + assertEquals(m, svd.getU().numCols()); + assertEquals(m, svd.getS().numCols()); + assertEquals(n, svd.getS().numCols()); + assertEquals(n, svd.getV().numRows()); + assertEquals(n, svd.getV().numCols()); + + } + + /** Test based on a dimension 4 Hadamard matrix. */ + // getCovariance to be implemented + public void testHadamard() { + Matrix matrix = new DenseMatrix(new double[][] { + {15.0 / 2.0, 5.0 / 2.0, 9.0 / 2.0, 3.0 / 2.0 }, + { 5.0 / 2.0, 15.0 / 2.0, 3.0 / 2.0, 9.0 / 2.0 }, + { 9.0 / 2.0, 3.0 / 2.0, 15.0 / 2.0, 5.0 / 2.0 }, + { 3.0 / 2.0, 9.0 / 2.0, 5.0 / 2.0, 15.0 / 2.0 } + }); + SingularValueDecomposition svd = new SingularValueDecomposition(matrix); + assertEquals(16.0, svd.getSingularValues()[0], 1.0e-14); + assertEquals( 8.0, svd.getSingularValues()[1], 1.0e-14); + assertEquals( 4.0, svd.getSingularValues()[2], 1.0e-14); + assertEquals( 2.0, svd.getSingularValues()[3], 1.0e-14); + + Matrix fullCovariance = new DenseMatrix(new double[][] { + { 85.0 / 1024, -51.0 / 1024, -75.0 / 1024, 45.0 / 1024 }, + { -51.0 / 1024, 85.0 / 1024, 45.0 / 1024, -75.0 / 1024 }, + { -75.0 / 1024, 45.0 / 1024, 85.0 / 1024, -51.0 / 1024 }, + { 45.0 / 1024, -75.0 / 1024, -51.0 / 1024, 85.0 / 1024 } + }); + + assertEquals(0.0,Algebra.getNorm(fullCovariance.minus(svd.getCovariance(0.0))),1.0e-14); + + + Matrix halfCovariance = new DenseMatrix(new double[][] { + { 5.0 / 1024, -3.0 / 1024, 5.0 / 1024, -3.0 / 1024 }, + { -3.0 / 1024, 5.0 / 1024, -3.0 / 1024, 5.0 / 1024 }, + { 5.0 / 1024, -3.0 / 1024, 5.0 / 1024, -3.0 / 1024 }, + { -3.0 / 1024, 5.0 / 1024, -3.0 / 1024, 5.0 / 1024 } + }); + assertEquals(0.0,Algebra.getNorm(halfCovariance.minus(svd.getCovariance(6.0))),1.0e-14); + + } + + /** test A = USVt */ + public void testAEqualUSVt() { + checkAEqualUSVt(new DenseMatrix(testSquare)); + checkAEqualUSVt(new DenseMatrix(testNonSquare)); + checkAEqualUSVt(new DenseMatrix(testNonSquare).transpose()); + } + + public void checkAEqualUSVt(final Matrix matrix) { + SingularValueDecomposition svd = new SingularValueDecomposition(matrix); + Matrix u = svd.getU(); + Matrix s = svd.getS(); + Matrix v = svd.getV(); + + //pad with 0, to be able to check some properties if some singular values are equal to 0 + if(s.numRows()