mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jman...@apache.org
Subject svn commit: r1088831 - in /mahout/trunk: core/src/main/java/org/apache/mahout/math/hadoop/decomposer/ core/src/test/java/org/apache/mahout/math/hadoop/decomposer/ math/src/main/java/org/apache/mahout/math/decomposer/lanczos/ math/src/main/java/org/apac...
Date Tue, 05 Apr 2011 00:20:27 GMT
Author: jmannix
Date: Tue Apr  5 00:20:27 2011
New Revision: 1088831

URL: http://svn.apache.org/viewvc?rev=1088831&view=rev
Log:
Fixes MAHOUT-369, adds new tests for Lanczos SVD consistency, provides a few methods to convert
old COLT vectors to Mahout vectors.

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVector.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix1D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix1D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/EigenvalueDecomposition.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java
    mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java Tue Apr  5 00:20:27 2011
@@ -17,11 +17,6 @@
 
 package org.apache.mahout.math.hadoop.decomposer;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -34,6 +29,7 @@ import org.apache.mahout.common.Abstract
 import org.apache.mahout.math.DenseMatrix;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.NamedVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorIterable;
 import org.apache.mahout.math.VectorWritable;
@@ -42,6 +38,11 @@ import org.apache.mahout.math.hadoop.Dis
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
 public class DistributedLanczosSolver extends LanczosSolver implements Tool {
 
   public static final String RAW_EIGENVECTORS = "rawEigenvectors";
@@ -190,14 +191,17 @@ public class DistributedLanczosSolver ex
    * @param outputPath The path (relative to the current Configuration's FileSystem) to save the output to.
    */
   public void serializeOutput(Matrix eigenVectors, List<Double> eigenValues, Path outputPath) throws IOException {
-    log.info("Persisting {} eigenVectors and eigenValues to: {}", eigenVectors.numRows(), outputPath);
+    int numEigenVectors = eigenVectors.numRows();
+    log.info("Persisting {} eigenVectors and eigenValues to: {}", numEigenVectors, outputPath);

     Configuration conf = getConf() != null ? getConf() : new Configuration();
     FileSystem fs = FileSystem.get(conf);
     SequenceFile.Writer seqWriter =
         new SequenceFile.Writer(fs, conf, outputPath, IntWritable.class, VectorWritable.class);
     IntWritable iw = new IntWritable();
-    for (int i = 0; i < eigenVectors.numRows() - 1; i++) {
-      Vector v = eigenVectors.getRow(i);
+    for (int i = 0; i < numEigenVectors; i++) {
+      // Persist eigenvectors sorted by eigenvalues in descending order
+      NamedVector v = new NamedVector(eigenVectors.getRow(numEigenVectors-1-i),
+          "eigenVector" + i + ", eigenvalue = " + eigenValues.get(numEigenVectors-1-i));
       Writable vw = new VectorWritable(v);
       iw.set(i);
       seqWriter.append(iw, vw);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVector.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVector.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVector.java Tue Apr  5 00:20:27 2011
@@ -18,6 +18,7 @@
 package org.apache.mahout.math.hadoop.decomposer;
 
 import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
 
 import java.util.regex.Pattern;
 
@@ -31,8 +32,8 @@ public class EigenVector extends DenseVe
 
   private final String name;
 
-  public EigenVector(DenseVector v, double eigenValue, double cosAngleError, int order) {
-    super(v, false);
+  public EigenVector(Vector v, double eigenValue, double cosAngleError, int order) {
+    super(v instanceof DenseVector ? (DenseVector) v : new DenseVector(v), false);
     name = "e|" + order + "| = |" + eigenValue + "|, err = " + cosAngleError;
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java Tue Apr  5 00:20:27 2011
@@ -17,15 +17,6 @@
 
 package org.apache.mahout.math.hadoop.decomposer;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -35,7 +26,6 @@ import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
-import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.MatrixSlice;
 import org.apache.mahout.math.OrthonormalityVerifier;
 import org.apache.mahout.math.SparseRowMatrix;
@@ -49,6 +39,15 @@ import org.apache.mahout.math.hadoop.Dis
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
 /**
  * <p>Class for taking the output of an eigendecomposition (specified as a Path location), and verifies correctness,
  * in terms of the following: if you have a vector e, and a matrix m, then let e' = m.timesSquared(v); the error
@@ -193,7 +192,7 @@ public class EigenVerificationJob extend
     for (Map.Entry<MatrixSlice, EigenStatus> pruneSlice : prunedEigenMeta) {
       MatrixSlice s = pruneSlice.getKey();
       EigenStatus meta = pruneSlice.getValue();
-      EigenVector ev = new EigenVector((DenseVector) s.vector(),
+      EigenVector ev = new EigenVector(s.vector(),
                                        meta.getEigenValue(),
                                        Math.abs(1 - meta.getCosAngle()),
                                        s.index());
@@ -226,7 +225,8 @@ public class EigenVerificationJob extend
 
     Collections.sort(prunedEigenMeta, new Comparator<Map.Entry<MatrixSlice, EigenStatus>>() {
       @Override
-      public int compare(Map.Entry<MatrixSlice, EigenStatus> e1, Map.Entry<MatrixSlice, EigenStatus> e2) {
+      public int compare(Map.Entry<MatrixSlice, EigenStatus> e1,
+          Map.Entry<MatrixSlice, EigenStatus> e2) {
         return e1.getKey().index() - e2.getKey().index();
       }
     });

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java Tue Apr  5 00:20:27 2011
@@ -59,7 +59,7 @@ public final class TestDistributedLanczo
       eigenVectors.assignRow(i, v);
       i++;
     }
-    assertEquals("number of eigenvectors", 9, i);
+    assertEquals("number of eigenvectors", 10, i);
   }
 
   @Test

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java Tue Apr  5 00:20:27 2011
@@ -18,19 +18,10 @@
 package org.apache.mahout.math.decomposer.lanczos;
 
 
-import java.util.EnumMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.Matrix;
-import org.apache.mahout.math.MatrixSlice;
-import org.apache.mahout.math.SparseRowMatrix;
-import org.apache.mahout.math.VectorIterable;
+import org.apache.mahout.math.*;
+import org.apache.mahout.math.function.DoubleFunction;
 import org.apache.mahout.math.function.Functions;
 import org.apache.mahout.math.function.PlusMult;
-import org.apache.mahout.math.function.DoubleFunction;
-import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.matrix.DoubleMatrix1D;
 import org.apache.mahout.math.matrix.DoubleMatrix2D;
 import org.apache.mahout.math.matrix.impl.DenseDoubleMatrix2D;
@@ -38,6 +29,10 @@ import org.apache.mahout.math.matrix.lin
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.EnumMap;
+import java.util.List;
+import java.util.Map;
+
 /**
  * <p>Simple implementation of the <a href="http://en.wikipedia.org/wiki/Lanczos_algorithm">Lanczos algorithm</a> for
  * finding eigenvalues of a symmetric matrix, applied to non-symmetric matrices by applying Matrix.timesSquared(vector)
@@ -152,7 +147,7 @@ public class LanczosSolver {
     endTime(TimingSection.TRIDIAG_DECOMP);
     startTime(TimingSection.FINAL_EIGEN_CREATE);
 
-    for (int i = 0; i < basis.numRows() - 1; i++) {
+    for (int i = 0; i < basis.numRows(); i++) {
       Vector realEigen = new DenseVector(corpus.numCols());
       // the eigenvectors live as columns of V, in reverse order.  Weird but true.
       DoubleMatrix1D ejCol = eigenVects.viewColumn(basis.numRows() - i - 1);
@@ -162,8 +157,9 @@ public class LanczosSolver {
       }
       realEigen = realEigen.normalize();
       eigenVectors.assignRow(i, realEigen);
-      log.info("Eigenvector {} found with eigenvalue {}", i, eigenVals.get(i));
-      eigenValues.add(eigenVals.get(i));
+      double e = Math.sqrt(eigenVals.get(i) * scaleFactor);
+      log.info("Eigenvector {} found with eigenvalue {}", i, e);
+      eigenValues.add(e);
     }
     log.info("LanczosSolver finished.");
     endTime(TimingSection.FINAL_EIGEN_CREATE);

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java Tue Apr  5 00:20:27 2011
@@ -8,10 +8,12 @@ It is provided "as is" without expressed
 */
 package org.apache.mahout.math.matrix;
 
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.function.DoubleDoubleFunction;
+import org.apache.mahout.math.function.DoubleFunction;
 import org.apache.mahout.math.function.Functions;
 import org.apache.mahout.math.function.PlusMult;
-import org.apache.mahout.math.function.DoubleFunction;
 import org.apache.mahout.math.list.DoubleArrayList;
 import org.apache.mahout.math.list.IntArrayList;
 import org.apache.mahout.math.matrix.impl.AbstractMatrix1D;
@@ -169,6 +171,14 @@ public abstract class DoubleMatrix1D ext
     return this;
   }
 
+  public Vector toVector() {
+    final DenseVector vector = new DenseVector(cardinality());
+    for(int i=0; i<cardinality(); i++) {
+      vector.set(i, get(i));
+    }
+    return vector;
+  }
+
   /**
    * Assigns the result of a function to each cell; <tt>x[i] = function(x[i],y[i])</tt>. <p> <b>Example:</b>
    * <pre>

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix1D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix1D.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix1D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix1D.java Tue Apr  5 00:20:27 2011
@@ -9,10 +9,10 @@ It is provided "as is" without expressed
 package org.apache.mahout.math.matrix.impl;
 
 import org.apache.mahout.math.function.DoubleDoubleFunction;
+import org.apache.mahout.math.function.DoubleFunction;
 import org.apache.mahout.math.function.Functions;
 import org.apache.mahout.math.function.Mult;
 import org.apache.mahout.math.function.PlusMult;
-import org.apache.mahout.math.function.DoubleFunction;
 import org.apache.mahout.math.matrix.DoubleMatrix1D;
 import org.apache.mahout.math.matrix.DoubleMatrix2D;
 

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix1D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix1D.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix1D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix1D.java Tue Apr  5 00:20:27 2011
@@ -8,6 +8,9 @@ It is provided "as is" without expressed
 */
 package org.apache.mahout.math.matrix.impl;
 
+import org.apache.mahout.math.RandomAccessSparseVector;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.function.IntDoubleProcedure;
 import org.apache.mahout.math.map.AbstractIntDoubleMap;
 import org.apache.mahout.math.map.OpenIntDoubleHashMap;
 import org.apache.mahout.math.matrix.DoubleMatrix1D;
@@ -76,6 +79,19 @@ public final class SparseDoubleMatrix1D 
     this.isNoView = false;
   }
 
+
+  public Vector toVector() {
+    final RandomAccessSparseVector vector = new RandomAccessSparseVector(cardinality());
+    elements.forEachPair(new IntDoubleProcedure() {
+      public boolean apply(int i, double v) {
+        vector.setQuick(i, v);
+        return true;
+      }
+    });
+    return vector;
+  }
+
+
   /**
    * Sets all cells to the state specified by <tt>value</tt>.
    *

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/EigenvalueDecomposition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/EigenvalueDecomposition.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/EigenvalueDecomposition.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/EigenvalueDecomposition.java Tue Apr  5 00:20:27 2011
@@ -8,6 +8,9 @@ It is provided "as is" without expressed
 */
 package org.apache.mahout.math.matrix.linalg;
 
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.MatrixSlice;
+import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.matrix.DoubleMatrix1D;
 import org.apache.mahout.math.matrix.DoubleMatrix2D;
 import org.apache.mahout.math.matrix.impl.DenseDoubleMatrix1D;
@@ -16,7 +19,7 @@ import org.apache.mahout.math.matrix.imp
 import java.io.Serializable;
 
 import static org.apache.mahout.math.Algebra.hypot;
-import static org.apache.mahout.math.matrix.linalg.Property.*;
+import static org.apache.mahout.math.matrix.linalg.Property.checkSquare;
 
 /** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
 @Deprecated
@@ -43,30 +46,16 @@ public final class EigenvalueDecompositi
   private double cdivr;
   private double cdivi;
 
-  /**
-   * Constructs and returns a new eigenvalue decomposition object; The decomposed matrices can be retrieved via instance
-   * methods of the returned decomposition object. Checks for symmetry, then constructs the eigenvalue decomposition.
-   *
-   * @param A A square matrix.
-   * @throws IllegalArgumentException if <tt>A</tt> is not square.
-   */
-  public EigenvalueDecomposition(DoubleMatrix2D A) {
-    checkSquare(A);
-
-    n = A.columns();
-    V = new double[n][n];
+  public EigenvalueDecomposition(double[][] v) {
+    if(v.length != v[0].length) {
+      throw new IllegalArgumentException("Matrix must be square");
+    }
+    n = v.length;
+    V = v;
     d = new double[n];
     e = new double[n];
 
-    boolean issymmetric = DEFAULT.isSymmetric(A);
-
-    if (issymmetric) {
-      for (int i = 0; i < n; i++) {
-        for (int j = 0; j < n; j++) {
-          V[i][j] = A.getQuick(i, j);
-        }
-      }
-
+    if (isSymmetric(v)) {
       // Tridiagonalize.
       tred2();
 
@@ -79,7 +68,7 @@ public final class EigenvalueDecompositi
 
       for (int j = 0; j < n; j++) {
         for (int i = 0; i < n; i++) {
-          H[i][j] = A.getQuick(i, j);
+          H[i][j] = v[i][j];
         }
       }
 
@@ -91,6 +80,58 @@ public final class EigenvalueDecompositi
     }
   }
 
+  public EigenvalueDecomposition(Matrix A) {
+    this(toArray(A));
+  }
+
+  private static double[][] toArray(Matrix A) {
+    checkSquare(A);
+
+    int n = A.numCols();
+    double[][] V = new double[n][n];
+    for(MatrixSlice slice : A) {
+      int row = slice.index();
+      for(Vector.Element element : slice.vector()) {
+        V[row][element.index()] = element.get();
+      }
+    }
+    return V;
+  }
+
+  private static boolean isSymmetric(double[][] matrix) {
+    for(int i=0; i<matrix.length; i++) {
+      for(int j=0; j<i; j++) {
+        if(matrix[i][j] != matrix[j][i]) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  private static double[][] toArray(DoubleMatrix2D A) {
+    checkSquare(A);
+
+    int n = A.columns();
+    double[][] V = new double[n][n];
+    for(int row = 0; row < A.rows(); row++) {
+      for(int col = 0; col < A.rows(); col++) {
+        V[row][col] = A.getQuick(row, col);
+      }
+    }
+    return V;
+  }
+  /**
+   * Constructs and returns a new eigenvalue decomposition object; The decomposed matrices can be retrieved via instance
+   * methods of the returned decomposition object. Checks for symmetry, then constructs the eigenvalue decomposition.
+   *
+   * @param A A square matrix.
+   * @throws IllegalArgumentException if <tt>A</tt> is not square.
+   */
+  public EigenvalueDecomposition(DoubleMatrix2D A) {
+    this(toArray(A));
+  }
+
   private void cdiv(double xr, double xi, double yr, double yi) {
     double r;
     double d;

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java Tue Apr  5 00:20:27 2011
@@ -8,6 +8,7 @@ It is provided "as is" without expressed
 */
 package org.apache.mahout.math.matrix.linalg;
 
+import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.function.Functions;
 import org.apache.mahout.math.matrix.DoubleMatrix1D;
 import org.apache.mahout.math.matrix.DoubleMatrix2D;
@@ -53,6 +54,12 @@ public final class Property {
     }
   }
 
+  public static void checkSquare(Matrix matrix) {
+    if(matrix.numRows() != matrix.numCols()) {
+      throw new IllegalArgumentException("Matrix must be square");      
+    }
+  }
+
   /** Returns the matrix's fraction of non-zero cells; <tt>A.cardinality() / A.size()</tt>. */
   public static double density(DoubleMatrix2D a) {
     return a.cardinality() / (double) a.size();

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java Tue Apr  5 00:20:27 2011
@@ -17,12 +17,8 @@
 
 package org.apache.mahout.math.decomposer;
 
-import org.apache.mahout.math.MahoutTestCase;
-import org.apache.mahout.math.Matrix;
-import org.apache.mahout.math.SequentialAccessSparseVector;
-import org.apache.mahout.math.SparseRowMatrix;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorIterable;
+import org.apache.mahout.math.*;
+import org.apache.mahout.math.function.Functions;
 
 import java.util.Random;
 
@@ -67,7 +63,7 @@ public abstract class SolverTest extends
       Vector afterMultiply = isSymmetric ? corpus.times(e) : corpus.timesSquared(e);
       double dot = afterMultiply.dot(e);
       double afterNorm = afterMultiply.getLengthSquared();
-      double error = 1 - dot / Math.sqrt(afterNorm * e.getLengthSquared());
+      double error = 1 - Math.abs(dot / Math.sqrt(afterNorm * e.getLengthSquared()));
       assertTrue("Error margin: {" + error + " too high! (for eigen " + i + ')', Math.abs(error) < errorMargin);
     }
   }
@@ -105,4 +101,31 @@ public abstract class SolverTest extends
     }
     return m;
   }
+
+  public static Matrix randomHierarchicalMatrix(int numRows, int numCols, boolean symmetric) {
+    DenseMatrix matrix = new DenseMatrix(numRows, numCols);
+    Random r = new Random(1234L);
+    for(int row = 0; row < numRows; row++) {
+      Vector v = new DenseVector(numCols);
+      for(int col = 0; col < numCols; col++) {
+        double val = r.nextGaussian();
+        v.set(col, val);
+      }
+      v.assign(Functions.MULT, 1/((row + 1) * v.norm(2)));
+      matrix.assignRow(row, v);
+    }
+    if(symmetric) {
+      if(true) return matrix.times(matrix.transpose());
+      for(int i = 0; i < numRows; i++) {
+        for(int j = 0; j < i; j++) {
+          matrix.set(j, i, matrix.get(i, j));
+        }
+      }
+    }
+    return matrix;
+  }
+
+  public static Matrix randomHierarchicalSymmetricMatrix(int size) {
+    return randomHierarchicalMatrix(size, size, true);
+  }
 }

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java Tue Apr  5 00:20:27 2011
@@ -19,37 +19,71 @@ package org.apache.mahout.math.decompose
 
 import org.apache.mahout.math.DenseMatrix;
 import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.decomposer.SolverTest;
+import org.apache.mahout.math.matrix.DoubleMatrix1D;
+import org.apache.mahout.math.matrix.linalg.EigenvalueDecomposition;
 import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
 import java.util.List;
 
 public final class TestLanczosSolver extends SolverTest {
+  private static final Logger log = LoggerFactory.getLogger(TestLanczosSolver.class);
+
+  private static final double ERROR_TOLERANCE = 1e-5;
+
+  @Test
+  public void testEigenvalueCheck() throws Exception {
+    int size = 100;
+    Matrix m = randomHierarchicalSymmetricMatrix(size);
+    int desiredRank = 80;
+    float fractionOfEigensExpectedGood = 0.75f;
+    LanczosSolver solver = new LanczosSolver();
+    Matrix eigenvectors = new DenseMatrix(desiredRank, size);
+    List<Double> eigenvalueList = new ArrayList<Double>();
+    solver.solve(m, desiredRank, eigenvectors, eigenvalueList);
+
+    EigenvalueDecomposition decomposition = new EigenvalueDecomposition(m);
+    DoubleMatrix1D eigenvalues = decomposition.getRealEigenvalues();
+
+    for(int i = 0; i < fractionOfEigensExpectedGood * desiredRank; i++) {
+      log.info(i + " : L = {}, E = {}",
+          eigenvalueList.get(desiredRank - i - 1),
+          eigenvalues.get(eigenvalues.size() - i - 1) );
+      Vector v = eigenvectors.getRow(i);
+      Vector v2 = decomposition.getV().viewColumn(eigenvalues.size() - i - 1).toVector();
+      double error = 1 - Math.abs(v.dot(v2)/(v.norm(2) * v2.norm(2)));
+      log.info("error: {}", error);
+      assertTrue(i + ": 1 - cosAngle = " + error, error < ERROR_TOLERANCE);
+    }
+  }
+
 
   @Test
   public void testLanczosSolver() throws Exception {
-    int numColumns = 800;
-    Matrix corpus = randomSequentialAccessSparseMatrix(1000, 900, numColumns, 30, 1.0);
+    int numRows = 800;
+    int numColumns = 500;
+    Matrix corpus = randomHierarchicalMatrix(numRows, numColumns, false);
     int rank = 50;
     Matrix eigens = new DenseMatrix(rank, numColumns);
     long time = timeLanczos(corpus, eigens, rank, false);
     assertTrue("Lanczos taking too long!  Are you in the debugger? :)", time < 10000);
     assertOrthonormal(eigens);
-    assertEigen(eigens, corpus, 0.1, false);
+    assertEigen(eigens, corpus, rank / 2, ERROR_TOLERANCE, false);
   }
 
   @Test
   public void testLanczosSolverSymmetric() throws Exception {
-    int numColumns = 400;
-    Matrix corpus = randomSequentialAccessSparseMatrix(500, 450, numColumns, 10, 1.0);
-    Matrix gramMatrix = corpus.times(corpus.transpose());
+    Matrix corpus = randomHierarchicalSymmetricMatrix(500);
     int rank = 30;
-    Matrix eigens = new DenseMatrix(rank, gramMatrix.numCols());
-    long time = timeLanczos(gramMatrix, eigens, rank, true);
+    Matrix eigens = new DenseMatrix(rank, corpus.numCols());
+    long time = timeLanczos(corpus, eigens, rank, true);
     assertTrue("Lanczos taking too long!  Are you in the debugger? :)", time < 10000);
     assertOrthonormal(eigens);
-    assertEigen(eigens, gramMatrix, 0.1, true);
+    assertEigen(eigens, corpus, rank / 2, ERROR_TOLERANCE, true);
   }
 
   public static long timeLanczos(Matrix corpus, Matrix eigens, int rank, boolean symmetric) {

Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1088831&r1=1088830&r2=1088831&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Tue Apr  5 00:20:27 2011
@@ -17,11 +17,6 @@
 
 package org.apache.mahout.clustering;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -68,6 +63,11 @@ import org.apache.mahout.vectorizer.Weig
 import org.junit.Before;
 import org.junit.Test;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
 public final class TestClusterDumper extends MahoutTestCase {
 
   private static final String[] DOCS = { "The quick red fox jumped over the lazy brown dogs.",
@@ -329,11 +329,14 @@ public final class TestClusterDumper ext
     int sampleDimension = sampleData.get(0).get().size();
     // Run EigenVerificationJob from within DistributedLanczosSolver.run(...)
     int desiredRank = 13;
-    solver.run(testData, output, tmp, sampleData.size(), sampleDimension, false, desiredRank, 0.5, 0.0, false);
+    solver.run(testData, output, tmp, sampleData.size(), sampleDimension,
+        false, desiredRank, 0.5, 0.0, false);
+
     Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);
 
     // now multiply the testdata matrix and the eigenvector matrix
-    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
+    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp,
+        desiredRank, sampleDimension);
     Configuration conf = new Configuration(config);
     svdT.setConf(conf);
     DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
@@ -348,6 +351,7 @@ public final class TestClusterDumper ext
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf, output, 10), new Path(output, "clusteredPoints"));
     clusterDumper.printClusters(termDictionary);
+    assertTrue(true);
   }
 
   @Test
@@ -369,9 +373,11 @@ public final class TestClusterDumper ext
     Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);
 
     // now multiply the testdata matrix and the eigenvector matrix
-    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
+    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank,
+        sampleDimension);
     svdT.setConf(conf);
-    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
+    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(),
+        sampleDimension);
     a.setConf(conf);
     DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
     sData.setConf(conf);
@@ -379,9 +385,12 @@ public final class TestClusterDumper ext
     // now run the Canopy job to prime kMeans canopies
     CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false, false);
     // now run the KMeans job
-    KMeansDriver.run(sData.getRowPath(), new Path(output, "clusters-0"), output, measure, 0.001, 10, true, false);
+    KMeansDriver.run(sData.getRowPath(), new Path(output, "clusters-0"), output, measure,
+        0.001, 10, true, false);
     // run ClusterDumper
-    ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf, output, 10), new Path(output, "clusteredPoints"));
+    ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf, output, 10),
+        new Path(output, "clusteredPoints"));
     clusterDumper.printClusters(termDictionary);
+    assertTrue(true);
   }
 }



Mime
View raw message