mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jman...@apache.org
Subject svn commit: r1098044 - in /mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop: TimesSquaredJob.java decomposer/EigenVerificationJob.java
Date Sat, 30 Apr 2011 05:23:59 GMT
Author: jmannix
Date: Sat Apr 30 05:23:58 2011
New Revision: 1098044

URL: http://svn.apache.org/viewvc?rev=1098044&view=rev
Log:
Remove unnecessary copy in TimesSquaredJob, computePairwiseInnerProducts() (not used), and
properly add option for number of eigens to keep for cleansvd

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java?rev=1098044&r1=1098043&r2=1098044&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java Sat Apr 30 05:23:58 2011
@@ -165,7 +165,7 @@ public final class TimesSquaredJob {
     DistributedCache.setCacheFiles(new URI[] {ivpURI}, conf);
 
     conf.set(INPUT_VECTOR, ivpURI.toString());
-    conf.setBoolean(IS_SPARSE_OUTPUT, !(v instanceof DenseVector));
+    conf.setBoolean(IS_SPARSE_OUTPUT, !(v.isDense()));
     conf.setInt(OUTPUT_VECTOR_DIMENSION, outputVectorDim);
     FileInputFormat.addInputPath(conf, matrixInputPath);
     conf.setInputFormat(SequenceFileInputFormat.class);
@@ -211,9 +211,6 @@ public final class TimesSquaredJob {
         inputVector = iterator.next().get();
         iterator.close();
 
-        if (!(inputVector instanceof SequentialAccessSparseVector || inputVector instanceof DenseVector)) {
-          inputVector = new SequentialAccessSparseVector(inputVector);
-        }
         int outDim = conf.getInt(OUTPUT_VECTOR_DIMENSION, Integer.MAX_VALUE);
         outputVector = conf.getBoolean(IS_SPARSE_OUTPUT, false)
                      ? new RandomAccessSparseVector(outDim, 10)

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java?rev=1098044&r1=1098043&r2=1098044&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java Sat Apr 30 05:23:58 2011
@@ -111,7 +111,8 @@ public class EigenVerificationJob extend
            getOutputPath(),
            argMap.get("--inMemory") != null,
            Double.parseDouble(argMap.get("--maxError")),
-           Double.parseDouble(argMap.get("--minEigenvalue")), Integer.parseInt(argMap.get("--maxEigens")));
+           Double.parseDouble(argMap.get("--minEigenvalue")),
+           Integer.parseInt(argMap.get("--maxEigens")));
     return 0;
   }
 
@@ -151,8 +152,8 @@ public class EigenVerificationJob extend
 
     eigenVerifier = new SimpleEigenVerifier();
 
-    //VectorIterable pairwiseInnerProducts = computePairwiseInnerProducts();
-    computePairwiseInnerProducts();
+    // we don't currently verify orthonormality here.
+    // VectorIterable pairwiseInnerProducts = computePairwiseInnerProducts();
 
     Map<MatrixSlice, EigenStatus> eigenMetaData = verifyEigens();
 
@@ -174,6 +175,7 @@ public class EigenVerificationJob extend
     addOption("inMemory", "mem", "Buffer eigen matrix into memory (if you have enough!)", "false");
     addOption("maxError", "err", "Maximum acceptable error", "0.05");
     addOption("minEigenvalue", "mev", "Minimum eigenvalue to keep the vector for", "0.0");
+    addOption("maxEigens", "max", "Maximum number of eigenvectors to keep (0 means all)", "0");
 
     return parseArguments(args);
   }



Mime
View raw message