mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r1147136 - in /mahout/trunk: integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java math/src/main/java/org/apache/mahout/math/NamedVector.java
Date Fri, 15 Jul 2011 13:05:07 GMT
Author: gsingers
Date: Fri Jul 15 13:05:06 2011
New Revision: 1147136

URL: http://svn.apache.org/viewvc?rev=1147136&view=rev
Log:
add ability to restrict the number of KV pairs to dump, implement toString on NamedVector

Modified:
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/NamedVector.java

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1147136&r1=1147135&r2=1147136&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java Fri Jul 15 13:05:06 2011
@@ -44,6 +44,7 @@ import org.slf4j.LoggerFactory;
 import java.io.File;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.util.Iterator;
 
 /**
  * Can read in a {@link SequenceFile} of {@link Vector}s and dump
@@ -87,12 +88,15 @@ public final class VectorDumper {
             .withShortName("n").create();
     Option sizeOpt = obuilder.withLongName("sizeOnly").withRequired(false).
             withDescription("Dump only the size of the vector").withShortName("sz").create();
+    Option numItemsOpt = obuilder.withLongName("n").withRequired(false).withArgument(
+            abuilder.withName("numItems").withMinimum(1).withMaximum(1).create()).
+            withDescription("Output at most <n> key value pairs").withShortName("n").create();
     Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
             .create();
 
     Group group = gbuilder.withName("Options").withOption(seqOpt).withOption(outputOpt).withOption(
             dictTypeOpt).withOption(dictOpt).withOption(csvOpt).withOption(vectorAsKeyOpt).withOption(
-            printKeyOpt).withOption(sizeOpt).withOption(helpOpt).create();
+            printKeyOpt).withOption(sizeOpt).withOption(numItemsOpt).withOption(helpOpt).create();
 
     try {
       Parser parser = new Parser();
@@ -138,18 +142,27 @@ public final class VectorDumper {
         }
         try {
           boolean printKey = cmdLine.hasOption(printKeyOpt);
-          if (useCSV && dictionary != null){
+          if (useCSV && dictionary != null) {
             writer.write("#");
             for (int j = 0; j < dictionary.length; j++) {
               writer.write(dictionary[j]);
-              if (j < dictionary.length - 1){
+              if (j < dictionary.length - 1) {
                 writer.write(',');
               }
             }
             writer.write('\n');
           }
           long i = 0;
-          for (Pair<Writable,Writable> record : new SequenceFileIterable<Writable, Writable>(path, true, conf)) {
+          long count = 0;
+          long numItems = Long.MAX_VALUE;
+          if (cmdLine.hasOption(numItemsOpt)) {
+            numItems = Long.parseLong(cmdLine.getValue(numItemsOpt).toString());
+            writer.append("#Max Items to dump: ").append(String.valueOf(numItems)).append('\n');
+          }
+          SequenceFileIterable<Writable, Writable> iterable = new SequenceFileIterable<Writable, Writable>(path, true, conf);
+          Iterator<Pair<Writable,Writable>> iterator = iterable.iterator();
+          while (iterator.hasNext() && count < numItems) {
+            Pair<Writable, Writable> record = iterator.next();
             Writable keyWritable = record.getFirst();
             Writable valueWritable = record.getSecond();
             if (printKey) {
@@ -171,7 +184,7 @@ public final class VectorDumper {
               writer.write('\n');
             } else {
               String fmtStr;
-              if (useCSV){
+              if (useCSV) {
                 fmtStr = VectorHelper.vectorToCSVString(vector, namesAsComments);
               } else {
                 fmtStr = vector.asFormatString();
@@ -179,6 +192,7 @@ public final class VectorDumper {
               writer.write(fmtStr);
               writer.write('\n');
             }
+            count++;
           }
         } finally {
           Closeables.closeQuietly(writer);

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/NamedVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/NamedVector.java?rev=1147136&r1=1147135&r2=1147136&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/NamedVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/NamedVector.java Fri Jul 15 13:05:06 2011
@@ -73,6 +73,13 @@ public class NamedVector implements Vect
   }
 
   @Override
+  public String toString() {
+    StringBuilder bldr = new StringBuilder();
+    bldr.append(name).append(':').append(delegate.toString());
+    return bldr.toString();
+  }
+
+  @Override
   public Vector assign(double value) {
     return delegate.assign(value);
   }



Mime
View raw message