hama-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From edwardy...@apache.org
Subject svn commit: r1545568 - in /hama/trunk: CHANGES.txt examples/src/main/java/org/apache/hama/examples/Kmeans.java ml/src/main/java/org/apache/hama/ml/kmeans/KMeansBSP.java ml/src/test/java/org/apache/hama/ml/kmeans/TestKMeansBSP.java
Date Tue, 26 Nov 2013 08:46:31 GMT
Author: edwardyoon
Date: Tue Nov 26 08:46:30 2013
New Revision: 1545568

URL: http://svn.apache.org/r1545568
Log:
HAMA-821: Fix bugs in KMeans example and make output more readable

Modified:
    hama/trunk/CHANGES.txt
    hama/trunk/examples/src/main/java/org/apache/hama/examples/Kmeans.java
    hama/trunk/ml/src/main/java/org/apache/hama/ml/kmeans/KMeansBSP.java
    hama/trunk/ml/src/test/java/org/apache/hama/ml/kmeans/TestKMeansBSP.java

Modified: hama/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hama/trunk/CHANGES.txt?rev=1545568&r1=1545567&r2=1545568&view=diff
==============================================================================
--- hama/trunk/CHANGES.txt (original)
+++ hama/trunk/CHANGES.txt Tue Nov 26 08:46:30 2013
@@ -8,6 +8,7 @@ Release 0.7.0 (unreleased changes)
 
   BUG FIXES
 
+   HAMA-821: Fix bugs in KMeans example and make output more readable (edwardyoon)
 
   IMPROVEMENTS
 

Modified: hama/trunk/examples/src/main/java/org/apache/hama/examples/Kmeans.java
URL: http://svn.apache.org/viewvc/hama/trunk/examples/src/main/java/org/apache/hama/examples/Kmeans.java?rev=1545568&r1=1545567&r2=1545568&view=diff
==============================================================================
--- hama/trunk/examples/src/main/java/org/apache/hama/examples/Kmeans.java (original)
+++ hama/trunk/examples/src/main/java/org/apache/hama/examples/Kmeans.java Tue Nov 26 08:46:30 2013
@@ -17,6 +17,8 @@
  */
 package org.apache.hama.examples;
 
+import java.util.List;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -54,7 +56,7 @@ import org.apache.hama.ml.kmeans.KMeansB
 public class Kmeans {
 
   public static void main(String[] args) throws Exception {
-    if (args.length < 4 || args.length != 7) {
+    if (args.length < 4 || (args.length > 4 && args.length != 7)) {
       System.out
           .println("USAGE: <INPUT_PATH> <OUTPUT_PATH> <MAXITERATIONS> <K (how many centers)> -g [<COUNT> <DIMENSION OF VECTORS>]");
       return;
@@ -85,12 +87,18 @@ public class Kmeans {
       KMeansBSP.prepareInput(count, k, dimension, conf, in, center, out, fs);
     } else {
       KMeansBSP.prepareInputText(k, conf, in, center, out, fs);
-      in = new Path(args[0], "textinput/in.seq");
+      in = new Path(in.getParent(), "textinput/in.seq");
     }
 
     BSPJob job = KMeansBSP.createJob(conf, in, out, true);
 
     // just submit the job
     job.waitForCompletion(true);
+
+    List<String> results = KMeansBSP.readOutput(conf, out, fs, 5);
+    for (String line : results) {
+      System.out.println(line);
+    }
+    System.out.println("...");
   }
 }

Modified: hama/trunk/ml/src/main/java/org/apache/hama/ml/kmeans/KMeansBSP.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/kmeans/KMeansBSP.java?rev=1545568&r1=1545567&r2=1545568&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/kmeans/KMeansBSP.java (original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/kmeans/KMeansBSP.java Tue Nov 26 08:46:30 2013
@@ -28,6 +28,7 @@ import java.util.Random;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
@@ -106,17 +107,16 @@ public final class KMeansBSP
         "Centers file must contain at least a single center!");
     this.centers = centers.toArray(new DoubleVector[centers.size()]);
 
-
     String distanceClass = peer.getConfiguration().get(DISTANCE_MEASURE_CLASS);
     if (distanceClass != null) {
       try {
         distanceMeasurer = ReflectionUtils.newInstance(distanceClass);
       } catch (ClassNotFoundException e) {
-        throw new RuntimeException(new StringBuilder("Wrong DistanceMeasurer implementation ").
-            append(distanceClass).append(" provided").toString());
+        throw new RuntimeException(new StringBuilder(
+            "Wrong DistanceMeasurer implementation ").append(distanceClass)
+            .append(" provided").toString());
       }
-    }
-    else {
+    } else {
       distanceMeasurer = new EuclidianDistance();
     }
 
@@ -177,7 +177,8 @@ public final class KMeansBSP
     for (int i = 0; i < msgCenters.length; i++) {
       final DoubleVector oldCenter = centers[i];
       if (msgCenters[i] != null) {
-        double calculateError = oldCenter.subtractUnsafe(msgCenters[i]).abs().sum();
+        double calculateError = oldCenter.subtractUnsafe(msgCenters[i]).abs()
+            .sum();
         if (calculateError > 0.0d) {
           centers[i] = msgCenters[i];
           convergedCounter++;
@@ -366,12 +367,13 @@ public final class KMeansBSP
   }
 
   /**
-   * Reads the centers outputted from the clustering job.
+   * Reads the cluster centers.
    * 
    * @return an index on the key dimension, and a cluster center on the value.
    */
-  public static HashMap<Integer, DoubleVector> readOutput(Configuration conf,
-      Path out, Path centerPath, FileSystem fs) throws IOException {
+  public static HashMap<Integer, DoubleVector> readClusterCenters(
+      Configuration conf, Path out, Path centerPath, FileSystem fs)
+      throws IOException {
     HashMap<Integer, DoubleVector> centerMap = new HashMap<Integer, DoubleVector>();
     SequenceFile.Reader centerReader = new SequenceFile.Reader(fs, centerPath,
         conf);
@@ -385,6 +387,37 @@ public final class KMeansBSP
   }
 
   /**
+   * Reads output. The list of output records can be restricted to maxlines.
+   * 
+   * @param conf
+   * @param outPath
+   * @param fs
+   * @param maxlines
+   * @return the list of output records
+   * @throws IOException
+   */
+  public static List<String> readOutput(Configuration conf, Path outPath,
+      FileSystem fs, int maxlines) throws IOException {
+    List<String> output = new ArrayList<String>();
+
+    FileStatus[] globStatus = fs.globStatus(new Path(outPath + "/part-*"));
+    for (FileStatus fts : globStatus) {
+      BufferedReader reader = new BufferedReader(new InputStreamReader(
+          fs.open(fts.getPath())));
+      String line = null;
+      while ((line = reader.readLine()) != null) {
+        String[] split = line.split("\t");
+        output.add(split[1] + " belongs to cluster " + split[0]);
+
+        if (output.size() >= maxlines)
+          return output;
+      }
+    }
+
+    return output;
+  }
+
+  /**
    * Reads input text files and writes it to a sequencefile.
    */
   public static Path prepareInputText(int k, Configuration conf, Path txtIn,
@@ -396,7 +429,7 @@ public final class KMeansBSP
     } else {
       in = new Path(txtIn, "textinput/in.seq");
     }
-
+    
     if (fs.exists(out))
       fs.delete(out, true);
 
@@ -428,8 +461,8 @@ public final class KMeansBSP
       VectorWritable vector = new VectorWritable(vec);
       dataWriter.append(vector, value);
       if (k > i) {
-          assert centerWriter != null;
-          centerWriter.append(vector, value);
+        assert centerWriter != null;
+        centerWriter.append(vector, value);
       } else {
         if (centerWriter != null) {
           centerWriter.close();

Modified: hama/trunk/ml/src/test/java/org/apache/hama/ml/kmeans/TestKMeansBSP.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/kmeans/TestKMeansBSP.java?rev=1545568&r1=1545567&r2=1545568&view=diff
==============================================================================
--- hama/trunk/ml/src/test/java/org/apache/hama/ml/kmeans/TestKMeansBSP.java (original)
+++ hama/trunk/ml/src/test/java/org/apache/hama/ml/kmeans/TestKMeansBSP.java Tue Nov 26 08:46:30 2013
@@ -72,8 +72,8 @@ public class TestKMeansBSP extends TestC
 
       assertEquals(true, result);
 
-      HashMap<Integer, DoubleVector> centerMap = KMeansBSP.readOutput(conf,
-          out, centerOut, fs);
+      HashMap<Integer, DoubleVector> centerMap = KMeansBSP.readClusterCenters(
+          conf, out, centerOut, fs);
       System.out.println(centerMap);
       assertEquals(1, centerMap.size());
       DoubleVector doubleVector = centerMap.get(0);



Mime
View raw message