mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jeast...@apache.org
Subject svn commit: r780137 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/clustering/kmeans/ core/src/test/java/org/apache/mahout/clustering/kmeans/ examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/
Date Fri, 29 May 2009 23:07:50 GMT
Author: jeastman
Date: Fri May 29 23:07:50 2009
New Revision: 780137

URL: http://svn.apache.org/viewvc?rev=780137&view=rev
Log:
- KMeansDriver: renamed parameter numCentroids to numReduceTasks to comply with its usage
- cleaned up commented-out code and improved comments in TestKmeansClustering and kmeans/Job

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=780137&r1=780136&r2=780137&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Fri May 29 23:07:50 2009
@@ -57,10 +57,11 @@
    * @param measureClass the classname of the DistanceMeasure
    * @param convergenceDelta the convergence delta value
    * @param maxIterations the maximum number of iterations
+   * @param numReduceTasks the number of reducers
    */
   public static void runJob(String input, String clustersIn, String output,
       String measureClass, double convergenceDelta, int maxIterations,
-      int numCentroids) {
+      int numReduceTasks) {
     // iterate until the clusters converge
     boolean converged = false;
     int iteration = 0;
@@ -71,7 +72,7 @@
       // point the output to a new directory per iteration
       String clustersOut = output + "/clusters-" + iteration;
       converged = runIteration(input, clustersIn, clustersOut, measureClass,
-          delta, numCentroids);
+          delta, numReduceTasks);
       // now point the input to the old output directory
       clustersIn = output + "/clusters-" + iteration;
       iteration++;
@@ -89,6 +90,7 @@
    * @param clustersOut the directory pathname for output clusters
    * @param measureClass the classname of the DistanceMeasure
    * @param convergenceDelta the convergence delta value
+   * @param numReduceTasks the number of reducer tasks
    * @return true if the iteration successfully runs
    */
   private static boolean runIteration(String input, String clustersIn,
@@ -107,16 +109,10 @@
     conf.setMapperClass(KMeansMapper.class);
     conf.setCombinerClass(KMeansCombiner.class);
     conf.setReducerClass(KMeansReducer.class);
-    // conf.setNumMapTasks(numMapTasks);
     conf.setNumReduceTasks(numReduceTasks);
     conf.set(Cluster.CLUSTER_PATH_KEY, clustersIn);
     conf.set(Cluster.DISTANCE_MEASURE_KEY, measureClass);
     conf.set(Cluster.CLUSTER_CONVERGENCE_KEY, convergenceDelta);
-
-//    conf.set("mapred.child.java.opts", "-Xmx1536m");
-    // uncomment it to run locally
-//    conf.set("mapred.job.tracker", "local");
-
     client.setConf(conf);
     try {
       JobClient.runJob(conf);
@@ -156,9 +152,6 @@
     conf.set(Cluster.CLUSTER_CONVERGENCE_KEY, convergenceDelta);
 
     client.setConf(conf);
-    // uncomment it to run locally
-    // conf.set("mapred.job.tracker", "local");
-//    conf.set("mapred.child.java.opts", "-Xmx1536m");
     try {
       JobClient.runJob(conf);
     } catch (IOException e) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java?rev=780137&r1=780136&r2=780137&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java Fri May 29 23:07:50 2009
@@ -70,12 +70,10 @@
       // iterate thru the result path list
       for (Path path : result) {
         SequenceFile.Reader reader = null;
-//        RecordReader<Text, Text> recordReader = null;
         try {
           reader =new SequenceFile.Reader(fs, path, job); 
           Text key = new Text();
           Text value = new Text();
-          int counter = 1;
           while (reader.next(key, value)) {
             // get the cluster info
             Cluster cluster = Cluster.decodeCluster(value.toString());

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=780137&r1=780136&r2=780137&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java Fri May 29 23:07:50 2009
@@ -410,7 +410,6 @@
       // now compare the expected clusters with actual
       File outDir = new File("output/points");
       assertTrue("output dir exists?", outDir.exists());
-      String[] outFiles = outDir.list();
       // assertEquals("output dir files?", 4, outFiles.length);
       BufferedReader reader = new BufferedReader(new InputStreamReader(
           new FileInputStream("output/points/part-00000"), Charset

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=780137&r1=780136&r2=780137&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Fri May 29 23:07:50 2009
@@ -17,6 +17,8 @@
 
 package org.apache.mahout.clustering.syntheticcontrol.kmeans;
 
+import java.io.IOException;
+
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobClient;
@@ -25,10 +27,6 @@
 import org.apache.mahout.clustering.kmeans.KMeansDriver;
 import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver;
 
-import java.io.IOException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 public class Job {
   private Job() {
   }
@@ -69,7 +67,8 @@
    * @param maxIterations the int maximum number of iterations
    */
   private static void runJob(String input, String output, String measureClass,
-      double t1, double t2, double convergenceDelta, int maxIterations) throws IOException {
+      double t1, double t2, double convergenceDelta, int maxIterations)
+      throws IOException {
     JobClient client = new JobClient();
     JobConf conf = new JobConf(Job.class);
 
@@ -79,10 +78,9 @@
     if (dfs.exists(outPath))
       dfs.delete(outPath, true);
     InputDriver.runJob(input, output + "/data");
-    CanopyClusteringJob
-        .runJob(output + "/data", output, measureClass, t1, t2);
+    CanopyClusteringJob.runJob(output + "/data", output, measureClass, t1, t2);
     KMeansDriver.runJob(output + "/data", output + "/canopies", output,
-        measureClass, convergenceDelta, maxIterations,1);
-//    OutputDriver.runJob(output + "/points", output + "/clustered-points");
+        measureClass, convergenceDelta, maxIterations, 1);
+    //    OutputDriver.runJob(output + "/points", output + "/clustered-points");
   }
 }



Mime
View raw message