mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r1177786 - in /mahout/trunk: core/src/main/java/org/apache/mahout/clustering/ core/src/main/java/org/apache/mahout/clustering/dirichlet/ core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/ core/src/main/java/org/apache/mahout/clust...
Date Fri, 30 Sep 2011 19:20:48 GMT
Author: srowen
Date: Fri Sep 30 19:20:47 2011
New Revision: 1177786

URL: http://svn.apache.org/viewvc?rev=1177786&view=rev
Log:
MAHOUT-778 label final output as "clusters-N-final"

Added:
    mahout/trunk/examples/src/test/java/org/apache/mahout/clustering/
    mahout/trunk/examples/src/test/java/org/apache/mahout/clustering/display/
    mahout/trunk/examples/src/test/java/org/apache/mahout/clustering/display/ClustersFilterTest.java
Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Cluster.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/ClustersFilter.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Cluster.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Cluster.java?rev=1177786&r1=1177785&r2=1177786&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Cluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/Cluster.java Fri Sep 30 19:20:47 2011
@@ -35,6 +35,9 @@ public interface Cluster extends Model<V
   // default directory for output of clusters per iteration
   String CLUSTERS_DIR = "clusters-";
 
+  // default suffix for output of clusters for final iteration
+  String FINAL_ITERATION_SUFFIX = "-final";
+
   /**
    * Get the id of the Cluster
    * 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=1177786&r1=1177785&r2=1177786&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Fri Sep 30 19:20:47 2011
@@ -374,7 +374,8 @@ public class DirichletDriver extends Abs
                                        int maxIterations,
                                        double alpha0,
                                        Path clustersIn) throws IOException {
-    for (int iteration = 1; iteration <= maxIterations; iteration++) {
+    int iteration = 1;
+    while (iteration <= maxIterations) {
       log.info("Iteration {}", iteration);
       // point the output to a new directory per iteration
       Path clustersOut = new Path(output, Cluster.CLUSTERS_DIR + iteration);
@@ -383,7 +384,7 @@ public class DirichletDriver extends Abs
                                                        description,
                                                        alpha0,
                                                        numClusters);
-      
+
       List<DirichletCluster> oldModels = state.getClusters();
       for (DirichletCluster oldModel : oldModels) {
         oldModel.getModel().configure(conf);
@@ -405,8 +406,11 @@ public class DirichletDriver extends Abs
 
       // now point the input to the old output directory
       clustersIn = clustersOut;
+      iteration++;
     }
-    return clustersIn;
+    Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1) + Cluster.FINAL_ITERATION_SUFFIX);
+    FileSystem.get(conf).rename(new Path(output, Cluster.CLUSTERS_DIR + (iteration-1)), finalClustersIn);
+    return finalClustersIn;
   }
 
   private static Path buildClustersMR(Configuration conf,
@@ -418,15 +422,19 @@ public class DirichletDriver extends Abs
                                       double alpha0,
                                       Path clustersIn)
     throws IOException, InterruptedException, ClassNotFoundException {
-    for (int iteration = 1; iteration <= maxIterations; iteration++) {
+    int iteration = 1;
+    while (iteration <= maxIterations) {
       log.info("Iteration {}", iteration);
       // point the output to a new directory per iteration
       Path clustersOut = new Path(output, Cluster.CLUSTERS_DIR + iteration);
       runIteration(conf, input, clustersIn, clustersOut, description, numClusters, alpha0);
       // now point the input to the old output directory
       clustersIn = clustersOut;
+      iteration++;
     }
-    return clustersIn;
+    Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1) + Cluster.FINAL_ITERATION_SUFFIX);
+    FileSystem.get(conf).rename(new Path(output, Cluster.CLUSTERS_DIR + (iteration-1)), finalClustersIn);
+    return finalClustersIn;
   }
 
   /**

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1177786&r1=1177785&r2=1177786&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Fri Sep 30 19:20:47 2011
@@ -362,9 +362,9 @@ public class FuzzyKMeansDriver extends A
     }
     boolean converged = false;
     int iteration = 1;
+    Configuration conf = new Configuration();
     while (!converged && iteration <= maxIterations) {
       log.info("Fuzzy k-Means Iteration: " + iteration);
-      Configuration conf = new Configuration();
       FileSystem fs = FileSystem.get(input.toUri(), conf);
       for (VectorWritable value
            : new SequenceFileDirValueIterable<VectorWritable>(input,
@@ -398,7 +398,9 @@ public class FuzzyKMeansDriver extends A
       clustersIn = clustersOut;
       iteration++;
     }
-    return clustersIn;
+    Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1) + Cluster.FINAL_ITERATION_SUFFIX);
+    FileSystem.get(conf).rename(new Path(output, Cluster.CLUSTERS_DIR + (iteration-1)), finalClustersIn);
+    return finalClustersIn;
   }
 
   private static Path buildClustersMR(Configuration conf,
@@ -424,7 +426,9 @@ public class FuzzyKMeansDriver extends A
       clustersIn = clustersOut;
       iteration++;
     }
-    return clustersIn;
+    Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1) + "-final");
+    FileSystem.get(conf).rename(new Path(output, Cluster.CLUSTERS_DIR + (iteration-1)), finalClustersIn);
+    return finalClustersIn;
   }
 
   /**

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1177786&r1=1177785&r2=1177786&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Fri Sep 30 19:20:47 2011
@@ -292,7 +292,9 @@ public class KMeansDriver extends Abstra
       clustersIn = clustersOut;
       iteration++;
     }
-    return clustersIn;
+    Path finalClustersIn = new Path(output, AbstractCluster.CLUSTERS_DIR + (iteration-1) + org.apache.mahout.clustering.Cluster.FINAL_ITERATION_SUFFIX);
+    FileSystem.get(conf).rename(new Path(output, AbstractCluster.CLUSTERS_DIR + (iteration-1)), finalClustersIn);
+    return finalClustersIn;
   }
 
   private static Path buildClustersMR(Configuration conf,
@@ -314,7 +316,9 @@ public class KMeansDriver extends Abstra
       clustersIn = clustersOut;
       iteration++;
     }
-    return clustersIn;
+    Path finalClustersIn = new Path(output, AbstractCluster.CLUSTERS_DIR + (iteration-1) + "-final");
+    FileSystem.get(conf).rename(new Path(output, AbstractCluster.CLUSTERS_DIR + (iteration-1)), finalClustersIn);
+    return finalClustersIn;
   }
 
   /**

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=1177786&r1=1177785&r2=1177786&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Fri Sep 30 19:20:47 2011
@@ -333,7 +333,9 @@ public class MeanShiftCanopyDriver exten
       clustersIn = clustersOut;
       iteration++;
     }
-    return clustersIn;
+    Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1) + "-final");
+    FileSystem.get(conf).rename(new Path(output, Cluster.CLUSTERS_DIR + (iteration-1)), finalClustersIn);
+    return finalClustersIn;
   }
 
   /**
@@ -369,7 +371,9 @@ public class MeanShiftCanopyDriver exten
         conf.set(MAPRED_REDUCE_TASKS, String.valueOf(numReducers));
       }
     }
-    return clustersIn;
+    Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration-1) + Cluster.FINAL_ITERATION_SUFFIX);
+    FileSystem.get(conf).rename(new Path(output, Cluster.CLUSTERS_DIR + (iteration-1)), finalClustersIn);
+    return finalClustersIn;
   }
 
   /**

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=1177786&r1=1177785&r2=1177786&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Fri Sep 30 19:20:47 2011
@@ -61,7 +61,7 @@ public final class TestMeanShift extends
 
   /**
    * Print the canopies to the transcript
-   * 
+   *
    * @param canopies
    *          a List<Canopy>
    */
@@ -376,7 +376,7 @@ public final class TestMeanShift extends
         optKey(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION), "0.2",
         optKey(DefaultOptionCreator.OVERWRITE_OPTION) };
     ToolRunner.run(conf, new MeanShiftCanopyDriver(), args);
-    Path outPart = new Path(output, "clusters-4/part-r-00000");
+    Path outPart = new Path(output, "clusters-4-final/part-r-00000");
     long count = HadoopUtil.countRecords(outPart, conf);
     assertEquals("count", 3, count);
     outPart = new Path(output, "clusters-0/part-m-00000");
@@ -430,7 +430,7 @@ public final class TestMeanShift extends
         optKey(DefaultOptionCreator.METHOD_OPTION),
         DefaultOptionCreator.SEQUENTIAL_METHOD };
     ToolRunner.run(new Configuration(), new MeanShiftCanopyDriver(), args);
-    Path outPart = new Path(output, "clusters-7/part-r-00000");
+    Path outPart = new Path(output, "clusters-7-final/part-r-00000");
     long count = HadoopUtil.countRecords(outPart, conf);
     assertEquals("count", 3, count);
   }
@@ -470,7 +470,7 @@ public final class TestMeanShift extends
         optKey(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION), "0.2",
         optKey(DefaultOptionCreator.OVERWRITE_OPTION) };
     ToolRunner.run(conf, new MeanShiftCanopyDriver(), args);
-    Path outPart = new Path(output, "clusters-3/part-r-00000");
+    Path outPart = new Path(output, "clusters-3-final/part-r-00000");
     long count = HadoopUtil.countRecords(outPart, conf);
     assertEquals("count", 3, count);
     Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart,
@@ -520,7 +520,7 @@ public final class TestMeanShift extends
         optKey(DefaultOptionCreator.METHOD_OPTION),
         DefaultOptionCreator.SEQUENTIAL_METHOD };
     ToolRunner.run(new Configuration(), new MeanShiftCanopyDriver(), args);
-    Path outPart = new Path(output, "clusters-7/part-r-00000");
+    Path outPart = new Path(output, "clusters-7-final/part-r-00000");
     long count = HadoopUtil.countRecords(outPart, conf);
     assertEquals("count", 3, count);
     Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart,

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/ClustersFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/ClustersFilter.java?rev=1177786&r1=1177785&r2=1177786&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/ClustersFilter.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/ClustersFilter.java Fri Sep 30 19:20:47 2011
@@ -19,10 +19,12 @@ package org.apache.mahout.clustering.dis
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.mahout.clustering.Cluster;
 
 public class ClustersFilter implements PathFilter {
   @Override
   public boolean accept(Path path) {
-    return path.toString().contains("/clusters-");
+    String pathString = path.toString();
+    return pathString.contains("/clusters-") && pathString.endsWith(Cluster.FINAL_ITERATION_SUFFIX);
   }
 }

Added: mahout/trunk/examples/src/test/java/org/apache/mahout/clustering/display/ClustersFilterTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/clustering/display/ClustersFilterTest.java?rev=1177786&view=auto
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/clustering/display/ClustersFilterTest.java (added)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/clustering/display/ClustersFilterTest.java Fri Sep 30 19:20:47 2011
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.clustering.display;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.mahout.common.MahoutTestCase;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+
+public class ClustersFilterTest extends MahoutTestCase {
+
+  private Configuration configuration;
+  private Path output;
+
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    configuration = new Configuration();
+    output = getTestTempDirPath();
+  }
+
+  @Test
+  public void testAccept_notFinal() throws Exception {
+    Path path0 = new Path(output, "clusters-0");
+    Path path1 = new Path(output, "clusters-1");
+
+    path0.getFileSystem(configuration).createNewFile(path0);
+    path1.getFileSystem(configuration).createNewFile(path1);
+
+    PathFilter clustersFilter = new ClustersFilter();
+
+    assertFalse(clustersFilter.accept(path0));
+    assertFalse(clustersFilter.accept(path1));
+  }
+
+  @Test
+  public void testAccept_finalPath() throws IOException {
+    Path path0 = new Path(output, "clusters-0");
+    Path path1 = new Path(output, "clusters-1");
+    Path path2 = new Path(output, "clusters-2");
+    Path path3Final = new Path(output, "clusters-3-final");
+
+    path0.getFileSystem(configuration).createNewFile(path0);
+    path1.getFileSystem(configuration).createNewFile(path1);
+    path2.getFileSystem(configuration).createNewFile(path2);
+    path3Final.getFileSystem(configuration).createNewFile(path3Final);
+
+    PathFilter clustersFilter = new ClustersFilter();
+
+    assertFalse(clustersFilter.accept(path0));
+    assertFalse(clustersFilter.accept(path1));
+    assertFalse(clustersFilter.accept(path2));
+    assertTrue(clustersFilter.accept(path3Final));
+  }
+}

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1177786&r1=1177785&r2=1177786&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Fri Sep 30 19:20:47 2011
@@ -96,7 +96,7 @@ public final class TestClusterDumper ext
   private List<VectorWritable> sampleData;
   
   private String[] termDictionary;
-  
+
   @Override
   @Before
   public void setUp() throws Exception {
@@ -176,7 +176,7 @@ public final class TestClusterDumper ext
       int maxIterations) throws IOException {
     FileSystem fs = FileSystem.get(conf);
     for (int i = maxIterations; i >= 0; i--) {
-      Path clusters = new Path(output, "clusters-" + i);
+      Path clusters = new Path(output, "clusters-" + i + "-final");
       if (fs.exists(clusters)) {
         return clusters;
       }

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java?rev=1177786&r1=1177785&r2=1177786&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java Fri Sep 30 19:20:47 2011
@@ -391,7 +391,7 @@ public final class TestClusterEvaluator 
     MeanShiftCanopyDriver.run(conf, testdata, output, measure, kernelProfile,
         2.1, 1.0, 0.001, 10, false, true, true);
     int numIterations = 10;
-    Path clustersIn = new Path(output, "clusters-7");
+    Path clustersIn = new Path(output, "clusters-7-final");
     RepresentativePointsDriver.run(conf, clustersIn, new Path(output,
         "clusteredPoints"), output, measure, numIterations, true);
     ClusterEvaluator evaluator = new ClusterEvaluator(conf, clustersIn);
@@ -414,7 +414,7 @@ public final class TestClusterEvaluator 
         0, true);
     int numIterations = 10;
     Configuration conf = new Configuration();
-    Path clustersIn = new Path(output, "clusters-5");
+    Path clustersIn = new Path(output, "clusters-5-final");
     RepresentativePointsDriver.run(conf, clustersIn, new Path(output,
         "clusteredPoints"), output, new EuclideanDistanceMeasure(),
         numIterations, true);



Mime
View raw message