mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jeast...@apache.org
Subject svn commit: r1090861 - in /mahout/trunk/core/src: main/java/org/apache/mahout/clustering/meanshift/ test/java/org/apache/mahout/clustering/meanshift/
Date Sun, 10 Apr 2011 18:10:51 GMT
Author: jeastman
Date: Sun Apr 10 18:10:50 2011
New Revision: 1090861

URL: http://svn.apache.org/viewvc?rev=1090861&view=rev
Log:
MAHOUT-552: Added static initialCanopy method to create initial canopies with original center
type. Added to unit test. All tests run.

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java?rev=1090861&r1=1090860&r2=1090861&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java Sun Apr 10 18:10:50 2011
@@ -55,6 +55,20 @@ public class MeanShiftCanopy extends Clu
     super(point, id, measure);
     boundPoints.add(id);
   }
+  
+  /**
+   * Create an initial Canopy, retaining the original type of the given point (e.g. NamedVector)
+   * @param point a Vector
+   * @param id an int
+   * @param measure a DistanceMeasure
+   * @return a MeanShiftCanopy
+   */
+  public static MeanShiftCanopy initialCanopy(Vector point, int id, DistanceMeasure measure){
+	  MeanShiftCanopy result = new MeanShiftCanopy(point, id, measure);
+	  // overwrite center so original point type is retained 
+	  result.setCenter(point);
+	  return result;
+  }
 
   /**
    * Create a new Canopy containing the given point, id and bound points

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java?rev=1090861&r1=1090860&r2=1090861&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java Sun Apr 10 18:10:50 2011
@@ -38,7 +38,7 @@ public class MeanShiftCanopyCreatorMappe
 
   @Override
   protected void map(WritableComparable<?> key, VectorWritable point, Context context) throws IOException, InterruptedException {
-    MeanShiftCanopy canopy = new MeanShiftCanopy(point.get(), nextCanopyId++, measure);
+    MeanShiftCanopy canopy = MeanShiftCanopy.initialCanopy(point.get(), nextCanopyId++, measure);
     context.write(new Text(key.toString()), canopy);
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=1090861&r1=1090860&r2=1090861&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Sun Apr 10 18:10:50 2011
@@ -209,7 +209,7 @@ public class MeanShiftCanopyDriver exten
                                                            MeanShiftCanopy.class);
       try {
         for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(s.getPath(), conf)) {
-          writer.append(new Text(), new MeanShiftCanopy(value.get(), id++, measure));
+          writer.append(new Text(), MeanShiftCanopy.initialCanopy(value.get(), id++, measure));
         }
       } finally {
         writer.close();

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=1090861&r1=1090860&r2=1090861&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Sun Apr 10 18:10:50 2011
@@ -21,6 +21,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
@@ -28,6 +29,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
@@ -39,6 +41,7 @@ import org.apache.mahout.common.MahoutTe
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
@@ -327,6 +330,13 @@ public final class TestMeanShift extends
     Path outPart = new Path(output, "clusters-3/part-r-00000");
     long count = HadoopUtil.countRecords(outPart, conf);
     assertEquals("count", 3, count);
+    outPart = new Path(output, "clusters-0/part-m-00000");
+	Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart, true, conf);
+	// now test the initial clusters to ensure the type of their centers has been retained
+	while (iterator.hasNext()) {
+	  MeanShiftCanopy canopy = (MeanShiftCanopy) iterator.next();
+	  assertTrue(canopy.getCenter()instanceof DenseVector);
+	}
   }
 
   /**



Mime
View raw message