mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jeast...@apache.org
Subject svn commit: r788456 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering: canopy/ kmeans/ meanshift/
Date Thu, 25 Jun 2009 18:15:50 GMT
Author: jeastman
Date: Thu Jun 25 18:15:50 2009
New Revision: 788456

URL: http://svn.apache.org/viewvc?rev=788456&view=rev
Log:
- moved MeanShiftCanopy under ClusterBase
- cleaned up obsolete imports
- all tests run (except watchmaker...)

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java?rev=788456&r1=788455&r2=788456&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java Thu Jun 25 18:15:50 2009
@@ -17,20 +17,19 @@
 
 package org.apache.mahout.clustering.canopy;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.mahout.clustering.ClusterBase;
 import org.apache.mahout.matrix.AbstractVector;
-import org.apache.mahout.matrix.SparseVector;
 import org.apache.mahout.matrix.Vector;
 import org.apache.mahout.utils.DistanceMeasure;
-import org.apache.mahout.clustering.ClusterBase;
-
-import java.io.IOException;
-import java.io.DataOutput;
-import java.io.DataInput;
-import java.util.List;
 
 /**
  * This class models a canopy as a center point, the number of points that are

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java?rev=788456&r1=788455&r2=788456&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyMapper.java Thu Jun 25 18:15:50 2009
@@ -17,6 +17,10 @@
 
 package org.apache.mahout.clustering.canopy;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.JobConf;
@@ -24,14 +28,8 @@
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.mahout.matrix.AbstractVector;
-import org.apache.mahout.matrix.SparseVector;
 import org.apache.mahout.matrix.Vector;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
 public class CanopyMapper extends MapReduceBase implements
     Mapper<WritableComparable<?>, Vector, Text, Vector> {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java?rev=788456&r1=788455&r2=788456&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterMapper.java Thu Jun 25 18:15:50 2009
@@ -17,6 +17,10 @@
 
 package org.apache.mahout.clustering.canopy;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
@@ -27,13 +31,7 @@
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.mahout.matrix.AbstractVector;
 import org.apache.mahout.matrix.Vector;
-import org.apache.mahout.matrix.SparseVector;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
 
 public class ClusterMapper extends MapReduceBase implements
         Mapper<WritableComparable<?>, Vector, Text, Vector> {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java?rev=788456&r1=788455&r2=788456&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java Thu Jun 25 18:15:50 2009
@@ -25,12 +25,11 @@
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.mahout.clustering.ClusterBase;
 import org.apache.mahout.matrix.AbstractVector;
-import org.apache.mahout.matrix.SparseVector;
 import org.apache.mahout.matrix.SquareRootFunction;
 import org.apache.mahout.matrix.Vector;
 import org.apache.mahout.utils.DistanceMeasure;
-import org.apache.mahout.clustering.ClusterBase;
 
 public class Cluster extends ClusterBase implements Writable {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java?rev=788456&r1=788455&r2=788456&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansCombiner.java Thu Jun 25 18:15:50 2009
@@ -25,7 +25,6 @@
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.mahout.matrix.AbstractVector;
 
 public class KMeansCombiner extends MapReduceBase implements
     Reducer<Text, KMeansInfo, Text, KMeansInfo> {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java?rev=788456&r1=788455&r2=788456&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java Thu Jun 25 18:15:50 2009
@@ -27,7 +27,6 @@
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.mahout.matrix.AbstractVector;
 import org.apache.mahout.matrix.Vector;
 
 public class KMeansMapper extends MapReduceBase implements

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java?rev=788456&r1=788455&r2=788456&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansReducer.java Thu Jun 25 18:15:50 2009
@@ -29,8 +29,6 @@
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.mahout.matrix.AbstractVector;
-import org.apache.mahout.matrix.Vector;
 
 public class KMeansReducer extends MapReduceBase implements
     Reducer<Text, KMeansInfo, Text, Cluster> {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java?rev=788456&r1=788455&r2=788456&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java Thu Jun 25 18:15:50 2009
@@ -25,10 +25,10 @@
 import java.util.List;
 
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.mahout.clustering.ClusterBase;
 import org.apache.mahout.matrix.AbstractVector;
 import org.apache.mahout.matrix.CardinalityException;
 import org.apache.mahout.matrix.DenseVector;
@@ -48,7 +48,7 @@
  * a point total which is the sum of all the points and is used to compute the
  * centroid when needed.
  */
-public class MeanShiftCanopy implements Writable {
+public class MeanShiftCanopy extends ClusterBase {
 
   // keys used by Driver, Mapper, Combiner & Reducer
   public static final String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure";
@@ -75,18 +75,7 @@
   // the distance measure
   private static DistanceMeasure measure;
 
-  // this canopy's canopyId
-  private int canopyId;
-
-  // the current center
-  private Vector center = null;
-
-  // the number of points in the canopy
-  private int numPoints = 0;
-
-  // the total of all points added to the canopy
-  private Vector pointTotal = null;
-
+  // TODO: this is problematic, but how else to encode membership?
   private List<Vector> boundPoints = new ArrayList<Vector>();
 
   private boolean converged = false;
@@ -200,7 +189,7 @@
    * @param id
    */
   public MeanShiftCanopy(String id) {
-    this.canopyId = Integer.parseInt(id.substring(1));
+    this.id = Integer.parseInt(id.substring(1));
     this.center = null;
     this.pointTotal = null;
     this.numPoints = 0;
@@ -212,7 +201,7 @@
    * @param point a Vector
    */
   public MeanShiftCanopy(Vector point) {
-    this.canopyId = nextCanopyId++;
+    this.id = nextCanopyId++;
     this.center = point;
     this.pointTotal = point.clone();
     this.numPoints = 1;
@@ -220,16 +209,16 @@
   }
 
   /**
-   * Create a new Canopy containing the given point, canopyId and bound points
+   * Create a new Canopy containing the given point, id and bound points
    * 
    * @param point a Vector
-   * @param canopyId an int identifying the canopy local to this process only
+   * @param id an int identifying the canopy local to this process only
    * @param boundPoints a List<Vector> containing points bound to the canopy
    * @param converged true if the canopy has converged
    */
-  MeanShiftCanopy(Vector point, int canopyId, List<Vector> boundPoints,
+  MeanShiftCanopy(Vector point, int id, List<Vector> boundPoints,
       boolean converged) {
-    this.canopyId = canopyId;
+    this.id = id;
     this.center = point;
     this.pointTotal = point.clone();
     this.numPoints = 1;
@@ -310,7 +299,7 @@
   }
 
   public int getCanopyId() {
-    return canopyId;
+    return id;
   }
 
   /**
@@ -323,7 +312,7 @@
   }
 
   public String getIdentifier() {
-    return converged ? "V" + canopyId : "C" + canopyId;
+    return converged ? "V" + id : "C" + id;
   }
 
   /**
@@ -334,7 +323,7 @@
   }
 
   void init(MeanShiftCanopy canopy) {
-    canopyId = canopy.canopyId;
+    id = canopy.id;
     center = canopy.center;
     addPoints(center, 1);
     boundPoints.addAll(canopy.getBoundPoints());
@@ -356,6 +345,7 @@
 
   /**
    * Shift the center to the new centroid of the cluster
+   * 
    * @return if the cluster is converged
    */
   public boolean shiftToMean() {
@@ -384,7 +374,7 @@
 
   @Override
   public void readFields(DataInput in) throws IOException {
-    this.canopyId = in.readInt();
+    super.readFields(in);
     this.center = AbstractVector.readVector(in);
     int numpoints = in.readInt();
     this.boundPoints = new ArrayList<Vector>();
@@ -394,20 +384,25 @@
 
   @Override
   public void write(DataOutput out) throws IOException {
-    out.writeInt(canopyId);
+    super.write(out);
     AbstractVector.writeVector(out, computeCentroid());
     out.writeInt(boundPoints.size());
     for (Vector v : boundPoints)
       AbstractVector.writeVector(out, v);
   }
-  
-  public MeanShiftCanopy shallowCopy(){
+
+  public MeanShiftCanopy shallowCopy() {
     MeanShiftCanopy result = new MeanShiftCanopy();
-    result.canopyId = this.canopyId;
+    result.id = this.id;
     result.center = this.center;
     result.pointTotal = this.pointTotal;
     result.numPoints = this.numPoints;
     result.boundPoints = this.boundPoints;
     return result;
   }
+
+  @Override
+  public String asFormatString() {
+    return formatCanopy(this);
+  }
 }



Mime
View raw message