mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jeast...@apache.org
Subject svn commit: r1131504 [3/3] - in /mahout/trunk: core/src/main/java/org/apache/mahout/clustering/meanshift/ core/src/main/java/org/apache/mahout/common/commandline/ core/src/main/java/org/apache/mahout/common/kernel/ core/src/test/java/org/apache/mahout/...
Date Sat, 04 Jun 2011 23:40:29 GMT
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java?rev=1131504&r1=1131503&r2=1131504&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java Sat Jun  4 23:40:28 2011
@@ -44,6 +44,8 @@ import org.apache.mahout.clustering.mean
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
+import org.apache.mahout.common.kernel.IKernelProfile;
+import org.apache.mahout.common.kernel.TriangularKernelProfile;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
@@ -53,28 +55,29 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public final class TestCDbwEvaluator extends MahoutTestCase {
-
-  private static final double[][] REFERENCE = { { 1, 1 }, { 2, 1 }, { 1, 2 }, { 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 4 }, { 4, 5 },
-      { 5, 5 } };
-
-  private static final Logger log = LoggerFactory.getLogger(TestClusterEvaluator.class);
-
-  private Map<Integer, List<VectorWritable>> representativePoints;
-
+  
+  private static final double[][] REFERENCE = { {1, 1}, {2, 1}, {1, 2}, {2, 2},
+      {3, 3}, {4, 4}, {5, 4}, {4, 5}, {5, 5}};
+  
+  private static final Logger log = LoggerFactory
+      .getLogger(TestClusterEvaluator.class);
+  
+  private Map<Integer,List<VectorWritable>> representativePoints;
+  
   private List<Cluster> clusters;
-
+  
   private Configuration conf;
-
+  
   private FileSystem fs;
-
+  
   private final Collection<VectorWritable> sampleData = new ArrayList<VectorWritable>();
-
+  
   private List<VectorWritable> referenceData = new ArrayList<VectorWritable>();
-
+  
   private Path testdata;
-
+  
   private Path output;
-
+  
   @Override
   @Before
   public void setUp() throws Exception {
@@ -88,31 +91,44 @@ public final class TestCDbwEvaluator ext
     // generate larger test data set for the clustering tests to chew on
     generateSamples();
   }
-
+  
   /**
-   * Initialize synthetic data using 4 clusters dC units from origin having 4 representative points dP from each center
-   * @param dC a double cluster center offset
-   * @param dP a double representative point offset
-   * @param measure the DistanceMeasure
+   * Initialize synthetic data using 4 clusters dC units from origin having 4
+   * representative points dP from each center
+   * 
+   * @param dC
+   *          a double cluster center offset
+   * @param dP
+   *          a double representative point offset
+   * @param measure
+   *          the DistanceMeasure
    */
   private void initData(double dC, double dP, DistanceMeasure measure) {
     clusters = new ArrayList<Cluster>();
-    clusters.add(new Canopy(new DenseVector(new double[] { -dC, -dC }), 1, measure));
-    clusters.add(new Canopy(new DenseVector(new double[] { -dC, dC }), 3, measure));
-    clusters.add(new Canopy(new DenseVector(new double[] { dC, dC }), 5, measure));
-    clusters.add(new Canopy(new DenseVector(new double[] { dC, -dC }), 7, measure));
-    representativePoints = new HashMap<Integer, List<VectorWritable>>();
+    clusters.add(new Canopy(new DenseVector(new double[] {-dC, -dC}), 1,
+        measure));
+    clusters
+        .add(new Canopy(new DenseVector(new double[] {-dC, dC}), 3, measure));
+    clusters
+        .add(new Canopy(new DenseVector(new double[] {dC, dC}), 5, measure));
+    clusters
+        .add(new Canopy(new DenseVector(new double[] {dC, -dC}), 7, measure));
+    representativePoints = new HashMap<Integer,List<VectorWritable>>();
     for (Cluster cluster : clusters) {
       List<VectorWritable> points = new ArrayList<VectorWritable>();
       representativePoints.put(cluster.getId(), points);
       points.add(new VectorWritable(cluster.getCenter().clone()));
-      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] { dP, dP }))));
-      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] { dP, -dP }))));
-      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] { -dP, -dP }))));
-      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] { -dP, dP }))));
+      points.add(new VectorWritable(cluster.getCenter().plus(
+          new DenseVector(new double[] {dP, dP}))));
+      points.add(new VectorWritable(cluster.getCenter().plus(
+          new DenseVector(new double[] {dP, -dP}))));
+      points.add(new VectorWritable(cluster.getCenter().plus(
+          new DenseVector(new double[] {-dP, -dP}))));
+      points.add(new VectorWritable(cluster.getCenter().plus(
+          new DenseVector(new double[] {-dP, dP}))));
     }
   }
-
+  
   /**
    * Generate random samples and add them to the sampleData
    * 
@@ -124,241 +140,307 @@ public final class TestCDbwEvaluator ext
    *          double y-value of the sample mean
    * @param sd
    *          double standard deviation of the samples
-   * @throws Exception 
+   * @throws Exception
    */
   private void generateSamples(int num, double mx, double my, double sd) {
-    log.info("Generating {} samples m=[{}, {}] sd={}", new Object[] { num, mx, my, sd });
+    log.info("Generating {} samples m=[{}, {}] sd={}", new Object[] {num, mx,
+        my, sd});
     for (int i = 0; i < num; i++) {
-      sampleData.add(new VectorWritable(new DenseVector(new double[] { UncommonDistributions.rNorm(mx, sd),
-          UncommonDistributions.rNorm(my, sd) })));
+      sampleData.add(new VectorWritable(new DenseVector(new double[] {
+          UncommonDistributions.rNorm(mx, sd),
+          UncommonDistributions.rNorm(my, sd)})));
     }
   }
-
+  
   private void generateSamples() {
     generateSamples(500, 1, 1, 3);
     generateSamples(300, 1, 0, 0.5);
     generateSamples(300, 0, 2, 0.1);
   }
-
+  
   @Test
   public void testCDbw0() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
+    ClusteringTestUtils.writePointsToFile(referenceData,
+        getTestTempFilePath("testdata/file1"), fs, conf);
     DistanceMeasure measure = new EuclideanDistanceMeasure();
     initData(1, 0.25, measure);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(), EPSILON);
-    assertEquals("separation", 20.485281374238568, evaluator.separation(), EPSILON);
-    assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(), EPSILON);
+    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
+        measure);
+    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
+        EPSILON);
+    assertEquals("separation", 20.485281374238568, evaluator.separation(),
+        EPSILON);
+    assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(),
+        EPSILON);
     assertEquals("CDbw", 16.388225099390855, evaluator.getCDbw(), EPSILON);
   }
-
+  
   @Test
   public void testCDbw1() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
+    ClusteringTestUtils.writePointsToFile(referenceData,
+        getTestTempFilePath("testdata/file1"), fs, conf);
     DistanceMeasure measure = new EuclideanDistanceMeasure();
     initData(1, 0.5, measure);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    assertEquals("inter cluster density", 1.2, evaluator.interClusterDensity(), EPSILON);
-    assertEquals("separation", 6.207661022496537, evaluator.separation(), EPSILON);
-    assertEquals("intra cluster density", 0.4, evaluator.intraClusterDensity(), EPSILON);
+    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
+        measure);
+    assertEquals("inter cluster density", 1.2, evaluator.interClusterDensity(),
+        EPSILON);
+    assertEquals("separation", 6.207661022496537, evaluator.separation(),
+        EPSILON);
+    assertEquals("intra cluster density", 0.4, evaluator.intraClusterDensity(),
+        EPSILON);
     assertEquals("CDbw", 2.483064408998615, evaluator.getCDbw(), EPSILON);
   }
-
+  
   @Test
   public void testCDbw2() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
+    ClusteringTestUtils.writePointsToFile(referenceData,
+        getTestTempFilePath("testdata/file1"), fs, conf);
     DistanceMeasure measure = new EuclideanDistanceMeasure();
     initData(1, 0.75, measure);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    assertEquals("inter cluster density", 0.682842712474619, evaluator.interClusterDensity(), EPSILON);
-    assertEquals("separation", 4.0576740025245694, evaluator.separation(), EPSILON);
-    assertEquals("intra cluster density", 0.26666666666666666, evaluator.intraClusterDensity(), EPSILON);
+    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
+        measure);
+    assertEquals("inter cluster density", 0.682842712474619,
+        evaluator.interClusterDensity(), EPSILON);
+    assertEquals("separation", 4.0576740025245694, evaluator.separation(),
+        EPSILON);
+    assertEquals("intra cluster density", 0.26666666666666666,
+        evaluator.intraClusterDensity(), EPSILON);
     assertEquals("CDbw", 1.0820464006732184, evaluator.getCDbw(), EPSILON);
   }
-
+  
   @Test
   public void testEmptyCluster() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
+    ClusteringTestUtils.writePointsToFile(referenceData,
+        getTestTempFilePath("testdata/file1"), fs, conf);
     DistanceMeasure measure = new EuclideanDistanceMeasure();
     initData(1, 0.25, measure);
-    Canopy cluster = new Canopy(new DenseVector(new double[] { 10, 10 }), 19, measure);
+    Canopy cluster = new Canopy(new DenseVector(new double[] {10, 10}), 19,
+        measure);
     clusters.add(cluster);
     List<VectorWritable> points = new ArrayList<VectorWritable>();
     representativePoints.put(cluster.getId(), points);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(), EPSILON);
-    assertEquals("separation", 20.485281374238568, evaluator.separation(), EPSILON);
-    assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(), EPSILON);
+    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
+        measure);
+    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
+        EPSILON);
+    assertEquals("separation", 20.485281374238568, evaluator.separation(),
+        EPSILON);
+    assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(),
+        EPSILON);
     assertEquals("CDbw", 16.388225099390855, evaluator.getCDbw(), EPSILON);
   }
-
+  
   @Test
   public void testSingleValueCluster() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
+    ClusteringTestUtils.writePointsToFile(referenceData,
+        getTestTempFilePath("testdata/file1"), fs, conf);
     DistanceMeasure measure = new EuclideanDistanceMeasure();
     initData(1, 0.25, measure);
-    Canopy cluster = new Canopy(new DenseVector(new double[] { 0, 0 }), 19, measure);
+    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19,
+        measure);
     clusters.add(cluster);
     List<VectorWritable> points = new ArrayList<VectorWritable>();
-    points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] { 1, 1 }))));
+    points.add(new VectorWritable(cluster.getCenter().plus(
+        new DenseVector(new double[] {1, 1}))));
     representativePoints.put(cluster.getId(), points);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(), EPSILON);
-    assertEquals("separation", 20.485281374238568, evaluator.separation(), EPSILON);
-    assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(), EPSILON);
+    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
+        measure);
+    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
+        EPSILON);
+    assertEquals("separation", 20.485281374238568, evaluator.separation(),
+        EPSILON);
+    assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(),
+        EPSILON);
     assertEquals("CDbw", 16.388225099390855, evaluator.getCDbw(), EPSILON);
   }
-
+  
   /**
-   * Representative points extraction will duplicate the cluster center if the cluster has no 
-   * assigned points. These clusters should be ignored like empty clusters above
-   * @throws IOException 
+   * Representative points extraction will duplicate the cluster center if the
+   * cluster has no assigned points. These clusters should be ignored like empty
+   * clusters above
+   * 
+   * @throws IOException
    */
   @Test
   public void testAllSameValueCluster() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
+    ClusteringTestUtils.writePointsToFile(referenceData,
+        getTestTempFilePath("testdata/file1"), fs, conf);
     DistanceMeasure measure = new EuclideanDistanceMeasure();
     initData(1, 0.25, measure);
-    Canopy cluster = new Canopy(new DenseVector(new double[] { 0, 0 }), 19, measure);
+    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19,
+        measure);
     clusters.add(cluster);
     List<VectorWritable> points = new ArrayList<VectorWritable>();
     points.add(new VectorWritable(cluster.getCenter()));
     points.add(new VectorWritable(cluster.getCenter()));
     points.add(new VectorWritable(cluster.getCenter()));
     representativePoints.put(cluster.getId(), points);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(), EPSILON);
-    assertEquals("separation", 20.485281374238568, evaluator.separation(), EPSILON);
-    assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(), EPSILON);
+    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
+        measure);
+    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
+        EPSILON);
+    assertEquals("separation", 20.485281374238568, evaluator.separation(),
+        EPSILON);
+    assertEquals("intra cluster density", 0.8, evaluator.intraClusterDensity(),
+        EPSILON);
     assertEquals("CDbw", 16.388225099390855, evaluator.getCDbw(), EPSILON);
   }
-
+  
   /**
-   * Clustering can produce very, very tight clusters that can cause the std calculation to fail.
-   * These clusters should be processed correctly.
-   * @throws IOException 
+   * Clustering can produce very, very tight clusters that can cause the std
+   * calculation to fail. These clusters should be processed correctly.
+   * 
+   * @throws IOException
    */
   @Test
   public void testAlmostSameValueCluster() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
+    ClusteringTestUtils.writePointsToFile(referenceData,
+        getTestTempFilePath("testdata/file1"), fs, conf);
     DistanceMeasure measure = new EuclideanDistanceMeasure();
     initData(1, 0.25, measure);
-    Canopy cluster = new Canopy(new DenseVector(new double[] { 0, 0 }), 19, measure);
+    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19,
+        measure);
     clusters.add(cluster);
     List<VectorWritable> points = new ArrayList<VectorWritable>();
-    Vector delta = new DenseVector(new double[] { 0, Double.MIN_NORMAL });
+    Vector delta = new DenseVector(new double[] {0, Double.MIN_NORMAL});
     points.add(new VectorWritable(delta.clone()));
     points.add(new VectorWritable(delta.clone()));
     points.add(new VectorWritable(delta.clone()));
     points.add(new VectorWritable(delta.clone()));
     points.add(new VectorWritable(delta.clone()));
     representativePoints.put(cluster.getId(), points);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(), EPSILON);
-    assertEquals("separation", 28.970562748477143, evaluator.separation(), EPSILON);
-    assertEquals("intra cluster density", 1.8, evaluator.intraClusterDensity(), EPSILON);
+    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters,
+        measure);
+    assertEquals("inter cluster density", 0.0, evaluator.interClusterDensity(),
+        EPSILON);
+    assertEquals("separation", 28.970562748477143, evaluator.separation(),
+        EPSILON);
+    assertEquals("intra cluster density", 1.8, evaluator.intraClusterDensity(),
+        EPSILON);
     assertEquals("CDbw", 52.147012947258865, evaluator.getCDbw(), EPSILON);
   }
-
+  
   @Test
   public void testCanopy() throws Exception {
-    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
+    ClusteringTestUtils.writePointsToFile(sampleData,
+        getTestTempFilePath("testdata/file1"), fs, conf);
     DistanceMeasure measure = new EuclideanDistanceMeasure();
-    CanopyDriver.run(new Configuration(), testdata, output, measure, 3.1, 2.1, true, true);
+    CanopyDriver.run(new Configuration(), testdata, output, measure, 3.1, 2.1,
+        true, true);
     int numIterations = 10;
     Path clustersIn = new Path(output, "clusters-0");
-    RepresentativePointsDriver.run(conf, clustersIn, new Path(output, "clusteredPoints"), output, measure, numIterations, true);
+    RepresentativePointsDriver.run(conf, clustersIn, new Path(output,
+        "clusteredPoints"), output, measure, numIterations, true);
     CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
-    //printRepPoints(numIterations);
+    // printRepPoints(numIterations);
     // now print out the Results
     System.out.println("Canopy CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
+    System.out.println("Intra-cluster density = "
+        + evaluator.intraClusterDensity());
+    System.out.println("Inter-cluster density = "
+        + evaluator.interClusterDensity());
     System.out.println("Separation = " + evaluator.separation());
   }
-
+  
   @Test
   public void testKmeans() throws Exception {
-    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
+    ClusteringTestUtils.writePointsToFile(sampleData,
+        getTestTempFilePath("testdata/file1"), fs, conf);
     DistanceMeasure measure = new EuclideanDistanceMeasure();
     // now run the Canopy job to prime kMeans canopies
-    CanopyDriver.run(new Configuration(), testdata, output, measure, 3.1, 2.1, false, true);
+    CanopyDriver.run(new Configuration(), testdata, output, measure, 3.1, 2.1,
+        false, true);
     // now run the KMeans job
-    KMeansDriver.run(testdata, new Path(output, "clusters-0"), output, measure, 0.001, 10, true, true);
+    KMeansDriver.run(testdata, new Path(output, "clusters-0"), output, measure,
+        0.001, 10, true, true);
     int numIterations = 10;
     Path clustersIn = new Path(output, "clusters-2");
-    RepresentativePointsDriver.run(conf, clustersIn, new Path(output, "clusteredPoints"), output, measure, numIterations, true);
+    RepresentativePointsDriver.run(conf, clustersIn, new Path(output,
+        "clusteredPoints"), output, measure, numIterations, true);
     CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
-    //printRepPoints(numIterations);
+    // printRepPoints(numIterations);
     // now print out the Results
     System.out.println("K-Means CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
+    System.out.println("Intra-cluster density = "
+        + evaluator.intraClusterDensity());
+    System.out.println("Inter-cluster density = "
+        + evaluator.interClusterDensity());
     System.out.println("Separation = " + evaluator.separation());
   }
-
+  
   @Test
   public void testFuzzyKmeans() throws Exception {
-    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
+    ClusteringTestUtils.writePointsToFile(sampleData,
+        getTestTempFilePath("testdata/file1"), fs, conf);
     DistanceMeasure measure = new EuclideanDistanceMeasure();
     // now run the Canopy job to prime kMeans canopies
-    CanopyDriver.run(new Configuration(), testdata, output, measure, 3.1, 2.1, false, true);
+    CanopyDriver.run(new Configuration(), testdata, output, measure, 3.1, 2.1,
+        false, true);
     // now run the KMeans job
-    FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0"), output, measure, 0.001, 10, 2, true, true, 0, true);
+    FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0"), output,
+        measure, 0.001, 10, 2, true, true, 0, true);
     int numIterations = 10;
     Path clustersIn = new Path(output, "clusters-4");
-    RepresentativePointsDriver.run(conf, clustersIn, new Path(output, "clusteredPoints"), output, measure, numIterations, true);
+    RepresentativePointsDriver.run(conf, clustersIn, new Path(output,
+        "clusteredPoints"), output, measure, numIterations, true);
     CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
-    //printRepPoints(numIterations);
+    // printRepPoints(numIterations);
     // now print out the Results
     System.out.println("Fuzzy K-Means CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
+    System.out.println("Intra-cluster density = "
+        + evaluator.intraClusterDensity());
+    System.out.println("Inter-cluster density = "
+        + evaluator.interClusterDensity());
     System.out.println("Separation = " + evaluator.separation());
   }
-
+  
   @Test
   public void testMeanShift() throws Exception {
-    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
+    ClusteringTestUtils.writePointsToFile(sampleData,
+        getTestTempFilePath("testdata/file1"), fs, conf);
     DistanceMeasure measure = new EuclideanDistanceMeasure();
-    new MeanShiftCanopyDriver().run(conf, testdata, output, measure, 2.1, 1.0, 0.001, 10, false, true, true);
+    IKernelProfile kernelProfile = new TriangularKernelProfile();
+    MeanShiftCanopyDriver.run(conf, testdata, output, measure, kernelProfile,
+        2.1, 1.0, 0.001, 10, false, true, true);
     int numIterations = 10;
     Path clustersIn = new Path(output, "clusters-2");
-    RepresentativePointsDriver.run(conf, clustersIn, new Path(output, "clusteredPoints"), output, measure, numIterations, true);
+    RepresentativePointsDriver.run(conf, clustersIn, new Path(output,
+        "clusteredPoints"), output, measure, numIterations, true);
     CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
-    //printRepPoints(numIterations);
+    // printRepPoints(numIterations);
     // now print out the Results
     System.out.println("Mean Shift CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
+    System.out.println("Intra-cluster density = "
+        + evaluator.intraClusterDensity());
+    System.out.println("Inter-cluster density = "
+        + evaluator.interClusterDensity());
     System.out.println("Separation = " + evaluator.separation());
   }
-
+  
   @Test
   public void testDirichlet() throws Exception {
-    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
-    DistributionDescription description =
-        new DistributionDescription(GaussianClusterDistribution.class.getName(),
-                                    DenseVector.class.getName(),
-                                    null,
-                                    2);
-    DirichletDriver.run(testdata, output, description, 15, 5, 1.0, true, true, 0, true);
+    ClusteringTestUtils.writePointsToFile(sampleData,
+        getTestTempFilePath("testdata/file1"), fs, conf);
+    DistributionDescription description = new DistributionDescription(
+        GaussianClusterDistribution.class.getName(),
+        DenseVector.class.getName(), null, 2);
+    DirichletDriver.run(testdata, output, description, 15, 5, 1.0, true, true,
+        0, true);
     int numIterations = 10;
     Path clustersIn = new Path(output, "clusters-0");
-    RepresentativePointsDriver.run(conf,
-                                   clustersIn,
-                                   new Path(output, "clusteredPoints"),
-                                   output,
-                                   new EuclideanDistanceMeasure(),
-                                   numIterations,
-                                   true);
+    RepresentativePointsDriver.run(conf, clustersIn, new Path(output,
+        "clusteredPoints"), output, new EuclideanDistanceMeasure(),
+        numIterations, true);
     CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
-    //printRepPoints(numIterations);
+    // printRepPoints(numIterations);
     // now print out the Results
     System.out.println("Dirichlet CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
+    System.out.println("Intra-cluster density = "
+        + evaluator.intraClusterDensity());
+    System.out.println("Inter-cluster density = "
+        + evaluator.interClusterDensity());
     System.out.println("Separation = " + evaluator.separation());
   }
-
+  
 }



Mime
View raw message