commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From er...@apache.org
Subject svn commit: r1081744 - in /commons/proper/math/trunk/src: main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java site/xdoc/changes.xml test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java
Date Tue, 15 Mar 2011 12:15:02 GMT
Author: erans
Date: Tue Mar 15 12:15:02 2011
New Revision: 1081744

URL: http://svn.apache.org/viewvc?rev=1081744&view=rev
Log:
MATH-546
Wrong variable type ("int" instead of "double").

Modified:
    commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java
    commons/proper/math/trunk/src/site/xdoc/changes.xml
    commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java

Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java?rev=1081744&r1=1081743&r2=1081744&view=diff
==============================================================================
--- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java (original)
+++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java Tue Mar 15 12:15:02 2011
@@ -172,7 +172,7 @@ public class KMeansPlusPlusClusterer<T e
         while (resultSet.size() < k) {
             // For each data point x, compute D(x), the distance between x and
             // the nearest center that has already been chosen.
-            int sum = 0;
+            double sum = 0;
             for (int i = 0; i < pointSet.size(); i++) {
                 final T p = pointSet.get(i);
                 final Cluster<T> nearest = getNearestCluster(resultSet, p);

Modified: commons/proper/math/trunk/src/site/xdoc/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/site/xdoc/changes.xml?rev=1081744&r1=1081743&r2=1081744&view=diff
==============================================================================
--- commons/proper/math/trunk/src/site/xdoc/changes.xml (original)
+++ commons/proper/math/trunk/src/site/xdoc/changes.xml Tue Mar 15 12:15:02 2011
@@ -52,6 +52,9 @@ The <action> type attribute can be add,u
     If the output is not quite correct, check for invisible trailing spaces!
      -->
     <release version="3.0" date="TBD" description="TBD">
+      <action dev="erans" type="fix" issue="MATH-546" due-to="Nate Paymer">
+        Fixed bug in "KMeansPlusPlusClusterer".
+      </action>
       <action dev="erans" type="update" issue="MATH-542">
         All exceptions defined in Commons Math provide a context and a compound
         message list.

Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java?rev=1081744&r1=1081743&r2=1081744&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java (original)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java Tue Mar 15 12:15:02 2011
@@ -20,7 +20,9 @@ package org.apache.commons.math.stat.clu
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.List;
 import java.util.Random;
 
@@ -166,4 +168,84 @@ public class KMeansPlusPlusClustererTest
 
     }
 
+    /**
+     * A helper class for testSmallDistances(). This class is similar to EuclideanIntegerPoint, but
+     * it defines a different distanceFrom() method that tends to return distances less than 1.
+     */
+    private class CloseIntegerPoint implements Clusterable<CloseIntegerPoint> {
+        public CloseIntegerPoint(EuclideanIntegerPoint point) {
+            euclideanPoint = point;
+        }
+
+        public double distanceFrom(CloseIntegerPoint p) {
+            return euclideanPoint.distanceFrom(p.euclideanPoint) * 0.001;
+        }
+
+        public CloseIntegerPoint centroidOf(Collection<CloseIntegerPoint> p) {
+            Collection<EuclideanIntegerPoint> euclideanPoints =
+                new ArrayList<EuclideanIntegerPoint>();
+            for (CloseIntegerPoint point : p) {
+                euclideanPoints.add(point.euclideanPoint);
+            }
+            return new CloseIntegerPoint(euclideanPoint.centroidOf(euclideanPoints));
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (!(o instanceof CloseIntegerPoint)) {
+                return false;
+            }
+            CloseIntegerPoint p = (CloseIntegerPoint) o;
+
+            return euclideanPoint.equals(p.euclideanPoint);
+        }
+
+        @Override
+        public int hashCode() {
+            return euclideanPoint.hashCode();
+        }
+
+        private EuclideanIntegerPoint euclideanPoint;
+    }
+
+    /**
+     * Test points that are very close together. See issue MATH-546.
+     */
+    @Test
+    public void testSmallDistances() {
+        // Create a bunch of CloseIntegerPoints. Most are identical, but one is different by a
+        // small distance.
+        int[] repeatedArray = { 0 };
+        int[] uniqueArray = { 1 };
+        CloseIntegerPoint repeatedPoint =
+            new CloseIntegerPoint(new EuclideanIntegerPoint(repeatedArray));
+        CloseIntegerPoint uniquePoint =
+            new CloseIntegerPoint(new EuclideanIntegerPoint(uniqueArray));
+
+        Collection<CloseIntegerPoint> points = new ArrayList<CloseIntegerPoint>();
+        final int NUM_REPEATED_POINTS = 10 * 1000;
+        for (int i = 0; i < NUM_REPEATED_POINTS; ++i) {
+            points.add(repeatedPoint);
+        }
+        points.add(uniquePoint);
+
+        // Ask a KMeansPlusPlusClusterer to run zero iterations (i.e., to simply choose initial
+        // cluster centers).
+        final long RANDOM_SEED = 0;
+        final int NUM_CLUSTERS = 2;
+        final int NUM_ITERATIONS = 0;
+        KMeansPlusPlusClusterer<CloseIntegerPoint> clusterer =
+            new KMeansPlusPlusClusterer<CloseIntegerPoint>(new Random(RANDOM_SEED));
+        List<Cluster<CloseIntegerPoint>> clusters =
+            clusterer.cluster(points, NUM_CLUSTERS, NUM_ITERATIONS);
+
+        // Check that one of the chosen centers is the unique point.
+        boolean uniquePointIsCenter = false;
+        for (Cluster<CloseIntegerPoint> cluster : clusters) {
+            if (cluster.getCenter().equals(uniquePoint)) {
+                uniquePointIsCenter = true;
+            }
+        }
+        assertTrue(uniquePointIsCenter);
+    }
 }



Mime
View raw message