commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From er...@apache.org
Subject [commons-math] branch master updated: MATH-1525: Make "EmptyClusterStrategy" and related logic in "KMeansPlusPlusClusterer" reusable.
Date Sat, 21 Mar 2020 15:21:36 GMT
This is an automated email from the ASF dual-hosted git repository.

erans pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-math.git


The following commit(s) were added to refs/heads/master by this push:
     new baf8d0a  MATH-1525: Make "EmptyClusterStrategy" and related logic in "KMeansPlusPlusClusterer" reusable.
     new 22373ae  Merge branch 'MATH-1525__ChenTao'
baf8d0a is described below

commit baf8d0a40468fc844db7aab4b20fd5b3f3e225a4
Author: CT <chentao@qq.com>
AuthorDate: Sat Mar 21 21:11:17 2020 +0800

    MATH-1525: Make "EmptyClusterStrategy" and related logic in "KMeansPlusPlusClusterer" reusable.
---
 .../ml/clustering/KMeansPlusPlusClusterer.java     | 67 ++++++++++++++--------
 1 file changed, 42 insertions(+), 25 deletions(-)

diff --git a/src/main/java/org/apache/commons/math4/ml/clustering/KMeansPlusPlusClusterer.java b/src/main/java/org/apache/commons/math4/ml/clustering/KMeansPlusPlusClusterer.java
index bf656ae..41d316f 100644
--- a/src/main/java/org/apache/commons/math4/ml/clustering/KMeansPlusPlusClusterer.java
+++ b/src/main/java/org/apache/commons/math4/ml/clustering/KMeansPlusPlusClusterer.java
@@ -189,6 +189,14 @@ public class KMeansPlusPlusClusterer<T extends Clusterable> extends Clusterer<T>
     }
 
     /**
+     * Return the CentroidInitializer used by this instance.
+     * @return the CentroidInitializer
+     */
+    CentroidInitializer getCentroidInitializer() {
+        return centroidInitializer;
+    }
+
+    /**
      * Runs the K-means++ clustering algorithm.
      *
      * @param points the points to cluster
@@ -219,36 +227,14 @@ public class KMeansPlusPlusClusterer<T extends Clusterable> extends Clusterer<T>
         // iterate through updating the centers until we're done
         final int max = (maxIterations < 0) ? Integer.MAX_VALUE : maxIterations;
         for (int count = 0; count < max; count++) {
-            boolean emptyCluster = false;
-            List<CentroidCluster<T>> newClusters = new ArrayList<>();
-            for (final CentroidCluster<T> cluster : clusters) {
-                final Clusterable newCenter;
-                if (cluster.getPoints().isEmpty()) {
-                    switch (emptyStrategy) {
-                        case LARGEST_VARIANCE :
-                            newCenter = getPointFromLargestVarianceCluster(clusters);
-                            break;
-                        case LARGEST_POINTS_NUMBER :
-                            newCenter = getPointFromLargestNumberCluster(clusters);
-                            break;
-                        case FARTHEST_POINT :
-                            newCenter = getFarthestPoint(clusters);
-                            break;
-                        default :
-                            throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
-                    }
-                    emptyCluster = true;
-                } else {
-                    newCenter = cluster.centroid();
-                }
-                newClusters.add(new CentroidCluster<T>(newCenter));
-            }
+            boolean hasEmptyCluster = clusters.stream().anyMatch(cluster->cluster.getPoints().isEmpty());
+            List<CentroidCluster<T>> newClusters = adjustClustersCenters(clusters);
             int changes = assignPointsToClusters(newClusters, points, assignments);
             clusters = newClusters;
 
             // if there were no more changes in the point-to-cluster assignment
             // and there are no empty clusters left, return the current clusters
-            if (changes == 0 && !emptyCluster) {
+            if (changes == 0 && !hasEmptyCluster) {
                 return clusters;
             }
         }
@@ -256,6 +242,37 @@ public class KMeansPlusPlusClusterer<T extends Clusterable> extends Clusterer<T>
     }
 
     /**
+     * Adjust the clusters's centers with means of points
+     * @param clusters the origin clusters
+     * @return adjusted clusters with center points
+     */
+    List<CentroidCluster<T>> adjustClustersCenters(List<CentroidCluster<T>> clusters) {
+        List<CentroidCluster<T>> newClusters = new ArrayList<>();
+        for (final CentroidCluster<T> cluster : clusters) {
+            final Clusterable newCenter;
+            if (cluster.getPoints().isEmpty()) {
+                switch (emptyStrategy) {
+                    case LARGEST_VARIANCE :
+                        newCenter = getPointFromLargestVarianceCluster(clusters);
+                        break;
+                    case LARGEST_POINTS_NUMBER :
+                        newCenter = getPointFromLargestNumberCluster(clusters);
+                        break;
+                    case FARTHEST_POINT :
+                        newCenter = getFarthestPoint(clusters);
+                        break;
+                    default :
+                        throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
+                }
+            } else {
+                newCenter = cluster.centroid();
+            }
+            newClusters.add(new CentroidCluster<>(newCenter));
+        }
+        return newClusters;
+    }
+
+    /**
      * Adds the given points to the closest {@link Cluster}.
      *
      * @param clusters the {@link Cluster}s to add the points to


Mime
View raw message