ignite-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ch...@apache.org
Subject [1/3] ignite git commit: IGNITE-8169: [ML] Adopt KMeans to the new Partitioned Dataset and cleanup old code
Date Mon, 16 Apr 2018 17:20:58 GMT
Repository: ignite
Updated Branches:
  refs/heads/master 228254ae3 -> 9e21cec02


http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansDistributedClustererTestSingleNode.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansDistributedClustererTestSingleNode.java b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansDistributedClustererTestSingleNode.java
deleted file mode 100644
index 705db7a..0000000
--- a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansDistributedClustererTestSingleNode.java
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.ml.clustering;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
-import org.apache.ignite.Ignite;
-import org.apache.ignite.internal.util.IgniteUtils;
-import org.apache.ignite.ml.math.StorageConstants;
-import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.VectorUtils;
-import org.apache.ignite.ml.math.distances.DistanceMeasure;
-import org.apache.ignite.ml.math.distances.EuclideanDistance;
-import org.apache.ignite.ml.math.functions.Functions;
-import org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
-import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
-import org.junit.Assert;
-
-import static org.apache.ignite.ml.clustering.KMeansUtil.checkIsInEpsilonNeighbourhood;
-
-/**
- * This test checks logic of clustering (checks for clusters structures).
- */
-public class KMeansDistributedClustererTestSingleNode extends GridCommonAbstractTest {
-    /**
-     * Number of nodes in grid. We should use 1 in this test because otherwise algorithm will be unstable
-     * (We cannot guarantee the order in which results are returned from each node).
-     */
-    private static final int NODE_COUNT = 1;
-
-    /** Grid instance. */
-    private Ignite ignite;
-
-    /**
-     * Default constructor.
-     */
-    public KMeansDistributedClustererTestSingleNode() {
-        super(false);
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override protected void beforeTest() throws Exception {
-        ignite = grid(NODE_COUNT);
-    }
-
-    /** {@inheritDoc} */
-    @Override protected void beforeTestsStarted() throws Exception {
-        for (int i = 1; i <= NODE_COUNT; i++)
-            startGrid(i);
-    }
-
-    /** {@inheritDoc} */
-    @Override protected void afterTestsStopped() throws Exception {
-        stopAllGrids();
-    }
-
-    /** */
-    public void testPerformClusterAnalysisDegenerate() {
-        IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
-
-        KMeansDistributedClusterer clusterer = new KMeansDistributedClusterer(new EuclideanDistance(), 1, 1, 1L);
-
-        double[] v1 = new double[] {1959, 325100};
-        double[] v2 = new double[] {1960, 373200};
-
-        SparseDistributedMatrix points = new SparseDistributedMatrix(2, 2, StorageConstants.ROW_STORAGE_MODE,
-            StorageConstants.RANDOM_ACCESS_MODE);
-
-        points.setRow(0, v1);
-        points.setRow(1, v2);
-
-        KMeansModel mdl = clusterer.cluster(points, 1);
-
-        Assert.assertEquals(1, mdl.centers().length);
-        Assert.assertEquals(2, mdl.centers()[0].size());
-    }
-
-    /** */
-    public void testClusterizationOnDatasetWithObviousStructure() throws IOException {
-        IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
-
-        int ptsCnt = 10000;
-        int squareSideLen = 10000;
-
-        Random rnd = new Random(123456L);
-
-        // Let centers be in the vertices of square.
-        Map<Integer, Vector> centers = new HashMap<>();
-        centers.put(100, new DenseLocalOnHeapVector(new double[] {0.0, 0.0}));
-        centers.put(900, new DenseLocalOnHeapVector(new double[] {squareSideLen, 0.0}));
-        centers.put(3000, new DenseLocalOnHeapVector(new double[] {0.0, squareSideLen}));
-        centers.put(6000, new DenseLocalOnHeapVector(new double[] {squareSideLen, squareSideLen}));
-
-        int centersCnt = centers.size();
-
-        SparseDistributedMatrix points = new SparseDistributedMatrix(ptsCnt, 2, StorageConstants.ROW_STORAGE_MODE,
-            StorageConstants.RANDOM_ACCESS_MODE);
-
-        List<Integer> permutation = IntStream.range(0, ptsCnt).boxed().collect(Collectors.toList());
-        Collections.shuffle(permutation, rnd);
-
-        Vector[] mc = new Vector[centersCnt];
-        Arrays.fill(mc, VectorUtils.zeroes(2));
-
-        int centIdx = 0;
-        int totalCnt = 0;
-
-        List<Vector> massCenters = new ArrayList<>();
-
-        for (Integer count : centers.keySet()) {
-            for (int i = 0; i < count; i++) {
-                Vector pnt = new DenseLocalOnHeapVector(2).assign(centers.get(count));
-                // Perturbate point on random value.
-                pnt.map(val -> val + rnd.nextDouble() * squareSideLen / 100);
-                mc[centIdx] = mc[centIdx].plus(pnt);
-                points.assignRow(permutation.get(totalCnt), pnt);
-                totalCnt++;
-            }
-            massCenters.add(mc[centIdx].times(1 / (double)count));
-            centIdx++;
-        }
-
-        EuclideanDistance dist = new EuclideanDistance();
-        OrderedNodesComparator comp = new OrderedNodesComparator(centers.values().toArray(new Vector[] {}), dist);
-
-        massCenters.sort(comp);
-        KMeansDistributedClusterer clusterer = new KMeansDistributedClusterer(dist, 3, 100, 1L);
-
-        KMeansModel mdl = clusterer.cluster(points, 4);
-        Vector[] resCenters = mdl.centers();
-        Arrays.sort(resCenters, comp);
-
-        checkIsInEpsilonNeighbourhood(resCenters, massCenters.toArray(new Vector[] {}), 30.0);
-
-        points.destroy();
-    }
-
-    /** */
-    private static class OrderedNodesComparator implements Comparator<Vector> {
-        /** */
-        private final DistanceMeasure measure;
-
-        /** */
-        List<Vector> orderedNodes;
-
-        /** */
-        OrderedNodesComparator(Vector[] orderedNodes, DistanceMeasure measure) {
-            this.orderedNodes = Arrays.asList(orderedNodes);
-            this.measure = measure;
-        }
-
-        /** */
-        private int findClosestNodeIndex(Vector v) {
-            return Functions.argmin(orderedNodes, v1 -> measure.compute(v1, v)).get1();
-        }
-
-        /** */
-        @Override public int compare(Vector v1, Vector v2) {
-            int ind1 = findClosestNodeIndex(v1);
-            int ind2 = findClosestNodeIndex(v2);
-
-            int signum = (int)Math.signum(ind1 - ind2);
-
-            if (signum != 0)
-                return signum;
-
-            return (int)Math.signum(orderedNodes.get(ind1).minus(v1).kNorm(2) -
-                orderedNodes.get(ind2).minus(v2).kNorm(2));
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansLocalClustererTest.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansLocalClustererTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansLocalClustererTest.java
deleted file mode 100644
index cd9b2ed..0000000
--- a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansLocalClustererTest.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.ml.clustering;
-
-import org.apache.ignite.ml.math.distances.EuclideanDistance;
-import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix;
-import org.junit.Assert;
-import org.junit.Test;
-
-/** */
-public class KMeansLocalClustererTest {
-    /**
-     * Two points, one cluster, one iteration
-     */
-    @Test
-    public void testPerformClusterAnalysisDegenerate() {
-        KMeansLocalClusterer clusterer = new KMeansLocalClusterer(new EuclideanDistance(), 1, 1L);
-
-        double[] v1 = new double[] {1959, 325100};
-        double[] v2 = new double[] {1960, 373200};
-
-        DenseLocalOnHeapMatrix points = new DenseLocalOnHeapMatrix(new double[][] {
-            v1,
-            v2});
-
-        KMeansModel mdl = clusterer.cluster(points, 1);
-
-        Assert.assertEquals(1, mdl.centers().length);
-        Assert.assertEquals(2, mdl.centers()[0].size());
-    }
-}

http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansModelTest.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansModelTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansModelTest.java
new file mode 100644
index 0000000..5b3ad85
--- /dev/null
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansModelTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.clustering;
+
+import org.apache.ignite.ml.TestUtils;
+import org.apache.ignite.ml.clustering.kmeans.KMeansModel;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.distances.DistanceMeasure;
+import org.apache.ignite.ml.math.distances.EuclideanDistance;
+import org.apache.ignite.ml.math.exceptions.CardinalityException;
+import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
+import org.apache.ignite.ml.regressions.linear.LinearRegressionModel;
+import org.apache.ignite.ml.svm.SVMLinearBinaryClassificationModel;
+import org.apache.ignite.ml.svm.SVMLinearMultiClassClassificationModel;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests for {@link KMeansModel}.
+ */
+public class KMeansModelTest {
+    /** Precision in test checks. */
+    private static final double PRECISION = 1e-6;
+
+    /** */
+    @Test
+    public void predictClusters() {
+        DistanceMeasure distanceMeasure = new EuclideanDistance();
+
+        Vector[] centers = new DenseLocalOnHeapVector[4];
+
+        centers[0] = new DenseLocalOnHeapVector(new double[]{1.0, 1.0});
+        centers[1] = new DenseLocalOnHeapVector(new double[]{-1.0, 1.0});
+        centers[2] = new DenseLocalOnHeapVector(new double[]{1.0, -1.0});
+        centers[3] = new DenseLocalOnHeapVector(new double[]{-1.0, -1.0});
+
+        KMeansModel mdl = new KMeansModel(centers, distanceMeasure);
+
+        Assert.assertEquals(mdl.apply(new DenseLocalOnHeapVector(new double[]{1.1, 1.1})), 0.0, PRECISION);
+        Assert.assertEquals(mdl.apply(new DenseLocalOnHeapVector(new double[]{-1.1, 1.1})), 1.0, PRECISION);
+        Assert.assertEquals(mdl.apply(new DenseLocalOnHeapVector(new double[]{1.1, -1.1})), 2.0, PRECISION);
+        Assert.assertEquals(mdl.apply(new DenseLocalOnHeapVector(new double[]{-1.1, -1.1})), 3.0, PRECISION);
+
+        Assert.assertEquals(mdl.distanceMeasure(), distanceMeasure);
+        Assert.assertEquals(mdl.amountOfClusters(), 4);
+        Assert.assertArrayEquals(mdl.centers(), centers);
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansTrainerTest.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansTrainerTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansTrainerTest.java
new file mode 100644
index 0000000..846d0de
--- /dev/null
+++ b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansTrainerTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.clustering;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.ignite.ml.clustering.kmeans.KMeansModel;
+import org.apache.ignite.ml.clustering.kmeans.KMeansTrainer;
+import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.distances.EuclideanDistance;
+import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@link KMeansTrainer}.
+ */
+public class KMeansTrainerTest {
+    /** Precision in test checks. */
+    private static final double PRECISION = 1e-2;
+
+    /**
+     * A few points, one cluster, one iteration
+     */
+    @Test
+    public void findOneClusters() {
+
+        Map<Integer, double[]> data = new HashMap<>();
+        data.put(0, new double[] {1.0, 1.0, 1.0});
+        data.put(1, new double[] {1.0, 2.0, 1.0});
+        data.put(2, new double[] {2.0, 1.0, 1.0});
+        data.put(3, new double[] {-1.0, -1.0, 2.0});
+        data.put(4, new double[] {-1.0, -2.0, 2.0});
+        data.put(5, new double[] {-2.0, -1.0, 2.0});
+
+        KMeansTrainer trainer = new KMeansTrainer()
+            .withDistance(new EuclideanDistance())
+            .withK(1)
+            .withMaxIterations(1)
+            .withEpsilon(PRECISION);
+
+        KMeansModel knnMdl = trainer.fit(
+            new LocalDatasetBuilder<>(data, 2),
+            (k, v) -> Arrays.copyOfRange(v, 0, v.length - 1),
+            (k, v) -> v[2]
+        );
+
+        Vector firstVector = new DenseLocalOnHeapVector(new double[] {2.0, 2.0});
+        assertEquals(knnMdl.apply(firstVector), 0.0, PRECISION);
+        Vector secondVector = new DenseLocalOnHeapVector(new double[] {-2.0, -2.0});
+        assertEquals(knnMdl.apply(secondVector), 0.0, PRECISION);
+        assertEquals(trainer.getMaxIterations(), 1);
+        assertEquals(trainer.getEpsilon(), PRECISION, PRECISION);
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/9e21cec0/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansUtil.java
----------------------------------------------------------------------
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansUtil.java b/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansUtil.java
deleted file mode 100644
index 420678f..0000000
--- a/modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansUtil.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ignite.ml.clustering;
-
-import org.apache.ignite.ml.math.Vector;
-
-import static org.junit.Assert.assertTrue;
-
-/** Utilities for k-means tests. */
-class KMeansUtil {
-    /** */
-    static void checkIsInEpsilonNeighbourhood(Vector[] v1s, Vector[] v2s, double epsilon) {
-        for (int i = 0; i < v1s.length; i++) {
-            assertTrue("Not in epsilon neighbourhood (index " + i + ") ",
-                v1s[i].minus(v2s[i]).kNorm(2) < epsilon);
-        }
-    }
-}


Mime
View raw message