labs-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From k...@apache.org
Subject svn commit: r1426735 - /labs/alike/trunk/src/java/org/apache/alike/FindOptimalCluster.java
Date Sat, 29 Dec 2012 10:11:29 GMT
Author: koji
Date: Sat Dec 29 10:11:28 2012
New Revision: 1426735

URL: http://svn.apache.org/viewvc?rev=1426735&view=rev
Log:
add optimal cluster finder

Added:
    labs/alike/trunk/src/java/org/apache/alike/FindOptimalCluster.java

Added: labs/alike/trunk/src/java/org/apache/alike/FindOptimalCluster.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/FindOptimalCluster.java?rev=1426735&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/FindOptimalCluster.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/FindOptimalCluster.java Sat Dec 29 10:11:28 2012
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike;
+
+import java.io.IOException;
+
+/**
+ * This program finds the best suited cluster from multiple cluster dump files:
+ * the dump file that yields the smallest mean error is reported as optimal.
+ */
+public class FindOptimalCluster {
+
+  // number of clusters, read from the configuration in main
+  static int K;
+  // number of dimensions, read from the configuration in main
+  static int D;
+
+  /**
+   * The main program that takes the path to alikeconfig.xml as the first argument and
+   * one or more arguments for cluster dump files path.
+   *
+   * @param args file path to alikeconfig.xml and one or more path to cluster dump files
+   * @throws IOException propagated from the configuration / dump file processing calls
+   *         (which callee raises it is not visible from this class)
+   */
+  public static void main(String[] args) throws IOException {
+    if(args.length < 2){
+      // printUsage exits the JVM because the exit status is non-negative
+      printUsage(1);
+    }
+
+    AlikeConfig config = new AlikeConfig(args[0]);
+
+    K = config.getNumOfClusters();
+    D = config.getNumOfDimensions();
+    ClusterDumpReader clusterDumpReader = new ClusterDumpReader(K, D);
+
+    double minError = Double.MAX_VALUE;
+    String optimalClusterDumpFile = null;
+    for(int i = 1; i < args.length; i++){
+      String dumpFile = args[i];
+
+      // read cluster centroids
+      double[][] centroids = clusterDumpReader.getCentroids(dumpFile);
+
+      MeanErrorExecutor executor =
+          new MeanErrorExecutor(K, centroids, config.getDistanceCalculator());
+      FileUtil.executeRecursively(executor, dumpFile);
+      double me = executor.getMeanError();
+      System.out.printf("%s : %f\n", dumpFile, me);
+      if(me < minError){
+        // keep the smallest mean error seen so far together with the dump that produced it;
+        // a NaN mean error never satisfies the comparison and is therefore skipped
+        minError = me;
+        optimalClusterDumpFile = dumpFile;
+      }
+    }
+
+    System.out.printf("\n%s is the best suited cluster w/ mean error is %f\n",
+        optimalClusterDumpFile, minError);
+  }
+
+  /**
+   * Prints the command line usage of this program to standard error.
+   *
+   * @param exit status handed to System.exit; a negative value means "do not exit"
+   */
+  static void printUsage(int exit){
+    System.err.printf("Usage: $ java %s <path-to-alikeconfig.xml> <path-to-cluster-dump-file-1>" +
+                        " [dump-file-2 ...]\n",
+        FindOptimalCluster.class.getName());
+    System.err.println("\t<path-to-alikeconfig.xml> the file path to alikeconfig.xml");
+    System.err.println("\t<path-to-cluster-dump-file> the file path to cluster dump file");
+
+    if(exit >= 0){
+      System.exit(exit);
+    }
+  }
+
+  /**
+   * Accumulates the minimum distances reported through minFoundProcess so that
+   * their mean can be read back with getMeanError.
+   */
+  static class MeanErrorExecutor extends VisualDescriptorsExecutorBase {
+
+    // running total of the reported minimum distances
+    private double sum;
+    // number of distances added to sum
+    private int count;
+
+    public MeanErrorExecutor(int k, double[][] centroids, DistanceCalculator distanceCalculator) {
+      super(k, centroids, distanceCalculator);
+      sum = 0;
+      count = 0;
+    }
+
+    @Override
+    protected void minFoundProcess(String key, int pos, double minDistance){
+      sum += minDistance;
+      count++;
+    }
+
+    /**
+     * @return the accumulated distance divided by the number of reported distances;
+     *         NaN when nothing has been reported yet (0.0 / 0)
+     */
+    public double getMeanError(){
+      return sum / count;
+    }
+  }
+}



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org


Mime
View raw message