mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From isa...@apache.org
Subject svn commit: r816594 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout: clustering/dirichlet/ clustering/fuzzykmeans/ clustering/meanshift/ common/commandline/
Date Fri, 18 Sep 2009 12:12:48 GMT
Author: isabel
Date: Fri Sep 18 12:12:47 2009
New Revision: 816594

URL: http://svn.apache.org/viewvc?rev=816594&view=rev
Log:
MAHOUT-138 - converted meanshift and added a generator for default
options.

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=816594&r1=816593&r2=816594&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Fri Sep 18 12:12:47 2009
@@ -40,6 +40,7 @@
 import org.apache.mahout.matrix.SparseVector;
 import org.apache.mahout.matrix.Vector;
 import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -67,21 +68,11 @@
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
 
-    Option inputOpt = obuilder.withLongName("input").withRequired(true).withShortName("i").
-        withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
-        withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
-    
-    Option outputOpt = obuilder.withLongName("output").withRequired(true).withShortName("o").
-        withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
-        withDescription("The directory pathname for output points.").create();
-    
-    Option maxIterOpt = obuilder.withLongName("maxIter").withRequired(true).withShortName("x").
-        withArgument(abuilder.withName("maxIter").withMinimum(1).withMaximum(1).create()).
-        withDescription("The maximum number of iterations.").create();
-    
-    Option topicsOpt = obuilder.withLongName("numModels").withRequired(true).withArgument(
-        abuilder.withName("numModels").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The number of models").withShortName("k").create();
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder);
+    Option topicsOpt = DefaultOptionCreator.kOption(obuilder, abuilder);
+    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
 
     Option mOpt = obuilder.withLongName("alpha").withRequired(true).withShortName("m").
         withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).create()).
@@ -95,9 +86,6 @@
         withArgument(abuilder.withName("maxRed").withMinimum(1).withMaximum(1).create()).
         withDescription("The number of reduce tasks.").create();
 
-    Option helpOpt = obuilder.withLongName("help").
-        withDescription("Print out help").withShortName("h").create();
-
     Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).
         withOption(maxIterOpt).withOption(mOpt).withOption(topicsOpt).withOption(helpOpt).
         withOption(numRedOpt).create();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java?rev=816594&r1=816593&r2=816594&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java Fri Sep 18 12:12:47 2009
@@ -30,6 +30,7 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.log4j.Logger;
 import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
 
 import java.io.IOException;
 
@@ -46,21 +47,11 @@
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
 
-    Option inputOpt = obuilder.withLongName("input").withRequired(true).withShortName("i").
-        withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
-        withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
-    
-    Option outputOpt = obuilder.withLongName("output").withRequired(true).withShortName("o").
-        withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
-        withDescription("The directory pathname for output points.").create();
-    
-    Option maxIterOpt = obuilder.withLongName("maxIter").withRequired(true).withShortName("x").
-        withArgument(abuilder.withName("maxIter").withMinimum(1).withMaximum(1).create()).
-        withDescription("The maximum number of iterations.").create();
-    
-    Option topicsOpt = obuilder.withLongName("numModels").withRequired(true).withArgument(
-        abuilder.withName("numModels").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The number of models").withShortName("k").create();
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder);
+    Option topicsOpt = DefaultOptionCreator.kOption(obuilder, abuilder);
+    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
 
     Option mOpt = obuilder.withLongName("alpha").withRequired(true).withShortName("m").
         withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).create()).
@@ -70,9 +61,6 @@
         withArgument(abuilder.withName("modelClass").withMinimum(1).withMaximum(1).create()).
           withDescription("The ModelDistribution class name.").create();
 
-    Option helpOpt = obuilder.withLongName("help").
-        withDescription("Print out help").withShortName("h").create();
-
     Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).
         withOption(maxIterOpt).withOption(mOpt).withOption(topicsOpt).withOption(helpOpt).create();
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java?rev=816594&r1=816593&r2=816594&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java Fri Sep 18 12:12:47 2009
@@ -27,6 +27,7 @@
 import org.apache.commons.cli2.commandline.Parser;
 import org.apache.mahout.clustering.canopy.CanopyDriver;
 import org.apache.mahout.matrix.Vector;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
 import org.apache.mahout.common.CommandLineUtil;
 import org.slf4j.Logger;
@@ -48,29 +49,16 @@
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
 
-    Option inputOpt = obuilder.withLongName("input").withRequired(true).withShortName("i").
-        withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
-        withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector").withShortName("i").create();
-    
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder);
+    Option measureClassOpt = DefaultOptionCreator.distanceOption(obuilder, abuilder);
+    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder);
+    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
+
     Option clustersOpt = obuilder.withLongName("clusters").withRequired(true).withShortName("c").
         withArgument(abuilder.withName("clusters").withMinimum(1).withMaximum(1).create()).
         withDescription("The directory pathname for initial clusters.").create();
-
-    Option outputOpt = obuilder.withLongName("output").withRequired(true).withShortName("o").
-        withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
-        withDescription("The directory pathname for output points.").create();
-    
-    Option measureClassOpt = obuilder.withLongName("measure").withRequired(true).withShortName("d").
-        withArgument(abuilder.withName("measure").withMinimum(1).withMaximum(1).create()).
-        withDescription("The classname of the DistanceMeasure.").create();
-    
-    Option convergenceDeltaOpt = obuilder.withLongName("convergencedelta").withRequired(true).withShortName("v").
-        withArgument(abuilder.withName("convergenceDelta").withMinimum(1).withMaximum(1).create()).
-        withDescription("The convergence delta value.").create();
-    
-    Option maxIterOpt = obuilder.withLongName("maxIter").withRequired(true).withShortName("x").
-        withArgument(abuilder.withName("maxIter").withMinimum(1).withMaximum(1).create()).
-        withDescription("The maximum number of iterations.").create();
     
     Option numMapOpt = obuilder.withLongName("maxMap").withRequired(true).withShortName("p").
         withArgument(abuilder.withName("maxMap").withMinimum(1).withMaximum(1).create()).
@@ -92,9 +80,6 @@
         withArgument(abuilder.withName("vectorclass").withMinimum(1).withMaximum(1).create()).
         withDescription("Class name of vector implementation to use.").create();
     
-    Option helpOpt = obuilder.withLongName("help").
-        withDescription("Print out help").withShortName("h").create();
-
     Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(clustersOpt).
         withOption(outputOpt).withOption(measureClassOpt).withOption(convergenceDeltaOpt).
         withOption(maxIterOpt).withOption(numMapOpt).withOption(numRedOpt).withOption(doCanopyOpt).

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=816594&r1=816593&r2=816594&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Fri Sep 18 12:12:47 2009
@@ -17,6 +17,14 @@
 
 package org.apache.mahout.clustering.meanshift;
 
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.FileInputFormat;
@@ -25,6 +33,8 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -39,14 +49,53 @@
   }
 
   public static void main(String[] args) {
-    String input = args[0];
-    String output = args[1];
-    String measureClassName = args[2];
-    double t1 = Double.parseDouble(args[3]);
-    double t2 = Double.parseDouble(args[4]);
-    double convergenceDelta = Double.parseDouble(args[5]);
-    runJob(input, output, output + MeanShiftCanopy.CONTROL_PATH_KEY,
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+    GroupBuilder gbuilder = new GroupBuilder();
+
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder);
+    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);    
+
+    Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d").
+        withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()).
+        withDescription("The distance measure class name.").create();
+
+
+    Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1").
+        withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create()).
+        withDescription("The T1 distance threshold.").create();
+
+    Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2").
+        withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create()).
+        withDescription("The T1 distance threshold.").create();
+
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).
+        withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt).
+        withOption(threshold2Opt).create();
+
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      CommandLine cmdLine = parser.parse(args);
+      if (cmdLine.hasOption(helpOpt)) {
+        CommandLineUtil.printHelp(group);
+        return;
+      }
+
+      String input = cmdLine.getValue(inputOpt).toString();
+      String output = cmdLine.getValue(outputOpt).toString();
+      String measureClassName = cmdLine.getValue(modelOpt).toString();
+      double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt).toString());
+      double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString());
+      double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
+      runJob(input, output, output + MeanShiftCanopy.CONTROL_PATH_KEY,
         measureClassName, t1, t2, convergenceDelta);
+    } catch (OptionException e) {
+      log.error("Exception parsing command line: ", e);
+      CommandLineUtil.printHelp(group);
+    }
   }
 
   /**

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java?rev=816594&r1=816593&r2=816594&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java Fri Sep 18 12:12:47 2009
@@ -17,9 +17,19 @@
 
 package org.apache.mahout.clustering.meanshift;
 
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -36,15 +46,55 @@
   }
 
   public static void main(String[] args) throws IOException {
-    String input = args[0];
-    String output = args[1];
-    String measureClassName = args[2];
-    double t1 = Double.parseDouble(args[3]);
-    double t2 = Double.parseDouble(args[4]);
-    double convergenceDelta = Double.parseDouble(args[5]);
-    int maxIterations = Integer.parseInt(args[6]);
-    runJob(input, output, measureClassName, t1, t2, convergenceDelta,
-        maxIterations);
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+    GroupBuilder gbuilder = new GroupBuilder();
+
+    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder);
+    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder);
+    Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption(obuilder, abuilder);
+    Option maxIterOpt = DefaultOptionCreator.maxIterOption(obuilder, abuilder);
+    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);    
+
+    Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d").
+        withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()).
+        withDescription("The distance measure class name.").create();
+
+
+    Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1").
+        withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create()).
+        withDescription("The T1 distance threshold.").create();
+
+    Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2").
+        withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create()).
+        withDescription("The T1 distance threshold.").create();
+
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).
+        withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(maxIterOpt).
+        withOption(threshold2Opt).create();
+
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      CommandLine cmdLine = parser.parse(args);
+      if (cmdLine.hasOption(helpOpt)) {
+        CommandLineUtil.printHelp(group);
+        return;
+      }
+
+      String input = cmdLine.getValue(inputOpt).toString();
+      String output = cmdLine.getValue(outputOpt).toString();
+      String measureClassName = cmdLine.getValue(modelOpt).toString();
+      double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt).toString());
+      double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString());
+      double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString());
+      int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
+      runJob(input, output, measureClassName, t1, t2, convergenceDelta,
+          maxIterations);
+    } catch (OptionException e) {
+      log.error("Exception parsing command line: ", e);
+      CommandLineUtil.printHelp(group);
+    }
   }
 
   /**

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java?rev=816594&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java Fri Sep 18 12:12:47 2009
@@ -0,0 +1,108 @@
+package org.apache.mahout.common.commandline;
+
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+
+public class DefaultOptionCreator {
+  /**
+   * Returns a default command line option for convergence delta specification.
+   */
+  public static Option convergenceOption(
+      final DefaultOptionBuilder obuilder, final ArgumentBuilder abuilder) {
+    Option convergenceDeltaOpt = obuilder.withLongName("convergencedelta")
+        .withRequired(true).withShortName("v").withArgument(
+            abuilder.withName("convergenceDelta").withMinimum(1).withMaximum(1)
+                .create()).withDescription("The convergence delta value.")
+        .create();
+    return convergenceDeltaOpt;
+  }
+
+  /**
+   * Returns a default command line option for output directory specification.
+   */
+  public static Option outputOption(final DefaultOptionBuilder obuilder,
+      final ArgumentBuilder abuilder) {
+    Option outputOpt = obuilder.withLongName("output").withRequired(true)
+        .withShortName("o").withArgument(
+            abuilder.withName("output").withMinimum(1).withMaximum(1).create())
+        .withDescription("The directory pathname for output points.").create();
+    return outputOpt;
+  }
+
+  /**
+   * Returns a default command line option for input directory specification.
+   */
+  public static Option inputOption(final DefaultOptionBuilder obuilder,
+      final ArgumentBuilder abuilder) {
+    Option inputOpt = obuilder
+        .withLongName("input")
+        .withRequired(true)
+        .withShortName("i")
+        .withArgument(
+            abuilder.withName("input").withMinimum(1).withMaximum(1).create())
+        .withDescription(
+            "The Path for input Vectors. Must be a SequenceFile of Writable, Vector")
+        .create();
+    return inputOpt;
+  }
+
+  /**
+   * Returns a default command line option for specification of numbers of
+   * clusters to create.
+   */
+  public static Option kOption(DefaultOptionBuilder obuilder,
+      ArgumentBuilder abuilder) {
+    Option clustersOpt = obuilder
+        .withLongName("k")
+        .withRequired(false)
+        .withArgument(
+            abuilder.withName("k").withMinimum(1).withMaximum(1).create())
+        .withDescription(
+            "The k in k-Means.  If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters output path.")
+        .withShortName("k").create();
+    return clustersOpt;
+  }
+
+  /**
+   * Returns a default command line option for specification of max number of
+   * iterations.
+   */
+  public static Option maxIterOption(DefaultOptionBuilder obuilder,
+      ArgumentBuilder abuilder) {
+    Option maxIterOpt = obuilder
+        .withLongName("maxIter")
+        .withRequired(true)
+        .withShortName("x")
+        .withArgument(
+            abuilder.withName("maxIter").withMinimum(1).withMaximum(1).create())
+        .withDescription("The maximum number of iterations.").create();
+    return maxIterOpt;
+  }
+
+  /**
+   * Returns a default command line option for specification of distance measure
+   * class to use.
+   */
+  public static Option distanceOption(DefaultOptionBuilder obuilder,
+      ArgumentBuilder abuilder) {
+    Option measureClassOpt = obuilder
+        .withLongName("measure")
+        .withRequired(true)
+        .withShortName("d")
+        .withArgument(
+            abuilder.withName("measure").withMinimum(1).withMaximum(1).create())
+        .withDescription("The classname of the DistanceMeasure.").create();
+    return measureClassOpt;
+  }
+
+  /**
+   * Returns a default command line option for help.
+   * */
+  public static Option helpOption(DefaultOptionBuilder obuilder) {
+    Option helpOpt = obuilder.withLongName("help").
+    withDescription("Print out help").withShortName("h").create();
+    return helpOpt;
+  }
+
+}



Mime
View raw message