mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From robina...@apache.org
Subject svn commit: r909914 [2/5] - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/clustering/ main/java/org/apache/mahout/clustering/canopy/ main/java/org/apache/mahout/clustering/dirichlet/ main/java/org/apache/mahout/clustering/dirichlet/mode...
Date Sat, 13 Feb 2010 21:08:12 GMT
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Sat Feb 13 21:07:53 2010
@@ -50,58 +50,58 @@
 import org.slf4j.LoggerFactory;
 
 public class DirichletDriver {
-
+  
   public static final String STATE_IN_KEY = "org.apache.mahout.clustering.dirichlet.stateIn";
-
+  
   public static final String MODEL_FACTORY_KEY = "org.apache.mahout.clustering.dirichlet.modelFactory";
-
+  
   public static final String MODEL_PROTOTYPE_KEY = "org.apache.mahout.clustering.dirichlet.modelPrototype";
-
+  
   public static final String PROTOTYPE_SIZE_KEY = "org.apache.mahout.clustering.dirichlet.prototypeSize";
-
+  
   public static final String NUM_CLUSTERS_KEY = "org.apache.mahout.clustering.dirichlet.numClusters";
-
+  
   public static final String ALPHA_0_KEY = "org.apache.mahout.clustering.dirichlet.alpha_0";
-
+  
   private static final Logger log = LoggerFactory.getLogger(DirichletDriver.class);
-
-  private DirichletDriver() {
-  }
-
+  
+  private DirichletDriver() { }
+  
   public static void main(String[] args) throws Exception {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-
+    
     Option inputOpt = DefaultOptionCreator.inputOption().create();
     Option outputOpt = DefaultOptionCreator.outputOption().create();
     Option maxIterOpt = DefaultOptionCreator.maxIterOption().create();
     Option topicsOpt = DefaultOptionCreator.kOption().create();
     Option helpOpt = DefaultOptionCreator.helpOption();
-
+    
     Option mOpt = obuilder.withLongName("alpha").withRequired(true).withShortName("m").withArgument(
-        abuilder.withName("alpha").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The alpha0 value for the DirichletDistribution.").create();
-
+      abuilder.withName("alpha").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The alpha0 value for the DirichletDistribution.").create();
+    
     Option modelOpt = obuilder.withLongName("modelClass").withRequired(true).withShortName("d").withArgument(
-        abuilder.withName("modelClass").withMinimum(1).withMaximum(1).create())
-        .withDescription("The ModelDistribution class name.").create();
-
-    Option prototypeOpt = obuilder.withLongName("modelPrototypeClass").withRequired(true).withShortName("p").withArgument(
-        abuilder.withName("prototypeClass").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The ModelDistribution prototype Vector class name.").create();
-
-    Option sizeOpt = obuilder.withLongName("prototypeSize").withRequired(true).withShortName("s").withArgument(
-        abuilder.withName("prototypeSize").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The ModelDistribution prototype Vector size.").create();
-
+      abuilder.withName("modelClass").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The ModelDistribution class name.").create();
+    
+    Option prototypeOpt = obuilder.withLongName("modelPrototypeClass").withRequired(true).withShortName("p")
+        .withArgument(abuilder.withName("prototypeClass").withMinimum(1).withMaximum(1).create())
+        .withDescription("The ModelDistribution prototype Vector class name.").create();
+    
+    Option sizeOpt = obuilder.withLongName("prototypeSize").withRequired(true).withShortName("s")
+        .withArgument(abuilder.withName("prototypeSize").withMinimum(1).withMaximum(1).create())
+        .withDescription("The ModelDistribution prototype Vector size.").create();
+    
     Option numRedOpt = obuilder.withLongName("maxRed").withRequired(true).withShortName("r").withArgument(
-        abuilder.withName("maxRed").withMinimum(1).withMaximum(1).create()).withDescription("The number of reduce tasks.").create();
-
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).withOption(
-        prototypeOpt).withOption(sizeOpt).withOption(maxIterOpt).withOption(mOpt).withOption(topicsOpt).withOption(helpOpt)
-        .withOption(numRedOpt).create();
-
+      abuilder.withName("maxRed").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The number of reduce tasks.").create();
+    
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
+        .withOption(modelOpt).withOption(prototypeOpt).withOption(sizeOpt).withOption(maxIterOpt).withOption(
+          mOpt).withOption(topicsOpt).withOption(helpOpt).withOption(numRedOpt).create();
+    
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
@@ -110,7 +110,7 @@
         CommandLineUtil.printHelp(group);
         return;
       }
-
+      
       String input = cmdLine.getValue(inputOpt).toString();
       String output = cmdLine.getValue(outputOpt).toString();
       String modelFactory = cmdLine.getValue(modelOpt).toString();
@@ -120,23 +120,31 @@
       int numModels = Integer.parseInt(cmdLine.getValue(topicsOpt).toString());
       int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
       double alpha_0 = Double.parseDouble(cmdLine.getValue(mOpt).toString());
-      runJob(input, output, modelFactory, modelPrototype, prototypeSize, numModels, maxIterations, alpha_0, numReducers);
+      DirichletDriver.runJob(input, output, modelFactory, modelPrototype, prototypeSize, numModels,
+        maxIterations, alpha_0, numReducers);
     } catch (OptionException e) {
-      log.error("Exception parsing command line: ", e);
+      DirichletDriver.log.error("Exception parsing command line: ", e);
       CommandLineUtil.printHelp(group);
     }
   }
-
+  
   /**
    * Run the job using supplied arguments
-   *
-   * @param input         the directory pathname for input points
-   * @param output        the directory pathname for output points
-   * @param modelFactory  the String ModelDistribution class name to use
-   * @param numClusters   the number of models
-   * @param maxIterations the maximum number of iterations
-   * @param alpha_0       the alpha_0 value for the DirichletDistribution
-   * @param numReducers   the number of Reducers desired
+   * 
+   * @param input
+   *          the directory pathname for input points
+   * @param output
+   *          the directory pathname for output points
+   * @param modelFactory
+   *          the String ModelDistribution class name to use
+   * @param numClusters
+   *          the number of models
+   * @param maxIterations
+   *          the maximum number of iterations
+   * @param alpha_0
+   *          the alpha_0 value for the DirichletDistribution
+   * @param numReducers
+   *          the number of Reducers desired
    * @deprecated since it presumes 2-d, dense vector model prototypes
    */
   @Deprecated
@@ -146,22 +154,34 @@
                             int numClusters,
                             int maxIterations,
                             double alpha_0,
-                            int numReducers)
-      throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException,
-             SecurityException, NoSuchMethodException, InvocationTargetException {
-    runJob(input, output, modelFactory, "org.apache.mahout.math.DenseVector", 2, numClusters, maxIterations, alpha_0, numReducers);
+                            int numReducers) throws ClassNotFoundException,
+                                            InstantiationException,
+                                            IllegalAccessException,
+                                            IOException,
+                                            SecurityException,
+                                            NoSuchMethodException,
+                                            InvocationTargetException {
+    DirichletDriver.runJob(input, output, modelFactory, "org.apache.mahout.math.DenseVector", 2, numClusters,
+      maxIterations, alpha_0, numReducers);
   }
-
+  
   /**
    * Run the job using supplied arguments
-   *
-   * @param input         the directory pathname for input points
-   * @param output        the directory pathname for output points
-   * @param modelFactory  the String ModelDistribution class name to use
-   * @param numClusters   the number of models
-   * @param maxIterations the maximum number of iterations
-   * @param alpha_0       the alpha_0 value for the DirichletDistribution
-   * @param numReducers   the number of Reducers desired
+   * 
+   * @param input
+   *          the directory pathname for input points
+   * @param output
+   *          the directory pathname for output points
+   * @param modelFactory
+   *          the String ModelDistribution class name to use
+   * @param numClusters
+   *          the number of models
+   * @param maxIterations
+   *          the maximum number of iterations
+   * @param alpha_0
+   *          the alpha_0 value for the DirichletDistribution
+   * @param numReducers
+   *          the number of Reducers desired
    */
   public static void runJob(String input,
                             String output,
@@ -171,34 +191,45 @@
                             int numClusters,
                             int maxIterations,
                             double alpha_0,
-                            int numReducers)
-      throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException,
-      SecurityException, NoSuchMethodException, InvocationTargetException {
-
+                            int numReducers) throws ClassNotFoundException,
+                                            InstantiationException,
+                                            IllegalAccessException,
+                                            IOException,
+                                            SecurityException,
+                                            NoSuchMethodException,
+                                            InvocationTargetException {
+    
     String stateIn = output + "/state-0";
-    writeInitialState(output, stateIn, modelFactory, modelPrototype, prototypeSize, numClusters, alpha_0);
-
+    DirichletDriver.writeInitialState(output, stateIn, modelFactory, modelPrototype, prototypeSize,
+      numClusters, alpha_0);
+    
     for (int iteration = 0; iteration < maxIterations; iteration++) {
-      log.info("Iteration {}", iteration);
+      DirichletDriver.log.info("Iteration {}", iteration);
       // point the output to a new directory per iteration
       String stateOut = output + "/state-" + (iteration + 1);
-      runIteration(input, stateIn, stateOut, modelFactory, modelPrototype, prototypeSize, numClusters, alpha_0, numReducers);
+      DirichletDriver.runIteration(input, stateIn, stateOut, modelFactory, modelPrototype, prototypeSize,
+        numClusters, alpha_0, numReducers);
       // now point the input to the old output directory
       stateIn = stateOut;
     }
   }
-
+  
   private static void writeInitialState(String output,
                                         String stateIn,
                                         String modelFactory,
                                         String modelPrototype,
                                         int prototypeSize,
                                         int numModels,
-                                        double alpha_0)
-      throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException,
-      SecurityException, NoSuchMethodException, InvocationTargetException {
-
-    DirichletState<VectorWritable> state = createState(modelFactory, modelPrototype, prototypeSize, numModels, alpha_0);
+                                        double alpha_0) throws ClassNotFoundException,
+                                                       InstantiationException,
+                                                       IllegalAccessException,
+                                                       IOException,
+                                                       SecurityException,
+                                                       NoSuchMethodException,
+                                                       InvocationTargetException {
+    
+    DirichletState<VectorWritable> state = DirichletDriver.createState(modelFactory, modelPrototype,
+      prototypeSize, numModels, alpha_0);
     JobConf job = new JobConf(KMeansDriver.class);
     Path outPath = new Path(output);
     FileSystem fs = FileSystem.get(outPath.toUri(), job);
@@ -210,110 +241,138 @@
       writer.close();
     }
   }
-
+  
   /**
-   * Creates a DirichletState object from the given arguments. Note that the modelFactory
-   * is presumed to be a subclass of VectorModelDistribution that can be initialized with
-   * a concrete Vector prototype.
+   * Creates a DirichletState object from the given arguments. Note that the modelFactory is presumed to be a
+   * subclass of VectorModelDistribution that can be initialized with a concrete Vector prototype.
    * 
-   * @param modelFactory a String which is the class name of the model factory
-   * @param modelPrototype a String which is the class name of the Vector used to initialize the factory
-   * @param prototypeSize an int number of dimensions of the model prototype vector
-   * @param numModels an int number of models to be created
-   * @param alpha_0 the double alpha_0 argument to the algorithm
+   * @param modelFactory
+   *          a String which is the class name of the model factory
+   * @param modelPrototype
+   *          a String which is the class name of the Vector used to initialize the factory
+   * @param prototypeSize
+   *          an int number of dimensions of the model prototype vector
+   * @param numModels
+   *          an int number of models to be created
+   * @param alpha_0
+   *          the double alpha_0 argument to the algorithm
    * @return an initialized DirichletState
    */
   public static DirichletState<VectorWritable> createState(String modelFactory,
                                                            String modelPrototype,
                                                            int prototypeSize,
                                                            int numModels,
-                                                           double alpha_0)
-      throws ClassNotFoundException, InstantiationException, IllegalAccessException,
-      SecurityException, NoSuchMethodException, IllegalArgumentException, InvocationTargetException {
-
+                                                           double alpha_0) throws ClassNotFoundException,
+                                                                          InstantiationException,
+                                                                          IllegalAccessException,
+                                                                          SecurityException,
+                                                                          NoSuchMethodException,
+                                                                          IllegalArgumentException,
+                                                                          InvocationTargetException {
+    
     ClassLoader ccl = Thread.currentThread().getContextClassLoader();
-    Class<? extends VectorModelDistribution> cl = ccl.loadClass(modelFactory).asSubclass(VectorModelDistribution.class);
+    Class<? extends VectorModelDistribution> cl = ccl.loadClass(modelFactory).asSubclass(
+      VectorModelDistribution.class);
     VectorModelDistribution factory = cl.newInstance();
-
+    
     Class<? extends Vector> vcl = ccl.loadClass(modelPrototype).asSubclass(Vector.class);
     Constructor<? extends Vector> v = vcl.getConstructor(int.class);
     factory.setModelPrototype(new VectorWritable(v.newInstance(prototypeSize)));
     return new DirichletState<VectorWritable>(factory, numModels, alpha_0, 1, 1);
   }
-
+  
   /**
    * Run the job using supplied arguments
-   *
-   * @param input        the directory pathname for input points
-   * @param stateIn      the directory pathname for input state
-   * @param stateOut     the directory pathname for output state
-   * @param modelFactory the class name of the model factory class
-   * @param modelPrototype TODO
-   * @param prototypeSize TODO
-   * @param numClusters  the number of clusters
-   * @param alpha_0      alpha_0
-   * @param numReducers  the number of Reducers desired
+   * 
+   * @param input
+   *          the directory pathname for input points
+   * @param stateIn
+   *          the directory pathname for input state
+   * @param stateOut
+   *          the directory pathname for output state
+   * @param modelFactory
+   *          the class name of the model factory class
+   * @param modelPrototype
+   *          TODO
+   * @param prototypeSize
+   *          TODO
+   * @param numClusters
+   *          the number of clusters
+   * @param alpha_0
+   *          alpha_0
+   * @param numReducers
+   *          the number of Reducers desired
    */
-  public static void runIteration(String input, String stateIn, String stateOut, String modelFactory, String modelPrototype,
-      int prototypeSize, int numClusters, double alpha_0, int numReducers) {
+  public static void runIteration(String input,
+                                  String stateIn,
+                                  String stateOut,
+                                  String modelFactory,
+                                  String modelPrototype,
+                                  int prototypeSize,
+                                  int numClusters,
+                                  double alpha_0,
+                                  int numReducers) {
     Configurable client = new JobClient();
     JobConf conf = new JobConf(DirichletDriver.class);
-
+    
     conf.setOutputKeyClass(Text.class);
     conf.setOutputValueClass(DirichletCluster.class);
     conf.setMapOutputKeyClass(Text.class);
     conf.setMapOutputValueClass(VectorWritable.class);
-
+    
     FileInputFormat.setInputPaths(conf, new Path(input));
     Path outPath = new Path(stateOut);
     FileOutputFormat.setOutputPath(conf, outPath);
-
+    
     conf.setMapperClass(DirichletMapper.class);
     conf.setReducerClass(DirichletReducer.class);
     conf.setNumReduceTasks(numReducers);
     conf.setInputFormat(SequenceFileInputFormat.class);
     conf.setOutputFormat(SequenceFileOutputFormat.class);
-    conf.set(STATE_IN_KEY, stateIn);
-    conf.set(MODEL_FACTORY_KEY, modelFactory);
-    conf.set(MODEL_PROTOTYPE_KEY, modelPrototype);
-    conf.set(PROTOTYPE_SIZE_KEY, Integer.toString(prototypeSize));
-    conf.set(NUM_CLUSTERS_KEY, Integer.toString(numClusters));
-    conf.set(ALPHA_0_KEY, Double.toString(alpha_0));
-
+    conf.set(DirichletDriver.STATE_IN_KEY, stateIn);
+    conf.set(DirichletDriver.MODEL_FACTORY_KEY, modelFactory);
+    conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, modelPrototype);
+    conf.set(DirichletDriver.PROTOTYPE_SIZE_KEY, Integer.toString(prototypeSize));
+    conf.set(DirichletDriver.NUM_CLUSTERS_KEY, Integer.toString(numClusters));
+    conf.set(DirichletDriver.ALPHA_0_KEY, Double.toString(alpha_0));
+    
     client.setConf(conf);
     try {
       JobClient.runJob(conf);
     } catch (IOException e) {
-      log.warn(e.toString(), e);
+      DirichletDriver.log.warn(e.toString(), e);
     }
   }
-
+  
   /**
    * Run the job using supplied arguments
-   *
-   * @param input   the directory pathname for input points
-   * @param stateIn the directory pathname for input state
-   * @param output  the directory pathname for output points
+   * 
+   * @param input
+   *          the directory pathname for input points
+   * @param stateIn
+   *          the directory pathname for input state
+   * @param output
+   *          the directory pathname for output points
    */
   public static void runClustering(String input, String stateIn, String output) {
     Configurable client = new JobClient();
     JobConf conf = new JobConf(DirichletDriver.class);
-
+    
     conf.setOutputKeyClass(Text.class);
     conf.setOutputValueClass(Text.class);
-
+    
     FileInputFormat.setInputPaths(conf, new Path(input));
     Path outPath = new Path(output);
     FileOutputFormat.setOutputPath(conf, outPath);
-
+    
     conf.setMapperClass(DirichletMapper.class);
     conf.setNumReduceTasks(0);
-
+    
     client.setConf(conf);
     try {
       JobClient.runJob(conf);
     } catch (IOException e) {
-      log.warn(e.toString(), e);
+      DirichletDriver.log.warn(e.toString(), e);
     }
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletJob.java Sat Feb 13 21:07:53 2010
@@ -37,44 +37,43 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class DirichletJob {
-
+public final class DirichletJob {
+  
   private static final Logger log = LoggerFactory.getLogger(DirichletJob.class);
-
-  private DirichletJob() {
-  }
-
+  
+  private DirichletJob() { }
+  
   public static void main(String[] args) throws Exception {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-
+    
     Option inputOpt = DefaultOptionCreator.inputOption().create();
     Option outputOpt = DefaultOptionCreator.outputOption().create();
     Option maxIterOpt = DefaultOptionCreator.maxIterOption().create();
     Option topicsOpt = DefaultOptionCreator.kOption().create();
     Option helpOpt = DefaultOptionCreator.helpOption();
-
+    
     Option mOpt = obuilder.withLongName("alpha").withRequired(true).withShortName("m").withArgument(
-        abuilder.withName("alpha").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The alpha0 value for the DirichletDistribution.").create();
-
+      abuilder.withName("alpha").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The alpha0 value for the DirichletDistribution.").create();
+    
     Option modelOpt = obuilder.withLongName("modelClass").withRequired(true).withShortName("d").withArgument(
-        abuilder.withName("modelClass").withMinimum(1).withMaximum(1).create())
-        .withDescription("The ModelDistribution class name.").create();
-
-    Option prototypeOpt = obuilder.withLongName("modelPrototypeClass").withRequired(true).withShortName("p").withArgument(
-        abuilder.withName("prototypeClass").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The ModelDistribution prototype Vector class name.").create();
-
-    Option sizeOpt = obuilder.withLongName("prototypeSize").withRequired(true).withShortName("s").withArgument(
-        abuilder.withName("prototypeSize").withMinimum(1).withMaximum(1).create()).withDescription(
-        "The ModelDistribution prototype Vector size.").create();
-
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt).withOption(
-        prototypeOpt).withOption(sizeOpt).withOption(maxIterOpt).withOption(mOpt).withOption(topicsOpt).withOption(helpOpt)
-        .create();
-
+      abuilder.withName("modelClass").withMinimum(1).withMaximum(1).create()).withDescription(
+      "The ModelDistribution class name.").create();
+    
+    Option prototypeOpt = obuilder.withLongName("modelPrototypeClass").withRequired(true).withShortName("p")
+        .withArgument(abuilder.withName("prototypeClass").withMinimum(1).withMaximum(1).create())
+        .withDescription("The ModelDistribution prototype Vector class name.").create();
+    
+    Option sizeOpt = obuilder.withLongName("prototypeSize").withRequired(true).withShortName("s")
+        .withArgument(abuilder.withName("prototypeSize").withMinimum(1).withMaximum(1).create())
+        .withDescription("The ModelDistribution prototype Vector size.").create();
+    
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
+        .withOption(modelOpt).withOption(prototypeOpt).withOption(sizeOpt).withOption(maxIterOpt).withOption(
+          mOpt).withOption(topicsOpt).withOption(helpOpt).create();
+    
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
@@ -83,7 +82,7 @@
         CommandLineUtil.printHelp(group);
         return;
       }
-
+      
       String input = cmdLine.getValue(inputOpt).toString();
       String output = cmdLine.getValue(outputOpt).toString();
       String modelFactory = cmdLine.getValue(modelOpt).toString();
@@ -92,25 +91,34 @@
       int numModels = Integer.parseInt(cmdLine.getValue(topicsOpt).toString());
       int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
       double alpha_0 = Double.parseDouble(cmdLine.getValue(mOpt).toString());
-      runJob(input, output, modelFactory, modelPrototype, prototypeSize, numModels, maxIterations, alpha_0);
+      DirichletJob.runJob(input, output, modelFactory, modelPrototype, prototypeSize, numModels,
+        maxIterations, alpha_0);
     } catch (OptionException e) {
-      log.error("Exception parsing command line: ", e);
+      DirichletJob.log.error("Exception parsing command line: ", e);
       CommandLineUtil.printHelp(group);
     }
-
+    
   }
-
+  
   /**
    * Run the job using supplied arguments, deleting the output directory if it exists beforehand
-   *
-   * @param input         the directory pathname for input points
-   * @param output        the directory pathname for output points
-   * @param modelFactory  the ModelDistribution class name
-   * @param modelPrototype the Vector class name used by the modelFactory
-   * @param prototypeSize the size of the prototype vector
-   * @param numModels     the number of Models
-   * @param maxIterations the maximum number of iterations
-   * @param alpha_0       the alpha0 value for the DirichletDistribution
+   * 
+   * @param input
+   *          the directory pathname for input points
+   * @param output
+   *          the directory pathname for output points
+   * @param modelFactory
+   *          the ModelDistribution class name
+   * @param modelPrototype
+   *          the Vector class name used by the modelFactory
+   * @param prototypeSize
+   *          the size of the prototype vector
+   * @param numModels
+   *          the number of Models
+   * @param maxIterations
+   *          the maximum number of iterations
+   * @param alpha_0
+   *          the alpha0 value for the DirichletDistribution
    */
   public static void runJob(String input,
                             String output,
@@ -119,9 +127,13 @@
                             int prototypeSize,
                             int numModels,
                             int maxIterations,
-                            double alpha_0)
-      throws IOException, ClassNotFoundException, InstantiationException,
-      IllegalAccessException, SecurityException, NoSuchMethodException, InvocationTargetException {
+                            double alpha_0) throws IOException,
+                                           ClassNotFoundException,
+                                           InstantiationException,
+                                           IllegalAccessException,
+                                           SecurityException,
+                                           NoSuchMethodException,
+                                           InvocationTargetException {
     // delete the output directory
     Configuration conf = new JobConf(DirichletJob.class);
     Path outPath = new Path(output);
@@ -130,6 +142,7 @@
       fs.delete(outPath, true);
     }
     fs.mkdirs(outPath);
-    DirichletDriver.runJob(input, output, modelFactory, modelPrototype, prototypeSize, numModels, maxIterations, alpha_0, 1);
+    DirichletDriver.runJob(input, output, modelFactory, modelPrototype, prototypeSize, numModels,
+      maxIterations, alpha_0, 1);
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java Sat Feb 13 21:07:53 2010
@@ -17,6 +17,9 @@
 
 package org.apache.mahout.clustering.dirichlet;
 
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -30,39 +33,37 @@
 import org.apache.hadoop.mapred.OutputLogFilter;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.function.TimesFunction;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
-
-import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
+import org.apache.mahout.math.function.TimesFunction;
 
 public class DirichletMapper extends MapReduceBase implements
-    Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable> {
-
+    Mapper<WritableComparable<?>,VectorWritable,Text,VectorWritable> {
+  
   private DirichletState<VectorWritable> state;
-
+  
   @Override
-  public void map(WritableComparable<?> key, VectorWritable v,
-      OutputCollector<Text, VectorWritable> output, Reporter reporter)
-      throws IOException {
+  public void map(WritableComparable<?> key,
+                  VectorWritable v,
+                  OutputCollector<Text,VectorWritable> output,
+                  Reporter reporter) throws IOException {
     // compute a normalized vector of probabilities that v is described by each model
-    Vector pi = normalizedProbabilities(state, v);
+    Vector pi = DirichletMapper.normalizedProbabilities(state, v);
     // then pick one model by sampling a Multinomial distribution based upon them
     // see: http://en.wikipedia.org/wiki/Multinomial_distribution
     int k = UncommonDistributions.rMultinom(pi);
     output.collect(new Text(String.valueOf(k)), v);
   }
-
+  
   public void configure(DirichletState<VectorWritable> state) {
     this.state = state;
   }
-
+  
   @Override
   public void configure(JobConf job) {
     super.configure(job);
     try {
-      state = getDirichletState(job);
+      state = DirichletMapper.getDirichletState(job);
     } catch (NumberFormatException e) {
       throw new IllegalStateException(e);
     } catch (SecurityException e) {
@@ -75,27 +76,27 @@
       throw new IllegalStateException(e);
     }
   }
-
-  public static DirichletState<VectorWritable> getDirichletState(JobConf job)
-      throws SecurityException, IllegalArgumentException, NoSuchMethodException, InvocationTargetException {
+  
+  public static DirichletState<VectorWritable> getDirichletState(JobConf job) throws SecurityException,
+                                                                             IllegalArgumentException,
+                                                                             NoSuchMethodException,
+                                                                             InvocationTargetException {
     String statePath = job.get(DirichletDriver.STATE_IN_KEY);
     String modelFactory = job.get(DirichletDriver.MODEL_FACTORY_KEY);
     String modelPrototype = job.get(DirichletDriver.MODEL_PROTOTYPE_KEY);
     String prototypeSize = job.get(DirichletDriver.PROTOTYPE_SIZE_KEY);
     String numClusters = job.get(DirichletDriver.NUM_CLUSTERS_KEY);
     String alpha_0 = job.get(DirichletDriver.ALPHA_0_KEY);
-
+    
     try {
       double alpha = Double.parseDouble(alpha_0);
-      DirichletState<VectorWritable> state = DirichletDriver.createState(
-          modelFactory, modelPrototype, Integer.parseInt(prototypeSize),
-          Integer.parseInt(numClusters), alpha);
+      DirichletState<VectorWritable> state = DirichletDriver.createState(modelFactory, modelPrototype,
+        Integer.parseInt(prototypeSize), Integer.parseInt(numClusters), alpha);
       Path path = new Path(statePath);
       FileSystem fs = FileSystem.get(path.toUri(), job);
       FileStatus[] status = fs.listStatus(path, new OutputLogFilter());
       for (FileStatus s : status) {
-        SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(),
-            job);
+        SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), job);
         try {
           Text key = new Text();
           DirichletCluster<VectorWritable> cluster = new DirichletCluster<VectorWritable>();
@@ -121,17 +122,18 @@
       throw new IllegalStateException(e);
     }
   }
-
+  
   /**
-   * Compute a normalized vector of probabilities that v is described by each model using the mixture and the model
-   * pdfs
-   *
-   * @param state the DirichletState<Vector> of this iteration
-   * @param v     an Vector
+   * Compute a normalized vector of probabilities that v is described by each model using the mixture and the
+   * model pdfs
+   * 
+   * @param state
+   *          the DirichletState<Vector> of this iteration
+   * @param v
+   *          a Vector
    * @return the Vector of probabilities
    */
-  private static Vector normalizedProbabilities(
-      DirichletState<VectorWritable> state, VectorWritable v) {
+  private static Vector normalizedProbabilities(DirichletState<VectorWritable> state, VectorWritable v) {
     Vector pi = new DenseVector(state.getNumClusters());
     double max = 0;
     for (int k = 0; k < state.getNumClusters(); k++) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletReducer.java Sat Feb 13 21:07:53 2010
@@ -17,6 +17,10 @@
 
 package org.apache.mahout.clustering.dirichlet;
 
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.util.Iterator;
+
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
@@ -26,25 +30,22 @@
 import org.apache.mahout.clustering.dirichlet.models.Model;
 import org.apache.mahout.math.VectorWritable;
 
-import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
-import java.util.Iterator;
-
 public class DirichletReducer extends MapReduceBase implements
-    Reducer<Text, VectorWritable, Text, DirichletCluster<VectorWritable>> {
-
+    Reducer<Text,VectorWritable,Text,DirichletCluster<VectorWritable>> {
+  
   private DirichletState<VectorWritable> state;
-
+  
   private Model<VectorWritable>[] newModels;
-
+  
   public Model<VectorWritable>[] getNewModels() {
     return newModels;
   }
-
+  
   @Override
-  public void reduce(Text key, Iterator<VectorWritable> values,
-                     OutputCollector<Text, DirichletCluster<VectorWritable>> output, Reporter reporter)
-      throws IOException {
+  public void reduce(Text key,
+                     Iterator<VectorWritable> values,
+                     OutputCollector<Text,DirichletCluster<VectorWritable>> output,
+                     Reporter reporter) throws IOException {
     int k = Integer.parseInt(key.toString());
     Model<VectorWritable> model = newModels[k];
     while (values.hasNext()) {
@@ -56,12 +57,12 @@
     cluster.setModel(model);
     output.collect(key, cluster);
   }
-
+  
   public void configure(DirichletState<VectorWritable> state) {
     this.state = state;
     this.newModels = state.getModelFactory().sampleFromPosterior(state.getModels());
   }
-
+  
   @Override
   public void configure(JobConf job) {
     super.configure(job);
@@ -80,5 +81,5 @@
     }
     this.newModels = state.getModelFactory().sampleFromPosterior(state.getModels());
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletState.java Sat Feb 13 21:07:53 2010
@@ -17,28 +17,31 @@
 
 package org.apache.mahout.clustering.dirichlet;
 
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.mahout.clustering.dirichlet.models.Model;
 import org.apache.mahout.clustering.dirichlet.models.ModelDistribution;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 
-import java.util.ArrayList;
-import java.util.List;
-
 public class DirichletState<O> {
-
+  
   private int numClusters; // the number of clusters
-
+  
   private ModelDistribution<O> modelFactory; // the factory for models
-
+  
   private List<DirichletCluster<O>> clusters; // the clusters for this iteration
-
+  
   private Vector mixture; // the mixture vector
-
+  
   private double alpha_0; // alpha_0
-
+  
   public DirichletState(ModelDistribution<O> modelFactory,
-                        int numClusters, double alpha_0, int thin, int burnin) {
+                        int numClusters,
+                        double alpha_0,
+                        int thin,
+                        int burnin) {
     this.numClusters = numClusters;
     this.modelFactory = modelFactory;
     this.alpha_0 = alpha_0;
@@ -47,45 +50,44 @@
     for (Model<O> m : modelFactory.sampleFromPrior(numClusters)) {
       clusters.add(new DirichletCluster<O>(m));
     }
-    // sample the mixture parameters from a Dirichlet distribution on the totalCounts 
+    // sample the mixture parameters from a Dirichlet distribution on the totalCounts
     mixture = UncommonDistributions.rDirichlet(totalCounts(), alpha_0);
   }
-
-  public DirichletState() {
-  }
-
+  
+  public DirichletState() { }
+  
   public int getNumClusters() {
     return numClusters;
   }
-
+  
   public void setNumClusters(int numClusters) {
     this.numClusters = numClusters;
   }
-
+  
   public ModelDistribution<O> getModelFactory() {
     return modelFactory;
   }
-
+  
   public void setModelFactory(ModelDistribution<O> modelFactory) {
     this.modelFactory = modelFactory;
   }
-
+  
   public List<DirichletCluster<O>> getClusters() {
     return clusters;
   }
-
+  
   public void setClusters(List<DirichletCluster<O>> clusters) {
     this.clusters = clusters;
   }
-
+  
   public Vector getMixture() {
     return mixture;
   }
-
+  
   public void setMixture(Vector mixture) {
     this.mixture = mixture;
   }
-
+  
   public Vector totalCounts() {
     Vector result = new DenseVector(numClusters);
     for (int i = 0; i < numClusters; i++) {
@@ -93,11 +95,12 @@
     }
     return result;
   }
-
+  
   /**
    * Update the receiver with the new models
-   *
-   * @param newModels a Model<Observation>[] of new models
+   * 
+   * @param newModels
+   *          a Model<Observation>[] of new models
    */
   public void update(Model<O>[] newModels) {
     // compute new model parameters based upon observations and update models
@@ -108,12 +111,14 @@
     // update the mixture
     mixture = UncommonDistributions.rDirichlet(totalCounts(), alpha_0);
   }
-
+  
   /**
    * return the adjusted probability that x is described by the kth model
-   *
-   * @param x an Observation
-   * @param k an int index of a model
+   * 
+   * @param x
+   *          an Observation
+   * @param k
+   *          an int index of a model
    * @return the double probability
    */
   public double adjustedProbability(O x, int k) {
@@ -121,13 +126,13 @@
     double mix = mixture.get(k);
     return mix * pdf;
   }
-
+  
   public Model<O>[] getModels() {
-    Model<O>[] result = (Model<O>[]) new Model[numClusters];
+    Model<O>[] result = new Model[numClusters];
     for (int i = 0; i < numClusters; i++) {
       result[i] = clusters.get(i).getModel();
     }
     return result;
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonClusterAdapter.java Sat Feb 13 21:07:53 2010
@@ -35,10 +35,11 @@
 import com.google.gson.JsonSerializationContext;
 import com.google.gson.JsonSerializer;
 
-public class JsonClusterAdapter implements JsonSerializer<DirichletCluster<?>>, JsonDeserializer<DirichletCluster<?>> {
-
+public class JsonClusterAdapter implements JsonSerializer<DirichletCluster<?>>,
+    JsonDeserializer<DirichletCluster<?>> {
+  
   private static final Logger log = LoggerFactory.getLogger(JsonClusterAdapter.class);
-
+  
   @Override
   public JsonElement serialize(DirichletCluster<?> src, Type typeOfSrc, JsonSerializationContext context) {
     GsonBuilder builder = new GsonBuilder();
@@ -50,10 +51,9 @@
     obj.add("modelJson", new JsonPrimitive(gson.toJson(src)));
     return obj;
   }
-
+  
   @Override
-  public DirichletCluster<?> deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context)
-      throws JsonParseException {
+  public DirichletCluster<?> deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException {
     GsonBuilder builder = new GsonBuilder();
     builder.registerTypeAdapter(Vector.class, new JsonVectorAdapter());
     Gson gson = builder.create();
@@ -66,7 +66,7 @@
     try {
       cl = ccl.loadClass(klass);
     } catch (ClassNotFoundException e) {
-      log.warn("Error while loading class", e);
+      JsonClusterAdapter.log.warn("Error while loading class", e);
     }
     Model<Vector> model = (Model<Vector>) gson.fromJson(modelJson, cl);
     return new DirichletCluster<Vector>(model, total);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonModelAdapter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonModelAdapter.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonModelAdapter.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/JsonModelAdapter.java Sat Feb 13 21:07:53 2010
@@ -35,14 +35,12 @@
 import com.google.gson.JsonSerializationContext;
 import com.google.gson.JsonSerializer;
 
-public class JsonModelAdapter implements JsonSerializer<Model<?>>,
-    JsonDeserializer<Model<?>> {
-
+public class JsonModelAdapter implements JsonSerializer<Model<?>>, JsonDeserializer<Model<?>> {
+  
   private static final Logger log = LoggerFactory.getLogger(JsonModelAdapter.class);
-
+  
   @Override
-  public JsonElement serialize(Model<?> src, Type typeOfSrc,
-                               JsonSerializationContext context) {
+  public JsonElement serialize(Model<?> src, Type typeOfSrc, JsonSerializationContext context) {
     GsonBuilder builder = new GsonBuilder();
     builder.registerTypeAdapter(Vector.class, new JsonVectorAdapter());
     Gson gson = builder.create();
@@ -51,10 +49,9 @@
     obj.add("model", new JsonPrimitive(gson.toJson(src)));
     return obj;
   }
-
+  
   @Override
-  public Model<?> deserialize(JsonElement json, Type typeOfT,
-                              JsonDeserializationContext context) throws JsonParseException {
+  public Model<?> deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException {
     GsonBuilder builder = new GsonBuilder();
     builder.registerTypeAdapter(Vector.class, new JsonVectorAdapter());
     Gson gson = builder.create();
@@ -66,7 +63,7 @@
     try {
       cl = ccl.loadClass(klass);
     } catch (ClassNotFoundException e) {
-      log.warn("Error while loading class", e);
+      JsonModelAdapter.log.warn("Error while loading class", e);
     }
     return (Model<?>) gson.fromJson(model, cl);
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/UncommonDistributions.java Sat Feb 13 21:07:53 2010
@@ -17,151 +17,160 @@
 
 package org.apache.mahout.clustering.dirichlet;
 
+import java.util.Random;
+
+import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
-import org.apache.mahout.common.RandomUtils;
 import org.uncommons.maths.random.GaussianGenerator;
 
-import java.util.Random;
-
 public final class UncommonDistributions {
-
+  
   private static final double sqrt2pi = Math.sqrt(2.0 * Math.PI);
-
+  
   private static final Random random = RandomUtils.getRandom();
-
-  private UncommonDistributions() {
-  }
-
-  //=============== start of BSD licensed code. See LICENSE.txt
-  /**
-   * Returns a double sampled according to this distribution.  Uniformly fast for all k > 0.  (Reference: Non-Uniform
-   * Random Variate Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html)  Uses Cheng's rejection algorithm
-   * (GB) for k>=1, rejection from Weibull distribution for 0 < k < 1.
+  
+  private UncommonDistributions() { }
+  
+  // =============== start of BSD licensed code. See LICENSE.txt
+  /**
+   * Returns a double sampled according to this distribution. Uniformly fast for all k > 0. (Reference:
+   * Non-Uniform Random Variate Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html) Uses
+   * Cheng's rejection algorithm (GB) for k>=1, rejection from Weibull distribution for 0 < k < 1.
    */
   public static double rGamma(double k, double lambda) {
     boolean accept = false;
     if (k >= 1) {
-      //Cheng's algorithm
-      double b = (k - Math.log(4));
-      double c = (k + Math.sqrt(2 * k - 1));
+      // Cheng's algorithm
+      double b = k - Math.log(4);
+      double c = k + Math.sqrt(2 * k - 1);
       double lam = Math.sqrt(2 * k - 1);
-      double cheng = (1 + Math.log(4.5));
+      double cheng = 1 + Math.log(4.5);
       double x;
       do {
-        double u = random.nextDouble();
-        double v = random.nextDouble();
-        double y = ((1 / lam) * Math.log(v / (1 - v)));
-        x = (k * Math.exp(y));
-        double z = (u * v * v);
-        double r = (b + (c * y) - x);
-        if ((r >= ((4.5 * z) - cheng)) || (r >= Math.log(z))) {
+        double u = UncommonDistributions.random.nextDouble();
+        double v = UncommonDistributions.random.nextDouble();
+        double y = 1 / lam * Math.log(v / (1 - v));
+        x = k * Math.exp(y);
+        double z = u * v * v;
+        double r = b + c * y - x;
+        if ((r >= 4.5 * z - cheng) || (r >= Math.log(z))) {
           accept = true;
         }
       } while (!accept);
       return x / lambda;
     } else {
-      //Weibull algorithm
-      double c = (1 / k);
-      double d = ((1 - k) * Math.pow(k, (k / (1 - k))));
+      // Weibull algorithm
+      double c = 1 / k;
+      double d = (1 - k) * Math.pow(k, (k / (1 - k)));
       double x;
       do {
-        double u = random.nextDouble();
-        double v = random.nextDouble();
+        double u = UncommonDistributions.random.nextDouble();
+        double v = UncommonDistributions.random.nextDouble();
         double z = -Math.log(u);
         double e = -Math.log(v);
         x = Math.pow(z, c);
-        if ((z + e) >= (d + x)) {
+        if (z + e >= d + x) {
           accept = true;
         }
       } while (!accept);
       return x / lambda;
     }
   }
-
-  //============= end of BSD licensed code
-
+  
+  // ============= end of BSD licensed code
+  
   /**
    * Returns a random sample from a beta distribution with the given shapes
-   *
-   * @param shape1 a double representing shape1
-   * @param shape2 a double representing shape2
+   * 
+   * @param shape1
+   *          a double representing shape1
+   * @param shape2
+   *          a double representing shape2
    * @return a Vector of samples
    */
   public static double rBeta(double shape1, double shape2) {
-    double gam1 = rGamma(shape1, 1);
-    double gam2 = rGamma(shape2, 1);
+    double gam1 = UncommonDistributions.rGamma(shape1, 1);
+    double gam2 = UncommonDistributions.rGamma(shape2, 1);
     return gam1 / (gam1 + gam2);
-
+    
   }
-
+  
   /**
    * Returns a vector of random samples from a beta distribution with the given shapes
-   *
-   * @param K      the number of samples to return
-   * @param shape1 a double representing shape1
-   * @param shape2 a double representing shape2
+   * 
+   * @param K
+   *          the number of samples to return
+   * @param shape1
+   *          a double representing shape1
+   * @param shape2
+   *          a double representing shape2
    * @return a Vector of samples
    */
   public static Vector rBeta(int K, double shape1, double shape2) {
-    //List<Double> params = new ArrayList<Double>(2);
-    //params.add(shape1);
-    //params.add(Math.max(0, shape2));
+    // List<Double> params = new ArrayList<Double>(2);
+    // params.add(shape1);
+    // params.add(Math.max(0, shape2));
     Vector result = new DenseVector(K);
     for (int i = 0; i < K; i++) {
-      result.set(i, rBeta(shape1, shape2));
+      result.set(i, UncommonDistributions.rBeta(shape1, shape2));
     }
     return result;
   }
-
+  
   /**
    * Return a random sample from the chi-squared (chi^2) distribution with df degrees of freedom.
-   *
+   * 
    * @return a double sample
    */
   public static double rChisq(double df) {
     double result = 0.0;
     for (int i = 0; i < df; i++) {
-      double sample = rNorm(0, 1);
+      double sample = UncommonDistributions.rNorm(0, 1);
       result += sample * sample;
     }
     return result;
   }
-
+  
   /**
    * Return a random value from a normal distribution with the given mean and standard deviation
-   *
-   * @param mean a double mean value
-   * @param sd   a double standard deviation
+   * 
+   * @param mean
+   *          a double mean value
+   * @param sd
+   *          a double standard deviation
    * @return a double sample
    */
   public static double rNorm(double mean, double sd) {
-    GaussianGenerator dist = new GaussianGenerator(mean, sd, random);
+    GaussianGenerator dist = new GaussianGenerator(mean, sd, UncommonDistributions.random);
     return dist.nextValue();
   }
-
+  
   /**
    * Return the normal density function value for the sample x
-   *
+   * 
    * pdf = 1/[sqrt(2*p)*s] * e^{-1/2*[(x-m)/s]^2}
-   *
-   * @param x a double sample value
-   * @param m a double mean value
-   * @param s a double standard deviation
+   * 
+   * @param x
+   *          a double sample value
+   * @param m
+   *          a double mean value
+   * @param s
+   *          a double standard deviation
    * @return a double probability value
    */
   public static double dNorm(double x, double m, double s) {
     double xms = (x - m) / s;
-    double ex = (xms * xms) / 2;
+    double ex = xms * xms / 2;
     double exp = Math.exp(-ex);
-    return exp / (sqrt2pi * s);
+    return exp / (UncommonDistributions.sqrt2pi * s);
   }
-
+  
   /** Returns one sample from a multinomial. */
   public static int rMultinom(Vector probabilities) {
     // our probability argument are not normalized.
     double total = probabilities.zSum();
-    double nextDouble = random.nextDouble();
+    double nextDouble = UncommonDistributions.random.nextDouble();
     double p = nextDouble * total;
     for (int i = 0; i < probabilities.size(); i++) {
       double p_i = probabilities.get(i);
@@ -174,18 +183,20 @@
     // can't happen except for round-off error so we don't care what we return here
     return 0;
   }
-
+  
   /**
    * Returns a multinomial vector sampled from the given probabilities
-   *
+   * 
    * rmultinom should be implemented as successive binomial sampling.
-   *
+   * 
    * Keep a normalizing amount that starts with 1 (I call it total).
-   *
+   * 
    * For each i k[i] = rbinom(p[i] / total, size); total -= p[i]; size -= k[i];
-   *
-   * @param size          the size parameter of the binomial distribution
-   * @param probabilities a Vector of probabilities
+   * 
+   * @param size
+   *          the size parameter of the binomial distribution
+   * @param probabilities
+   *          a Vector of probabilities
    * @return a multinomial distribution Vector
    */
   public static Vector rMultinom(int size, Vector probabilities) {
@@ -193,20 +204,20 @@
     double total = probabilities.zSum();
     int cardinality = probabilities.size();
     Vector result = new DenseVector(cardinality);
-    for (int i = 0; total > 0 && i < cardinality; i++) {
+    for (int i = 0; (total > 0) && (i < cardinality); i++) {
       double p = probabilities.get(i);
-      int ki = rBinomial(size, p / total);
+      int ki = UncommonDistributions.rBinomial(size, p / total);
       total -= p;
       size -= ki;
       result.set(i, ki);
     }
     return result;
   }
-
+  
   /**
-   * Returns an integer sampled according to this distribution.  Takes time proprotional to np + 1.  (Reference:
-   * Non-Uniform Random Variate Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html) Second time-waiting
-   * algorithm.
+   * Returns an integer sampled according to this distribution. Takes time proportional to np + 1. (Reference:
+   * Non-Uniform Random Variate Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html) Second
+   * time-waiting algorithm.
    */
   public static int rBinomial(int n, double p) {
     if (p >= 1) {
@@ -216,9 +227,9 @@
     double sum = 0;
     int x = 0;
     while (sum <= q) {
-      double u = random.nextDouble();
+      double u = UncommonDistributions.random.nextDouble();
       double e = -Math.log(u);
-      sum += (e / (n - x));
+      sum += e / (n - x);
       x += 1;
     }
     if (x == 0) {
@@ -226,13 +237,15 @@
     }
     return x - 1;
   }
-
+  
   /**
-   * Sample from a Dirichlet distribution, returning a vector of probabilities using a
-   * stick-breaking algorithm
-   *
-   * @param totalCounts an unnormalized count Vector
-   * @param alpha_0 a double
+   * Sample from a Dirichlet distribution, returning a vector of probabilities using a stick-breaking
+   * algorithm
+   * 
+   * @param totalCounts
+   *          an unnormalized count Vector
+   * @param alpha_0
+   *          a double
    * @return a Vector of probabilities
    */
   public static Vector rDirichlet(Vector totalCounts, double alpha_0) {
@@ -242,12 +255,12 @@
     for (int k = 0; k < pi.size(); k++) {
       double countK = totalCounts.get(k);
       total -= countK;
-      double betaK = rBeta(1 + countK, Math.max(0, alpha_0 + total));
+      double betaK = UncommonDistributions.rBeta(1 + countK, Math.max(0, alpha_0 + total));
       double piK = betaK * remainder;
       pi.set(k, piK);
       remainder -= piK;
     }
     return pi;
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalDistribution.java Sat Feb 13 21:07:53 2010
@@ -22,39 +22,40 @@
 import org.apache.mahout.math.VectorWritable;
 
 /**
- * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm. Uses a
- * Normal Distribution to sample the prior model values. Model values have a vector standard deviation, allowing
- * assymetrical regions to be covered by a model.
+ * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm.
+ * Uses a Normal Distribution to sample the prior model values. Model values have a vector standard deviation,
+ * allowing asymmetrical regions to be covered by a model.
  */
-public class AsymmetricSampledNormalDistribution extends VectorModelDistribution  {
-
+public class AsymmetricSampledNormalDistribution extends VectorModelDistribution {
+  
   public AsymmetricSampledNormalDistribution() {
     super();
   }
-
+  
   public AsymmetricSampledNormalDistribution(VectorWritable modelPrototype) {
     super(modelPrototype);
   }
-
+  
   @Override
   public Model<VectorWritable>[] sampleFromPrior(int howMany) {
     Model<VectorWritable>[] result = new AsymmetricSampledNormalModel[howMany];
     for (int i = 0; i < howMany; i++) {
       Vector prototype = getModelPrototype().get();
       Vector mean = prototype.like();
-      for (int j = 0; j < prototype.size(); j++)
+      for (int j = 0; j < prototype.size(); j++) {
         mean.set(j, UncommonDistributions.rNorm(0, 1));
+      }
       Vector sd = prototype.like();
-      for (int j = 0; j < prototype.size(); j++)
+      for (int j = 0; j < prototype.size(); j++) {
         sd.set(j, UncommonDistributions.rNorm(1, 1));
+      }
       result[i] = new AsymmetricSampledNormalModel(mean, sd);
     }
     return result;
   }
-
+  
   @Override
-  public Model<VectorWritable>[] sampleFromPosterior(
-      Model<VectorWritable>[] posterior) {
+  public Model<VectorWritable>[] sampleFromPosterior(Model<VectorWritable>[] posterior) {
     Model<VectorWritable>[] result = new AsymmetricSampledNormalModel[posterior.length];
     for (int i = 0; i < posterior.length; i++) {
       AsymmetricSampledNormalModel m = (AsymmetricSampledNormalModel) posterior[i];
@@ -62,5 +63,5 @@
     }
     return result;
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/AsymmetricSampledNormalModel.java Sat Feb 13 21:07:53 2010
@@ -33,29 +33,27 @@
 import com.google.gson.reflect.TypeToken;
 
 public class AsymmetricSampledNormalModel implements Model<VectorWritable> {
-
+  
   private static final double sqrt2pi = Math.sqrt(2.0 * Math.PI);
-
+  
   // the parameters
   private Vector mean;
-
+  
   private Vector stdDev;
-
+  
   // the observation statistics, initialized by the first observation
   private int s0 = 0;
-
+  
   private Vector s1;
-
+  
   private Vector s2;
-
   
-  private static final Type modelType = new TypeToken<Model<Vector>>() {
-  }.getType();
-
+  private static final Type modelType = new TypeToken<Model<Vector>>() { }.getType();
+  
   public AsymmetricSampledNormalModel() {
     super();
   }
-
+  
   public AsymmetricSampledNormalModel(Vector mean, Vector stdDev) {
     super();
     this.mean = mean;
@@ -64,24 +62,24 @@
     this.s1 = mean.like();
     this.s2 = mean.like();
   }
-
+  
   public Vector getMean() {
     return mean;
   }
-
+  
   public Vector getStdDev() {
     return stdDev;
   }
-
+  
   /**
    * Return an instance with the same parameters
-   *
+   * 
    * @return an AsymmetricSampledNormalModel
    */
   AsymmetricSampledNormalModel sample() {
     return new AsymmetricSampledNormalModel(mean, stdDev);
   }
-
+  
   @Override
   public void observe(VectorWritable v) {
     Vector x = v.get();
@@ -97,7 +95,7 @@
       s2 = s2.plus(x.times(x));
     }
   }
-
+  
   @Override
   public void computeParameters() {
     if (s0 == 0) {
@@ -111,20 +109,22 @@
       stdDev.assign(Double.MIN_NORMAL);
     }
   }
-
+  
   /**
    * Calculate a pdf using the supplied sample and stdDev
-   *
-   * @param x  a Vector sample
-   * @param sd a double std deviation
+   * 
+   * @param x
+   *          a Vector sample
+   * @param sd
+   *          a double std deviation
    */
   private double pdf(Vector x, double sd) {
     double sd2 = sd * sd;
     double exp = -(x.dot(x) - 2 * x.dot(mean) + mean.dot(mean)) / (2 * sd2);
     double ex = Math.exp(exp);
-    return ex / (sd * sqrt2pi);
+    return ex / (sd * AsymmetricSampledNormalModel.sqrt2pi);
   }
-
+  
   @Override
   public double pdf(VectorWritable v) {
     Vector x = v.get();
@@ -136,17 +136,17 @@
     }
     return pdf;
   }
-
+  
   @Override
   public int count() {
     return s0;
   }
-
+  
   @Override
   public String toString() {
     return asFormatString(null);
   }
-
+  
   @Override
   public String asFormatString(String[] bindings) {
     StringBuilder buf = new StringBuilder(50);
@@ -161,7 +161,7 @@
     buf.append('}');
     return buf.toString();
   }
-
+  
   @Override
   public void readFields(DataInput in) throws IOException {
     VectorWritable temp = new VectorWritable();
@@ -175,7 +175,7 @@
     temp.readFields(in);
     this.s2 = temp.get();
   }
-
+  
   @Override
   public void write(DataOutput out) throws IOException {
     VectorWritable.writeVector(out, mean);
@@ -184,12 +184,12 @@
     VectorWritable.writeVector(out, s1);
     VectorWritable.writeVector(out, s2);
   }
-
+  
   @Override
   public String asJsonString() {
     GsonBuilder builder = new GsonBuilder();
     builder.registerTypeAdapter(Model.class, new JsonModelAdapter());
     Gson gson = builder.create();
-    return gson.toJson(this, modelType);
+    return gson.toJson(this, AsymmetricSampledNormalModel.modelType);
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1Model.java Sat Feb 13 21:07:53 2010
@@ -33,69 +33,68 @@
 import com.google.gson.reflect.TypeToken;
 
 public class L1Model implements Model<VectorWritable> {
-
+  
   private static final DistanceMeasure measure = new ManhattanDistanceMeasure();
-
+  
   public L1Model() {
     super();
   }
-
+  
   public L1Model(Vector v) {
     observed = v.like();
     coefficients = v;
   }
-
+  
   private Vector coefficients;
-
+  
   private int count = 0;
-
+  
   private Vector observed;
-
-  private static final Type modelType = new TypeToken<Model<Vector>>() {
-  }.getType();
-
+  
+  private static final Type modelType = new TypeToken<Model<Vector>>() { }.getType();
+  
   @Override
   public void computeParameters() {
     coefficients = observed.divide(count);
   }
-
+  
   @Override
   public int count() {
     return count;
   }
-
+  
   @Override
   public void observe(VectorWritable x) {
     count++;
     x.get().addTo(observed);
   }
-
+  
   @Override
   public double pdf(VectorWritable x) {
-    return Math.exp(-measure.distance(x.get(), coefficients));
+    return Math.exp(-L1Model.measure.distance(x.get(), coefficients));
   }
-
+  
   @Override
   public void readFields(DataInput in) throws IOException {
     VectorWritable temp = new VectorWritable();
     temp.readFields(in);
     coefficients = temp.get();
   }
-
+  
   @Override
   public void write(DataOutput out) throws IOException {
     VectorWritable.writeVector(out, coefficients);
   }
-
+  
   public L1Model sample() {
     return new L1Model(coefficients.clone());
   }
-
+  
   @Override
   public String toString() {
     return asFormatString(null);
   }
-
+  
   @Override
   public String asFormatString(String[] bindings) {
     StringBuilder buf = new StringBuilder();
@@ -106,8 +105,10 @@
     buf.append('}');
     return buf.toString();
   }
-
-  /* (non-Javadoc)
+  
+  /*
+   * (non-Javadoc)
+   * 
    * @see org.apache.mahout.clustering.Printable#asJsonString()
    */
   @Override
@@ -115,7 +116,7 @@
     GsonBuilder builder = new GsonBuilder();
     builder.registerTypeAdapter(Model.class, new JsonModelAdapter());
     Gson gson = builder.create();
-    return gson.toJson(this, modelType);
+    return gson.toJson(this, L1Model.modelType);
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/L1ModelDistribution.java Sat Feb 13 21:07:53 2010
@@ -21,19 +21,19 @@
 import org.apache.mahout.math.VectorWritable;
 
 /**
- * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm. Uses a
- * L1Distribution
+ * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm.
+ * Uses an L1Distribution
  */
 public class L1ModelDistribution extends VectorModelDistribution {
-
+  
   public L1ModelDistribution(VectorWritable modelPrototype) {
     super(modelPrototype);
   }
-
+  
   public L1ModelDistribution() {
     super();
   }
-
+  
   @Override
   public Model<VectorWritable>[] sampleFromPrior(int howMany) {
     Model<VectorWritable>[] result = new L1Model[howMany];
@@ -43,7 +43,7 @@
     }
     return result;
   }
-
+  
   @Override
   public Model<VectorWritable>[] sampleFromPosterior(Model<VectorWritable>[] posterior) {
     Model<VectorWritable>[] result = new L1Model[posterior.length];

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/Model.java Sat Feb 13 21:07:53 2010
@@ -21,32 +21,37 @@
 import org.apache.mahout.clustering.Printable;
 
 /**
- * A model is a probability distribution over observed data points and allows the probability of any data point to be
- * computed.
+ * A model is a probability distribution over observed data points and allows the probability of any data
+ * point to be computed.
  */
 public interface Model<O> extends Writable, Printable {
-
+  
   /**
    * Observe the given observation, retaining information about it
-   *
-   * @param x an Observation from the posterior
+   * 
+   * @param x
+   *          an Observation from the posterior
    */
   void observe(O x);
-
-  /** Compute a new set of posterior parameters based upon the Observations that have been observed since my creation */
+  
+  /**
+   * Compute a new set of posterior parameters based upon the Observations that have been observed since my
+   * creation
+   */
   void computeParameters();
-
+  
   /**
    * Return the probability that the observation is described by this model
-   *
-   * @param x an Observation from the posterior
+   * 
+   * @param x
+   *          an Observation from the posterior
    * @return the probability that x is in the receiver
    */
   double pdf(O x);
-
+  
   /**
    * Return the number of observations that have been observed by this model
-   *
+   * 
    * @return an int
    */
   int count();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/ModelDistribution.java Sat Feb 13 21:07:53 2010
@@ -19,21 +19,23 @@
 
 /** A model distribution allows us to sample a model from its prior distribution. */
 public interface ModelDistribution<O> {
-
+  
   /**
    * Return a list of models sampled from the prior
-   *
-   * @param howMany the int number of models to return
+   * 
+   * @param howMany
+   *          the int number of models to return
    * @return a Model<Observation>[] representing what is known a priori
    */
   Model<O>[] sampleFromPrior(int howMany);
-
+  
   /**
    * Return a list of models sampled from the posterior
-   *
-   * @param posterior the Model<Observation>[] after observations
+   * 
+   * @param posterior
+   *          the Model<Observation>[] after observations
    * @return a Model<Observation>[] representing what is known a posteriori
    */
   Model<O>[] sampleFromPosterior(Model<O>[] posterior);
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModel.java Sat Feb 13 21:07:53 2010
@@ -17,43 +17,41 @@
 
 package org.apache.mahout.clustering.dirichlet.models;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.lang.reflect.Type;
+
 import org.apache.mahout.clustering.ClusterBase;
 import org.apache.mahout.clustering.dirichlet.JsonModelAdapter;
-import org.apache.mahout.math.function.SquareRootFunction;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.math.function.SquareRootFunction;
 
 import com.google.gson.Gson;
 import com.google.gson.GsonBuilder;
 import com.google.gson.reflect.TypeToken;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.lang.reflect.Type;
-
 public class NormalModel implements Model<VectorWritable> {
-
+  
   private static final double sqrt2pi = Math.sqrt(2.0 * Math.PI);
-
+  
   // the parameters
   private Vector mean;
-
+  
   private double stdDev;
-
+  
   // the observation statistics, initialized by the first observation
   private int s0 = 0;
-
+  
   private Vector s1;
-
+  
   private Vector s2;
-
-  private static final Type modelType = new TypeToken<Model<Vector>>() {
-  }.getType();
-
-  public NormalModel() {
-  }
-
+  
+  private static final Type modelType = new TypeToken<Model<Vector>>() { }.getType();
+  
+  public NormalModel() { }
+  
   public NormalModel(Vector mean, double stdDev) {
     this.mean = mean;
     this.stdDev = stdDev;
@@ -61,28 +59,28 @@
     this.s1 = mean.like();
     this.s2 = mean.like();
   }
-
+  
   int getS0() {
     return s0;
   }
-
+  
   public Vector getMean() {
     return mean;
   }
-
+  
   public double getStdDev() {
     return stdDev;
   }
-
+  
   /**
    * TODO: Return a proper sample from the posterior. For now, return an instance with the same parameters
-   *
+   * 
    * @return a NormalModel
    */
   public NormalModel sample() {
     return new NormalModel(mean, stdDev);
   }
-
+  
   @Override
   public void observe(VectorWritable x) {
     s0++;
@@ -98,7 +96,7 @@
       s2 = s2.plus(v.times(v));
     }
   }
-
+  
   @Override
   public void computeParameters() {
     if (s0 == 0) {
@@ -113,26 +111,26 @@
       stdDev = Double.MIN_VALUE;
     }
   }
-
+  
   @Override
   public double pdf(VectorWritable v) {
     Vector x = v.get();
     double sd2 = stdDev * stdDev;
     double exp = -(x.dot(x) - 2 * x.dot(mean) + mean.dot(mean)) / (2 * sd2);
     double ex = Math.exp(exp);
-    return ex / (stdDev * sqrt2pi);
+    return ex / (stdDev * NormalModel.sqrt2pi);
   }
-
+  
   @Override
   public int count() {
     return s0;
   }
-
+  
   @Override
   public String toString() {
     return asFormatString(null);
   }
-
+  
   @Override
   public String asFormatString(String[] bindings) {
     StringBuilder buf = new StringBuilder();
@@ -143,7 +141,7 @@
     buf.append(" sd=").append(String.format("%.2f", stdDev)).append('}');
     return buf.toString();
   }
-
+  
   @Override
   public void readFields(DataInput in) throws IOException {
     VectorWritable temp = new VectorWritable();
@@ -156,7 +154,7 @@
     temp.readFields(in);
     this.s2 = temp.get();
   }
-
+  
   @Override
   public void write(DataOutput out) throws IOException {
     VectorWritable.writeVector(out, mean);
@@ -165,12 +163,12 @@
     VectorWritable.writeVector(out, s1);
     VectorWritable.writeVector(out, s2);
   }
-
+  
   @Override
   public String asJsonString() {
     GsonBuilder builder = new GsonBuilder();
     builder.registerTypeAdapter(Model.class, new JsonModelAdapter());
     Gson gson = builder.create();
-    return gson.toJson(this, modelType);
+    return gson.toJson(this, NormalModel.modelType);
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/NormalModelDistribution.java Sat Feb 13 21:07:53 2010
@@ -21,19 +21,19 @@
 import org.apache.mahout.math.VectorWritable;
 
 /**
- * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm. Uses a
- * Normal Distribution
+ * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm.
+ * Uses a Normal Distribution
  */
 public class NormalModelDistribution extends VectorModelDistribution {
-
+  
   public NormalModelDistribution(VectorWritable modelPrototype) {
     super(modelPrototype);
   }
-
+  
   public NormalModelDistribution() {
     super();
   }
-
+  
   @Override
   public Model<VectorWritable>[] sampleFromPrior(int howMany) {
     Model<VectorWritable>[] result = new NormalModel[howMany];
@@ -43,10 +43,9 @@
     }
     return result;
   }
-
+  
   @Override
-  public Model<VectorWritable>[] sampleFromPosterior(
-      Model<VectorWritable>[] posterior) {
+  public Model<VectorWritable>[] sampleFromPosterior(Model<VectorWritable>[] posterior) {
     Model<VectorWritable>[] result = new NormalModel[posterior.length];
     for (int i = 0; i < posterior.length; i++) {
       NormalModel m = (NormalModel) posterior[i];

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalDistribution.java Sat Feb 13 21:07:53 2010
@@ -22,19 +22,19 @@
 import org.apache.mahout.math.VectorWritable;
 
 /**
- * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm. Uses a
- * Normal Distribution to sample the prior model values.
+ * An implementation of the ModelDistribution interface suitable for testing the DirichletCluster algorithm.
+ * Uses a Normal Distribution to sample the prior model values.
  */
 public class SampledNormalDistribution extends NormalModelDistribution {
-
+  
   public SampledNormalDistribution() {
     super();
   }
-
+  
   public SampledNormalDistribution(VectorWritable modelPrototype) {
     super(modelPrototype);
   }
-
+  
   @Override
   public Model<VectorWritable>[] sampleFromPrior(int howMany) {
     Model<VectorWritable>[] result = new SampledNormalModel[howMany];
@@ -42,18 +42,18 @@
       Vector prototype = getModelPrototype().get();
       int card = prototype.size();
       double[] m = new double[card];
-      for (int j = 0; j < card; j++)
+      for (int j = 0; j < card; j++) {
         m[j] = UncommonDistributions.rNorm(0, 1);
+      }
       Vector mean = prototype.like();
       mean.assign(m);
       result[i] = new SampledNormalModel(mean, 1);
     }
     return result;
   }
-
+  
   @Override
-  public Model<VectorWritable>[] sampleFromPosterior(
-      Model<VectorWritable>[] posterior) {
+  public Model<VectorWritable>[] sampleFromPosterior(Model<VectorWritable>[] posterior) {
     Model<VectorWritable>[] result = new SampledNormalModel[posterior.length];
     for (int i = 0; i < posterior.length; i++) {
       SampledNormalModel m = (SampledNormalModel) posterior[i];

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalModel.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/SampledNormalModel.java Sat Feb 13 21:07:53 2010
@@ -21,30 +21,30 @@
 import org.apache.mahout.math.Vector;
 
 public class SampledNormalModel extends NormalModel {
-
+  
   public SampledNormalModel() {
     super();
   }
-
+  
   public SampledNormalModel(Vector mean, double sd) {
     super(mean, sd);
   }
-
+  
   @Override
   public String toString() {
     return asFormatString(null);
   }
-
+  
   /**
    * Return an instance with the same parameters
-   *
+   * 
    * @return a SampledNormalModel
    */
   @Override
   public NormalModel sample() {
     return new SampledNormalModel(getMean(), getStdDev());
   }
-
+  
   @Override
   public String asFormatString(String[] bindings) {
     StringBuilder buf = new StringBuilder();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/VectorModelDistribution.java Sat Feb 13 21:07:53 2010
@@ -18,34 +18,34 @@
 
 import org.apache.mahout.math.VectorWritable;
 
-public abstract class VectorModelDistribution implements
-    ModelDistribution<VectorWritable> {
-
+public abstract class VectorModelDistribution implements ModelDistribution<VectorWritable> {
+  
   protected VectorModelDistribution() {
     super();
   }
-
+  
   protected VectorModelDistribution(VectorWritable modelPrototype) {
     super();
     this.modelPrototype = modelPrototype;
   }
-
+  
   // a prototype instance used for creating prior model distributions using like(). It
   // should be of the class and cardinality desired for the particular application.
   private VectorWritable modelPrototype;
-
+  
   /**
    * @return the modelPrototype
    */
   public VectorWritable getModelPrototype() {
     return modelPrototype;
   }
-
+  
   /**
-   * @param modelPrototype the modelPrototype to set
+   * @param modelPrototype
+   *          the modelPrototype to set
    */
   public void setModelPrototype(VectorWritable modelPrototype) {
     this.modelPrototype = modelPrototype;
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java?rev=909914&r1=909913&r2=909914&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterMapper.java Sat Feb 13 21:07:53 2010
@@ -17,6 +17,10 @@
 
 package org.apache.mahout.clustering.fuzzykmeans;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.JobConf;
@@ -26,40 +30,39 @@
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.math.VectorWritable;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
 public class FuzzyKMeansClusterMapper extends MapReduceBase implements
-    Mapper<WritableComparable<?>, VectorWritable, Text, FuzzyKMeansOutput> {
-
+    Mapper<WritableComparable<?>,VectorWritable,Text,FuzzyKMeansOutput> {
+  
   private final List<SoftCluster> clusters = new ArrayList<SoftCluster>();
   private FuzzyKMeansClusterer clusterer;
-
+  
   @Override
-  public void map(WritableComparable<?> key, VectorWritable point,
-                  OutputCollector<Text, FuzzyKMeansOutput> output, Reporter reporter) throws IOException {
+  public void map(WritableComparable<?> key,
+                  VectorWritable point,
+                  OutputCollector<Text,FuzzyKMeansOutput> output,
+                  Reporter reporter) throws IOException {
     clusterer.outputPointWithClusterProbabilities(key.toString(), point.get(), clusters, output);
   }
-
+  
   /**
    * Configure the mapper by providing its clusters. Used by unit tests.
-   *
-   * @param clusters a List<Cluster>
+   * 
+   * @param clusters
+   *          a List<Cluster>
    */
   void config(List<SoftCluster> clusters) {
     this.clusters.clear();
     this.clusters.addAll(clusters);
   }
-
+  
   @Override
   public void configure(JobConf job) {
-
+    
     super.configure(job);
     clusterer = new FuzzyKMeansClusterer(job);
-
+    
     String clusterPath = job.get(FuzzyKMeansConfigKeys.CLUSTER_PATH_KEY);
-    if (clusterPath != null && clusterPath.length() > 0) {
+    if ((clusterPath != null) && (clusterPath.length() > 0)) {
       FuzzyKMeansUtil.configureWithClusterInfo(clusterPath, clusters);
     }
     
@@ -67,5 +70,5 @@
       throw new IllegalStateException("Cluster is empty!!!");
     }
   }
-
+  
 }



Mime
View raw message