mahout-commits mailing list archives

From: robina...@apache.org
Subject: svn commit: r896311 [1/4] - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/classifier/ core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/ core/src/main/java/org/apache/mahout/classifier/bayes/common/ core/src/main/java/org/...
Date: Wed, 06 Jan 2010 02:46:23 GMT
Author: robinanil
Date: Wed Jan  6 02:46:22 2010
New Revision: 896311

URL: http://svn.apache.org/viewvc?rev=896311&view=rev
Log:
MAHOUT-220 First of Bayes classifier codestyle cleanup

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesParameters.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/exceptions/InvalidDatastoreException.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesConstants.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureOutputFormat.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfOutputFormat.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerDriver.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerOutputFormat.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierTest.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFeatureMapperTest.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/CBayesClassifierTest.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorDriver.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorReducer.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java Wed Jan  6 02:46:22 2010
@@ -17,6 +17,19 @@
 
 package org.apache.mahout.classifier;
 
+import java.io.File;
+import java.io.FileFilter;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -33,64 +46,66 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.File;
-import java.io.FileFilter;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.io.Reader;
-import java.io.Writer;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.List;
-
 /**
- * Flatten a file into format that can be read by the Bayes M/R job. <p/> One document per line, first token is the
- * label followed by a tab, rest of the line are the terms.
+ * Flatten a file into format that can be read by the Bayes M/R job.
+ * <p/>
+ * One document per line, first token is the label followed by a tab, rest of
+ * the line are the terms.
  */
-public class BayesFileFormatter {
-
-  private static final Logger log = LoggerFactory.getLogger(BayesFileFormatter.class);
-
+public final class BayesFileFormatter {
+  
+  private static final Logger log = LoggerFactory
+      .getLogger(BayesFileFormatter.class);
+  
   private static final String LINE_SEP = System.getProperty("line.separator");
-
-  private BayesFileFormatter() {
-  }
-
+  
+  private BayesFileFormatter() { }
+  
   /**
-   * Collapse all the files in the inputDir into a single file in the proper Bayes format, 1 document per line
-   *
-   * @param label      The label
-   * @param analyzer   The analyzer to use
-   * @param inputDir   The input Directory
-   * @param charset    The charset of the input files
-   * @param outputFile The file to collapse to
+   * Collapse all the files in the inputDir into a single file in the proper
+   * Bayes format, 1 document per line
+   * 
+   * @param label
+   *          The label
+   * @param analyzer
+   *          The analyzer to use
+   * @param inputDir
+   *          The input Directory
+   * @param charset
+   *          The charset of the input files
+   * @param outputFile
+   *          The file to collapse to
    */
   public static void collapse(String label, Analyzer analyzer, File inputDir,
-                              Charset charset, File outputFile) throws IOException {
+      Charset charset, File outputFile) throws IOException {
     Writer writer = new OutputStreamWriter(new FileOutputStream(outputFile),
         charset);
     try {
       inputDir.listFiles(new FileProcessor(label, analyzer, charset, writer));
-      // listFiles() is called here as a way to recursively visit files, actually
+      // listFiles() is called here as a way to recursively visit files,
+      // actually
     } finally {
       IOUtils.quietClose(writer);
     }
   }
-
+  
   /**
    * Write the input files to the outdir, one output file per input file
-   *
-   * @param label    The label of the file
-   * @param analyzer The analyzer to use
-   * @param input    The input file or directory. May not be null
-   * @param charset  The Character set of the input files
-   * @param outDir   The output directory. Files will be written there with the same name as the input file
+   * 
+   * @param label
+   *          The label of the file
+   * @param analyzer
+   *          The analyzer to use
+   * @param input
+   *          The input file or directory. May not be null
+   * @param charset
+   *          The Character set of the input files
+   * @param outDir
+   *          The output directory. Files will be written there with the same
+   *          name as the input file
    */
   public static void format(String label, Analyzer analyzer, File input,
-                            Charset charset, File outDir) throws IOException {
+      Charset charset, File outDir) throws IOException {
     if (input.isDirectory()) {
       input.listFiles(new FileProcessor(label, analyzer, charset, outDir));
     } else {
@@ -103,49 +118,52 @@
       }
     }
   }
-
+  
   /**
-   * Hack the FileFilter mechanism so that we don't get stuck on large directories and don't have to loop the list
-   * twice
+   * Hack the FileFilter mechanism so that we don't get stuck on large
+   * directories and don't have to loop the list twice
    */
-  private static class FileProcessor implements FileFilter {
+  private static final class FileProcessor implements FileFilter {
     private final String label;
-
+    
     private final Analyzer analyzer;
-
+    
     private File outputDir;
-
+    
     private final Charset charset;
-
+    
     private Writer writer;
-
+    
     /**
      * Use this when you want to collapse all files to a single file
-     *
-     * @param label  The label
-     * @param writer must not be null and will not be closed
+     * 
+     * @param label
+     *          The label
+     * @param writer
+     *          must not be null and will not be closed
      */
     private FileProcessor(String label, Analyzer analyzer, Charset charset,
-                          Writer writer) {
+        Writer writer) {
       this.label = label;
       this.analyzer = analyzer;
       this.charset = charset;
       this.writer = writer;
     }
-
+    
     /**
      * Use this when you want a writer per file
-     *
-     * @param outputDir must not be null.
+     * 
+     * @param outputDir
+     *          must not be null.
      */
     private FileProcessor(String label, Analyzer analyzer, Charset charset,
-                          File outputDir) {
+        File outputDir) {
       this.label = label;
       this.analyzer = analyzer;
       this.charset = charset;
       this.outputDir = outputDir;
     }
-
+    
     @Override
     public boolean accept(File file) {
       if (file.isFile()) {
@@ -176,19 +194,25 @@
       return false;
     }
   }
-
+  
   /**
    * Write the tokens and the label from the Reader to the writer
-   *
-   * @param label    The label
-   * @param analyzer The analyzer to use
-   * @param inFile   the file to read and whose contents are passed to the analyzer
-   * @param charset  character encoding to assume when reading the input file
-   * @param writer   The Writer, is not closed by this method
-   * @throws java.io.IOException if there was a problem w/ the reader
+   * 
+   * @param label
+   *          The label
+   * @param analyzer
+   *          The analyzer to use
+   * @param inFile
+   *          the file to read and whose contents are passed to the analyzer
+   * @param charset
+   *          character encoding to assume when reading the input file
+   * @param writer
+   *          The Writer, is not closed by this method
+   * @throws java.io.IOException
+   *           if there was a problem w/ the reader
    */
   private static void writeFile(String label, Analyzer analyzer, File inFile,
-                                Charset charset, Writer writer) throws IOException {
+      Charset charset, Writer writer) throws IOException {
     Reader reader = new InputStreamReader(new FileInputStream(inFile), charset);
     try {
       TokenStream ts = analyzer.tokenStream(label, reader);
@@ -206,18 +230,20 @@
       IOUtils.quietClose(reader);
     }
   }
-
+  
   /**
    * Convert a Reader to a vector
-   *
-   * @param analyzer The Analyzer to use
-   * @param reader   The reader to feed to the Analyzer
+   * 
+   * @param analyzer
+   *          The Analyzer to use
+   * @param reader
+   *          The reader to feed to the Analyzer
    * @return An array of unique tokens
    */
-  public static String[] readerToDocument(Analyzer analyzer, Reader reader)
-      throws IOException {
+  public static String[] readerToDocument(Analyzer analyzer,
+                                          Reader reader) throws IOException {
     TokenStream ts = analyzer.tokenStream("", reader);
-
+    
     Token token;
     List<String> coll = new ArrayList<String>();
     while ((token = ts.next()) != null) {
@@ -228,56 +254,76 @@
     }
     return coll.toArray(new String[coll.size()]);
   }
-
+  
   /**
    * Run the FileFormatter
-   *
-   * @param args The input args. Run with -h to see the help
-   * @throws ClassNotFoundException if the Analyzer can't be found
-   * @throws IllegalAccessException if the Analyzer can't be constructed
-   * @throws InstantiationException if the Analyzer can't be constructed
-   * @throws IOException            if the files can't be dealt with properly
+   * 
+   * @param args
+   *          The input args. Run with -h to see the help
+   * @throws ClassNotFoundException
+   *           if the Analyzer can't be found
+   * @throws IllegalAccessException
+   *           if the Analyzer can't be constructed
+   * @throws InstantiationException
+   *           if the Analyzer can't be constructed
+   * @throws IOException
+   *           if the files can't be dealt with properly
    */
   public static void main(String[] args) throws ClassNotFoundException,
       IllegalAccessException, InstantiationException, IOException {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-
-    Option inputOpt = obuilder.withLongName("input").withRequired(true).withArgument(
-        abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
-        withDescription("The Input file").withShortName("i").create();
-
-    Option outputOpt = obuilder.withLongName("output").withRequired(true).withArgument(
-        abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
-        withDescription("The output file").withShortName("o").create();
-
-    Option labelOpt = obuilder.withLongName("label").withRequired(true).withArgument(
-        abuilder.withName("label").withMinimum(1).withMaximum(1).create()).
-        withDescription("The label of the file").withShortName("l").create();
-
-    Option analyzerOpt = obuilder.withLongName("analyzer").withArgument(
-        abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()).
-        withDescription("The fully qualified class name of the analyzer to use.  Must have a no-arg constructor.  Default is the StandardAnalyzer").withShortName("a").create();
-
+    
+    Option inputOpt = obuilder.withLongName("input").withRequired(true)
+        .withArgument(
+            abuilder.withName("input").withMinimum(1).withMaximum(1).create())
+        .withDescription("The Input file").withShortName("i").create();
+    
+    Option outputOpt = obuilder.withLongName("output").withRequired(true)
+        .withArgument(
+            abuilder.withName("output").withMinimum(1).withMaximum(1).create())
+        .withDescription("The output file").withShortName("o").create();
+    
+    Option labelOpt = obuilder.withLongName("label").withRequired(true)
+        .withArgument(
+            abuilder.withName("label").withMinimum(1).withMaximum(1).create())
+        .withDescription("The label of the file").withShortName("l").create();
+    
+    Option analyzerOpt = obuilder
+        .withLongName("analyzer")
+        .withArgument(
+            abuilder.withName("analyzer").withMinimum(1).withMaximum(1)
+                .create())
+        .withDescription(
+            "The fully qualified class name of the analyzer to use. "
+                + "Must have a no-arg constructor.  Default is the StandardAnalyzer")
+        .withShortName("a").create();
+    
     Option charsetOpt = obuilder.withLongName("charset").withArgument(
-        abuilder.withName("charset").withMinimum(1).withMaximum(1).create()).
-        withDescription("The character encoding of the input file").withShortName("c").create();
-
-    Option collapseOpt = obuilder.withLongName("collapse").withRequired(true).withArgument(
-        abuilder.withName("collapse").withMinimum(1).withMaximum(1).create()).
-        withDescription("Collapse a whole directory to a single file, one doc per line").withShortName("p").create();
-
-    Option helpOpt = obuilder.withLongName("help").withRequired(true).
-        withDescription("Print out help").withShortName("h").create();
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(labelOpt).withOption(analyzerOpt).withOption(charsetOpt).withOption(collapseOpt).withOption(helpOpt).create();
+        abuilder.withName("charset").withMinimum(1).withMaximum(1).create())
+        .withDescription("The character encoding of the input file")
+        .withShortName("c").create();
+    
+    Option collapseOpt = obuilder.withLongName("collapse").withRequired(true)
+        .withArgument(
+            abuilder.withName("collapse").withMinimum(1).withMaximum(1)
+                .create()).withDescription(
+            "Collapse a whole directory to a single file, one doc per line")
+        .withShortName("p").create();
+    
+    Option helpOpt = obuilder.withLongName("help").withRequired(true)
+        .withDescription("Print out help").withShortName("h").create();
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(
+        outputOpt).withOption(labelOpt).withOption(analyzerOpt).withOption(
+        charsetOpt).withOption(collapseOpt).withOption(helpOpt).create();
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
       CommandLine cmdLine = parser.parse(args);
-
+      
       if (cmdLine.hasOption(helpOpt)) {
-
+        
         return;
       }
       File input = new File((String) cmdLine.getValue(inputOpt));
@@ -285,8 +331,8 @@
       String label = (String) cmdLine.getValue(labelOpt);
       Analyzer analyzer;
       if (cmdLine.hasOption(analyzerOpt)) {
-        analyzer = Class.forName(
-            (String) cmdLine.getValue(analyzerOpt)).asSubclass(Analyzer.class).newInstance();
+        analyzer = Class.forName((String) cmdLine.getValue(analyzerOpt))
+            .asSubclass(Analyzer.class).newInstance();
       } else {
         analyzer = new StandardAnalyzer();
       }
@@ -295,13 +341,13 @@
         charset = Charset.forName((String) cmdLine.getValue(charsetOpt));
       }
       boolean collapse = cmdLine.hasOption(collapseOpt);
-
+      
       if (collapse) {
         collapse(label, analyzer, input, charset, output);
       } else {
         format(label, analyzer, input, charset, output);
       }
-
+      
     } catch (OptionException e) {
       log.error("Exception", e);
     }
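
For context, the reformatted Javadoc above describes the flattened input format the Bayes M/R job expects: one document per line, the label first, then a tab, then the analyzed terms. A minimal usage sketch of the collapse() method touched in this diff; the label, paths, and charset below are illustrative assumptions, not part of the commit:

import java.io.File;
import java.nio.charset.Charset;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.mahout.classifier.BayesFileFormatter;

public class CollapseSketch {
  public static void main(String[] args) throws Exception {
    Analyzer analyzer = new StandardAnalyzer();          // same default analyzer the tool uses
    // Every file under the input directory is appended to one output file,
    // producing lines of the form: <label>\t<term> <term> <term> ...
    BayesFileFormatter.collapse("rec.sport.hockey",      // hypothetical label
        analyzer,
        new File("/tmp/20news/rec.sport.hockey"),        // hypothetical input directory
        Charset.forName("UTF-8"),
        new File("/tmp/bayes-input/rec.sport.hockey.txt"));
  }
}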

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java Wed Jan  6 02:46:22 2010
@@ -17,44 +17,44 @@
 
 package org.apache.mahout.classifier;
 
-/** Result of a Document Classification. The label and the associated score(Usually probabilty) */
+/**
+ * Result of a Document Classification. The label and the associated
+ * score(Usually probabilty)
+ */
 public class ClassifierResult {
   private String label;
   private double score;
-
-  public ClassifierResult() {
-  }
+  
+  public ClassifierResult() { }
 
   public ClassifierResult(String label, double score) {
     this.label = label;
     this.score = score;
   }
-
+  
   public ClassifierResult(String label) {
     this.label = label;
   }
-
+  
   public String getLabel() {
     return label;
   }
-
+  
   public double getScore() {
     return score;
   }
-
+  
   public void setLabel(String label) {
     this.label = label;
   }
-
+  
   public void setScore(double score) {
     this.score = score;
   }
-
+  
   @Override
   public String toString() {
-    return "ClassifierResult{" +
-        "category='" + label + '\'' +
-        ", score=" + score +
-        '}';
+    return "ClassifierResult{" + "category='" + label + '\'' + ", score="
+        + score + '}';
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java Wed Jan  6 02:46:22 2010
@@ -17,10 +17,15 @@
 
 package org.apache.mahout.classifier;
 
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+import java.util.List;
+
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
-import org.apache.commons.cli2.OptionException;
 import org.apache.commons.cli2.builder.ArgumentBuilder;
 import org.apache.commons.cli2.builder.DefaultOptionBuilder;
 import org.apache.commons.cli2.builder.GroupBuilder;
@@ -28,153 +33,137 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.util.Version;
-
 import org.apache.mahout.classifier.bayes.algorithm.BayesAlgorithm;
 import org.apache.mahout.classifier.bayes.algorithm.CBayesAlgorithm;
 import org.apache.mahout.classifier.bayes.common.BayesParameters;
 import org.apache.mahout.classifier.bayes.datastore.HBaseBayesDatastore;
 import org.apache.mahout.classifier.bayes.datastore.InMemoryBayesDatastore;
-import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
 import org.apache.mahout.classifier.bayes.interfaces.Algorithm;
 import org.apache.mahout.classifier.bayes.interfaces.Datastore;
 import org.apache.mahout.classifier.bayes.model.ClassifierContext;
 import org.apache.mahout.common.nlp.NGrams;
-
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.nio.charset.Charset;
-import java.util.List;
-
-public class Classify {
-
+/**
+ * Runs the Bayes classifier using the given model location(HDFS/HBASE)
+ * 
+ */
+public final class Classify {
+  
   private static final Logger log = LoggerFactory.getLogger(Classify.class);
-
-  private Classify() {
-  }
-
-
-  public static void main(String[] args) throws IOException,
-      ClassNotFoundException, IllegalAccessException, InstantiationException,
-      OptionException, InvalidDatastoreException {
-
+  
+  private Classify() { }
+  
+  public static void main(String[] args) throws Exception {
+    
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-
-
+    
     Option pathOpt = obuilder.withLongName("path").withRequired(true)
         .withArgument(
             abuilder.withName("path").withMinimum(1).withMaximum(1).create())
         .withDescription("The local file system path").withShortName("m")
         .create();
-
+    
     Option classifyOpt = obuilder.withLongName("classify").withRequired(true)
         .withArgument(
             abuilder.withName("classify").withMinimum(1).withMaximum(1)
                 .create()).withDescription("The doc to classify")
         .withShortName("").create();
-
+    
     Option encodingOpt = obuilder.withLongName("encoding").withRequired(true)
         .withArgument(
             abuilder.withName("encoding").withMinimum(1).withMaximum(1)
                 .create())
         .withDescription("The file encoding.  Default: UTF-8").withShortName(
             "e").create();
-
-
+    
     Option analyzerOpt = obuilder.withLongName("analyzer").withRequired(true)
         .withArgument(
             abuilder.withName("analyzer").withMinimum(1).withMaximum(1)
                 .create()).withDescription("The Analyzer to use")
         .withShortName("a").create();
-
-
+    
     Option defaultCatOpt = obuilder.withLongName("defaultCat").withRequired(
         true).withArgument(
         abuilder.withName("defaultCat").withMinimum(1).withMaximum(1).create())
         .withDescription("The default category").withShortName("d").create();
-
-
+    
     Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(true)
         .withArgument(
             abuilder.withName("gramSize").withMinimum(1).withMaximum(1)
                 .create()).withDescription("Size of the n-gram").withShortName(
             "ng").create();
-
-
+    
     Option typeOpt = obuilder.withLongName("classifierType").withRequired(true)
         .withArgument(
             abuilder.withName("classifierType").withMinimum(1).withMaximum(1)
                 .create()).withDescription("Type of classifier").withShortName(
             "type").create();
-
+    
     Option dataSourceOpt = obuilder.withLongName("dataSource").withRequired(
         true).withArgument(
         abuilder.withName("dataSource").withMinimum(1).withMaximum(1).create())
         .withDescription("Location of model: hdfs|hbase").withShortName(
             "source").create();
-
+    
     Group options = gbuilder.withName("Options").withOption(pathOpt)
         .withOption(classifyOpt).withOption(encodingOpt)
         .withOption(analyzerOpt).withOption(defaultCatOpt).withOption(
             gramSizeOpt).withOption(typeOpt).withOption(dataSourceOpt).create();
-
+    
     Parser parser = new Parser();
     parser.setGroup(options);
     CommandLine cmdLine = parser.parse(args);
-
-
+    
     int gramSize = 1;
     if (cmdLine.hasOption(gramSizeOpt)) {
       gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt));
-
+      
     }
-
+    
     BayesParameters params = new BayesParameters(gramSize);
-
+    
     String modelBasePath = (String) cmdLine.getValue(pathOpt);
-
+    
     log.info("Loading model from: {}", params.print());
-
+    
     Algorithm algorithm;
     Datastore datastore;
-
+    
     String classifierType = (String) cmdLine.getValue(typeOpt);
-
+    
     String dataSource = (String) cmdLine.getValue(dataSourceOpt);
     if (dataSource.equals("hdfs")) {
       if (classifierType.equalsIgnoreCase("bayes")) {
-        log.info("Testing Bayes Classifier");
+        log.info("Using Bayes Classifier");
         algorithm = new BayesAlgorithm();
         datastore = new InMemoryBayesDatastore(params);
       } else if (classifierType.equalsIgnoreCase("cbayes")) {
-        log.info("Testing Complementary Bayes Classifier");
+        log.info("Using Complementary Bayes Classifier");
         algorithm = new CBayesAlgorithm();
         datastore = new InMemoryBayesDatastore(params);
       } else {
         throw new IllegalArgumentException("Unrecognized classifier type: "
             + classifierType);
       }
-
+      
     } else if (dataSource.equals("hbase")) {
       if (classifierType.equalsIgnoreCase("bayes")) {
-        log.info("Testing Bayes Classifier");
+        log.info("Using Bayes Classifier");
         algorithm = new BayesAlgorithm();
         datastore = new HBaseBayesDatastore(modelBasePath, params);
       } else if (classifierType.equalsIgnoreCase("cbayes")) {
-        log.info("Testing Complementary Bayes Classifier");
+        log.info("Using Complementary Bayes Classifier");
         algorithm = new CBayesAlgorithm();
         datastore = new HBaseBayesDatastore(modelBasePath, params);
       } else {
         throw new IllegalArgumentException("Unrecognized classifier type: "
             + classifierType);
       }
-
+      
     } else {
       throw new IllegalArgumentException("Unrecognized dataSource type: "
           + dataSource);
@@ -199,7 +188,7 @@
     if (analyzer == null) {
       analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
     }
-
+    
     log.info("Converting input document to proper format");
     String[] document = BayesFileFormatter.readerToDocument(analyzer,
         new InputStreamReader(new FileInputStream(docPath), Charset
@@ -208,15 +197,15 @@
     for (String token : document) {
       line.append(token).append(' ');
     }
-
+    
     List<String> doc = new NGrams(line.toString(), gramSize)
         .generateNGramsWithoutLabel();
-
+    
     log.info("Done converting");
     log.info("Classifying document: {}", docPath);
     ClassifierResult category = classifier.classifyDocument(doc
         .toArray(new String[doc.size()]), defaultCat);
     log.info("Category for {} is {}", docPath, category);
-
+    
   }
 }
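
The cleaned-up Classify driver above picks the algorithm (bayes or cbayes) and the datastore (in-memory load for hdfs, or hbase) from its CLI options. A hedged invocation sketch using the long option names defined in the diff; the model path, document path, and category are assumptions only, and a trained model must already exist at the given location:

public class ClassifySketch {
  public static void main(String[] args) throws Exception {
    org.apache.mahout.classifier.Classify.main(new String[] {
        "--path", "/tmp/bayes-model",              // location of a previously trained model
        "--classify", "/tmp/doc-to-classify.txt",  // the document to classify
        "--encoding", "UTF-8",
        "--analyzer", "org.apache.lucene.analysis.standard.StandardAnalyzer",
        "--defaultCat", "unknown",
        "--gramSize", "1",
        "--classifierType", "bayes",               // or "cbayes"
        "--dataSource", "hdfs"                     // or "hbase"
    });
  }
}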

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java Wed Jan  6 02:46:22 2010
@@ -17,27 +17,29 @@
 
 package org.apache.mahout.classifier;
 
-import org.apache.commons.lang.StringUtils;
-import org.apache.mahout.common.Summarizable;
-
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.commons.lang.StringUtils;
+import org.apache.mahout.common.Summarizable;
+
 /**
- * The ConfusionMatrix Class stores the result of Classification of a Test Dataset.
- *
+ * The ConfusionMatrix Class stores the result of Classification of a Test
+ * Dataset.
+ * 
  * See http://en.wikipedia.org/wiki/Confusion_matrix for background
  */
 public class ConfusionMatrix implements Summarizable {
-
+  
   private final Collection<String> labels;
-
-  private final Map<String, Integer> labelMap = new HashMap<String, Integer>();
-
-  private int[][] confusionMatrix = null;
+  
+  private final Map<String,Integer> labelMap = new HashMap<String,Integer>();
+  
+  private int[][] confusionMatrix;
+  
   private String defaultLabel = "unknown";
-
+  
   public ConfusionMatrix(Collection<String> labels, String defaultLabel) {
     this.labels = labels;
     confusionMatrix = new int[labels.size() + 1][labels.size() + 1];
@@ -47,15 +49,15 @@
     }
     labelMap.put(defaultLabel, labelMap.size());
   }
-
+  
   public int[][] getConfusionMatrix() {
     return confusionMatrix;
   }
-
+  
   public Collection<String> getLabels() {
     return labels;
   }
-
+  
   public double getAccuracy(String label) {
     int labelId = labelMap.get(label);
     int labelTotal = 0;
@@ -68,13 +70,12 @@
     }
     return 100.0 * correct / labelTotal;
   }
-
+  
   public int getCorrect(String label) {
     int labelId = labelMap.get(label);
     return confusionMatrix[labelId][labelId];
   }
-
-
+  
   public double getTotal(String label) {
     int labelId = labelMap.get(label);
     int labelTotal = 0;
@@ -83,52 +84,55 @@
     }
     return labelTotal;
   }
-
+  
   public void addInstance(String correctLabel, ClassifierResult classifiedResult) {
     incrementCount(correctLabel, classifiedResult.getLabel());
   }
-
+  
   public void addInstance(String correctLabel, String classifiedLabel) {
     incrementCount(correctLabel, classifiedLabel);
   }
-
+  
   public int getCount(String correctLabel, String classifiedLabel) {
     if (labels.contains(correctLabel)
-        && labels.contains(classifiedLabel) == false && defaultLabel.equals(classifiedLabel) == false) {
-      throw new IllegalArgumentException("Label not found " + correctLabel + ' ' + classifiedLabel);
+        && labels.contains(classifiedLabel) == false
+        && defaultLabel.equals(classifiedLabel) == false) {
+      throw new IllegalArgumentException("Label not found " + correctLabel
+          + ' ' + classifiedLabel);
     }
     int correctId = labelMap.get(correctLabel);
     int classifiedId = labelMap.get(classifiedLabel);
     return confusionMatrix[correctId][classifiedId];
   }
-
+  
   public void putCount(String correctLabel, String classifiedLabel, int count) {
     if (labels.contains(correctLabel)
-        && labels.contains(classifiedLabel) == false && defaultLabel.equals(classifiedLabel) == false) {
+        && labels.contains(classifiedLabel) == false
+        && defaultLabel.equals(classifiedLabel) == false) {
       throw new IllegalArgumentException("Label not found");
     }
     int correctId = labelMap.get(correctLabel);
     int classifiedId = labelMap.get(classifiedLabel);
     confusionMatrix[correctId][classifiedId] = count;
   }
-
+  
   public void incrementCount(String correctLabel, String classifiedLabel,
-                             int count) {
+      int count) {
     putCount(correctLabel, classifiedLabel, count
         + getCount(correctLabel, classifiedLabel));
   }
-
+  
   public void incrementCount(String correctLabel, String classifiedLabel) {
     incrementCount(correctLabel, classifiedLabel, 1);
   }
-
+  
   public ConfusionMatrix merge(ConfusionMatrix b) {
     if (labels.size() != b.getLabels().size()) {
       throw new IllegalArgumentException("The Labels do not Match");
     }
-
-    //if (labels.containsAll(b.getLabels()))
-    //  ;
+    
+    // if (labels.containsAll(b.getLabels()))
+    // ;
     for (String correctLabel : this.labels) {
       for (String classifiedLabel : this.labels) {
         incrementCount(correctLabel, classifiedLabel, b.getCount(correctLabel,
@@ -137,39 +141,48 @@
     }
     return this;
   }
-
+  
   @Override
   public String summarize() {
     String lineSep = System.getProperty("line.separator");
     StringBuilder returnString = new StringBuilder();
-    returnString
-        .append("=======================================================").append(lineSep);
+    returnString.append(
+        "=======================================================").append(
+        lineSep);
     returnString.append("Confusion Matrix\n");
-    returnString
-        .append("-------------------------------------------------------").append(lineSep);
-
+    returnString.append(
+        "-------------------------------------------------------").append(
+        lineSep);
+    
     for (String correctLabel : this.labels) {
-      returnString.append(StringUtils.rightPad(getSmallLabel(labelMap.get(correctLabel)), 5)).append('\t');
+      returnString.append(
+          StringUtils.rightPad(getSmallLabel(labelMap.get(correctLabel)), 5))
+          .append('\t');
     }
-
+    
     returnString.append("<--Classified as").append(lineSep);
-
+    
     for (String correctLabel : this.labels) {
       int labelTotal = 0;
       for (String classifiedLabel : this.labels) {
-        returnString.append(StringUtils.rightPad(Integer.toString(getCount(
-            correctLabel, classifiedLabel)), 5)).append('\t');
+        returnString.append(
+            StringUtils.rightPad(Integer.toString(getCount(correctLabel,
+                classifiedLabel)), 5)).append('\t');
         labelTotal += getCount(correctLabel, classifiedLabel);
       }
-      returnString.append(" |  ").append(StringUtils.rightPad(String.valueOf(labelTotal), 6)).append('\t')
-          .append(StringUtils.rightPad(getSmallLabel(labelMap.get(correctLabel)), 5))
+      returnString.append(" |  ").append(
+          StringUtils.rightPad(String.valueOf(labelTotal), 6)).append('\t')
+          .append(
+              StringUtils
+                  .rightPad(getSmallLabel(labelMap.get(correctLabel)), 5))
           .append(" = ").append(correctLabel).append(lineSep);
     }
-    returnString.append("Default Category: ").append(defaultLabel).append(": ").append(labelMap.get(defaultLabel)).append(lineSep);
+    returnString.append("Default Category: ").append(defaultLabel).append(": ")
+        .append(labelMap.get(defaultLabel)).append(lineSep);
     returnString.append(lineSep);
     return returnString.toString();
   }
-
+  
   static String getSmallLabel(int i) {
     int val = i;
     StringBuilder returnString = new StringBuilder();
@@ -181,5 +194,5 @@
     } while (val > 0);
     return returnString.toString();
   }
-
+  
 }
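
As the Javadoc above notes, ConfusionMatrix stores test results with correct labels as rows and classified labels as columns; getAccuracy(label) is 100 times the diagonal entry divided by the row total. A minimal sketch with made-up labels and counts:

import java.util.Arrays;
import org.apache.mahout.classifier.ConfusionMatrix;

public class ConfusionMatrixSketch {
  public static void main(String[] args) {
    ConfusionMatrix cm = new ConfusionMatrix(Arrays.asList("spam", "ham"), "unknown");
    cm.addInstance("spam", "spam");               // correctly classified
    cm.addInstance("spam", "ham");                // misclassified
    cm.addInstance("ham", "ham");                 // correctly classified
    System.out.println(cm.getAccuracy("spam"));   // 50.0
    System.out.println(cm.summarize());           // tabular matrix, one row per label
  }
}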

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java Wed Jan  6 02:46:22 2010
@@ -17,16 +17,21 @@
 
 package org.apache.mahout.classifier;
 
-import org.apache.commons.lang.StringUtils;
-import org.apache.mahout.common.Summarizable;
-
 import java.text.DecimalFormat;
 import java.util.Collection;
 
-public class ResultAnalyzer implements Summarizable {
-
-  private ConfusionMatrix confusionMatrix = null;
+import org.apache.commons.lang.StringUtils;
+import org.apache.mahout.common.Summarizable;
 
+/**
+ * ResultAnalyzer captures the classification statistics and displays in a
+ * tabular manner
+ * 
+ */
+public class ResultAnalyzer implements Summarizable {
+  
+  private ConfusionMatrix confusionMatrix;
+  
   /*
    * === Summary ===
    * 
@@ -35,25 +40,28 @@
    * mean squared error 0.0817 Relative absolute error 9.9344 % Root relative
    * squared error 37.2742 % Total Number of Instances 683
    */
-  private int correctlyClassified = 0;
-
-  private int incorrectlyClassified = 0;
-
+  private int correctlyClassified;
+  
+  private int incorrectlyClassified;
+  
   public ResultAnalyzer(Collection<String> labelSet, String defaultLabel) {
     confusionMatrix = new ConfusionMatrix(labelSet, defaultLabel);
   }
-
+  
   public ConfusionMatrix getConfusionMatrix() {
     return this.confusionMatrix;
   }
-
+  
   /**
-   *
-   * @param correctLabel The correct label
-   * @param classifiedResult The classified result
+   * 
+   * @param correctLabel
+   *          The correct label
+   * @param classifiedResult
+   *          The classified result
    * @return whether the instance was correct or not
    */
-  public boolean addInstance(String correctLabel, ClassifierResult classifiedResult) {
+  public boolean addInstance(String correctLabel,
+                             ClassifierResult classifiedResult) {
     boolean result = correctLabel.equals(classifiedResult.getLabel());
     if (result) {
       correctlyClassified++;
@@ -63,16 +71,16 @@
     confusionMatrix.addInstance(correctLabel, classifiedResult);
     return result;
   }
-
+  
   @Override
   public String toString() {
     return "";
   }
-
+  
   @Override
   public String summarize() {
     StringBuilder returnString = new StringBuilder();
-
+    
     returnString
         .append("=======================================================\n");
     returnString.append("Summary\n");
@@ -80,24 +88,34 @@
         .append("-------------------------------------------------------\n");
     int totalClassified = correctlyClassified + incorrectlyClassified;
     double percentageCorrect = (double) 100 * correctlyClassified
-        / (totalClassified);
+                               / totalClassified;
     double percentageIncorrect = (double) 100 * incorrectlyClassified
-        / (totalClassified);
+                                 / totalClassified;
     DecimalFormat decimalFormatter = new DecimalFormat("0.####");
-
-    returnString.append(StringUtils.rightPad("Correctly Classified Instances",
-        40)).append(": ").append(StringUtils.leftPad(Integer.toString(correctlyClassified), 10))
-        .append('\t').append(StringUtils.leftPad(decimalFormatter.format(percentageCorrect), 10)).append("%\n");
-    returnString.append(StringUtils.rightPad(
-        "Incorrectly Classified Instances", 40)).append(": ").append(StringUtils
-        .leftPad(Integer.toString(incorrectlyClassified), 10)).append('\t')
-        .append(StringUtils.leftPad(decimalFormatter.format(percentageIncorrect), 10)).append("%\n");
-    returnString.append(StringUtils.rightPad("Total Classified Instances", 40)).append(": ")
-        .append(StringUtils.leftPad(Integer.toString(totalClassified), 10)).append('\n');
+    
+    returnString.append(
+      StringUtils.rightPad("Correctly Classified Instances", 40)).append(": ")
+        .append(StringUtils.leftPad(Integer.toString(correctlyClassified), 10))
+        .append('\t').append(
+          StringUtils.leftPad(decimalFormatter.format(percentageCorrect), 10))
+        .append("%\n");
+    returnString
+        .append(StringUtils.rightPad("Incorrectly Classified Instances", 40))
+        .append(": ")
+        .append(
+          StringUtils.leftPad(Integer.toString(incorrectlyClassified), 10))
+        .append('\t')
+        .append(
+          StringUtils.leftPad(decimalFormatter.format(percentageIncorrect), 10))
+        .append("%\n");
+    returnString.append(StringUtils.rightPad("Total Classified Instances", 40))
+        .append(": ").append(
+          StringUtils.leftPad(Integer.toString(totalClassified), 10)).append(
+          '\n');
     returnString.append('\n');
-
+    
     returnString.append(confusionMatrix.summarize());
-
+    
     return returnString.toString();
   }
 }
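
ResultAnalyzer, per its new Javadoc, wraps a ConfusionMatrix and tracks the correctly and incorrectly classified totals that summarize() formats. A short usage sketch; the labels and scores are illustrative:

import java.util.Arrays;
import org.apache.mahout.classifier.ClassifierResult;
import org.apache.mahout.classifier.ResultAnalyzer;

public class ResultAnalyzerSketch {
  public static void main(String[] args) {
    ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList("spam", "ham"), "unknown");
    analyzer.addInstance("spam", new ClassifierResult("spam", 0.9));  // counted as correct
    analyzer.addInstance("ham", new ClassifierResult("spam", 0.6));   // counted as incorrect
    System.out.println(analyzer.summarize());  // accuracy percentages plus confusion matrix
  }
}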

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/BayesAlgorithm.java Wed Jan  6 02:46:22 2010
@@ -30,17 +30,20 @@
 import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
 import org.apache.mahout.classifier.bayes.interfaces.Algorithm;
 import org.apache.mahout.classifier.bayes.interfaces.Datastore;
-
-public class BayesAlgorithm implements Algorithm{
-
+/**
+ * Class implementing the Naive Bayes Classifier Algorithm
+ *
+ */
+public class BayesAlgorithm implements Algorithm {
+  
   @Override
   public ClassifierResult classifyDocument(String[] document,
-      Datastore datastore, String defaultCategory)
-      throws InvalidDatastoreException {
+                                           Datastore datastore,
+                                           String defaultCategory) throws InvalidDatastoreException {
     ClassifierResult result = new ClassifierResult(defaultCategory);
     double max = Double.MAX_VALUE;
-    Collection<String> categories = datastore.getKeys("labelWeight");    
-
+    Collection<String> categories = datastore.getKeys("labelWeight");
+    
     for (String category : categories) {
       double prob = documentWeight(datastore, category, document);
       if (prob < max) {
@@ -51,15 +54,16 @@
     result.setScore(max);
     return result;
   }
-
+  
   @Override
   public ClassifierResult[] classifyDocument(String[] document,
-      Datastore datastore, String defaultCategory, int numResults)
-      throws InvalidDatastoreException {
-    Collection<String> categories = datastore.getKeys("labelWeight");    
-    PriorityQueue<ClassifierResult> pq =
-        new PriorityQueue<ClassifierResult>(numResults, new ByScoreLabelResultComparator());
-    for (String category : categories){
+                                             Datastore datastore,
+                                             String defaultCategory,
+                                             int numResults) throws InvalidDatastoreException {
+    Collection<String> categories = datastore.getKeys("labelWeight");
+    PriorityQueue<ClassifierResult> pq = new PriorityQueue<ClassifierResult>(
+        numResults, new ByScoreLabelResultComparator());
+    for (String category : categories) {
       double prob = documentWeight(datastore, category, document);
       if (prob > 0.0) {
         pq.add(new ClassifierResult(category, prob));
@@ -68,9 +72,9 @@
         }
       }
     }
-
+    
     if (pq.isEmpty()) {
-      return new ClassifierResult[] { new ClassifierResult(defaultCategory, 0.0) };
+      return new ClassifierResult[] {new ClassifierResult(defaultCategory, 0.0)};
     } else {
       List<ClassifierResult> result = new ArrayList<ClassifierResult>(pq.size());
       while (pq.isEmpty() == false) {
@@ -82,52 +86,50 @@
   }
   
   @Override
-  public double featureWeight(Datastore datastore, String label, String feature)
-      throws InvalidDatastoreException {       
+  public double featureWeight(Datastore datastore, String label, String feature) throws InvalidDatastoreException {
     
     double result = datastore.getWeight("weight", feature, label);
     double vocabCount = datastore.getWeight("sumWeight", "vocabCount");
-    double sumLabelWeight = datastore.getWeight("labelWeight", label);    
+    double sumLabelWeight = datastore.getWeight("labelWeight", label);
     double numerator = result + datastore.getWeight("params", "alpha_i");
-    double denominator = (sumLabelWeight + vocabCount);
+    double denominator = sumLabelWeight + vocabCount;
     double weight = Math.log(numerator / denominator);
     result = -weight;
     return result;
   }
-
+  
   @Override
   public void initialize(Datastore datastore) throws InvalidDatastoreException {
     datastore.getWeight("weight", "test", "test");
     datastore.getWeight("labelWeight", "test");
     datastore.getWeight("thetaNormalizer", "test");
   }
-
+  
   @Override
-  public double documentWeight(Datastore datastore, String label,
-      String[] document) throws InvalidDatastoreException {
-    Map<String, int[]> wordList = new HashMap<String, int[]>(1000);
+  public double documentWeight(Datastore datastore,
+                               String label,
+                               String[] document) throws InvalidDatastoreException {
+    Map<String,int[]> wordList = new HashMap<String,int[]>(1000);
     for (String word : document) {
       int[] count = wordList.get(word);
       if (count == null) {
-        count = new int[] { 0 };
+        count = new int[] {0};
         wordList.put(word, count);
       }
       count[0]++;
     }
     double result = 0.0;
-    for (Map.Entry<String, int[]> entry : wordList.entrySet()) {
+    for (Map.Entry<String,int[]> entry : wordList.entrySet()) {
       String word = entry.getKey();
       int count = entry.getValue()[0];
       result += count * featureWeight(datastore, label, word);
     }
     return result;
   }
-
+  
   @Override
-  public Collection<String> getLabels(Datastore datastore)
-      throws InvalidDatastoreException {
+  public Collection<String> getLabels(Datastore datastore) throws InvalidDatastoreException {
     return datastore.getKeys("labelWeight");
   }
-
-
+  
 }
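
Reading the reformatted featureWeight() and documentWeight() above (notation mine, not from the commit), the score BayesAlgorithm assigns to a document $d$ under label $c$ is a smoothed negative log-likelihood:

$$\mathrm{documentWeight}(d, c) \;=\; -\sum_{w \in d} n(w, d)\,\log\frac{W(w, c) + \alpha_i}{\Sigma_c + V}$$

where $n(w,d)$ is the count of word $w$ in the document, $W(w,c)$ the stored feature weight, $\Sigma_c$ the "labelWeight" sum for the label, $V$ the "vocabCount", and $\alpha_i$ the smoothing parameter from "params". classifyDocument() then returns the label with the smallest such weight, i.e. the maximum-likelihood label.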

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/algorithm/CBayesAlgorithm.java Wed Jan  6 02:46:22 2010
@@ -30,17 +30,20 @@
 import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
 import org.apache.mahout.classifier.bayes.interfaces.Algorithm;
 import org.apache.mahout.classifier.bayes.interfaces.Datastore;
-
+/**
+ * Class implementing the Complementary Naive Bayes Classifier Algorithm
+ *
+ */
 public class CBayesAlgorithm implements Algorithm {
-
+  
   @Override
   public ClassifierResult classifyDocument(String[] document,
-      Datastore datastore, String defaultCategory)
-      throws InvalidDatastoreException {
+                                           Datastore datastore,
+                                           String defaultCategory) throws InvalidDatastoreException {
     ClassifierResult result = new ClassifierResult(defaultCategory);
     double max = Double.MIN_VALUE;
     Collection<String> categories = datastore.getKeys("labelWeight");
-
+    
     for (String category : categories) {
       double prob = documentWeight(datastore, category, document);
       if (max < prob) {
@@ -51,14 +54,15 @@
     result.setScore(max);
     return result;
   }
-
+  
   @Override
   public ClassifierResult[] classifyDocument(String[] document,
-      Datastore datastore, String defaultCategory, int numResults)
-      throws InvalidDatastoreException {
+                                             Datastore datastore,
+                                             String defaultCategory,
+                                             int numResults) throws InvalidDatastoreException {
     Collection<String> categories = datastore.getKeys("labelWeight");
-    PriorityQueue<ClassifierResult> pq =
-        new PriorityQueue<ClassifierResult>(numResults, new ByScoreLabelResultComparator());
+    PriorityQueue<ClassifierResult> pq = new PriorityQueue<ClassifierResult>(
+        numResults, new ByScoreLabelResultComparator());
     for (String category : categories) {
       double prob = documentWeight(datastore, category, document);
       if (prob > 0.0) {
@@ -68,9 +72,9 @@
         }
       }
     }
-
+    
     if (pq.isEmpty()) {
-      return new ClassifierResult[] { new ClassifierResult(defaultCategory, 0.0) };
+      return new ClassifierResult[] {new ClassifierResult(defaultCategory, 0.0)};
     } else {
       List<ClassifierResult> result = new ArrayList<ClassifierResult>(pq.size());
       while (pq.isEmpty() == false) {
@@ -80,63 +84,62 @@
       return result.toArray(new ClassifierResult[pq.size()]);
     }
   }
-
+  
   @Override
-  public double featureWeight(Datastore datastore, String label, String feature)
-      throws InvalidDatastoreException {
-
+  public double featureWeight(Datastore datastore, String label, String feature) throws InvalidDatastoreException {
+    
     double result = datastore.getWeight("weight", feature, label);
     double vocabCount = datastore.getWeight("sumWeight", "vocabCount");
-
-    double sigma_j = datastore.getWeight("weight", feature, "sigma_j");
-    double sigma_jSigma_k = datastore.getWeight("sumWeight", "sigma_jSigma_k");
-    double sigma_k = datastore.getWeight("labelWeight", label);
-
+    
+    double featureSum = datastore.getWeight("weight", feature, "sigma_j");
+    double totalSum = datastore.getWeight("sumWeight", "sigma_jSigma_k");
+    double labelSum = datastore.getWeight("labelWeight", label);
+    
     double thetaNormalizer = datastore.getWeight("thetaNormalizer", label);
-
-    double numerator = sigma_j - result + datastore.getWeight("params", "alpha_i");
-    double denominator = (sigma_jSigma_k - sigma_k + vocabCount);
+    
+    double numerator = featureSum - result
+                       + datastore.getWeight("params", "alpha_i");
+    double denominator = totalSum - labelSum + vocabCount;
     
     double weight = Math.log(numerator / denominator);
-    //System.out.println(feature + " " + label+ "\t" +result + " " + vocabCount + " " + sigma_j + " " + sigma_k+ " " + sigma_jSigma_k+ " " + thetaNormalizer + " "+numerator + " " +denominator);
     
     result = weight / thetaNormalizer;
     
     return result;
   }
-
+  
   @Override
   public void initialize(Datastore datastore) throws InvalidDatastoreException {
     datastore.getWeight("weight", "test", "test");
     datastore.getWeight("labelWeight", "test");
     datastore.getWeight("thetaNormalizer", "test");
   }
-
+  
   @Override
-  public double documentWeight(Datastore datastore, String label,
-      String[] document) throws InvalidDatastoreException {
-    Map<String, int[]> wordList = new HashMap<String, int[]>(1000);
+  public double documentWeight(Datastore datastore,
+                               String label,
+                               String[] document) throws InvalidDatastoreException {
+    Map<String,int[]> wordList = new HashMap<String,int[]>(1000);
     for (String word : document) {
       int[] count = wordList.get(word);
       if (count == null) {
-        count = new int[] { 0 };
+        count = new int[] {0};
         wordList.put(word, count);
       }
       count[0]++;
     }
     double result = 0.0;
-    for (Map.Entry<String, int[]> entry : wordList.entrySet()) {
+    for (Map.Entry<String,int[]> entry : wordList.entrySet()) {
       String word = entry.getKey();
       int count = entry.getValue()[0];
       result += count * featureWeight(datastore, label, word);
     }
     return result;
   }
-
+  
   @Override
-  public Collection<String> getLabels(Datastore datastore)
-      throws InvalidDatastoreException {
+  public Collection<String> getLabels(Datastore datastore) throws InvalidDatastoreException {
     return datastore.getKeys("labelWeight");
   }
-
+  
 }

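For readers following the renames above (sigma_j to featureSum, sigma_jSigma_k to totalSum, sigma_k to labelSum), here is a minimal standalone sketch of the same featureWeight computation. The numeric values are hypothetical; in the real code they come from the Datastore.

    // Illustrative only: mirrors the numerator/denominator/weight steps shown in the diff above.
    public final class FeatureWeightSketch {
      public static void main(String[] args) {
        double result = 3.0;          // weight of this (feature, label) pair
        double vocabCount = 10000.0;  // number of distinct features in the model
        double featureSum = 42.0;     // total weight of the feature across labels (sigma_j)
        double totalSum = 500000.0;   // total weight over all features and labels (sigma_jSigma_k)
        double labelSum = 1200.0;     // total weight of the label (sigma_k)
        double thetaNormalizer = 1.5; // per-label normalizer
        double alphaI = 1.0;          // smoothing parameter alpha_i

        double numerator = featureSum - result + alphaI;
        double denominator = totalSum - labelSum + vocabCount;
        double weight = Math.log(numerator / denominator);

        System.out.println(weight / thetaNormalizer);
      }
    }
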
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesParameters.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesParameters.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesParameters.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesParameters.java Wed Jan  6 02:46:22 2010
@@ -17,14 +17,16 @@
 
 package org.apache.mahout.classifier.bayes.common;
 
-
 import org.apache.mahout.common.Parameters;
-
+/**
+ * BayesParameters is used for passing parameters to the Map/Reduce jobs;
+ * parameters include gramSize.
+ */
 public class BayesParameters extends Parameters {
-
-  public BayesParameters(int gramSize){
+  
+  public BayesParameters(int gramSize) {
     super();
     set("gramSize", Integer.toString(gramSize));
   }
- 
+  
 }

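A small usage sketch of the constructor above. The gram size of 1 and the extra "alpha_i" entry are illustrative choices, not something the patch prescribes; set and get with a default are inherited from Parameters, as used elsewhere in this commit.

    import org.apache.mahout.classifier.bayes.common.BayesParameters;

    public final class BayesParametersSketch {
      public static void main(String[] args) {
        // The constructor stores the gram size as the string parameter "gramSize".
        BayesParameters params = new BayesParameters(1);
        // Additional string-valued parameters can be set the same way.
        params.set("alpha_i", "1.0");
        System.out.println(params.get("alpha_i", "1.0"));
      }
    }
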
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/ByScoreLabelResultComparator.java Wed Jan  6 02:46:22 2010
@@ -17,13 +17,18 @@
 
 package org.apache.mahout.classifier.bayes.common;
 
-import org.apache.mahout.classifier.ClassifierResult;
-
 import java.io.Serializable;
 import java.util.Comparator;
 
-public final class ByScoreLabelResultComparator implements Comparator<ClassifierResult>, Serializable {
+import org.apache.mahout.classifier.ClassifierResult;
 
+/**
+ * Compares two classification results by score, breaking ties by comparing
+ * their labels.
+ */
+public final class ByScoreLabelResultComparator implements
+    Comparator<ClassifierResult>, Serializable {
+  
   @Override
   public int compare(ClassifierResult cr1, ClassifierResult cr2) {
     double score1 = cr1.getScore();
@@ -36,5 +41,5 @@
       return cr1.getLabel().compareTo(cr2.getLabel());
     }
   }
-
+  
 }

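A minimal sketch of how this comparator is used with a PriorityQueue, mirroring the queue built in the algorithm code earlier in this commit. The capacity and the two results are made-up values.

    import java.util.PriorityQueue;

    import org.apache.mahout.classifier.ClassifierResult;
    import org.apache.mahout.classifier.bayes.common.ByScoreLabelResultComparator;

    public final class ResultQueueSketch {
      public static void main(String[] args) {
        PriorityQueue<ClassifierResult> pq =
            new PriorityQueue<ClassifierResult>(2, new ByScoreLabelResultComparator());
        pq.add(new ClassifierResult("label1", 0.8));
        pq.add(new ClassifierResult("label2", 0.3));
        // The head is whichever result the comparator orders first; ties fall back to the label.
        ClassifierResult head = pq.poll();
        System.out.println(head.getLabel() + '\t' + head.getScore());
      }
    }
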
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java Wed Jan  6 02:46:22 2010
@@ -30,87 +30,91 @@
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.mahout.common.Parameters;
 import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
 import org.apache.mahout.classifier.bayes.interfaces.Datastore;
 import org.apache.mahout.classifier.bayes.mapreduce.common.BayesConstants;
+import org.apache.mahout.common.Parameters;
 import org.apache.mahout.common.cache.Cache;
 import org.apache.mahout.common.cache.HybridCache;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+/**
+ * Datastore implementation that lets Algorithms read an HBase-based model.
+ */
 public class HBaseBayesDatastore implements Datastore {
-
-  private static final Logger log = LoggerFactory.getLogger(HBaseBayesDatastore.class);
-
-  private HBaseConfiguration config = null;
-
-  private HTable table = null;
-
-  private Cache<String, Result> tableCache = null;
-
+  
+  private static final Logger log = LoggerFactory
+      .getLogger(HBaseBayesDatastore.class);
+  
+  private HBaseConfiguration config;
+  
+  private HTable table;
+  
+  private Cache<String,Result> tableCache;
+  
   private final String hbaseTable;
-
-  private Parameters parameters = null;
-
+  
+  private Parameters parameters;
+  
   private double thetaNormalizer = 1.0;
-
-  private double alpha_i = 1.0;
-
-  private final Map<String, Set<String>> keys = new HashMap<String, Set<String>>();
-
+  
+  private double alphaI = 1.0;
+  
+  private final Map<String,Set<String>> keys = new HashMap<String,Set<String>>();
+  
   private double vocabCount = -1.0;
-
-  private double sigma_jSigma_k = -1.0;
-
-
+  
+  private double sigmaJSigmaK = -1.0;
+  
   public HBaseBayesDatastore(String hbaseTable, Parameters params) {
     this.hbaseTable = hbaseTable;
     this.parameters = params;
-    this.tableCache = new HybridCache<String, Result>(50000, 100000);
-    alpha_i = Double.valueOf(parameters.get("alpha_i", "1.0"));
+    this.tableCache = new HybridCache<String,Result>(50000, 100000);
+    alphaI = Double.valueOf(parameters.get("alpha_i", "1.0"));
   }
-
+  
   protected HBaseConfiguration getConfig() {
     return config;
   }
-
+  
   protected HTable getTable() {
     return table;
   }
-
-  protected Cache<String, Result> getTableCache() {
+  
+  protected Cache<String,Result> getTableCache() {
     return tableCache;
   }
-
+  
   protected String getHbaseTable() {
     return hbaseTable;
   }
-
+  
   protected Parameters getParameters() {
     return parameters;
   }
-
+  
   protected double getThetaNormalizer() {
     return thetaNormalizer;
   }
-
-  protected double getAlpha_i() {
-    return alpha_i;
+  
+  protected double getAlphaI() {
+    return alphaI;
   }
-
-  Map<String, Set<String>> getKeys() {
+  
+  Map<String,Set<String>> getKeys() {
     return keys;
   }
-
+  
   protected double getVocabCount() {
     return vocabCount;
   }
-
-  protected double getSigma_jSigma_k() {
-    return sigma_jSigma_k;
+  
+  protected double getSigmaJSigmaK() {
+    return sigmaJSigmaK;
   }
-
+  
   @Override
   public void initialize() throws InvalidDatastoreException {
     config = new HBaseConfiguration(new Configuration());
@@ -122,38 +126,36 @@
     Collection<String> labels = getKeys("thetaNormalizer");
     for (String label : labels) {
       thetaNormalizer = Math.max(thetaNormalizer, Math.abs(getWeightFromHbase(
-          BayesConstants.LABEL_THETA_NORMALIZER, label)));
+        BayesConstants.LABEL_THETA_NORMALIZER, label)));
     }
     for (String label : labels) {
-      log.info("{} {} {} {}", new Object[] {
-               label,
-               getWeightFromHbase(BayesConstants.LABEL_THETA_NORMALIZER, label),
-               thetaNormalizer,
-               getWeightFromHbase(BayesConstants.LABEL_THETA_NORMALIZER, label)
-                 / thetaNormalizer});
+      log.info("{} {} {} {}",
+        new Object[] {label,
+                      getWeightFromHbase(BayesConstants.LABEL_THETA_NORMALIZER,
+                        label),
+                      thetaNormalizer,
+                      getWeightFromHbase(BayesConstants.LABEL_THETA_NORMALIZER,
+                        label) / thetaNormalizer});
     }
   }
-
+  
   @Override
-  public Collection<String> getKeys(String name)
-      throws InvalidDatastoreException {
-    if (keys.containsKey(name))
-      return keys.get(name);
+  public Collection<String> getKeys(String name) throws InvalidDatastoreException {
+    if (keys.containsKey(name)) return keys.get(name);
     Result r;
     if (name.equals("labelWeight")) {
       r = getRowFromHbase(BayesConstants.LABEL_SUM);
     } else if (name.equals("thetaNormalizer")) {
       r = getRowFromHbase(BayesConstants.LABEL_THETA_NORMALIZER);
-    } else
-      r = getRowFromHbase(name);
-
+    } else r = getRowFromHbase(name);
+    
     if (r == null) {
       log.error("Encountered NULL");
       throw new InvalidDatastoreException("Encountered NULL");
     }
-
-    Set<byte[]> labelBytes = r.getNoVersionMap().get(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY))
-        .keySet();
+    
+    Set<byte[]> labelBytes = r.getNoVersionMap().get(
+      Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY)).keySet();
     Set<String> keySet = new HashSet<String>();
     for (byte[] key : labelBytes) {
       keySet.add(Bytes.toString(key));
@@ -161,48 +163,40 @@
     keys.put(name, keySet);
     return keySet;
   }
-
+  
   @Override
-  public double getWeight(String matrixName, String row, String column)
-      throws InvalidDatastoreException {
+  public double getWeight(String matrixName, String row, String column) throws InvalidDatastoreException {
     if ("weight".equals(matrixName)) {
-      if (column.equals("sigma_j"))
-        return getSigma_jFromHbase(row);
-      else
-        return getWeightFromHbase(row, column);
-    } else
-      throw new InvalidDatastoreException();
+      if (column.equals("sigma_j")) return getSigmaJFromHbase(row);
+      else return getWeightFromHbase(row, column);
+    } else throw new InvalidDatastoreException();
   }
-
+  
   @Override
-  public double getWeight(String vectorName, String index)
-      throws InvalidDatastoreException {
+  public double getWeight(String vectorName, String index) throws InvalidDatastoreException {
     if (vectorName.equals("sumWeight")) {
-      if (index.equals("vocabCount"))
-        return getVocabCountFromHbase();
-      else if (index.equals("sigma_jSigma_k"))
-        return getSigma_jSigma_kFromHbase();
-      else
-        throw new InvalidDatastoreException();
-
+      if (index.equals("vocabCount")) return getVocabCountFromHbase();
+      else if (index.equals("sigma_jSigma_k")) return getSigmaJSigmaKFromHbase();
+      else throw new InvalidDatastoreException();
+      
     } else if (vectorName.equals("labelWeight")) {
       return getWeightFromHbase(BayesConstants.LABEL_SUM, index);
     } else if (vectorName.equals("thetaNormalizer")) {
       return getWeightFromHbase(BayesConstants.LABEL_THETA_NORMALIZER, index)
-          / thetaNormalizer;
+             / thetaNormalizer;
     } else if (vectorName.equals("params")) {
-      if(index.equals("alpha_i")) return alpha_i;
+      if (index.equals("alpha_i")) return alphaI;
       else throw new InvalidDatastoreException();
     } else {
-
+      
       throw new InvalidDatastoreException();
     }
   }
-
+  
   protected double getCachedCell(String row, String family, String column) {
-    Result r;
-
-    if ((r = tableCache.get(row)) == null) {
+    Result r = tableCache.get(row);
+    
+    if (r == null) {
       Get g = new Get(Bytes.toBytes(row));
       g.addFamily(Bytes.toBytes(family));
       try {
@@ -213,57 +207,55 @@
       tableCache.set(row, r);
     }
     byte[] value = r.getValue(
-        Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes
-            .toBytes(column));
-    if (value == null)
-      return 0.0;
+      Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes.toBytes(column));
+    if (value == null) return 0.0;
     return Bytes.toDouble(value);
-
+    
   }
-
+  
   protected double getWeightFromHbase(String feature, String label) {
     return getCachedCell(feature, BayesConstants.HBASE_COLUMN_FAMILY, label);
   }
-
+  
   protected Result getRowFromHbase(String feature) {
-    Result r = null;
+    Result r = tableCache.get(feature);
     try {
-      if ((r = tableCache.get(feature)) == null) {
+      if (r == null) {
         Get g = new Get(Bytes.toBytes(feature));
         g.addFamily(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY));
         r = table.get(g);
         tableCache.set(feature, r);
         return r;
-      } else
-        return r;
-
+      } else return r;
+      
     } catch (IOException e) {
       return r;
     }
   }
-
-  protected double getSigma_jFromHbase(String feature) {
-    return getCachedCell(feature, BayesConstants.HBASE_COLUMN_FAMILY, BayesConstants.FEATURE_SUM);
+  
+  protected double getSigmaJFromHbase(String feature) {
+    return getCachedCell(feature, BayesConstants.HBASE_COLUMN_FAMILY,
+      BayesConstants.FEATURE_SUM);
   }
-
+  
   protected double getVocabCountFromHbase() {
     if (vocabCount == -1.0) {
       vocabCount = getCachedCell(BayesConstants.HBASE_COUNTS_ROW,
-          BayesConstants.HBASE_COLUMN_FAMILY, BayesConstants.FEATURE_SET_SIZE);
+        BayesConstants.HBASE_COLUMN_FAMILY, BayesConstants.FEATURE_SET_SIZE);
       return vocabCount;
     } else {
       return vocabCount;
     }
   }
-
-  protected double getSigma_jSigma_kFromHbase() {
-    if (sigma_jSigma_k == -1.0) {
-      sigma_jSigma_k = getCachedCell(BayesConstants.HBASE_COUNTS_ROW,
-          BayesConstants.HBASE_COLUMN_FAMILY, BayesConstants.TOTAL_SUM);
-      return sigma_jSigma_k;
+  
+  protected double getSigmaJSigmaKFromHbase() {
+    if (sigmaJSigmaK == -1.0) {
+      sigmaJSigmaK = getCachedCell(BayesConstants.HBASE_COUNTS_ROW,
+        BayesConstants.HBASE_COLUMN_FAMILY, BayesConstants.TOTAL_SUM);
+      return sigmaJSigmaK;
     } else {
-      return sigma_jSigma_k;
+      return sigmaJSigmaK;
     }
   }
-
+  
 }

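The restructured getCachedCell and getRowFromHbase above both follow a read-through caching pattern: check the cache, fetch from HBase on a miss, then populate the cache. A generic sketch of that pattern, with a HashMap standing in for HybridCache and a faked fetch instead of the real HTable.get call:

    import java.util.HashMap;
    import java.util.Map;

    public final class ReadThroughCacheSketch {
      private final Map<String, Double> cache = new HashMap<String, Double>();

      double getCachedCell(String row) {
        Double value = cache.get(row);
        if (value == null) {            // cache miss: go to the backing store
          value = fetchFromStore(row);  // stands in for HTable.get(new Get(...))
          cache.put(row, value);        // populate the cache for the next call
        }
        return value;
      }

      private double fetchFromStore(String row) {
        return row.length();            // placeholder for the real HBase lookup
      }

      public static void main(String[] args) {
        System.out.println(new ReadThroughCacheSketch().getCachedCell("someFeature"));
      }
    }
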
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java Wed Jan  6 02:46:22 2010
@@ -31,20 +31,24 @@
 import org.apache.mahout.common.Parameters;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-
+/**
+ * Datastore implementation that lets Algorithms read an in-memory model.
+ */
 public class InMemoryBayesDatastore implements Datastore {
 
   private static final Logger log = LoggerFactory.getLogger(InMemoryBayesDatastore.class);
 
-  private final Map<String, Map<String, Map<String, Double>>> matrices = new HashMap<String, Map<String, Map<String, Double>>>();
-
-  private final Map<String, Map<String, Double>> vectors = new HashMap<String, Map<String, Double>>();
-
-  private Parameters params = null;
-
+  private final Map<String,Map<String,Map<String,Double>>> matrices 
+    = new HashMap<String,Map<String,Map<String,Double>>>();
+  
+  private final Map<String,Map<String,Double>> vectors = new HashMap<String,Map<String,Double>>();
+  
+  private Parameters params;
+  
   private double thetaNormalizer = 1.0;
 
-  private double alpha_i = 1.0;
+  private double alphaI = 1.0;
 
   public InMemoryBayesDatastore(Parameters params) {
 
@@ -61,7 +65,7 @@
         + "/trainer-weights/Sigma_kSigma_j/part-*");
     params.set("thetaNormalizer", basePath + "/trainer-thetaNormalizer/part-*");
     params.set("weight", basePath + "/trainer-tfIdf/trainer-tfIdf/part-*");
-    alpha_i = Double.valueOf(params.get("alpha_i", "1.0"));
+    alphaI = Double.valueOf(params.get("alpha_i", "1.0"));
   }
 
   @Override
@@ -81,120 +85,119 @@
           "thetaNormalizer", label)));
     }
     for (String label : labels) {
-      log.info("{} {} {} {}", new Object[] {
-               label,
-               vectorGetCell("thetaNormalizer", label),
-               thetaNormalizer,
-               vectorGetCell("thetaNormalizer", label) / thetaNormalizer});
+      log.info("{} {} {} {}", new Object[] {label,
+                                            vectorGetCell("thetaNormalizer",
+                                              label),
+                                            thetaNormalizer,
+                                            vectorGetCell("thetaNormalizer",
+                                              label) / thetaNormalizer});
     }
   }
 
   @Override
-  public Collection<String> getKeys(String name)
-      throws InvalidDatastoreException {
+  public Collection<String> getKeys(String name) throws InvalidDatastoreException {
     return vectors.get("labelWeight").keySet();
   }
-
+  
   @Override
-  public double getWeight(String matrixName, String row, String column)
-      throws InvalidDatastoreException {
+  public double getWeight(String matrixName, String row, String column) throws InvalidDatastoreException {
     return matrixGetCell(matrixName, row, column);
   }
-
+  
   @Override
-  public double getWeight(String vectorName, String index)
-      throws InvalidDatastoreException {
-    if (vectorName.equals("thetaNormalizer"))
-      return vectorGetCell(vectorName, index) / thetaNormalizer;
+  public double getWeight(String vectorName, String index) throws InvalidDatastoreException {
+    if (vectorName.equals("thetaNormalizer")) return vectorGetCell(vectorName,
+      index)
+                                                     / thetaNormalizer;
     else if (vectorName.equals("params")) {
-      if(index.equals("alpha_i")) return alpha_i;
+      if (index.equals("alpha_i")) return alphaI;
       else throw new InvalidDatastoreException();
-    } 
+    }
     return vectorGetCell(vectorName, index);
   }
-
-  private double matrixGetCell(String matrixName, String row, String col)
-      throws InvalidDatastoreException {
-    Map<String, Map<String, Double>> matrix = matrices.get(matrixName);
+  
+  private double matrixGetCell(String matrixName, String row, String col) throws InvalidDatastoreException {
+    Map<String,Map<String,Double>> matrix = matrices.get(matrixName);
     if (matrix == null) {
       throw new InvalidDatastoreException();
     }
-    Map<String, Double> rowVector = matrix.get(row);
+    Map<String,Double> rowVector = matrix.get(row);
     if (rowVector == null) {
       return 0.0;
     }
     return nullToZero(rowVector.get(col));
   }
-
-  private double vectorGetCell(String vectorName, String index)
-      throws InvalidDatastoreException {
-
-    Map<String, Double> vector = vectors.get(vectorName);
+  
+  private double vectorGetCell(String vectorName, String index) throws InvalidDatastoreException {
+    
+    Map<String,Double> vector = vectors.get(vectorName);
     if (vector == null) {
       throw new InvalidDatastoreException();
     }
     return nullToZero(vector.get(index));
   }
-
-  private void matrixPutCell(String matrixName, String row, String col,
-      double weight) {
-    Map<String, Map<String, Double>> matrix = matrices.get(matrixName);
+  
+  private void matrixPutCell(String matrixName,
+                             String row,
+                             String col,
+                             double weight) {
+    Map<String,Map<String,Double>> matrix = matrices.get(matrixName);
     if (matrix == null) {
-      matrix = new HashMap<String, Map<String, Double>>();
+      matrix = new HashMap<String,Map<String,Double>>();
       matrices.put(matrixName, matrix);
     }
-    Map<String, Double> rowVector = matrix.get(row);
+    Map<String,Double> rowVector = matrix.get(row);
     if (rowVector == null) {
-      rowVector = new HashMap<String, Double>();
+      rowVector = new HashMap<String,Double>();
       matrix.put(row, rowVector);
     }
     rowVector.put(col, weight);
   }
-
+  
   private void vectorPutCell(String vectorName, String index, double weight) {
-
-    Map<String, Double> vector = vectors.get(vectorName);
+    
+    Map<String,Double> vector = vectors.get(vectorName);
     if (vector == null) {
-      vector = new HashMap<String, Double>();
+      vector = new HashMap<String,Double>();
       vectors.put(vectorName, vector);
     }
     vector.put(index, weight);
   }
-
+  
   private long sizeOfMatrix(String matrixName) {
-    Map<String, Map<String, Double>> matrix = matrices.get(matrixName);
+    Map<String,Map<String,Double>> matrix = matrices.get(matrixName);
     if (matrix == null) {
       return 0;
     }
     return matrix.size();
   }
-
+  
   public void loadFeatureWeight(String feature, String label, double weight) {
     matrixPutCell("weight", feature, label, weight);
   }
-
+  
   public void setSumFeatureWeight(String feature, double weight) {
     matrixPutCell("weight", feature, "sigma_j", weight);
   }
-
+  
   public void setSumLabelWeight(String label, double weight) {
     vectorPutCell("labelWeight", label, weight);
   }
-
+  
   public void setThetaNormalizer(String label, double weight) {
     vectorPutCell("thetaNormalizer", label, weight);
   }
-
-  public void setSigma_jSigma_k(double weight) {
+  
+  public void setSigmaJSigmaK(double weight) {
     vectorPutCell("sumWeight", "sigma_jSigma_k", weight);
   }
-
+  
   public void updateVocabCount() {
     vectorPutCell("sumWeight", "vocabCount", sizeOfMatrix("weight"));
   }
-
+  
   private static double nullToZero(Double value) {
     return value == null ? 0.0 : value;
   }
-
+  
 }

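The in-memory model is held as a nested-map sparse matrix, with matrixPutCell creating row maps lazily and matrixGetCell treating missing rows or cells as zero. A self-contained sketch of that structure, with hypothetical row/column names:

    import java.util.HashMap;
    import java.util.Map;

    public final class SparseMatrixSketch {
      private final Map<String, Map<String, Double>> matrix =
          new HashMap<String, Map<String, Double>>();

      void put(String row, String col, double weight) {
        Map<String, Double> rowVector = matrix.get(row);
        if (rowVector == null) {
          rowVector = new HashMap<String, Double>();
          matrix.put(row, rowVector);   // create the row map on first use
        }
        rowVector.put(col, weight);
      }

      double get(String row, String col) {
        Map<String, Double> rowVector = matrix.get(row);
        if (rowVector == null) {
          return 0.0;                   // missing row counts as zero weight
        }
        Double value = rowVector.get(col);
        return value == null ? 0.0 : value;
      }

      public static void main(String[] args) {
        SparseMatrixSketch m = new SparseMatrixSketch();
        m.put("someFeature", "someLabel", 2.5);
        System.out.println(m.get("someFeature", "someLabel"));  // 2.5
        System.out.println(m.get("someFeature", "otherLabel")); // 0.0
      }
    }
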
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/exceptions/InvalidDatastoreException.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/exceptions/InvalidDatastoreException.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/exceptions/InvalidDatastoreException.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/exceptions/InvalidDatastoreException.java Wed Jan  6 02:46:22 2010
@@ -17,11 +17,14 @@
 
 package org.apache.mahout.classifier.bayes.exceptions;
 
+/**
+ * Exception thrown when the datastore is accessed illegally or when the
+ * backend storage goes down. Check the inner exception for details.
+ */
 public final class InvalidDatastoreException extends Exception {
-
-  public InvalidDatastoreException() {
-  }
-
+  
+  public InvalidDatastoreException() { }
+  
   public InvalidDatastoreException(String message) {
     super(message);
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Algorithm.java Wed Jan  6 02:46:22 2010
@@ -22,6 +22,10 @@
 import org.apache.mahout.classifier.ClassifierResult;
 import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
 
+/**
+ * The interface for implementing variations of the Bayes algorithm.
+ */
 public interface Algorithm {
   /**
    * Initialize the data store and verifies the data in it.
@@ -30,76 +34,89 @@
    * @throws InvalidDatastoreException
    */
   void initialize(Datastore datastore) throws InvalidDatastoreException;
-
+  
   /**
    * Classify the document and return the Result
    * 
-   * @param document The document to classify
-   * @param datastore The data store(InMemory, HBase)
-   * @param defaultCategory The default category to assign Ties are broken by
-   *        comparing the category
+   * @param document
+   *          The document to classify
+   * @param datastore
+   *          The data store(InMemory, HBase)
+   * @param defaultCategory
+   *          The default category to assign. Ties are broken by comparing the
+   *          category.
    * @return A Collection of
    *         {@link org.apache.mahout.classifier.ClassifierResult}s.
    * @throws InvalidDatastoreException
    */
   ClassifierResult classifyDocument(String[] document,
-      Datastore datastore, String defaultCategory)
-      throws InvalidDatastoreException;
-
+                                    Datastore datastore,
+                                    String defaultCategory) throws InvalidDatastoreException;
+  
   /**
    * Classify the document and return the top <code>numResults</code>
    * 
-   * @param document The document to classify
-   * @param datastore The
-   *        {@link org.apache.mahout.classifier.bayes.interfaces.Datastore}
-   *        (InMemory, HBase)
-   * @param defaultCategory The default category to assign
-   * @param numResults The maximum number of results to return, ranked by score.
-   *        Ties are broken by comparing the category
+   * @param document
+   *          The document to classify
+   * @param datastore
+   *          The
+   *          {@link org.apache.mahout.classifier.bayes.interfaces.Datastore}
+   *          (InMemory, HBase)
+   * @param defaultCategory
+   *          The default category to assign
+   * @param numResults
+   *          The maximum number of results to return, ranked by score. Ties are
+   *          broken by comparing the category
    * @return A Collection of
    *         {@link org.apache.mahout.classifier.ClassifierResult}s.
    * @throws InvalidDatastoreException
    */
   ClassifierResult[] classifyDocument(String[] document,
-      Datastore datastore, String defaultCategory, int numResults)
-      throws InvalidDatastoreException;
-
+                                      Datastore datastore,
+                                      String defaultCategory,
+                                      int numResults) throws InvalidDatastoreException;
+  
   /**
    * Get the weighted probability of the feature.
    * 
-   * @param datastore The
-   *        {@link org.apache.mahout.classifier.bayes.interfaces.Datastore}
-   *        (InMemory, HBase)
-   * @param label The label of the feature
-   * @param feature The feature to calc. the prob. for
+   * @param datastore
+   *          The
+   *          {@link org.apache.mahout.classifier.bayes.interfaces.Datastore}
+   *          (InMemory, HBase)
+   * @param label
+   *          The label of the feature
+   * @param feature
+   *          The feature to calc. the prob. for
    * @return The weighted probability
    * @throws InvalidDatastoreException
    */
-  double featureWeight(Datastore datastore, String label, String feature)
-      throws InvalidDatastoreException;
-
+  double featureWeight(Datastore datastore, String label, String feature) throws InvalidDatastoreException;
+  
   /**
    * Calculate the document weight as the dot product of document vector and the
    * corresponding weight vector of a particular class
    * 
-   * @param datastore The
-   *        {@link org.apache.mahout.classifier.bayes.interfaces.Datastore}
-   *        (InMemory, HBase)
-   * @param label The label to calculate the probability of
-   * @param document The document
+   * @param datastore
+   *          The
+   *          {@link org.apache.mahout.classifier.bayes.interfaces.Datastore}
+   *          (InMemory, HBase)
+   * @param label
+   *          The label to calculate the probability of
+   * @param document
+   *          The document
    * @return The probability
    * @throws InvalidDatastoreException
    * @see Algorithm#featureWeight(Datastore, String, String)
    */
-  double documentWeight(Datastore datastore, String label,
-      String[] document) throws InvalidDatastoreException;
-
+  double documentWeight(Datastore datastore, String label, String[] document) throws InvalidDatastoreException;
+  
   /**
    * Returns the labels in the given Model
    * 
-   * @param datastore The
-   *        {@link org.apache.mahout.classifier.bayes.interfaces.Datastore}
-   *        (InMemory, HBase)
+   * @param datastore
+   *          The
+   *          {@link org.apache.mahout.classifier.bayes.interfaces.Datastore}
+   *          (InMemory, HBase)
    * @throws InvalidDatastoreException
    * @return {@link Collection} of labels
    */

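An illustrative caller written against the Algorithm interface documented above: verify the datastore, then ask for the top-N categories. How the concrete algorithm and datastore are constructed is out of scope here; the default category "unknown" and the limit of 5 are made-up values.

    import org.apache.mahout.classifier.ClassifierResult;
    import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
    import org.apache.mahout.classifier.bayes.interfaces.Algorithm;
    import org.apache.mahout.classifier.bayes.interfaces.Datastore;

    public final class ClassifySketch {
      static void classify(Algorithm algorithm, Datastore datastore, String[] document)
          throws InvalidDatastoreException {
        algorithm.initialize(datastore);   // verifies the model before classifying
        ClassifierResult[] results =
            algorithm.classifyDocument(document, datastore, "unknown", 5);
        for (ClassifierResult result : results) {
          System.out.println(result.getLabel() + '\t' + result.getScore());
        }
      }
    }
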
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java?rev=896311&r1=896310&r2=896311&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/interfaces/Datastore.java Wed Jan  6 02:46:22 2010
@@ -19,7 +19,10 @@
 
 import java.util.Collection;
 import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
-
+/**
+ * The Datastore interface for the {@link Algorithm} to use
+ *
+ */
 public interface Datastore {
   /**
    * Gets a double value from the Matrix pointed to by the


