mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From robina...@apache.org
Subject svn commit: r909882 [3/5] - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout: classifier/ classifier/bayes/ classifier/bayes/algorithm/ classifier/bayes/common/ classifier/bayes/datastore/ classifier/bayes/exceptions/ classifier/bayes/inter...
Date Sat, 13 Feb 2010 19:57:24 GMT
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java Sat Feb 13 19:57:04 2010
@@ -39,8 +39,7 @@
 public class BayesWeightSummerReducer extends MapReduceBase implements
     Reducer<StringTuple,DoubleWritable,StringTuple,DoubleWritable> {
   
-  private static final Logger log = LoggerFactory
-      .getLogger(BayesWeightSummerReducer.class);
+  private static final Logger log = LoggerFactory.getLogger(BayesWeightSummerReducer.class);
   
   private HTable table;
   
@@ -70,20 +69,19 @@
         String feature = key.stringAt(1);
         
         Put bu = new Put(Bytes.toBytes(feature));
-        bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes
-            .toBytes(BayesConstants.FEATURE_SUM), Bytes.toBytes(sum));
+        bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes.toBytes(BayesConstants.FEATURE_SUM),
+          Bytes.toBytes(sum));
         table.put(bu);
         
       } else if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) {
         String label = key.stringAt(1);
         Put bu = new Put(Bytes.toBytes(BayesConstants.LABEL_SUM));
-        bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes
-            .toBytes(label), Bytes.toBytes(sum));
+        bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes.toBytes(label), Bytes.toBytes(sum));
         table.put(bu);
       } else if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
         Put bu = new Put(Bytes.toBytes(BayesConstants.HBASE_COUNTS_ROW));
-        bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes
-            .toBytes(BayesConstants.TOTAL_SUM), Bytes.toBytes(sum));
+        bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes.toBytes(BayesConstants.TOTAL_SUM),
+          Bytes.toBytes(sum));
         table.put(bu);
       }
     }
@@ -94,15 +92,17 @@
   @Override
   public void configure(JobConf job) {
     try {
-      Parameters params = Parameters
-          .fromString(job.get("bayes.parameters", ""));
-      if (params.get("dataSource").equals("hbase")) useHbase = true;
-      else return;
+      Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));
+      if (params.get("dataSource").equals("hbase")) {
+        useHbase = true;
+      } else {
+        return;
+      }
       
       HBaseConfiguration hBconf = new HBaseConfiguration(job);
       table = new HTable(hBconf, job.get("output.table"));
     } catch (IOException e) {
-      log.error("Unexpected error during configuration", e);
+      BayesWeightSummerReducer.log.error("Unexpected error during configuration", e);
     }
     
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java Sat Feb 13 19:57:04 2010
@@ -30,9 +30,10 @@
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+
 /**
  * Base class for executing the Bayes Map/Reduce Jobs
- *
+ * 
  */
 public final class JobExecutor {
   /** Logger for this class. */
@@ -41,8 +42,7 @@
   private JobExecutor() { }
   
   /**
-   * Execute a bayes classification job. Input and output path are parsed from
-   * the input parameters.
+   * Execute a bayes classification job. Input and output path are parsed from the input parameters.
    * 
    * @param args
    *          input parameters.
@@ -58,8 +58,8 @@
     Option outputOpt = DefaultOptionCreator.outputOption().create();
     Option helpOpt = DefaultOptionCreator.helpOption();
     
-    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(
-      outputOpt).withOption(helpOpt).create();
+    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(helpOpt)
+        .create();
     
     try {
       Parser parser = new Parser();
@@ -76,7 +76,7 @@
       
       job.runJob(input, output, new BayesParameters(1));
     } catch (OptionException e) {
-      log.error(e.getMessage());
+      JobExecutor.log.error(e.getMessage());
       CommandLineUtil.printHelp(group);
     }
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java Sat Feb 13 19:57:04 2010
@@ -23,9 +23,10 @@
 import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
 import org.apache.mahout.classifier.bayes.interfaces.Algorithm;
 import org.apache.mahout.classifier.bayes.interfaces.Datastore;
+
 /**
  * The Classifier Wrapper used for choosing the {@link Algorithm} and {@link Datastore}
- *
+ * 
  */
 public class ClassifierContext {
   
@@ -38,8 +39,7 @@
   }
   
   /**
-   * Initializes the Context. Gets the necessary data and checks if the
-   * Datastore is valid
+   * Initializes the Context. Gets the necessary data and checks if the Datastore is valid
    * 
    * @throws InvalidDatastoreException
    */
@@ -54,14 +54,11 @@
    * @param document
    *          The document to classify
    * @param defaultCategory
-   *          The default category to assign Ties are broken by comparing the
-   *          category
-   * @return A Collection of
-   *         {@link org.apache.mahout.classifier.ClassifierResult}s.
+   *          The default category to assign Ties are broken by comparing the category
+   * @return A Collection of {@link org.apache.mahout.classifier.ClassifierResult}s.
    * @throws InvalidDatastoreException
    */
-  public ClassifierResult classifyDocument(String[] document,
-                                           String defaultCategory) throws InvalidDatastoreException {
+  public ClassifierResult classifyDocument(String[] document, String defaultCategory) throws InvalidDatastoreException {
     return algorithm.classifyDocument(document, datastore, defaultCategory);
   }
   
@@ -73,17 +70,15 @@
    * @param defaultCategory
    *          The default category to assign
    * @param numResults
-   *          The maximum number of results to return, ranked by score. Ties are
-   *          broken by comparing the category
-   * @return A Collection of
-   *         {@link org.apache.mahout.classifier.ClassifierResult}s.
+   *          The maximum number of results to return, ranked by score. Ties are broken by comparing the
+   *          category
+   * @return A Collection of {@link org.apache.mahout.classifier.ClassifierResult}s.
    * @throws InvalidDatastoreException
    */
   public ClassifierResult[] classifyDocument(String[] document,
                                              String defaultCategory,
                                              int numResults) throws InvalidDatastoreException {
-    return algorithm.classifyDocument(document, datastore, defaultCategory,
-      numResults);
+    return algorithm.classifyDocument(document, datastore, defaultCategory, numResults);
   }
   
   /**

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java Sat Feb 13 19:57:04 2010
@@ -21,14 +21,13 @@
 import org.apache.commons.cli2.util.HelpFormatter;
 
 public final class CommandLineUtil {
-
-  private CommandLineUtil() {
-  }
-
+  
+  private CommandLineUtil() { }
+  
   public static void printHelp(Group group) {
     HelpFormatter formatter = new HelpFormatter();
     formatter.setGroup(group);
     formatter.print();
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterable.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterable.java Sat Feb 13 19:57:04 2010
@@ -24,48 +24,49 @@
 import java.util.Iterator;
 
 /**
- * Iterable representing the lines of a text file. It can produce an {@link Iterator} over those lines. This assumes the
- * text file's lines are delimited in a manner consistent with how {@link java.io.BufferedReader} defines lines.
- *
+ * Iterable representing the lines of a text file. It can produce an {@link Iterator} over those lines. This
+ * assumes the text file's lines are delimited in a manner consistent with how {@link java.io.BufferedReader}
+ * defines lines.
+ * 
  * This class will uncompress files that end in .zip or .gz accordingly, too.
  */
 public final class FileLineIterable implements Iterable<String> {
-
+  
   private static final Charset UTF8 = Charset.forName("UTF-8");
-
+  
   private final InputStream is;
   private final Charset encoding;
   private final boolean skipFirstLine;
-
+  
   /** Creates a {@link FileLineIterable} over a given file, assuming a UTF-8 encoding. */
   public FileLineIterable(File file) throws IOException {
-    this(file, UTF8, false);
+    this(file, FileLineIterable.UTF8, false);
   }
-
+  
   /** Creates a {@link FileLineIterable} over a given file, assuming a UTF-8 encoding. */
   public FileLineIterable(File file, boolean skipFirstLine) throws IOException {
-    this(file, UTF8, skipFirstLine);
+    this(file, FileLineIterable.UTF8, skipFirstLine);
   }
-
+  
   /** Creates a {@link FileLineIterable} over a given file, using the given encoding. */
   public FileLineIterable(File file, Charset encoding, boolean skipFirstLine) throws IOException {
     this(FileLineIterator.getFileInputStream(file), encoding, skipFirstLine);
   }
-
+  
   public FileLineIterable(InputStream is) {
-    this(is, UTF8, false);
+    this(is, FileLineIterable.UTF8, false);
   }
-
+  
   public FileLineIterable(InputStream is, boolean skipFirstLine) {
-    this(is, UTF8, skipFirstLine);
+    this(is, FileLineIterable.UTF8, skipFirstLine);
   }
-
+  
   public FileLineIterable(InputStream is, Charset encoding, boolean skipFirstLine) {
     this.is = is;
     this.encoding = encoding;
     this.skipFirstLine = skipFirstLine;
   }
-
+  
   @Override
   public Iterator<String> iterator() {
     try {
@@ -74,5 +75,5 @@
       throw new IllegalStateException(ioe);
     }
   }
-
+  
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java Sat Feb 13 19:57:04 2010
@@ -21,7 +21,6 @@
 import java.io.Closeable;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@@ -33,56 +32,62 @@
 import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
 
 /**
- * Iterates over the lines of a text file. This assumes the text file's lines are delimited in a manner consistent with
- * how {@link BufferedReader} defines lines.
- *
+ * Iterates over the lines of a text file. This assumes the text file's lines are delimited in a manner
+ * consistent with how {@link BufferedReader} defines lines.
+ * 
  * This class will uncompress files that end in .zip or .gz accordingly, too.
  */
 public final class FileLineIterator implements SkippingIterator<String>, Closeable {
-
+  
   private static final Charset UTF8 = Charset.forName("UTF-8");
-
+  
   private final BufferedReader reader;
   private String nextLine;
-
+  
   /**
    * Creates a {@link FileLineIterator} over a given file, assuming a UTF-8 encoding.
-   *
-   * @throws FileNotFoundException if the file does not exist
-   * @throws IOException           if the file cannot be read
+   * 
+   * @throws FileNotFoundException
+   *           if the file does not exist
+   * @throws IOException
+   *           if the file cannot be read
    */
   public FileLineIterator(File file) throws IOException {
-    this(file, UTF8, false);
+    this(file, FileLineIterator.UTF8, false);
   }
-
+  
   /**
    * Creates a {@link FileLineIterator} over a given file, assuming a UTF-8 encoding.
-   *
-   * @throws FileNotFoundException if the file does not exist
-   * @throws IOException           if the file cannot be read
+   * 
+   * @throws FileNotFoundException
+   *           if the file does not exist
+   * @throws IOException
+   *           if the file cannot be read
    */
   public FileLineIterator(File file, boolean skipFirstLine) throws IOException {
-    this(file, UTF8, skipFirstLine);
+    this(file, FileLineIterator.UTF8, skipFirstLine);
   }
-
+  
   /**
    * Creates a {@link FileLineIterator} over a given file, using the given encoding.
-   *
-   * @throws FileNotFoundException if the file does not exist
-   * @throws IOException           if the file cannot be read
+   * 
+   * @throws FileNotFoundException
+   *           if the file does not exist
+   * @throws IOException
+   *           if the file cannot be read
    */
   public FileLineIterator(File file, Charset encoding, boolean skipFirstLine) throws IOException {
-    this(getFileInputStream(file), encoding, skipFirstLine);
+    this(FileLineIterator.getFileInputStream(file), encoding, skipFirstLine);
   }
-
+  
   public FileLineIterator(InputStream is) throws IOException {
-    this(is, UTF8, false);
+    this(is, FileLineIterator.UTF8, false);
   }
-
+  
   public FileLineIterator(InputStream is, boolean skipFirstLine) throws IOException {
-    this(is, UTF8, skipFirstLine);
+    this(is, FileLineIterator.UTF8, skipFirstLine);
   }
-
+  
   public FileLineIterator(InputStream is, Charset encoding, boolean skipFirstLine) throws IOException {
     reader = new BufferedReader(new InputStreamReader(is, encoding));
     if (skipFirstLine) {
@@ -90,7 +95,7 @@
     }
     nextLine = reader.readLine();
   }
-
+  
   static InputStream getFileInputStream(File file) throws IOException {
     InputStream is = new FileInputStream(file);
     String name = file.getName();
@@ -102,16 +107,16 @@
       return is;
     }
   }
-
+  
   public String peek() {
     return nextLine;
   }
-
+  
   @Override
   public boolean hasNext() {
     return nextLine != null;
   }
-
+  
   @Override
   public String next() {
     if (nextLine == null) {
@@ -130,7 +135,7 @@
     }
     return result;
   }
-
+  
   /**
    * @throws UnsupportedOperationException
    */
@@ -138,7 +143,7 @@
   public void remove() {
     throw new UnsupportedOperationException();
   }
-
+  
   @Override
   public void skip(int n) {
     try {
@@ -149,11 +154,11 @@
       close();
     }
   }
-
+  
   @Override
   public void close() {
     nextLine = null;
     IOUtils.quietClose(reader);
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java Sat Feb 13 19:57:04 2010
@@ -31,24 +31,24 @@
   
   private static final Logger log = LoggerFactory.getLogger(HadoopUtil.class);
   
-  private HadoopUtil() {}
+  private HadoopUtil() { }
   
   public static void overwriteOutput(String output) throws IOException {
     Configuration conf = new JobConf(KMeansDriver.class);
     Path outPath = new Path(output);
     FileSystem fs = FileSystem.get(outPath.toUri(), conf);
     if (fs.exists(outPath)) {
-      log.warn("Deleting {}", outPath);
+      HadoopUtil.log.warn("Deleting {}", outPath);
       fs.delete(outPath, true);
     }
-    log.warn("Creating dir {}", outPath);
+    HadoopUtil.log.warn("Creating dir {}", outPath);
     fs.mkdirs(outPath);
   }
   
   public static void deletePath(String output, FileSystem fs) throws IOException {
     Path outPath = new Path(output);
     if (fs.exists(outPath)) {
-      log.warn("Deleting {}", outPath);
+      HadoopUtil.log.warn("Deleting {}", outPath);
       fs.delete(outPath, true);
     }
   }
@@ -56,14 +56,14 @@
   public static void deletePaths(Iterable<Path> paths, FileSystem fs) throws IOException {
     for (Path path : paths) {
       if (fs.exists(path)) {
-        log.warn("Deleting {}", path);
+        HadoopUtil.log.warn("Deleting {}", path);
         fs.delete(path, true);
       }
     }
   }
   
   public static void rename(Path from, Path to, FileSystem fs) throws IOException {
-    log.warn("Renaming " + from.toUri() + " to " + to.toUri());
+    HadoopUtil.log.warn("Renaming " + from.toUri() + " to " + to.toUri());
     fs.rename(from, to);
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java Sat Feb 13 19:57:04 2010
@@ -17,9 +17,6 @@
 
 package org.apache.mahout.common;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import java.io.Closeable;
 import java.io.IOException;
 import java.sql.Connection;
@@ -27,68 +24,77 @@
 import java.sql.SQLException;
 import java.sql.Statement;
 
-/** <p>I/O-related utility methods that don't have a better home.</p> */
-public final class IOUtils {
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
+/**
+ * <p>
+ * I/O-related utility methods that don't have a better home.
+ * </p>
+ */
+public final class IOUtils {
+  
   private static final Logger log = LoggerFactory.getLogger(IOUtils.class);
-
-  private IOUtils() {
-  }
-
+  
+  private IOUtils() { }
+  
   public static void quietClose(Closeable closeable) {
     if (closeable != null) {
       try {
         closeable.close();
       } catch (IOException ioe) {
-        log.warn("Unexpected exception while closing; continuing", ioe);
+        IOUtils.log.warn("Unexpected exception while closing; continuing", ioe);
       }
     }
   }
-
+  
   // Sheez, why can't ResultSet, Statement and Connection implement Closeable?
-
+  
   public static void quietClose(ResultSet closeable) {
     if (closeable != null) {
       try {
         closeable.close();
       } catch (SQLException sqle) {
-        log.warn("Unexpected exception while closing; continuing", sqle);
+        IOUtils.log.warn("Unexpected exception while closing; continuing", sqle);
       }
     }
   }
-
+  
   public static void quietClose(Statement closeable) {
     if (closeable != null) {
       try {
         closeable.close();
       } catch (SQLException sqle) {
-        log.warn("Unexpected exception while closing; continuing", sqle);
+        IOUtils.log.warn("Unexpected exception while closing; continuing", sqle);
       }
     }
   }
-
+  
   public static void quietClose(Connection closeable) {
     if (closeable != null) {
       try {
         closeable.close();
       } catch (SQLException sqle) {
-        log.warn("Unexpected exception while closing; continuing", sqle);
+        IOUtils.log.warn("Unexpected exception while closing; continuing", sqle);
       }
     }
   }
-
+  
   /**
-   * Closes a {@link ResultSet}, {@link Statement} and {@link Connection} (if not null) and logs (but does not rethrow)
-   * any resulting {@link SQLException}. This is useful for cleaning up after a database query.
-   *
-   * @param resultSet  {@link ResultSet} to close
-   * @param statement  {@link Statement} to close
-   * @param connection {@link Connection} to close
+   * Closes a {@link ResultSet}, {@link Statement} and {@link Connection} (if not null) and logs (but does not
+   * rethrow) any resulting {@link SQLException}. This is useful for cleaning up after a database query.
+   * 
+   * @param resultSet
+   *          {@link ResultSet} to close
+   * @param statement
+   *          {@link Statement} to close
+   * @param connection
+   *          {@link Connection} to close
    */
   public static void quietClose(ResultSet resultSet, Statement statement, Connection connection) {
-    quietClose(resultSet);
-    quietClose(statement);
-    quietClose(connection);
+    IOUtils.quietClose(resultSet);
+    IOUtils.quietClose(statement);
+    IOUtils.quietClose(connection);
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntegerTuple.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntegerTuple.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntegerTuple.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntegerTuple.java Sat Feb 13 19:57:04 2010
@@ -33,26 +33,27 @@
  * 
  */
 public class IntegerTuple implements WritableComparable<IntegerTuple> {
-
+  
   private List<Integer> tuple = new ArrayList<Integer>();
-
-  public IntegerTuple() {
-  }
-
+  
+  public IntegerTuple() { }
+  
   public IntegerTuple(Integer firstEntry) {
     add(firstEntry);
   }
   
   public IntegerTuple(Collection<Integer> entries) {
-    for(Integer entry: entries)
+    for (Integer entry : entries) {
       add(entry);
+    }
   }
   
   public IntegerTuple(Integer[] entries) {
-    for(Integer entry: entries)
+    for (Integer entry : entries) {
       add(entry);
+    }
   }
-
+  
   /**
    * add an entry to the end of the list
    * 
@@ -62,7 +63,7 @@
   public boolean add(Integer entry) {
     return tuple.add(entry);
   }
-
+  
   /**
    * Fetches the string at the given location
    * 
@@ -72,7 +73,7 @@
   public Integer integerAt(int index) {
     return tuple.get(index);
   }
-
+  
   /**
    * Replaces the string at the given index with the given newString
    * 
@@ -83,7 +84,7 @@
   public Integer replaceAt(int index, Integer newInteger) {
     return tuple.set(index, newInteger);
   }
-
+  
   /**
    * Fetch the list of entries from the tuple
    * 
@@ -92,7 +93,7 @@
   public List<Integer> getEntries() {
     return Collections.unmodifiableList(this.tuple);
   }
-
+  
   /**
    * Returns the length of the tuple
    * 
@@ -101,34 +102,39 @@
   public int length() {
     return this.tuple.size();
   }
-
+  
   @Override
   public String toString() {
     return tuple.toString();
   }
-
+  
   @Override
   public int hashCode() {
     return tuple.hashCode();
   }
-
+  
   @Override
   public boolean equals(Object obj) {
-    if (this == obj)
+    if (this == obj) {
       return true;
-    if (obj == null)
+    }
+    if (obj == null) {
       return false;
-    if (getClass() != obj.getClass())
+    }
+    if (getClass() != obj.getClass()) {
       return false;
+    }
     IntegerTuple other = (IntegerTuple) obj;
     if (tuple == null) {
-      if (other.tuple != null)
+      if (other.tuple != null) {
         return false;
-    } else if (!tuple.equals(other.tuple))
+      }
+    } else if (!tuple.equals(other.tuple)) {
       return false;
+    }
     return true;
   }
-
+  
   @Override
   public void readFields(DataInput in) throws IOException {
     int len = in.readInt();
@@ -138,15 +144,15 @@
       tuple.add(data);
     }
   }
-
+  
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeInt(tuple.size());
-    for (Integer entry : tuple) {     
-      out.writeInt(entry);     
+    for (Integer entry : tuple) {
+      out.writeInt(entry);
     }
   }
-
+  
   @Override
   public int compareTo(IntegerTuple otherTuple) {
     int thisLength = length();
@@ -154,8 +160,9 @@
     int min = Math.min(thisLength, otherLength);
     for (int i = 0; i < min; i++) {
       int ret = this.tuple.get(i).compareTo(otherTuple.integerAt(i));
-      if (ret == 0)
+      if (ret == 0) {
         continue;
+      }
       return ret;
     }
     if (thisLength < otherLength) {
@@ -166,5 +173,5 @@
       return 0;
     }
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java Sat Feb 13 19:57:04 2010
@@ -21,27 +21,27 @@
 
 /** A simple (ordered) pair of longs. */
 public final class LongPair implements Comparable<LongPair>, Serializable {
-
+  
   private final long first;
   private final long second;
-
+  
   public LongPair(long first, long second) {
     this.first = first;
     this.second = second;
   }
-
+  
   public long getFirst() {
     return first;
   }
-
+  
   public long getSecond() {
     return second;
   }
-
+  
   public LongPair swap() {
     return new LongPair(second, first);
   }
-
+  
   @Override
   public boolean equals(Object obj) {
     if (!(obj instanceof LongPair)) {
@@ -50,7 +50,7 @@
     LongPair otherPair = (LongPair) obj;
     return first == otherPair.getFirst() && second == otherPair.getSecond();
   }
-
+  
   @Override
   public int hashCode() {
     int firstHash = RandomUtils.hashLong(first);
@@ -58,12 +58,12 @@
     // for (a,b) versus (b,a)
     return (firstHash >>> 16 | firstHash << 16) ^ RandomUtils.hashLong(second);
   }
-
+  
   @Override
   public String toString() {
     return '(' + String.valueOf(first) + ',' + second + ')';
   }
-
+  
   @Override
   public int compareTo(LongPair o) {
     if (first < o.getFirst()) {
@@ -74,5 +74,5 @@
       return second < o.getSecond() ? -1 : second > o.getSecond() ? 1 : 0;
     }
   }
-
+  
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Pair.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Pair.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Pair.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Pair.java Sat Feb 13 19:57:04 2010
@@ -21,56 +21,56 @@
 
 /** A simple (ordered) pair of two objects. Elements may be null. */
 public final class Pair<A, B> implements Serializable {
-
+  
   private final A first;
   private final B second;
-
+  
   public Pair(A first, B second) {
     this.first = first;
     this.second = second;
   }
-
+  
   public A getFirst() {
     return first;
   }
-
+  
   public B getSecond() {
     return second;
   }
-
+  
   public Pair<B, A> swap() {
     return new Pair<B, A>(second, first);
   }
-
+  
   @Override
   public boolean equals(Object obj) {
-    if (!(obj instanceof Pair)) {
+    if (!(obj instanceof Pair<?, ?>)) {
       return false;
     }
     Pair<?, ?> otherPair = (Pair<?, ?>) obj;
-    return isEqualOrNulls(first, otherPair.getFirst()) &&
-           isEqualOrNulls(second, otherPair.getSecond());
+    return Pair.isEqualOrNulls(first, otherPair.getFirst()) &&
+    Pair.isEqualOrNulls(second, otherPair.getSecond());
   }
-
+  
   private static boolean isEqualOrNulls(Object obj1, Object obj2) {
     return obj1 == null ? obj2 == null : obj1.equals(obj2);
   }
-
+  
   @Override
   public int hashCode() {
-    int firstHash = hashCodeNull(first);
+    int firstHash = Pair.hashCodeNull(first);
     // Flip top and bottom 16 bits; this makes the hash function probably different
     // for (a,b) versus (b,a)
-    return (firstHash >>> 16 | firstHash << 16) ^ hashCodeNull(second);
+    return (firstHash >>> 16 | firstHash << 16) ^ Pair.hashCodeNull(second);
   }
-
+  
   private static int hashCodeNull(Object obj) {
     return obj == null ? 0 : obj.hashCode();
   }
-
+  
   @Override
   public String toString() {
     return '(' + String.valueOf(first) + ',' + second + ')';
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java Sat Feb 13 19:57:04 2010
@@ -28,60 +28,62 @@
 import org.slf4j.LoggerFactory;
 
 public class Parameters {
-
+  
   private static final Logger log = LoggerFactory.getLogger(Parameters.class);
-
-  private Map<String, String> params = new HashMap<String, String>();
-
-  //private Configuration conf = new Configuration();
-
+  
+  private Map<String,String> params = new HashMap<String,String>();
+  
+  // private Configuration conf = new Configuration();
+  
   public Parameters() {
 
   }
-
-  private Parameters(Map<String, String> params) {
+  
+  private Parameters(Map<String,String> params) {
     this.params = params;
   }
-
+  
   public String get(String key) {
     return params.get(key);
   }
-
+  
   public String get(String key, String defaultValue) {
     String ret = params.get(key);
     return ret == null ? defaultValue : ret;
   }
-
+  
   public void set(String key, String value) {
     params.put(key, value);
   }
-
+  
   @Override
   public String toString() {
     Configuration conf = new Configuration();
-    conf.set("io.serializations",
-        "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
-    DefaultStringifier<Map<String, String>> mapStringifier = new DefaultStringifier<Map<String, String>>(conf,
+    conf
+        .set("io.serializations",
+          "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
+    DefaultStringifier<Map<String,String>> mapStringifier = new DefaultStringifier<Map<String,String>>(conf,
         GenericsUtil.getClass(params));
     try {
       return mapStringifier.toString(params);
     } catch (IOException e) {
-      log.info("Encountered IOException while deserializing returning empty string", e);
+      Parameters.log.info("Encountered IOException while deserializing returning empty string", e);
       return "";
     }
-
+    
   }
-
+  
   public String print() {
     return params.toString();
   }
-
+  
   public static Parameters fromString(String serializedString) throws IOException {
     Configuration conf = new Configuration();
-    conf.set("io.serializations",
-        "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
-    Map<String, String> params = new HashMap<String, String>();
-    DefaultStringifier<Map<String, String>> mapStringifier = new DefaultStringifier<Map<String, String>>(conf,
+    conf
+        .set("io.serializations",
+          "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
+    Map<String,String> params = new HashMap<String,String>();
+    DefaultStringifier<Map<String,String>> mapStringifier = new DefaultStringifier<Map<String,String>>(conf,
         GenericsUtil.getClass(params));
     params = mapStringifier.fromString(serializedString);
     return new Parameters(params);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/RandomUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/RandomUtils.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/RandomUtils.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/RandomUtils.java Sat Feb 13 19:57:04 2010
@@ -17,41 +17,45 @@
 
 package org.apache.mahout.common;
 
-import org.uncommons.maths.random.MersenneTwisterRNG;
-
-import java.util.Random;
 import java.nio.charset.Charset;
+import java.util.Random;
+
+import org.uncommons.maths.random.MersenneTwisterRNG;
 
 /**
- * <p>The source of random stuff for the whole project. This lets us make all randomness in the project predictable, if
- * desired, for when we run unit tests, which should be repeatable.</p>
- *
- * <p>This class is increasingly incorrectly named as it also includes other mathematical utility methods.</p>
+ * <p>
+ * The source of random stuff for the whole project. This lets us make all randomness in the project
+ * predictable, if desired, for when we run unit tests, which should be repeatable.
+ * </p>
+ * 
+ * <p>
+ * This class is increasingly incorrectly named as it also includes other mathematical utility methods.
+ * </p>
  */
 public final class RandomUtils {
-
+  
   private static final byte[] STANDARD_SEED = "Mahout=Hadoop+ML".getBytes(Charset.forName("US-ASCII"));
-
+  
   private static boolean testSeed;
-
+  
   /** The largest prime less than 2<sup>31</sup>-1 that is the smaller of a twin prime pair. */
   public static final int MAX_INT_SMALLER_TWIN_PRIME = 2147482949;
-
-  private RandomUtils() {
-  }
-
+  
+  private RandomUtils() { }
+  
   public static void useTestSeed() {
-    testSeed = true;
+    RandomUtils.testSeed = true;
   }
-
+  
   public static Random getRandom() {
-    return testSeed ? new MersenneTwisterRNG(STANDARD_SEED) : new MersenneTwisterRNG();
+    return RandomUtils.testSeed ? new MersenneTwisterRNG(RandomUtils.STANDARD_SEED)
+        : new MersenneTwisterRNG();
   }
-
+  
   public static Random getRandom(long seed) {
-    return new MersenneTwisterRNG(longSeedtoBytes(seed));
+    return new MersenneTwisterRNG(RandomUtils.longSeedtoBytes(seed));
   }
-
+  
   public static byte[] longSeedtoBytes(long seed) {
     byte[] seedBytes = new byte[16];
     seedBytes[0] = (byte) (seed >>> 56);
@@ -60,58 +64,58 @@
     seedBytes[3] = (byte) (seed >>> 32);
     seedBytes[4] = (byte) (seed >>> 24);
     seedBytes[5] = (byte) (seed >>> 16);
-    seedBytes[6] = (byte) (seed >>>  8);
+    seedBytes[6] = (byte) (seed >>> 8);
     seedBytes[7] = (byte) seed;
     System.arraycopy(seedBytes, 0, seedBytes, 8, 8);
     return seedBytes;
   }
-
+  
   public static long seedBytesToLong(byte[] seed) {
-    return
-        ((seed[0] & 0xFFL) << 56) |
-        ((seed[1] & 0xFFL) << 48) |
-        ((seed[2] & 0xFFL) << 40) |
-        ((seed[3] & 0xFFL) << 32) |
-        ((seed[4] & 0xFFL) << 24) |
-        ((seed[5] & 0xFFL) << 16) |
-        ((seed[6] & 0xFFL) <<  8) |
-         (seed[7] & 0xFFL);
+    return (seed[0] & 0xFFL) << 56 | (seed[1] & 0xFFL) << 48 | (seed[2] & 0xFFL) << 40
+           | (seed[3] & 0xFFL) << 32 | (seed[4] & 0xFFL) << 24 | (seed[5] & 0xFFL) << 16
+           | (seed[6] & 0xFFL) << 8 | seed[7] & 0xFFL;
   }
-
+  
   /** @return what {@link Double#hashCode()} would return for the same value */
   public static int hashDouble(double value) {
     // Just copied from Double.hashCode
     long bits = Double.doubleToLongBits(value);
-    return (int) (bits ^ (bits >>> 32));
+    return (int) (bits ^ bits >>> 32);
   }
-
+  
   public static int hashFloat(float value) {
     return Float.floatToIntBits(value);
   }
-
+  
   public static int hashLong(long value) {
-    return (int) (value ^ (value >>> 32));
+    return (int) (value ^ value >>> 32);
   }
-
+  
   /**
-   * <p>Finds next-largest "twin primes": numbers p and p+2 such that both are prime. Finds the smallest such p such
-   * that the smaller twin, p, is greater than or equal to n. Returns p+2, the larger of the two twins.</p>
+   * <p>
+   * Finds next-largest "twin primes": numbers p and p+2 such that both are prime. Finds the smallest such p
+   * such that the smaller twin, p, is greater than or equal to n. Returns p+2, the larger of the two twins.
+   * </p>
    */
   public static int nextTwinPrime(int n) {
-    if (n > MAX_INT_SMALLER_TWIN_PRIME) {
+    if (n > RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
       throw new IllegalArgumentException();
     }
     if (n <= 3) {
       return 3;
     }
-    int next = nextPrime(n);
-    while (isNotPrime(next + 2)) {
-      next = nextPrime(next + 4);
+    int next = RandomUtils.nextPrime(n);
+    while (RandomUtils.isNotPrime(next + 2)) {
+      next = RandomUtils.nextPrime(next + 4);
     }
     return next + 2;
   }
-
-  /** <p>Finds smallest prime p such that p is greater than or equal to n.</p> */
+  
+  /**
+   * <p>
+   * Finds smallest prime p such that p is greater than or equal to n.
+   * </p>
+   */
   public static int nextPrime(int n) {
     if (n < 2) {
       return 2;
@@ -119,18 +123,18 @@
     // Make sure the number is odd. Is this too clever?
     n |= 0x1;
     // There is no problem with overflow since Integer.MAX_INT is prime, as it happens
-    while (isNotPrime(n)) {
+    while (RandomUtils.isNotPrime(n)) {
       n += 2;
     }
     return n;
   }
-
+  
   /** @return <code>true</code> iff n is not a prime */
   public static boolean isNotPrime(int n) {
     if (n < 2 || (n & 0x1) == 0) { // < 2 or even
       return true;
     }
-    int max = 1 + (int) Math.sqrt((double) n);
+    int max = 1 + (int) Math.sqrt(n);
     for (int d = 3; d <= max; d += 2) {
       if (n % d == 0) {
         return true;
@@ -138,5 +142,5 @@
     }
     return false;
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringRecordIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringRecordIterator.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringRecordIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringRecordIterator.java Sat Feb 13 19:57:04 2010
@@ -23,7 +23,7 @@
 import java.util.regex.Pattern;
 
 public class StringRecordIterator implements Iterator<Pair<List<String>,Long>> {
-
+  
   private static final Long ONE = 1L;
   
   private final Iterator<String> lineIterator;
@@ -43,7 +43,7 @@
   public Pair<List<String>,Long> next() {
     String line = lineIterator.next();
     String[] items = splitter.split(line);
-    return new Pair<List<String>,Long>(Arrays.asList(items), ONE);
+    return new Pair<List<String>,Long>(Arrays.asList(items), StringRecordIterator.ONE);
   }
   
   @Override

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringTuple.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringTuple.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringTuple.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringTuple.java Sat Feb 13 19:57:04 2010
@@ -31,12 +31,11 @@
  * An Ordered List of Strings which can be used in a Hadoop Map/Reduce Job
  */
 public final class StringTuple implements WritableComparable<StringTuple> {
-
+  
   private List<String> tuple = new ArrayList<String>();
-
-  public StringTuple() {
-  }
-
+  
+  public StringTuple() { }
+  
   public StringTuple(String firstEntry) {
     add(firstEntry);
   }
@@ -46,13 +45,13 @@
       add(entry);
     }
   }
-
+  
   public StringTuple(String[] entries) {
     for (String entry : entries) {
       add(entry);
     }
   }
-
+  
   /**
    * add an entry to the end of the list
    * 
@@ -62,7 +61,7 @@
   public boolean add(String entry) {
     return tuple.add(entry);
   }
-
+  
   /**
    * Fetches the string at the given location
    * 
@@ -72,7 +71,7 @@
   public String stringAt(int index) {
     return tuple.get(index);
   }
-
+  
   /**
    * Replaces the string at the given index with the given newString
    * 
@@ -83,7 +82,7 @@
   public String replaceAt(int index, String newString) {
     return tuple.set(index, newString);
   }
-
+  
   /**
    * Fetch the list of entries from the tuple
    * 
@@ -92,7 +91,7 @@
   public List<String> getEntries() {
     return Collections.unmodifiableList(this.tuple);
   }
-
+  
   /**
    * Returns the length of the tuple
    * 
@@ -101,34 +100,39 @@
   public int length() {
     return this.tuple.size();
   }
-
+  
   @Override
   public String toString() {
     return tuple.toString();
   }
-
+  
   @Override
   public int hashCode() {
     return tuple.hashCode();
   }
-
+  
   @Override
   public boolean equals(Object obj) {
-    if (this == obj)
+    if (this == obj) {
       return true;
-    if (obj == null)
+    }
+    if (obj == null) {
       return false;
-    if (getClass() != obj.getClass())
+    }
+    if (getClass() != obj.getClass()) {
       return false;
+    }
     StringTuple other = (StringTuple) obj;
     if (tuple == null) {
-      if (other.tuple != null)
+      if (other.tuple != null) {
         return false;
-    } else if (!tuple.equals(other.tuple))
+      }
+    } else if (!tuple.equals(other.tuple)) {
       return false;
+    }
     return true;
   }
-
+  
   @Override
   public void readFields(DataInput in) throws IOException {
     int len = in.readInt();
@@ -139,7 +143,7 @@
       tuple.add(value.toString());
     }
   }
-
+  
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeInt(tuple.size());
@@ -149,7 +153,7 @@
       value.write(out);
     }
   }
-
+  
   @Override
   public int compareTo(StringTuple otherTuple) {
     int thisLength = length();
@@ -169,5 +173,5 @@
       return 0;
     }
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java Sat Feb 13 19:57:04 2010
@@ -17,40 +17,42 @@
 
 package org.apache.mahout.common;
 
-import com.thoughtworks.xstream.XStream;
-
 import java.util.regex.Pattern;
 
+import com.thoughtworks.xstream.XStream;
+
 /**
  * Offers two methods to convert an object to a string representation and restore the object given its string
  * representation. Should use Hadoop Stringifier whenever available.
  */
 public final class StringUtils {
-
+  
   private static final XStream xstream = new XStream();
   private static final Pattern NEWLINE_PATTERN = Pattern.compile("\n");
-
+  
   private StringUtils() {
-    // do nothing
+  // do nothing
   }
-
+  
   /**
    * Converts the object to a one-line string representation
-   *
-   * @param obj the object to convert
+   * 
+   * @param obj
+   *          the object to convert
    * @return the string representation of the object
    */
   public static String toString(Object obj) {
-    return NEWLINE_PATTERN.matcher(xstream.toXML(obj)).replaceAll("");
+    return StringUtils.NEWLINE_PATTERN.matcher(StringUtils.xstream.toXML(obj)).replaceAll("");
   }
-
+  
   /**
    * Restores the object from its string representation.
-   *
-   * @param str the string representation of the object
+   * 
+   * @param str
+   *          the string representation of the object
    * @return restored object
    */
   public static <T> T fromString(String str) {
-    return (T) xstream.fromXML(str);
+    return (T) StringUtils.xstream.fromXML(str);
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Summarizable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Summarizable.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Summarizable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Summarizable.java Sat Feb 13 19:57:04 2010
@@ -18,11 +18,11 @@
 package org.apache.mahout.common;
 
 /**
- * A Summarizable Interface. All Classes which implements this has to have a summarize function which generates a string
- * summary of the data contained in it
+ * A Summarizable interface. Every class that implements it must provide a summarize function which
+ * generates a string summary of the data contained in it.
  */
 public interface Summarizable {
-
+  
   /** @return Summary of the data inside the class */
   String summarize();
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java Sat Feb 13 19:57:04 2010
@@ -20,92 +20,91 @@
 import java.io.Serializable;
 
 public final class TimingStatistics implements Serializable {
-
+  
   private int nCalls;
   private long minTime;
   private long maxTime;
   private long sumTime;
   private double sumSquaredTime;
-
+  
   /** Creates a new instance of CallStats */
-  public TimingStatistics() {
-  }
-
-  public TimingStatistics(int nCalls, long minTime, long maxTime, long sumTime,
-                          double sumSquaredTime) {
+  public TimingStatistics() { }
+  
+  public TimingStatistics(int nCalls, long minTime, long maxTime, long sumTime, double sumSquaredTime) {
     this.nCalls = nCalls;
     this.minTime = minTime;
     this.maxTime = maxTime;
     this.sumTime = sumTime;
     this.sumSquaredTime = sumSquaredTime;
   }
-
+  
   public synchronized int getNCalls() {
     return nCalls;
   }
-
+  
   public synchronized long getMinTime() {
     return Math.max(0, minTime);
   }
-
+  
   public synchronized long getMaxTime() {
     return maxTime;
   }
-
+  
   public synchronized long getSumTime() {
     return sumTime;
   }
-
+  
   public synchronized double getSumSquaredTime() {
     return sumSquaredTime;
   }
-
+  
   public synchronized long getMeanTime() {
     return nCalls == 0 ? 0 : sumTime / nCalls;
   }
-
+  
   public synchronized long getStdDevTime() {
-    if (nCalls == 0)
+    if (nCalls == 0) {
       return 0;
+    }
     double mean = getMeanTime();
     double meanSquared = mean * mean;
     double meanOfSquares = sumSquaredTime / nCalls;
     double variance = meanOfSquares - meanSquared;
-    if (variance < 0)
-      return 0;  // might happen due to rounding error
+    if (variance < 0) {
+      return 0; // might happen due to rounding error
+    }
     return (long) Math.sqrt(variance);
   }
-
+  
+  @Override
   public synchronized String toString() {
-    return '\n' +
-        "nCalls = " + nCalls + ";\n" +
-        "sumTime = " + sumTime / 1000000000.0f + "s;\n" +
-        "minTime = " + minTime / 1000000.0f + "ms;\n" +
-        "maxTime = " + maxTime / 1000000.0f + "ms;\n" +
-        "meanTime = " + getMeanTime() / 1000000.0f + "ms;\n" +
-        "stdDevTime = " + getStdDevTime() / 1000000.0f + "ms;";
+    return '\n' + "nCalls = " + nCalls + ";\n" + "sumTime = " + sumTime / 1000000000.0f + "s;\n"
+           + "minTime = " + minTime / 1000000.0f + "ms;\n" + "maxTime = " + maxTime / 1000000.0f + "ms;\n"
+           + "meanTime = " + getMeanTime() / 1000000.0f + "ms;\n" + "stdDevTime = " + getStdDevTime()
+           / 1000000.0f + "ms;";
   }
-
+  
   public Call newCall() {
     return new Call();
   }
-
+  
   public class Call {
     private final long startTime = System.nanoTime();
-
-    private Call() {
-    }
-
+    
+    private Call() { }
+    
     public void end() {
       long elapsed = System.nanoTime() - startTime;
       synchronized (TimingStatistics.this) {
         nCalls++;
-        if (elapsed < minTime || nCalls == 1)
+        if (elapsed < minTime || nCalls == 1) {
           minTime = elapsed;
-        if (elapsed > maxTime)
+        }
+        if (elapsed > maxTime) {
           maxTime = elapsed;
+        }
         sumTime += elapsed;
-        sumSquaredTime += (double) (elapsed * elapsed);
+        sumSquaredTime += elapsed * elapsed;
       }
     }
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/Cache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/Cache.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/Cache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/Cache.java Sat Feb 13 19:57:04 2010
@@ -17,38 +17,41 @@
 
 package org.apache.mahout.common.cache;
 
-public interface Cache<K, V> {
+public interface Cache<K,V> {
   /**
-   * Gets the Value from the Cache, If the object doesnt exist default behaviour
-   * is to return null.
+   * Gets the value from the cache. If the object doesn't exist, the default behaviour is to return null.
    * 
    * @param key
    * @return V
    */
   V get(K key);
-
+  
   /**
    * returns true if the Cache contains the key
+   * 
    * @param key
    * @return boolean
    */
   boolean contains(K key);
-
+  
   /**
    * puts the key and its value into the cache
+   * 
    * @param key
    * @param value
    */
   void set(K key, V value);
-
+  
   /**
    * returns the current size of the cache
+   * 
    * @return long
    */
   long size();
-
+  
   /**
    * returns the total capacity of the cache defined at contruction time
+   * 
    * @return long
    */
   long capacity();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/HybridCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/HybridCache.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/HybridCache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/HybridCache.java Sat Feb 13 19:57:04 2010
@@ -17,75 +17,78 @@
 
 package org.apache.mahout.common.cache;
 
-public class HybridCache<K, V> implements Cache<K, V> {
-
-  private int LFUCapacity = 0;
-
-  private int LRUCapacity = 0;
-
-  private LRUCache<K, V> lruCache = null;
-
-  private LFUCache<K, V> lfuCache = null;
-
+public class HybridCache<K,V> implements Cache<K,V> {
+  
+  private int lfuCapacity;
+  
+  private int lruCapacity;
+  
+  private LRUCache<K,V> lruCache;
+  
+  private LFUCache<K,V> lfuCache;
+  
   public HybridCache(int lfuCapacity, int lruCapacity) {
-
-    this.LFUCapacity = lfuCapacity;
-    this.LRUCapacity = lruCapacity;
-
-    lruCache = new LRUCache<K, V>(LRUCapacity);
-    lfuCache = new LFUCache<K, V>(LFUCapacity);
-
+    
+    this.lfuCapacity = lfuCapacity;
+    this.lruCapacity = lruCapacity;
+    
+    lruCache = new LRUCache<K,V>(lruCapacity);
+    lfuCache = new LFUCache<K,V>(lfuCapacity);
+    
   }
-
+  
   @Override
   public long capacity() {
-    return LFUCapacity + LRUCapacity;
+    return lfuCapacity + lruCapacity;
   }
-
+  
   @Override
   public V get(K key) {
     V LRUObject = LRUGet(key);
-    if (LRUObject != null)
+    if (LRUObject != null) {
       return LRUObject;
-
+    }
+    
     V lFUObject = LFUGet(key);
-    if (lFUObject != null)
+    if (lFUObject != null) {
       return lFUObject;
-
+    }
+    
     return null;
   }
-
+  
   private V LFUGet(K key) {
-    if (lfuCache.getEvictionCount() >= LFUCapacity)
+    if (lfuCache.getEvictionCount() >= lfuCapacity) {
       return lfuCache.quickGet(key);
+    }
     return lfuCache.get(key);
   }
-
+  
   private V LRUGet(K key) {
     return lruCache.get(key);
   }
-
+  
   @Override
   public void set(K key, V value) {
-
-    if (lfuCache.size() < LFUCapacity)
+    
+    if (lfuCache.size() < lfuCapacity) {
       lfuCache.set(key, value);
-    else if (lfuCache.getEvictionCount() < LFUCapacity) {
+    } else if (lfuCache.getEvictionCount() < lfuCapacity) {
       lfuCache.set(key, value);
       lruCache.set(key, value);
     } else {
       lruCache.set(key, value);
     }
   }
-
+  
   @Override
   public long size() {
     return lfuCache.size() + lruCache.size();
   }
-
+  
   @Override
   public boolean contains(K key) {
     return lruCache.contains(key) || lfuCache.contains(key);
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java Sat Feb 13 19:57:04 2010
@@ -29,65 +29,67 @@
 
 import org.apache.mahout.common.Pair;
 
-public class LFUCache<K, V> implements Cache<K, V> {
-
-  private SortedMap<Long, Set<K>> evictionMap = null;
-
-  private Map<K, Pair<V, AtomicLong>> dataMap = null;
-
-  private int capacity = 0;
-
-  private int evictionCount = 0;
-
-  public LFUCache(int capacity) {
-
+public class LFUCache<K,V> implements Cache<K,V> {
+  
+  private SortedMap<Long,Set<K>> evictionMap;
+  
+  private Map<K,Pair<V,AtomicLong>> dataMap;
+  
+  private int capacity;
+  
+  private int evictionCount;
+  
+  public LFUCache(int capacity) {   
     this.capacity = capacity;
-
-    evictionMap = new TreeMap<Long, Set<K>>();
-    dataMap = new HashMap<K, Pair<V, AtomicLong>>(capacity);
-
+    
+    evictionMap = new TreeMap<Long,Set<K>>();
+    dataMap = new HashMap<K,Pair<V,AtomicLong>>(capacity); 
   }
-
+  
   @Override
   public long capacity() {
     return capacity;
   }
-
+  
   public int getEvictionCount() {
     return this.evictionCount;
   }
-
+  
   @Override
   public V get(K key) {
-    Pair<V, AtomicLong> data = dataMap.get(key);
-    if (data == null)
+    Pair<V,AtomicLong> data = dataMap.get(key);
+    if (data == null) {
       return null;
-    else {
+    } else {
       V value = data.getFirst();
       AtomicLong count = data.getSecond();
       long oldCount = count.getAndIncrement();
       incrementHit(key, oldCount);
       return value;
     }
-
+    
   }
   
-  public V quickGet(K key){
-    Pair<V, AtomicLong> data = dataMap.get(key);
-    if (data == null)
+  public V quickGet(K key) {
+    Pair<V,AtomicLong> data = dataMap.get(key);
+    if (data == null) {
       return null;
-    else
+    } else {
       return data.getFirst();
+    }
   }
-
+  
   private void incrementHit(K key, long count) {
     Set<K> keys = evictionMap.get(count);
-    if (keys == null)
+    if (keys == null) {
       throw new ConcurrentModificationException();
-    if (keys.remove(key) == false)
+    }
+    if (keys.remove(key) == false) {
       throw new ConcurrentModificationException();
-    if (keys.isEmpty())
+    }
+    if (keys.isEmpty()) {
       evictionMap.remove(count);
+    }
     count++;
     Set<K> keysNew = evictionMap.get(count);
     if (keysNew == null) {
@@ -96,19 +98,20 @@
     }
     keysNew.add(key);
   }
-
+  
   @Override
   public void set(K key, V value) {
-    if (dataMap.containsKey(key))
+    if (dataMap.containsKey(key)) {
       return;
+    }
     if (capacity == dataMap.size()) // Cache Full
     {
       removeLeastFrequent();
     }
     AtomicLong count = new AtomicLong(1L);
-    Pair<V, AtomicLong> data = new Pair<V, AtomicLong>(value, count);
+    Pair<V,AtomicLong> data = new Pair<V,AtomicLong>(value, count);
     dataMap.put(key, data);
-
+    
     Long countKey = 1L;
     Set<K> keys = evictionMap.get(countKey);
     if (keys == null) {
@@ -116,29 +119,31 @@
       evictionMap.put(countKey, keys);
     }
     keys.add(key);
-
+    
   }
+  
   private void removeLeastFrequent() {
     Long key = evictionMap.firstKey();
     Set<K> values = evictionMap.get(key);
     Iterator<K> it = values.iterator();
     K keyToBeRemoved = it.next();
     values.remove(keyToBeRemoved);
-    if (values.isEmpty())
+    if (values.isEmpty()) {
       evictionMap.remove(key);
+    }
     dataMap.remove(keyToBeRemoved);
     evictionCount++;
-
+    
   }
-
+  
   @Override
   public long size() {
     return dataMap.size();
   }
-
+  
   @Override
   public boolean contains(K key) {
-    return (dataMap.containsKey(key));
+    return dataMap.containsKey(key);
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LRUCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LRUCache.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LRUCache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LRUCache.java Sat Feb 13 19:57:04 2010
@@ -20,48 +20,48 @@
 import java.util.LinkedHashMap;
 import java.util.Map;
 
-public class LRUCache<K, V> implements Cache<K, V> {
-
-  private int capacity = 0;
+public class LRUCache<K,V> implements Cache<K,V> {
+  
+  private int capacity;
   
-  private Map<K, V> lruCache = null;
+  private Map<K,V> lruCache;
   
   public LRUCache(final int capacity) {
-
+    
     this.capacity = capacity;
-
-    lruCache = new LinkedHashMap<K,V>( (int)(capacity/0.75f + 1), 0.75f, true) { 
+    
+    lruCache = new LinkedHashMap<K,V>((int) (capacity / 0.75f + 1), 0.75f, true) {
       @Override
-      protected boolean removeEldestEntry (Map.Entry<K,V> eldest) {
+      protected boolean removeEldestEntry(Map.Entry<K,V> eldest) {
         return size() > capacity;
       }
     };
-      
+    
   }
-
+  
   @Override
   public long capacity() {
     return capacity;
   }
-
+  
   @Override
   public V get(K key) {
     return lruCache.get(key);
   }
-
+  
   @Override
   public void set(K key, V value) {
-      lruCache.put(key,value);
+    lruCache.put(key, value);
   }
-
+  
   @Override
   public long size() {
     return lruCache.size();
   }
-
+  
   @Override
   public boolean contains(K key) {
-    return (lruCache.containsKey(key));  
+    return lruCache.containsKey(key);
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java Sat Feb 13 19:57:04 2010
@@ -22,41 +22,40 @@
 import java.util.Map;
 import java.util.PriorityQueue;
 
-public class LeastKCache<K extends Comparable<? super K>, V> implements
-    Cache<K, V> {
-
-  private int capacity = 0;
-
-  private Map<K, V> cache = null;
-
-  private PriorityQueue<K> queue = null;
-
+public class LeastKCache<K extends Comparable<? super K>,V> implements Cache<K,V> {
+  
+  private int capacity;
+  
+  private Map<K,V> cache;
+  
+  private PriorityQueue<K> queue;
+  
   public LeastKCache(int capacity) {
-
+    
     this.capacity = capacity;
-
-    cache = new HashMap<K, V>(capacity);
+    
+    cache = new HashMap<K,V>(capacity);
     queue = new PriorityQueue<K>(capacity, new Comparator<K>() {
-
+      
       @Override
       public int compare(K o1, K o2) {
         return o2.compareTo(o1);
       }
-
+      
     });
-
+    
   }
-
+  
   @Override
   public final long capacity() {
     return capacity;
   }
-
+  
   @Override
   public final V get(K key) {
     return cache.get(key);
   }
-
+  
   @Override
   public final void set(K key, V value) {
     if (contains(key) == false) {
@@ -68,15 +67,15 @@
       cache.remove(k);
     }
   }
-
+  
   @Override
   public final long size() {
     return cache.size();
   }
-
+  
   @Override
   public final boolean contains(K key) {
-    return (cache.containsKey(key));
+    return cache.containsKey(key);
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java Sat Feb 13 19:57:04 2010
@@ -22,92 +22,74 @@
 import org.apache.commons.cli2.builder.DefaultOptionBuilder;
 
 public final class DefaultOptionCreator {
-
-  private DefaultOptionCreator() {
-  }
-
+  
+  private DefaultOptionCreator() { }
+  
   /**
    * Returns a default command line option for convergence delta specification.
    */
   public static DefaultOptionBuilder convergenceOption() {
-    return new DefaultOptionBuilder().withLongName("convergenceDelta")
-        .withRequired(true).withShortName("v").withArgument(
-            new ArgumentBuilder().withName("convergenceDelta").withMinimum(1).withMaximum(1)
-                .create()).withDescription("The convergence delta value.");
+    return new DefaultOptionBuilder().withLongName("convergenceDelta").withRequired(true).withShortName("v")
+        .withArgument(
+          new ArgumentBuilder().withName("convergenceDelta").withMinimum(1).withMaximum(1).create())
+        .withDescription("The convergence delta value.");
   }
-
+  
   /**
    * Returns a default command line option for output directory specification.
    */
   public static DefaultOptionBuilder outputOption() {
-    return new DefaultOptionBuilder().withLongName("output").withRequired(true)
-        .withShortName("o").withArgument(
-            new ArgumentBuilder().withName("output").withMinimum(1).withMaximum(1).create())
+    return new DefaultOptionBuilder().withLongName("output").withRequired(true).withShortName("o")
+        .withArgument(new ArgumentBuilder().withName("output").withMinimum(1).withMaximum(1).create())
         .withDescription("The directory pathname for output.");
   }
-
+  
   /**
    * Returns a default command line option for input directory specification.
    */
   public static DefaultOptionBuilder inputOption() {
-    return new DefaultOptionBuilder()
-        .withLongName("input")
-        .withRequired(true)
-        .withShortName("i")
-        .withArgument(
-            new ArgumentBuilder().withName("input").withMinimum(1).withMaximum(1).create())
-        .withDescription(
-            "Path to job input directory");
+    return new DefaultOptionBuilder().withLongName("input").withRequired(true).withShortName("i")
+        .withArgument(new ArgumentBuilder().withName("input").withMinimum(1).withMaximum(1).create())
+        .withDescription("Path to job input directory");
   }
-
+  
   /**
-   * Returns a default command line option for specification of numbers of
-   * clusters to create.
+   * Returns a default command line option for specification of numbers of clusters to create.
    */
   public static DefaultOptionBuilder kOption() {
     return new DefaultOptionBuilder()
         .withLongName("k")
         .withRequired(true)
-        .withArgument(
-            new ArgumentBuilder().withName("k").withMinimum(1).withMaximum(1).create())
+        .withArgument(new ArgumentBuilder().withName("k").withMinimum(1).withMaximum(1).create())
         .withDescription(
-            "The k in k-Means. k random Vectors will be chosen as the Centroid and written to the clusters output path.")
+          "The k in k-Means. k random Vectors will be chosen as the Centroid and written to the clusters output path.")
         .withShortName("k");
   }
-
+  
   /**
-   * Returns a default command line option for specification of max number of
-   * iterations.
+   * Returns a default command line option for specification of max number of iterations.
    */
   public static DefaultOptionBuilder maxIterOption() {
-    return new DefaultOptionBuilder()
-        .withLongName("maxIter")
-        .withRequired(true)
-        .withShortName("x")
-        .withArgument(
-            new ArgumentBuilder().withName("maxIter").withMinimum(1).withMaximum(1).create())
+    return new DefaultOptionBuilder().withLongName("maxIter").withRequired(true).withShortName("x")
+        .withArgument(new ArgumentBuilder().withName("maxIter").withMinimum(1).withMaximum(1).create())
         .withDescription("The maximum number of iterations.");
   }
-
+  
   /**
-   * Returns a default command line option for specification of distance measure
-   * class to use.
+   * Returns a default command line option for specification of distance measure class to use.
    */
   public static DefaultOptionBuilder distanceOption() {
-    return new DefaultOptionBuilder()
-        .withLongName("measure")
-        .withRequired(true)
-        .withShortName("d")
-        .withArgument(
-            new ArgumentBuilder().withName("measure").withMinimum(1).withMaximum(1).create())
+    return new DefaultOptionBuilder().withLongName("measure").withRequired(true).withShortName("d")
+        .withArgument(new ArgumentBuilder().withName("measure").withMinimum(1).withMaximum(1).create())
         .withDescription("The classname of the DistanceMeasure.");
   }
-
+  
   /**
    * Returns a default command line option for help.
    * */
   public static Option helpOption() {
-    return new DefaultOptionBuilder().withLongName("help").withDescription("Print out help").withShortName("h").create();
+    return new DefaultOptionBuilder().withLongName("help").withDescription("Print out help").withShortName(
+      "h").create();
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/CosineDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/CosineDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/CosineDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/CosineDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,36 +17,36 @@
 
 package org.apache.mahout.common.distance;
 
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.math.CardinalityException;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.common.parameters.Parameter;
-
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Iterator;
 
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.mahout.common.parameters.Parameter;
+import org.apache.mahout.math.CardinalityException;
+import org.apache.mahout.math.Vector;
+
 /**
  * This class implements a cosine distance metric by dividing the dot product of two vectors by the product of their
  * lengths
  */
 public class CosineDistanceMeasure implements DistanceMeasure {
-
+  
   @Override
   public void configure(JobConf job) {
     // nothing to do
   }
-
+  
   @Override
   public Collection<Parameter<?>> getParameters() {
     return Collections.emptyList();
   }
-
+  
   @Override
   public void createParameters(String prefix, JobConf jobConf) {
     // nothing to do
   }
-
+  
   public static double distance(double[] p1, double[] p2) {
     double dotProduct = 0.0;
     double lengthSquaredp1 = 0.0;
@@ -57,15 +57,15 @@
       dotProduct += p1[i] * p2[i];
     }
     double denominator = Math.sqrt(lengthSquaredp1) * Math.sqrt(lengthSquaredp2);
-
+    
     // correct for floating-point rounding errors
     if (denominator < dotProduct) {
       denominator = dotProduct;
     }
-
-    return 1.0 - (dotProduct / denominator);
+    
+    return 1.0 - dotProduct / denominator;
   }
-
+  
   @Override
   public double distance(Vector v1, Vector v2) {
     if (v1.size() != v2.size()) {
@@ -83,21 +83,36 @@
       Vector.Element elt = iter.next();
       lengthSquaredv2 += elt.get() * elt.get();
     }
-
+    
     double dotProduct = v1.dot(v2);
     double denominator = Math.sqrt(lengthSquaredv1) * Math.sqrt(lengthSquaredv2);
-
+    
     // correct for floating-point rounding errors
     if (denominator < dotProduct) {
       denominator = dotProduct;
     }
-
-    return 1.0 - (dotProduct / denominator);
+    
+    return 1.0 - dotProduct / denominator;
   }
-
+  
   @Override
   public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
-    return distance(centroid, v); // TODO
+    Iterator<Vector.Element> iter = v.iterateNonZero();
+    double lengthSquaredv = 0.0;
+    while (iter.hasNext()) {
+      Vector.Element elt = iter.next();
+      lengthSquaredv += elt.get() * elt.get();
+    }
+    
+    double dotProduct = centroid.dot(v);
+    double denominator = Math.sqrt(centroidLengthSquare) * Math.sqrt(lengthSquaredv);
+    
+    // correct for floating-point rounding errors
+    if (denominator < dotProduct) {
+      denominator = dotProduct;
+    }
+    
+    return 1.0 - dotProduct / denominator;
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,28 +17,32 @@
 
 package org.apache.mahout.common.distance;
 
-import org.apache.mahout.math.Vector;
 import org.apache.mahout.common.parameters.Parametered;
+import org.apache.mahout.math.Vector;
 
 /** This interface is used for objects which can determine a distance metric between two points */
 public interface DistanceMeasure extends Parametered {
-
+  
   /**
    * Returns the distance metric applied to the arguments
-   *
-   * @param v1 a Vector defining a multidimensional point in some feature space
-   * @param v2 a Vector defining a multidimensional point in some feature space
+   * 
+   * @param v1
+   *          a Vector defining a multidimensional point in some feature space
+   * @param v2
+   *          a Vector defining a multidimensional point in some feature space
    * @return a scalar doubles of the distance
    */
   double distance(Vector v1, Vector v2);
-
+  
   /**
-   * Optimized version of distance metric for sparse vectors. This distance computation requires operations proportional
-   * to the number of non-zero elements in the vector instead of the cardinality of the vector.
-   *
-   * @param centroidLengthSquare Square of the length of centroid
-   * @param centroid             Centroid vector
+   * Optimized version of distance metric for sparse vectors. This distance computation requires operations
+   * proportional to the number of non-zero elements in the vector instead of the cardinality of the vector.
+   * 
+   * @param centroidLengthSquare
+   *          Square of the length of centroid
+   * @param centroid
+   *          Centroid vector
    */
   double distance(double centroidLengthSquare, Vector centroid, Vector v);
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/EuclideanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/EuclideanDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/EuclideanDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/EuclideanDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -20,18 +20,20 @@
 import org.apache.mahout.math.Vector;
 
 /**
- * This class implements a Euclidean distance metric by summing the square root of the squared differences between each
- * coordinate. <p/> If you don't care about the true distance and only need the values for comparison, then the base
- * class, {@link SquaredEuclideanDistanceMeasure}, will be faster since it doesn't do the actual square root of the
+ * This class implements a Euclidean distance metric by summing the square root of the squared differences
+ * between each coordinate.
+ * <p/>
+ * If you don't care about the true distance and only need the values for comparison, then the base class,
+ * {@link SquaredEuclideanDistanceMeasure}, will be faster since it doesn't do the actual square root of the
  * squared differences.
  */
 public class EuclideanDistanceMeasure extends SquaredEuclideanDistanceMeasure {
-
+  
   @Override
   public double distance(Vector v1, Vector v2) {
     return Math.sqrt(super.distance(v1, v2));
   }
-
+  
   @Override
   public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
     return Math.sqrt(super.distance(centroidLengthSquare, centroid, v));

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ManhattanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ManhattanDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ManhattanDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ManhattanDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,21 +17,21 @@
 
 package org.apache.mahout.common.distance;
 
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.math.CardinalityException;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.common.parameters.Parameter;
-
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Iterator;
 
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.mahout.common.parameters.Parameter;
+import org.apache.mahout.math.CardinalityException;
+import org.apache.mahout.math.Vector;
+
 /**
- * This class implements a "manhattan distance" metric by summing the absolute values of the difference between each
- * coordinate
+ * This class implements a "manhattan distance" metric by summing the absolute values of the difference
+ * between each coordinate
  */
 public class ManhattanDistanceMeasure implements DistanceMeasure {
-
+  
   public static double distance(double[] p1, double[] p2) {
     double result = 0.0;
     for (int i = 0; i < p1.length; i++) {
@@ -39,22 +39,22 @@
     }
     return result;
   }
-
+  
   @Override
   public void configure(JobConf job) {
-    // nothing to do
+  // nothing to do
   }
-
+  
   @Override
   public Collection<Parameter<?>> getParameters() {
     return Collections.emptyList();
   }
-
+  
   @Override
   public void createParameters(String prefix, JobConf jobConf) {
-    // nothing to do
+  // nothing to do
   }
-
+  
   @Override
   public double distance(Vector v1, Vector v2) {
     if (v1.size() != v2.size()) {
@@ -62,17 +62,19 @@
     }
     double result = 0;
     Vector vector = v1.minus(v2);
-    Iterator<Vector.Element> iter = vector.iterateNonZero();//this contains all non zero elements between the two
+    Iterator<Vector.Element> iter = vector.iterateNonZero(); // this contains all non zero elements between
+                                                             // the
+    // two
     while (iter.hasNext()) {
       Vector.Element e = iter.next();
       result += Math.abs(v2.getQuick(e.index()) - v1.getQuick(e.index()));
     }
     return result;
   }
-
+  
   @Override
   public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
     return distance(centroid, v); // TODO
   }
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/SquaredEuclideanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/SquaredEuclideanDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/SquaredEuclideanDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/SquaredEuclideanDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,40 +17,41 @@
 
 package org.apache.mahout.common.distance;
 
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.common.parameters.Parameter;
-
 import java.util.Collection;
 import java.util.Collections;
 
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.mahout.common.parameters.Parameter;
+import org.apache.mahout.math.Vector;
+
 /**
- * Like {@link EuclideanDistanceMeasure} but it does not take the square root. <p/> Thus, it is
- * not actually the Euclidean Distance, but it is saves on computation when you only need the distance for comparison
- * and don't care about the actual value as a distance.
+ * Like {@link EuclideanDistanceMeasure} but it does not take the square root.
+ * <p/>
+ * Thus, it is not actually the Euclidean Distance, but it is saves on computation when you only need the
+ * distance for comparison and don't care about the actual value as a distance.
  */
 public class SquaredEuclideanDistanceMeasure implements DistanceMeasure {
-
+  
   @Override
   public void configure(JobConf job) {
-    // nothing to do
+  // nothing to do
   }
-
+  
   @Override
   public Collection<Parameter<?>> getParameters() {
     return Collections.emptyList();
   }
-
+  
   @Override
   public void createParameters(String prefix, JobConf jobConf) {
-    // nothing to do
+  // nothing to do
   }
-
+  
   @Override
   public double distance(Vector v1, Vector v2) {
     return v1.getDistanceSquared(v2);
   }
-
+  
   @Override
   public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
     return centroidLengthSquare - 2 * v.dot(centroid) + v.getLengthSquared();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,37 +17,35 @@
 
 package org.apache.mahout.common.distance;
 
-import org.apache.mahout.math.Vector;
-
 import java.util.Iterator;
 
+import org.apache.mahout.math.Vector;
+
 /**
  * Tanimoto coefficient implementation.
- *
+ * 
  * http://en.wikipedia.org/wiki/Jaccard_index
  */
 public class TanimotoDistanceMeasure extends WeightedDistanceMeasure {
-
-
+  
   /**
    * Calculates the distance between two vectors.
    * 
-   * The coefficient (a measure of similarity) is:
-   * T(a, b) = a.b / (|a|^2 + |b|^2 - a.b)
-   *
+   * The coefficient (a measure of similarity) is: T(a, b) = a.b / (|a|^2 + |b|^2 - a.b)
+   * 
    * The distance d(a,b) = 1 - T(a,b)
-   *
+   * 
    * @return 0 for perfect match, > 0 for greater distance
    */
   @Override
   public double distance(Vector a, Vector b) {
     double ab = dot(a, b);
     double denominator = dot(a, a) + dot(b, b) - ab;
-    if(denominator < ab) {  // correct for fp round-off: distance >= 0
+    if (denominator < ab) { // correct for fp round-off: distance >= 0
       denominator = ab;
     }
-    if(denominator > 0) {
-        // denom == 0 only when dot(a,a) == dot(b,b) == dot(a,b) == 0
+    if (denominator > 0) {
+      // denom == 0 only when dot(a,a) == dot(b,b) == dot(a,b) == 0
       return 1 - ab / denominator;
     } else {
       return 0;
@@ -70,11 +68,10 @@
     }
     return dot;
   }
-
+  
   @Override
   public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
     return distance(centroid, v); // TODO
   }
-
+  
 }
-

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,45 +17,47 @@
 
 package org.apache.mahout.common.distance;
 
+import java.io.DataInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.Vector;
 import org.apache.mahout.common.parameters.ClassParameter;
 import org.apache.mahout.common.parameters.Parameter;
 import org.apache.mahout.common.parameters.PathParameter;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
-import java.io.DataInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-
 /** Abstract implementation of DistanceMeasure with support for weights. */
 public abstract class WeightedDistanceMeasure implements DistanceMeasure {
-
+  
   private List<Parameter<?>> parameters;
   private Parameter<Path> weightsFile;
   private ClassParameter vectorClass;
   private Vector weights;
-
+  
   @Override
   public void createParameters(String prefix, JobConf jobConf) {
     parameters = new ArrayList<Parameter<?>>();
-    weightsFile = new PathParameter(prefix, "weightsFile", jobConf, null, "Path on DFS to a file containing the weights.");
+    weightsFile = new PathParameter(prefix, "weightsFile", jobConf, null,
+        "Path on DFS to a file containing the weights.");
     parameters.add(weightsFile);
-    vectorClass = new ClassParameter(prefix, "vectorClass", jobConf, DenseVector.class, "Class<Vector> file specified in parameter weightsFile has been serialized with.");
+    vectorClass = new ClassParameter(prefix, "vectorClass", jobConf, DenseVector.class,
+        "Class<Vector> file specified in parameter weightsFile has been serialized with.");
     parameters.add(vectorClass);
   }
-
+  
   @Override
   public Collection<Parameter<?>> getParameters() {
     return parameters;
   }
-
+  
   @Override
   public void configure(JobConf jobConf) {
     if (parameters == null) {
@@ -84,14 +86,13 @@
       throw new IllegalStateException(e);
     }
   }
-
+  
   public Vector getWeights() {
     return weights;
   }
-
+  
   public void setWeights(Vector weights) {
     this.weights = weights;
   }
-
-
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedEuclideanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedEuclideanDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedEuclideanDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedEuclideanDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,16 +17,16 @@
 
 package org.apache.mahout.common.distance;
 
-import org.apache.mahout.math.Vector;
-
 import java.util.Iterator;
 
+import org.apache.mahout.math.Vector;
+
 /**
- * This class implements a Euclidean distance metric by summing the square root of the squared differences between each
- * coordinate,  optionally adding weights.
+ * This class implements a Euclidean distance metric by summing the square root of the squared differences
+ * between each coordinate, optionally adding weights.
  */
 public class WeightedEuclideanDistanceMeasure extends WeightedDistanceMeasure {
-
+  
   @Override
   public double distance(Vector p1, Vector p2) {
     double result = 0;
@@ -47,10 +47,10 @@
     }
     return Math.sqrt(result);
   }
-
+  
   @Override
   public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
     return distance(centroid, v); // TODO
   }
-
+  
 }



Mime
View raw message