mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r1072858 [2/3] - in /mahout/trunk: core/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/ core/s...
Date Mon, 21 Feb 2011 06:47:07 GMT
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Mon Feb 21 06:47:02 2011
@@ -51,12 +51,10 @@ import org.slf4j.LoggerFactory;
 
 public class MeanShiftCanopyDriver extends AbstractJob {
 
-  public static final String INPUT_IS_CANOPIES_OPTION = "inputIsCanopies";
-
   private static final Logger log = LoggerFactory.getLogger(MeanShiftCanopyDriver.class);
 
+  public static final String INPUT_IS_CANOPIES_OPTION = "inputIsCanopies";
   public static final String STATE_IN_KEY = "org.apache.mahout.clustering.meanshift.stateInKey";
-
   private static final String CONTROL_CONVERGED = "control/converged";
 
   public static void main(String[] args) throws Exception {
@@ -95,11 +93,23 @@ public class MeanShiftCanopyDriver exten
     double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
     int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
     boolean inputIsCanopies = hasOption(INPUT_IS_CANOPIES_OPTION);
-    boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
+    boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
+        DefaultOptionCreator.SEQUENTIAL_METHOD);
     ClassLoader ccl = Thread.currentThread().getContextClassLoader();
     DistanceMeasure measure = ccl.loadClass(measureClass).asSubclass(DistanceMeasure.class).newInstance();
 
-    run(getConf(), input, output, measure, t1, t2, convergenceDelta, maxIterations, inputIsCanopies, runClustering, runSequential);
+    run(getConf(),
+        input,
+        output,
+        measure,
+        t1,
+        t2,
+        convergenceDelta,
+        maxIterations,
+        inputIsCanopies,
+        runClustering,
+        runSequential);
+
     return 0;
   }
 
@@ -138,8 +148,8 @@ public class MeanShiftCanopyDriver exten
                   int maxIterations,
                   boolean inputIsCanopies,
                   boolean runClustering,
-                  boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException,
-      IllegalAccessException {
+                  boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     Path clustersIn = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
     if (inputIsCanopies) {
       clustersIn = input;
@@ -147,10 +157,10 @@ public class MeanShiftCanopyDriver exten
       createCanopyFromVectors(conf, input, clustersIn, measure, runSequential);
     }
 
-    Path clustersOut = buildClusters(conf, clustersIn, output, measure, t1, t2, convergenceDelta, maxIterations, runSequential);
+    Path clustersOut =
+        buildClusters(conf, clustersIn, output, measure, t1, t2, convergenceDelta, maxIterations, runSequential);
     if (runClustering) {
-      clusterData(conf,
-                  inputIsCanopies ? input : new Path(output, Cluster.INITIAL_CLUSTERS_DIR),
+      clusterData(inputIsCanopies ? input : new Path(output, Cluster.INITIAL_CLUSTERS_DIR),
                   clustersOut,
                   new Path(output, Cluster.CLUSTERED_POINTS_DIR),
                   runSequential);
@@ -159,17 +169,6 @@ public class MeanShiftCanopyDriver exten
 
   /**
    * Convert input vectors to MeanShiftCanopies for further processing
-   * 
-   * @param conf
-   * @param input
-   * @param output
-   * @param measure
-   * @param runSequential
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
-   * @throws InstantiationException
-   * @throws IllegalAccessException
    */
   public static void createCanopyFromVectors(Configuration conf,
                                              Path input,
@@ -191,8 +190,8 @@ public class MeanShiftCanopyDriver exten
    * @param output the Path to the initial clusters directory
    * @param measure the DistanceMeasure
    */
-  private static void createCanopyFromVectorsSeq(Path input, Path output, DistanceMeasure measure) throws IOException,
-      InstantiationException, IllegalAccessException {
+  private static void createCanopyFromVectorsSeq(Path input, Path output, DistanceMeasure measure)
+    throws IOException, InstantiationException, IllegalAccessException {
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(input.toUri(), conf);
     FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
@@ -221,17 +220,9 @@ public class MeanShiftCanopyDriver exten
 
   /**
    * Convert vectors to MeanShiftCanopies using Hadoop
-   * 
-   * @param conf
-   * @param input
-   * @param output
-   * @param measure
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
    */
   private static void createCanopyFromVectorsMR(Configuration conf, Path input, Path output, DistanceMeasure measure)
-      throws IOException, InterruptedException, ClassNotFoundException {
+    throws IOException, InterruptedException, ClassNotFoundException {
     conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
     Job job = new Job(conf);
     job.setJarByClass(MeanShiftCanopyDriver.class);
@@ -245,7 +236,7 @@ public class MeanShiftCanopyDriver exten
     FileInputFormat.setInputPaths(job, input);
     FileOutputFormat.setOutputPath(job, output);
 
-    if (job.waitForCompletion(true) == false) {
+    if (!job.waitForCompletion(true)) {
       throw new InterruptedException("Mean Shift createCanopyFromVectorsMR failed on input " + input.toString());
     }
   }
@@ -278,8 +269,8 @@ public class MeanShiftCanopyDriver exten
                             double t2,
                             double convergenceDelta,
                             int maxIterations,
-                            boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
-      InstantiationException, IllegalAccessException {
+                            boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     if (runSequential) {
       return buildClustersSeq(clustersIn, output, measure, t1, t2, convergenceDelta, maxIterations);
     } else {
@@ -289,18 +280,6 @@ public class MeanShiftCanopyDriver exten
 
   /**
    * Build new clusters sequentially
-   * 
-   * @param clustersIn
-   * @param output
-   * @param measure
-   * @param t1
-   * @param t2
-   * @param convergenceDelta
-   * @param maxIterations
-   * @return
-   * @throws IOException
-   * @throws InstantiationException
-   * @throws IllegalAccessException
    */
   private static Path buildClustersSeq(Path clustersIn,
                                        Path output,
@@ -308,7 +287,8 @@ public class MeanShiftCanopyDriver exten
                                        double t1,
                                        double t2,
                                        double convergenceDelta,
-                                       int maxIterations) throws IOException, InstantiationException, IllegalAccessException {
+                                       int maxIterations)
+    throws IOException, InstantiationException, IllegalAccessException {
     MeanShiftCanopyClusterer clusterer = new MeanShiftCanopyClusterer(measure, t1, t2, convergenceDelta);
     List<MeanShiftCanopy> clusters = new ArrayList<MeanShiftCanopy>();
     Configuration conf = new Configuration();
@@ -327,7 +307,7 @@ public class MeanShiftCanopyDriver exten
         reader.close();
       }
     }
-    boolean[] converged = { false };
+    boolean[] converged = {false};
     int iteration = 1;
     while (!converged[0] && iteration <= maxIterations) {
       log.info("Mean Shift Iteration: {}", iteration);
@@ -340,9 +320,14 @@ public class MeanShiftCanopyDriver exten
                                                            MeanShiftCanopy.class);
       try {
         for (MeanShiftCanopy cluster : clusters) {
-          log.debug("Writing Cluster:{} center:{} numPoints:{} radius:{} to: {}", new Object[] { cluster.getId(),
-              AbstractCluster.formatVector(cluster.getCenter(), null), cluster.getNumPoints(),
-              AbstractCluster.formatVector(cluster.getRadius(), null), clustersOut.getName() });
+          log.debug("Writing Cluster:{} center:{} numPoints:{} radius:{} to: {}",
+                    new Object[] {
+                        cluster.getId(),
+                        AbstractCluster.formatVector(cluster.getCenter(), null),
+                        cluster.getNumPoints(),
+                        AbstractCluster.formatVector(cluster.getRadius(), null),
+                        clustersOut.getName()
+                    });
           writer.append(new Text(cluster.getIdentifier()), cluster);
         }
       } finally {
@@ -356,19 +341,6 @@ public class MeanShiftCanopyDriver exten
 
   /**
    * Build new clusters using Hadoop
-   * 
-   * @param conf
-   * @param clustersIn
-   * @param output
-   * @param measure
-   * @param t1
-   * @param t2
-   * @param convergenceDelta
-   * @param maxIterations
-   * @return
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
    */
   private static Path buildClustersMR(Configuration conf,
                                       Path clustersIn,
@@ -377,7 +349,8 @@ public class MeanShiftCanopyDriver exten
                                       double t1,
                                       double t2,
                                       double convergenceDelta,
-                                      int maxIterations) throws IOException, InterruptedException, ClassNotFoundException {
+                                      int maxIterations)
+    throws IOException, InterruptedException, ClassNotFoundException {
     // iterate until the clusters converge
     boolean converged = false;
     int iteration = 1;
@@ -422,7 +395,8 @@ public class MeanShiftCanopyDriver exten
                                      String measureClassName,
                                      double t1,
                                      double t2,
-                                     double convergenceDelta) throws IOException, InterruptedException, ClassNotFoundException {
+                                     double convergenceDelta)
+    throws IOException, InterruptedException, ClassNotFoundException {
 
     conf.set(MeanShiftCanopyConfigKeys.DISTANCE_MEASURE_KEY, measureClassName);
     conf.set(MeanShiftCanopyConfigKeys.CLUSTER_CONVERGENCE_KEY, String.valueOf(convergenceDelta));
@@ -443,15 +417,13 @@ public class MeanShiftCanopyDriver exten
     job.setInputFormatClass(SequenceFileInputFormat.class);
     job.setOutputFormatClass(SequenceFileOutputFormat.class);
     job.setJarByClass(MeanShiftCanopyDriver.class);
-    if (job.waitForCompletion(true) == false) {
+    if (!job.waitForCompletion(true)) {
       throw new InterruptedException("Mean Shift Iteration failed on input " + input.toString());
     }
   }
 
   /**
    * Run the job using supplied arguments
-   * @param conf 
-   *          the Configuration to use
    * @param input
    *          the directory pathname for input points
    * @param clustersIn
@@ -460,8 +432,8 @@ public class MeanShiftCanopyDriver exten
    *          the directory pathname for output clustered points
    * @param runSequential if true run in sequential execution mode
    */
-  public static void clusterData(Configuration conf, Path input, Path clustersIn, Path output, boolean runSequential)
-      throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+  public static void clusterData(Path input, Path clustersIn, Path output, boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     if (runSequential) {
       clusterDataSeq(input, clustersIn, output);
     } else {
@@ -471,16 +443,9 @@ public class MeanShiftCanopyDriver exten
 
   /**
    * Cluster the data sequentially
-   * 
-   * @param input
-   * @param clustersIn
-   * @param output
-   * @throws IOException
-   * @throws InstantiationException
-   * @throws IllegalAccessException
    */
-  private static void clusterDataSeq(Path input, Path clustersIn, Path output) throws IOException, InstantiationException,
-      IllegalAccessException {
+  private static void clusterDataSeq(Path input, Path clustersIn, Path output)
+    throws IOException, InstantiationException, IllegalAccessException {
     Collection<MeanShiftCanopy> clusters = new ArrayList<MeanShiftCanopy>();
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(clustersIn.toUri(), conf);
@@ -526,16 +491,9 @@ public class MeanShiftCanopyDriver exten
 
   /**
    * Cluster the data using Hadoop
-   * 
-   * @param input
-   * @param clustersIn
-   * @param output
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
    */
-  private static void clusterDataMR(Path input, Path clustersIn, Path output) throws IOException, InterruptedException,
-      ClassNotFoundException {
+  private static void clusterDataMR(Path input, Path clustersIn, Path output)
+    throws IOException, InterruptedException, ClassNotFoundException {
     Configuration conf = new Configuration();
     conf.set(STATE_IN_KEY, clustersIn.toString());
     Job job = new Job(conf, "Mean Shift Driver running clusterData over input: " + input);
@@ -551,7 +509,7 @@ public class MeanShiftCanopyDriver exten
     FileInputFormat.setInputPaths(job, input);
     FileOutputFormat.setOutputPath(job, output);
 
-    if (job.waitForCompletion(true) == false) {
+    if (!job.waitForCompletion(true)) {
       throw new InterruptedException("Mean Shift Clustering failed on clustersIn " + clustersIn.toString());
     }
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java Mon Feb 21 06:47:02 2011
@@ -49,7 +49,7 @@ public class MeanShiftCanopyReducer exte
     for (MeanShiftCanopy canopy : canopies) {
       boolean converged = clusterer.shiftToMean(canopy);
       if (converged) {
-      context.getCounter("Clustering", "Converged Clusters").increment(1);
+        context.getCounter("Clustering", "Converged Clusters").increment(1);
       }
       allConverged = converged && allConverged;
       context.write(new Text(canopy.getIdentifier()), canopy);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/HashFactory.java Mon Feb 21 06:47:02 2011
@@ -21,7 +21,7 @@ import org.apache.mahout.vectorizer.enco
 
 import java.util.Random;
 
-public class HashFactory {
+public final class HashFactory {
 
   private HashFactory() {
   }
@@ -49,6 +49,8 @@ public class HashFactory {
           hashFunction[i] = new MurmurHashWrapper(seed.nextInt());
         }
         break;
+      default:
+        throw new IllegalStateException("Unknown type: " + type);
     }
     return hashFunction;
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashMapper.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashMapper.java Mon Feb 21 06:47:02 2011
@@ -88,7 +88,7 @@ public class MinHashMapper extends Mappe
         bytesToHash[0] = (byte) (value >> 24);
         bytesToHash[1] = (byte) (value >> 16);
         bytesToHash[2] = (byte) (value >> 8);
-        bytesToHash[3] = (byte) (value);
+        bytesToHash[3] = (byte) value;
         int hashIndex = hashFunction[i].hash(bytesToHash);
         if (minHashValues[i] > hashIndex) {
           minHashValues[i] = hashIndex;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputJob.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputJob.java Mon Feb 21 06:47:02 2011
@@ -71,10 +71,10 @@ public final class AffinityMatrixInputJo
     throws IOException, InterruptedException, ClassNotFoundException {
     Path seqFiles = new Path(output, "seqfiles-" + (System.nanoTime() & 0xFF));
     runJob(input, seqFiles, dimensions, dimensions);
-    DistributedRowMatrix A = new DistributedRowMatrix(seqFiles, 
+    DistributedRowMatrix a = new DistributedRowMatrix(seqFiles,
         new Path(seqFiles, "seqtmp-" + (System.nanoTime() & 0xFF)), 
         dimensions, dimensions);
-    A.setConf(new Configuration());
-    return A;
+    a.setConf(new Configuration());
+    return a;
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputMapper.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputMapper.java Mon Feb 21 06:47:02 2011
@@ -55,21 +55,18 @@ public class AffinityMatrixInputMapper
 
     // enforce well-formed textual representation of the graph
     if (elements.length != 3) {
-      throw new IOException("Expected input of length 3, received " +
-          elements.length + ". Please make sure you adhere to " +
-          "the structure of (i,j,value) for representing a graph " +
-          "in text.");
+      throw new IOException("Expected input of length 3, received "
+          + elements.length + ". Please make sure you adhere to "
+          + "the structure of (i,j,value) for representing a graph in text.");
     } else if (elements[0].length() == 0 || elements[1].length() == 0 || elements[2].length() == 0) {
-      throw new IOException("Found an element of 0 length. Please " +
-          "be sure you adhere to the structure of (i,j,value) for " +
-          "representing a graph in text.");
+      throw new IOException("Found an element of 0 length. Please be sure you adhere to the structure of "
+          + "(i,j,value) for  representing a graph in text.");
     }
 
     // parse the line of text into a DistributedRowMatrix entry,
     // making the row (elements[0]) the key to the Reducer, and
     // setting the column (elements[1]) in the entry itself
-    DistributedRowMatrix.MatrixEntryWritable toAdd =
-      new DistributedRowMatrix.MatrixEntryWritable();
+    DistributedRowMatrix.MatrixEntryWritable toAdd = new DistributedRowMatrix.MatrixEntryWritable();
     IntWritable row = new IntWritable(Integer.valueOf(elements[0]));
     toAdd.setRow(-1); // already set as the Reducer's key
     toAdd.setCol(Integer.valueOf(elements[1]));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/IntDoublePairWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/IntDoublePairWritable.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/IntDoublePairWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/IntDoublePairWritable.java Mon Feb 21 06:47:02 2011
@@ -36,7 +36,8 @@ public class IntDoublePairWritable imple
   private int key;
   private double value;
   
-  public IntDoublePairWritable() {}
+  public IntDoublePairWritable() {
+  }
   
   public IntDoublePairWritable(int k, double v) {
     this.key = k;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/VertexWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/VertexWritable.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/VertexWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/VertexWritable.java Mon Feb 21 06:47:02 2011
@@ -40,7 +40,8 @@ public class VertexWritable implements W
   /** an extra type delimiter, can probably be null */
   private String type;
   
-  public VertexWritable() {}
+  public VertexWritable() {
+  }
 
   public VertexWritable(int i, int j, double v, String t) {
     this.i = i;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityMapper.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityMapper.java Mon Feb 21 06:47:02 2011
@@ -76,26 +76,26 @@ public class EigencutsSensitivityMapper 
     Map<Integer, EigencutsSensitivityNode> columns = new HashMap<Integer, EigencutsSensitivityNode>();
     Vector ev = vw.get();
     for (int i = 0; i < ev.size(); i++) {
-      double minS_ij = Double.MAX_VALUE;
+      double minsij = Double.MAX_VALUE;
       int minInd = -1;
       for (int j = 0; j < ev.size(); j++) {          
-        double S_ij = performSensitivityCalculation(eigenvalue, ev.get(i), 
+        double sij = performSensitivityCalculation(eigenvalue, ev.get(i),
             ev.get(j), diagonal.get(i), diagonal.get(j));
         
         // perform non-maximal suppression
         // is this the smallest value in the row?
-        if (S_ij < minS_ij) {
-          minS_ij = S_ij;
+        if (sij < minsij) {
+          minsij = sij;
           minInd = j;
         }
       }
       
       // is this the smallest value in the column?
       Integer column = minInd;
-      EigencutsSensitivityNode value = new EigencutsSensitivityNode(i, minInd, minS_ij);
+      EigencutsSensitivityNode value = new EigencutsSensitivityNode(i, minInd, minsij);
       if (!columns.containsKey(column)) {
         columns.put(column, value);
-      } else if (columns.get(column).getSensitivity() > minS_ij) {
+      } else if (columns.get(column).getSensitivity() > minsij) {
         columns.remove(column);
         columns.put(column, value);
       }
@@ -114,36 +114,26 @@ public class EigencutsSensitivityMapper 
    * (log(2) / (lambda_k * log(lambda_k) * log(lambda_k^beta0 / 2))) * [
    * - ((u_i / sqrt(d_i)) - (u_j / sqrt(d_j)))^2 + (1 - lambda_k) *
    *   ((u_i^2 / d_i) + (u_j^2 / d_j)) ]
-   * 
-   * @param eigenvalue
-   * @param ev_i
-   * @param ev_j
-   * @param diag_i
-   * @param diag_j
-   * @return
    */
-  private double performSensitivityCalculation(double eigenvalue, double
-      ev_i, double ev_j, double diag_i, double diag_j) {
+  private double performSensitivityCalculation(double eigenvalue,
+                                               double evi,
+                                               double evj,
+                                               double diagi,
+                                               double diagj) {
     
-    double firsthalf = Functions.LOGARITHM.apply(2) / (
-        eigenvalue * Functions.LOGARITHM.apply(eigenvalue) * 
-        Functions.LOGARITHM.apply(Functions.POW.apply(eigenvalue, beta0) / 2));
+    double firsthalf = Functions.LOGARITHM.apply(2)
+        / (eigenvalue * Functions.LOGARITHM.apply(eigenvalue)
+           * Functions.LOGARITHM.apply(Functions.POW.apply(eigenvalue, beta0) / 2));
     
-    double secondhalf = -Functions.POW.apply(((ev_i / 
-        Functions.SQRT.apply(diag_i)) - (ev_j / 
-        Functions.SQRT.apply(diag_j))), 2) + (1 - eigenvalue) * 
-        ((Functions.POW.apply(ev_i, 2) / diag_i) + 
-        (Functions.POW.apply(ev_j, 2) / diag_j));
+    double secondhalf =
+        -Functions.POW.apply(((evi / Functions.SQRT.apply(diagi)) - (evj / Functions.SQRT.apply(diagj))), 2)
+        + (1.0 - eigenvalue) * ((Functions.POW.apply(evi, 2) / diagi) + (Functions.POW.apply(evj, 2) / diagj));
     
     return firsthalf * secondhalf;
   }
   
   /**
    * Utility helper method, used for unit testing.
-   * @param beta0
-   * @param epsilon
-   * @param eigenvalues
-   * @param diagonal
    */
   void setup(double beta0, double epsilon, Vector eigenvalues, Vector diagonal) {
     this.beta0 = beta0;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java Mon Feb 21 06:47:02 2011
@@ -32,7 +32,8 @@ public final class IntTuple implements W
   
   private IntArrayList tuple = new IntArrayList();
   
-  public IntTuple() {}
+  public IntTuple() {
+  }
   
   public IntTuple(int firstEntry) {
     add(firstEntry);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StableFixedSizeSamplingIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StableFixedSizeSamplingIterator.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StableFixedSizeSamplingIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/StableFixedSizeSamplingIterator.java Mon Feb 21 06:47:02 2011
@@ -53,7 +53,7 @@ public class StableFixedSizeSamplingIter
     }
     
     Collections.sort(buf);
-    return new DelegateIterator(buf);
+    return new DelegateIterator<T>(buf);
   }
   
   private static final class Entry<T> implements Comparable<Entry<T>> {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Builder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Builder.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Builder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Builder.java Mon Feb 21 06:47:02 2011
@@ -54,17 +54,11 @@ public abstract class Builder {
   
   private static final Logger log = LoggerFactory.getLogger(Builder.class);
   
-  /** Tree Builder Component */
   private final TreeBuilder treeBuilder;
-  
   private final Path dataPath;
-  
   private final Path datasetPath;
-  
   private final Long seed;
-  
   private final Configuration conf;
-  
   private String outputDirName = "output";
   
   protected Builder(TreeBuilder treeBuilder, Path dataPath, Path datasetPath, Long seed, Configuration conf) {
@@ -293,7 +287,7 @@ public abstract class Builder {
     throws IOException, ClassNotFoundException, InterruptedException;
   
   public DecisionForest build(int nbTrees, PredictionCallback callback)
-      throws IOException, ClassNotFoundException, InterruptedException {
+    throws IOException, ClassNotFoundException, InterruptedException {
     // int numTrees = getNbTrees(conf);
     
     Path outputPath = getOutputPath(conf);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java Mon Feb 21 06:47:02 2011
@@ -59,19 +59,12 @@ public class Classifier {
   private static final Logger log = LoggerFactory.getLogger(Classifier.class);
 
   private final Path forestPath;
-
   private final Path inputPath;
-
   private final Path datasetPath;
-
   private final Configuration conf;
-
-  /**
-   * If not null, the Job will build the confusionMatrix.
-   */
+  /** If not null, the Job will build the confusionMatrix. */
   private final ResultAnalyzer analyzer;
   private final Dataset dataset;
-
   private final Path outputPath; // path that will contain the final output of the classifier
   private final Path mappersOutputPath; // mappers will output here
 
@@ -79,7 +72,12 @@ public class Classifier {
     return analyzer;
   }
 
-  public Classifier(Path forestPath, Path inputPath, Path datasetPath, Path outputPath, Configuration conf, boolean analyze) throws IOException {
+  public Classifier(Path forestPath,
+                    Path inputPath,
+                    Path datasetPath,
+                    Path outputPath,
+                    Configuration conf,
+                    boolean analyze) throws IOException {
     this.forestPath = forestPath;
     this.inputPath = inputPath;
     this.datasetPath = datasetPath;
@@ -207,16 +205,11 @@ public class Classifier {
 
     /** used to convert input values to data instances */
     private DataConverter converter;
-
     private DecisionForest forest;
-
     private final Random rng = RandomUtils.getRandom();
-
     private boolean first = true;
-
     private final Text lvalue = new Text();
 
-
     @Override
     protected void setup(Context context) throws IOException, InterruptedException {
       super.setup(context);    //To change body of overridden methods use File | Settings | File Templates.

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java Mon Feb 21 06:47:02 2011
@@ -159,8 +159,8 @@ public class PartialBuilder extends Buil
                                       Node[] trees,
                                       PredictionCallback callback) throws IOException {
     Preconditions.checkArgument((keys == null && trees == null) || (keys != null && trees != null),
-        "if keys is null, trees should also be null" );
-    Preconditions.checkArgument(keys == null || keys.length == trees.length, "keys.length != trees.length" );
+        "if keys is null, trees should also be null");
+    Preconditions.checkArgument(keys == null || keys.length == trees.length, "keys.length != trees.length");
 
     Configuration conf = job.getConfiguration();
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step2Mapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step2Mapper.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step2Mapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step2Mapper.java Mon Feb 21 06:47:02 2011
@@ -49,15 +49,10 @@ public class Step2Mapper extends Mapper<
   private static final Logger log = LoggerFactory.getLogger(Step2Mapper.class);
   
   private TreeID[] keys;
-  
   private Node[] trees;
-  
   private SingleTreePredictions[] callbacks;
-  
   private DataConverter converter;
-  
   private int partition = -1;
-  
   /** num treated instances */
   private int instanceId;
   
@@ -70,7 +65,7 @@ public class Step2Mapper extends Mapper<
     
     log.info("DistributedCache.getCacheFiles(): {}", ArrayUtils.toString(files));
     
-    Preconditions.checkArgument(files != null && files.length >= 2, "missing paths from the DistributedCache" );
+    Preconditions.checkArgument(files != null && files.length >= 2, "missing paths from the DistributedCache");
     
     Path datasetPath = new Path(files[0].getPath());
     Dataset dataset = Dataset.load(conf, datasetPath);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixWritable.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixWritable.java Mon Feb 21 06:47:02 2011
@@ -107,7 +107,7 @@ public class MatrixWritable implements W
     int rows = in.readInt();
     int columns = in.readInt();
 
-    Matrix r ;
+    Matrix r;
     if (dense) {
       r = new DenseMatrix(rows, columns);
     } else {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java Mon Feb 21 06:47:02 2011
@@ -158,7 +158,7 @@ public final class TimesSquaredJob {
       try {
         URI[] localFiles = DistributedCache.getCacheFiles(conf);
         Preconditions.checkArgument(localFiles != null && localFiles.length >= 1,
-          "missing paths from the DistributedCache" );
+                                    "missing paths from the DistributedCache");
         Path inputVectorPath = new Path(localFiles[0].getPath());
         FileSystem fs = inputVectorPath.getFileSystem(conf);
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java Mon Feb 21 06:47:02 2011
@@ -66,14 +66,18 @@ public class DistributedLanczosSolver ex
   /**
    * Factored-out LanczosSolver for the purpose of invoking it programmatically
    */
-  public void runJob(Configuration originalConfig, Path inputPath,
-		  			Path outputTmpPath, int numRows, int numCols,
-		  			boolean isSymmetric, int desiredRank, Matrix eigenVectors, 
-		  			List<Double> eigenValues, String outputEigenVectorPathString) 
-  					throws IOException {
-	  DistributedRowMatrix matrix = new DistributedRowMatrix(
-			  							inputPath, outputTmpPath, 
-			  							numRows, numCols);
+  public void runJob(Configuration originalConfig,
+                     Path inputPath,
+                     Path outputTmpPath,
+                     int numRows,
+                     int numCols,
+                     boolean isSymmetric,
+                     int desiredRank,
+                     Matrix eigenVectors,
+                     List<Double> eigenValues,
+                     String outputEigenVectorPathString) throws IOException {
+	  DistributedRowMatrix matrix =
+        new DistributedRowMatrix(inputPath, outputTmpPath, numRows, numCols);
 	  matrix.setConf(new Configuration(originalConfig));
 	  setConf(originalConfig);
 	  solve(matrix, desiredRank, eigenVectors, eigenValues, isSymmetric);
@@ -184,13 +188,13 @@ public class DistributedLanczosSolver ex
    * @param eigenVectors The eigenvectors to be serialized
    * @param eigenValues The eigenvalues to be serialized
    * @param outputPath The path (relative to the current Configuration's FileSystem) to save the output to.
-   * @throws IOException
    */
   public void serializeOutput(Matrix eigenVectors, List<Double> eigenValues, Path outputPath) throws IOException {
     log.info("Persisting {} eigenVectors and eigenValues to: {}", eigenVectors.numRows(), outputPath);
     Configuration conf = getConf() != null ? getConf() : new Configuration();
     FileSystem fs = FileSystem.get(conf);
-    SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, outputPath, IntWritable.class, VectorWritable.class);
+    SequenceFile.Writer seqWriter =
+        new SequenceFile.Writer(fs, conf, outputPath, IntWritable.class, VectorWritable.class);
     IntWritable iw = new IntWritable();
     for (int i = 0; i < eigenVectors.numRows() - 1; i++) {
       Vector v = eigenVectors.getRow(i);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java Mon Feb 21 06:47:02 2011
@@ -41,7 +41,6 @@ import org.apache.mahout.math.VarIntWrit
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 import org.apache.mahout.math.hadoop.DistributedRowMatrix;
-import org.apache.mahout.math.hadoop.DistributedRowMatrix.MatrixEntryWritable;
 import org.apache.mahout.math.hadoop.similarity.vector.DistributedVectorSimilarity;
 
 /**
@@ -96,8 +95,8 @@ public class RowSimilarityJob extends Ab
     addInputOption();
     addOutputOption();
     addOption("numberOfColumns", "r", "Number of columns in the input matrix");
-    addOption("similarityClassname", "s", "Name of distributed similarity class to instantiate, alternatively use " + 
-        "one of the predefined similarities (" + SimilarityType.listEnumNames() + ')');
+    addOption("similarityClassname", "s", "Name of distributed similarity class to instantiate, alternatively use "
+        + "one of the predefined similarities (" + SimilarityType.listEnumNames() + ')');
     addOption("maxSimilaritiesPerRow", "m", "Number of maximum similarities per row (default: "
               + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')', String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
 
@@ -152,7 +151,7 @@ public class RowSimilarityJob extends Ab
                                Cooccurrence.class,
                                SimilarityReducer.class,
                                SimilarityMatrixEntryKey.class,
-                               MatrixEntryWritable.class,
+                               DistributedRowMatrix.MatrixEntryWritable.class,
                                SequenceFileOutputFormat.class);
 
       Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
@@ -167,7 +166,7 @@ public class RowSimilarityJob extends Ab
                                SequenceFileInputFormat.class,
                                Mapper.class,
                                SimilarityMatrixEntryKey.class,
-                               MatrixEntryWritable.class,
+                               DistributedRowMatrix.MatrixEntryWritable.class,
                                EntriesToVectorsReducer.class,
                                IntWritable.class,
                                VectorWritable.class,
@@ -208,7 +207,7 @@ public class RowSimilarityJob extends Ab
 
     @Override
     protected void map(IntWritable row, VectorWritable vectorWritable, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
 
       Vector v = vectorWritable.get();
       double weight = similarity.weight(v);
@@ -231,7 +230,7 @@ public class RowSimilarityJob extends Ab
 
     @Override
     protected void reduce(VarIntWritable column, Iterable<WeightedOccurrence> weightedOccurrences, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
 
       List<WeightedOccurrence> collectedWeightedOccurrences = new ArrayList<WeightedOccurrence>();
       for (WeightedOccurrence weightedOccurrence : weightedOccurrences) {
@@ -251,7 +250,7 @@ public class RowSimilarityJob extends Ab
 
     @Override
     protected void map(VarIntWritable column, WeightedOccurrenceArray weightedOccurrenceArray, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
 
       WeightedOccurrence[] weightedOccurrences = weightedOccurrenceArray.getWeightedOccurrences();
 
@@ -285,7 +284,7 @@ public class RowSimilarityJob extends Ab
    * computes the pairwise similarities
    */
   public static class SimilarityReducer
-      extends Reducer<WeightedRowPair,Cooccurrence,SimilarityMatrixEntryKey, MatrixEntryWritable> {
+      extends Reducer<WeightedRowPair,Cooccurrence,SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable> {
 
     private DistributedVectorSimilarity similarity;
     private int numberOfColumns;
@@ -302,7 +301,7 @@ public class RowSimilarityJob extends Ab
 
     @Override
     protected void reduce(WeightedRowPair rowPair, Iterable<Cooccurrence> cooccurrences, Context ctx)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
 
       int rowA = rowPair.getRowA();
       int rowB = rowPair.getRowB();
@@ -312,7 +311,7 @@ public class RowSimilarityJob extends Ab
       if (!Double.isNaN(similarityValue)) {
         ctx.getCounter(Counter.SIMILAR_ROWS).increment(1);
         SimilarityMatrixEntryKey key = new SimilarityMatrixEntryKey();
-        MatrixEntryWritable entry = new MatrixEntryWritable();
+        DistributedRowMatrix.MatrixEntryWritable entry = new DistributedRowMatrix.MatrixEntryWritable();
         entry.setVal(similarityValue);
 
         entry.setRow(rowA);
@@ -331,10 +330,10 @@ public class RowSimilarityJob extends Ab
   }
 
   /**
-   * collects all {@link MatrixEntryWritable} for each column and creates a {@link VectorWritable}
+   * collects all {@link DistributedRowMatrix.MatrixEntryWritable} for each column and creates a {@link VectorWritable}
    */
   public static class EntriesToVectorsReducer
-      extends Reducer<SimilarityMatrixEntryKey, MatrixEntryWritable,IntWritable,VectorWritable> {
+      extends Reducer<SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable,IntWritable,VectorWritable> {
 
     private int maxSimilaritiesPerRow;
 
@@ -348,11 +347,12 @@ public class RowSimilarityJob extends Ab
     }
 
     @Override
-    protected void reduce(SimilarityMatrixEntryKey key, Iterable<MatrixEntryWritable> entries, Context ctx)
-        throws IOException, InterruptedException {
+    protected void reduce(SimilarityMatrixEntryKey key,
+                          Iterable<DistributedRowMatrix.MatrixEntryWritable> entries,
+                          Context ctx) throws IOException, InterruptedException {
       RandomAccessSparseVector temporaryVector = new RandomAccessSparseVector(Integer.MAX_VALUE, maxSimilaritiesPerRow);
       int similaritiesSet = 0;
-      for (MatrixEntryWritable entry : entries) {
+      for (DistributedRowMatrix.MatrixEntryWritable entry : entries) {
         temporaryVector.setQuick(entry.getCol(), entry.getVal());
         if (++similaritiesSet == maxSimilaritiesPerRow) {
           break;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducer.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducer.java Mon Feb 21 06:47:02 2011
@@ -98,7 +98,7 @@ public class CollocReducer extends Reduc
    * Sum frequencies for unigrams and deliver to the collector
    */
   protected void processUnigram(GramKey key, Iterator<Gram> values, Context context)
-      throws IOException, InterruptedException {
+    throws IOException, InterruptedException {
 
     int freq = 0;
     Gram value = null;
@@ -129,7 +129,8 @@ public class CollocReducer extends Reduc
    *  some extra work.
    * @throws InterruptedException 
    */
-  protected void processSubgram(GramKey key, Iterator<Gram> values, Context context) throws IOException, InterruptedException {
+  protected void processSubgram(GramKey key, Iterator<Gram> values, Context context)
+    throws IOException, InterruptedException {
 
     Gram subgram = null;
     Gram currentNgram = null;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java Mon Feb 21 06:47:02 2011
@@ -33,7 +33,7 @@ public class ConstantValueEncoder extend
     String name = getName();
     for (int i = 0; i < probes; i++) {
       int n = hashForProbe(originalForm, data.size(), name, i);
-      if(isTraceEnabled()){
+      if (isTraceEnabled()) {
         trace((String) null, n);                
       }
       data.set(n, data.get(n) + getWeight(originalForm,weight));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java Mon Feb 21 06:47:02 2011
@@ -48,7 +48,7 @@ public class ContinuousValueEncoder exte
 
   @Override
   protected double getWeight(byte[] originalForm, double w) {
-    if (originalForm!=null) {
+    if (originalForm != null) {
       return w * Double.parseDouble(new String(originalForm));
     } else {
       return w;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.java Mon Feb 21 06:47:02 2011
@@ -81,7 +81,7 @@ public abstract class FeatureVectorEncod
    * @param weight       The weight to be applied to this feature.
    * @param data         The vector to which the value should be added.
    */
-  public void addToVector(String originalForm, double weight, Vector data){
+  public void addToVector(String originalForm, double weight, Vector data) {
     addToVector(bytesForString(originalForm), weight, data);        
   }
 
@@ -116,7 +116,7 @@ public abstract class FeatureVectorEncod
     return Collections.singletonList(hashForProbe(originalForm, dataSize, name, probe));
   }
 
-  protected double getWeight(byte[] originalForm, double w){
+  protected double getWeight(byte[] originalForm, double w) {
     return 1.0;
   }
 
@@ -140,22 +140,21 @@ public abstract class FeatureVectorEncod
   }
 
   /**
-    * Hash a byte array and an integer into the range [0..numFeatures-1].
-    *
-    * @param term        The bytes.
-    * @param probe       An integer that modifies the resulting hash.
-    * @param numFeatures The range into which the resulting hash must fit.
-    * @return An integer in the range [0..numFeatures-1] that has good spread for small changes in
-    *         term and probe.
-    */
-   protected int hash(byte[] term, int probe, int numFeatures) {
-     long r = MurmurHash.hash64A(term, probe) % numFeatures;
-     if (r < 0) {
-       r += numFeatures;
-     }
-     return (int) r;
-   }
-
+   * Hash a byte array and an integer into the range [0..numFeatures-1].
+   *
+   * @param term        The bytes.
+   * @param probe       An integer that modifies the resulting hash.
+   * @param numFeatures The range into which the resulting hash must fit.
+   * @return An integer in the range [0..numFeatures-1] that has good spread for small changes in
+   *         term and probe.
+   */
+  protected int hash(byte[] term, int probe, int numFeatures) {
+    long r = MurmurHash.hash64A(term, probe) % numFeatures;
+    if (r < 0) {
+      r += numFeatures;
+    }
+    return (int) r;
+  }
 
   /**
    * Hash two strings and an integer into the range [0..numFeatures-1].
@@ -245,7 +244,7 @@ public abstract class FeatureVectorEncod
     return name;
   }
 
-  protected boolean isTraceEnabled(){
+  protected boolean isTraceEnabled() {
     return traceDictionary != null;
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java Mon Feb 21 06:47:02 2011
@@ -89,7 +89,7 @@ public class LuceneTextValueEncoder exte
     }
   }
 
-  private static class LuceneTokenIterable implements Iterable<String> {
+  private static final class LuceneTokenIterable implements Iterable<String> {
     private boolean firstTime = true;
     private final TokenStream tokenStream;
 
@@ -118,7 +118,7 @@ public class LuceneTextValueEncoder exte
     }
   }
 
-  private static class TokenStreamIterator implements Iterator<String> {
+  private static final class TokenStreamIterator implements Iterator<String> {
     private final TokenStream tokenStream;
     private String bufferedToken;
 
@@ -175,7 +175,7 @@ public class LuceneTextValueEncoder exte
     }
   }
 
-  private static class TokenizationException extends RuntimeException {
+  private static final class TokenizationException extends RuntimeException {
     private TokenizationException(String msg, Throwable cause) {
       super(msg, cause);
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java Mon Feb 21 06:47:02 2011
@@ -98,9 +98,9 @@ public class TextValueEncoder extends Fe
   }
 
   @Override
-  protected Iterable<Integer> hashesForProbe(byte[] originalForm, int dataSize, String name, int probe){
+  protected Iterable<Integer> hashesForProbe(byte[] originalForm, int dataSize, String name, int probe) {
     Collection<Integer> hashes = new ArrayList<Integer>();
-    for (String word : tokenize(new String(originalForm, Charsets.UTF_8))){
+    for (String word : tokenize(new String(originalForm, Charsets.UTF_8))) {
       hashes.add(hashForProbe(bytesForString(word), dataSize, name, probe));
     }
     return hashes;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/WordValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/WordValueEncoder.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/WordValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/WordValueEncoder.java Mon Feb 21 06:47:02 2011
@@ -46,7 +46,7 @@ public abstract class WordValueEncoder e
     double weight = getWeight(originalForm, w);
     for (int i = 0; i < probes; i++) {
       int n = hashForProbe(originalForm, data.size(), name, i);
-      if(isTraceEnabled()){
+      if (isTraceEnabled()) {
         trace(originalForm, n);        
       }
       data.set(n, data.get(n) + weight);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterInterface.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterInterface.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterInterface.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterInterface.java Mon Feb 21 06:47:02 2011
@@ -51,7 +51,7 @@ public final class TestClusterInterface 
     Vector m = new DenseVector(d);
     Cluster model = new NormalModel(5, m, 0.75);
     String format = model.asFormatString(null);
-    assertEquals("format", "nm{n=0 m=[1.100, 2.200, 3.300] sd=0.75}", format);
+    assertEquals("nm{n=0 m=[1.100, 2.200, 3.300] sd=0.75}", format);
     String json = model.asJsonString();
     GsonBuilder builder = new GsonBuilder();
     builder.registerTypeAdapter(Model.class, new JsonModelAdapter());
@@ -66,7 +66,7 @@ public final class TestClusterInterface 
     Vector m = new DenseVector(d);
     Cluster model = new SampledNormalModel(5, m, 0.75);
     String format = model.asFormatString(null);
-    assertEquals("format", "snm{n=0 m=[1.100, 2.200, 3.300] sd=0.75}", format);
+    assertEquals("snm{n=0 m=[1.100, 2.200, 3.300] sd=0.75}", format);
     String json = model.asJsonString();
     GsonBuilder builder = new GsonBuilder();
     builder.registerTypeAdapter(Model.class, new JsonModelAdapter());
@@ -81,7 +81,7 @@ public final class TestClusterInterface 
     Vector m = new DenseVector(d);
     Cluster model = new AsymmetricSampledNormalModel(5, m, m);
     String format = model.asFormatString(null);
-    assertEquals("format", "asnm{n=0 m=[1.100, 2.200, 3.300] sd=[1.100, 2.200, 3.300]}", format);
+    assertEquals("asnm{n=0 m=[1.100, 2.200, 3.300] sd=[1.100, 2.200, 3.300]}", format);
     String json = model.asJsonString();
     GsonBuilder builder = new GsonBuilder();
     builder.registerTypeAdapter(Model.class, new JsonModelAdapter());
@@ -96,7 +96,7 @@ public final class TestClusterInterface 
     Vector m = new DenseVector(d);
     Cluster model = new L1Model(5, m);
     String format = model.asFormatString(null);
-    assertEquals("format", "l1m{n=0 c=[1.100, 2.200, 3.300]}", format);
+    assertEquals("l1m{n=0 c=[1.100, 2.200, 3.300]}", format);
     String json = model.asJsonString();
     GsonBuilder builder = new GsonBuilder();
     builder.registerTypeAdapter(Model.class, new JsonModelAdapter());
@@ -112,7 +112,7 @@ public final class TestClusterInterface 
     NormalModel model = new NormalModel(5, m, 0.75);
     Cluster cluster = new DirichletCluster(model, 35.0);
     String format = cluster.asFormatString(null);
-    assertEquals("format", "C-5: nm{n=0 m=[1.100, 2.200, 3.300] sd=0.75}", format);
+    assertEquals("C-5: nm{n=0 m=[1.100, 2.200, 3.300] sd=0.75}", format);
   }
 
   @Test
@@ -137,7 +137,7 @@ public final class TestClusterInterface 
     AsymmetricSampledNormalModel model = new AsymmetricSampledNormalModel(5, m, m);
     Cluster cluster = new DirichletCluster(model, 35.0);
     String format = cluster.asFormatString(null);
-    assertEquals("format", "C-5: asnm{n=0 m=[1.100, 2.200, 3.300] sd=[1.100, 2.200, 3.300]}", format);
+    assertEquals("C-5: asnm{n=0 m=[1.100, 2.200, 3.300] sd=[1.100, 2.200, 3.300]}", format);
   }
 
   @Test
@@ -163,7 +163,7 @@ public final class TestClusterInterface 
     L1Model model = new L1Model(5, m);
     Cluster cluster = new DirichletCluster(model, 35.0);
     String format = cluster.asFormatString(null);
-    assertEquals("format", "C-5: l1m{n=0 c=[1.100, 2.200, 3.300]}", format);
+    assertEquals("C-5: l1m{n=0 c=[1.100, 2.200, 3.300]}", format);
   }
 
   @Test
@@ -188,8 +188,7 @@ public final class TestClusterInterface 
     Vector m = new DenseVector(d);
     Cluster cluster = new Canopy(m, 123, measure);
     String formatString = cluster.asFormatString(null);
-    System.out.println(formatString);
-    assertEquals("format", "C-123{n=0 c=[1.100, 2.200, 3.300] r=[]}", formatString);
+    assertEquals("C-123{n=0 c=[1.100, 2.200, 3.300] r=[]}", formatString);
   }
 
   @Test
@@ -199,8 +198,7 @@ public final class TestClusterInterface 
     m.assign(d);
     Cluster cluster = new Canopy(m, 123, measure);
     String formatString = cluster.asFormatString(null);
-    System.out.println(formatString);
-    assertEquals("format", "C-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
+    assertEquals("C-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
   }
 
   @Test
@@ -210,8 +208,7 @@ public final class TestClusterInterface 
     Cluster cluster = new Canopy(m, 123, measure);
     String[] bindings = { "fee", null, null };
     String formatString = cluster.asFormatString(bindings);
-    System.out.println(formatString);
-    assertEquals("format", "C-123{n=0 c=[fee:1.100, 1:2.200, 2:3.300] r=[]}", formatString);
+    assertEquals("C-123{n=0 c=[fee:1.100, 1:2.200, 2:3.300] r=[]}", formatString);
   }
 
   @Test
@@ -221,8 +218,7 @@ public final class TestClusterInterface 
     m.assign(d);
     Cluster cluster = new Canopy(m, 123, measure);
     String formatString = cluster.asFormatString(null);
-    System.out.println(formatString);
-    assertEquals("format", "C-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
+    assertEquals("C-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
   }
 
   @Test
@@ -231,8 +227,7 @@ public final class TestClusterInterface 
     Vector m = new DenseVector(d);
     Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123, measure);
     String formatString = cluster.asFormatString(null);
-    System.out.println(formatString);
-    assertEquals("format", "CL-123{n=0 c=[1.100, 2.200, 3.300] r=[]}", formatString);
+    assertEquals("CL-123{n=0 c=[1.100, 2.200, 3.300] r=[]}", formatString);
   }
 
   @Test
@@ -242,8 +237,7 @@ public final class TestClusterInterface 
     m.assign(d);
     Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123, measure);
     String formatString = cluster.asFormatString(null);
-    System.out.println(formatString);
-    assertEquals("format", "CL-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
+    assertEquals("CL-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
   }
 
   @Test
@@ -253,8 +247,7 @@ public final class TestClusterInterface 
     Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123, measure);
     String[] bindings = { "fee", null, "foo" };
     String formatString = cluster.asFormatString(bindings);
-    System.out.println(formatString);
-    assertEquals("format", "CL-123{n=0 c=[fee:1.100, 1:2.200, foo:3.300] r=[]}", formatString);
+    assertEquals("CL-123{n=0 c=[fee:1.100, 1:2.200, foo:3.300] r=[]}", formatString);
   }
 
   @Test
@@ -264,8 +257,7 @@ public final class TestClusterInterface 
     m.assign(d);
     Cluster cluster = new org.apache.mahout.clustering.kmeans.Cluster(m, 123, measure);
     String formatString = cluster.asFormatString(null);
-    System.out.println(formatString);
-    assertEquals("format", "CL-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
+    assertEquals("CL-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
   }
 
   @Test
@@ -274,8 +266,7 @@ public final class TestClusterInterface 
     Vector m = new DenseVector(d);
     Cluster cluster = new MeanShiftCanopy(m, 123, measure);
     String formatString = cluster.asFormatString(null);
-    System.out.println(formatString);
-    assertEquals("format", "MSC-123{n=0 c=[1.100, 2.200, 3.300] r=[]}", formatString);
+    assertEquals("MSC-123{n=0 c=[1.100, 2.200, 3.300] r=[]}", formatString);
   }
 
   @Test
@@ -285,8 +276,7 @@ public final class TestClusterInterface 
     m.assign(d);
     Cluster cluster = new MeanShiftCanopy(m, 123, measure);
     String formatString = cluster.asFormatString(null);
-    System.out.println(formatString);
-    assertEquals("format", "MSC-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
+    assertEquals("MSC-123{n=0 c=[0:1.100, 2:3.300] r=[]}", formatString);
   }
 
   @Test
@@ -296,8 +286,7 @@ public final class TestClusterInterface 
     Cluster cluster = new MeanShiftCanopy(m, 123, measure);
     String[] bindings = { "fee", null, "foo" };
     String formatString = cluster.asFormatString(bindings);
-    System.out.println(formatString);
-    assertEquals("format", "MSC-123{n=0 c=[fee:1.100, 1:2.200, foo:3.300] r=[]}", formatString);
+    assertEquals("MSC-123{n=0 c=[fee:1.100, 1:2.200, foo:3.300] r=[]}", formatString);
   }
 
   @Test
@@ -308,8 +297,7 @@ public final class TestClusterInterface 
     Cluster cluster = new MeanShiftCanopy(m, 123, measure);
     String[] bindings = { "fee", null, "foo" };
     String formatString = cluster.asFormatString(bindings);
-    System.out.println(formatString);
-    assertEquals("format", "MSC-123{n=0 c=[fee:1.100, foo:3.300] r=[]}", formatString);
+    assertEquals("MSC-123{n=0 c=[fee:1.100, foo:3.300] r=[]}", formatString);
   }
 
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestGaussianAccumulators.java Mon Feb 21 06:47:02 2011
@@ -38,7 +38,6 @@ public final class TestGaussianAccumulat
   private int sampleN;
   private Vector sampleMean;
   private Vector sampleStd;
-  private Vector sampleVar;
 
   @Override
   @Before
@@ -54,7 +53,7 @@ public final class TestGaussianAccumulat
     }
     sampleMean = sum.divide(sampleN);
 
-    sampleVar = new DenseVector(2);
+    Vector sampleVar = new DenseVector(2);
     for (VectorWritable v : sampleData) {
       Vector delta = v.get().minus(sampleMean);
       delta.times(delta).addTo(sampleVar);
@@ -124,8 +123,14 @@ public final class TestGaussianAccumulat
       accumulator.observe(vw.get(), 1.0);
     }
     accumulator.compute();
-    log.info("OL Observed {} samples m=[{}, {}] sd=[{}, {}]", new Object[] { accumulator.getN(), accumulator.getMean().get(0),
-        accumulator.getMean().get(1), accumulator.getStd().get(0), accumulator.getStd().get(1) });
+    log.info("OL Observed {} samples m=[{}, {}] sd=[{}, {}]",
+             new Object[] {
+                 accumulator.getN(),
+                 accumulator.getMean().get(0),
+                 accumulator.getMean().get(1),
+                 accumulator.getStd().get(0),
+                 accumulator.getStd().get(1)
+             });
     assertEquals("OL N", sampleN, accumulator.getN(), EPSILON);
     assertEquals("OL Mean", sampleMean.zSum(), accumulator.getMean().zSum(), EPSILON);
     assertEquals("OL Std", sampleStd.zSum(), accumulator.getStd().zSum(), EPSILON);
@@ -138,8 +143,14 @@ public final class TestGaussianAccumulat
       accumulator.observe(vw.get(), 1.0);
     }
     accumulator.compute();
-    log.info("RS Observed {} samples m=[{}, {}] sd=[{}, {}]", new Object[] { (int) accumulator.getN(),
-        accumulator.getMean().get(0), accumulator.getMean().get(1), accumulator.getStd().get(0), accumulator.getStd().get(1) });
+    log.info("RS Observed {} samples m=[{}, {}] sd=[{}, {}]",
+             new Object[] {
+                 (int) accumulator.getN(),
+                 accumulator.getMean().get(0),
+                 accumulator.getMean().get(1),
+                 accumulator.getStd().get(0),
+                 accumulator.getStd().get(1)
+             });
     assertEquals("OL N", sampleN, accumulator.getN(), EPSILON);
     assertEquals("OL Mean", sampleMean.zSum(), accumulator.getMean().zSum(), EPSILON);
     assertEquals("OL Std", sampleStd.zSum(), accumulator.getStd().zSum(), 0.0001);

Propchange: mahout/trunk/examples/
------------------------------------------------------------------------------
--- svn:ignore (original)
+++ svn:ignore Mon Feb 21 06:47:02 2011
@@ -1,15 +1,14 @@
 work
-.classpath
+.pmd
+.ruleset
+.project
+build
 .settings
 input
-build
-output
-target
-temp
-.project
-dist
+.classpath
 *.iml
+temp
+output
 .checkstyle
-.pmd
-.ruleset
 testdata
+dist

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/SplitBayesInput.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/SplitBayesInput.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/SplitBayesInput.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/SplitBayesInput.java Mon Feb 21 06:47:02 2011
@@ -262,8 +262,7 @@ public class SplitBayesInput {
   public void splitDirectory(Path inputDir) throws IOException {
     if (fs.getFileStatus(inputDir) == null) {
       throw new IOException(inputDir + " does not exist");
-    }
-    else if (!fs.getFileStatus(inputDir).isDir()) {
+    } else if (!fs.getFileStatus(inputDir).isDir()) {
       throw new IOException(inputDir + " is not a directory");
     }
 
@@ -283,8 +282,7 @@ public class SplitBayesInput {
   public void splitFile(Path inputFile) throws IOException {
     if (fs.getFileStatus(inputFile) == null) {
       throw new IOException(inputFile + " does not exist");
-    }
-    else if (fs.getFileStatus(inputFile).isDir()) {
+    } else if (fs.getFileStatus(inputFile).isDir()) {
       throw new IOException(inputFile + " is a directory");
     }
     
@@ -558,7 +556,8 @@ public class SplitBayesInput {
                                 "%s is not a directory", testOutputDirectory);
   }
   
-  /** Count the lines in the file specified as returned by <code>BufferedReader.readLine()</code>
+  /**
+   * Count the lines in the file specified as returned by <code>BufferedReader.readLine()</code>
    * 
    * @param inputFile 
    *   the file whose lines will be counted

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java Mon Feb 21 06:47:02 2011
@@ -57,7 +57,7 @@ import java.util.Random;
  * This doesn't demonstrate text encoding which is subject to somewhat different tricks.  The basic
  * idea of caching hash locations and byte level parsing still very much applies to text, however.
  */
-public class SimpleCsvExamples {
+public final class SimpleCsvExamples {
 
   public static final int SEPARATOR_CHAR = '\t';
   public static final String SEPARATOR = "\t";
@@ -137,16 +137,16 @@ public class SimpleCsvExamples {
   }
 
 
-  private static class Line {
-    private static final Splitter onTabs = Splitter.on(SEPARATOR).trimResults();
-    public static final Joiner withCommas = Joiner.on(SEPARATOR);
+  private static final class Line {
+    private static final Splitter ON_TABS = Splitter.on(SEPARATOR).trimResults();
+    public static final Joiner WITH_COMMAS = Joiner.on(SEPARATOR);
 
     public static final Random rand = RandomUtils.getRandom();
 
     private final List<String> data;
 
     private Line(CharSequence line) {
-      data = Lists.newArrayList(onTabs.split(line));
+      data = Lists.newArrayList(ON_TABS.split(line));
     }
 
     private Line() {
@@ -184,7 +184,7 @@ public class SimpleCsvExamples {
 
     @Override
     public String toString() {
-      return withCommas.join(data);
+      return WITH_COMMAS.join(data);
     }
 
     public String get(int field) {
@@ -192,7 +192,7 @@ public class SimpleCsvExamples {
     }
   }
 
-  private static class FastLine {
+  private static final class FastLine {
 
     private final ByteBuffer base;
     private final IntArrayList start = new IntArrayList();
@@ -241,7 +241,8 @@ public class SimpleCsvExamples {
     }
   }
 
-  private static class FastLineReader implements Closeable {
+  private static final
+  class FastLineReader implements Closeable {
     private final InputStream in;
     private final ByteBuffer buf = ByteBuffer.allocate(100000);
 

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java Mon Feb 21 06:47:02 2011
@@ -305,7 +305,7 @@ public final class TrainLogistic {
   static BufferedReader open(String inputFile) throws IOException {
     InputStream in;
     try {
-      in= Resources.getResource(inputFile).openStream();
+      in = Resources.getResource(inputFile).openStream();
     } catch (IllegalArgumentException e) {
       in = new FileInputStream(new File(inputFile));
     }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java Mon Feb 21 06:47:02 2011
@@ -300,7 +300,7 @@ public final class TrainNewsGroups {
             countWords(analyzer, words, in);
           }
           line = reader.readLine();
-        } while (line.startsWith(" "));
+        } while (line != null && line.startsWith(" "));
       }
       if (leakType < 3) {
         countWords(analyzer, words, reader);

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java Mon Feb 21 06:47:02 2011
@@ -69,7 +69,8 @@ class DisplayFuzzyKMeans extends Display
                             true,
                             threshold,
                             true);
-      loadClusters(output);
+
+    loadClusters(output);
     //} else {
     //  List<Vector> points = new ArrayList<Vector>();
     //  for (VectorWritable sample : SAMPLE_DATA) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmClusterEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmClusterEvaluator.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmClusterEvaluator.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmClusterEvaluator.java Mon Feb 21 06:47:02 2011
@@ -133,7 +133,7 @@ public final class LastfmClusterEvaluato
     System.out.println("\nTest Results");
     System.out.println("=============");
     System.out.println(" (A) Listeners in same cluster with simiarity above threshold ("
- + threshold + ") : " + similarListeners);
+                           + threshold + ") : " + similarListeners);
     System.out.println(" (B) All listeners: " + allListeners);
     NumberFormat format = NumberFormat.getInstance();
     format.setMaximumFractionDigits(2);

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Mon Feb 21 06:47:02 2011
@@ -148,8 +148,7 @@ public final class Job extends AbstractJ
                   double alpha0,
                   boolean emitMostLikely,
                   double threshold)
-    throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
-           SecurityException, InterruptedException {
+    throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, InterruptedException {
     Path directoryContainingConvertedInput = new Path(output, DIRECTORY_CONTAINING_CONVERTED_INPUT);
     InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
     DirichletDriver.run(directoryContainingConvertedInput,

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java Mon Feb 21 06:47:02 2011
@@ -135,7 +135,7 @@ public final class Job extends AbstractJ
     log.info("Preparing Input");
     InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector");
     log.info("Running random seed to get initial clusters");
-    Path clusters= new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
+    Path clusters = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
     clusters = RandomSeedGenerator.buildRandom(directoryContainingConvertedInput, clusters, k, measure);
     log.info("Running KMeans");
     KMeansDriver.run(conf,

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java Mon Feb 21 06:47:02 2011
@@ -180,8 +180,10 @@ public final class CDInfosTool {
     return new Descriptors(desc);
   }
 
-  private static void storeDescriptions(FileSystem fs, Path inpath, Descriptors descriptors, List<String> descriptions)
-      throws IOException {
+  private static void storeDescriptions(FileSystem fs,
+                                        Path inpath,
+                                        Descriptors descriptors,
+                                        List<String> descriptions) throws IOException {
     // TODO should become part of FileInfoParser
 
     Path infpath = FileInfoParser.getInfoFile(fs, inpath);

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java Mon Feb 21 06:47:02 2011
@@ -608,7 +608,7 @@ public abstract class AbstractMatrix imp
 
     public Iterator<Element> iterator() {
       return new Iterator<Element>() {
-        private int i = 0;
+        private int i;
         public boolean hasNext() {
           return i < size();
         }

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java Mon Feb 21 06:47:02 2011
@@ -163,7 +163,7 @@ public abstract class AbstractVector imp
   }
   
   public Vector logNormalize() {
-      return logNormalize(2, Math.sqrt(dotSelf()));
+    return logNormalize(2.0, Math.sqrt(dotSelf()));
   }
   
   public Vector logNormalize(double power) {
@@ -238,7 +238,7 @@ public abstract class AbstractVector imp
       throw new CardinalityException(size, v.size());
     }
     // if this and v has a cached lengthSquared, dot product is quickest way to compute this.
-    if(lengthSquared >= 0 && v instanceof AbstractVector && ((AbstractVector)v).lengthSquared >= 0) {
+    if (lengthSquared >= 0 && v instanceof AbstractVector && ((AbstractVector)v).lengthSquared >= 0) {
       return lengthSquared + v.getLengthSquared() - 2 * this.dot(v);
     }
     Vector randomlyAccessed;

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/OrthonormalityVerifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/OrthonormalityVerifier.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/OrthonormalityVerifier.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/OrthonormalityVerifier.java Mon Feb 21 06:47:02 2011
@@ -27,7 +27,7 @@ public final class OrthonormalityVerifie
 
   public static VectorIterable pairwiseInnerProducts(Iterable<MatrixSlice> basis) {
     DenseMatrix out = null;
-    for(MatrixSlice slice1 : basis) {
+    for (MatrixSlice slice1 : basis) {
       List<Double> dots = new ArrayList<Double>();
       for (MatrixSlice slice2 : basis) {
         dots.add(slice1.vector().dot(slice2.vector()));

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java Mon Feb 21 06:47:02 2011
@@ -22,7 +22,7 @@ public class SingularValueDecomposition 
   private final int n;
   
   /**To handle the case where numRows() < numCols() and to use the fact that SVD(A')=VSU'=> SVD(A')'=SVD(A)**/
-  private boolean transpositionNeeded; 
+  private boolean transpositionNeeded = false;
   
   /**
    * Constructs and returns a new singular value decomposition object; The

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorList.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorList.java?rev=1072858&r1=1072857&r2=1072858&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorList.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorList.java Mon Feb 21 06:47:02 2011
@@ -24,7 +24,6 @@ import com.google.gson.JsonDeserializati
 import com.google.gson.JsonDeserializer;
 import com.google.gson.JsonElement;
 import com.google.gson.JsonObject;
-import com.google.gson.JsonParseException;
 import com.google.gson.reflect.TypeToken;
 
 import java.lang.reflect.Type;
@@ -269,8 +268,7 @@ public class VectorList extends Abstract
     private final Type collectionType = new TypeToken<List<Vector>>(){}.getType();
     private final Type labelType = new TypeToken<Map<String, Integer>>(){}.getType();
 
-    public VectorList deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context)
-      throws JsonParseException {
+    public VectorList deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) {
       JsonObject jo = json.getAsJsonObject();
       VectorList r = new VectorList(jo.get("columns").getAsInt());
       r.data = context.deserialize(jo.get("data"), collectionType);



Mime
View raw message