mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r991909 [1/2] - in /mahout/trunk: core/src/main/java/org/apache/mahout/classifier/ core/src/main/java/org/apache/mahout/classifier/bayes/datastore/ core/src/main/java/org/apache/mahout/classifier/evaluation/ core/src/main/java/org/apache/ma...
Date Thu, 02 Sep 2010 12:28:05 GMT
Author: srowen
Date: Thu Sep  2 12:28:03 2010
New Revision: 991909

URL: http://svn.apache.org/viewvc?rev=991909&view=rev
Log:
Delete some unused math code. More style stuff. Perhaps make a few tests deterministic.

Removed:
    mahout/trunk/math/src/main/java/org/apache/mahout/math/Partitioning.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/function/Double27Function.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/function/Double5Function.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/function/IntIntIntProcedure.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/math/IntFunctions.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/DRand.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/RandomGenerator.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory3D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix3D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix3DProcedure.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/doublealgo/
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractFormatter.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix3D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/Former.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/FormerFactory.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/RCDoubleMatrix2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedDenseDoubleMatrix3D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SelectedSparseDoubleMatrix3D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix3D.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/AIOOBInSortingTest.java
Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java
    mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectors/ConstantValueEncoder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegressionTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/CsvRecordFactoryTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/ModelSerializerTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/ep/EvolutionaryProcessTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java
    mahout/trunk/etc/findbugs-exclude.xml
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/GenericPermuting.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/math/Bessel.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/NegativeBinomial.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix3D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/RCMDoubleMatrix2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/TridiagonalDoubleMatrix2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/NegativeBinomialTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/NormalTest.java
    mahout/trunk/maven/src/main/resources/findbugs-exclude.xml
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
    mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/CollocMapperTest.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java Thu Sep  2 12:28:03 2010
@@ -119,12 +119,12 @@ public final class Classify {
     String classifierType = (String) cmdLine.getValue(typeOpt);
     
     String dataSource = (String) cmdLine.getValue(dataSourceOpt);
-    if (dataSource.equals("hdfs")) {
-      if (classifierType.equalsIgnoreCase("bayes")) {
+    if ("hdfs".equals(dataSource)) {
+      if ("bayes".equalsIgnoreCase(classifierType)) {
         log.info("Using Bayes Classifier");
         algorithm = new BayesAlgorithm();
         datastore = new InMemoryBayesDatastore(params);
-      } else if (classifierType.equalsIgnoreCase("cbayes")) {
+      } else if ("cbayes".equalsIgnoreCase(classifierType)) {
         log.info("Using Complementary Bayes Classifier");
         algorithm = new CBayesAlgorithm();
         datastore = new InMemoryBayesDatastore(params);
@@ -132,12 +132,12 @@ public final class Classify {
         throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
       }
       
-    } else if (dataSource.equals("hbase")) {
-      if (classifierType.equalsIgnoreCase("bayes")) {
+    } else if ("hbase".equals(dataSource)) {
+      if ("bayes".equalsIgnoreCase(classifierType)) {
         log.info("Using Bayes Classifier");
         algorithm = new BayesAlgorithm();
         datastore = new HBaseBayesDatastore(modelBasePath, params);
-      } else if (classifierType.equalsIgnoreCase("cbayes")) {
+      } else if ("cbayes".equalsIgnoreCase(classifierType)) {
         log.info("Using Complementary Bayes Classifier");
         algorithm = new CBayesAlgorithm();
         datastore = new HBaseBayesDatastore(modelBasePath, params);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java Thu Sep  2 12:28:03 2010
@@ -143,9 +143,9 @@ public class HBaseBayesDatastore impleme
       return keys.get(name);
     }
     Result r;
-    if (name.equals("labelWeight")) {
+    if ("labelWeight".equals(name)) {
       r = getRowFromHbase(BayesConstants.LABEL_SUM);
-    } else if (name.equals("thetaNormalizer")) {
+    } else if ("thetaNormalizer".equals(name)) {
       r = getRowFromHbase(BayesConstants.LABEL_THETA_NORMALIZER);
     } else {
       r = getRowFromHbase(name);
@@ -169,7 +169,7 @@ public class HBaseBayesDatastore impleme
   @Override
   public double getWeight(String matrixName, String row, String column) throws InvalidDatastoreException {
     if ("weight".equals(matrixName)) {
-      if (column.equals("sigma_j")) {
+      if ("sigma_j".equals(column)) {
         return getSigmaJFromHbase(row);
       } else {
         return getWeightFromHbase(row, column);
@@ -181,21 +181,21 @@ public class HBaseBayesDatastore impleme
   
   @Override
   public double getWeight(String vectorName, String index) throws InvalidDatastoreException {
-    if (vectorName.equals("sumWeight")) {
-      if (index.equals("vocabCount")) {
+    if ("sumWeight".equals(vectorName)) {
+      if ("vocabCount".equals(index)) {
         return getVocabCountFromHbase();
-      } else if (index.equals("sigma_jSigma_k")) {
+      } else if ("sigma_jSigma_k".equals(index)) {
         return getSigmaJSigmaKFromHbase();
       } else {
         throw new InvalidDatastoreException();
       }
       
-    } else if (vectorName.equals("labelWeight")) {
+    } else if ("labelWeight".equals(vectorName)) {
       return getWeightFromHbase(BayesConstants.LABEL_SUM, index);
-    } else if (vectorName.equals("thetaNormalizer")) {
+    } else if ("thetaNormalizer".equals(vectorName)) {
       return getWeightFromHbase(BayesConstants.LABEL_THETA_NORMALIZER, index) / thetaNormalizer;
-    } else if (vectorName.equals("params")) {
-      if (index.equals("alpha_i")) {
+    } else if ("params".equals(vectorName)) {
+      if ("alpha_i".equals(index)) {
         return alphaI;
       } else {
         throw new InvalidDatastoreException();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java Thu Sep  2 12:28:03 2010
@@ -83,10 +83,10 @@ public class InMemoryBayesDatastore impl
     }
     for (String label : getKeys("")) {
       log.info("{} {} {} {}", new Object[] {
-       label,
-       thetaNormalizerPerLabel.get(getLabelID(label)),
-       thetaNormalizer,
-       thetaNormalizerPerLabel.get(getLabelID(label)) / thetaNormalizer
+        label,
+        thetaNormalizerPerLabel.get(getLabelID(label)),
+        thetaNormalizer,
+        thetaNormalizerPerLabel.get(getLabelID(label)) / thetaNormalizer
       });
     }
   }
@@ -98,8 +98,8 @@ public class InMemoryBayesDatastore impl
   
   @Override
   public double getWeight(String matrixName, String row, String column) throws InvalidDatastoreException {
-    if (matrixName.equals("weight")) {
-      if (column.equals("sigma_j")) {
+    if ("weight".equals(matrixName)) {
+      if ("sigma_j".equals(column)) {
         return sigmaJ.get(getFeatureID(row));
       } else {
         return weightMatrix.getQuick(getFeatureID(row), getLabelID(column));
@@ -111,23 +111,23 @@ public class InMemoryBayesDatastore impl
   
   @Override
   public double getWeight(String vectorName, String index) throws InvalidDatastoreException {
-    if (vectorName.equals("sumWeight")) {
-      if (index.equals("sigma_jSigma_k")) {
+    if ("sumWeight".equals(vectorName)) {
+      if ("sigma_jSigma_k".equals(index)) {
         return sigmaJsigmaK;
-      } else if (index.equals("vocabCount")) {
+      } else if ("vocabCount".equals(index)) {
         return featureDictionary.size();
       } else {
         throw new InvalidDatastoreException();
       }
-    } else if (vectorName.equals("thetaNormalizer")) {
+    } else if ("thetaNormalizer".equals(vectorName)) {
       return thetaNormalizerPerLabel.get(getLabelID(index)) / thetaNormalizer;
-    } else if (vectorName.equals("params")) {
-      if (index.equals("alpha_i")) {
+    } else if ("params".equals(vectorName)) {
+      if ("alpha_i".equals(index)) {
         return alphaI;
       } else {
         throw new InvalidDatastoreException();
       }
-    } else if (vectorName.equals("labelWeight")) {
+    } else if ("labelWeight".equals(vectorName)) {
       return sigmaK.get(getLabelID(index));
     } else {
       throw new InvalidDatastoreException();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java Thu Sep  2 12:28:03 2010
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.classifier.evaluation;
 
+import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.DenseMatrix;
 import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.list.DoubleArrayList;
@@ -33,9 +34,7 @@ import java.util.Random;
  */
 public class Auc {
   private int maxBufferSize = 10000;
-  private DoubleArrayList[] scores = new DoubleArrayList[]{
-          new DoubleArrayList(), new DoubleArrayList()
-  };
+  private final DoubleArrayList[] scores = {new DoubleArrayList(), new DoubleArrayList()};
   private Random rand;
   private int samples = 0;
   private double threshold;
@@ -44,7 +43,7 @@ public class Auc {
 
   private boolean probabilityScore = true;
 
-  private boolean hasScore = false;
+  private boolean hasScore;
 
   // exposed for testing only
 
@@ -61,7 +60,7 @@ public class Auc {
    * @param threshold The threshold to use in computing the confusion matrix.
    */
   public Auc(double threshold) {
-    this(new Random());
+    this(RandomUtils.getRandom());
     this.threshold = threshold;
   }
 
@@ -85,7 +84,7 @@ public class Auc {
     int predictedClass = (score > threshold) ? 1 : 0;
     confusion.set(trueValue, predictedClass, confusion.get(trueValue, predictedClass) + 1);
     if (isProbabilityScore()) {
-      double limited = Math.max(1e-20, Math.min(score, 1 - 1e-20));
+      double limited = Math.max(1.0e-20, Math.min(score, 1 - 1.0e-20));
       entropy.set(trueValue, 0, Math.log(1 - limited));
       entropy.set(trueValue, 1, Math.log(limited));
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Thu Sep  2 12:28:03 2010
@@ -19,6 +19,7 @@ package org.apache.mahout.clustering.fuz
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -30,7 +31,7 @@ import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
@@ -90,9 +91,6 @@ public class FuzzyKMeansDriver extends A
    * @param threshold 
    *          a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
    * @param runSequential if true run in sequential execution mode
-   * @throws IOException 
-   * @throws IllegalAccessException 
-   * @throws InstantiationException 
    */
   public static void runJob(Path input,
                             Path clustersIn,
@@ -105,21 +103,21 @@ public class FuzzyKMeansDriver extends A
                             boolean runClustering,
                             boolean emitMostLikely,
                             double threshold,
-                            boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException,
-      InstantiationException, IllegalAccessException {
+                            boolean runSequential)
+    throws IOException, ClassNotFoundException, InterruptedException, InstantiationException, IllegalAccessException {
 
-    new FuzzyKMeansDriver().job(input,
-                                clustersIn,
-                                output,
-                                measure,
-                                convergenceDelta,
-                                maxIterations,
-                                numReduceTasks,
-                                m,
-                                runClustering,
-                                emitMostLikely,
-                                threshold,
-                                runSequential);
+    job(input,
+        clustersIn,
+        output,
+        measure,
+        convergenceDelta,
+        maxIterations,
+        numReduceTasks,
+        m,
+        runClustering,
+        emitMostLikely,
+        threshold,
+        runSequential);
   }
 
   @Override
@@ -130,7 +128,8 @@ public class FuzzyKMeansDriver extends A
     addOption(DefaultOptionCreator.distanceMeasureOption().create());
     addOption(DefaultOptionCreator.clustersInOption()
         .withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
-            + "If k is also specified, then a random set of vectors will be selected" + " and written out to this path first")
+            + "If k is also specified, then a random set of vectors will be selected"
+            + " and written out to this path first")
         .create());
     addOption(DefaultOptionCreator.numClustersOption()
         .withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
@@ -175,7 +174,8 @@ public class FuzzyKMeansDriver extends A
           .get(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure);
     }
     boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
-    boolean runSequential = (getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD));
+    boolean runSequential = (getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
+        DefaultOptionCreator.SEQUENTIAL_METHOD));
     job(input,
         clusters,
         output,
@@ -204,22 +204,19 @@ public class FuzzyKMeansDriver extends A
    *          the classname of the DistanceMeasure
    * @param convergenceDelta
    *          the convergence delta value
-   * @param iterationNumber
-   *          the iteration number that is going to run
    * @param m
    *          the fuzzification factor - see
    *          http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
    * @return true if the iteration successfully runs
    * @throws IOException 
    */
-  private boolean runIteration(Path input,
-                               Path clustersIn,
-                               Path clustersOut,
-                               String measureClass,
-                               double convergenceDelta,
-                               int numReduceTasks,
-                               int iterationNumber,
-                               float m) throws IOException {
+  private static boolean runIteration(Path input,
+                                      Path clustersIn,
+                                      Path clustersOut,
+                                      String measureClass,
+                                      double convergenceDelta,
+                                      int numReduceTasks,
+                                      float m) throws IOException {
 
     Configuration conf = new Configuration();
     conf.set(FuzzyKMeansConfigKeys.CLUSTER_PATH_KEY, clustersIn.toString());
@@ -273,8 +270,6 @@ public class FuzzyKMeansDriver extends A
    *          the directory pathname for initial & computed clusters
    * @param output
    *          the directory pathname for output points
-   * @param measureClass
-   *          the classname of the DistanceMeasure
    * @param convergenceDelta
    *          the convergence delta value
    * @param maxIterations
@@ -291,25 +286,20 @@ public class FuzzyKMeansDriver extends A
    * @param threshold 
    *          a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
    * @param runSequential if true run in sequential execution mode
-   * @throws IOException
-   * @throws ClassNotFoundException
-   * @throws InterruptedException
-   * @throws IllegalAccessException 
-   * @throws InstantiationException 
    */
-  public void job(Path input,
-                  Path clustersIn,
-                  Path output,
-                  DistanceMeasure measure,
-                  double convergenceDelta,
-                  int maxIterations,
-                  int numReduceTasks,
-                  float m,
-                  boolean runClustering,
-                  boolean emitMostLikely,
-                  double threshold,
-                  boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException, InstantiationException,
-      IllegalAccessException {
+  public static void job(Path input,
+                         Path clustersIn,
+                         Path output,
+                         DistanceMeasure measure,
+                         double convergenceDelta,
+                         int maxIterations,
+                         int numReduceTasks,
+                         float m,
+                         boolean runClustering,
+                         boolean emitMostLikely,
+                         double threshold,
+                         boolean runSequential)
+    throws IOException, ClassNotFoundException, InterruptedException, InstantiationException, IllegalAccessException {
     Path clustersOut = buildClusters(input,
                                      clustersIn,
                                      output,
@@ -355,35 +345,31 @@ public class FuzzyKMeansDriver extends A
    *          http://en.wikipedia.org/wiki/Data_clustering#Fuzzy_c-means_clustering
    * @param runSequential if true run in sequential execution mode
    * @return the Path of the final clusters directory
-   * @throws IOException
-   * @throws IllegalAccessException 
-   * @throws InstantiationException 
    */
-  public Path buildClusters(Path input,
-                            Path clustersIn,
-                            Path output,
-                            DistanceMeasure measure,
-                            double convergenceDelta,
-                            int maxIterations,
-                            int numReduceTasks,
-                            float m,
-                            boolean runSequential) throws IOException, InstantiationException, IllegalAccessException {
+  public static Path buildClusters(Path input,
+                                   Path clustersIn,
+                                   Path output,
+                                   DistanceMeasure measure,
+                                   double convergenceDelta,
+                                   int maxIterations,
+                                   int numReduceTasks,
+                                   float m,
+                                   boolean runSequential)
+    throws IOException, InstantiationException, IllegalAccessException {
     if (runSequential) {
-      return buildClustersSeq(input, clustersIn, output, measure, convergenceDelta, maxIterations, numReduceTasks, m);
-
+      return buildClustersSeq(input, clustersIn, output, measure, convergenceDelta, maxIterations, m);
     } else {
       return buildClustersMR(input, clustersIn, output, measure, convergenceDelta, maxIterations, numReduceTasks, m);
     }
   }
 
-  private Path buildClustersSeq(Path input,
-                                Path clustersIn,
-                                Path output,
-                                DistanceMeasure measure,
-                                double convergenceDelta,
-                                int maxIterations,
-                                int numReduceTasks,
-                                float m) throws IOException, InstantiationException, IllegalAccessException {
+  private static Path buildClustersSeq(Path input,
+                                      Path clustersIn,
+                                      Path output,
+                                      DistanceMeasure measure,
+                                      double convergenceDelta,
+                                      int maxIterations,
+                                      float m) throws IOException, InstantiationException, IllegalAccessException {
     FuzzyKMeansClusterer clusterer = new FuzzyKMeansClusterer(measure, convergenceDelta, m);
     List<SoftCluster> clusters = new ArrayList<SoftCluster>();
 
@@ -401,7 +387,7 @@ public class FuzzyKMeansDriver extends A
       for (FileStatus s : status) {
         SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
         try {
-          WritableComparable<?> key = (WritableComparable<?>) reader.getKeyClass().newInstance();
+          Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
           VectorWritable vw = (VectorWritable) reader.getValueClass().newInstance();
           while (reader.next(key, vw)) {
             clusterer.addPointToClusters(clusters, vw.get());
@@ -420,9 +406,12 @@ public class FuzzyKMeansDriver extends A
                                                            SoftCluster.class);
       try {
         for (SoftCluster cluster : clusters) {
-          log.info("Writing Cluster:" + cluster.getId() + " center:" + AbstractCluster.formatVector(cluster.getCenter(), null)
-              + " numPoints:" + cluster.getNumPoints() + " radius:" + AbstractCluster.formatVector(cluster.getRadius(), null)
-              + " to: " + clustersOut.getName());
+          log.info("Writing Cluster:{} center:{} numPoints:{} radius:{} to: {}",
+                   new Object[] {cluster.getId(),
+                                 AbstractCluster.formatVector(cluster.getCenter(), null),
+                                 cluster.getNumPoints(),
+                                 AbstractCluster.formatVector(cluster.getRadius(), null),
+                                 clustersOut.getName()});
           writer.append(new Text(cluster.getIdentifier()), cluster);
         }
       } finally {
@@ -434,14 +423,14 @@ public class FuzzyKMeansDriver extends A
     return clustersIn;
   }
 
-  private Path buildClustersMR(Path input,
-                               Path clustersIn,
-                               Path output,
-                               DistanceMeasure measure,
-                               double convergenceDelta,
-                               int maxIterations,
-                               int numReduceTasks,
-                               float m) throws IOException {
+  private static Path buildClustersMR(Path input,
+                                      Path clustersIn,
+                                      Path output,
+                                      DistanceMeasure measure,
+                                      double convergenceDelta,
+                                      int maxIterations,
+                                      int numReduceTasks,
+                                      float m) throws IOException {
     boolean converged = false;
     int iteration = 1;
 
@@ -457,7 +446,6 @@ public class FuzzyKMeansDriver extends A
                                measure.getClass().getName(),
                                convergenceDelta,
                                numReduceTasks,
-                               iteration,
                                m);
 
       // now point the input to the old output directory
@@ -482,38 +470,33 @@ public class FuzzyKMeansDriver extends A
    *          the convergence delta value
    * @param emitMostLikely
    *          a boolean if true emit only most likely cluster for each point
-   * @param threshold 
+   * @param threshold
    *          a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
    * @param runSequential if true run in sequential execution mode
-   * @throws IOException 
-   * @throws IllegalAccessException 
-   * @throws InstantiationException 
    */
-  public void clusterData(Path input,
-                          Path clustersIn,
-                          Path output,
-                          DistanceMeasure measure,
-                          double convergenceDelta,
-                          float m,
-                          boolean emitMostLikely,
-                          double threshold,
-                          boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException,
-      InstantiationException, IllegalAccessException {
+  public static void clusterData(Path input,
+                                 Path clustersIn,
+                                 Path output,
+                                 DistanceMeasure measure,
+                                 double convergenceDelta,
+                                 float m,
+                                 boolean emitMostLikely,
+                                 double threshold,
+                                 boolean runSequential) 
+    throws IOException, ClassNotFoundException, InterruptedException, InstantiationException, IllegalAccessException {
     if (runSequential) {
-      clusterDataSeq(input, clustersIn, output, measure, convergenceDelta, m, emitMostLikely, threshold);
+      clusterDataSeq(input, clustersIn, output, measure, convergenceDelta, m);
     } else {
       clusterDataMR(input, clustersIn, output, measure, convergenceDelta, m, emitMostLikely, threshold);
     }
   }
 
-  private void clusterDataSeq(Path input,
+  private static void clusterDataSeq(Path input,
                               Path clustersIn,
                               Path output,
                               DistanceMeasure measure,
                               double convergenceDelta,
-                              float m,
-                              boolean emitMostLikely,
-                              double threshold) throws IOException, InterruptedException, InstantiationException,
+                              float m) throws IOException, InstantiationException,
       IllegalAccessException {
     FuzzyKMeansClusterer clusterer = new FuzzyKMeansClusterer(measure, convergenceDelta, m);
     List<SoftCluster> clusters = new ArrayList<SoftCluster>();
@@ -533,7 +516,7 @@ public class FuzzyKMeansDriver extends A
                                                            IntWritable.class,
                                                            WeightedVectorWritable.class);
       try {
-        WritableComparable<?> key = (WritableComparable<?>) reader.getKeyClass().newInstance();
+        Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
         VectorWritable vw = (VectorWritable) reader.getValueClass().newInstance();
         while (reader.next(key, vw)) {
           clusterer.emitPointToClusters(vw, clusters, writer);
@@ -547,14 +530,15 @@ public class FuzzyKMeansDriver extends A
 
   }
 
-  private void clusterDataMR(Path input,
-                             Path clustersIn,
-                             Path output,
-                             DistanceMeasure measure,
-                             double convergenceDelta,
-                             float m,
-                             boolean emitMostLikely,
-                             double threshold) throws IOException, InterruptedException, ClassNotFoundException {
+  private static void clusterDataMR(Path input,
+                                    Path clustersIn,
+                                    Path output,
+                                    DistanceMeasure measure,
+                                    double convergenceDelta,
+                                    float m,
+                                    boolean emitMostLikely,
+                                    double threshold)
+    throws IOException, InterruptedException, ClassNotFoundException {
     Configuration conf = new Configuration();
     conf.set(FuzzyKMeansConfigKeys.CLUSTER_PATH_KEY, clustersIn.toString());
     conf.set(FuzzyKMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
@@ -596,10 +580,10 @@ public class FuzzyKMeansDriver extends A
    * @throws IOException
    *           if there was an IO error
    */
-  private boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
+  private static boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
 
     Path clusterPath = new Path(filePath, "*");
-    List<Path> result = new ArrayList<Path>();
+    Collection<Path> result = new ArrayList<Path>();
 
     PathFilter clusterFileFilter = new PathFilter() {
       @Override
@@ -608,7 +592,8 @@ public class FuzzyKMeansDriver extends A
       }
     };
 
-    FileStatus[] matches = fs.listStatus(FileUtil.stat2Paths(fs.globStatus(clusterPath, clusterFileFilter)), clusterFileFilter);
+    FileStatus[] matches =
+        fs.listStatus(FileUtil.stat2Paths(fs.globStatus(clusterPath, clusterFileFilter)), clusterFileFilter);
 
     for (FileStatus match : matches) {
       result.add(fs.makeQualified(match.getPath()));
@@ -625,7 +610,7 @@ public class FuzzyKMeansDriver extends A
          * new KeyValueLineRecordReader(conf, new FileSplit(p, 0, fs .getFileStatus(p).getLen(), (String[])
          * null));
          */
-        Text key = new Text();
+        Writable key = new Text();
         SoftCluster value = new SoftCluster();
         while (converged && reader.next(key, value)) {
           converged = value.isConverged();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Thu Sep  2 12:28:03 2010
@@ -96,7 +96,7 @@ public class MeanShiftCanopyDriver exten
                             boolean inputIsCanopies,
                             boolean runClustering,
                             boolean runSequential)
-      throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     new MeanShiftCanopyDriver().job(input,
                                     output,
                                     measure,
@@ -177,7 +177,7 @@ public class MeanShiftCanopyDriver exten
    * @param convergenceDelta
    *          the double convergence criteria
    */
-  private void runIteration(Path input,
+  private static void runIteration(Path input,
                                    Path output,
                                    Path control,
                                    String measureClassName,
@@ -244,7 +244,7 @@ public class MeanShiftCanopyDriver exten
                   boolean inputIsCanopies,
                   boolean runClustering,
                   boolean runSequential)
-      throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     Path clustersIn = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
     if (inputIsCanopies) {
       clustersIn = input;
@@ -258,13 +258,12 @@ public class MeanShiftCanopyDriver exten
       clusterData(inputIsCanopies ? input : new Path(output, Cluster.INITIAL_CLUSTERS_DIR),
                   clustersOut,
                   new Path(output, Cluster.CLUSTERED_POINTS_DIR),
-                  runSequential,
-                  measure);
+                  runSequential);
     }
   }
 
-  public void createCanopyFromVectors(Path input, Path output, DistanceMeasure measure, boolean runSequential)
-      throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+  public static void createCanopyFromVectors(Path input, Path output, DistanceMeasure measure, boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     if (runSequential) {
       createCanopyFromVectorsSeq(input, output, measure);
     } else {
@@ -277,8 +276,8 @@ public class MeanShiftCanopyDriver exten
    * @param output the Path to the initial clusters directory
    * @param measure the DistanceMeasure
    */
-  private void createCanopyFromVectorsSeq(Path input, Path output, DistanceMeasure measure)
-      throws IOException, InstantiationException, IllegalAccessException {
+  private static void createCanopyFromVectorsSeq(Path input, Path output, DistanceMeasure measure)
+    throws IOException, InstantiationException, IllegalAccessException {
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(input.toUri(), conf);
     FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
@@ -305,8 +304,8 @@ public class MeanShiftCanopyDriver exten
     }
   }
 
-  private void createCanopyFromVectorsMR(Path input, Path output, DistanceMeasure measure)
-      throws IOException, InterruptedException, ClassNotFoundException {
+  private static void createCanopyFromVectorsMR(Path input, Path output, DistanceMeasure measure)
+    throws IOException, InterruptedException, ClassNotFoundException {
     Configuration conf = new Configuration();
     conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
     Job job = new Job(conf);
@@ -359,14 +358,14 @@ public class MeanShiftCanopyDriver exten
     }
   }
 
-  private Path buildClustersSeq(Path clustersIn,
+  private static Path buildClustersSeq(Path clustersIn,
                                        Path output,
                                        DistanceMeasure measure,
                                        double t1,
                                        double t2,
                                        double convergenceDelta,
                                        int maxIterations)
-      throws IOException, InstantiationException, IllegalAccessException {
+    throws IOException, InstantiationException, IllegalAccessException {
     MeanShiftCanopyClusterer clusterer = new MeanShiftCanopyClusterer(measure, t1, t2, convergenceDelta);
     List<MeanShiftCanopy> clusters = new ArrayList<MeanShiftCanopy>();
     Configuration conf = new Configuration();
@@ -422,7 +421,7 @@ public class MeanShiftCanopyDriver exten
                                       double t2,
                                       double convergenceDelta,
                                       int maxIterations)
-      throws IOException, InterruptedException, ClassNotFoundException {
+    throws IOException, InterruptedException, ClassNotFoundException {
     // iterate until the clusters converge
     boolean converged = false;
     int iteration = 1;
@@ -450,23 +449,21 @@ public class MeanShiftCanopyDriver exten
    * @param output
    *          the directory pathname for output clustered points
    * @param runSequential if true run in sequential execution mode
-   * @param measure the DistanceMeasure to use
    */
-  public void clusterData(Path input,
+  public static void clusterData(Path input,
                                  Path clustersIn,
                                  Path output,
-                                 boolean runSequential,
-                                 DistanceMeasure measure)
-      throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+                                 boolean runSequential)
+    throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     if (runSequential) {
-      clusterDataSeq(input, clustersIn, output, measure);
+      clusterDataSeq(input, clustersIn, output);
     } else {
       clusterDataMR(input, clustersIn, output);
     }
   }
 
-  private void clusterDataSeq(Path input, Path clustersIn, Path output, DistanceMeasure measure)
-      throws IOException, InstantiationException, IllegalAccessException {
+  private static void clusterDataSeq(Path input, Path clustersIn, Path output)
+    throws IOException, InstantiationException, IllegalAccessException {
     Collection<MeanShiftCanopy> clusters = new ArrayList<MeanShiftCanopy>();
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(clustersIn.toUri(), conf);
@@ -512,8 +509,8 @@ public class MeanShiftCanopyDriver exten
     }
   }
 
-  private void clusterDataMR(Path input, Path clustersIn, Path output)
-      throws IOException, InterruptedException, ClassNotFoundException {
+  private static void clusterDataMR(Path input, Path clustersIn, Path output)
+    throws IOException, InterruptedException, ClassNotFoundException {
     Configuration conf = new Configuration();
     conf.set(STATE_IN_KEY, clustersIn.toString());
     Job job = new Job(conf);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java Thu Sep  2 12:28:03 2010
@@ -1,10 +1,13 @@
 package org.apache.mahout.ep;
 
 import com.google.common.collect.Lists;
+import org.apache.mahout.common.RandomUtils;
 
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Locale;
 import java.util.Random;
+import java.util.concurrent.atomic.AtomicInteger;
 
 /**
  * Records evolutionary state and provides a mutation operation for recorded-step meta-mutation.
@@ -27,11 +30,11 @@ import java.util.Random;
  */
 public class State<T extends Payload<T>> implements Comparable<State<T>> {
   // object count is kept to break ties in comparison.
-  static volatile int objectCount = 0;
+  private static final AtomicInteger objectCount = new AtomicInteger();
 
-  private int id = objectCount++;
+  private int id = objectCount.getAndIncrement();
 
-  private transient Random gen = new Random();
+  private Random gen = RandomUtils.getRandom();
 
   // current state
   private double[] params;
@@ -187,7 +190,8 @@ public class State<T extends Payload<T>>
   }
 
   public void setMaps(Iterable<Mapping> maps) {
-    this.maps = Lists.newArrayList(maps).toArray(new Mapping[0]);
+    Collection<Mapping> list = Lists.newArrayList(maps);
+    this.maps = list.toArray(new Mapping[list.size()]);
   }
 
   public void setValue(double v) {
@@ -206,13 +210,9 @@ public class State<T extends Payload<T>>
    * @return -1, 0, 1 if the other state is better, identical or worse than this one.
    */
   @Override
-  public int compareTo(State other) {
+  public int compareTo(State<T> other) {
     int r = Double.compare(other.value, this.value);
-    if (r == 0) {
-      return this.id - other.id;
-    } else {
-      return r;
-    }
+    return r == 0 ? this.id - other.id : r;
   }
 
   public String toString() {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java Thu Sep  2 12:28:03 2010
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.mahout.ep;
 
 import com.google.common.collect.Maps;
@@ -23,37 +40,40 @@ import java.util.concurrent.Future;
  * optimization is inherently time-bounded which is useful for some scheduled operations.
  */
 public class ThreadedEvolutionaryProcess {
-  private volatile int taskCount = 0;
-  private volatile int processCount = 0;
 
-  private volatile int maxTask = 0;
+  private static final PriorityQueue<State<?>> resultPopulation = new PriorityQueue<State<?>>();
+
+  private volatile int taskCount;
+  private volatile int processCount;
+
+  private volatile int maxTask;
+
+  private final Deque<State<?>> pending = new LinkedList<State<?>>();
+  private final Set<Future<State<?>>> working = Sets.newHashSet();
 
-  private Deque<State> pending = new LinkedList<State>();
-  private Set<Future<State>> working = Sets.newHashSet();
-  static PriorityQueue<State> resultPopulation = new PriorityQueue<State>();
-
-  private ExecutorService pool;
-  private ExecutorCompletionService<State> ecs;
-  private int threadCount;
-  private Map<Integer, Mapping> mappingTable = Maps.newHashMap();
+  private final ExecutorService pool;
+  private final ExecutorCompletionService<State<?>> ecs;
+  private final int threadCount;
+  private final Map<Integer, Mapping> mappingTable = Maps.newHashMap();
 
   public ThreadedEvolutionaryProcess(int threadCount) {
     this.threadCount = threadCount;
     pool = Executors.newFixedThreadPool(threadCount);
-    ecs = new ExecutorCompletionService<State>(pool);
+    ecs = new ExecutorCompletionService<State<?>>(pool);
   }
 
   public void setMap(int i, Mapping m) {
     mappingTable.put(i, m);
   }
 
-  public State optimize(Function f, int dim, long timeLimit, int parentDepth) throws InterruptedException, ExecutionException {
+  public State<?> optimize(Function f, int dim, long timeLimit, int parentDepth)
+    throws InterruptedException, ExecutionException {
     long t0 = System.currentTimeMillis();
 
     // start with a few points near 0.  These will get transformed
-    State s0 = new State(new double[dim], 0.5);
-    for (Integer key : mappingTable.keySet()) {
-      s0.setMap(key, mappingTable.get(key));
+    State<?> s0 = new State(new double[dim], 0.5);
+    for (Map.Entry<Integer, Mapping> entry : mappingTable.entrySet()) {
+      s0.setMap(entry.getKey(), entry.getValue());
     }
 
     pending.add(s0);
@@ -64,38 +84,38 @@ public class ThreadedEvolutionaryProcess
     // then work until the clock runs out
     do {
       // launch new tasks until we fill the available slots
-      while (working.size() < threadCount && pending.size() > 0) {
-        State next = pending.removeFirst();
+      while (working.size() < threadCount && !pending.isEmpty()) {
+        State<?> next = pending.removeFirst();
         working.add(ecs.submit(new EvalTask(f, next)));
         processCount++;
       }
 
       // wait for at least one result, then grab any additional results
-      Future<State> result = ecs.take();
+      Future<State<?>> result = ecs.take();
       while (result != null) {
-        State r = result.get();
+        State<?> r = result.get();
         resultPopulation.add(r);
         working.remove(result);
         result = ecs.poll();
       }
 
       // now spawn new pending tasks from the best in recent history
-      State[] parents = new State[parentDepth];
-      Iterator<State> j = resultPopulation.iterator();
+      State<?>[] parents = new State[parentDepth];
+      Iterator<State<?>> j = resultPopulation.iterator();
       for (int i = 0; i < parentDepth && j.hasNext(); i++) {
         parents[i] = j.next();
       }
 
       int k = 0;
       while (pending.size() + working.size() < threadCount) {
-        State tmp = parents[(k++) % parentDepth];
+        State<?> tmp = parents[(k++) % parentDepth];
         pending.add(tmp.mutate());
       }
     } while (System.currentTimeMillis() - t0 < timeLimit);
 
     // wait for last results to dribble in
-    while (working.size() > 0) {
-      Future<State> result = ecs.take();
+    while (!working.isEmpty()) {
+      Future<State<?>> result = ecs.take();
       working.remove(result);
       resultPopulation.add(result.get());
     }
@@ -111,11 +131,11 @@ public class ThreadedEvolutionaryProcess
     return String.format(Locale.ENGLISH, "Launched %d function evaluations\nMaximum threading width was %d", processCount, maxTask);
   }
 
-  public class EvalTask implements Callable<State> {
-    private Function f;
-    private State what;
+  public class EvalTask implements Callable<State<?>> {
+    private final Function f;
+    private final State<?> what;
 
-    public EvalTask(Function f, State what) {
+    public EvalTask(Function f, State<?> what) {
       this.f = f;
       this.what = what;
     }
@@ -124,10 +144,9 @@ public class ThreadedEvolutionaryProcess
      * Computes a result, or throws an exception if unable to do so.
      *
      * @return computed result
-     * @throws Exception if unable to compute a result
      */
     @Override
-    public State call() throws Exception {
+    public State<?> call() {
       taskCount++;
       maxTask = Math.max(taskCount, maxTask);
       try {
@@ -139,7 +158,7 @@ public class ThreadedEvolutionaryProcess
     }
   }
 
-  public abstract static class Function {
-    public abstract double apply(double[] params);
+  public interface Function {
+    double apply(double[] params);
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/ConstantValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/ConstantValueEncoder.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/ConstantValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/ConstantValueEncoder.java Thu Sep  2 12:28:03 2010
@@ -48,6 +48,7 @@ public class ConstantValueEncoder extend
     return getName();
   }
 
+  @Override
   protected int hashForProbe(String originalForm, int dataSize, String name, int probe){
     return hash(name, probe, dataSize);
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/TextValueEncoder.java Thu Sep  2 12:28:03 2010
@@ -64,6 +64,7 @@ public class TextValueEncoder extends Fe
     return 0;
   }
 
+  @Override
   protected Iterable<Integer> hashesForProbe(String originalForm, int dataSize, String name, int probe){
     List<Integer> hashes = new ArrayList<Integer>();
     for (String word : tokenize(originalForm)){

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegressionTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegressionTest.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegressionTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegressionTest.java Thu Sep  2 12:28:03 2010
@@ -21,7 +21,6 @@ import org.apache.mahout.common.RandomUt
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.jet.random.Exponential;
-import org.apache.mahout.math.jet.random.engine.MersenneTwister;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -32,8 +31,8 @@ public class AdaptiveLogisticRegressionT
   public void testTrain() {
     // we make up data for a simple model
 
-    final Random gen = RandomUtils.getRandom();
-    final Exponential exp = new Exponential(0.5, gen);
+    Random gen = RandomUtils.getRandom();
+    Exponential exp = new Exponential(0.5, gen);
     Vector beta = new DenseVector(200);
     for (Vector.Element element : beta) {
       int sign = 1;
@@ -44,7 +43,7 @@ public class AdaptiveLogisticRegressionT
     }
 
     AdaptiveLogisticRegression.Wrapper cl = new AdaptiveLogisticRegression.Wrapper(2, 200, new L1());
-    cl.update(new double[]{1e-5, 1});
+    cl.update(new double[]{1.0e-5, 1});
 
     for (int i = 0; i < 10000; i++) {
       AdaptiveLogisticRegression.TrainingExample r = getExample(i, gen, beta);
@@ -63,13 +62,15 @@ public class AdaptiveLogisticRegressionT
       x.train(r.getKey(), r.getActual(), r.getInstance());
       if (i % 1000 == 0) {
         if (x.getBest() != null) {
-          System.out.printf("%10d %10.4f %10.8f %.3f\n", i, x.auc(), Math.log10(x.getBest().getMappedParams()[0]), x.getBest().getMappedParams()[1]);
+          System.out.printf("%10d %10.4f %10.8f %.3f\n",
+                            i, x.auc(),
+                            Math.log10(x.getBest().getMappedParams()[0]), x.getBest().getMappedParams()[1]);
         }
       }
     }
   }
 
-  private AdaptiveLogisticRegression.TrainingExample getExample(int i, Random gen, Vector beta) {
+  private static AdaptiveLogisticRegression.TrainingExample getExample(int i, Random gen, Vector beta) {
     Vector data = new DenseVector(200);
 
     for (Vector.Element element : data) {
@@ -88,8 +89,8 @@ public class AdaptiveLogisticRegressionT
   public void copyLearnsAsExpected() {
     RandomUtils.useTestSeed();
 
-    final Random gen = RandomUtils.getRandom();
-    final Exponential exp = new Exponential(0.5, gen);
+    Random gen = RandomUtils.getRandom();
+    Exponential exp = new Exponential(0.5, gen);
     Vector beta = new DenseVector(200);
     for (Vector.Element element : beta) {
         int sign = 1;
@@ -113,7 +114,6 @@ public class AdaptiveLogisticRegressionT
 
     // then switch to a copy of that learner ... progress should continue
     AdaptiveLogisticRegression.Wrapper w2 = w.copy();
-    double auc2;
 
     for (int i = 0; i < 5000; i++) {
       if (i % 1000 == 0) {
@@ -121,7 +121,7 @@ public class AdaptiveLogisticRegressionT
           Assert.assertEquals("Should have started with no data", 0.5, w2.getLearner().auc(), 0.0001);
         }
         if (i == 1000) {
-          auc2 = w2.getLearner().auc();
+          double auc2 = w2.getLearner().auc();
           Assert.assertTrue("Should have had head-start", Math.abs(auc2 - 0.5) > 0.1);
           Assert.assertTrue("AUC should improve quickly on copy", auc1 < auc2);
         }
@@ -130,7 +130,7 @@ public class AdaptiveLogisticRegressionT
       AdaptiveLogisticRegression.TrainingExample r = getExample(i, gen, beta);
       w2.train(r);
     }
-    Assert.assertEquals("Original should not change after copy is updated", auc1, w.getLearner().auc(), 1e-5);
+    Assert.assertEquals("Original should not change after copy is updated", auc1, w.getLearner().auc(), 1.0e-5);
 
     // this improvement is really quite lenient
     Assert.assertTrue("AUC should improve significantly on copy", auc1 < w2.getLearner().auc() - 0.05);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/CsvRecordFactoryTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/CsvRecordFactoryTest.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/CsvRecordFactoryTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/CsvRecordFactoryTest.java Thu Sep  2 12:28:03 2010
@@ -53,8 +53,8 @@ public class CsvRecordFactoryTest {
     Assert.assertEquals(5.3, v.maxValue(), 0);
     v.set(v.maxValueIndex(), 0);
     Assert.assertEquals(8.0, v.norm(0), 0);
-    Assert.assertEquals(10.339850002884626, v.norm(1), 1e-6);
-    Assert.assertEquals(1.5849625007211563, v.maxValue(), 1e-6);
+    Assert.assertEquals(10.339850002884626, v.norm(1), 1.0e-6);
+    Assert.assertEquals(1.5849625007211563, v.maxValue(), 1.0e-6);
 
     v.assign(0);
     t = csv.processLine("ignore,5.3,invalid,line, \"and more text and more\",ignore", v);
@@ -66,8 +66,8 @@ public class CsvRecordFactoryTest {
     Assert.assertEquals(5.3, v.maxValue(), 0);
     v.set(v.maxValueIndex(), 0);
     Assert.assertEquals(8.0, v.norm(0), 0);
-    Assert.assertEquals(10.339850002884626, v.norm(1), 1e-6);
-    Assert.assertEquals(1.5849625007211563, v.maxValue(), 1e-6);
+    Assert.assertEquals(10.339850002884626, v.norm(1), 1.0e-6);
+    Assert.assertEquals(1.5849625007211563, v.maxValue(), 1.0e-6);
   }
 
   @Test

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/ModelSerializerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/ModelSerializerTest.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/ModelSerializerTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/ModelSerializerTest.java Thu Sep  2 12:28:03 2010
@@ -28,6 +28,7 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.function.Functions;
 import org.apache.mahout.math.function.UnaryFunction;
 import org.apache.mahout.math.stats.OnlineAuc;
+import org.junit.Before;
 import org.junit.Test;
 
 import java.io.StringReader;
@@ -39,21 +40,23 @@ import java.util.Random;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
-/**
- * Created by IntelliJ IDEA. User: tdunning Date: Aug 31, 2010 Time: 6:45:22 PM To change this
- * template use File | Settings | File Templates.
- */
 public class ModelSerializerTest {
+
+  @Before
+  public void setUp() {
+    RandomUtils.useTestSeed();
+  }
+
   @Test
   public void testSoftLimitDeserialization() {
     Mapping m = ModelSerializer.gson().fromJson(new StringReader("{\"min\":-18.420680743952367,\"max\":-2.3025850929940455,\"scale\":1.0}"), Mapping.SoftLimit.class);
     assertTrue(m instanceof Mapping.SoftLimit);
-    assertEquals((-18.420680743952367 + -2.3025850929940455) / 2, m.apply(0), 1e-6);
+    assertEquals((-18.420680743952367 + -2.3025850929940455) / 2, m.apply(0), 1.0e-6);
 
     String data = "{\"class\":\"org.apache.mahout.ep.Mapping$SoftLimit\",\"value\":{\"min\":-18.420680743952367,\"max\":-2.3025850929940455,\"scale\":1.0}}";
     m = ModelSerializer.gson().fromJson(new StringReader(data), Mapping.class);
     assertTrue(m instanceof Mapping.SoftLimit);
-    assertEquals((-18.420680743952367 + -2.3025850929940455) / 2, m.apply(0), 1e-6);
+    assertEquals((-18.420680743952367 + -2.3025850929940455) / 2, m.apply(0), 1.0e-6);
   }
 
   @Test
@@ -61,7 +64,7 @@ public class ModelSerializerTest {
     String data = "{\"class\":\"org.apache.mahout.ep.Mapping$LogLimit\",\"value\":{\"wrapped\":{\"class\":\"org.apache.mahout.ep.Mapping$SoftLimit\",\"value\":{\"min\":-18.420680743952367,\"max\":-2.3025850929940455,\"scale\":1.0}}}}";
     Mapping m = ModelSerializer.gson().fromJson(new StringReader(data), Mapping.class);
     assertTrue(m instanceof Mapping.LogLimit);
-    assertEquals(Math.sqrt(Math.exp(-18.420680743952367) * Math.exp(-2.3025850929940455)), m.apply(0), 1e-6);
+    assertEquals(Math.sqrt(Math.exp(-18.420680743952367) * Math.exp(-2.3025850929940455)), m.apply(0), 1.0e-6);
   }
 
   @Test
@@ -100,12 +103,12 @@ public class ModelSerializerTest {
     Gson gson = ModelSerializer.gson();
     String s = gson.toJson(olr);
     OnlineLogisticRegression olr2 = gson.fromJson(new StringReader(s), OnlineLogisticRegression.class);
-    assertEquals(0, olr.getBeta().minus(olr2.getBeta()).aggregate(Functions.MAX, Functions.IDENTITY), 1e-6);
+    assertEquals(0, olr.getBeta().minus(olr2.getBeta()).aggregate(Functions.MAX, Functions.IDENTITY), 1.0e-6);
 
     train(olr, 100);
     train(olr2, 100);
 
-    assertEquals(0, olr.getBeta().minus(olr2.getBeta()).aggregate(Functions.MAX, Functions.IDENTITY), 1e-6);
+    assertEquals(0, olr.getBeta().minus(olr2.getBeta()).aggregate(Functions.MAX, Functions.IDENTITY), 1.0e-6);
   }
 
   @Test
@@ -117,7 +120,7 @@ public class ModelSerializerTest {
     CrossFoldLearner olr2 = gson.fromJson(new StringReader(s), CrossFoldLearner.class);
     double auc1 = learner.auc();
     assertTrue(auc1 > 0.85);
-    assertEquals(auc1, olr2.auc(), 1e-6);
+    assertEquals(auc1, olr2.auc(), 1.0e-6);
 
     train(learner, 100);
     train(olr2, 100);
@@ -137,7 +140,7 @@ public class ModelSerializerTest {
     AdaptiveLogisticRegression olr2 = gson.fromJson(new StringReader(s), AdaptiveLogisticRegression.class);
     double auc1 = learner.auc();
     assertTrue(auc1 > 0.85);
-    assertEquals(auc1, olr2.auc(), 1e-6);
+    assertEquals(auc1, olr2.auc(), 1.0e-6);
 
     train(learner, 1000);
     train(olr2, 1000);
@@ -152,7 +155,8 @@ public class ModelSerializerTest {
     Random gen = new Random(1);
     List<AdaptiveLogisticRegression.TrainingExample> x1 = Lists.newArrayList();
     for (int i = 0; i < 10; i++) {
-      AdaptiveLogisticRegression.TrainingExample t = new AdaptiveLogisticRegression.TrainingExample(i, i % 3, randomVector(gen, 5));
+      AdaptiveLogisticRegression.TrainingExample t =
+          new AdaptiveLogisticRegression.TrainingExample(i, i % 3, randomVector(gen, 5));
       x1.add(t);
     }
 
@@ -168,12 +172,12 @@ public class ModelSerializerTest {
     for (AdaptiveLogisticRegression.TrainingExample example : x1) {
       AdaptiveLogisticRegression.TrainingExample example2 = it.next();
       assertEquals(example.getKey(), example2.getKey());
-      assertEquals(0, example.getInstance().minus(example2.getInstance()).maxValue(), 1e-6);
+      assertEquals(0, example.getInstance().minus(example2.getInstance()).maxValue(), 1.0e-6);
       assertEquals(example.getActual(), example2.getActual());
     }
   }
 
-  private void train(OnlineLearner olr, int n) {
+  private static void train(OnlineLearner olr, int n) {
     Vector beta = new DenseVector(new double[]{1, -1, 0, 0.5, -0.5});
     final Random gen = new Random(1);
     for (int i = 0; i < n; i++) {
@@ -184,7 +188,7 @@ public class ModelSerializerTest {
     }
   }
 
-  private Vector randomVector(final Random gen, int n) {
+  private static Vector randomVector(final Random gen, int n) {
     Vector x = new DenseVector(n);
     x.assign(new UnaryFunction() {
       @Override

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/ep/EvolutionaryProcessTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/ep/EvolutionaryProcessTest.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/ep/EvolutionaryProcessTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/ep/EvolutionaryProcessTest.java Thu Sep  2 12:28:03 2010
@@ -1,12 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.mahout.ep;
 
+import org.apache.mahout.common.RandomUtils;
 import org.junit.Assert;
+import org.junit.Before;
 import org.junit.Test;
 
 import java.util.Random;
 import java.util.concurrent.ExecutionException;
 
 public class EvolutionaryProcessTest {
+
+  @Before
+  public void setUp() {
+    RandomUtils.useTestSeed();
+  }
+
   @Test
   public void converges() throws ExecutionException, InterruptedException {
     State<Foo> s0 = new State<Foo>(new double[5], 1);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java Thu Sep  2 12:28:03 2010
@@ -1,23 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.mahout.ep;
 
+import org.apache.mahout.common.RandomUtils;
 import org.junit.Assert;
+import org.junit.Before;
 import org.junit.Test;
 
 import java.util.Locale;
-import java.util.Random;
 import java.util.concurrent.ExecutionException;
 
 public class ThreadedEvolutionaryProcessTest {
+
+  @Before
+  public void setUp() {
+    RandomUtils.useTestSeed();
+  }
+
   @Test
   public void testOptimize() throws ExecutionException, InterruptedException {
     ThreadedEvolutionaryProcess ep = new ThreadedEvolutionaryProcess(50);
-    State x = ep.optimize(new ThreadedEvolutionaryProcess.Function() {
+    State<?> x = ep.optimize(new ThreadedEvolutionaryProcess.Function() {
       /**
        * Implements a skinny quadratic bowl.
        */
       @Override
       public double apply(double[] params) {
-        Random rand = new Random(1);
         double sum = 0;
         int i = 0;
         for (double x : params) {
@@ -27,7 +50,7 @@ public class ThreadedEvolutionaryProcess
         }
         try {
           // variable delays to emulate a tricky function
-          Thread.sleep((long) Math.floor(-2 * Math.log(1 - rand.nextDouble())));
+          Thread.sleep((long) Math.floor(-2 * Math.log(1 - Math.random())));
         } catch (InterruptedException e) {
           // ignore interruptions
         }
@@ -36,7 +59,7 @@ public class ThreadedEvolutionaryProcess
       }
     }, 5, 200, 2);
 
-    System.out.printf("%s\n", ep);
+    System.out.println(ep);
     double[] r = x.getMappedParams();
     int i = 0;
     for (double v : r) {

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java Thu Sep  2 12:28:03 2010
@@ -17,16 +17,11 @@
 
 package org.apache.mahout.vectors;
 
-import com.google.common.collect.ImmutableMap;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 import org.junit.Assert;
 import org.junit.Test;
 
-import java.util.Locale;
-
-import static org.junit.Assert.assertEquals;
-
 public class InteractionValueEncoderTest {
   @Test
   public void testAddToVector() {

Modified: mahout/trunk/etc/findbugs-exclude.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/etc/findbugs-exclude.xml?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/etc/findbugs-exclude.xml (original)
+++ mahout/trunk/etc/findbugs-exclude.xml Thu Sep  2 12:28:03 2010
@@ -16,6 +16,9 @@
     <Bug pattern="EI_EXPOSE_REP2"/>
   </Match>
   <Match>
+    <Bug pattern="FE_FLOATING_POINT_EQUALITY"/>
+  </Match>
+  <Match>
     <Bug pattern="SIC_INNER_SHOULD_BE_STATIC_ANON"/>
   </Match>
   <Match>

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java Thu Sep  2 12:28:03 2010
@@ -20,6 +20,7 @@ package org.apache.mahout.classifier.bay
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.HashSet;
+import java.util.Locale;
 import java.util.Set;
 import java.util.regex.Pattern;
 
@@ -72,8 +73,9 @@ public class WikipediaDatasetCreatorMapp
       while (stream.incrementToken()) {
         contents.append(termAtt.termBuffer(), 0, termAtt.termLength()).append(' ');
       }
-      context.write(new Text(WikipediaDatasetCreatorMapper.SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")), new Text(
-          contents.toString()));
+      context.write(
+          new Text(WikipediaDatasetCreatorMapper.SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")),
+          new Text(contents.toString()));
     }
   }
 
@@ -108,8 +110,8 @@ public class WikipediaDatasetCreatorMapp
     } catch (InstantiationException e) {
       throw new IllegalStateException(e);
     }
-    log.info("Configure: Input Categories size: {} Exact Match: {} Analyzer: {}", new Object[] { inputCategories.size(),
-        exactMatchOnly, analyzer.getClass().getName() });
+    log.info("Configure: Input Categories size: {} Exact Match: {} Analyzer: {}",
+             new Object[] { inputCategories.size(), exactMatchOnly, analyzer.getClass().getName()});
   }
 
   private String findMatchingCategory(String document) {
@@ -121,7 +123,7 @@ public class WikipediaDatasetCreatorMapp
       if (endIndex >= document.length() || endIndex < 0) {
         break;
       }
-      String category = document.substring(categoryIndex, endIndex).toLowerCase().trim();
+      String category = document.substring(categoryIndex, endIndex).toLowerCase(Locale.ENGLISH).trim();
       // categories.add(category.toLowerCase());
       if (exactMatchOnly && inputCategories.contains(category)) {
         return category;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayClustering.java Thu Sep  2 12:28:03 2010
@@ -65,7 +65,7 @@ public class DisplayClustering extends F
 
   protected static final List<List<Cluster>> CLUSTERS = new ArrayList<List<Cluster>>();
 
-  protected static final Color[] COLORS = {
+  static final Color[] COLORS = {
       Color.red, Color.orange, Color.yellow, Color.green, Color.blue, Color.magenta, Color.lightGray
   };
 
@@ -73,7 +73,7 @@ public class DisplayClustering extends F
 
   protected static final double T2 = 2.8;
   
-  protected static double significance = 0.05;
+  static double significance = 0.05;
 
   protected static int res; // screen resolution
 
@@ -224,10 +224,10 @@ public class DisplayClustering extends F
   protected static void generateSamples(int num, double mx, double my, double sd) {
     double[] params = {mx, my, sd, sd};
     SAMPLE_PARAMS.add(new DenseVector(params));
-    log.info("Generating {} samples m=[{}, {}] sd={}", new Object[] { num, mx, my, sd });
+    log.info("Generating {} samples m=[{}, {}] sd={}", new Object[] {num, mx, my, sd});
     for (int i = 0; i < num; i++) {
-      SAMPLE_DATA.add(new VectorWritable(new DenseVector(new double[] { UncommonDistributions.rNorm(mx, sd),
-          UncommonDistributions.rNorm(my, sd) })));
+      SAMPLE_DATA.add(new VectorWritable(new DenseVector(
+          new double[] {UncommonDistributions.rNorm(mx, sd), UncommonDistributions.rNorm(my, sd)})));
     }
   }
 
@@ -300,10 +300,10 @@ public class DisplayClustering extends F
   protected static void generate2dSamples(int num, double mx, double my, double sdx, double sdy) {
     double[] params = {mx, my, sdx, sdy};
     SAMPLE_PARAMS.add(new DenseVector(params));
-    log.info("Generating {} samples m=[{}, {}] sd=[{}, {}]", new Object[] { num, mx, my, sdx, sdy });
+    log.info("Generating {} samples m=[{}, {}] sd=[{}, {}]", new Object[] {num, mx, my, sdx, sdy});
     for (int i = 0; i < num; i++) {
-      SAMPLE_DATA.add(new VectorWritable(new DenseVector(new double[] { UncommonDistributions.rNorm(mx, sdx),
-          UncommonDistributions.rNorm(my, sdy) })));
+      SAMPLE_DATA.add(new VectorWritable(new DenseVector(
+          new double[] {UncommonDistributions.rNorm(mx, sdx), UncommonDistributions.rNorm(my, sdy)})));
     }
   }
 

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java Thu Sep  2 12:28:03 2010
@@ -242,7 +242,7 @@ public class CDRule implements Rule {
     if (this == obj) {
       return true;
     }
-    if (obj == null || !(obj instanceof CDRule)) {
+    if (!(obj instanceof CDRule)) {
       return false;
     }
     CDRule rule = (CDRule) obj;

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/GenericPermuting.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/GenericPermuting.java?rev=991909&r1=991908&r2=991909&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/GenericPermuting.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/GenericPermuting.java Thu Sep  2 12:28:03 2010
@@ -8,10 +8,6 @@ It is provided "as is" without expressed
 */
 package org.apache.mahout.math;
 
-import org.apache.mahout.common.RandomUtils;
-import org.apache.mahout.math.jet.random.Uniform;
-import org.apache.mahout.math.jet.random.engine.MersenneTwister;
-
 /**
  Generically reorders (permutes) arbitrary shaped data (for example, an array, three arrays, a 2-d matrix, two linked lists) using an <i>in-place</i> swapping algorithm.
  Imagine having a couple of apples. For some reason you decide to reorder them. The green one before the red one. The pale one after the shiny one, etc. This class helps to do the job.
@@ -104,143 +100,6 @@ public class GenericPermuting {
   }
 
   /**
-   * Returns the <tt>p</tt>-th permutation of the sequence <tt>[0,1,...,N-1]</tt>. A small but smart and efficient
-   * routine, ported from <A HREF="http://www.hep.net/wwwmirrors/cernlib/CNASDOC/shortwrups_html3/node255.html">
-   * Cernlib</A>. The <A HREF="ftp://asisftp.cern.ch/cernlib/share/pro/src/mathlib/gen/v/permu.F"> Fortran source</A>. A
-   * sequence of <tt>N</tt> distinct elements has <tt>N!</tt> permutations, which are enumerated in lexicographical
-   * order <tt>1 .. N!</tt>. <p> This is, for example, useful for Monte-Carlo-tests where one might want to compute
-   * <tt>k</tt> distinct and random permutations of a sequence, obtaining <tt>p</tt> from {@link
-   * org.apache.mahout.math.jet.random.sampling} without replacement or a random engine like {@link
-   * org.apache.mahout.math.jet.random.engine.MersenneTwister}. <br> Note: When <tt>N!</tt> exceeds the 64-bit range (i.e.
-   * for <tt>N > 20</tt>), this method has <i>different</i> behaviour: it makes a sequence <tt>[0,1,...,N-1]</tt> and
-   * randomizes it, seeded with parameter <tt>p</tt>. <p> <b>Examples:</b>
-   * <pre>
-   * http://www.hep.net/wwwmirrors/cernlib/CNASDOC/shortwrups_html3/node255.html
-   * // exactly lexicographically enumerated (ascending)
-   * permutation(1,3) --> [ 0,1,2 ]
-   * permutation(2,3) --> [ 0,2,1 ]
-   * permutation(3,3) --> [ 1,0,2 ]
-   * permutation(4,3) --> [ 1,2,0 ]
-   * permutation(5,3) --> [ 2,0,1 ]
-   * permutation(6,3) --> [ 2,1,0 ]
-   * permutation(1      ,20) --> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
-   * permutation(2      ,20) --> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 18]
-   * permutation(1000000,20) --> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 17, 18, 13, 19, 11, 15, 14, 16, 10]
-   * permutation(20! -2 ,20) --> [19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 1, 2, 0]
-   * permutation(20! -1 ,20) --> [19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 0, 1]
-   * permutation(20!    ,20) --> [19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-   * <br>
-   * // not exactly enumerated, rather randomly shuffled
-   * permutation(1,21) --> [18, 20, 11, 0, 15, 1, 19, 13, 3, 6, 16, 17, 9, 5, 12, 4, 7, 14, 8, 10, 2]
-   * permutation(2,21) --> [1, 9, 4, 16, 14, 13, 11, 20, 10, 8, 18, 0, 15, 3, 17, 5, 12, 2, 6, 7, 19]
-   * permutation(3,21) --> [12, 0, 19, 1, 20, 5, 8, 16, 6, 14, 2, 4, 3, 17, 11, 13, 9, 10, 15, 18, 7]
-   * </pre>
-   *
-   * @param p the lexicographical ordinal number of the permutation to be computed.
-   * @param N the length of the sequence to be generated.
-   * @return the <tt>p</tt>-th permutation.
-   * @throws IllegalArgumentException if <tt>p < 1 || N < 0 || p > N!</tt>.
-   */
-  public static int[] permutation(long p, int N) {
-    if (p < 1) {
-      throw new IllegalArgumentException("Permutations are enumerated 1 .. N!");
-    }
-    if (N < 0) {
-      throw new IllegalArgumentException("Must satisfy N >= 0");
-    }
-
-    int[] permutation = new int[N];
-
-    if (N > 20) { // factorial(21) would overflow 64-bit long)
-      // Simply make a list (0,1,..N-1) and randomize it, seeded with "p".
-      // Note that this is perhaps not what you want...
-      for (int i = N; --i >= 0;) {
-        permutation[i] = i;
-      }
-      Uniform gen = new Uniform(RandomUtils.getRandom());
-      for (int i = 0; i < N - 1; i++) {
-        int random = gen.nextIntFromTo(i, N - 1);
-
-        //swap(i, random)
-        int tmp = permutation[random];
-        permutation[random] = permutation[i];
-        permutation[i] = tmp;
-      }
-
-      return permutation;
-    }
-
-    // the normal case - exact enumeration
-    if (p > org.apache.mahout.math.jet.math.Arithmetic.longFactorial(N)) {
-      throw new IllegalArgumentException("N too large (a sequence of N elements only has N! permutations).");
-    }
-
-    int[] tmp = new int[N];
-    for (int i = 1; i <= N; i++) {
-      tmp[i - 1] = i;
-    }
-
-    long io = p - 1;
-    for (int M = N - 1; M >= 1; M--) {
-      long fac = org.apache.mahout.math.jet.math.Arithmetic.longFactorial(M);
-      int in = ((int) (io / fac)) + 1;
-      io %= fac;
-      permutation[N - M - 1] = tmp[in - 1];
-
-      for (int j = in; j <= M; j++) {
-        tmp[j - 1] = tmp[j];
-      }
-    }
-    if (N > 0) {
-      permutation[N - 1] = tmp[0];
-    }
-
-    for (int i = N; --i >= 0;) {
-      permutation[i] -= 1;
-    }
-    return permutation;
-  }
-
-  /**
-   * A non-generic variant of reordering, specialized for <tt>int[]</tt>, same semantics. Quicker than generic
-   * reordering. Also for convenience (forget about the Swapper object).
-   */
-  public static void permute(int[] list, int[] indexes) {
-    int[] copy = list.clone();
-    for (int i = list.length; --i >= 0;) {
-      list[i] = copy[indexes[i]];
-    }
-  }
-
-  /**
-   * Deprecated. Generically reorders arbitrary shaped generic data <tt>g</tt> such that <tt>g[i] == g[indexes[i]]</tt>.
-   * (The generic data may be one array, a 2-d matrix, two linked lists or whatever). This class swaps elements around,
-   * in a way that avoids stumbling over its own feet. <p> <b>Example:</b>
-   * <pre>
-   * Reordering
-   * [A,B,C,D,E] with indexes [0,4,2,3,1] yields
-   * [A,E,C,D,B]
-   * In other words g[0]<--g[0], g[1]<--g[4], g[2]<--g[2], g[3]<--g[3], g[4]<--g[1].
-   *
-   * Reordering
-   * [A,B,C,D,E] with indexes [0,4,1,2,3] yields
-   * [A,E,B,C,D]
-   * In other words g[0]<--g[0], g[1]<--g[4], g[2]<--g[1], g[3]<--g[2], g[4]<--g[3].
-   * </pre>
-   * <p>
-   *
-   * @param indexes the permutation indexes.
-   * @param swapper an object that knows how to swap two indexes a,b.
-   * @param work    the working storage, must satisfy <tt>work.length >= indexes.length</tt>; set <tt>work==null</tt> if
-   *                you don't care about performance.
-   * @deprecated
-   */
-  @Deprecated
-  public static void permute(int[] indexes, org.apache.mahout.math.Swapper swapper, int[] work) {
-    permute(indexes, swapper, work, null);
-  }
-
-  /**
    * Generically reorders arbitrary shaped generic data <tt>g</tt> such that <tt>g[i] == g[indexes[i]]</tt>. (The
    * generic data may be one array, a 2-d matrix, two linked lists or whatever). This class swaps elements around, in a
    * way that avoids stumbling over its own feet. <p> <b>Example:</b>
@@ -300,14 +159,4 @@ public class GenericPermuting {
     }
   }
 
-  /**
-   * A non-generic variant of reordering, specialized for <tt>Object[]</tt>, same semantics. Quicker than generic
-   * reordering. Also for convenience (forget about the Swapper object).
-   */
-  public static void permute(Object[] list, int[] indexes) {
-    Object[] copy = list.clone();
-    for (int i = list.length; --i >= 0;) {
-      list[i] = copy[indexes[i]];
-    }
-  }
 }



Mime
View raw message