mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r1140141 - in /mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ core/src/main/java/org/apache/mahou...
Date Mon, 27 Jun 2011 12:42:23 GMT
Author: srowen
Date: Mon Jun 27 12:42:21 2011
New Revision: 1140141

URL: http://svn.apache.org/viewvc?rev=1140141&view=rev
Log:
Style

Removed:
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/package-info.java
Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/InMemoryFactorizationEvaluator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MySQLJDBCIDMigrator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/ClassifierContext.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/IteratorTokenStream.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/nlp/NGrams.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/SimplifyGraphJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Triangle.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/UndirectedEdge.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/UndirectedEdgeWithDegrees.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Vertex.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/VertexWithDegree.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/package-info.java
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/JoinableUndirectedEdge.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DescriptorUtilsTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/DegreeDistributionJobTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/LocalClusteringCoefficientJob.java
    mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/LocalClusteringCoefficientJobTest.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/DataModelFactorizablePreferences.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataLine.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapper.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/DescriptionUtils.java
    mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/ChunkedWriter.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java Mon Jun 27 12:42:21 2011
@@ -17,15 +17,9 @@
 
 package org.apache.mahout.cf.taste.hadoop;
 
-import com.google.common.base.Charsets;
-import com.google.common.io.ByteStreams;
-import com.google.common.io.Closeables;
-import com.google.common.primitives.Bytes;
 import com.google.common.primitives.Longs;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IOUtils;
 import org.apache.mahout.common.Pair;
 import org.apache.mahout.common.iterator.sequencefile.PathFilters;
 import org.apache.mahout.common.iterator.sequencefile.PathType;
@@ -34,9 +28,6 @@ import org.apache.mahout.math.VarIntWrit
 import org.apache.mahout.math.VarLongWritable;
 import org.apache.mahout.math.map.OpenIntLongHashMap;
 
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
 import java.util.regex.Pattern;
 
 /**

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/InMemoryFactorizationEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/InMemoryFactorizationEvaluator.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/InMemoryFactorizationEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/InMemoryFactorizationEvaluator.java Mon Jun 27 12:42:21 2011
@@ -89,9 +89,9 @@ public class InMemoryFactorizationEvalua
 
     FullRunningAverage rmseAvg = new FullRunningAverage();
     FullRunningAverage maeAvg = new FullRunningAverage();
-    int pairsUsed = 1;
     Writer writer = new OutputStreamWriter(System.out);
     try {
+      int pairsUsed = 1;
       for (Preference pref : readProbePreferences(pairs)) {
         int userID = (int) pref.getUserID();
         int itemID = (int) pref.getItemID();
@@ -102,12 +102,12 @@ public class InMemoryFactorizationEvalua
         rmseAvg.addDatum(err * err);
         maeAvg.addDatum(Math.abs(err));
         writer.write("Probe [" + pairsUsed + "], rating of user [" + userID + "] towards item [" + itemID + "], " +
-            "[" + rating + "] estimated [" + estimate + "]\n");
+                         '[' + rating + "] estimated [" + estimate + "]\n");
         pairsUsed++;
       }
       double rmse = Math.sqrt(rmseAvg.getAverage());
       double mae = maeAvg.getAverage();
-      writer.write("RMSE: " + rmse + ", MAE: " + mae + "\n");
+      writer.write("RMSE: " + rmse + ", MAE: " + mae + '\n');
     } finally {
       Closeables.closeQuietly(writer);
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java Mon Jun 27 12:42:21 2011
@@ -52,7 +52,7 @@ public final class ToUserVectorReducer e
 
   private int minPreferences;
 
-  public enum Counters { USERS };
+  public enum Counters { USERS }
 
   @Override
   protected void setup(Context ctx) throws IOException, InterruptedException {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java Mon Jun 27 12:42:21 2011
@@ -22,7 +22,6 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.cf.taste.common.TopK;
 import org.apache.mahout.cf.taste.impl.common.FastIDSet;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Mon Jun 27 12:42:21 2011
@@ -25,7 +25,6 @@ import org.apache.hadoop.fs.Path;
 
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
@@ -35,7 +34,6 @@ import org.apache.hadoop.util.ToolRunner
 import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
 import org.apache.mahout.cf.taste.hadoop.MaybePruneRowsMapper;
-import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
 import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
 import org.apache.mahout.cf.taste.hadoop.item.ItemIDIndexMapper;
 import org.apache.mahout.cf.taste.hadoop.item.ItemIDIndexReducer;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MySQLJDBCIDMigrator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MySQLJDBCIDMigrator.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MySQLJDBCIDMigrator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MySQLJDBCIDMigrator.java Mon Jun 27 12:42:21 2011
@@ -39,7 +39,7 @@ import javax.sql.DataSource;
  * Separately, note that in a MySQL database, the following function calls will convert a string value into a
  * numeric value in the same way that the standard implementations in this package do. This may be useful in
  * writing SQL statements for use with
- * {@link org.apache.mahout.cf.taste.impl.model.jdbc.AbstractJDBCDataModel} subclasses which convert string
+ * {@code AbstractJDBCDataModel} subclasses which convert string
  * column values to appropriate numeric values -- though this should be viewed as a temporary arrangement
  * since it will impact performance:
  * </p>

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java Mon Jun 27 12:42:21 2011
@@ -32,9 +32,13 @@ import org.apache.commons.cli2.commandli
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.util.Version;
-import org.apache.mahout.classifier.bayes.*;
+import org.apache.mahout.classifier.bayes.Algorithm;
+import org.apache.mahout.classifier.bayes.BayesAlgorithm;
+import org.apache.mahout.classifier.bayes.BayesParameters;
+import org.apache.mahout.classifier.bayes.CBayesAlgorithm;
 import org.apache.mahout.classifier.bayes.Datastore;
 import org.apache.mahout.classifier.bayes.ClassifierContext;
+import org.apache.mahout.classifier.bayes.InMemoryBayesDatastore;
 import org.apache.mahout.common.nlp.NGrams;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/ClassifierContext.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/ClassifierContext.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/ClassifierContext.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/ClassifierContext.java Mon Jun 27 12:42:21 2011
@@ -20,9 +20,6 @@ package org.apache.mahout.classifier.bay
 import java.util.Collection;
 
 import org.apache.mahout.classifier.ClassifierResult;
-import org.apache.mahout.classifier.bayes.Algorithm;
-import org.apache.mahout.classifier.bayes.Datastore;
-import org.apache.mahout.classifier.bayes.InvalidDatastoreException;
 
 /**
  * The Classifier Wrapper used for choosing the {@link Algorithm} and {@link Datastore}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java Mon Jun 27 12:42:21 2011
@@ -28,10 +28,13 @@ import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.classifier.ClassifierResult;
-import org.apache.mahout.classifier.bayes.*;
+import org.apache.mahout.classifier.bayes.BayesAlgorithm;
+import org.apache.mahout.classifier.bayes.BayesParameters;
+import org.apache.mahout.classifier.bayes.CBayesAlgorithm;
 import org.apache.mahout.classifier.bayes.InMemoryBayesDatastore;
 import org.apache.mahout.classifier.bayes.Algorithm;
 import org.apache.mahout.classifier.bayes.Datastore;
+import org.apache.mahout.classifier.bayes.InvalidDatastoreException;
 import org.apache.mahout.classifier.bayes.mapreduce.common.BayesConstants;
 import org.apache.mahout.classifier.bayes.ClassifierContext;
 import org.apache.mahout.common.StringTuple;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Mon Jun 27 12:42:21 2011
@@ -451,14 +451,8 @@ public abstract class AbstractJob extend
   /**
    * necessary to make this job (having a combined input path) work on Amazon S3, hopefully this is obsolete when MultipleInputs is available
    * again
-   *
-   * @param job
-   * @param referencePath
-   * @param inputPathOne
-   * @param inputPathTwo
-   * @throws IOException
    */
-  public void setS3SafeCombinedInputPath(Job job, Path referencePath, Path inputPathOne, Path inputPathTwo)
+  public static void setS3SafeCombinedInputPath(Job job, Path referencePath, Path inputPathOne, Path inputPathTwo)
       throws IOException {
     FileSystem fs = FileSystem.get(referencePath.toUri(), job.getConfiguration());
     FileInputFormat.setInputPaths(job, inputPathOne.makeQualified(fs), inputPathTwo.makeQualified(fs));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java Mon Jun 27 12:42:21 2011
@@ -340,12 +340,12 @@ public final class DefaultOptionCreator 
   
   public static DefaultOptionBuilder kernelProfileOption() {
     return new DefaultOptionBuilder()
-        .withLongName(DefaultOptionCreator.KERNEL_PROFILE_OPTION)
+        .withLongName(KERNEL_PROFILE_OPTION)
         .withRequired(false)
         .withShortName("kp")
         .withArgument(
             new ArgumentBuilder()
-                .withName(DefaultOptionCreator.KERNEL_PROFILE_OPTION)
+                .withName(KERNEL_PROFILE_OPTION)
                 .withDefault(TriangularKernelProfile.class.getName())
                 .withMinimum(1).withMaximum(1).create())
         .withDescription(

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/IteratorTokenStream.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/IteratorTokenStream.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/IteratorTokenStream.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/IteratorTokenStream.java Mon Jun 27 12:42:21 2011
@@ -20,8 +20,6 @@ package org.apache.mahout.common.lucene;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
-
-import java.io.IOException;
 import java.util.Iterator;
 
 /** Used to emit tokens from an input string array in the style of TokenStream */
@@ -35,7 +33,7 @@ public final class IteratorTokenStream e
   }
 
   @Override
-  public boolean incrementToken() throws IOException {
+  public boolean incrementToken() {
     if (iterator.hasNext()) {
       clearAttributes();
       termAtt.append(iterator.next());

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java Mon Jun 27 12:42:21 2011
@@ -44,7 +44,7 @@ public final class TokenStreamIterator e
         return endOfData();
       }
     } catch (IOException e) {
-      throw new RuntimeException("IO error while tokenizing", e);
+      throw new IllegalStateException("IO error while tokenizing", e);
     }
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/nlp/NGrams.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/nlp/NGrams.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/nlp/NGrams.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/nlp/NGrams.java Mon Jun 27 12:42:21 2011
@@ -17,17 +17,19 @@
 
 package org.apache.mahout.common.nlp;
 
+import com.google.common.base.Splitter;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.StringTokenizer;
 
 public class NGrams {
+
+  private static final Splitter SPACE = Splitter.on(' ');
   
   private final String line;
-  
   private final int gramSize;
   
   public NGrams(String line, int gramSize) {
@@ -38,13 +40,13 @@ public class NGrams {
   public Map<String,List<String>> generateNGrams() {
     Map<String,List<String>> returnDocument = Maps.newHashMap();
     
-    StringTokenizer tokenizer = new StringTokenizer(line);
+    Iterator<String> tokenizer = SPACE.split(line).iterator();
     List<String> tokens = Lists.newArrayList();
-    String labelName = tokenizer.nextToken();
+    String labelName = tokenizer.next();
     List<String> previousN1Grams = Lists.newArrayList();
-    while (tokenizer.hasMoreTokens()) {
+    while (tokenizer.hasNext()) {
       
-      String nextToken = tokenizer.nextToken();
+      String nextToken = tokenizer.next();
       if (previousN1Grams.size() == gramSize) {
         previousN1Grams.remove(0);
       }
@@ -65,14 +67,11 @@ public class NGrams {
   }
   
   public List<String> generateNGramsWithoutLabel() {
-    
-    StringTokenizer tokenizer = new StringTokenizer(line);
+
     List<String> tokens = Lists.newArrayList();
-    
     List<String> previousN1Grams = Lists.newArrayList();
-    while (tokenizer.hasMoreTokens()) {
+    for (String nextToken : SPACE.split(line)) {
       
-      String nextToken = tokenizer.nextToken();
       if (previousN1Grams.size() == gramSize) {
         previousN1Grams.remove(0);
       }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java Mon Jun 27 12:42:21 2011
@@ -18,7 +18,6 @@
 package org.apache.mahout.df.data;
 
 import java.util.Arrays;
-import java.util.StringTokenizer;
 
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.mahout.math.DenseVector;
@@ -44,8 +43,8 @@ public class DataConverter {
     // all attributes (categorical, numerical), ignored, label
     int nball = dataset.nbAttributes() + dataset.getIgnored().length + 1;
     
-    StringTokenizer tokenizer = new StringTokenizer(string, ", ");
-    Preconditions.checkArgument(tokenizer.countTokens() == nball, "Wrong number of attributes in the string");
+    String[] tokens = string.split("[, ]");
+    Preconditions.checkArgument(tokens.length == nball, "Wrong number of attributes in the string");
     
     int nbattrs = dataset.nbAttributes();
     DenseVector vector = new DenseVector(nbattrs);
@@ -53,7 +52,7 @@ public class DataConverter {
     int aId = 0;
     int label = -1;
     for (int attr = 0; attr < nball; attr++) {
-      String token = tokenizer.nextToken().trim();
+      String token = tokens[attr].trim();
       
       if (ArrayUtils.contains(dataset.getIgnored(), attr)) {
         continue; // IGNORED

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java Mon Jun 27 12:42:21 2011
@@ -20,7 +20,6 @@ package org.apache.mahout.df.data;
 import java.io.IOException;
 import java.util.List;
 import java.util.Scanner;
-import java.util.StringTokenizer;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -61,23 +60,17 @@ public final class DataLoader {
    * @return null if there are missing values '?'
    */
   private static Instance parseString(int id, Attribute[] attrs, List<String>[] values, String string) {
-    StringTokenizer tokenizer = new StringTokenizer(string, ", ");
-    Preconditions.checkArgument(tokenizer.countTokens() == attrs.length, "Wrong number of attributes in the string");
+    String[] tokens = string.split("[, ]");
+    Preconditions.checkArgument(tokens.length == attrs.length, "Wrong number of attributes in the string");
 
     // extract tokens and check if there is any missing value
-    String[] tokens = new String[attrs.length];
     for (int attr = 0; attr < attrs.length; attr++) {
-      String token = tokenizer.nextToken();
-      
       if (attrs[attr].isIgnored()) {
         continue;
       }
-      
-      if ("?".equals(token)) {
+      if ("?".equals(tokens[attr])) {
         return null; // missing value
       }
-      
-      tokens[attr] = token;
     }
     
     int nbattrs = Dataset.countAttributes(attrs);
@@ -249,22 +242,5 @@ public final class DataLoader {
     
     return new Dataset(attrs, values, id);
   }
-  
-  /**
-   * constructs the data
-   * 
-   * @param attrs
-   *          attributes description
-   * @param vectors
-   *          data elements
-   * @param values
-   *          used to convert CATEGORICAL attributes to Integer
-   */
-  /*
-  private static Data constructData(Attribute[] attrs, List<Instance> vectors, List<String>[] values) {
-    Dataset dataset = new Dataset(attrs, values, vectors.size());
-    
-    return new Data(dataset, vectors);
-  }
-   */
+
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java Mon Jun 27 12:42:21 2011
@@ -17,10 +17,12 @@
 
 package org.apache.mahout.df.data;
 
-import java.util.Collection;
+import com.google.common.base.Splitter;
+
+import java.util.List;
 import java.util.Locale;
-import java.util.StringTokenizer;
 
+import com.google.common.base.Splitter;
 import com.google.common.collect.Lists;
 import org.apache.mahout.df.data.Dataset.Attribute;
 
@@ -28,6 +30,9 @@ import org.apache.mahout.df.data.Dataset
  * Contains various methods that deal with descriptor strings
  */
 public final class DescriptorUtils {
+
+  private static final Splitter SPACE = Splitter.on(' ').omitEmptyStrings();
+
   private DescriptorUtils() { }
   
   /**
@@ -36,26 +41,23 @@ public final class DescriptorUtils {
    * @throws DescriptorException
    *           if a bad token is encountered
    */
-  public static Attribute[] parseDescriptor(String descriptor) throws DescriptorException {
-    StringTokenizer tokenizer = new StringTokenizer(descriptor);
-    Attribute[] attributes = new Attribute[tokenizer.countTokens()];
-    
-    for (int attr = 0; attr < attributes.length; attr++) {
-      String token = tokenizer.nextToken().toUpperCase(Locale.ENGLISH);
+  public static Attribute[] parseDescriptor(CharSequence descriptor) throws DescriptorException {
+    List<Attribute> attributes = Lists.newArrayList();
+    for (String token : SPACE.split(descriptor)) {
+      token = token.toUpperCase(Locale.ENGLISH);
       if ("I".equals(token)) {
-        attributes[attr] = Attribute.IGNORED;
+        attributes.add(Attribute.IGNORED);
       } else if ("N".equals(token)) {
-        attributes[attr] = Attribute.NUMERICAL;
+        attributes.add(Attribute.NUMERICAL);
       } else if ("C".equals(token)) {
-        attributes[attr] = Attribute.CATEGORICAL;
+        attributes.add(Attribute.CATEGORICAL);
       } else if ("L".equals(token)) {
-        attributes[attr] = Attribute.LABEL;
+        attributes.add(Attribute.LABEL);
       } else {
         throw new DescriptorException("Bad Token : " + token);
       }
     }
-    
-    return attributes;
+    return attributes.toArray(new Attribute[attributes.size()]);
   }
   
   /**
@@ -63,15 +65,8 @@ public final class DescriptorUtils {
    * for example "3 N I N N 2 C L 5 I" generates "N N N I N N C C L I I I I I".<br>
    * this is useful when describing datasets with a large number of attributes
    */
-  public static String generateDescriptor(String description) throws DescriptorException {
-    StringTokenizer tokenizer = new StringTokenizer(description, " ");
-    Collection<String> tokens = Lists.newArrayList();
-    
-    while (tokenizer.hasMoreTokens()) {
-      tokens.add(tokenizer.nextToken());
-    }
-    
-    return generateDescriptor(tokens);
+  public static String generateDescriptor(CharSequence description) throws DescriptorException {
+    return generateDescriptor(SPACE.split(description));
   }
   
   /**

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java Mon Jun 27 12:42:21 2011
@@ -199,9 +199,6 @@ public final class MahoutDriver {
         Properties properties = new Properties();
         properties.load(propsStream);
         return properties;
-      } catch (IOException ioe) {
-        log.warn("Error while loading {}", resource, ioe);
-        // Continue
       } finally {
         Closeables.closeQuietly(propsStream);
       }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/SimplifyGraphJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/SimplifyGraphJob.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/SimplifyGraphJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/common/SimplifyGraphJob.java Mon Jun 27 12:42:21 2011
@@ -18,7 +18,6 @@
 package org.apache.mahout.graph.common;
 
 import java.io.IOException;
-import java.util.Map;
 import java.util.regex.Pattern;
 
 import org.apache.hadoop.fs.Path;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Triangle.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Triangle.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Triangle.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Triangle.java Mon Jun 27 12:42:21 2011
@@ -92,6 +92,6 @@ public class Triangle implements Writabl
 
   @Override
   public String toString() {
-    return "(" + first.getId() + "," + second.getId() + "," + third.getId() + ")";
+    return "(" + first.getId() + ',' + second.getId() + ',' + third.getId() + ')';
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/UndirectedEdge.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/UndirectedEdge.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/UndirectedEdge.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/UndirectedEdge.java Mon Jun 27 12:42:21 2011
@@ -87,7 +87,7 @@ public class UndirectedEdge implements W
 
   @Override
   public String toString() {
-    return "(" + first.getId() + "," + second.getId() + ")";
+    return "(" + first.getId() + ',' + second.getId() + ')';
   }
 
   @Override

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/UndirectedEdgeWithDegrees.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/UndirectedEdgeWithDegrees.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/UndirectedEdgeWithDegrees.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/UndirectedEdgeWithDegrees.java Mon Jun 27 12:42:21 2011
@@ -87,7 +87,7 @@ public class UndirectedEdgeWithDegrees i
 
   @Override
   public String toString() {
-    return "(" + first + ", " + second + ")";
+    return "(" + first + ", " + second + ')';
   }
 
   @Override

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Vertex.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Vertex.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Vertex.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/Vertex.java Mon Jun 27 12:42:21 2011
@@ -55,13 +55,13 @@ public class Vertex implements WritableC
     return this.id;
   }
 
-  /** Compares this instance to another according to the <code>id</code> attribute. */
+  /** Compares this instance to another according to the {@code id} attribute. */
   @Override
   public int compareTo(Vertex other) {
     return Longs.compare(id, other.id);
   }
 
-  /** Compares this instance to another according to the <code>id</code> attribute */
+  /** Compares this instance to another according to the {@code id} attribute */
   @Override
   public boolean equals(Object other) {
     if (other instanceof Vertex) {
@@ -76,7 +76,7 @@ public class Vertex implements WritableC
   }
 
   /**
-   * The hash code the <code>id</code> attribute
+   * The hash code the {@code id} attribute
    */
   @Override
   public int hashCode() {
@@ -85,7 +85,7 @@ public class Vertex implements WritableC
   
   @Override
   public String toString() {
-    return "(" + id + ")";
+    return "(" + id + ')';
   }
 
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/VertexWithDegree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/VertexWithDegree.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/VertexWithDegree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/model/VertexWithDegree.java Mon Jun 27 12:42:21 2011
@@ -81,6 +81,6 @@ public class VertexWithDegree implements
 
   @Override
   public String toString() {
-    return "(" + vertex + "," + degree + ")";
+    return "(" + vertex + ',' + degree + ')';
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/package-info.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/package-info.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/package-info.java Mon Jun 27 12:42:21 2011
@@ -1,9 +1,9 @@
 /**
  * This package brings graph algorithms to<em>Mahout</em>.
  * <ol>
- * 	<li><code>org.apache.mahout.graph.common</code> contains chainable tools to prepare arbitrary graphs</li>
- * 	<li><code>org.apache.mahout.graph.model</code> models several graph elements</li>
- * 	<li><code>org.apache.mahout.graph.triangles</code> contains triangle enumeration code</li>
+ * 	<li>{@code org.apache.mahout.graph.common} contains chainable tools to prepare arbitrary graphs</li>
+ * 	<li>{@code org.apache.mahout.graph.model} models several graph elements</li>
+ * 	<li>{@code org.apache.mahout.graph.triangles} contains triangle enumeration code</li>
  * </ol>
  */
 package org.apache.mahout.graph;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/JoinableUndirectedEdge.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/JoinableUndirectedEdge.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/JoinableUndirectedEdge.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/triangles/JoinableUndirectedEdge.java Mon Jun 27 12:42:21 2011
@@ -88,7 +88,7 @@ public class JoinableUndirectedEdge impl
 
   @Override
   public String toString() {
-    return "(" + edge + "," +marked + ")";
+    return "(" + edge + ',' + marked + ')';
   }
 
   public static class SecondarySortComparator extends WritableComparator implements Serializable {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java Mon Jun 27 12:42:21 2011
@@ -237,13 +237,4 @@ public class HdfsBackedLanczosState exte
     return diagonalMatrix;
   }
 
-  @Override
-  public void setBasisVector(int i, Vector vector) {
-    super.setBasisVector(i, vector);
-  }
-
-  @Override
-  public void setRightSingularVector(int i, Vector vector) {
-    super.setRightSingularVector(i, vector);
-  }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java Mon Jun 27 12:42:21 2011
@@ -45,9 +45,7 @@ public class Omega {
    */
   public double getQuick(int row, int column) {
     long hash = murmur64((long) row << Integer.SIZE | column, 8, seed);
-    double result = hash / UNIFORM_DIVISOR;
-    //assert result >= -1.0 && result < 1.0;
-    return result;
+    return hash / UNIFORM_DIVISOR;
   }
 
   public void accumDots(int aIndex, double aElement, double[] yRow) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/FeatureVectorEncoder.java Mon Jun 27 12:42:21 2011
@@ -148,7 +148,7 @@ public abstract class FeatureVectorEncod
    * @return An integer in the range [0..numFeatures-1] that has good spread for small changes in
    *         term and probe.
    */
-  protected int hash(byte[] term, int probe, int numFeatures) {
+  protected static int hash(byte[] term, int probe, int numFeatures) {
     long r = MurmurHash.hash64A(term, probe) % numFeatures;
     if (r < 0) {
       r += numFeatures;
@@ -166,7 +166,7 @@ public abstract class FeatureVectorEncod
    * @return An integer in the range [0..numFeatures-1] that has good spread for small changes in
    *         term and probe.
    */
-  protected int hash(String term1, String term2, int probe, int numFeatures) {
+  protected static int hash(String term1, String term2, int probe, int numFeatures) {
     long r = MurmurHash.hash64A(bytesForString(term1), probe);
     r = MurmurHash.hash64A(bytesForString(term2), (int) r) % numFeatures;
     if (r < 0) {
@@ -272,11 +272,7 @@ public abstract class FeatureVectorEncod
     this.traceDictionary = traceDictionary;
   }
 
-  protected byte[] bytesForString(String x) {
-    if (x != null) {
-      return x.getBytes(Charsets.UTF_8);
-    } else {
-      return EMPTY_ARRAY;
-    }
+  protected static byte[] bytesForString(String x) {
+    return x == null ? EMPTY_ARRAY : x.getBytes(Charsets.UTF_8);
   }
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DescriptorUtilsTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DescriptorUtilsTest.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DescriptorUtilsTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DescriptorUtilsTest.java Mon Jun 27 12:42:21 2011
@@ -28,7 +28,7 @@ public final class DescriptorUtilsTest e
 
   /**
    * Test method for
-   * {@link org.apache.mahout.df.data.DescriptorUtils#parseDescriptor(java.lang.String)}.
+   * {@link org.apache.mahout.df.data.DescriptorUtils#parseDescriptor(java.lang.CharSequence)}.
    */
   @Test
   public void testParseDescriptor() throws Exception {
@@ -85,7 +85,7 @@ public final class DescriptorUtilsTest e
     }
   }
   
-  private static void validate(String descriptor, String description) throws DescriptorException {
+  private static void validate(String descriptor, CharSequence description) throws DescriptorException {
     assertEquals(descriptor, DescriptorUtils.generateDescriptor(description));
   }
 

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/DegreeDistributionJobTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/DegreeDistributionJobTest.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/DegreeDistributionJobTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/DegreeDistributionJobTest.java Mon Jun 27 12:42:21 2011
@@ -36,6 +36,8 @@ import java.util.Map;
 
 public class DegreeDistributionJobTest extends MahoutTestCase {
 
+  private static final Splitter TAB = Splitter.on('\t');
+
   @Test
   public void toyIntegrationTest() throws Exception {
 
@@ -73,8 +75,8 @@ public class DegreeDistributionJobTest e
         "--output", outputDir.getAbsolutePath(), "--tempDir", tempDir.getAbsolutePath() });
 
     Map<Integer,Integer> degreeDistribution = Maps.newHashMap();
-    for (String line : new FileLineIterable(new File(outputDir, "part-r-00000"))) {
-      String[] tokens = Iterables.toArray(Splitter.on("\t").split(line), String.class);
+    for (CharSequence line : new FileLineIterable(new File(outputDir, "part-r-00000"))) {
+      String[] tokens = Iterables.toArray(TAB.split(line), String.class);
       degreeDistribution.put(Integer.parseInt(tokens[0]), Integer.parseInt(tokens[1]));
     }
 

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/LocalClusteringCoefficientJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/LocalClusteringCoefficientJob.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/LocalClusteringCoefficientJob.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/LocalClusteringCoefficientJob.java Mon Jun 27 12:42:21 2011
@@ -150,7 +150,7 @@ public class LocalClusteringCoefficientJ
       }
 
       double localClusteringCoefficient = numEdges > 1 ?
-          (double) numTriangles / (double) (numEdges * (numEdges - 1)) : 0d;
+          (double) numTriangles / (double) (numEdges * (numEdges - 1)) : 0.0;
 
       ctx.write(new LongWritable(vertex.getId()), new DoubleWritable(localClusteringCoefficient));
     }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/LocalClusteringCoefficientJobTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/LocalClusteringCoefficientJobTest.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/LocalClusteringCoefficientJobTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/graph/common/LocalClusteringCoefficientJobTest.java Mon Jun 27 12:42:21 2011
@@ -37,70 +37,72 @@ import java.util.Map;
 
 public class LocalClusteringCoefficientJobTest extends MahoutTestCase {
 
-    @Test
-    public void toyIntegrationTest() throws Exception {
+  private static final Splitter TAB = Splitter.on('\t');
 
-      File edgesFile = getTestTempFile("edges.seq");
-      File trianglesFile = getTestTempFile("triangles.seq");
-      File outputDir = getTestTempDir("output");
-      outputDir.delete();
-      File tempDir = getTestTempDir("tmp");
-
-      Configuration conf = new Configuration();
-      FileSystem fs = FileSystem.get(conf);
-
-      SequenceFile.Writer edgesWriter = new SequenceFile.Writer(fs, conf, new Path(edgesFile.getAbsolutePath()),
-          UndirectedEdge.class, NullWritable.class);
-      try {
-        edgesWriter.append(new UndirectedEdge(0, 1), NullWritable.get());
-        edgesWriter.append(new UndirectedEdge(0, 2), NullWritable.get());
-        edgesWriter.append(new UndirectedEdge(0, 3), NullWritable.get());
-        edgesWriter.append(new UndirectedEdge(0, 4), NullWritable.get());
-        edgesWriter.append(new UndirectedEdge(0, 5), NullWritable.get());
-        edgesWriter.append(new UndirectedEdge(0, 6), NullWritable.get());
-        edgesWriter.append(new UndirectedEdge(0, 7), NullWritable.get());
-        edgesWriter.append(new UndirectedEdge(1, 2), NullWritable.get());
-        edgesWriter.append(new UndirectedEdge(1, 3), NullWritable.get());
-        edgesWriter.append(new UndirectedEdge(2, 3), NullWritable.get());
-        edgesWriter.append(new UndirectedEdge(4, 5), NullWritable.get());
-        edgesWriter.append(new UndirectedEdge(4, 7), NullWritable.get());
-      } finally {
-        Closeables.closeQuietly(edgesWriter);
-      }
-
-      SequenceFile.Writer trianglesWriter = new SequenceFile.Writer(fs, conf, new Path(trianglesFile.getAbsolutePath()),
-          Triangle.class, NullWritable.class);
-      try {
-        trianglesWriter.append(new Triangle(0, 1, 2), NullWritable.get());
-        trianglesWriter.append(new Triangle(0, 1, 3), NullWritable.get());
-        trianglesWriter.append(new Triangle(0, 2, 3), NullWritable.get());
-        trianglesWriter.append(new Triangle(0, 4, 5), NullWritable.get());
-        trianglesWriter.append(new Triangle(0, 4, 7), NullWritable.get());
-        trianglesWriter.append(new Triangle(1, 2, 3), NullWritable.get());
-      } finally {
-        Closeables.closeQuietly(trianglesWriter);
-      }
-
-      LocalClusteringCoefficientJob clusteringCoefficientJob = new LocalClusteringCoefficientJob();
-      clusteringCoefficientJob.setConf(conf);
-      clusteringCoefficientJob.run(new String[] { "--edges", edgesFile.getAbsolutePath(),
-          "--triangles", trianglesFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
-          "--tempDir", tempDir.getAbsolutePath() });
-
-      Map<Long,Double> localClusteringCoefficients = Maps.newHashMap();
-      for (String line : new FileLineIterable(new File(outputDir, "part-r-00000"))) {
-        String[] tokens = Iterables.toArray(Splitter.on("\t").split(line), String.class);
-        localClusteringCoefficients.put(Long.parseLong(tokens[0]), Double.parseDouble(tokens[1]));
-      }
-
-      assertEquals(8, localClusteringCoefficients.size());
-      assertEquals(0.119047, localClusteringCoefficients.get(0L), EPSILON);
-      assertEquals(0.5, localClusteringCoefficients.get(1L), EPSILON);
-      assertEquals(0.5, localClusteringCoefficients.get(2L), EPSILON);
-      assertEquals(0.5, localClusteringCoefficients.get(3L), EPSILON);
-      assertEquals(0.333333, localClusteringCoefficients.get(4L), EPSILON);
-      assertEquals(0.5, localClusteringCoefficients.get(5L), EPSILON);
-      assertEquals(0, localClusteringCoefficients.get(6L), EPSILON);
-      assertEquals(0.5, localClusteringCoefficients.get(7L), EPSILON);
+  @Test
+  public void toyIntegrationTest() throws Exception {
+
+    File edgesFile = getTestTempFile("edges.seq");
+    File trianglesFile = getTestTempFile("triangles.seq");
+    File outputDir = getTestTempDir("output");
+    outputDir.delete();
+    File tempDir = getTestTempDir("tmp");
+
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+
+    SequenceFile.Writer edgesWriter = new SequenceFile.Writer(fs, conf, new Path(edgesFile.getAbsolutePath()),
+        UndirectedEdge.class, NullWritable.class);
+    try {
+      edgesWriter.append(new UndirectedEdge(0, 1), NullWritable.get());
+      edgesWriter.append(new UndirectedEdge(0, 2), NullWritable.get());
+      edgesWriter.append(new UndirectedEdge(0, 3), NullWritable.get());
+      edgesWriter.append(new UndirectedEdge(0, 4), NullWritable.get());
+      edgesWriter.append(new UndirectedEdge(0, 5), NullWritable.get());
+      edgesWriter.append(new UndirectedEdge(0, 6), NullWritable.get());
+      edgesWriter.append(new UndirectedEdge(0, 7), NullWritable.get());
+      edgesWriter.append(new UndirectedEdge(1, 2), NullWritable.get());
+      edgesWriter.append(new UndirectedEdge(1, 3), NullWritable.get());
+      edgesWriter.append(new UndirectedEdge(2, 3), NullWritable.get());
+      edgesWriter.append(new UndirectedEdge(4, 5), NullWritable.get());
+      edgesWriter.append(new UndirectedEdge(4, 7), NullWritable.get());
+    } finally {
+      Closeables.closeQuietly(edgesWriter);
+    }
+
+    SequenceFile.Writer trianglesWriter = new SequenceFile.Writer(fs, conf, new Path(trianglesFile.getAbsolutePath()),
+        Triangle.class, NullWritable.class);
+    try {
+      trianglesWriter.append(new Triangle(0, 1, 2), NullWritable.get());
+      trianglesWriter.append(new Triangle(0, 1, 3), NullWritable.get());
+      trianglesWriter.append(new Triangle(0, 2, 3), NullWritable.get());
+      trianglesWriter.append(new Triangle(0, 4, 5), NullWritable.get());
+      trianglesWriter.append(new Triangle(0, 4, 7), NullWritable.get());
+      trianglesWriter.append(new Triangle(1, 2, 3), NullWritable.get());
+    } finally {
+      Closeables.closeQuietly(trianglesWriter);
     }
+
+    LocalClusteringCoefficientJob clusteringCoefficientJob = new LocalClusteringCoefficientJob();
+    clusteringCoefficientJob.setConf(conf);
+    clusteringCoefficientJob.run(new String[] { "--edges", edgesFile.getAbsolutePath(),
+        "--triangles", trianglesFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
+        "--tempDir", tempDir.getAbsolutePath() });
+
+    Map<Long,Double> localClusteringCoefficients = Maps.newHashMap();
+    for (CharSequence line : new FileLineIterable(new File(outputDir, "part-r-00000"))) {
+      String[] tokens = Iterables.toArray(TAB.split(line), String.class);
+      localClusteringCoefficients.put(Long.parseLong(tokens[0]), Double.parseDouble(tokens[1]));
+    }
+
+    assertEquals(8, localClusteringCoefficients.size());
+    assertEquals(0.119047, localClusteringCoefficients.get(0L), EPSILON);
+    assertEquals(0.5, localClusteringCoefficients.get(1L), EPSILON);
+    assertEquals(0.5, localClusteringCoefficients.get(2L), EPSILON);
+    assertEquals(0.5, localClusteringCoefficients.get(3L), EPSILON);
+    assertEquals(0.333333, localClusteringCoefficients.get(4L), EPSILON);
+    assertEquals(0.5, localClusteringCoefficients.get(5L), EPSILON);
+    assertEquals(0.0, localClusteringCoefficients.get(6L), EPSILON);
+    assertEquals(0.5, localClusteringCoefficients.get(7L), EPSILON);
+  }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java Mon Jun 27 12:42:21 2011
@@ -18,8 +18,8 @@
 package org.apache.mahout.analysis;
 
 import java.io.Reader;
+import java.util.Set;
 
-import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.StopFilter;
@@ -37,7 +37,7 @@ public class WikipediaAnalyzer extends S
     super(Version.LUCENE_31, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
   }
   
-  public WikipediaAnalyzer(CharArraySet stopSet) {
+  public WikipediaAnalyzer(Set<?> stopSet) {
     super(Version.LUCENE_31, stopSet);
   }
   

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/DataModelFactorizablePreferences.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/DataModelFactorizablePreferences.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/DataModelFactorizablePreferences.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/DataModelFactorizablePreferences.java Mon Jun 27 12:42:21 2011
@@ -18,6 +18,7 @@
 package org.apache.mahout.cf.taste.example.kddcup.track1.svd;
 
 import com.google.common.collect.Lists;
+import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.impl.common.FastIDSet;
 import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
 import org.apache.mahout.cf.taste.impl.model.GenericPreference;
@@ -58,8 +59,8 @@ public class DataModelFactorizablePrefer
           preferences.add(new GenericPreference(userID, preference.getItemID(), preference.getValue()));
         }
       }
-    } catch (Exception e) {
-      throw new IllegalStateException("Unable to create factorizable preferences!", e);
+    } catch (TasteException te) {
+      throw new IllegalStateException("Unable to create factorizable preferences!", te);
     }
   }
 

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/Track1SVDRunner.java Mon Jun 27 12:42:21 2011
@@ -84,8 +84,8 @@ public class Track1SVDRunner {
     log.info("Estimating validation preferences...");
     int prefsProcessed = 0;
     RunningAverage average = new FullRunningAverage();
-    DataFileIterable validations = new DataFileIterable(KDDCupDataModel.getValidationFile(dataFileDirectory));
-    for (Pair<PreferenceArray,long[]> validationPair : validations) {
+    for (Pair<PreferenceArray,long[]> validationPair :
+         new DataFileIterable(KDDCupDataModel.getValidationFile(dataFileDirectory))) {
       for (Preference validationPref : validationPair.getFirst()) {
         double estimate = estimatePreference(factorization, validationPref.getUserID(), validationPref.getItemID(),
             factorizablePreferences.getMinPreference(), factorizablePreferences.getMaxPreference());
@@ -107,8 +107,8 @@ public class Track1SVDRunner {
     try {
       out = new BufferedOutputStream(new FileOutputStream(resultFile));
 
-      DataFileIterable tests = new DataFileIterable(KDDCupDataModel.getTestFile(dataFileDirectory));
-      for (Pair<PreferenceArray,long[]> testPair : tests) {
+      for (Pair<PreferenceArray,long[]> testPair :
+           new DataFileIterable(KDDCupDataModel.getTestFile(dataFileDirectory))) {
         for (Preference testPref : testPair.getFirst()) {
           double estimate = estimatePreference(factorization, testPref.getUserID(), testPref.getItemID(),
               factorizablePreferences.getMinPreference(), factorizablePreferences.getMaxPreference());

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java Mon Jun 27 12:42:21 2011
@@ -56,6 +56,9 @@ public final class LastfmDataConverter {
     Lastfm(int totalRecords) {
       this.totalRecords = totalRecords;
     }
+    int getTotalRecords() {
+      return totalRecords;
+    }
   }
 
   private LastfmDataConverter() {
@@ -95,7 +98,7 @@ public final class LastfmDataConverter {
    *          Type of dataset - 360K Users or 1K Users
    */
   public static Map<String, List<Integer>> convertToItemFeatures(String inputFile, Lastfm dataSet) throws IOException {
-    long totalRecords = dataSet.totalRecords;
+    long totalRecords = dataSet.getTotalRecords();
     Map<String, Integer> featureIdxMap = Maps.newHashMap();
     Map<String, List<Integer>> itemFeaturesMap = Maps.newHashMap();
     String msg = usedMemory() + "Converting data to internal vector format: ";

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataLine.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataLine.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataLine.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataLine.java Mon Jun 27 12:42:21 2011
@@ -19,14 +19,17 @@ package org.apache.mahout.ga.watchmaker.
 
 import com.google.common.collect.Lists;
 
+import java.util.Collections;
 import java.util.List;
-import java.util.StringTokenizer;
+import java.util.regex.Pattern;
 
 /**
  * Represents one line of a Dataset. Contains only real attributs.
  */
 public class DataLine {
-  
+
+  private static final Pattern COMMA = Pattern.compile(",");
+
   private final double[] attributes;
   
   public DataLine() {
@@ -34,7 +37,7 @@ public class DataLine {
     attributes = new double[nba];
   }
   
-  public DataLine(String dl) {
+  public DataLine(CharSequence dl) {
     this();
     set(dl);
   }
@@ -48,16 +51,13 @@ public class DataLine {
     return attributes[index];
   }
   
-  public void set(String dlstr) {
+  public void set(CharSequence dlstr) {
     DataSet dataset = DataSet.getDataSet();
     
     // extract tokens
-    StringTokenizer tokenizer = new StringTokenizer(dlstr, ",");
     List<String> tokens = Lists.newArrayList();
-    while (tokenizer.hasMoreTokens()) {
-      tokens.add(tokenizer.nextToken());
-    }
-    
+    Collections.addAll(tokens, COMMA.split(dlstr));
+
     // remove any ignored attribute
     List<Integer> ignored = dataset.getIgnoredAttributes();
     for (int index = ignored.size() - 1; index >= 0; index--) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapper.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapper.java Mon Jun 27 12:42:21 2011
@@ -42,9 +42,16 @@ public class CDMapper extends Mapper<Lon
 
   public static final String CLASSDISCOVERY_TARGET_LABEL = "mahout.ga.classdiscovery.target";
 
-  List<Rule> rules;
+  private List<Rule> rules;
+  private int target;
 
-  int target;
+  List<Rule> getRules() {
+    return rules;
+  }
+
+  int getTarget() {
+    return target;
+  }
 
   @Override
   protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/DescriptionUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/DescriptionUtils.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/DescriptionUtils.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/DescriptionUtils.java Mon Jun 27 12:42:21 2011
@@ -18,13 +18,16 @@
 package org.apache.mahout.ga.watchmaker.cd.tool;
 
 import java.util.Collection;
-import java.util.StringTokenizer;
+import java.util.Collections;
+import java.util.regex.Pattern;
 
 /**
  * Utility functions to handle Attribute's description strings.
  */
 public final class DescriptionUtils {
 
+  private static final Pattern COMMA = Pattern.compile(",");
+
   private DescriptionUtils() {
   }
 
@@ -57,10 +60,10 @@ public final class DescriptionUtils {
     return buffer.toString();
   }
   
-  public static double[] extractNumericalRange(String description) {
-    StringTokenizer tokenizer = new StringTokenizer(description, ",");
-    double min = Double.parseDouble(tokenizer.nextToken());
-    double max = Double.parseDouble(tokenizer.nextToken());
+  public static double[] extractNumericalRange(CharSequence description) {
+    String[] tokens = COMMA.split(description);
+    double min = Double.parseDouble(tokens[0]);
+    double max = Double.parseDouble(tokens[1]);
     return new double[] {min,max};
   }
   /**
@@ -70,12 +73,9 @@ public final class DescriptionUtils {
    * @param target the extracted values will be added to this collection. It
    *        will not be cleared.
    */
-  public static void extractNominalValues(String description,
+  public static void extractNominalValues(CharSequence description,
                                           Collection<String> target) {
-    StringTokenizer tokenizer = new StringTokenizer(description, ",");
-    while (tokenizer.hasMoreTokens()) {
-      target.add(tokenizer.nextToken());
-    }
+    Collections.addAll(target, COMMA.split(description));
   }
   
 }

Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java Mon Jun 27 12:42:21 2011
@@ -85,8 +85,8 @@ public final class CDMapperTest extends 
 
     // test the mapper
     DummyOutputCollector<LongWritable, CDFitness> collector = new DummyOutputCollector<LongWritable, CDFitness>();
-    for (int index1 = 0; index1 < mapper.rules.size(); index1++) {
-      CDFitness eval1 = CDMapper.evaluate(mapper.target, mapper.rules.get(index1).classify(dl), dl.getLabel());
+    for (int index1 = 0; index1 < mapper.getRules().size(); index1++) {
+      CDFitness eval1 = CDMapper.evaluate(mapper.getTarget(), mapper.getRules().get(index1).classify(dl), dl.getLabel());
       collector.collect(new LongWritable(index1), eval1);
     }
 

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java Mon Jun 27 12:42:21 2011
@@ -27,7 +27,6 @@ import javax.sql.DataSource;
 import org.apache.commons.dbcp.ConnectionFactory;
 import org.apache.commons.dbcp.PoolableConnectionFactory;
 import org.apache.commons.dbcp.PoolingDataSource;
-import org.apache.commons.pool.PoolableObjectFactory;
 import org.apache.commons.pool.impl.GenericObjectPool;
 
 import com.google.common.base.Preconditions;
@@ -49,9 +48,8 @@ public final class ConnectionPoolDataSou
     objectPool.setTestOnReturn(false);
     objectPool.setTestWhileIdle(true);
     objectPool.setTimeBetweenEvictionRunsMillis(60 * 1000L);
-    PoolableObjectFactory factory = new PoolableConnectionFactory(connectionFactory, objectPool, null,
-      "SELECT 1", false, false);
-    objectPool.setFactory(factory);
+    // Constructor actually sets itself as factory on pool
+    new PoolableConnectionFactory(connectionFactory, objectPool, null, "SELECT 1", false, false);
     delegate = new PoolingDataSource(objectPool);
   }
   

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/ChunkedWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/ChunkedWriter.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/ChunkedWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/ChunkedWriter.java Mon Jun 27 12:42:21 2011
@@ -66,7 +66,7 @@ public final class ChunkedWriter impleme
   }
 
   @Override
-  public void close() throws IOException {
+  public void close() {
     Closeables.closeQuietly(writer);
   }
 }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java Mon Jun 27 12:42:21 2011
@@ -19,6 +19,7 @@ package org.apache.mahout.text;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Arrays;
+import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.apache.lucene.analysis.ASCIIFoldingFilter;
@@ -46,7 +47,7 @@ public final class MailArchivesClusterin
   // extended set of stop words composed of common mail terms like "hi",
   // HTML tags, and Java keywords as many of the messages in the archives
   // are subversion check-in notifications
-  private static final CharArraySet STOP_WORDS = new CharArraySet(Version.LUCENE_31, Arrays.asList(
+  private static final Set<?> STOP_WORDS = new CharArraySet(Version.LUCENE_31, Arrays.asList(
     "3d","7bit","a0","about","above","abstract","across","additional","after",
     "afterwards","again","against","align","all","almost","alone","along",
     "already","also","although","always","am","among","amongst","amoungst",
@@ -108,7 +109,7 @@ public final class MailArchivesClusterin
     super(Version.LUCENE_31, STOP_WORDS);
   }
 
-  public MailArchivesClusteringAnalyzer(CharArraySet stopSet) {
+  public MailArchivesClusteringAnalyzer(Set<?> stopSet) {
     super(Version.LUCENE_31, stopSet);
   }
   

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java Mon Jun 27 12:42:21 2011
@@ -22,7 +22,6 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IOUtils;
 import org.apache.mahout.common.iterator.FileLineIterable;
 
 import java.io.IOException;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java Mon Jun 27 12:42:21 2011
@@ -49,11 +49,11 @@ public class SequenceFilesFromDirectory 
   public static final String[] KEY_PREFIX_OPTION = {"keyPrefix", "prefix"};
   public static final String[] CHARSET_OPTION = {"charset", "c"};
 
-  public void run(Configuration conf,
-                  String keyPrefix,
-                  Map<String, String> options,
-                  Path input,
-                  Path output)
+  public static void run(Configuration conf,
+                         String keyPrefix,
+                         Map<String, String> options,
+                         Path input,
+                         Path output)
     throws InstantiationException, IllegalAccessException, InvocationTargetException, IOException,
            NoSuchMethodException, ClassNotFoundException {
     FileSystem fs = FileSystem.get(input.toUri(), conf);

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java?rev=1140141&r1=1140140&r2=1140141&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java Mon Jun 27 12:42:21 2011
@@ -121,83 +121,10 @@ import java.util.Random;
  * <p><b>Implementation:</b> after J.S. Vitter, An Efficient Algorithm for Sequential Random Sampling,
  * ACM Transactions on Mathematical Software, Vol 13, 1987.
  * Paper available <A HREF="http://www.cs.duke.edu/~jsv"> here</A>.
- *
- * @see RandomSamplingAssistant
  */
+public final class RandomSampler {
 
-/** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
-@Deprecated
-public class RandomSampler {
-
-  //public class RandomSampler extends Object implements java.io.Serializable {
-  private long n;
-  private long N;
-  private long low;
-  private final Random randomGenerator;
-
-  /**
-   * Constructs a random sampler that computes and delivers sorted random sets in blocks. A set block can be retrieved
-   * with method <tt>nextBlock</tt>. Successive calls to method <tt>nextBlock</tt> will deliver as many random numbers
-   * as required.
-   *
-   * @param n               the total number of elements to choose (must be <tt>n &gt;= 0</tt> and <tt>n &lt;= N</tt>).
-   * @param N               the interval to choose random numbers from is <tt>[low,low+N-1]</tt>.
-   * @param low             the interval to choose random numbers from is <tt>[low,low+N-1]</tt>. Hint: If
-   *                        <tt>low==0</tt>, then random numbers will be drawn from the interval <tt>[0,N-1]</tt>.
-   * @param randomGenerator a random number generator. Set this parameter to <tt>null</tt> to use the default random
-   *                        number generator.
-   */
-  public RandomSampler(long n, long N, long low, Random randomGenerator) {
-    if (n < 0) {
-      throw new IllegalArgumentException("n must be >= 0");
-    }
-    if (n > N) {
-      throw new IllegalArgumentException("n must by <= N");
-    }
-    this.n = n;
-    this.N = N;
-    this.low = low;
-
-    if (randomGenerator == null) {
-      randomGenerator = RandomUtils.getRandom();
-    }
-    this.randomGenerator = randomGenerator;
-  }
-
-  Random getRandomGenerator() {
-    return randomGenerator;
-  }
-
-  /**
-   * Computes the next <tt>count</tt> random numbers of the sorted random set specified on instance construction and
-   * fills them into <tt>values</tt>, starting at index <tt>fromIndex</tt>.
-   *
-   * <p>Numbers are filled into the specified array starting at index <tt>fromIndex</tt> to the right. The array is
-   * returned sorted ascending in the range filled with numbers.
-   *
-   * @param count     the number of elements to be filled into <tt>values</tt> by this call (must be &gt;= 0).
-   * @param values    the array into which the random numbers are to be filled; must have a length <tt>&gt;=
-   *                  count+fromIndex</tt>.
-   * @param fromIndex the first index within <tt>values</tt> to be filled with numbers (inclusive).
-   */
-  public void nextBlock(int count, long[] values, int fromIndex) {
-    if (count > n) {
-      throw new IllegalArgumentException("Random sample exhausted.");
-    }
-    if (count < 0) {
-      throw new IllegalArgumentException("Negative count.");
-    }
-
-    if (count == 0) {
-      return;
-    } //nothing to do
-
-    sample(n, N, count, low, values, fromIndex, randomGenerator);
-
-    long lastSample = values[fromIndex + count - 1];
-    n -= count;
-    N = N - lastSample - 1 + low;
-    low = lastSample + 1;
+  private RandomSampler() {
   }
 
   /**
@@ -220,8 +147,8 @@ public class RandomSampler {
    * @param fromIndex       the first index within <tt>values</tt> to be filled with numbers (inclusive).
    * @param randomGenerator a random number generator.
    */
-  protected static void rejectMethodD(long n, long N, int count, long low, long[] values, int fromIndex,
-                                      Random randomGenerator) {
+  private static void rejectMethodD(long n, long N, int count, long low, long[] values, int fromIndex,
+                                    Random randomGenerator) {
     /*  This algorithm is applicable if a large percentage (90%..100%) of N shall be sampled.
       In such cases it is more efficient than sampleMethodA() and sampleMethodD().
         The idea is that it is more efficient to express
@@ -385,8 +312,8 @@ public class RandomSampler {
     if (count == N) { // rare case treated quickly
       long val = low;
       int limit = fromIndex + count;
-      for (int i = fromIndex; i < limit;) {
-        values[i++] = val++;
+      for (int i = fromIndex; i < limit; i++) {
+        values[i] = val++;
       }
       return;
     }
@@ -420,8 +347,8 @@ public class RandomSampler {
    * @param fromIndex       the first index within <tt>values</tt> to be filled with numbers (inclusive).
    * @param randomGenerator a random number generator.
    */
-  protected static void sampleMethodA(long n, long N, int count, long low, long[] values, int fromIndex,
-                                      Random randomGenerator) {
+  private static void sampleMethodA(long n, long N, int count, long low, long[] values, int fromIndex,
+                                    Random randomGenerator) {
     long chosen = -1 + low;
 
     double top = N - n;
@@ -472,8 +399,8 @@ public class RandomSampler {
    * @param fromIndex       the first index within <tt>values</tt> to be filled with numbers (inclusive).
    * @param randomGenerator a random number generator.
    */
-  protected static void sampleMethodD(long n, long N, int count, long low, long[] values, int fromIndex,
-                                      Random randomGenerator) {
+  private static void sampleMethodD(long n, long N, int count, long low, long[] values, int fromIndex,
+                                    Random randomGenerator) {
     long chosen = -1 + low;
 
     double nreal = n;



Mime
View raw message