mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r986405 [1/6] - in /mahout/trunk: buildtools/ buildtools/src/main/resources/ core/src/main/java/org/apache/mahout/cf/taste/eval/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/item...
Date Tue, 17 Aug 2010 17:34:19 GMT
Author: srowen
Date: Tue Aug 17 17:34:14 2010
New Revision: 986405

URL: http://svn.apache.org/viewvc?rev=986405&view=rev
Log:
Another large attempt at removing Javadoc, PMD, and Checkstyle warnings. Started to remove some code in math/ that appears to be dead, because it has been duplicated and improved separately.

Removed:
    mahout/trunk/math/src/main/java/org/apache/mahout/math/Timer.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/list/
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Blas.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/CholeskyDecomposition.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Matrix2DMatrix2DFunction.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/SeqBlas.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/SingularValueDecomposition.java
Modified:
    mahout/trunk/buildtools/pom.xml
    mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/FeatureLabelComparator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/ErrorEstimate.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/callback/ForestPredictions.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Dataset.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemBuilder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemInputFormat.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step0Job.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step2Mapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/TreeID.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Node.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/split/DefaultIgSplit.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/split/IgSplit.java
    mahout/trunk/core/src/main/java/org/apache/mahout/df/split/OptIgSplit.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SimilarityMatrixEntryKey.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredZeroAssumingCosineVectorSimilarity.java
    mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarityTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/OnlineLogisticRegressionTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/common/AbstractJobTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyReporter.java
    mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DatasetTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/data/Utils.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/InterResultsTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/MockContext.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialSequentialBuilder.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartitionBugTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step0JobTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step1MapperTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step2MapperTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/split/DefaultIgSplitTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/df/split/OptIgSplitTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedEuclideanDistanceVectorSimilarityTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedPearsonCorrelationVectorSimilarityTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedTanimotoCoefficientVectorSimilarityTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredCosineVectorSimilarityTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredZeroAssumingCosineVectorSimilarityTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedVectorSimilarityTestCase.java
    mahout/trunk/eclipse/pom.xml
    mahout/trunk/eclipse/src/main/resources/mahout-pmd-ruleset.xml
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/SplitBayesInput.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java
    mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/bayes/SplitBayesInputTest.java
    mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/Algebra.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseVector.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonMatrixAdapter.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonVectorAdapter.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/Matrix.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/OrthonormalityVerifier.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/QRDecomposition.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/SparseRowMatrix.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/AsyncEigenVerifier.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/function/Functions.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/function/VectorFunctions.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/Descriptive.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/UnknownDoubleQuantileEstimator.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/Utils.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory1D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory3D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix3D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/doublealgo/Statistic.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/doublealgo/Transform.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix1D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/RCDoubleMatrix2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/TridiagonalDoubleMatrix2D.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/EigenvalueDecomposition.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecomposition.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/QRDecomposition.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineAuc.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineSummarizer.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/AbstractTestVector.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/QRDecompositionTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/TestVectorView.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/VectorTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineAucTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java
    mahout/trunk/maven/src/main/resources/mahout-pmd-ruleset.xml
    mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TFPartialVectorReducer.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountMapper.java
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFPartialVectorReducer.java
    mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java
    mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java

Modified: mahout/trunk/buildtools/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/buildtools/pom.xml?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/buildtools/pom.xml (original)
+++ mahout/trunk/buildtools/pom.xml Tue Aug 17 17:34:14 2010
@@ -30,7 +30,7 @@
     <version>6</version>
   </parent>
   
-  <name>Buildtools - jar file used to configure PMD and Checkstyle</name>
+  <name>Mahout Build Tools</name>
 
   <packaging>jar</packaging>
 

Modified: mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml (original)
+++ mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml Tue Aug 17 17:34:14 2010
@@ -127,7 +127,7 @@
     <!--<rule ref="rulesets/junit.xml/JUnitStaticSuite"/>-->
     <!--<rule ref="rulesets/junit.xml/JUnitSpelling"/>-->
     <!--<rule ref="rulesets/junit.xml/JUnitAssertionsShouldIncludeMessage"/>-->
-    <rule ref="rulesets/junit.xml/JUnitTestsShouldIncludeAssert"/>
+    <!--<rule ref="rulesets/junit.xml/JUnitTestsShouldIncludeAssert"/>-->
     <!--<rule ref="rulesets/junit.xml/TestClassWithoutTestCases"/>-->
     <!--<rule ref="rulesets/junit.xml/UnnecessaryBooleanAssertion"/>-->
     <!--<rule ref="rulesets/junit.xml/UseAssertEqualsInsteadOfAssertTrue"/>-->
@@ -148,7 +148,7 @@
     <rule ref="rulesets/naming.xml/VariableNamingConventions"/>
     <rule ref="rulesets/naming.xml/MethodNamingConventions"/>
     <rule ref="rulesets/naming.xml/ClassNamingConventions"/>
-    <rule ref="rulesets/naming.xml/AbstractNaming"/>
+    <!--<rule ref="rulesets/naming.xml/AbstractNaming"/>-->
     <!--<rule ref="rulesets/naming.xml/AvoidDollarSigns"/>-->
     <!--<rule ref="rulesets/naming.xml/MethodWithSameNameAsEnclosingClass"/>-->
     <!--<rule ref="rulesets/naming.xml/SuspiciousHashcodeMethodName"/>-->
@@ -179,8 +179,8 @@
     <!-- <rule ref="rulesets/strings.xml/AvoidConcatenatingNonLiteralsInStringBuffer"/>-->
     <rule ref="rulesets/strings.xml/UnnecessaryCaseChange"/>
 
-    <rule ref="rulesets/sunsecure.xml/MethodReturnsInternalArray"/>
-    <rule ref="rulesets/sunsecure.xml/ArrayIsStoredDirectly"/>
+    <!--<rule ref="rulesets/sunsecure.xml/MethodReturnsInternalArray"/>-->
+    <!--<rule ref="rulesets/sunsecure.xml/ArrayIsStoredDirectly"/>-->
 
     <rule ref="rulesets/unusedcode.xml/UnusedLocalVariable"/>
     <rule ref="rulesets/unusedcode.xml/UnusedPrivateField"/>

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java Tue Aug 17 17:34:14 2010
@@ -62,7 +62,7 @@ public interface RecommenderEvaluator {
    * @param recommenderBuilder
    *          object that can build a {@link org.apache.mahout.cf.taste.recommender.Recommender} to test
    * @param dataModelBuilder
-   *          @param dataModelBuilder {@link DataModelBuilder} to use, or if null, a default {@link DataModel}
+   *          {@link DataModelBuilder} to use, or if null, a default {@link DataModel}
    *          implementation will be used
    * @param dataModel
    *          dataset to test on

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java Tue Aug 17 17:34:14 2010
@@ -35,27 +35,26 @@ import java.nio.charset.Charset;
 import java.util.regex.Pattern;
 
 /**
- * some helper methods for the hadoop-related stuff in org.apache.mahout.cf.taste
+ * Some helper methods for the hadoop-related stuff in org.apache.mahout.cf.taste
  */
 public final class TasteHadoopUtils {
 
-  /** standard delimiter of textual preference data */
+  /** Standard delimiter of textual preference data */
   private static final Pattern PREFERENCE_TOKEN_DELIMITER = Pattern.compile("[\t,]");
 
   private TasteHadoopUtils() {
   }
 
   /**
-   * splits a preference data line into string tokens
-   *
-   * @param line
-   * @return
+   * Splits a preference data line into string tokens
    */
-  public static String[] splitPrefTokens(String line) {
+  public static String[] splitPrefTokens(CharSequence line) {
     return PREFERENCE_TOKEN_DELIMITER.split(line);
   }
 
-  /** a path filter used to read files written by hadoop */
+  /**
+   * A path filter used to read files written by Hadoop.
+   */
   public static final PathFilter PARTS_FILTER = new PathFilter() {
     @Override
     public boolean accept(Path path) {
@@ -64,21 +63,14 @@ public final class TasteHadoopUtils {
   };
 
   /**
-   * maps a long to an int
-   *
-   * @param id
-   * @return
+   * Maps a long to an int
    */
   public static int idToIndex(long id) {
     return 0x7FFFFFFF & ((int) id ^ (int) (id >>> 32));
   }
 
   /**
-   * reads a binary mapping file
-   * 
-   * @param itemIDIndexPathStr
-   * @param conf
-   * @return
+   * Reads a binary mapping file
    */
   public static OpenIntLongHashMap readItemIDIndexMap(String itemIDIndexPathStr, Configuration conf) {
     OpenIntLongHashMap indexItemIDMap = new OpenIntLongHashMap();
@@ -104,16 +96,11 @@ public final class TasteHadoopUtils {
   }
 
   /**
-   * reads a text-based outputfile that only contains an int
-   * 
-   * @param conf
-   * @param outputDir
-   * @return
-   * @throws IOException
+   * Reads a text-based outputfile that only contains an int
    */
   public static int readIntFromFile(Configuration conf, Path outputDir) throws IOException {
     FileSystem fs = FileSystem.get(outputDir.toUri(), conf);
-    Path outputFile = fs.listStatus(outputDir, TasteHadoopUtils.PARTS_FILTER)[0].getPath();
+    Path outputFile = fs.listStatus(outputDir, PARTS_FILTER)[0].getPath();
     InputStream in = null;
     try  {
       in = fs.open(outputFile);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java Tue Aug 17 17:34:14 2010
@@ -33,12 +33,16 @@ import org.apache.mahout.common.FileLine
 import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.VarLongWritable;
 import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.Vector.Element;
 import org.apache.mahout.math.function.UnaryFunction;
 import org.apache.mahout.math.map.OpenIntLongHashMap;
 
 import java.io.IOException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.PriorityQueue;
+import java.util.Queue;
 
 /**
  * <p>computes prediction values for each user</p>
@@ -127,7 +131,7 @@ public final class AggregateAndRecommend
           : predictionVector.plus(prefAndSimilarityColumn.getSimilarityColumn());
     }
 
-    Iterator<Element> predictions = predictionVector.iterateNonZero();
+    Iterator<Vector.Element> predictions = predictionVector.iterateNonZero();
     List<RecommendedItem> recommendations = new ArrayList<RecommendedItem>();
     while (predictions.hasNext() && recommendations.size() < recommendationsPerUser) {
       Vector.Element prediction = predictions.next();
@@ -159,7 +163,7 @@ public final class AggregateAndRecommend
       Vector simColumn = prefAndSimilarityColumn.getSimilarityColumn();
       float prefValue = prefAndSimilarityColumn.getPrefValue();
       /* count the number of items used for each prediction */
-      Iterator<Element> usedItemsIterator = simColumn.iterateNonZero();
+      Iterator<Vector.Element> usedItemsIterator = simColumn.iterateNonZero();
       while (usedItemsIterator.hasNext()) {
         int itemIDIndex = usedItemsIterator.next().index();
         numberOfSimilarItemsUsed.setQuick(itemIDIndex, numberOfSimilarItemsUsed.getQuick(itemIDIndex) + 1);
@@ -178,9 +182,9 @@ public final class AggregateAndRecommend
     }
 
     Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
-    Iterator<Element> iterator = numerators.iterateNonZero();
+    Iterator<Vector.Element> iterator = numerators.iterateNonZero();
     while (iterator.hasNext()) {
-      Element element = iterator.next();
+      Vector.Element element = iterator.next();
       int itemIDIndex = element.index();
       /* preference estimations must be based on at least 2 datapoints */
       if (numberOfSimilarItemsUsed.getQuick(itemIDIndex) > 1) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java Tue Aug 17 17:34:14 2010
@@ -23,7 +23,7 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
 import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
-import org.apache.mahout.math.Vector.Element;
+import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 import org.apache.mahout.math.map.OpenIntLongHashMap;
 
@@ -61,10 +61,10 @@ public final class MostSimilarItemPairsM
     Queue<SimilarItem> topMostSimilarItems = new PriorityQueue<SimilarItem>(maxSimilarItemsPerItem + 1,
         Collections.reverseOrder(SimilarItem.COMPARE_BY_SIMILARITY));
 
-    Iterator<Element> similarityVectorIterator = similarityVector.get().iterateNonZero();
+    Iterator<Vector.Element> similarityVectorIterator = similarityVector.get().iterateNonZero();
 
     while (similarityVectorIterator.hasNext()) {
-      Element element = similarityVectorIterator.next();
+      Vector.Element element = similarityVectorIterator.next();
       int index = element.index();
       double value = element.get();
       /* ignore self similarities */

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java Tue Aug 17 17:34:14 2010
@@ -83,16 +83,14 @@ public class BayesThetaNormalizerMapper 
       DefaultStringifier<Map<String,Double>> mapStringifier = new DefaultStringifier<Map<String,Double>>(job,
           GenericsUtil.getClass(labelWeightSumTemp));
       
-      String labelWeightSumString = mapStringifier.toString(labelWeightSumTemp);
-      labelWeightSumString = job.get("cnaivebayes.sigma_k", labelWeightSumString);
+      String labelWeightSumString = job.get("cnaivebayes.sigma_k", mapStringifier.toString(labelWeightSumTemp));
       labelWeightSumTemp = mapStringifier.fromString(labelWeightSumString);
       for (Map.Entry<String, Double> stringDoubleEntry : labelWeightSumTemp.entrySet()) {
         this.labelWeightSum.put(stringDoubleEntry.getKey(), stringDoubleEntry.getValue());
       }
       DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(job, GenericsUtil
           .getClass(sigmaJSigmaK));
-      String sigmaJSigmaKString = stringifier.toString(sigmaJSigmaK);
-      sigmaJSigmaKString = job.get("cnaivebayes.sigma_jSigma_k", sigmaJSigmaKString);
+      String sigmaJSigmaKString = job.get("cnaivebayes.sigma_jSigma_k", stringifier.toString(sigmaJSigmaK));
       sigmaJSigmaK = stringifier.fromString(sigmaJSigmaKString);
       
       String vocabCountString = stringifier.toString(vocabCount);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java Tue Aug 17 17:34:14 2010
@@ -112,8 +112,7 @@ public class CBayesThetaNormalizerMapper
       DefaultStringifier<Map<String,Double>> mapStringifier = new DefaultStringifier<Map<String,Double>>(job,
           GenericsUtil.getClass(labelWeightSumTemp));
       
-      String labelWeightSumString = mapStringifier.toString(labelWeightSumTemp);
-      labelWeightSumString = job.get("cnaivebayes.sigma_k", labelWeightSumString);
+      String labelWeightSumString = job.get("cnaivebayes.sigma_k", mapStringifier.toString(labelWeightSumTemp));
       labelWeightSumTemp = mapStringifier.fromString(labelWeightSumString);
       for (Map.Entry<String, Double> stringDoubleEntry : labelWeightSumTemp.entrySet()) {
         this.labelWeightSum.put(stringDoubleEntry.getKey(), stringDoubleEntry.getValue());
@@ -121,12 +120,10 @@ public class CBayesThetaNormalizerMapper
       
       DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(job, GenericsUtil
           .getClass(sigmaJSigmaK));
-      String sigmaJSigmaKString = stringifier.toString(sigmaJSigmaK);
-      sigmaJSigmaKString = job.get("cnaivebayes.sigma_jSigma_k", sigmaJSigmaKString);
+      String sigmaJSigmaKString = job.get("cnaivebayes.sigma_jSigma_k", stringifier.toString(sigmaJSigmaK));
       sigmaJSigmaK = stringifier.fromString(sigmaJSigmaKString);
       
-      String vocabCountString = stringifier.toString(vocabCount);
-      vocabCountString = job.get("cnaivebayes.vocabCount", vocabCountString);
+      String vocabCountString = job.get("cnaivebayes.vocabCount", stringifier.toString(vocabCount));
       vocabCount = stringifier.fromString(vocabCountString);
       
       Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java Tue Aug 17 17:34:14 2010
@@ -36,7 +36,6 @@ import org.apache.mahout.common.Paramete
 import org.apache.mahout.common.StringTuple;
 import org.apache.mahout.common.iterator.ArrayIterator;
 import org.apache.mahout.math.function.ObjectIntProcedure;
-import org.apache.mahout.math.function.ObjectProcedure;
 import org.apache.mahout.math.map.OpenObjectIntHashMap;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java Tue Aug 17 17:34:14 2010
@@ -95,8 +95,8 @@ public class BayesTfIdfMapper extends Ma
       DefaultStringifier<Map<String,Double>> mapStringifier = new DefaultStringifier<Map<String,Double>>(job,
           GenericsUtil.getClass(labelDocCountTemp));
       
-      String labelDocumentCountString = mapStringifier.toString(labelDocCountTemp);
-      labelDocumentCountString = job.get("cnaivebayes.labelDocumentCounts", labelDocumentCountString);
+      String labelDocumentCountString =
+          job.get("cnaivebayes.labelDocumentCounts", mapStringifier.toString(labelDocCountTemp));
       
       labelDocCountTemp = mapStringifier.fromString(labelDocumentCountString);
       for (Map.Entry<String, Double> stringDoubleEntry : labelDocCountTemp.entrySet()) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/FeatureLabelComparator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/FeatureLabelComparator.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/FeatureLabelComparator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/FeatureLabelComparator.java Tue Aug 17 17:34:14 2010
@@ -23,14 +23,8 @@ import org.apache.hadoop.io.WritableComp
 import org.apache.hadoop.io.WritableComparator;
 import org.apache.mahout.common.StringTuple;
 
-/**
- * 
- */
 public class FeatureLabelComparator extends WritableComparator {
-  
-  /**
-   * @param keyClass
-   */
+
   public FeatureLabelComparator() {
     super(StringTuple.class, true);
   }
@@ -39,37 +33,31 @@ public class FeatureLabelComparator exte
   public int compare(WritableComparable a, WritableComparable b) {
     StringTuple ta = (StringTuple) a;
     StringTuple tb = (StringTuple) b;
-    
-    String tmpa, tmpb;
-    int cmp;
-    
-    if (ta.length() < 2 || ta.length() > 3 || tb.length() < 2
-        || tb.length() > 3) {
+
+    if (ta.length() < 2 || ta.length() > 3 || tb.length() < 2 || tb.length() > 3) {
       throw new IllegalArgumentException("StringTuple length out of bounds");
     }
     
     // token
-    tmpa = ta.length() == 2 ? ta.stringAt(1) : ta.stringAt(2);
-    tmpb = tb.length() == 2 ? tb.stringAt(1) : tb.stringAt(2);
-    cmp = tmpa.compareTo(tmpb);
-    if (cmp != 0) return cmp;
+    String tmpa = ta.length() == 2 ? ta.stringAt(1) : ta.stringAt(2);
+    String tmpb = tb.length() == 2 ? tb.stringAt(1) : tb.stringAt(2);
+    int cmp = tmpa.compareTo(tmpb);
+    if (cmp != 0) {
+      return cmp;
+    }
     
     // type, FEATURE_TF first, then FEATURE_COUNT, then DF or anything else.
     cmp = ta.stringAt(0).compareTo(tb.stringAt(0));
     if (cmp != 0) {
       if (ta.stringAt(0).equals(BayesConstants.FEATURE_TF)) {
         return -1;
-      }
-      else if (tb.stringAt(0).equals(BayesConstants.FEATURE_TF)) {
+      } else if (tb.stringAt(0).equals(BayesConstants.FEATURE_TF)) {
         return 1;
-      }
-      else if (ta.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) {
+      } else if (ta.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) {
         return -1;
-      }
-      else if (tb.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) {
+      } else if (tb.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) {
         return 1;
-      }
-      else {
+      } else {
         return cmp;
       }
     }
@@ -80,7 +68,6 @@ public class FeatureLabelComparator exte
     
     cmp = tmpa.compareTo(tmpb);
     return cmp;
-    
   }
   
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java Tue Aug 17 17:34:14 2010
@@ -19,7 +19,6 @@ package org.apache.mahout.classifier.dis
 import java.util.Iterator;
 
 import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.Vector.Element;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -70,18 +69,18 @@ public class WinnowTrainer extends Linea
       // case one
       Vector updateVector = dataPoint.times(1 / this.promotionStep);
       log.info("Winnow update positive: {}", updateVector);
-      Iterator<Element> iter = updateVector.iterateNonZero();
+      Iterator<Vector.Element> iter = updateVector.iterateNonZero();
       while (iter.hasNext()) {
-        Element element = iter.next();
+        Vector.Element element = iter.next();
         model.timesDelta(element.index(), element.get());
       }
     } else {
       // case two
       Vector updateVector = dataPoint.times(1 / this.promotionStep);
       log.info("Winnow update negative: {}", updateVector);
-      Iterator<Element> iter = updateVector.iterateNonZero();
+      Iterator<Vector.Element> iter = updateVector.iterateNonZero();
       while (iter.hasNext()) {
-        Element element = iter.next();
+        Vector.Element element = iter.next();
         model.timesDelta(element.index(), element.get());
       }
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java Tue Aug 17 17:34:14 2010
@@ -76,11 +76,11 @@ public abstract class AbstractOnlineLogi
   private Vector logisticLink(Vector v) {
     double max = v.maxValue();
     if (max < 40) {
-      v.assign(Functions.exp);
+      v.assign(Functions.EXP);
       double sum = 1 + v.norm(1);
       return v.divide(sum);
     } else {
-      v.assign(Functions.minus(max)).assign(Functions.exp);
+      v.assign(Functions.minus(max)).assign(Functions.EXP);
       return v;
     }
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java Tue Aug 17 17:34:14 2010
@@ -12,7 +12,6 @@ import org.apache.mahout.math.JsonVector
 import org.apache.mahout.math.NamedVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.math.Vector.Element;
 import org.apache.mahout.math.function.SquareRootFunction;
 
 import com.google.gson.Gson;
@@ -214,11 +213,7 @@ public abstract class AbstractCluster im
    * @return the new centroid
    */
   public Vector computeCentroid() {
-    if (s0 == 0) {
-      return getCenter();
-    } else {
-      return s1.divide(s0);
-    }
+    return s0 == 0 ? getCenter() : s1.divide(s0);
   }
 
   /**
@@ -235,7 +230,7 @@ public abstract class AbstractCluster im
       buf.append(((NamedVector) v).getName()).append(" = ");
     }
     int nzero = 0;
-    Iterator<Element> iterateNonZero = v.iterateNonZero();
+    Iterator<Vector.Element> iterateNonZero = v.iterateNonZero();
     while (iterateNonZero.hasNext()) {
       iterateNonZero.next();
       nzero++;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java Tue Aug 17 17:34:14 2010
@@ -88,7 +88,6 @@ public abstract class ClusterBase implem
 
   /**
    * @deprecated
-   * @return
    */
   @Deprecated
   public abstract String asFormatString();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java Tue Aug 17 17:34:14 2010
@@ -18,7 +18,6 @@
 package org.apache.mahout.clustering.canopy;
 
 import java.io.DataInput;
-import java.io.DataOutput;
 import java.io.IOException;
 
 import org.apache.mahout.clustering.AbstractCluster;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Tue Aug 17 17:34:14 2010
@@ -29,7 +29,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
@@ -81,7 +81,8 @@ public class DirichletDriver extends Abs
   }
 
   @Override
-  public int run(String[] args) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
+  public int run(String[] args)
+      throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
       NoSuchMethodException, InvocationTargetException, InterruptedException {
     addInputOption();
     addOutputOption();
@@ -121,7 +122,8 @@ public class DirichletDriver extends Abs
     double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
     double alpha0 = Double.parseDouble(getOption(ALPHA_OPTION));
     boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
-    boolean runSequential = (getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD));
+    boolean runSequential = (getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
+        DefaultOptionCreator.SEQUENTIAL_METHOD));
 
     job(input,
         output,
@@ -164,7 +166,6 @@ public class DirichletDriver extends Abs
    * @param threshold 
    *          a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
    * @param runSequential execute sequentially if true
-   * @throws InterruptedException 
    */
   public static void runJob(Path input,
                             Path output,
@@ -177,7 +178,8 @@ public class DirichletDriver extends Abs
                             boolean runClustering,
                             boolean emitMostLikely,
                             double threshold,
-                            boolean runSequential) throws ClassNotFoundException, InstantiationException, IllegalAccessException,
+                            boolean runSequential)
+      throws ClassNotFoundException, InstantiationException, IllegalAccessException,
       IOException, SecurityException, NoSuchMethodException, InvocationTargetException, InterruptedException {
 
     new DirichletDriver().job(input,
@@ -214,7 +216,8 @@ public class DirichletDriver extends Abs
                                                     String modelPrototype,
                                                     int prototypeSize,
                                                     int numModels,
-                                                    double alpha0) throws ClassNotFoundException, InstantiationException,
+                                                    double alpha0)
+      throws ClassNotFoundException, InstantiationException,
       IllegalAccessException, SecurityException, NoSuchMethodException, IllegalArgumentException, InvocationTargetException {
 
     ClassLoader ccl = Thread.currentThread().getContextClassLoader();
@@ -238,7 +241,7 @@ public class DirichletDriver extends Abs
     int protoSize = 0;
     for (FileStatus s : status) {
       SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
-      WritableComparable<?> key = (WritableComparable<?>) reader.getKeyClass().newInstance();
+      Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
       VectorWritable value = new VectorWritable();
       if (reader.next(key, value)) {
         protoSize = value.get().size();
@@ -258,13 +261,6 @@ public class DirichletDriver extends Abs
    * @param prototypeSize the int size of the modelPrototype vectors
    * @param numModels the int number of models to generate
    * @param alpha0 the double alpha_0 argument to the DirichletDistribution
-   * @throws ClassNotFoundException
-   * @throws InstantiationException
-   * @throws IllegalAccessException
-   * @throws IOException
-   * @throws SecurityException
-   * @throws NoSuchMethodException
-   * @throws InvocationTargetException
    */
   private void writeInitialState(Path output,
                                  Path stateOut,
@@ -272,7 +268,8 @@ public class DirichletDriver extends Abs
                                  String modelPrototype,
                                  int prototypeSize,
                                  int numModels,
-                                 double alpha0) throws ClassNotFoundException, InstantiationException, IllegalAccessException,
+                                 double alpha0)
+      throws ClassNotFoundException, InstantiationException, IllegalAccessException,
       IOException, SecurityException, NoSuchMethodException, InvocationTargetException {
 
     DirichletState<VectorWritable> state = createState(modelFactory, modelPrototype, prototypeSize, numModels, alpha0);
@@ -376,13 +373,6 @@ public class DirichletDriver extends Abs
    * @param threshold 
    *          a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
    * @param runSequential execute sequentially if true
-   * @throws IOException
-   * @throws InstantiationException
-   * @throws IllegalAccessException
-   * @throws ClassNotFoundException
-   * @throws NoSuchMethodException
-   * @throws InvocationTargetException
-   * @throws InterruptedException
    */
   public void job(Path input,
                   Path output,
@@ -395,7 +385,8 @@ public class DirichletDriver extends Abs
                   boolean runClustering,
                   boolean emitMostLikely,
                   double threshold,
-                  boolean runSequential) throws IOException, InstantiationException, IllegalAccessException,
+                  boolean runSequential)
+      throws IOException, InstantiationException, IllegalAccessException,
       ClassNotFoundException, NoSuchMethodException, InvocationTargetException, InterruptedException {
     Path clustersOut = buildClusters(input,
                                      output,
@@ -407,7 +398,12 @@ public class DirichletDriver extends Abs
                                      numReducers,
                                      runSequential);
     if (runClustering) {
-      clusterData(input, clustersOut, new Path(output, Cluster.CLUSTERED_POINTS_DIR), emitMostLikely, threshold, runSequential);
+      clusterData(input,
+                  clustersOut,
+                  new Path(output, Cluster.CLUSTERED_POINTS_DIR),
+                  emitMostLikely,
+                  threshold,
+                  runSequential);
     }
   }
 
@@ -475,36 +471,16 @@ public class DirichletDriver extends Abs
     return clustersIn;
   }
 
-  /**
-   * @param input
-   * @param output
-   * @param modelFactory
-   * @param modelPrototype
-   * @param numClusters
-   * @param maxIterations
-   * @param alpha0
-   * @param numReducers
-   * @param clustersIn
-   * @param protoSize
-   * @return
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
-   * @throws InvocationTargetException 
-   * @throws NoSuchMethodException 
-   * @throws IllegalAccessException 
-   * @throws InstantiationException 
-   */
-  private Path buildClustersSeq(Path input,
-                                Path output,
-                                String modelFactory,
-                                String modelPrototype,
-                                int numClusters,
-                                int maxIterations,
-                                double alpha0,
-                                int numReducers,
-                                Path clustersIn,
-                                int protoSize)
+  private static Path buildClustersSeq(Path input,
+                                       Path output,
+                                       String modelFactory,
+                                       String modelPrototype,
+                                       int numClusters,
+                                       int maxIterations,
+                                       double alpha0,
+                                       int numReducers,
+                                       Path clustersIn,
+                                       int protoSize)
       throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
              NoSuchMethodException, InvocationTargetException {
     for (int iteration = 1; iteration <= maxIterations; iteration++) {
@@ -526,7 +502,7 @@ public class DirichletDriver extends Abs
       for (FileStatus s : status) {
         SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
         try {
-          WritableComparable<?> key = (WritableComparable<?>) reader.getKeyClass().newInstance();
+          Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
           VectorWritable vw = (VectorWritable) reader.getValueClass().newInstance();
           while (reader.next(key, vw)) {
             clusterer.observe(newModels, vw);
@@ -545,37 +521,29 @@ public class DirichletDriver extends Abs
     return clustersIn;
   }
 
-  /**
-   * @param input
-   * @param output
-   * @param modelFactory
-   * @param modelPrototype
-   * @param numClusters
-   * @param maxIterations
-   * @param alpha0
-   * @param numReducers
-   * @param clustersIn
-   * @param protoSize
-   * @return
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
-   */
-  private Path buildClustersMR(Path input,
-                               Path output,
-                               String modelFactory,
-                               String modelPrototype,
-                               int numClusters,
-                               int maxIterations,
-                               double alpha0,
-                               int numReducers,
-                               Path clustersIn,
-                               int protoSize) throws IOException, InterruptedException, ClassNotFoundException {
+  private static Path buildClustersMR(Path input,
+                                      Path output,
+                                      String modelFactory,
+                                      String modelPrototype,
+                                      int numClusters,
+                                      int maxIterations,
+                                      double alpha0,
+                                      int numReducers,
+                                      Path clustersIn,
+                                      int protoSize) throws IOException, InterruptedException, ClassNotFoundException {
     for (int iteration = 1; iteration <= maxIterations; iteration++) {
       log.info("Iteration {}", iteration);
       // point the output to a new directory per iteration
       Path clustersOut = new Path(output, Cluster.CLUSTERS_DIR + iteration);
-      runIteration(input, clustersIn, clustersOut, modelFactory, modelPrototype, protoSize, numClusters, alpha0, numReducers);
+      runIteration(input,
+                   clustersIn,
+                   clustersOut,
+                   modelFactory,
+                   modelPrototype,
+                   protoSize,
+                   numClusters,
+                   alpha0,
+                   numReducers);
       // now point the input to the old output directory
       clustersIn = clustersOut;
     }
@@ -596,13 +564,13 @@ public class DirichletDriver extends Abs
    * @param threshold 
    *          a double threshold value emits all clusters having greater pdf (emitMostLikely = false)
    * @param runSequential execute sequentially if true
-   * @throws ClassNotFoundException 
-   * @throws InterruptedException 
-   * @throws IOException 
-   * @throws IllegalAccessException 
-   * @throws InstantiationException 
    */
-  public void clusterData(Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold, boolean runSequential)
+  public static void clusterData(Path input,
+                                 Path stateIn,
+                                 Path output,
+                                 boolean emitMostLikely,
+                                 double threshold,
+                                 boolean runSequential)
       throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     if (runSequential) {
       clusterDataSeq(input, stateIn, output, emitMostLikely, threshold);
@@ -632,7 +600,7 @@ public class DirichletDriver extends Abs
                                                            IntWritable.class,
                                                            WeightedVectorWritable.class);
       try {
-        WritableComparable<?> key = (WritableComparable<?>) reader.getKeyClass().newInstance();
+        Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
         VectorWritable vw = (VectorWritable) reader.getValueClass().newInstance();
         while (reader.next(key, vw)) {
           clusterer.emitPointToClusters(vw, clusters, writer);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java Tue Aug 17 17:34:14 2010
@@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.clustering.kmeans.OutputLogFilter;
@@ -37,7 +38,8 @@ public class DirichletMapper extends Map
   private DirichletClusterer<VectorWritable> clusterer;
 
   @Override
-  protected void map(WritableComparable<?> key, VectorWritable v, Context context) throws IOException, InterruptedException {
+  protected void map(WritableComparable<?> key, VectorWritable v, Context context)
+      throws IOException, InterruptedException {
     int k = clusterer.assignToModel(v);
     context.write(new Text(String.valueOf(k)), v);
   }
@@ -98,30 +100,15 @@ public class DirichletMapper extends Map
     }
   }
 
-  /**
-   * @param conf
-   * @param statePath
-   * @param modelFactory
-   * @param modelPrototype
-   * @param alpha
-   * @param pSize
-   * @param k
-   * @return
-   * @throws ClassNotFoundException
-   * @throws InstantiationException
-   * @throws IllegalAccessException
-   * @throws NoSuchMethodException
-   * @throws InvocationTargetException
-   * @throws IOException
-   */
   protected static DirichletState<VectorWritable> loadState(Configuration conf,
                                                             String statePath,
                                                             String modelFactory,
                                                             String modelPrototype,
                                                             double alpha,
                                                             int pSize,
-                                                            int k) throws ClassNotFoundException, InstantiationException,
-      IllegalAccessException, NoSuchMethodException, InvocationTargetException, IOException {
+                                                            int k)
+      throws ClassNotFoundException, InstantiationException, IllegalAccessException,
+      NoSuchMethodException, InvocationTargetException, IOException {
     DirichletState<VectorWritable> state = DirichletDriver.createState(modelFactory, modelPrototype, pSize, k, alpha);
     Path path = new Path(statePath);
     FileSystem fs = FileSystem.get(path.toUri(), conf);
@@ -129,7 +116,7 @@ public class DirichletMapper extends Map
     for (FileStatus s : status) {
       SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
       try {
-        Text key = new Text();
+        Writable key = new Text();
         DirichletCluster<VectorWritable> cluster = new DirichletCluster<VectorWritable>();
         while (reader.next(key, cluster)) {
           int index = Integer.parseInt(key.toString());

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java Tue Aug 17 17:34:14 2010
@@ -19,16 +19,17 @@ package org.apache.mahout.clustering.fuz
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.SequenceFile.Writer;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.clustering.ClusterObservations;
 import org.apache.mahout.clustering.WeightedVectorWritable;
+import org.apache.mahout.clustering.kmeans.Cluster;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
@@ -50,10 +51,6 @@ public class FuzzyKMeansClusterer {
 
   /**
     * Init the fuzzy k-means clusterer with the distance measure to use for comparison.
-    * 
-   * @param measure
-   * @param convergenceDelta
-   * @param m
    */
   public FuzzyKMeansClusterer(DistanceMeasure measure, double convergenceDelta, double m) {
     this.measure = measure;
@@ -84,7 +81,7 @@ public class FuzzyKMeansClusterer {
    * @return
    *          a List<List<SoftCluster>> of clusters produced per iteration
    */
-  public static List<List<SoftCluster>> clusterPoints(List<Vector> points,
+  public static List<List<SoftCluster>> clusterPoints(Iterable<Vector> points,
                                                       List<SoftCluster> clusters,
                                                       DistanceMeasure measure,
                                                       double threshold,
@@ -116,7 +113,7 @@ public class FuzzyKMeansClusterer {
    * @param clusterList
    *          the List<Cluster> clusters
    */
-  protected static boolean runFuzzyKMeansIteration(List<Vector> points,
+  protected static boolean runFuzzyKMeansIteration(Iterable<Vector> points,
                                                    List<SoftCluster> clusterList,
                                                    FuzzyKMeansClusterer clusterer) {
     for (Vector point : points) {
@@ -161,11 +158,10 @@ public class FuzzyKMeansClusterer {
    *          a List<SoftCluster>
    * @param context
    *          the Context to emit into
-   * @throws InterruptedException 
    */
   public void emitPointProbToCluster(Vector point,
                                      List<SoftCluster> clusters,
-                                     Mapper<WritableComparable<?>, VectorWritable, Text, ClusterObservations>.Context context)
+                                     Mapper<?,?,Text,ClusterObservations>.Context context)
       throws IOException, InterruptedException {
 
     List<Double> clusterDistanceList = new ArrayList<Double>();
@@ -176,15 +172,16 @@ public class FuzzyKMeansClusterer {
     for (int i = 0; i < clusters.size(); i++) {
       SoftCluster cluster = clusters.get(i);
       Text key = new Text(cluster.getIdentifier());
-      ClusterObservations value = new ClusterObservations(computeProbWeight(clusterDistanceList.get(i), clusterDistanceList),
-                                                          point,
-                                                          point.times(point));
+      ClusterObservations value =
+          new ClusterObservations(computeProbWeight(clusterDistanceList.get(i), clusterDistanceList),
+                                  point,
+                                  point.times(point));
       context.write(key, value);
     }
   }
 
   /** Computes the probability of a point belonging to a cluster */
-  public double computeProbWeight(double clusterDistance, List<Double> clusterDistanceList) {
+  public double computeProbWeight(double clusterDistance, Iterable<Double> clusterDistanceList) {
     if (clusterDistance == 0) {
       clusterDistance = MINIMAL_VALUE;
     }
@@ -203,7 +200,7 @@ public class FuzzyKMeansClusterer {
    * 
    * @return if the cluster is converged
    */
-  public boolean computeConvergence(SoftCluster cluster) {
+  public boolean computeConvergence(Cluster cluster) {
     return cluster.computeConvergence(measure, convergenceDelta);
   }
 
@@ -217,7 +214,7 @@ public class FuzzyKMeansClusterer {
 
   public void emitPointToClusters(VectorWritable point,
                                   List<SoftCluster> clusters,
-                                  Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable>.Context context)
+                                  Mapper<?,?,IntWritable,WeightedVectorWritable>.Context context)
       throws IOException, InterruptedException {
     // calculate point distances for all clusters    
     List<Double> clusterDistanceList = new ArrayList<Double>();
@@ -243,7 +240,7 @@ public class FuzzyKMeansClusterer {
   private void emitMostLikelyCluster(Vector point,
                                      List<SoftCluster> clusters,
                                      Vector pi,
-                                     Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable>.Context context)
+                                     Mapper<?,?,IntWritable,WeightedVectorWritable>.Context context)
       throws IOException, InterruptedException {
     int clusterId = -1;
     double clusterPdf = 0;
@@ -263,9 +260,9 @@ public class FuzzyKMeansClusterer {
    * Emit the point to all clusters
    */
   private void emitAllClusters(Vector point,
-                               List<SoftCluster> clusters,
+                               Collection<SoftCluster> clusters,
                                Vector pi,
-                               Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable>.Context context)
+                               Mapper<?,?,IntWritable,WeightedVectorWritable>.Context context)
       throws IOException, InterruptedException {
     for (int i = 0; i < clusters.size(); i++) {
       double pdf = pi.get(i);
@@ -276,10 +273,6 @@ public class FuzzyKMeansClusterer {
     }
   }
 
-  /**
-   * @param clusterList
-   * @param point
-   */
   protected void addPointToClusters(List<SoftCluster> clusterList, Vector point) {
     List<Double> clusterDistanceList = new ArrayList<Double>();
     for (SoftCluster cluster : clusterList) {
@@ -292,7 +285,7 @@ public class FuzzyKMeansClusterer {
     }
   }
 
-  protected boolean testConvergence(List<SoftCluster> clusters) {
+  protected boolean testConvergence(Iterable<SoftCluster> clusters) {
     boolean converged = true;
     for (SoftCluster cluster : clusters) {
       if (!cluster.computeConvergence(measure, convergenceDelta)) {
@@ -322,7 +315,8 @@ public class FuzzyKMeansClusterer {
     }
   }
 
-  private void emitAllClusters(Vector point, List<SoftCluster> clusters, Vector pi, Writer writer) throws IOException {
+  private void emitAllClusters(Vector point, Collection<SoftCluster> clusters, Vector pi, Writer writer)
+      throws IOException {
     for (int i = 0; i < clusters.size(); i++) {
       double pdf = pi.get(i);
       if (pdf > threshold) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java Tue Aug 17 17:34:14 2010
@@ -23,7 +23,6 @@ import java.util.List;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.SequenceFile.Writer;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.clustering.AbstractCluster;
@@ -60,7 +59,8 @@ public class KMeansClusterer {
     this.convergenceDelta = 0;
   }
 
-  public KMeansClusterer(Configuration conf) throws ClassNotFoundException, InstantiationException, IllegalAccessException {
+  public KMeansClusterer(Configuration conf)
+      throws ClassNotFoundException, InstantiationException, IllegalAccessException {
     ClassLoader ccl = Thread.currentThread().getContextClassLoader();
     Class<?> cl = ccl.loadClass(conf.get(KMeansConfigKeys.DISTANCE_MEASURE_KEY));
     this.measure = (DistanceMeasure) cl.newInstance();
@@ -77,12 +77,10 @@ public class KMeansClusterer {
    *          a point to find a cluster for.
    * @param clusters
    *          a List<Cluster> to test.
-   * @throws InterruptedException 
-   * @throws IOException 
    */
   public void emitPointToNearestCluster(Vector point,
-                                        List<Cluster> clusters,
-                                        Mapper<WritableComparable<?>, VectorWritable, Text, ClusterObservations>.Context context)
+                                        Iterable<Cluster> clusters,
+                                        Mapper<?,?,Text,ClusterObservations>.Context context)
       throws IOException, InterruptedException {
     Cluster nearestCluster = null;
     double nearestDistance = Double.MAX_VALUE;
@@ -105,7 +103,7 @@ public class KMeansClusterer {
    * @param point
    * @param clusters
    */
-  protected void addPointToNearestCluster(Vector point, List<Cluster> clusters) {
+  protected void addPointToNearestCluster(Vector point, Iterable<Cluster> clusters) {
     Cluster closestCluster = null;
     double closestDistance = Double.MAX_VALUE;
     for (Cluster cluster : clusters) {
@@ -120,12 +118,8 @@ public class KMeansClusterer {
 
   /**
    * Sequential implementation to test convergence and update cluster centers
-   * 
-   * @param clusters
-   * @param distanceThreshold
-   * @return
    */
-  protected boolean testConvergence(List<Cluster> clusters, double distanceThreshold) {
+  protected boolean testConvergence(Iterable<Cluster> clusters, double distanceThreshold) {
     boolean converged = true;
     for (Cluster cluster : clusters) {
       if (!computeConvergence(cluster)) {
@@ -137,8 +131,8 @@ public class KMeansClusterer {
   }
 
   public void outputPointWithClusterInfo(Vector vector,
-                                         List<Cluster> clusters,
-                                         Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable>.Context context)
+                                         Iterable<Cluster> clusters,
+                                         Mapper<?,?,IntWritable,WeightedVectorWritable>.Context context)
       throws IOException, InterruptedException {
     AbstractCluster nearestCluster = null;
     double nearestDistance = Double.MAX_VALUE;
@@ -161,11 +155,9 @@ public class KMeansClusterer {
    *          a point to find a cluster for.
    * @param clusters
    *          a List<Cluster> to test.
-   * @throws InterruptedException 
-   * @throws IOException 
    */
-  protected void emitPointToNearestCluster(Vector point, List<Cluster> clusters, Writer writer) throws IOException,
-      InterruptedException {
+  protected void emitPointToNearestCluster(Vector point, Iterable<Cluster> clusters, Writer writer)
+      throws IOException, InterruptedException {
     AbstractCluster nearestCluster = null;
     double nearestDistance = Double.MAX_VALUE;
     for (AbstractCluster cluster : clusters) {
@@ -195,7 +187,7 @@ public class KMeansClusterer {
    * @param maxIter
    *          the maximum number of iterations
    */
-  public static List<List<Cluster>> clusterPoints(List<Vector> points,
+  public static List<List<Cluster>> clusterPoints(Iterable<Vector> points,
                                                   List<Cluster> clusters,
                                                   DistanceMeasure measure,
                                                   int maxIter,
@@ -228,10 +220,9 @@ public class KMeansClusterer {
    *          the List<Cluster> clusters
    * @param measure
    *          a DistanceMeasure to use
-   * @return
    */
-  protected static boolean runKMeansIteration(List<Vector> points,
-                                              List<Cluster> clusters,
+  protected static boolean runKMeansIteration(Iterable<Vector> points,
+                                              Iterable<Cluster> clusters,
                                               DistanceMeasure measure,
                                               double distanceThreshold) {
     // iterate through all points, assigning each to the nearest cluster

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Tue Aug 17 17:34:14 2010
@@ -73,11 +73,7 @@ public class KMeansDriver extends Abstra
    *          the number of reducers
    * @param runClustering 
    *          true if points are to be clustered after iterations are completed
-   * @param runSequential if true execute sequential algorithm 
-   * @throws ClassNotFoundException 
-   * @throws InterruptedException 
-   * @throws IllegalAccessException 
-   * @throws InstantiationException 
+   * @param runSequential if true execute sequential algorithm
    */
   public static void runJob(Path input,
                             Path clustersIn,
@@ -87,8 +83,8 @@ public class KMeansDriver extends Abstra
                             int maxIterations,
                             int numReduceTasks,
                             boolean runClustering,
-                            boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException,
-      InstantiationException, IllegalAccessException {
+                            boolean runSequential)
+      throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException {
     new KMeansDriver().job(input,
                            clustersIn,
                            output,
@@ -108,7 +104,8 @@ public class KMeansDriver extends Abstra
     addOption(DefaultOptionCreator.distanceMeasureOption().create());
     addOption(DefaultOptionCreator.clustersInOption()
         .withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  "
-            + "If k is also specified, then a random set of vectors will be selected" + " and written out to this path first")
+            + "If k is also specified, then a random set of vectors will be selected"
+            + " and written out to this path first")
         .create());
     addOption(DefaultOptionCreator.numClustersOption()
         .withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen"
@@ -142,8 +139,17 @@ public class KMeansDriver extends Abstra
           .parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)));
     }
     boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
-    boolean runSequential = (getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD));
-    job(input, clusters, output, measureClass, convergenceDelta, maxIterations, numReduceTasks, runClustering, runSequential);
+    boolean runSequential =
+        getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
+    job(input,
+        clusters,
+        output,
+        measureClass,
+        convergenceDelta,
+        maxIterations,
+        numReduceTasks,
+        runClustering,
+        runSequential);
     return 0;
   }
 
@@ -168,11 +174,6 @@ public class KMeansDriver extends Abstra
    * @param runClustering 
    *          true if points are to be clustered after iterations are completed
    * @param runSequential if true execute sequential algorithm
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
-   * @throws IllegalAccessException 
-   * @throws InstantiationException 
    */
   public void job(Path input,
                   Path clustersIn,
@@ -182,7 +183,8 @@ public class KMeansDriver extends Abstra
                   int maxIterations,
                   int numReduceTasks,
                   boolean runClustering,
-                  boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException,
+                  boolean runSequential)
+      throws IOException, InterruptedException, ClassNotFoundException, InstantiationException,
       IllegalAccessException {
     ClassLoader ccl = Thread.currentThread().getContextClassLoader();
     Class<?> cl = ccl.loadClass(measureClass);
@@ -191,14 +193,24 @@ public class KMeansDriver extends Abstra
     // iterate until the clusters converge
     String delta = Double.toString(convergenceDelta);
     if (log.isInfoEnabled()) {
-      log.info("Input: {} Clusters In: {} Out: {} Distance: {}", new Object[] { input, clustersIn, output, measureClass });
-      log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}", new Object[] { convergenceDelta,
-          maxIterations, numReduceTasks, VectorWritable.class.getName() });
-    }
-    Path clustersOut = buildClusters(input, clustersIn, output, measure, maxIterations, numReduceTasks, delta, runSequential);
+      log.info("Input: {} Clusters In: {} Out: {} Distance: {}",
+               new Object[] { input, clustersIn, output, measureClass });
+      log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}",
+               new Object[] { convergenceDelta, maxIterations, numReduceTasks, VectorWritable.class.getName() });
+    }
+    Path clustersOut = buildClusters(input,
+                                     clustersIn,
+                                     output,
+                                     measure,
+                                     maxIterations,
+                                     numReduceTasks,
+                                     delta,
+                                     runSequential);
     if (runClustering) {
       log.info("Clustering data");
-      clusterData(input, clustersOut, new Path(output, AbstractCluster.CLUSTERED_POINTS_DIR), measure, delta, runSequential);
+      clusterData(input,
+                  clustersOut,
+                  new Path(output, AbstractCluster.CLUSTERED_POINTS_DIR), measure, delta, runSequential);
     }
   }
 
@@ -218,14 +230,9 @@ public class KMeansDriver extends Abstra
    * @param numReduceTasks
    *          the number of reducers
    * @param runSequential if true execute sequential algorithm
-   * @param convergenceDelta
+   * @param delta
    *          the convergence delta value
    * @return the Path of the final clusters directory
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
-   * @throws IllegalAccessException 
-   * @throws InstantiationException 
    */
   public Path buildClusters(Path input,
                             Path clustersIn,
@@ -287,9 +294,12 @@ public class KMeansDriver extends Abstra
                                                            Cluster.class);
       try {
         for (Cluster cluster : clusters) {
-          log.info("Writing Cluster:" + cluster.getId() + " center:" + AbstractCluster.formatVector(cluster.getCenter(), null)
-              + " numPoints:" + cluster.getNumPoints() + " radius:" + AbstractCluster.formatVector(cluster.getRadius(), null) + " to: "
-              + clustersOut.getName());
+          log.info("Writing Cluster:{} center:{} numPoints:{} radius:{} to: {}",
+              new Object[] { cluster.getId(),
+                             AbstractCluster.formatVector(cluster.getCenter(), null),
+                             cluster.getNumPoints(),
+                             AbstractCluster.formatVector(cluster.getRadius(), null),
+                             clustersOut.getName() });
           writer.append(new Text(cluster.getIdentifier()), cluster);
         }
       } finally {
@@ -301,19 +311,6 @@ public class KMeansDriver extends Abstra
     return clustersIn;
   }
 
-  /**
-   * @param input
-   * @param clustersIn
-   * @param output
-   * @param measure
-   * @param maxIterations
-   * @param numReduceTasks
-   * @param delta
-   * @return
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
-   */
   private Path buildClustersMR(Path input,
                                Path clustersIn,
                                Path output,
@@ -351,15 +348,14 @@ public class KMeansDriver extends Abstra
    * @param numReduceTasks
    *          the number of reducer tasks
    * @return true if the iteration successfully runs
-   * @throws ClassNotFoundException 
-   * @throws InterruptedException 
    */
-  private boolean runIteration(Path input,
-                               Path clustersIn,
-                               Path clustersOut,
-                               String measureClass,
-                               String convergenceDelta,
-                               int numReduceTasks) throws IOException, InterruptedException, ClassNotFoundException {
+  private static boolean runIteration(Path input,
+                                      Path clustersIn,
+                                      Path clustersOut,
+                                      String measureClass,
+                                      String convergenceDelta,
+                                      int numReduceTasks)
+      throws IOException, InterruptedException, ClassNotFoundException {
     Configuration conf = new Configuration();
     conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, clustersIn.toString());
     conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measureClass);
@@ -445,10 +441,6 @@ public class KMeansDriver extends Abstra
    * @param convergenceDelta
    *          the convergence delta value
    * @param runSequential if true execute sequential algorithm
-   * @throws ClassNotFoundException 
-   * @throws InterruptedException 
-   * @throws IllegalAccessException 
-   * @throws InstantiationException 
    */
   public void clusterData(Path input,
                           Path clustersIn,
@@ -493,8 +485,8 @@ public class KMeansDriver extends Abstra
                                                            IntWritable.class,
                                                            WeightedVectorWritable.class);
       try {
-        WritableComparable<?> key = (WritableComparable<?>) reader.getKeyClass().newInstance();
-        VectorWritable vw = (VectorWritable) reader.getValueClass().newInstance();
+        Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
+        VectorWritable vw = reader.getValueClass().asSubclass(VectorWritable.class).newInstance();
         while (reader.next(key, vw)) {
           clusterer.emitPointToNearestCluster(vw.get(), clusters, writer);
           vw = (VectorWritable) reader.getValueClass().newInstance();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java?rev=986405&r1=986404&r2=986405&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java Tue Aug 17 17:34:14 2010
@@ -18,9 +18,9 @@
 package org.apache.mahout.clustering.meanshift;
 
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.mahout.common.distance.DistanceMeasure;
@@ -45,8 +45,8 @@ public class MeanShiftCanopyClusterer {
 
   public MeanShiftCanopyClusterer(Configuration configuration) {
     try {
-      measure = Class.forName(configuration.get(MeanShiftCanopyConfigKeys.DISTANCE_MEASURE_KEY)).asSubclass(DistanceMeasure.class)
-          .newInstance();
+      measure = Class.forName(configuration.get(MeanShiftCanopyConfigKeys.DISTANCE_MEASURE_KEY))
+          .asSubclass(DistanceMeasure.class).newInstance();
       measure.configure(configuration);
     } catch (ClassNotFoundException e) {
       throw new IllegalStateException(e);
@@ -88,7 +88,7 @@ public class MeanShiftCanopyClusterer {
    * @param canopies
    *          the List<Canopy> to be appended
    */
-  public void mergeCanopy(MeanShiftCanopy aCanopy, List<MeanShiftCanopy> canopies) {
+  public void mergeCanopy(MeanShiftCanopy aCanopy, Collection<MeanShiftCanopy> canopies) {
     MeanShiftCanopy closestCoveringCanopy = null;
     double closestNorm = Double.MAX_VALUE;
     for (MeanShiftCanopy canopy : canopies) {
@@ -158,7 +158,7 @@ public class MeanShiftCanopyClusterer {
    * @param numIter
    *          the maximum number of iterations
    */
-  public static List<MeanShiftCanopy> clusterPoints(List<Vector> points,
+  public static List<MeanShiftCanopy> clusterPoints(Iterable<Vector> points,
                                                     DistanceMeasure measure,
                                                     double convergenceThreshold,
                                                     double t1,
@@ -180,12 +180,7 @@ public class MeanShiftCanopyClusterer {
     return canopies;
   }
 
-  /**
-   * @param canopies
-   * @param converged
-   * @return
-   */
-  protected List<MeanShiftCanopy> iterate(List<MeanShiftCanopy> canopies, boolean[] converged) {
+  protected List<MeanShiftCanopy> iterate(Iterable<MeanShiftCanopy> canopies, boolean[] converged) {
     converged[0] = true;
     List<MeanShiftCanopy> migratedCanopies = new ArrayList<MeanShiftCanopy>();
     for (MeanShiftCanopy canopy : canopies) {
@@ -195,8 +190,8 @@ public class MeanShiftCanopyClusterer {
     return migratedCanopies;
   }
 
-  protected static void verifyNonOverlap(List<MeanShiftCanopy> canopies) {
-    Set<Integer> coveredPoints = new HashSet<Integer>();
+  protected static void verifyNonOverlap(Iterable<MeanShiftCanopy> canopies) {
+    Collection<Integer> coveredPoints = new HashSet<Integer>();
     // verify no overlap
     for (MeanShiftCanopy canopy : canopies) {
       for (int v : canopy.getBoundPoints().toList()) {
@@ -210,7 +205,7 @@ public class MeanShiftCanopyClusterer {
     }
   }
 
-  protected static MeanShiftCanopy findCoveringCanopy(MeanShiftCanopy canopy, List<MeanShiftCanopy> clusters) {
+  protected static MeanShiftCanopy findCoveringCanopy(MeanShiftCanopy canopy, Iterable<MeanShiftCanopy> clusters) {
     // canopies use canopyIds assigned when input vectors are processed as vectorIds too
     int vectorId = canopy.getId();
     for (MeanShiftCanopy msc : clusters) {



Mime
View raw message