Return-Path: Delivered-To: apmail-mahout-commits-archive@www.apache.org Received: (qmail 89174 invoked from network); 17 Aug 2010 17:36:07 -0000 Received: from unknown (HELO mail.apache.org) (140.211.11.3) by 140.211.11.9 with SMTP; 17 Aug 2010 17:36:07 -0000 Received: (qmail 98671 invoked by uid 500); 17 Aug 2010 17:36:07 -0000 Delivered-To: apmail-mahout-commits-archive@mahout.apache.org Received: (qmail 98627 invoked by uid 500); 17 Aug 2010 17:36:07 -0000 Mailing-List: contact commits-help@mahout.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@mahout.apache.org Delivered-To: mailing list commits@mahout.apache.org Received: (qmail 98610 invoked by uid 99); 17 Aug 2010 17:36:07 -0000 Received: from Unknown (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 17 Aug 2010 17:36:07 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 17 Aug 2010 17:35:41 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id BC67623889ED; Tue, 17 Aug 2010 17:34:21 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r986405 [1/6] - in /mahout/trunk: buildtools/ buildtools/src/main/resources/ core/src/main/java/org/apache/mahout/cf/taste/eval/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/item... 
Date: Tue, 17 Aug 2010 17:34:19 -0000 To: commits@mahout.apache.org From: srowen@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20100817173421.BC67623889ED@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: srowen Date: Tue Aug 17 17:34:14 2010 New Revision: 986405 URL: http://svn.apache.org/viewvc?rev=986405&view=rev Log: Another massive try at removing javadoc, PMD, and checkstyle warnings. Started to remove some code in math/ that appears to be dead as it has been duplicated and improved separately. Removed: mahout/trunk/math/src/main/java/org/apache/mahout/math/Timer.java mahout/trunk/math/src/main/java/org/apache/mahout/math/list/ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Blas.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/CholeskyDecomposition.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Matrix2DMatrix2DFunction.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/SeqBlas.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/SingularValueDecomposition.java Modified: mahout/trunk/buildtools/pom.xml mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java 
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/FeatureLabelComparator.java mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java mahout/trunk/core/src/main/java/org/apache/mahout/df/ErrorEstimate.java mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java mahout/trunk/core/src/main/java/org/apache/mahout/df/callback/ForestPredictions.java 
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataUtils.java mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Dataset.java mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DescriptorUtils.java mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemBuilder.java mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemInputFormat.java mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemMapper.java mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step0Job.java mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step2Mapper.java mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/TreeID.java mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Node.java mahout/trunk/core/src/main/java/org/apache/mahout/df/split/DefaultIgSplit.java mahout/trunk/core/src/main/java/org/apache/mahout/df/split/IgSplit.java mahout/trunk/core/src/main/java/org/apache/mahout/df/split/OptIgSplit.java mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/Pattern.java mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SimilarityMatrixEntryKey.java mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredZeroAssumingCosineVectorSimilarity.java mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarityTest.java mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/OnlineLogisticRegressionTest.java mahout/trunk/core/src/test/java/org/apache/mahout/common/AbstractJobTest.java mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyReporter.java mahout/trunk/core/src/test/java/org/apache/mahout/common/MahoutTestCase.java mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DatasetTest.java mahout/trunk/core/src/test/java/org/apache/mahout/df/data/Utils.java mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/InterResultsTest.java mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/MockContext.java mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialBuilderTest.java mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartialSequentialBuilder.java mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/PartitionBugTest.java mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step0JobTest.java mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step1MapperTest.java 
mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/Step2MapperTest.java mahout/trunk/core/src/test/java/org/apache/mahout/df/split/DefaultIgSplitTest.java mahout/trunk/core/src/test/java/org/apache/mahout/df/split/OptIgSplitTest.java mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedEuclideanDistanceVectorSimilarityTest.java mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedPearsonCorrelationVectorSimilarityTest.java mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedTanimotoCoefficientVectorSimilarityTest.java mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredCosineVectorSimilarityTest.java mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedUncenteredZeroAssumingCosineVectorSimilarityTest.java mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedVectorSimilarityTestCase.java mahout/trunk/eclipse/pom.xml mahout/trunk/eclipse/src/main/resources/mahout-pmd-ruleset.xml mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/SplitBayesInput.java mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaDatasetCreatorMapper.java mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java 
mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/bayes/SplitBayesInputTest.java mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosToolTest.java mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java mahout/trunk/math/src/main/java/org/apache/mahout/math/Algebra.java mahout/trunk/math/src/main/java/org/apache/mahout/math/DenseVector.java mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonMatrixAdapter.java mahout/trunk/math/src/main/java/org/apache/mahout/math/JsonVectorAdapter.java mahout/trunk/math/src/main/java/org/apache/mahout/math/Matrix.java mahout/trunk/math/src/main/java/org/apache/mahout/math/OrthonormalityVerifier.java mahout/trunk/math/src/main/java/org/apache/mahout/math/QRDecomposition.java mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java mahout/trunk/math/src/main/java/org/apache/mahout/math/SparseRowMatrix.java mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/AsyncEigenVerifier.java mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java mahout/trunk/math/src/main/java/org/apache/mahout/math/function/Functions.java mahout/trunk/math/src/main/java/org/apache/mahout/math/function/VectorFunctions.java mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/Descriptive.java mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/UnknownDoubleQuantileEstimator.java 
mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/Utils.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory1D.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory2D.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory3D.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix3D.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/doublealgo/Statistic.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/doublealgo/Transform.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix1D.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix2D.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/RCDoubleMatrix2D.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix2D.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/TridiagonalDoubleMatrix2D.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/EigenvalueDecomposition.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecomposition.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/QRDecomposition.java mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineAuc.java mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineSummarizer.java mahout/trunk/math/src/test/java/org/apache/mahout/math/AbstractTestVector.java 
mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java mahout/trunk/math/src/test/java/org/apache/mahout/math/QRDecompositionTest.java mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java mahout/trunk/math/src/test/java/org/apache/mahout/math/TestVectorView.java mahout/trunk/math/src/test/java/org/apache/mahout/math/VectorTest.java mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineAucTest.java mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineSummarizerTest.java mahout/trunk/maven/src/main/resources/mahout-pmd-ruleset.xml mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TFPartialVectorReducer.java mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountMapper.java mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFPartialVectorReducer.java mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java Modified: mahout/trunk/buildtools/pom.xml URL: http://svn.apache.org/viewvc/mahout/trunk/buildtools/pom.xml?rev=986405&r1=986404&r2=986405&view=diff 
============================================================================== --- mahout/trunk/buildtools/pom.xml (original) +++ mahout/trunk/buildtools/pom.xml Tue Aug 17 17:34:14 2010 @@ -30,7 +30,7 @@ 6 - Buildtools - jar file used to configure PMD and Checkstyle + Mahout Build Tools jar Modified: mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml URL: http://svn.apache.org/viewvc/mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml (original) +++ mahout/trunk/buildtools/src/main/resources/mahout-pmd-ruleset.xml Tue Aug 17 17:34:14 2010 @@ -127,7 +127,7 @@ - + @@ -148,7 +148,7 @@ - + @@ -179,8 +179,8 @@ - - + + Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java Tue Aug 17 17:34:14 2010 @@ -62,7 +62,7 @@ public interface RecommenderEvaluator { * @param recommenderBuilder * object that can build a {@link org.apache.mahout.cf.taste.recommender.Recommender} to test * @param dataModelBuilder - * @param dataModelBuilder {@link DataModelBuilder} to use, or if null, a default {@link DataModel} + * {@link DataModelBuilder} to use, or if null, a default {@link DataModel} * implementation will be used * @param dataModel * dataset to test on Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java Tue Aug 17 17:34:14 2010 @@ -35,27 +35,26 @@ import java.nio.charset.Charset; import java.util.regex.Pattern; /** - * some helper methods for the hadoop-related stuff in org.apache.mahout.cf.taste + * Some helper methods for the hadoop-related stuff in org.apache.mahout.cf.taste */ public final class TasteHadoopUtils { - /** standard delimiter of textual preference data */ + /** Standard delimiter of textual preference data */ private static final Pattern PREFERENCE_TOKEN_DELIMITER = Pattern.compile("[\t,]"); private TasteHadoopUtils() { } /** - * splits a preference data line into string tokens - * - * @param line - * @return + * Splits a preference data line into string tokens */ - public static String[] splitPrefTokens(String line) { + public static String[] splitPrefTokens(CharSequence line) { return PREFERENCE_TOKEN_DELIMITER.split(line); } - /** a path filter used to read files written by hadoop */ + /** + * A path filter used to read files written by Hadoop. 
+ */ public static final PathFilter PARTS_FILTER = new PathFilter() { @Override public boolean accept(Path path) { @@ -64,21 +63,14 @@ public final class TasteHadoopUtils { }; /** - * maps a long to an int - * - * @param id - * @return + * Maps a long to an int */ public static int idToIndex(long id) { return 0x7FFFFFFF & ((int) id ^ (int) (id >>> 32)); } /** - * reads a binary mapping file - * - * @param itemIDIndexPathStr - * @param conf - * @return + * Reads a binary mapping file */ public static OpenIntLongHashMap readItemIDIndexMap(String itemIDIndexPathStr, Configuration conf) { OpenIntLongHashMap indexItemIDMap = new OpenIntLongHashMap(); @@ -104,16 +96,11 @@ public final class TasteHadoopUtils { } /** - * reads a text-based outputfile that only contains an int - * - * @param conf - * @param outputDir - * @return - * @throws IOException + * Reads a text-based outputfile that only contains an int */ public static int readIntFromFile(Configuration conf, Path outputDir) throws IOException { FileSystem fs = FileSystem.get(outputDir.toUri(), conf); - Path outputFile = fs.listStatus(outputDir, TasteHadoopUtils.PARTS_FILTER)[0].getPath(); + Path outputFile = fs.listStatus(outputDir, PARTS_FILTER)[0].getPath(); InputStream in = null; try { in = fs.open(outputFile); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java Tue Aug 17 17:34:14 2010 @@ -33,12 +33,16 @@ import org.apache.mahout.common.FileLine import 
org.apache.mahout.math.RandomAccessSparseVector; import org.apache.mahout.math.VarLongWritable; import org.apache.mahout.math.Vector; -import org.apache.mahout.math.Vector.Element; import org.apache.mahout.math.function.UnaryFunction; import org.apache.mahout.math.map.OpenIntLongHashMap; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.PriorityQueue; +import java.util.Queue; /** *

<p>computes prediction values for each user</p>

@@ -127,7 +131,7 @@ public final class AggregateAndRecommend : predictionVector.plus(prefAndSimilarityColumn.getSimilarityColumn()); } - Iterator predictions = predictionVector.iterateNonZero(); + Iterator predictions = predictionVector.iterateNonZero(); List recommendations = new ArrayList(); while (predictions.hasNext() && recommendations.size() < recommendationsPerUser) { Vector.Element prediction = predictions.next(); @@ -159,7 +163,7 @@ public final class AggregateAndRecommend Vector simColumn = prefAndSimilarityColumn.getSimilarityColumn(); float prefValue = prefAndSimilarityColumn.getPrefValue(); /* count the number of items used for each prediction */ - Iterator usedItemsIterator = simColumn.iterateNonZero(); + Iterator usedItemsIterator = simColumn.iterateNonZero(); while (usedItemsIterator.hasNext()) { int itemIDIndex = usedItemsIterator.next().index(); numberOfSimilarItemsUsed.setQuick(itemIDIndex, numberOfSimilarItemsUsed.getQuick(itemIDIndex) + 1); @@ -178,9 +182,9 @@ public final class AggregateAndRecommend } Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100); - Iterator iterator = numerators.iterateNonZero(); + Iterator iterator = numerators.iterateNonZero(); while (iterator.hasNext()) { - Element element = iterator.next(); + Vector.Element element = iterator.next(); int itemIDIndex = element.index(); /* preference estimations must be based on at least 2 datapoints */ if (numberOfSimilarItemsUsed.getQuick(itemIDIndex) > 1) { Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java Tue Aug 17 17:34:14 2010 @@ -23,7 +23,7 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.mapreduce.Mapper; import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable; import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils; -import org.apache.mahout.math.Vector.Element; +import org.apache.mahout.math.Vector; import org.apache.mahout.math.VectorWritable; import org.apache.mahout.math.map.OpenIntLongHashMap; @@ -61,10 +61,10 @@ public final class MostSimilarItemPairsM Queue topMostSimilarItems = new PriorityQueue(maxSimilarItemsPerItem + 1, Collections.reverseOrder(SimilarItem.COMPARE_BY_SIMILARITY)); - Iterator similarityVectorIterator = similarityVector.get().iterateNonZero(); + Iterator similarityVectorIterator = similarityVector.get().iterateNonZero(); while (similarityVectorIterator.hasNext()) { - Element element = similarityVectorIterator.next(); + Vector.Element element = similarityVectorIterator.next(); int index = element.index(); double value = element.get(); /* ignore self similarities */ Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java Tue Aug 17 17:34:14 2010 @@ -83,16 +83,14 @@ public class 
BayesThetaNormalizerMapper DefaultStringifier> mapStringifier = new DefaultStringifier>(job, GenericsUtil.getClass(labelWeightSumTemp)); - String labelWeightSumString = mapStringifier.toString(labelWeightSumTemp); - labelWeightSumString = job.get("cnaivebayes.sigma_k", labelWeightSumString); + String labelWeightSumString = job.get("cnaivebayes.sigma_k", mapStringifier.toString(labelWeightSumTemp)); labelWeightSumTemp = mapStringifier.fromString(labelWeightSumString); for (Map.Entry stringDoubleEntry : labelWeightSumTemp.entrySet()) { this.labelWeightSum.put(stringDoubleEntry.getKey(), stringDoubleEntry.getValue()); } DefaultStringifier stringifier = new DefaultStringifier(job, GenericsUtil .getClass(sigmaJSigmaK)); - String sigmaJSigmaKString = stringifier.toString(sigmaJSigmaK); - sigmaJSigmaKString = job.get("cnaivebayes.sigma_jSigma_k", sigmaJSigmaKString); + String sigmaJSigmaKString = job.get("cnaivebayes.sigma_jSigma_k", stringifier.toString(sigmaJSigmaK)); sigmaJSigmaK = stringifier.fromString(sigmaJSigmaKString); String vocabCountString = stringifier.toString(vocabCount); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java Tue Aug 17 17:34:14 2010 @@ -112,8 +112,7 @@ public class CBayesThetaNormalizerMapper DefaultStringifier> mapStringifier = new DefaultStringifier>(job, GenericsUtil.getClass(labelWeightSumTemp)); - String labelWeightSumString = 
mapStringifier.toString(labelWeightSumTemp); - labelWeightSumString = job.get("cnaivebayes.sigma_k", labelWeightSumString); + String labelWeightSumString = job.get("cnaivebayes.sigma_k", mapStringifier.toString(labelWeightSumTemp)); labelWeightSumTemp = mapStringifier.fromString(labelWeightSumString); for (Map.Entry stringDoubleEntry : labelWeightSumTemp.entrySet()) { this.labelWeightSum.put(stringDoubleEntry.getKey(), stringDoubleEntry.getValue()); @@ -121,12 +120,10 @@ public class CBayesThetaNormalizerMapper DefaultStringifier stringifier = new DefaultStringifier(job, GenericsUtil .getClass(sigmaJSigmaK)); - String sigmaJSigmaKString = stringifier.toString(sigmaJSigmaK); - sigmaJSigmaKString = job.get("cnaivebayes.sigma_jSigma_k", sigmaJSigmaKString); + String sigmaJSigmaKString = job.get("cnaivebayes.sigma_jSigma_k", stringifier.toString(sigmaJSigmaK)); sigmaJSigmaK = stringifier.fromString(sigmaJSigmaKString); - String vocabCountString = stringifier.toString(vocabCount); - vocabCountString = job.get("cnaivebayes.vocabCount", vocabCountString); + String vocabCountString = job.get("cnaivebayes.vocabCount", stringifier.toString(vocabCount)); vocabCount = stringifier.fromString(vocabCountString); Parameters params = Parameters.fromString(job.get("bayes.parameters", "")); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java Tue Aug 17 17:34:14 2010 @@ -36,7 +36,6 @@ import org.apache.mahout.common.Paramete import 
org.apache.mahout.common.StringTuple; import org.apache.mahout.common.iterator.ArrayIterator; import org.apache.mahout.math.function.ObjectIntProcedure; -import org.apache.mahout.math.function.ObjectProcedure; import org.apache.mahout.math.map.OpenObjectIntHashMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfMapper.java Tue Aug 17 17:34:14 2010 @@ -95,8 +95,8 @@ public class BayesTfIdfMapper extends Ma DefaultStringifier> mapStringifier = new DefaultStringifier>(job, GenericsUtil.getClass(labelDocCountTemp)); - String labelDocumentCountString = mapStringifier.toString(labelDocCountTemp); - labelDocumentCountString = job.get("cnaivebayes.labelDocumentCounts", labelDocumentCountString); + String labelDocumentCountString = + job.get("cnaivebayes.labelDocumentCounts", mapStringifier.toString(labelDocCountTemp)); labelDocCountTemp = mapStringifier.fromString(labelDocumentCountString); for (Map.Entry stringDoubleEntry : labelDocCountTemp.entrySet()) { Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/FeatureLabelComparator.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/FeatureLabelComparator.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- 
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/FeatureLabelComparator.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/FeatureLabelComparator.java Tue Aug 17 17:34:14 2010 @@ -23,14 +23,8 @@ import org.apache.hadoop.io.WritableComp import org.apache.hadoop.io.WritableComparator; import org.apache.mahout.common.StringTuple; -/** - * - */ public class FeatureLabelComparator extends WritableComparator { - - /** - * @param keyClass - */ + public FeatureLabelComparator() { super(StringTuple.class, true); } @@ -39,37 +33,31 @@ public class FeatureLabelComparator exte public int compare(WritableComparable a, WritableComparable b) { StringTuple ta = (StringTuple) a; StringTuple tb = (StringTuple) b; - - String tmpa, tmpb; - int cmp; - - if (ta.length() < 2 || ta.length() > 3 || tb.length() < 2 - || tb.length() > 3) { + + if (ta.length() < 2 || ta.length() > 3 || tb.length() < 2 || tb.length() > 3) { throw new IllegalArgumentException("StringTuple length out of bounds"); } // token - tmpa = ta.length() == 2 ? ta.stringAt(1) : ta.stringAt(2); - tmpb = tb.length() == 2 ? tb.stringAt(1) : tb.stringAt(2); - cmp = tmpa.compareTo(tmpb); - if (cmp != 0) return cmp; + String tmpa = ta.length() == 2 ? ta.stringAt(1) : ta.stringAt(2); + String tmpb = tb.length() == 2 ? tb.stringAt(1) : tb.stringAt(2); + int cmp = tmpa.compareTo(tmpb); + if (cmp != 0) { + return cmp; + } // type, FEATURE_TF first, then FEATURE_COUNT, then DF or anything else. 
cmp = ta.stringAt(0).compareTo(tb.stringAt(0)); if (cmp != 0) { if (ta.stringAt(0).equals(BayesConstants.FEATURE_TF)) { return -1; - } - else if (tb.stringAt(0).equals(BayesConstants.FEATURE_TF)) { + } else if (tb.stringAt(0).equals(BayesConstants.FEATURE_TF)) { return 1; - } - else if (ta.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) { + } else if (ta.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) { return -1; - } - else if (tb.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) { + } else if (tb.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) { return 1; - } - else { + } else { return cmp; } } @@ -80,7 +68,6 @@ public class FeatureLabelComparator exte cmp = tmpa.compareTo(tmpb); return cmp; - } } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java Tue Aug 17 17:34:14 2010 @@ -19,7 +19,6 @@ package org.apache.mahout.classifier.dis import java.util.Iterator; import org.apache.mahout.math.Vector; -import org.apache.mahout.math.Vector.Element; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -70,18 +69,18 @@ public class WinnowTrainer extends Linea // case one Vector updateVector = dataPoint.times(1 / this.promotionStep); log.info("Winnow update positive: {}", updateVector); - Iterator iter = updateVector.iterateNonZero(); + Iterator iter = updateVector.iterateNonZero(); while (iter.hasNext()) { - Element element = iter.next(); + Vector.Element element = iter.next(); model.timesDelta(element.index(), element.get()); } } else { // case two Vector 
updateVector = dataPoint.times(1 / this.promotionStep); log.info("Winnow update negative: {}", updateVector); - Iterator iter = updateVector.iterateNonZero(); + Iterator iter = updateVector.iterateNonZero(); while (iter.hasNext()) { - Element element = iter.next(); + Vector.Element element = iter.next(); model.timesDelta(element.index(), element.get()); } } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java Tue Aug 17 17:34:14 2010 @@ -76,11 +76,11 @@ public abstract class AbstractOnlineLogi private Vector logisticLink(Vector v) { double max = v.maxValue(); if (max < 40) { - v.assign(Functions.exp); + v.assign(Functions.EXP); double sum = 1 + v.norm(1); return v.divide(sum); } else { - v.assign(Functions.minus(max)).assign(Functions.exp); + v.assign(Functions.minus(max)).assign(Functions.EXP); return v; } } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java Tue Aug 17 17:34:14 2010 @@ -12,7 +12,6 @@ import org.apache.mahout.math.JsonVector import 
org.apache.mahout.math.NamedVector; import org.apache.mahout.math.Vector; import org.apache.mahout.math.VectorWritable; -import org.apache.mahout.math.Vector.Element; import org.apache.mahout.math.function.SquareRootFunction; import com.google.gson.Gson; @@ -214,11 +213,7 @@ public abstract class AbstractCluster im * @return the new centroid */ public Vector computeCentroid() { - if (s0 == 0) { - return getCenter(); - } else { - return s1.divide(s0); - } + return s0 == 0 ? getCenter() : s1.divide(s0); } /** @@ -235,7 +230,7 @@ public abstract class AbstractCluster im buf.append(((NamedVector) v).getName()).append(" = "); } int nzero = 0; - Iterator iterateNonZero = v.iterateNonZero(); + Iterator iterateNonZero = v.iterateNonZero(); while (iterateNonZero.hasNext()) { iterateNonZero.next(); nzero++; Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterBase.java Tue Aug 17 17:34:14 2010 @@ -88,7 +88,6 @@ public abstract class ClusterBase implem /** * @deprecated - * @return */ @Deprecated public abstract String asFormatString(); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java Tue Aug 17 17:34:14 
2010 @@ -18,7 +18,6 @@ package org.apache.mahout.clustering.canopy; import java.io.DataInput; -import java.io.DataOutput; import java.io.IOException; import org.apache.mahout.clustering.AbstractCluster; Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java Tue Aug 17 17:34:14 2010 @@ -29,7 +29,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; @@ -81,7 +81,8 @@ public class DirichletDriver extends Abs } @Override - public int run(String[] args) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, + public int run(String[] args) + throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException, InterruptedException { addInputOption(); addOutputOption(); @@ -121,7 +122,8 @@ public class DirichletDriver extends Abs double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION)); double alpha0 = Double.parseDouble(getOption(ALPHA_OPTION)); boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION); - boolean runSequential = 
(getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD)); + boolean runSequential = (getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase( + DefaultOptionCreator.SEQUENTIAL_METHOD)); job(input, output, @@ -164,7 +166,6 @@ public class DirichletDriver extends Abs * @param threshold * a double threshold value emits all clusters having greater pdf (emitMostLikely = false) * @param runSequential execute sequentially if true - * @throws InterruptedException */ public static void runJob(Path input, Path output, @@ -177,7 +178,8 @@ public class DirichletDriver extends Abs boolean runClustering, boolean emitMostLikely, double threshold, - boolean runSequential) throws ClassNotFoundException, InstantiationException, IllegalAccessException, + boolean runSequential) + throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException, SecurityException, NoSuchMethodException, InvocationTargetException, InterruptedException { new DirichletDriver().job(input, @@ -214,7 +216,8 @@ public class DirichletDriver extends Abs String modelPrototype, int prototypeSize, int numModels, - double alpha0) throws ClassNotFoundException, InstantiationException, + double alpha0) + throws ClassNotFoundException, InstantiationException, IllegalAccessException, SecurityException, NoSuchMethodException, IllegalArgumentException, InvocationTargetException { ClassLoader ccl = Thread.currentThread().getContextClassLoader(); @@ -238,7 +241,7 @@ public class DirichletDriver extends Abs int protoSize = 0; for (FileStatus s : status) { SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf); - WritableComparable key = (WritableComparable) reader.getKeyClass().newInstance(); + Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance(); VectorWritable value = new VectorWritable(); if (reader.next(key, value)) { protoSize = value.get().size(); @@ -258,13 +261,6 @@ public class 
DirichletDriver extends Abs * @param prototypeSize the int size of the modelPrototype vectors * @param numModels the int number of models to generate * @param alpha0 the double alpha_0 argument to the DirichletDistribution - * @throws ClassNotFoundException - * @throws InstantiationException - * @throws IllegalAccessException - * @throws IOException - * @throws SecurityException - * @throws NoSuchMethodException - * @throws InvocationTargetException */ private void writeInitialState(Path output, Path stateOut, @@ -272,7 +268,8 @@ public class DirichletDriver extends Abs String modelPrototype, int prototypeSize, int numModels, - double alpha0) throws ClassNotFoundException, InstantiationException, IllegalAccessException, + double alpha0) + throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException, SecurityException, NoSuchMethodException, InvocationTargetException { DirichletState state = createState(modelFactory, modelPrototype, prototypeSize, numModels, alpha0); @@ -376,13 +373,6 @@ public class DirichletDriver extends Abs * @param threshold * a double threshold value emits all clusters having greater pdf (emitMostLikely = false) * @param runSequential execute sequentially if true - * @throws IOException - * @throws InstantiationException - * @throws IllegalAccessException - * @throws ClassNotFoundException - * @throws NoSuchMethodException - * @throws InvocationTargetException - * @throws InterruptedException */ public void job(Path input, Path output, @@ -395,7 +385,8 @@ public class DirichletDriver extends Abs boolean runClustering, boolean emitMostLikely, double threshold, - boolean runSequential) throws IOException, InstantiationException, IllegalAccessException, + boolean runSequential) + throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException, NoSuchMethodException, InvocationTargetException, InterruptedException { Path clustersOut = buildClusters(input, output, @@ -407,7 +398,12 @@ public 
class DirichletDriver extends Abs numReducers, runSequential); if (runClustering) { - clusterData(input, clustersOut, new Path(output, Cluster.CLUSTERED_POINTS_DIR), emitMostLikely, threshold, runSequential); + clusterData(input, + clustersOut, + new Path(output, Cluster.CLUSTERED_POINTS_DIR), + emitMostLikely, + threshold, + runSequential); } } @@ -475,36 +471,16 @@ public class DirichletDriver extends Abs return clustersIn; } - /** - * @param input - * @param output - * @param modelFactory - * @param modelPrototype - * @param numClusters - * @param maxIterations - * @param alpha0 - * @param numReducers - * @param clustersIn - * @param protoSize - * @return - * @throws IOException - * @throws InterruptedException - * @throws ClassNotFoundException - * @throws InvocationTargetException - * @throws NoSuchMethodException - * @throws IllegalAccessException - * @throws InstantiationException - */ - private Path buildClustersSeq(Path input, - Path output, - String modelFactory, - String modelPrototype, - int numClusters, - int maxIterations, - double alpha0, - int numReducers, - Path clustersIn, - int protoSize) + private static Path buildClustersSeq(Path input, + Path output, + String modelFactory, + String modelPrototype, + int numClusters, + int maxIterations, + double alpha0, + int numReducers, + Path clustersIn, + int protoSize) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException { for (int iteration = 1; iteration <= maxIterations; iteration++) { @@ -526,7 +502,7 @@ public class DirichletDriver extends Abs for (FileStatus s : status) { SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf); try { - WritableComparable key = (WritableComparable) reader.getKeyClass().newInstance(); + Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance(); VectorWritable vw = (VectorWritable) reader.getValueClass().newInstance(); while (reader.next(key, 
vw)) { clusterer.observe(newModels, vw); @@ -545,37 +521,29 @@ public class DirichletDriver extends Abs return clustersIn; } - /** - * @param input - * @param output - * @param modelFactory - * @param modelPrototype - * @param numClusters - * @param maxIterations - * @param alpha0 - * @param numReducers - * @param clustersIn - * @param protoSize - * @return - * @throws IOException - * @throws InterruptedException - * @throws ClassNotFoundException - */ - private Path buildClustersMR(Path input, - Path output, - String modelFactory, - String modelPrototype, - int numClusters, - int maxIterations, - double alpha0, - int numReducers, - Path clustersIn, - int protoSize) throws IOException, InterruptedException, ClassNotFoundException { + private static Path buildClustersMR(Path input, + Path output, + String modelFactory, + String modelPrototype, + int numClusters, + int maxIterations, + double alpha0, + int numReducers, + Path clustersIn, + int protoSize) throws IOException, InterruptedException, ClassNotFoundException { for (int iteration = 1; iteration <= maxIterations; iteration++) { log.info("Iteration {}", iteration); // point the output to a new directory per iteration Path clustersOut = new Path(output, Cluster.CLUSTERS_DIR + iteration); - runIteration(input, clustersIn, clustersOut, modelFactory, modelPrototype, protoSize, numClusters, alpha0, numReducers); + runIteration(input, + clustersIn, + clustersOut, + modelFactory, + modelPrototype, + protoSize, + numClusters, + alpha0, + numReducers); // now point the input to the old output directory clustersIn = clustersOut; } @@ -596,13 +564,13 @@ public class DirichletDriver extends Abs * @param threshold * a double threshold value emits all clusters having greater pdf (emitMostLikely = false) * @param runSequential execute sequentially if true - * @throws ClassNotFoundException - * @throws InterruptedException - * @throws IOException - * @throws IllegalAccessException - * @throws InstantiationException */ - 
public void clusterData(Path input, Path stateIn, Path output, boolean emitMostLikely, double threshold, boolean runSequential) + public static void clusterData(Path input, + Path stateIn, + Path output, + boolean emitMostLikely, + double threshold, + boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException { if (runSequential) { clusterDataSeq(input, stateIn, output, emitMostLikely, threshold); @@ -632,7 +600,7 @@ public class DirichletDriver extends Abs IntWritable.class, WeightedVectorWritable.class); try { - WritableComparable key = (WritableComparable) reader.getKeyClass().newInstance(); + Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance(); VectorWritable vw = (VectorWritable) reader.getValueClass().newInstance(); while (reader.next(key, vw)) { clusterer.emitPointToClusters(vw, clusters, writer); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletMapper.java Tue Aug 17 17:34:14 2010 @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapreduce.Mapper; import org.apache.mahout.clustering.kmeans.OutputLogFilter; @@ -37,7 +38,8 @@ public class DirichletMapper extends Map private DirichletClusterer clusterer; @Override - protected void map(WritableComparable 
key, VectorWritable v, Context context) throws IOException, InterruptedException { + protected void map(WritableComparable key, VectorWritable v, Context context) + throws IOException, InterruptedException { int k = clusterer.assignToModel(v); context.write(new Text(String.valueOf(k)), v); } @@ -98,30 +100,15 @@ public class DirichletMapper extends Map } } - /** - * @param conf - * @param statePath - * @param modelFactory - * @param modelPrototype - * @param alpha - * @param pSize - * @param k - * @return - * @throws ClassNotFoundException - * @throws InstantiationException - * @throws IllegalAccessException - * @throws NoSuchMethodException - * @throws InvocationTargetException - * @throws IOException - */ protected static DirichletState loadState(Configuration conf, String statePath, String modelFactory, String modelPrototype, double alpha, int pSize, - int k) throws ClassNotFoundException, InstantiationException, - IllegalAccessException, NoSuchMethodException, InvocationTargetException, IOException { + int k) + throws ClassNotFoundException, InstantiationException, IllegalAccessException, + NoSuchMethodException, InvocationTargetException, IOException { DirichletState state = DirichletDriver.createState(modelFactory, modelPrototype, pSize, k, alpha); Path path = new Path(statePath); FileSystem fs = FileSystem.get(path.toUri(), conf); @@ -129,7 +116,7 @@ public class DirichletMapper extends Map for (FileStatus s : status) { SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf); try { - Text key = new Text(); + Writable key = new Text(); DirichletCluster cluster = new DirichletCluster(); while (reader.next(key, cluster)) { int index = Integer.parseInt(key.toString()); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java Tue Aug 17 17:34:14 2010 @@ -19,16 +19,17 @@ package org.apache.mahout.clustering.fuz import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.SequenceFile.Writer; import org.apache.hadoop.mapreduce.Mapper; import org.apache.mahout.clustering.ClusterObservations; import org.apache.mahout.clustering.WeightedVectorWritable; +import org.apache.mahout.clustering.kmeans.Cluster; import org.apache.mahout.common.distance.DistanceMeasure; import org.apache.mahout.math.DenseVector; import org.apache.mahout.math.Vector; @@ -50,10 +51,6 @@ public class FuzzyKMeansClusterer { /** * Init the fuzzy k-means clusterer with the distance measure to use for comparison. 
- * - * @param measure - * @param convergenceDelta - * @param m */ public FuzzyKMeansClusterer(DistanceMeasure measure, double convergenceDelta, double m) { this.measure = measure; @@ -84,7 +81,7 @@ public class FuzzyKMeansClusterer { * @return * a List> of clusters produced per iteration */ - public static List> clusterPoints(List points, + public static List> clusterPoints(Iterable points, List clusters, DistanceMeasure measure, double threshold, @@ -116,7 +113,7 @@ public class FuzzyKMeansClusterer { * @param clusterList * the List clusters */ - protected static boolean runFuzzyKMeansIteration(List points, + protected static boolean runFuzzyKMeansIteration(Iterable points, List clusterList, FuzzyKMeansClusterer clusterer) { for (Vector point : points) { @@ -161,11 +158,10 @@ public class FuzzyKMeansClusterer { * a List * @param context * the Context to emit into - * @throws InterruptedException */ public void emitPointProbToCluster(Vector point, List clusters, - Mapper, VectorWritable, Text, ClusterObservations>.Context context) + Mapper.Context context) throws IOException, InterruptedException { List clusterDistanceList = new ArrayList(); @@ -176,15 +172,16 @@ public class FuzzyKMeansClusterer { for (int i = 0; i < clusters.size(); i++) { SoftCluster cluster = clusters.get(i); Text key = new Text(cluster.getIdentifier()); - ClusterObservations value = new ClusterObservations(computeProbWeight(clusterDistanceList.get(i), clusterDistanceList), - point, - point.times(point)); + ClusterObservations value = + new ClusterObservations(computeProbWeight(clusterDistanceList.get(i), clusterDistanceList), + point, + point.times(point)); context.write(key, value); } } /** Computes the probability of a point belonging to a cluster */ - public double computeProbWeight(double clusterDistance, List clusterDistanceList) { + public double computeProbWeight(double clusterDistance, Iterable clusterDistanceList) { if (clusterDistance == 0) { clusterDistance = MINIMAL_VALUE; } @@ 
-203,7 +200,7 @@ public class FuzzyKMeansClusterer { * * @return if the cluster is converged */ - public boolean computeConvergence(SoftCluster cluster) { + public boolean computeConvergence(Cluster cluster) { return cluster.computeConvergence(measure, convergenceDelta); } @@ -217,7 +214,7 @@ public class FuzzyKMeansClusterer { public void emitPointToClusters(VectorWritable point, List clusters, - Mapper, VectorWritable, IntWritable, WeightedVectorWritable>.Context context) + Mapper.Context context) throws IOException, InterruptedException { // calculate point distances for all clusters List clusterDistanceList = new ArrayList(); @@ -243,7 +240,7 @@ public class FuzzyKMeansClusterer { private void emitMostLikelyCluster(Vector point, List clusters, Vector pi, - Mapper, VectorWritable, IntWritable, WeightedVectorWritable>.Context context) + Mapper.Context context) throws IOException, InterruptedException { int clusterId = -1; double clusterPdf = 0; @@ -263,9 +260,9 @@ public class FuzzyKMeansClusterer { * Emit the point to all clusters */ private void emitAllClusters(Vector point, - List clusters, + Collection clusters, Vector pi, - Mapper, VectorWritable, IntWritable, WeightedVectorWritable>.Context context) + Mapper.Context context) throws IOException, InterruptedException { for (int i = 0; i < clusters.size(); i++) { double pdf = pi.get(i); @@ -276,10 +273,6 @@ public class FuzzyKMeansClusterer { } } - /** - * @param clusterList - * @param point - */ protected void addPointToClusters(List clusterList, Vector point) { List clusterDistanceList = new ArrayList(); for (SoftCluster cluster : clusterList) { @@ -292,7 +285,7 @@ public class FuzzyKMeansClusterer { } } - protected boolean testConvergence(List clusters) { + protected boolean testConvergence(Iterable clusters) { boolean converged = true; for (SoftCluster cluster : clusters) { if (!cluster.computeConvergence(measure, convergenceDelta)) { @@ -322,7 +315,8 @@ public class FuzzyKMeansClusterer { } } - private 
void emitAllClusters(Vector point, List clusters, Vector pi, Writer writer) throws IOException { + private void emitAllClusters(Vector point, Collection clusters, Vector pi, Writer writer) + throws IOException { for (int i = 0; i < clusters.size(); i++) { double pdf = pi.get(i); if (pdf > threshold) { Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java Tue Aug 17 17:34:14 2010 @@ -23,7 +23,6 @@ import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.SequenceFile.Writer; import org.apache.hadoop.mapreduce.Mapper; import org.apache.mahout.clustering.AbstractCluster; @@ -60,7 +59,8 @@ public class KMeansClusterer { this.convergenceDelta = 0; } - public KMeansClusterer(Configuration conf) throws ClassNotFoundException, InstantiationException, IllegalAccessException { + public KMeansClusterer(Configuration conf) + throws ClassNotFoundException, InstantiationException, IllegalAccessException { ClassLoader ccl = Thread.currentThread().getContextClassLoader(); Class cl = ccl.loadClass(conf.get(KMeansConfigKeys.DISTANCE_MEASURE_KEY)); this.measure = (DistanceMeasure) cl.newInstance(); @@ -77,12 +77,10 @@ public class KMeansClusterer { * a point to find a cluster for. * @param clusters * a List to test. 
- * @throws InterruptedException - * @throws IOException */ public void emitPointToNearestCluster(Vector point, - List clusters, - Mapper, VectorWritable, Text, ClusterObservations>.Context context) + Iterable clusters, + Mapper.Context context) throws IOException, InterruptedException { Cluster nearestCluster = null; double nearestDistance = Double.MAX_VALUE; @@ -105,7 +103,7 @@ public class KMeansClusterer { * @param point * @param clusters */ - protected void addPointToNearestCluster(Vector point, List clusters) { + protected void addPointToNearestCluster(Vector point, Iterable clusters) { Cluster closestCluster = null; double closestDistance = Double.MAX_VALUE; for (Cluster cluster : clusters) { @@ -120,12 +118,8 @@ public class KMeansClusterer { /** * Sequential implementation to test convergence and update cluster centers - * - * @param clusters - * @param distanceThreshold - * @return */ - protected boolean testConvergence(List clusters, double distanceThreshold) { + protected boolean testConvergence(Iterable clusters, double distanceThreshold) { boolean converged = true; for (Cluster cluster : clusters) { if (!computeConvergence(cluster)) { @@ -137,8 +131,8 @@ public class KMeansClusterer { } public void outputPointWithClusterInfo(Vector vector, - List clusters, - Mapper, VectorWritable, IntWritable, WeightedVectorWritable>.Context context) + Iterable clusters, + Mapper.Context context) throws IOException, InterruptedException { AbstractCluster nearestCluster = null; double nearestDistance = Double.MAX_VALUE; @@ -161,11 +155,9 @@ public class KMeansClusterer { * a point to find a cluster for. * @param clusters * a List to test. 
- * @throws InterruptedException - * @throws IOException */ - protected void emitPointToNearestCluster(Vector point, List clusters, Writer writer) throws IOException, - InterruptedException { + protected void emitPointToNearestCluster(Vector point, Iterable clusters, Writer writer) + throws IOException, InterruptedException { AbstractCluster nearestCluster = null; double nearestDistance = Double.MAX_VALUE; for (AbstractCluster cluster : clusters) { @@ -195,7 +187,7 @@ public class KMeansClusterer { * @param maxIter * the maximum number of iterations */ - public static List> clusterPoints(List points, + public static List> clusterPoints(Iterable points, List clusters, DistanceMeasure measure, int maxIter, @@ -228,10 +220,9 @@ public class KMeansClusterer { * the List clusters * @param measure * a DistanceMeasure to use - * @return */ - protected static boolean runKMeansIteration(List points, - List clusters, + protected static boolean runKMeansIteration(Iterable points, + Iterable clusters, DistanceMeasure measure, double distanceThreshold) { // iterate through all points, assigning each to the nearest cluster Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Tue Aug 17 17:34:14 2010 @@ -73,11 +73,7 @@ public class KMeansDriver extends Abstra * the number of reducers * @param runClustering * true if points are to be clustered after iterations are completed - * @param runSequential if true execute sequential algorithm - * @throws ClassNotFoundException - * @throws InterruptedException - * @throws 
IllegalAccessException - * @throws InstantiationException + * @param runSequential if true execute sequential algorithm */ public static void runJob(Path input, Path clustersIn, @@ -87,8 +83,8 @@ public class KMeansDriver extends Abstra int maxIterations, int numReduceTasks, boolean runClustering, - boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException, - InstantiationException, IllegalAccessException { + boolean runSequential) + throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException { new KMeansDriver().job(input, clustersIn, output, @@ -108,7 +104,8 @@ public class KMeansDriver extends Abstra addOption(DefaultOptionCreator.distanceMeasureOption().create()); addOption(DefaultOptionCreator.clustersInOption() .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. " - + "If k is also specified, then a random set of vectors will be selected" + " and written out to this path first") + + "If k is also specified, then a random set of vectors will be selected" + + " and written out to this path first") .create()); addOption(DefaultOptionCreator.numClustersOption() .withDescription("The k in k-Means. 
If specified, then a random selection of k Vectors will be chosen" @@ -142,8 +139,17 @@ public class KMeansDriver extends Abstra .parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION))); } boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION); - boolean runSequential = (getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD)); - job(input, clusters, output, measureClass, convergenceDelta, maxIterations, numReduceTasks, runClustering, runSequential); + boolean runSequential = + getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD); + job(input, + clusters, + output, + measureClass, + convergenceDelta, + maxIterations, + numReduceTasks, + runClustering, + runSequential); return 0; } @@ -168,11 +174,6 @@ public class KMeansDriver extends Abstra * @param runClustering * true if points are to be clustered after iterations are completed * @param runSequential if true execute sequential algorithm - * @throws IOException - * @throws InterruptedException - * @throws ClassNotFoundException - * @throws IllegalAccessException - * @throws InstantiationException */ public void job(Path input, Path clustersIn, @@ -182,7 +183,8 @@ public class KMeansDriver extends Abstra int maxIterations, int numReduceTasks, boolean runClustering, - boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, + boolean runSequential) + throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException { ClassLoader ccl = Thread.currentThread().getContextClassLoader(); Class cl = ccl.loadClass(measureClass); @@ -191,14 +193,24 @@ public class KMeansDriver extends Abstra // iterate until the clusters converge String delta = Double.toString(convergenceDelta); if (log.isInfoEnabled()) { - log.info("Input: {} Clusters In: {} Out: {} Distance: {}", new Object[] { input, clustersIn, 
output, measureClass }); - log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}", new Object[] { convergenceDelta, - maxIterations, numReduceTasks, VectorWritable.class.getName() }); - } - Path clustersOut = buildClusters(input, clustersIn, output, measure, maxIterations, numReduceTasks, delta, runSequential); + log.info("Input: {} Clusters In: {} Out: {} Distance: {}", + new Object[] { input, clustersIn, output, measureClass }); + log.info("convergence: {} max Iterations: {} num Reduce Tasks: {} Input Vectors: {}", + new Object[] { convergenceDelta, maxIterations, numReduceTasks, VectorWritable.class.getName() }); + } + Path clustersOut = buildClusters(input, + clustersIn, + output, + measure, + maxIterations, + numReduceTasks, + delta, + runSequential); if (runClustering) { log.info("Clustering data"); - clusterData(input, clustersOut, new Path(output, AbstractCluster.CLUSTERED_POINTS_DIR), measure, delta, runSequential); + clusterData(input, + clustersOut, + new Path(output, AbstractCluster.CLUSTERED_POINTS_DIR), measure, delta, runSequential); } } @@ -218,14 +230,9 @@ public class KMeansDriver extends Abstra * @param numReduceTasks * the number of reducers * @param runSequential if true execute sequential algorithm - * @param convergenceDelta + * @param delta * the convergence delta value * @return the Path of the final clusters directory - * @throws IOException - * @throws InterruptedException - * @throws ClassNotFoundException - * @throws IllegalAccessException - * @throws InstantiationException */ public Path buildClusters(Path input, Path clustersIn, @@ -287,9 +294,12 @@ public class KMeansDriver extends Abstra Cluster.class); try { for (Cluster cluster : clusters) { - log.info("Writing Cluster:" + cluster.getId() + " center:" + AbstractCluster.formatVector(cluster.getCenter(), null) - + " numPoints:" + cluster.getNumPoints() + " radius:" + AbstractCluster.formatVector(cluster.getRadius(), null) + " to: " - + 
clustersOut.getName()); + log.info("Writing Cluster:{} center:{} numPoints:{} radius:{} to: {}", + new Object[] { cluster.getId(), + AbstractCluster.formatVector(cluster.getCenter(), null), + cluster.getNumPoints(), + AbstractCluster.formatVector(cluster.getRadius(), null), + clustersOut.getName() }); writer.append(new Text(cluster.getIdentifier()), cluster); } } finally { @@ -301,19 +311,6 @@ public class KMeansDriver extends Abstra return clustersIn; } - /** - * @param input - * @param clustersIn - * @param output - * @param measure - * @param maxIterations - * @param numReduceTasks - * @param delta - * @return - * @throws IOException - * @throws InterruptedException - * @throws ClassNotFoundException - */ private Path buildClustersMR(Path input, Path clustersIn, Path output, @@ -351,15 +348,14 @@ public class KMeansDriver extends Abstra * @param numReduceTasks * the number of reducer tasks * @return true if the iteration successfully runs - * @throws ClassNotFoundException - * @throws InterruptedException */ - private boolean runIteration(Path input, - Path clustersIn, - Path clustersOut, - String measureClass, - String convergenceDelta, - int numReduceTasks) throws IOException, InterruptedException, ClassNotFoundException { + private static boolean runIteration(Path input, + Path clustersIn, + Path clustersOut, + String measureClass, + String convergenceDelta, + int numReduceTasks) + throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, clustersIn.toString()); conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measureClass); @@ -445,10 +441,6 @@ public class KMeansDriver extends Abstra * @param convergenceDelta * the convergence delta value * @param runSequential if true execute sequential algorithm - * @throws ClassNotFoundException - * @throws InterruptedException - * @throws IllegalAccessException - * @throws InstantiationException */ public void clusterData(Path 
input, Path clustersIn, @@ -493,8 +485,8 @@ public class KMeansDriver extends Abstra IntWritable.class, WeightedVectorWritable.class); try { - WritableComparable key = (WritableComparable) reader.getKeyClass().newInstance(); - VectorWritable vw = (VectorWritable) reader.getValueClass().newInstance(); + Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance(); + VectorWritable vw = reader.getValueClass().asSubclass(VectorWritable.class).newInstance(); while (reader.next(key, vw)) { clusterer.emitPointToNearestCluster(vw.get(), clusters, writer); vw = (VectorWritable) reader.getValueClass().newInstance(); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java?rev=986405&r1=986404&r2=986405&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java Tue Aug 17 17:34:14 2010 @@ -18,9 +18,9 @@ package org.apache.mahout.clustering.meanshift; import java.util.ArrayList; +import java.util.Collection; import java.util.HashSet; import java.util.List; -import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.mahout.common.distance.DistanceMeasure; @@ -45,8 +45,8 @@ public class MeanShiftCanopyClusterer { public MeanShiftCanopyClusterer(Configuration configuration) { try { - measure = Class.forName(configuration.get(MeanShiftCanopyConfigKeys.DISTANCE_MEASURE_KEY)).asSubclass(DistanceMeasure.class) - .newInstance(); + measure = Class.forName(configuration.get(MeanShiftCanopyConfigKeys.DISTANCE_MEASURE_KEY)) + .asSubclass(DistanceMeasure.class).newInstance(); measure.configure(configuration); } 
catch (ClassNotFoundException e) { throw new IllegalStateException(e); @@ -88,7 +88,7 @@ public class MeanShiftCanopyClusterer { * @param canopies * the List to be appended */ - public void mergeCanopy(MeanShiftCanopy aCanopy, List canopies) { + public void mergeCanopy(MeanShiftCanopy aCanopy, Collection canopies) { MeanShiftCanopy closestCoveringCanopy = null; double closestNorm = Double.MAX_VALUE; for (MeanShiftCanopy canopy : canopies) { @@ -158,7 +158,7 @@ public class MeanShiftCanopyClusterer { * @param numIter * the maximum number of iterations */ - public static List clusterPoints(List points, + public static List clusterPoints(Iterable points, DistanceMeasure measure, double convergenceThreshold, double t1, @@ -180,12 +180,7 @@ public class MeanShiftCanopyClusterer { return canopies; } - /** - * @param canopies - * @param converged - * @return - */ - protected List iterate(List canopies, boolean[] converged) { + protected List iterate(Iterable canopies, boolean[] converged) { converged[0] = true; List migratedCanopies = new ArrayList(); for (MeanShiftCanopy canopy : canopies) { @@ -195,8 +190,8 @@ public class MeanShiftCanopyClusterer { return migratedCanopies; } - protected static void verifyNonOverlap(List canopies) { - Set coveredPoints = new HashSet(); + protected static void verifyNonOverlap(Iterable canopies) { + Collection coveredPoints = new HashSet(); // verify no overlap for (MeanShiftCanopy canopy : canopies) { for (int v : canopy.getBoundPoints().toList()) { @@ -210,7 +205,7 @@ public class MeanShiftCanopyClusterer { } } - protected static MeanShiftCanopy findCoveringCanopy(MeanShiftCanopy canopy, List clusters) { + protected static MeanShiftCanopy findCoveringCanopy(MeanShiftCanopy canopy, Iterable clusters) { // canopies use canopyIds assigned when input vectors are processed as vectorIds too int vectorId = canopy.getId(); for (MeanShiftCanopy msc : clusters) {