Return-Path: Delivered-To: apmail-mahout-commits-archive@www.apache.org Received: (qmail 29238 invoked from network); 9 Nov 2010 13:20:20 -0000 Received: from unknown (HELO mail.apache.org) (140.211.11.3) by 140.211.11.9 with SMTP; 9 Nov 2010 13:20:20 -0000 Received: (qmail 91185 invoked by uid 500); 9 Nov 2010 13:20:52 -0000 Delivered-To: apmail-mahout-commits-archive@mahout.apache.org Received: (qmail 91141 invoked by uid 500); 9 Nov 2010 13:20:52 -0000 Mailing-List: contact commits-help@mahout.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@mahout.apache.org Delivered-To: mailing list commits@mahout.apache.org Received: (qmail 91134 invoked by uid 99); 9 Nov 2010 13:20:51 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 09 Nov 2010 13:20:51 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 09 Nov 2010 13:20:45 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id CD2EF23889D5; Tue, 9 Nov 2010 13:19:29 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1032979 [2/2] - in /mahout/trunk/core/src/main/java/org/apache/mahout: cf/taste/impl/common/ cf/taste/impl/model/ classifier/ classifier/bayes/ classifier/bayes/algorithm/ classifier/bayes/interfaces/ classifier/naivebayes/ classifier/naiv... Date: Tue, 09 Nov 2010 13:19:28 -0000 To: commits@mahout.apache.org From: srowen@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20101109131929.CD2EF23889D5@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java Tue Nov 9 13:19:26 2010 @@ -20,6 +20,7 @@ package org.apache.mahout.fpm.pfpgrowth. import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -28,7 +29,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.Set; import org.apache.commons.lang.mutable.MutableLong; import org.apache.hadoop.conf.Configuration; @@ -36,6 +36,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.OutputCollector; import org.apache.mahout.common.Pair; import org.apache.mahout.fpm.pfpgrowth.convertors.StatusUpdater; @@ -63,7 +64,7 @@ public class FPGrowth> ret = new ArrayList>(); - Text key = new Text(); + Writable key = new Text(); TopKStringPatterns value = new TopKStringPatterns(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); // key is feature value is count @@ -146,10 +147,10 @@ public class FPGrowth,Long>> transactionStream, - List> frequencyList, + Collection> frequencyList, long minSupport, int k, - Set returnableFeatures, + Collection returnableFeatures, OutputCollector,Long>>> output, StatusUpdater updater) throws IOException { @@ -178,7 +179,7 @@ public class FPGrowth returnFeatures = new HashSet(); + Collection returnFeatures = new HashSet(); if (returnableFeatures != null && !returnableFeatures.isEmpty()) { for (A attrib : returnableFeatures) { if (attributeIdMapping.containsKey(attrib)) { @@ -206,7 +207,7 @@ public class FPGrowth fpGrowth(FPTree tree, - MutableLong minSupportMutable, + long minSupportValue, int k, - Set requiredFeatures, + Collection requiredFeatures, TopKPatternsOutputConverter outputCollector, StatusUpdater updater) throws IOException { - long minSupportValue = minSupportMutable.longValue(); - Map patterns = new HashMap(); FPTreeDepthCache treeCache = new FPTreeDepthCache(); for (int i = tree.getHeaderTableCount() - 1; i >= 0; i--) { @@ -250,9 +249,8 @@ public class FPGrowth= minSupportMutable.intValue()) { + if (tree.count(tempNode) >= minSupport) { frequentItem.add(tree.attribute(tempNode), tree.count(tempNode)); } } @@ -296,8 +294,11 @@ public class FPGrowth generateTopKFrequentPatterns( Iterator> transactions, - long[] attributeFrequency, long minSupport, int k, int featureSetSize, - Set returnFeatures, TopKPatternsOutputConverter topKPatternsOutputCollector, + long[] attributeFrequency, + long minSupport, + int k, + int featureSetSize, + Collection returnFeatures, TopKPatternsOutputConverter topKPatternsOutputCollector, StatusUpdater updater) throws IOException { FPTree tree = new FPTree(featureSetSize); @@ -306,7 +307,6 @@ public class FPGrowth transaction = transactions.next(); Arrays.sort(transaction.getFirst()); // attribcount += transaction.length; - nodecount += treeAddCount(tree, transaction.getFirst(), transaction - .getSecond(), minSupportMutable, attributeFrequency); + nodecount += treeAddCount(tree, transaction.getFirst(), transaction.getSecond(), minSupport, attributeFrequency); i++; if (i % 10000 == 0) { log.info("FPTree Building: Read {} Transactions", i); @@ -324,8 +323,7 @@ public class FPGrowth currentAttribute) { traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), - minSupportMutable, conditionalTree, tree); + minSupportMutable.longValue(), conditionalTree, tree); returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable, k, treeCache, level + 1, false, currentAttribute, updater); frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, @@ -453,7 +451,7 @@ public class FPGrowth currentAttribute) { traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), - minSupportMutable, conditionalTree, tree); + minSupportMutable.longValue(), conditionalTree, tree); returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable, k, treeCache, level + 1, false, currentAttribute, updater); frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, @@ -531,7 +529,7 @@ public class FPGrowth sortedSet = new TreeSet(); + private final Collection sortedSet = new TreeSet(); public FPTree() { this(DEFAULT_INITIAL_SIZE, DEFAULT_HEADER_TABLE_INITIAL_SIZE); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java Tue Nov 9 13:19:26 2010 @@ -20,6 +20,7 @@ package org.apache.mahout.ga.watchmaker; import java.io.BufferedWriter; import java.io.IOException; import java.io.OutputStreamWriter; +import java.util.Collection; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -54,8 +55,8 @@ public final class MahoutEvaluator { * List<Double> that contains the evaluated fitness for each candidate from the * input population, sorted in the same order as the candidates. */ - public static void evaluate(FitnessEvaluator evaluator, List population, List evaluations) - throws IOException, ClassNotFoundException, InterruptedException { + public static void evaluate(FitnessEvaluator evaluator, Iterable population, Collection evaluations) + throws IOException, ClassNotFoundException, InterruptedException { Job job = new Job(); job.setJarByClass(MahoutEvaluator.class); Configuration conf = job.getConfiguration(); @@ -78,7 +79,7 @@ public final class MahoutEvaluator { * population to store * @return input Path */ - private static Path prepareInput(FileSystem fs, List population) throws IOException { + private static Path prepareInput(FileSystem fs, Iterable population) throws IOException { Path inpath = new Path(fs.getWorkingDirectory(), "input"); HadoopUtil.overwriteOutput(inpath); storePopulation(fs, new Path(inpath, "population"), population); @@ -122,7 +123,7 @@ public final class MahoutEvaluator { * @param population * population to store */ - static void storePopulation(FileSystem fs, Path f, List population) throws IOException { + static void storePopulation(FileSystem fs, Path f, Iterable population) throws IOException { FSDataOutputStream out = fs.create(f); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out)); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java Tue Nov 9 13:19:26 2010 @@ -19,7 +19,7 @@ package org.apache.mahout.ga.watchmaker; import java.io.IOException; import java.util.ArrayList; -import java.util.List; +import java.util.Collection; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -29,6 +29,7 @@ import org.apache.hadoop.io.DoubleWritab import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.SequenceFile.Reader; import org.apache.hadoop.io.SequenceFile.Sorter; +import org.apache.hadoop.io.Writable; /** Utility Class that deals with the output. */ public final class OutputUtils { @@ -48,7 +49,7 @@ public final class OutputUtils { */ public static Path[] listOutputFiles(FileSystem fs, Path outpath) throws IOException { FileStatus[] status = fs.listStatus(outpath); - List outpaths = new ArrayList(); + Collection outpaths = new ArrayList(); for (FileStatus s : status) { if (!s.isDir()) { outpaths.add(s.getPath()); @@ -72,7 +73,7 @@ public final class OutputUtils { public static void importEvaluations(FileSystem fs, Configuration conf, Path outpath, - List evaluations) throws IOException { + Collection evaluations) throws IOException { Sorter sorter = new Sorter(fs, LongWritable.class, DoubleWritable.class, conf); // merge and sort the outputs @@ -81,7 +82,7 @@ public final class OutputUtils { sorter.merge(outfiles, output); // import the evaluations - LongWritable key = new LongWritable(); + Writable key = new LongWritable(); DoubleWritable value = new DoubleWritable(); Reader reader = new Reader(fs, output, conf); try { Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java Tue Nov 9 13:19:26 2010 @@ -23,6 +23,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileOutputFormat; @@ -104,7 +105,7 @@ public final class TimesSquaredJob { Path inputVectorPath = new Path(outputVectorPathBase, INPUT_VECTOR + '/' + now); SequenceFile.Writer inputVectorPathWriter = new SequenceFile.Writer(fs, conf, inputVectorPath, NullWritable.class, VectorWritable.class); - VectorWritable inputVW = new VectorWritable(v); + Writable inputVW = new VectorWritable(v); inputVectorPathWriter.append(NullWritable.get(), inputVW); inputVectorPathWriter.close(); URI ivpURI = inputVectorPath.toUri(); @@ -158,9 +159,7 @@ public final class TimesSquaredJob { Path inputVectorPath = new Path(localFiles[0].getPath()); FileSystem fs = inputVectorPath.getFileSystem(conf); - SequenceFile.Reader reader = new SequenceFile.Reader(fs, - inputVectorPath, - conf); + SequenceFile.Reader reader = new SequenceFile.Reader(fs, inputVectorPath, conf); VectorWritable val = new VectorWritable(); NullWritable nw = NullWritable.get(); reader.next(nw, val); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java Tue Nov 9 13:19:26 2010 @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; @@ -194,7 +195,7 @@ public class DistributedLanczosSolver ex IntWritable iw = new IntWritable(); for (int i = 0; i < eigenVectors.numRows() - 1; i++) { Vector v = eigenVectors.getRow(i); - VectorWritable vw = new VectorWritable(v); + Writable vw = new VectorWritable(v); iw.set(i); seqWriter.append(iw, vw); } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java Tue Nov 9 13:19:26 2010 @@ -19,6 +19,7 @@ package org.apache.mahout.math.hadoop.de import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -30,6 +31,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.ToolRunner; import org.apache.mahout.common.AbstractJob; @@ -82,7 +84,7 @@ public class EigenVerificationJob extend private double minEigenValue; - private boolean loadEigensInMemory; + //private boolean loadEigensInMemory; private Path tmpOut; @@ -125,7 +127,6 @@ public class EigenVerificationJob extend * @param minEigenValue a double representing the minimum eigenvalue * @param inMemory a boolean requesting in-memory preparation * @param config the JobConf to use, or null if a default is ok (saves referencing JobConf in calling classes unless needed) - * @throws IOException */ public int run(Path corpusInput, Path eigenInput, @@ -182,7 +183,8 @@ public class EigenVerificationJob extend return OrthonormalityVerifier.pairwiseInnerProducts(eigensToVerify); } - private void saveCleanEigens(Configuration conf, List> prunedEigenMeta) throws IOException { + private void saveCleanEigens(Configuration conf, Collection> prunedEigenMeta) + throws IOException { Path path = new Path(outPath, CLEAN_EIGENVECTORS); FileSystem fs = FileSystem.get(conf); SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class); @@ -191,9 +193,12 @@ public class EigenVerificationJob extend for (Map.Entry pruneSlice : prunedEigenMeta) { MatrixSlice s = pruneSlice.getKey(); EigenStatus meta = pruneSlice.getValue(); - EigenVector ev = new EigenVector((DenseVector) s.vector(), meta.getEigenValue(), Math.abs(1 - meta.getCosAngle()), s.index()); + EigenVector ev = new EigenVector((DenseVector) s.vector(), + meta.getEigenValue(), + Math.abs(1 - meta.getCosAngle()), + s.index()); log.info("appending {} to {}", ev, path); - VectorWritable vw = new VectorWritable(ev); + Writable vw = new VectorWritable(ev); iw.set(s.index()); seqWriter.append(iw, vw); @@ -264,14 +269,8 @@ public class EigenVerificationJob extend /** * Progammatic invocation of run() - * @param conf TODO * @param eigenInput Output of LanczosSolver * @param corpusInput Input of LanczosSolver - * @param output - * @param inMemory - * @param maxError - * @param minEigenValue - * @param maxEigens */ public void runJob(Configuration conf, Path eigenInput, @@ -279,7 +278,8 @@ public class EigenVerificationJob extend Path output, boolean inMemory, double maxError, - double minEigenValue, int maxEigens) throws IOException { + double minEigenValue, + int maxEigens) throws IOException { // no need to handle command line arguments outPath = output; tmpOut = new Path(outPath, "tmp"); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java Tue Nov 9 13:19:26 2010 @@ -32,8 +32,6 @@ import org.apache.mahout.common.CommandL import org.apache.mahout.common.HadoopUtil; import org.apache.mahout.vectorizer.collocations.llr.LLRReducer; import org.apache.mahout.vectorizer.common.PartialVectorMerger; -import org.apache.mahout.vectorizer.DictionaryVectorizer; -import org.apache.mahout.vectorizer.DocumentProcessor; import org.apache.mahout.vectorizer.tfidf.TFIDFConverter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java Tue Nov 9 13:19:26 2010 @@ -18,7 +18,7 @@ package org.apache.mahout.vectorizer; /** - * {@link org.apache.mahout.utils.vectors.Weight} based on term frequency only + * {@link Weight} based on term frequency only */ public class TF implements Weight { Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java Tue Nov 9 13:19:26 2010 @@ -160,7 +160,7 @@ public final class CollocDriver extends } /** - * Generate all ngrams for the {@link org.apache.mahout.utils.vectors.text.DictionaryVectorizer} job + * Generate all ngrams for the {@link org.apache.mahout.vectorizer.DictionaryVectorizer} job * * @param input * input path containing tokenized documents @@ -200,7 +200,7 @@ public final class CollocDriver extends int maxNGramSize, int reduceTasks, int minSupport) - throws IOException, ClassNotFoundException, InterruptedException { + throws IOException, ClassNotFoundException, InterruptedException { Configuration con = new Configuration(baseConf); con.setBoolean(EMIT_UNIGRAMS, emitUnigrams); @@ -247,7 +247,7 @@ public final class CollocDriver extends boolean emitUnigrams, float minLLRValue, int reduceTasks) - throws IOException, InterruptedException, ClassNotFoundException { + throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(baseConf); conf.setLong(LLRReducer.NGRAM_TOTAL, nGramTotal); conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java Tue Nov 9 13:19:26 2010 @@ -48,11 +48,6 @@ public class AdaptiveWordValueEncoder ex } @Override - protected int hashForProbe(byte[] originalForm, int dataSize, String name, int probe) { - return super.hashForProbe(originalForm, dataSize, name, probe); - } - - @Override protected double getWeight(byte[] originalForm, double w) { return w * weight(originalForm); } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java Tue Nov 9 13:19:26 2010 @@ -49,11 +49,12 @@ public class CachingContinuousValueEncod } protected int hashForProbe(String originalForm, int dataSize, String name, int probe) { - Preconditions.checkArgument(dataSize == this.dataSize, "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + "]"); + Preconditions.checkArgument(dataSize == this.dataSize, + "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']'); if (caches[probe].containsKey(originalForm.hashCode())) { return caches[probe].get(originalForm.hashCode()); } - int hash = super.hashForProbe(originalForm.getBytes(), dataSize, name, probe); + int hash = hashForProbe(originalForm.getBytes(), dataSize, name, probe); caches[probe].put(originalForm.hashCode(), hash); return hash; } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java Tue Nov 9 13:19:26 2010 @@ -50,11 +50,12 @@ public class CachingStaticWordValueEncod } protected int hashForProbe(String originalForm, int dataSize, String name, int probe) { - Preconditions.checkArgument(dataSize == this.dataSize, "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + "]"); + Preconditions.checkArgument(dataSize == this.dataSize, + "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']'); if (caches[probe].containsKey(originalForm.hashCode())) { return caches[probe].get(originalForm.hashCode()); } - int hash = super.hashForProbe(originalForm.getBytes(), dataSize, name, probe); + int hash = hashForProbe(originalForm.getBytes(), dataSize, name, probe); caches[probe].put(originalForm.hashCode(), hash); return hash; } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java Tue Nov 9 13:19:26 2010 @@ -24,7 +24,7 @@ package org.apache.mahout.vectorizer.enc public abstract class CachingValueEncoder extends FeatureVectorEncoder { private int[] cachedProbes; - public CachingValueEncoder(String name, int seed) { + protected CachingValueEncoder(String name, int seed) { super(name); cacheProbeLocations(seed); } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java Tue Nov 9 13:19:26 2010 @@ -45,7 +45,7 @@ public class Dictionary { return dict.size(); } - public static Dictionary fromList(List values) { + public static Dictionary fromList(Iterable values) { Dictionary dict = new Dictionary(); for (String value : values) { dict.intern(value); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java Tue Nov 9 13:19:26 2010 @@ -24,7 +24,7 @@ import com.google.common.collect.Multise import org.apache.mahout.math.Vector; import java.util.ArrayList; -import java.util.List; +import java.util.Collection; import java.util.regex.Pattern; /** @@ -99,7 +99,7 @@ public class TextValueEncoder extends Fe @Override protected Iterable hashesForProbe(byte[] originalForm, int dataSize, String name, int probe){ - List hashes = new ArrayList(); + Collection hashes = new ArrayList(); for (String word : tokenize(new String(originalForm, Charsets.UTF_8))){ hashes.add(hashForProbe(bytesForString(word), dataSize, name, probe)); }