mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r1032979 [2/2] - in /mahout/trunk/core/src/main/java/org/apache/mahout: cf/taste/impl/common/ cf/taste/impl/model/ classifier/ classifier/bayes/ classifier/bayes/algorithm/ classifier/bayes/interfaces/ classifier/naivebayes/ classifier/naiv...
Date Tue, 09 Nov 2010 13:19:28 GMT
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java Tue Nov  9 13:19:26 2010
@@ -20,6 +20,7 @@ package org.apache.mahout.fpm.pfpgrowth.
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -28,7 +29,6 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
-import java.util.Set;
 
 import org.apache.commons.lang.mutable.MutableLong;
 import org.apache.hadoop.conf.Configuration;
@@ -36,6 +36,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.mahout.common.Pair;
 import org.apache.mahout.fpm.pfpgrowth.convertors.StatusUpdater;
@@ -63,7 +64,7 @@ public class FPGrowth<A extends Comparab
     Path path) throws IOException {
 
     List<Pair<String,TopKStringPatterns>> ret = new ArrayList<Pair<String,TopKStringPatterns>>();
-    Text key = new Text();
+    Writable key = new Text();
     TopKStringPatterns value = new TopKStringPatterns();
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
     // key is feature value is count
@@ -146,10 +147,10 @@ public class FPGrowth<A extends Comparab
    * @throws IOException
    */
   public final void generateTopKFrequentPatterns(Iterator<Pair<List<A>,Long>> transactionStream,
-                                                 List<Pair<A,Long>> frequencyList,
+                                                 Collection<Pair<A, Long>> frequencyList,
                                                  long minSupport,
                                                  int k,
-                                                 Set<A> returnableFeatures,
+                                                 Collection<A> returnableFeatures,
                                                  OutputCollector<A,List<Pair<List<A>,Long>>> output,
                                                  StatusUpdater updater) throws IOException {
 
@@ -178,7 +179,7 @@ public class FPGrowth<A extends Comparab
 
     log.info("Number of unique items {}", frequencyList.size());
 
-    Set<Integer> returnFeatures = new HashSet<Integer>();
+    Collection<Integer> returnFeatures = new HashSet<Integer>();
     if (returnableFeatures != null && !returnableFeatures.isEmpty()) {
       for (A attrib : returnableFeatures) {
         if (attributeIdMapping.containsKey(attrib)) {
@@ -206,7 +207,7 @@ public class FPGrowth<A extends Comparab
    *
    * @param tree
    *          to be mined
-   * @param minSupportMutable
+   * @param minSupportValue
    *          minimum support of the pattern to keep
    * @param k
    *          Number of top frequent patterns to keep
@@ -218,14 +219,12 @@ public class FPGrowth<A extends Comparab
    * @return Top K Frequent Patterns for each feature and their support
    */
   private Map<Integer,FrequentPatternMaxHeap> fpGrowth(FPTree tree,
-                                                       MutableLong minSupportMutable,
+                                                       long minSupportValue,
                                                        int k,
-                                                       Set<Integer> requiredFeatures,
+                                                       Collection<Integer> requiredFeatures,
                                                        TopKPatternsOutputConverter<A> outputCollector,
                                                        StatusUpdater updater) throws IOException {
 
-    long minSupportValue = minSupportMutable.longValue();
-
     Map<Integer,FrequentPatternMaxHeap> patterns = new HashMap<Integer,FrequentPatternMaxHeap>();
     FPTreeDepthCache treeCache = new FPTreeDepthCache();
     for (int i = tree.getHeaderTableCount() - 1; i >= 0; i--) {
@@ -250,9 +249,8 @@ public class FPGrowth<A extends Comparab
 
   private static FrequentPatternMaxHeap generateSinglePathPatterns(FPTree tree,
                                                                    int k,
-                                                                   MutableLong minSupportMutable) {
-    FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k,
-      false);
+                                                                   long minSupport) {
+    FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k, false);
 
     int tempNode = FPTree.ROOTNODEID;
     Pattern frequentItem = new Pattern();
@@ -262,7 +260,7 @@ public class FPGrowth<A extends Comparab
           tempNode);
       }
       tempNode = tree.childAtIndex(tempNode, 0);
-      if (tree.count(tempNode) >= minSupportMutable.intValue()) {
+      if (tree.count(tempNode) >= minSupport) {
         frequentItem.add(tree.attribute(tempNode), tree.count(tempNode));
       }
     }
@@ -296,8 +294,11 @@ public class FPGrowth<A extends Comparab
    */
   private Map<Integer,FrequentPatternMaxHeap> generateTopKFrequentPatterns(
     Iterator<Pair<int[],Long>> transactions,
-    long[] attributeFrequency, long minSupport, int k, int featureSetSize,
-    Set<Integer> returnFeatures, TopKPatternsOutputConverter<A> topKPatternsOutputCollector,
+    long[] attributeFrequency,
+    long minSupport,
+    int k,
+    int featureSetSize,
+    Collection<Integer> returnFeatures, TopKPatternsOutputConverter<A> topKPatternsOutputCollector,
     StatusUpdater updater) throws IOException {
 
     FPTree tree = new FPTree(featureSetSize);
@@ -306,7 +307,6 @@ public class FPGrowth<A extends Comparab
     }
 
     // Constructing initial FPTree from the list of transactions
-    MutableLong minSupportMutable = new MutableLong(minSupport);
     int nodecount = 0;
     // int attribcount = 0;
     int i = 0;
@@ -314,8 +314,7 @@ public class FPGrowth<A extends Comparab
       Pair<int[],Long> transaction = transactions.next();
       Arrays.sort(transaction.getFirst());
       // attribcount += transaction.length;
-      nodecount += treeAddCount(tree, transaction.getFirst(), transaction
-        .getSecond(), minSupportMutable, attributeFrequency);
+      nodecount += treeAddCount(tree, transaction.getFirst(), transaction.getSecond(), minSupport, attributeFrequency);
       i++;
       if (i % 10000 == 0) {
         log.info("FPTree Building: Read {} Transactions", i);
@@ -324,8 +323,7 @@ public class FPGrowth<A extends Comparab
 
     log.info("Number of Nodes in the FP Tree: {}", nodecount);
 
-    return fpGrowth(tree, minSupportMutable, k, returnFeatures,
-      topKPatternsOutputCollector, updater);
+    return fpGrowth(tree, minSupport, k, returnFeatures, topKPatternsOutputCollector, updater);
   }
 
   private static FrequentPatternMaxHeap growth(FPTree tree,
@@ -350,7 +348,7 @@ public class FPGrowth<A extends Comparab
     while (i < headerTableCount) {
       int attribute = tree.getAttributeAtIndex(i);
       long count = tree.getHeaderSupportCount(attribute);
-      if (count < minSupportMutable.intValue()) {
+      if (count < minSupportMutable.longValue()) {
         i++;
         continue;
       }
@@ -358,7 +356,7 @@ public class FPGrowth<A extends Comparab
       FPTree conditionalTree = treeCache.getFirstLevelTree(attribute);
       if (conditionalTree.isEmpty()) {
         traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
-          minSupportMutable, conditionalTree, tree);
+          minSupportMutable.longValue(), conditionalTree, tree);
         // printTree(conditionalTree);
 
       }
@@ -377,7 +375,7 @@ public class FPGrowth<A extends Comparab
         frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
           attribute, count, false);
       }
-      if (frequentPatterns.isFull() && minSupportMutable.intValue() < frequentPatterns.leastSupport()) {
+      if (frequentPatterns.isFull() && minSupportMutable.longValue() < frequentPatterns.leastSupport()) {
         minSupportMutable.setValue(frequentPatterns.leastSupport());
       }
       i++;
@@ -413,7 +411,7 @@ public class FPGrowth<A extends Comparab
     }
 
     if (tree.singlePath()) {
-      return generateSinglePathPatterns(tree, k, minSupportMutable);
+      return generateSinglePathPatterns(tree, k, minSupportMutable.longValue());
     }
 
     updater.update("Bottom Up FP Growth");
@@ -428,7 +426,7 @@ public class FPGrowth<A extends Comparab
       FrequentPatternMaxHeap returnedPatterns;
       if (conditionalOfCurrentAttribute) {
         traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
-          minSupportMutable, conditionalTree, tree);
+          minSupportMutable.longValue(), conditionalTree, tree);
         returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
           k, treeCache, level + 1, true, currentAttribute, updater);
 
@@ -437,7 +435,7 @@ public class FPGrowth<A extends Comparab
       } else {
         if (attribute == currentAttribute) {
           traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
-            minSupportMutable, conditionalTree, tree);
+            minSupportMutable.longValue(), conditionalTree, tree);
           returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
             k, treeCache, level + 1, true, currentAttribute, updater);
 
@@ -445,7 +443,7 @@ public class FPGrowth<A extends Comparab
             attribute, count, true);
         } else if (attribute > currentAttribute) {
           traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
-            minSupportMutable, conditionalTree, tree);
+            minSupportMutable.longValue(), conditionalTree, tree);
           returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
             k, treeCache, level + 1, false, currentAttribute, updater);
           frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
@@ -453,7 +451,7 @@ public class FPGrowth<A extends Comparab
         }
       }
 
-      if (frequentPatterns.isFull() && minSupportMutable.intValue() < frequentPatterns.leastSupport()) {
+      if (frequentPatterns.isFull() && minSupportMutable.longValue() < frequentPatterns.leastSupport()) {
         minSupportMutable.setValue(frequentPatterns.leastSupport());
       }
     }
@@ -481,14 +479,14 @@ public class FPGrowth<A extends Comparab
       } else {
         int attribute = tree.getAttributeAtIndex(index);
         long count = tree.getHeaderSupportCount(attribute);
-        if (count < minSupportMutable.intValue()) {
+        if (count < minSupportMutable.longValue()) {
           return frequentPatterns;
         }
       }
     }
 
     if (tree.singlePath()) {
-      return generateSinglePathPatterns(tree, k, minSupportMutable);
+      return generateSinglePathPatterns(tree, k, minSupportMutable.longValue());
     }
 
     updater.update("Top Down Growth:");
@@ -505,7 +503,7 @@ public class FPGrowth<A extends Comparab
       FrequentPatternMaxHeap returnedPatterns;
       if (conditionalOfCurrentAttribute) {
         traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
-          minSupportMutable, conditionalTree, tree);
+          minSupportMutable.longValue(), conditionalTree, tree);
 
         returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
           k, treeCache, level + 1, true, currentAttribute, updater);
@@ -515,7 +513,7 @@ public class FPGrowth<A extends Comparab
       } else {
         if (attribute == currentAttribute) {
           traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
-            minSupportMutable, conditionalTree, tree);
+            minSupportMutable.longValue(), conditionalTree, tree);
           returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
             k, treeCache, level + 1, true, currentAttribute, updater);
           frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
@@ -523,7 +521,7 @@ public class FPGrowth<A extends Comparab
 
         } else if (attribute > currentAttribute) {
           traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute),
-            minSupportMutable, conditionalTree, tree);
+            minSupportMutable.longValue(), conditionalTree, tree);
           returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable,
             k, treeCache, level + 1, false, currentAttribute, updater);
           frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns,
@@ -531,7 +529,7 @@ public class FPGrowth<A extends Comparab
 
         }
       }
-      if (frequentPatterns.isFull() && minSupportMutable.intValue() < frequentPatterns.leastSupport()) {
+      if (frequentPatterns.isFull() && minSupportMutable.longValue() < frequentPatterns.leastSupport()) {
         minSupportMutable.setValue(frequentPatterns.leastSupport());
       }
     }
@@ -555,7 +553,7 @@ public class FPGrowth<A extends Comparab
   }
 
   private static void traverseAndBuildConditionalFPTreeData(int firstConditionalNode,
-                                                            MutableLong minSupportMutable,
+                                                            long minSupport,
                                                             FPTree conditionalTree,
                                                             FPTree tree) {
 
@@ -569,8 +567,7 @@ public class FPGrowth<A extends Comparab
 
       while (pathNode != 0) { // dummy root node
         int attribute = tree.attribute(pathNode);
-        if (tree.getHeaderSupportCount(attribute) < minSupportMutable
-            .intValue()) {
+        if (tree.getHeaderSupportCount(attribute) < minSupport) {
           pathNode = tree.parent(pathNode);
           continue;
         }
@@ -612,16 +609,15 @@ public class FPGrowth<A extends Comparab
 
     tree.clearConditional();
     conditionalTree.reorderHeaderTable();
-    pruneFPTree(minSupportMutable, conditionalTree);
+    pruneFPTree(minSupport, conditionalTree);
     // prune Conditional Tree
 
   }
 
-  private static void pruneFPTree(MutableLong minSupportMutable, FPTree tree) {
+  private static void pruneFPTree(long minSupport, FPTree tree) {
     for (int i = 0; i < tree.getHeaderTableCount(); i++) {
       int currentAttribute = tree.getAttributeAtIndex(i);
-      if (tree.getHeaderSupportCount(currentAttribute) < minSupportMutable
-          .intValue()) {
+      if (tree.getHeaderSupportCount(currentAttribute) < minSupport) {
         int nextNode = tree.getHeaderNext(currentAttribute);
         tree.removeHeaderNext(currentAttribute);
         while (nextNode != -1) {
@@ -650,9 +646,7 @@ public class FPGrowth<A extends Comparab
 
         int parent = tree.parent(nextNode);
 
-        if (!prevNode.containsKey(parent)) {
-          prevNode.put(parent, nextNode);
-        } else {
+        if (prevNode.containsKey(parent)) {
           int prevNodeId = prevNode.get(parent);
           if (tree.childCount(prevNodeId) <= 1 && tree.childCount(nextNode) <= 1) {
             tree.addCount(prevNodeId, tree.count(nextNode));
@@ -662,6 +656,8 @@ public class FPGrowth<A extends Comparab
             }
             tree.setNext(justPrevNode, tree.next(nextNode));
           }
+        } else {
+          prevNode.put(parent, nextNode);
         }
         justPrevNode = nextNode;
         nextNode = tree.next(nextNode);
@@ -692,7 +688,7 @@ public class FPGrowth<A extends Comparab
   private static int treeAddCount(FPTree tree,
                                   int[] myList,
                                   long addCount,
-                                  Number minSupport,
+                                  long minSupport,
                                   long[] attributeFrequency) {
 
     int temp = FPTree.ROOTNODEID;
@@ -700,7 +696,7 @@ public class FPGrowth<A extends Comparab
     boolean addCountMode = true;
 
     for (int attribute : myList) {
-      if (attributeFrequency[attribute] < minSupport.intValue()) {
+      if (attributeFrequency[attribute] < minSupport) {
         return ret;
       }
       int child;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java Tue Nov  9 13:19:26 2010
@@ -18,7 +18,7 @@
 package org.apache.mahout.fpm.pfpgrowth.fpgrowth;
 
 import java.util.Arrays;
-import java.util.Set;
+import java.util.Collection;
 import java.util.TreeSet;
 
 /**
@@ -72,7 +72,7 @@ public class FPTree {
   
   private boolean singlePath;
   
-  private final Set<Integer> sortedSet = new TreeSet<Integer>();
+  private final Collection<Integer> sortedSet = new TreeSet<Integer>();
   
   public FPTree() {
     this(DEFAULT_INITIAL_SIZE, DEFAULT_HEADER_TABLE_INITIAL_SIZE);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java Tue Nov  9 13:19:26 2010
@@ -20,6 +20,7 @@ package org.apache.mahout.ga.watchmaker;
 import java.io.BufferedWriter;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
+import java.util.Collection;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -54,8 +55,8 @@ public final class MahoutEvaluator {
    *          <code>List&lt;Double&gt;</code> that contains the evaluated fitness for each candidate from the
    *          input population, sorted in the same order as the candidates.
    */
-  public static void evaluate(FitnessEvaluator<?> evaluator, List<?> population, List<Double> evaluations)
-      throws IOException, ClassNotFoundException, InterruptedException {
+  public static void evaluate(FitnessEvaluator<?> evaluator, Iterable<?> population, Collection<Double> evaluations)
+    throws IOException, ClassNotFoundException, InterruptedException {
     Job job = new Job();
     job.setJarByClass(MahoutEvaluator.class);
     Configuration conf = job.getConfiguration();
@@ -78,7 +79,7 @@ public final class MahoutEvaluator {
    *          population to store
    * @return input <code>Path</code>
    */
-  private static Path prepareInput(FileSystem fs, List<?> population) throws IOException {
+  private static Path prepareInput(FileSystem fs, Iterable<?> population) throws IOException {
     Path inpath = new Path(fs.getWorkingDirectory(), "input");
     HadoopUtil.overwriteOutput(inpath);
     storePopulation(fs, new Path(inpath, "population"), population);
@@ -122,7 +123,7 @@ public final class MahoutEvaluator {
    * @param population
    *          population to store
    */
-  static void storePopulation(FileSystem fs, Path f, List<?> population) throws IOException {
+  static void storePopulation(FileSystem fs, Path f, Iterable<?> population) throws IOException {
     FSDataOutputStream out = fs.create(f);
     BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
     

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java Tue Nov  9 13:19:26 2010
@@ -19,7 +19,7 @@ package org.apache.mahout.ga.watchmaker;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -29,6 +29,7 @@ import org.apache.hadoop.io.DoubleWritab
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.SequenceFile.Reader;
 import org.apache.hadoop.io.SequenceFile.Sorter;
+import org.apache.hadoop.io.Writable;
 
 /** Utility Class that deals with the output. */
 public final class OutputUtils {
@@ -48,7 +49,7 @@ public final class OutputUtils {
    */
   public static Path[] listOutputFiles(FileSystem fs, Path outpath) throws IOException {
     FileStatus[] status = fs.listStatus(outpath);
-    List<Path> outpaths = new ArrayList<Path>();
+    Collection<Path> outpaths = new ArrayList<Path>();
     for (FileStatus s : status) {
       if (!s.isDir()) {
         outpaths.add(s.getPath());
@@ -72,7 +73,7 @@ public final class OutputUtils {
   public static void importEvaluations(FileSystem fs,
                                        Configuration conf,
                                        Path outpath,
-                                       List<Double> evaluations) throws IOException {
+                                       Collection<Double> evaluations) throws IOException {
     Sorter sorter = new Sorter(fs, LongWritable.class, DoubleWritable.class, conf);
     
     // merge and sort the outputs
@@ -81,7 +82,7 @@ public final class OutputUtils {
     sorter.merge(outfiles, output);
     
     // import the evaluations
-    LongWritable key = new LongWritable();
+    Writable key = new LongWritable();
     DoubleWritable value = new DoubleWritable();
     Reader reader = new Reader(fs, output, conf);
     try {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java Tue Nov  9 13:19:26 2010
@@ -23,6 +23,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.FileOutputFormat;
@@ -104,7 +105,7 @@ public final class TimesSquaredJob {
     Path inputVectorPath = new Path(outputVectorPathBase, INPUT_VECTOR + '/' + now);
     SequenceFile.Writer inputVectorPathWriter = new SequenceFile.Writer(fs,
             conf, inputVectorPath, NullWritable.class, VectorWritable.class);
-    VectorWritable inputVW = new VectorWritable(v);
+    Writable inputVW = new VectorWritable(v);
     inputVectorPathWriter.append(NullWritable.get(), inputVW);
     inputVectorPathWriter.close();
     URI ivpURI = inputVectorPath.toUri();
@@ -158,9 +159,7 @@ public final class TimesSquaredJob {
         Path inputVectorPath = new Path(localFiles[0].getPath());
         FileSystem fs = inputVectorPath.getFileSystem(conf);
 
-        SequenceFile.Reader reader = new SequenceFile.Reader(fs,
-          inputVectorPath,
-          conf);
+        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inputVectorPath, conf);
         VectorWritable val = new VectorWritable();
         NullWritable nw = NullWritable.get();
         reader.next(nw, val);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java Tue Nov  9 13:19:26 2010
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
@@ -194,7 +195,7 @@ public class DistributedLanczosSolver ex
     IntWritable iw = new IntWritable();
     for (int i = 0; i < eigenVectors.numRows() - 1; i++) {
       Vector v = eigenVectors.getRow(i);
-      VectorWritable vw = new VectorWritable(v);
+      Writable vw = new VectorWritable(v);
       iw.set(i);
       seqWriter.append(iw, vw);
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java Tue Nov  9 13:19:26 2010
@@ -19,6 +19,7 @@ package org.apache.mahout.math.hadoop.de
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -30,6 +31,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.mahout.common.AbstractJob;
@@ -82,7 +84,7 @@ public class EigenVerificationJob extend
 
   private double minEigenValue;
 
-  private boolean loadEigensInMemory;
+  //private boolean loadEigensInMemory;
 
   private Path tmpOut;
 
@@ -125,7 +127,6 @@ public class EigenVerificationJob extend
    * @param minEigenValue a double representing the minimum eigenvalue
    * @param inMemory a boolean requesting in-memory preparation
    * @param config the JobConf to use, or null if a default is ok (saves referencing JobConf in calling classes unless needed)
-   * @throws IOException
    */
   public int run(Path corpusInput,
                  Path eigenInput,
@@ -182,7 +183,8 @@ public class EigenVerificationJob extend
     return OrthonormalityVerifier.pairwiseInnerProducts(eigensToVerify);
   }
 
-  private void saveCleanEigens(Configuration conf, List<Map.Entry<MatrixSlice, EigenStatus>> prunedEigenMeta) throws IOException {
+  private void saveCleanEigens(Configuration conf, Collection<Map.Entry<MatrixSlice, EigenStatus>> prunedEigenMeta)
+    throws IOException {
     Path path = new Path(outPath, CLEAN_EIGENVECTORS);
     FileSystem fs = FileSystem.get(conf);
     SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
@@ -191,9 +193,12 @@ public class EigenVerificationJob extend
     for (Map.Entry<MatrixSlice, EigenStatus> pruneSlice : prunedEigenMeta) {
       MatrixSlice s = pruneSlice.getKey();
       EigenStatus meta = pruneSlice.getValue();
-      EigenVector ev = new EigenVector((DenseVector) s.vector(), meta.getEigenValue(), Math.abs(1 - meta.getCosAngle()), s.index());
+      EigenVector ev = new EigenVector((DenseVector) s.vector(),
+                                       meta.getEigenValue(),
+                                       Math.abs(1 - meta.getCosAngle()),
+                                       s.index());
       log.info("appending {} to {}", ev, path);
-      VectorWritable vw = new VectorWritable(ev);
+      Writable vw = new VectorWritable(ev);
       iw.set(s.index());
       seqWriter.append(iw, vw);
 
@@ -264,14 +269,8 @@ public class EigenVerificationJob extend
 
   /**
    * Progammatic invocation of run()
-   * @param conf TODO
    * @param eigenInput Output of LanczosSolver
    * @param corpusInput Input of LanczosSolver
-   * @param output
-   * @param inMemory
-   * @param maxError
-   * @param minEigenValue
-   * @param maxEigens
    */
   public void runJob(Configuration conf,
                      Path eigenInput,
@@ -279,7 +278,8 @@ public class EigenVerificationJob extend
                      Path output,
                      boolean inMemory,
                      double maxError,
-                     double minEigenValue, int maxEigens) throws IOException {
+                     double minEigenValue,
+                     int maxEigens) throws IOException {
     // no need to handle command line arguments
     outPath = output;
     tmpOut = new Path(outPath, "tmp");

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java Tue Nov  9 13:19:26 2010
@@ -32,8 +32,6 @@ import org.apache.mahout.common.CommandL
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.vectorizer.collocations.llr.LLRReducer;
 import org.apache.mahout.vectorizer.common.PartialVectorMerger;
-import org.apache.mahout.vectorizer.DictionaryVectorizer;
-import org.apache.mahout.vectorizer.DocumentProcessor;
 import org.apache.mahout.vectorizer.tfidf.TFIDFConverter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java Tue Nov  9 13:19:26 2010
@@ -18,7 +18,7 @@
 package org.apache.mahout.vectorizer;
 
 /**
- * {@link org.apache.mahout.utils.vectors.Weight} based on term frequency only
+ * {@link Weight} based on term frequency only
  */
 public class TF implements Weight {
   

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
Tue Nov  9 13:19:26 2010
@@ -160,7 +160,7 @@ public final class CollocDriver extends 
   }
 
   /**
-   * Generate all ngrams for the {@link org.apache.mahout.utils.vectors.text.DictionaryVectorizer} job
+   * Generate all ngrams for the {@link org.apache.mahout.vectorizer.DictionaryVectorizer} job
    * 
    * @param input
    *          input path containing tokenized documents
@@ -200,7 +200,7 @@ public final class CollocDriver extends 
                                            int maxNGramSize,
                                            int reduceTasks,
                                            int minSupport)
-      throws IOException, ClassNotFoundException, InterruptedException {
+    throws IOException, ClassNotFoundException, InterruptedException {
 
     Configuration con = new Configuration(baseConf);
     con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
@@ -247,7 +247,7 @@ public final class CollocDriver extends 
                                               boolean emitUnigrams,
                                               float minLLRValue,
                                               int reduceTasks)
-      throws IOException, InterruptedException, ClassNotFoundException {
+    throws IOException, InterruptedException, ClassNotFoundException {
     Configuration conf = new Configuration(baseConf);
     conf.setLong(LLRReducer.NGRAM_TOTAL, nGramTotal);
     conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java
Tue Nov  9 13:19:26 2010
@@ -48,11 +48,6 @@ public class AdaptiveWordValueEncoder ex
   }
 
   @Override
-  protected int hashForProbe(byte[] originalForm, int dataSize, String name, int probe) {
-    return super.hashForProbe(originalForm, dataSize, name, probe);
-  }
-
-  @Override
   protected double getWeight(byte[] originalForm, double w) {
     return w * weight(originalForm);
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java
Tue Nov  9 13:19:26 2010
@@ -49,11 +49,12 @@ public class CachingContinuousValueEncod
   }
 
   protected int hashForProbe(String originalForm, int dataSize, String name, int probe) {
-    Preconditions.checkArgument(dataSize == this.dataSize, "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + "]");
+    Preconditions.checkArgument(dataSize == this.dataSize,
+        "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']');
     if (caches[probe].containsKey(originalForm.hashCode())) {
       return caches[probe].get(originalForm.hashCode());
     }
-    int hash = super.hashForProbe(originalForm.getBytes(), dataSize, name, probe);
+    int hash = hashForProbe(originalForm.getBytes(), dataSize, name, probe);
     caches[probe].put(originalForm.hashCode(), hash);
     return hash;
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java
Tue Nov  9 13:19:26 2010
@@ -50,11 +50,12 @@ public class CachingStaticWordValueEncod
   }
 
   protected int hashForProbe(String originalForm, int dataSize, String name, int probe) {
-    Preconditions.checkArgument(dataSize == this.dataSize, "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + "]");
+    Preconditions.checkArgument(dataSize == this.dataSize,
+        "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']');
     if (caches[probe].containsKey(originalForm.hashCode())) {
       return caches[probe].get(originalForm.hashCode());
     }
-    int hash = super.hashForProbe(originalForm.getBytes(), dataSize, name, probe);
+    int hash = hashForProbe(originalForm.getBytes(), dataSize, name, probe);
     caches[probe].put(originalForm.hashCode(), hash);
     return hash;
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
Tue Nov  9 13:19:26 2010
@@ -24,7 +24,7 @@ package org.apache.mahout.vectorizer.enc
 public abstract class CachingValueEncoder extends FeatureVectorEncoder {
   private int[] cachedProbes;
 
-  public CachingValueEncoder(String name, int seed) {
+  protected CachingValueEncoder(String name, int seed) {
     super(name);
     cacheProbeLocations(seed);
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java
Tue Nov  9 13:19:26 2010
@@ -45,7 +45,7 @@ public class Dictionary {
     return dict.size();
   }
 
-  public static Dictionary fromList(List<String> values) {
+  public static Dictionary fromList(Iterable<String> values) {
     Dictionary dict = new Dictionary();
     for (String value : values) {
       dict.intern(value);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java
Tue Nov  9 13:19:26 2010
@@ -24,7 +24,7 @@ import com.google.common.collect.Multise
 import org.apache.mahout.math.Vector;
 
 import java.util.ArrayList;
-import java.util.List;
+import java.util.Collection;
 import java.util.regex.Pattern;
 
 /**
@@ -99,7 +99,7 @@ public class TextValueEncoder extends Fe
 
   @Override
   protected Iterable<Integer> hashesForProbe(byte[] originalForm, int dataSize, String name, int probe){
-    List<Integer> hashes = new ArrayList<Integer>();
+    Collection<Integer> hashes = new ArrayList<Integer>();
     for (String word : tokenize(new String(originalForm, Charsets.UTF_8))){
       hashes.add(hashForProbe(bytesForString(word), dataSize, name, probe));
     }



Mime
View raw message