mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r1090865 [2/3] - in /mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/ core/src/main/java/org/apache/mahout/cf/taste/impl/model/ core/src/main/java/org/apache...
Date Sun, 10 Apr 2011 18:30:08 GMT
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterator.java Sun Apr 10 18:30:05 2011
@@ -20,8 +20,8 @@ package org.apache.mahout.common.iterato
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.Iterator;
-import java.util.NoSuchElementException;
 
+import com.google.common.collect.AbstractIterator;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -29,6 +29,7 @@ import org.apache.hadoop.io.NullWritable
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.mahout.common.IOUtils;
 import org.apache.mahout.common.Pair;
 
 /**
@@ -36,7 +37,7 @@ import org.apache.mahout.common.Pair;
  * containing key and value.</p>
  */
 public final class SequenceFileIterator<K extends Writable,V extends Writable>
-  implements Iterator<Pair<K,V>>, Closeable {
+  extends AbstractIterator<Pair<K,V>> implements Closeable {
 
   private final SequenceFile.Reader reader;
   private final Configuration conf;
@@ -45,7 +46,6 @@ public final class SequenceFileIterator<
   private final boolean noValue;
   private K key;
   private V value;
-  private boolean available;
   private final boolean reuseKeyValueInstances;
 
   /**
@@ -60,7 +60,6 @@ public final class SequenceFileIterator<
     this.conf = conf;
     keyClass = (Class<K>) reader.getKeyClass();
     valueClass = (Class<V>) reader.getValueClass();
-    available = false;
     noValue = NullWritable.class.equals(valueClass);
     this.reuseKeyValueInstances = reuseKeyValueInstances;
   }
@@ -74,62 +73,37 @@ public final class SequenceFileIterator<
   }
 
   @Override
-  public void close() throws IOException {
-    available = false;
+  public void close() {
     key = null;
     value = null;
-    reader.close();
+    IOUtils.quietClose(reader);
+    endOfData();
   }
 
   @Override
-  public boolean hasNext() {
-    if (!available) {
-      if (!reuseKeyValueInstances || value == null) {
-        key = ReflectionUtils.newInstance(keyClass, conf);
-        if (!noValue) {
-          value = ReflectionUtils.newInstance(valueClass, conf);
-        }
-      }
-      try {
-        if (noValue) {
-          available = reader.next(key);
-        } else {
-          available = reader.next(key, value);
-        }
-        if (!available) {
-          close();
-        }
-        return available;
-      } catch (IOException ioe) {
-        try {
-          close();
-        } catch (IOException ioe2) {
-          throw new IllegalStateException(ioe2);
-        }
-        throw new IllegalStateException(ioe);
+  protected Pair<K,V> computeNext() {
+    if (!reuseKeyValueInstances || value == null) {
+      key = ReflectionUtils.newInstance(keyClass, conf);
+      if (!noValue) {
+        value = ReflectionUtils.newInstance(valueClass, conf);
       }
     }
-    return available;
-  }
-
-  /**
-   * @throws IllegalStateException if path can't be read, or its key or value class can't be instantiated
-   */
-  @Override
-  public Pair<K,V> next() {
-    if (!hasNext()) {
-      throw new NoSuchElementException();
+    try {
+      boolean available;
+      if (noValue) {
+        available = reader.next(key);
+      } else {
+        available = reader.next(key, value);
+      }
+      if (!available) {
+        close();
+        return null;
+      }
+      return new Pair<K,V>(key, value);
+    } catch (IOException ioe) {
+      close();
+      throw new IllegalStateException(ioe);
     }
-    available = false;
-    return new Pair<K,V>(key, value);
-  }
-
-  /**
-   * @throws UnsupportedOperationException
-   */
-  @Override
-  public void remove() {
-    throw new UnsupportedOperationException();
   }
 
 }
\ No newline at end of file

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterator.java Sun Apr 10 18:30:05 2011
@@ -20,26 +20,26 @@ package org.apache.mahout.common.iterato
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.Iterator;
-import java.util.NoSuchElementException;
 
+import com.google.common.collect.AbstractIterator;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.mahout.common.IOUtils;
 
 /**
  * <p>{@link Iterator} over a {@link SequenceFile}'s values only.</p>
  */
-public final class SequenceFileValueIterator<V extends Writable> implements Iterator<V>, Closeable {
+public final class SequenceFileValueIterator<V extends Writable> extends AbstractIterator<V> implements Closeable {
 
   private final SequenceFile.Reader reader;
   private final Configuration conf;
   private final Class<V> valueClass;
   private final Writable key;
   private V value;
-  private boolean available;
   private final boolean reuseKeyValueInstances;
 
   /**
@@ -54,7 +54,6 @@ public final class SequenceFileValueIter
     Class<? extends Writable> keyClass = (Class<? extends Writable>) reader.getKeyClass();
     key = ReflectionUtils.newInstance(keyClass, conf);
     valueClass = (Class<V>) reader.getValueClass();
-    available = false;
     this.reuseKeyValueInstances = reuseKeyValueInstances;
   }
 
@@ -63,54 +62,28 @@ public final class SequenceFileValueIter
   }
 
   @Override
-  public void close() throws IOException {
-    available = false;
+  public void close() {
     value = null;
-    reader.close();
+    IOUtils.quietClose(reader);
+    endOfData();
   }
 
   @Override
-  public boolean hasNext() {
-    if (!available) {
-      if (!reuseKeyValueInstances || value == null) {
-        value = ReflectionUtils.newInstance(valueClass, conf);
-      }
-      try {
-        available = reader.next(key, value);
-        if (!available) {
-          close();
-        }
-        return available;
-      } catch (IOException ioe) {
-        try {
-          close();
-        } catch (IOException ioe2) {
-          throw new IllegalStateException(ioe2);
-        }
-        throw new IllegalStateException(ioe);
-      }
+  protected V computeNext() {
+    if (!reuseKeyValueInstances || value == null) {
+      value = ReflectionUtils.newInstance(valueClass, conf);
     }
-    return available;
-  }
-
-  /**
-   * @throws IllegalStateException if path can't be read, or its key or value class can't be instantiated
-   */
-  @Override
-  public V next() {
-    if (!hasNext()) {
-      throw new NoSuchElementException();
+    try {
+      boolean available = reader.next(key, value);
+      if (!available) {
+        close();
+        return null;
+      }
+      return value;
+    } catch (IOException ioe) {
+      close();
+      throw new IllegalStateException(ioe);
     }
-    available = false;
-    return value;
-  }
-
-  /**
-   * @throws UnsupportedOperationException
-   */
-  @Override
-  public void remove() {
-    throw new UnsupportedOperationException();
   }
 
 }
\ No newline at end of file

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/MultiTransactionTreeIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/MultiTransactionTreeIterator.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/MultiTransactionTreeIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/MultiTransactionTreeIterator.java Sun Apr 10 18:30:05 2011
@@ -20,55 +20,37 @@ package org.apache.mahout.fpm.pfpgrowth;
 import java.util.Iterator;
 import java.util.List;
 
+import com.google.common.collect.AbstractIterator;
 import org.apache.mahout.common.Pair;
 
 /**
  * Iterates over multiple transaction trees to produce a single iterator of transactions
- * 
  */
-public final class MultiTransactionTreeIterator implements Iterator<List<Integer>> {
-  
-  private Iterator<Pair<List<Integer>,Long>> pIterator;
-  
-  private Pair<List<Integer>,Long> currentPattern;
+public final class MultiTransactionTreeIterator extends AbstractIterator<List<Integer>> {
   
+  private final Iterator<Pair<List<Integer>,Long>> pIterator;
+  private List<Integer> current;
+  private long currentMaxCount;
   private long currentCount;
   
   public MultiTransactionTreeIterator(Iterator<Pair<List<Integer>,Long>> iterator) {
     this.pIterator = iterator;
-    
-    if (pIterator.hasNext()) {
-      currentPattern = pIterator.next();
-      currentCount = 0;
-    } else {
-      pIterator = null;
-    }
-    
   }
-  
-  @Override
-  public boolean hasNext() {
-    return pIterator != null;
-  }
-  
+
   @Override
-  public List<Integer> next() {
-    List<Integer> returnable = currentPattern.getFirst();
-    currentCount++;
-    if (currentCount == currentPattern.getSecond()) {
+  protected List<Integer> computeNext() {
+    if (currentCount >= currentMaxCount) {
       if (pIterator.hasNext()) {
-        currentPattern = pIterator.next();
+        Pair<List<Integer>,Long> nextValue = pIterator.next();
+        current = nextValue.getFirst();
+        currentMaxCount = nextValue.getSecond();
         currentCount = 0;
       } else {
-        pIterator = null;
+        return endOfData();
       }
     }
-    return returnable;
-  }
-  
-  @Override
-  public void remove() {
-    throw new UnsupportedOperationException();
+    currentCount++;
+    return current;
   }
   
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthCombiner.java Sun Apr 10 18:30:05 2011
@@ -18,7 +18,6 @@
 package org.apache.mahout.fpm.pfpgrowth;
 
 import java.io.IOException;
-import java.util.Iterator;
 import java.util.List;
 
 import org.apache.hadoop.io.LongWritable;
@@ -29,24 +28,18 @@ import org.apache.mahout.common.Pair;
  *  takes each group of dependent transactions and\ compacts it in a
  * TransactionTree structure
  */
-
-public class ParallelFPGrowthCombiner extends
-    Reducer<LongWritable,TransactionTree,LongWritable,TransactionTree> {
+public class ParallelFPGrowthCombiner extends Reducer<LongWritable,TransactionTree,LongWritable,TransactionTree> {
   
   @Override
   protected void reduce(LongWritable key, Iterable<TransactionTree> values, Context context)
     throws IOException, InterruptedException {
     TransactionTree cTree = new TransactionTree();
     for (TransactionTree tr : values) {
-      Iterator<Pair<List<Integer>,Long>> it = tr.getIterator();
-      while (it.hasNext()) {
-        Pair<List<Integer>,Long> p = it.next();
+      for (Pair<List<Integer>,Long> p : tr) {
         cTree.addPattern(p.getFirst(), p.getSecond());
       }
     }
-    
     context.write(key, cTree.getCompressedTree());
-    
   }
   
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java Sun Apr 10 18:30:05 2011
@@ -20,7 +20,6 @@ package org.apache.mahout.fpm.pfpgrowth;
 import java.io.IOException;
 import java.util.Collection;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map.Entry;
 
@@ -41,21 +40,19 @@ public class ParallelFPGrowthMapper exte
   private final OpenIntLongHashMap gListInt = new OpenIntLongHashMap();
   
   @Override
-  protected void map(LongWritable offset, TransactionTree input, Context context) throws IOException,
-                                                                                 InterruptedException {
-    
-    Iterator<Pair<List<Integer>,Long>> it = input.getIterator();
-    while (it.hasNext()) {
-      Pair<List<Integer>,Long> pattern = it.next();
+  protected void map(LongWritable offset, TransactionTree input, Context context)
+    throws IOException, InterruptedException {
+
+    for (Pair<List<Integer>,Long> pattern : input) {
       Integer[] prunedItems = pattern.getFirst().toArray(new Integer[pattern.getFirst().size()]);
-      
+
       Collection<Long> groups = new HashSet<Long>();
       for (int j = prunedItems.length - 1; j >= 0; j--) { // generate group
         // dependent
         // shards
         Integer item = prunedItems[j];
         Long groupID = gListInt.get(item);
-        
+
         if (!groups.contains(groupID)) {
           Integer[] tempItems = new Integer[j + 1];
           System.arraycopy(prunedItems, 0, tempItems, 0, j + 1);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java Sun Apr 10 18:30:05 2011
@@ -22,7 +22,6 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -48,7 +47,6 @@ import org.apache.mahout.math.map.OpenOb
  * outputs the the Top K frequent Patterns for each group.
  * 
  */
-
 public class ParallelFPGrowthReducer extends Reducer<LongWritable,TransactionTree,Text,TopKStringPatterns> {
   
   private final List<String> featureReverseMap = new ArrayList<String>();
@@ -64,12 +62,9 @@ public class ParallelFPGrowthReducer ext
   @Override
   protected void reduce(LongWritable key, Iterable<TransactionTree> values, Context context) throws IOException {
     TransactionTree cTree = new TransactionTree();
-    int nodes = 0;
     for (TransactionTree tr : values) {
-      Iterator<Pair<List<Integer>,Long>> it = tr.getIterator();
-      while (it.hasNext()) {
-        Pair<List<Integer>,Long> p = it.next();
-        nodes += cTree.addPattern(p.getFirst(), p.getSecond());
+      for (Pair<List<Integer>,Long> p : tr) {
+        cTree.addPattern(p.getFirst(), p.getSecond());
       }
     }
     
@@ -94,7 +89,7 @@ public class ParallelFPGrowthReducer ext
     
     FPGrowth<Integer> fpGrowth = new FPGrowth<Integer>();
     fpGrowth.generateTopKFrequentPatterns(
-        cTree.getIterator(),
+        cTree.iterator(),
         localFList,
         minSupport,
         maxHeapSize,

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTree.java Sun Apr 10 18:30:05 2011
@@ -28,7 +28,6 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.Stack;
 
 import org.apache.commons.lang.mutable.MutableLong;
 import org.apache.hadoop.io.VIntWritable;
@@ -44,102 +43,25 @@ import org.slf4j.LoggerFactory;
  * Map/Reduce of {@link PFPGrowth} algorithm by reducing data size passed from the Mapper to the reducer where
  * {@link org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth} mining is done
  */
-public final class TransactionTree implements Writable {
-  /**
-   * Generates a List of transactions view of Transaction Tree by doing Depth First Traversal on the tree
-   * structure
-   */
-  public final class TransactionTreeIterator implements Iterator<Pair<List<Integer>,Long>> {
-    
-    private final Stack<int[]> depth = new Stack<int[]>();
-    
-    public TransactionTreeIterator() {
-      depth.push(new int[] {0, -1});
-    }
-    
-    @Override
-    public boolean hasNext() {
-      return !depth.isEmpty();
-    }
-    
-    @Override
-    public Pair<List<Integer>,Long> next() {
-      
-      long sum;
-      int childId;
-      do {
-        int[] top = depth.peek();
-        while (top[1] + 1 == childCount[top[0]]) {
-          depth.pop();
-          top = depth.peek();
-        }
-        if (depth.isEmpty()) {
-          return null;
-        }
-        top[1]++;
-        childId = nodeChildren[top[0]][top[1]];
-        depth.push(new int[] {childId, -1});
-        
-        sum = 0;
-        for (int i = childCount[childId] - 1; i >= 0; i--) {
-          sum += nodeCount[nodeChildren[childId][i]];
-        }
-      } while (sum == nodeCount[childId]);
-      
-      List<Integer> data = new ArrayList<Integer>();
-      Iterator<int[]> it = depth.iterator();
-      it.next();
-      while (it.hasNext()) {
-        data.add(attribute[it.next()[0]]);
-      }
-      
-      Pair<List<Integer>,Long> returnable = new Pair<List<Integer>,Long>(data, nodeCount[childId] - sum);
-      
-      int[] top = depth.peek();
-      while (top[1] + 1 == childCount[top[0]]) {
-        depth.pop();
-        if (depth.isEmpty()) {
-          break;
-        }
-        top = depth.peek();
-      }
-      return returnable;
-    }
-    
-    @Override
-    public void remove() {
-      throw new UnsupportedOperationException();
-    }
-    
-  }
-  
+public final class TransactionTree implements Writable, Iterable<Pair<List<Integer>,Long>> {
+
+  private static final Logger log = LoggerFactory.getLogger(TransactionTree.class);
+
   private static final int DEFAULT_CHILDREN_INITIAL_SIZE = 2;
-  
   private static final int DEFAULT_INITIAL_SIZE = 8;
-  
   private static final float GROWTH_RATE = 1.5f;
-  
-  private static final Logger log = LoggerFactory.getLogger(TransactionTree.class);
-  
   private static final int ROOTNODEID = 0;
   
   private int[] attribute;
-  
   private int[] childCount;
-  
   private int[][] nodeChildren;
-  
   private long[] nodeCount;
-  
   private int nodes;
-  
   private boolean representedAsList;
-  
-  private List<Pair<List<Integer>,Long>> transactionSet = new ArrayList<Pair<List<Integer>,Long>>();
+  private List<Pair<List<Integer>,Long>> transactionSet;
   
   public TransactionTree() {
     this(DEFAULT_INITIAL_SIZE);
-    representedAsList = false;
   }
   
   public TransactionTree(int size) {
@@ -156,6 +78,7 @@ public final class TransactionTree imple
   
   public TransactionTree(Integer[] items, Long support) {
     representedAsList = true;
+    transactionSet = new ArrayList<Pair<List<Integer>,Long>>();
     transactionSet.add(new Pair<List<Integer>,Long>(Arrays.asList(items), support));
   }
   
@@ -246,7 +169,7 @@ public final class TransactionTree imple
   
   public Map<Integer,MutableLong> generateFList() {
     Map<Integer,MutableLong> frequencyList = new HashMap<Integer,MutableLong>();
-    Iterator<Pair<List<Integer>,Long>> it = getIterator();
+    Iterator<Pair<List<Integer>,Long>> it = iterator();
     //int items = 0;
     //int count = 0;
     while (it.hasNext()) {
@@ -265,16 +188,14 @@ public final class TransactionTree imple
   
   public TransactionTree getCompressedTree() {
     TransactionTree ctree = new TransactionTree();
-    Iterator<Pair<List<Integer>,Long>> it = getIterator();
+    Iterator<Pair<List<Integer>,Long>> it = iterator();
     final Map<Integer,MutableLong> fList = generateFList();
     int node = 0;
     Comparator<Integer> comparator = new Comparator<Integer>() {
-      
       @Override
       public int compare(Integer o1, Integer o2) {
         return fList.get(o2).compareTo(fList.get(o1));
       }
-      
     };
     int size = 0;
     List<Pair<List<Integer>,Long>> compressedTransactionSet = new ArrayList<Pair<List<Integer>,Long>>();
@@ -296,18 +217,18 @@ public final class TransactionTree imple
     if (node * 4 * 4 + ctree.childCount() * 4 <= size * 4) {
       return ctree;
     } else {
-      ctree = new TransactionTree(compressedTransactionSet);
-      return ctree;
+      return new TransactionTree(compressedTransactionSet);
     }
   }
   
-  public Iterator<Pair<List<Integer>,Long>> getIterator() {
+  @Override
+  public Iterator<Pair<List<Integer>,Long>> iterator() {
     if (this.isTreeEmpty() && !representedAsList) {
       throw new IllegalStateException("This is a bug. Please report this to mahout-user list");
     } else if (representedAsList) {
       return transactionSet.iterator();
     } else {
-      return new TransactionTreeIterator();
+      return new TransactionTreeIterator(this);
     }
   }
   

Added: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeIterator.java?rev=1090865&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeIterator.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeIterator.java Sun Apr 10 18:30:05 2011
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.fpm.pfpgrowth;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Stack;
+
+import com.google.common.collect.AbstractIterator;
+import org.apache.mahout.common.Pair;
+
+/**
+ * Generates a List of transactions view of Transaction Tree by doing Depth First Traversal on the tree
+ * structure
+ */
+final class TransactionTreeIterator extends AbstractIterator<Pair<List<Integer>,Long>> {
+
+  private final Stack<int[]> depth = new Stack<int[]>();
+  private final TransactionTree transactionTree;
+
+  TransactionTreeIterator(TransactionTree transactionTree) {
+    this.transactionTree = transactionTree;
+    depth.push(new int[] {0, -1});
+  }
+
+  @Override
+  protected Pair<List<Integer>, Long> computeNext() {
+
+    if (depth.isEmpty()) {
+      return endOfData();
+    }
+    
+    long sum;
+    int childId;
+    do {
+      int[] top = depth.peek();
+      while (top[1] + 1 == transactionTree.childCount(top[0])) {
+        depth.pop();
+        top = depth.peek();
+      }
+      if (depth.isEmpty()) {
+        return endOfData();
+      }
+      top[1]++;
+      childId = transactionTree.childAtIndex(top[0], top[1]);
+      depth.push(new int[] {childId, -1});
+      
+      sum = 0;
+      for (int i = transactionTree.childCount(childId) - 1; i >= 0; i--) {
+        sum += transactionTree.count(transactionTree.childAtIndex(childId, i));
+      }
+    } while (sum == transactionTree.count(childId));
+
+    List<Integer> data = new ArrayList<Integer>();
+    Iterator<int[]> it = depth.iterator();
+    it.next();
+    while (it.hasNext()) {
+      data.add(transactionTree.attribute(it.next()[0]));
+    }
+
+    Pair<List<Integer>,Long> returnable = new Pair<List<Integer>,Long>(data, transactionTree.count(childId) - sum);
+
+    int[] top = depth.peek();
+    while (top[1] + 1 == transactionTree.childCount(top[0])) {
+      depth.pop();
+      if (depth.isEmpty()) {
+        break;
+      }
+      top = depth.peek();
+    }
+    return returnable;
+  }
+
+
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java Sun Apr 10 18:30:05 2011
@@ -21,36 +21,43 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
+import com.google.common.base.Function;
+import com.google.common.collect.ForwardingIterator;
+import com.google.common.collect.Iterators;
 import org.apache.mahout.common.Pair;
-import org.apache.mahout.common.iterator.TransformingIterator;
 
 /**
  * Iterates over a Transaction and outputs the transaction integer id mapping and the support of the
  * transaction
  */
-public class TransactionIterator<T> extends TransformingIterator<Pair<List<T>,Long>,Pair<int[],Long>> {
+public class TransactionIterator<T> extends ForwardingIterator<Pair<int[],Long>> {
 
-  private final Map<T,Integer> attributeIdMapping;
   private final int[] transactionBuffer;
+  private final Iterator<Pair<int[],Long>> delegate;
   
-  public TransactionIterator(Iterator<Pair<List<T>,Long>> iterator, Map<T,Integer> attributeIdMapping) {
-    super(iterator);
-    this.attributeIdMapping = attributeIdMapping;
+  public TransactionIterator(Iterator<Pair<List<T>,Long>> transactions, final Map<T,Integer> attributeIdMapping) {
     transactionBuffer = new int[attributeIdMapping.size()];
+    delegate = Iterators.transform(
+        transactions,
+        new Function<Pair<List<T>,Long>, Pair<int[],Long>>() {
+          @Override
+          public Pair<int[],Long> apply(Pair<List<T>,Long> from) {
+            int index = 0;
+            for (T attribute : from.getFirst()) {
+              if (attributeIdMapping.containsKey(attribute)) {
+                transactionBuffer[index++] = attributeIdMapping.get(attribute);
+              }
+            }
+            int[] transactionList = new int[index];
+            System.arraycopy(transactionBuffer, 0, transactionList, 0, index);
+            return new Pair<int[],Long>(transactionList, from.getSecond());
+          }
+        });
   }
 
   @Override
-  protected Pair<int[],Long> transform(Pair<List<T>, Long> in) {
-    int index = 0;
-    for (T attribute : in.getFirst()) {
-      if (attributeIdMapping.containsKey(attribute)) {
-        transactionBuffer[index++] = attributeIdMapping.get(attribute);
-      }
-    }
-    int[] transactionList = new int[index];
-    System.arraycopy(transactionBuffer, 0, transactionList, 0, index);
-    return new Pair<int[],Long>(transactionList, in.getSecond());
+  protected Iterator<Pair<int[],Long>> delegate() {
+    return delegate;
   }
 
-  
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java Sun Apr 10 18:30:05 2011
@@ -17,6 +17,8 @@
 
 package org.apache.mahout.math.hadoop;
 
+import com.google.common.base.Function;
+import com.google.common.collect.Iterators;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -26,7 +28,6 @@ import org.apache.hadoop.io.WritableComp
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.mahout.common.Pair;
-import org.apache.mahout.common.iterator.TransformingIterator;
 import org.apache.mahout.common.iterator.sequencefile.PathType;
 import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterator;
 import org.apache.mahout.math.CardinalityException;
@@ -118,7 +119,15 @@ public class DistributedRowMatrix implem
   @Override
   public Iterator<MatrixSlice> iterateAll() {
     try {
-      return new DistributedMatrixIterator(rowPath, conf);
+      return Iterators.transform(
+          new SequenceFileDirIterator<IntWritable,VectorWritable>(new Path(rowPath, "*"),
+                                                                  PathType.GLOB, null, null, true, conf),
+          new Function<Pair<IntWritable,VectorWritable>,MatrixSlice>() {
+            @Override
+            public MatrixSlice apply(Pair<IntWritable, VectorWritable> from) {
+              return new MatrixSlice(from.getSecond().get(), from.getFirst().get());
+            }
+          });
     } catch (IOException ioe) {
       throw new IllegalStateException(ioe);
     }
@@ -212,21 +221,6 @@ public class DistributedRowMatrix implem
     return iterateAll();
   }
 
-  public static class DistributedMatrixIterator
-    extends TransformingIterator<Pair<IntWritable,VectorWritable>,MatrixSlice> {
-
-    public DistributedMatrixIterator(Path rowPath, Configuration conf) throws IOException {
-      super(new SequenceFileDirIterator<IntWritable,VectorWritable>(
-          new Path(rowPath, "*"), PathType.GLOB, null, null, true, conf));
-    }
-
-    @Override
-    protected MatrixSlice transform(Pair<IntWritable,VectorWritable> in) {
-      return new MatrixSlice(in.getSecond().get(), in.getFirst().get());
-    }
-
-  }
-
   public static class MatrixEntryWritable implements WritableComparable<MatrixEntryWritable> {
     private int row;
     private int col;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DocumentProcessor.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DocumentProcessor.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DocumentProcessor.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DocumentProcessor.java Sun Apr 10 18:30:05 2011
@@ -45,9 +45,7 @@ public final class DocumentProcessor {
   
   public static final String TOKENIZED_DOCUMENT_OUTPUT_FOLDER = "tokenized-documents";
   public static final String ANALYZER_CLASS = "analyzer.class";
-  
-  //public static final Charset CHARSET = Charset.forName("UTF-8");
-  
+
   /**
    * Cannot be initialized. Use the static functions
    */
@@ -69,7 +67,8 @@ public final class DocumentProcessor {
   public static void tokenizeDocuments(Path input,
                                        Class<? extends Analyzer> analyzerClass,
                                        Path output,
-                                       Configuration baseConf) throws IOException, InterruptedException, ClassNotFoundException {
+                                       Configuration baseConf)
+    throws IOException, InterruptedException, ClassNotFoundException {
     Configuration conf = new Configuration(baseConf);
     // this conf parameter needs to be set enable serialisation of conf values
     conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java Sun Apr 10 18:30:05 2011
@@ -25,7 +25,6 @@ import java.io.IOException;
 import java.io.Reader;
 import java.nio.CharBuffer;
 import java.util.Iterator;
-import java.util.NoSuchElementException;
 
 /**
  * Encodes text using a lucene style tokenizer.
@@ -118,66 +117,4 @@ public class LuceneTextValueEncoder exte
     }
   }
 
-  private static final class TokenStreamIterator implements Iterator<String> {
-    private final TokenStream tokenStream;
-    private String bufferedToken;
-
-    private TokenStreamIterator(TokenStream tokenStream) {
-      this.tokenStream = tokenStream;
-    }
-
-    /**
-     * Returns <tt>true</tt> if the iteration has more elements. (In other words, returns <tt>true</tt>
-     * if <tt>next</tt> would return an element rather than throwing an exception.)
-     *
-     * @return <tt>true</tt> if the iterator has more elements.
-     */
-    @Override
-    public boolean hasNext() {
-      if (bufferedToken == null) {
-        boolean r;
-        try {
-          r = tokenStream.incrementToken();
-        } catch (IOException e) {
-          throw new TokenizationException("IO error while tokenizing", e);
-        }
-        if (r) {
-          bufferedToken = tokenStream.getAttribute(TermAttribute.class).term();
-        }
-        return r;
-      } else {
-        return true;
-      }
-    }
-
-    /**
-     * Returns the next element in the iteration.
-     *
-     * @return the next element in the iteration.
-     * @throws NoSuchElementException iteration has no more elements.
-     */
-    @Override
-    public String next() {
-      if (bufferedToken != null) {
-        String r = bufferedToken;
-        bufferedToken = null;
-        return r;
-      } else if (hasNext()) {
-        return next();
-      } else {
-        throw new NoSuchElementException("Ran off end if token stream");
-      }
-    }
-
-    @Override
-    public void remove() {
-      throw new UnsupportedOperationException("Can't remove tokens");
-    }
-  }
-
-  private static final class TokenizationException extends RuntimeException {
-    private TokenizationException(String msg, Throwable cause) {
-      super(msg, cause);
-    }
-  }
 }

Added: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TokenStreamIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TokenStreamIterator.java?rev=1090865&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TokenStreamIterator.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TokenStreamIterator.java Sun Apr 10 18:30:05 2011
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.vectorizer.encoders;
+
+import java.io.IOException;
+
+import com.google.common.collect.AbstractIterator;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+final class TokenStreamIterator extends AbstractIterator<String> {
+
+  private final TokenStream tokenStream;
+
+  TokenStreamIterator(TokenStream tokenStream) {
+    this.tokenStream = tokenStream;
+  }
+
+  @Override
+  protected String computeNext() {
+    try {
+      if (tokenStream.incrementToken()) {
+        return tokenStream.getAttribute(TermAttribute.class).term();
+      } else {
+        return endOfData();
+      }
+    } catch (IOException e) {
+      throw new TokenizationException("IO error while tokenizing", e);
+    }
+  }
+
+}

Added: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TokenizationException.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TokenizationException.java?rev=1090865&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TokenizationException.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TokenizationException.java Sun Apr 10 18:30:05 2011
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.vectorizer.encoders;
+
+final class TokenizationException extends RuntimeException {
+
+  TokenizationException(String msg, Throwable cause) {
+    super(msg, cause);
+  }
+
+}

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java Sun Apr 10 18:30:05 2011
@@ -26,6 +26,7 @@ import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.List;
 
+import com.google.common.base.Charsets;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.DoubleWritable;
@@ -213,7 +214,7 @@ public final class ItemSimilarityJobTest
       }
     })[0];
     BufferedReader reader = new BufferedReader(
-        new InputStreamReader(new FileInputStream(outPart), Charset.forName("UTF-8")));
+        new InputStreamReader(new FileInputStream(outPart), Charsets.UTF_8));
 
     String line;
     int currentLine = 1;
@@ -310,7 +311,7 @@ public final class ItemSimilarityJobTest
       }
     })[0];
     BufferedReader reader = new BufferedReader(
-        new InputStreamReader(new FileInputStream(outPart), Charset.forName("UTF-8")));
+        new InputStreamReader(new FileInputStream(outPart), Charsets.UTF_8));
 
     String line;
     int currentLine = 1;

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java Sun Apr 10 18:30:05 2011
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.cf.taste.impl;
 
+import com.google.common.base.Charsets;
 import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
@@ -74,7 +75,7 @@ public abstract class TasteTestCase exte
   }
 
   protected static void writeLines(File file, String... lines) throws FileNotFoundException {
-    PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), Charset.forName("UTF-8")));
+    PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8));
     try {
       for (String line : lines) {
         writer.println(line);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierSelfTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierSelfTest.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierSelfTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierSelfTest.java Sun Apr 10 18:30:05 2011
@@ -24,6 +24,7 @@ import java.io.OutputStreamWriter;
 import java.nio.charset.Charset;
 import java.util.List;
 
+import com.google.common.base.Charsets;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -52,7 +53,7 @@ public final class BayesClassifierSelfTe
 
     File tempInputFile = getTestTempFile("bayesinput");
     BufferedWriter writer = new BufferedWriter(
-        new OutputStreamWriter(new FileOutputStream(tempInputFile), Charset.forName("UTF-8")));
+        new OutputStreamWriter(new FileOutputStream(tempInputFile), Charsets.UTF_8));
     for (String[] entry : ClassifierData.DATA) {
       writer.write(entry[0] + '\t' + entry[1] + '\n');
     }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesFileFormatterTest.java Sun Apr 10 18:30:05 2011
@@ -21,9 +21,9 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
-import java.nio.charset.Charset;
 import java.util.Iterator;
 
+import com.google.common.base.Charsets;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.mahout.classifier.BayesFileFormatter;
@@ -48,7 +48,7 @@ public final class BayesFileFormatterTes
     out = getTestTempDir("bayes/out");
     for (String word : WORDS) {
       File file = new File(input, word);
-      Writer writer = new OutputStreamWriter(new FileOutputStream(file), Charset.forName("UTF-8"));
+      Writer writer = new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8);
       writer.write(word);
       writer.close();
     }
@@ -59,8 +59,7 @@ public final class BayesFileFormatterTes
     Analyzer analyzer = new WhitespaceAnalyzer();
     File[] files = out.listFiles();
     assertEquals("files Size: " + files.length + " is not: " + 0, 0, files.length);
-    Charset charset = Charset.forName("UTF-8");
-    BayesFileFormatter.format("animal", analyzer, input, charset, out);
+    BayesFileFormatter.format("animal", analyzer, input, Charsets.UTF_8, out);
 
     files = out.listFiles();
     assertEquals("files Size: " + files.length + " is not: " + WORDS.length, files.length, WORDS.length);
@@ -79,8 +78,7 @@ public final class BayesFileFormatterTes
     Analyzer analyzer = new WhitespaceAnalyzer();
     File[] files = out.listFiles();
     assertEquals("files Size: " + files.length + " is not: " + 0, 0, files.length);
-    Charset charset = Charset.forName("UTF-8");
-    BayesFileFormatter.collapse("animal", analyzer, input, charset, new File(out, "animal"));
+    BayesFileFormatter.collapse("animal", analyzer, input, Charsets.UTF_8, new File(out, "animal"));
     files = out.listFiles();
     assertEquals("files Size: " + files.length + " is not: " + 1, 1, files.length);
     int count = 0;

Copied: mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/CountingIteratorTest.java (from r1090549, mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/SamplerCase.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/CountingIteratorTest.java?p2=mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/CountingIteratorTest.java&p1=mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/SamplerCase.java&r1=1090549&r2=1090865&rev=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/SamplerCase.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/CountingIteratorTest.java Sun Apr 10 18:30:05 2011
@@ -17,85 +17,28 @@
 
 package org.apache.mahout.common.iterator;
 
-import java.util.Collections;
 import java.util.Iterator;
-import java.util.Arrays;
-import java.util.List;
 
 import org.apache.mahout.common.MahoutTestCase;
 import org.junit.Test;
 
-public abstract class SamplerCase extends MahoutTestCase {
-  // these provide access to the underlying implementation
-  protected abstract Iterator<Integer> createSampler(int n, Iterator<Integer> source);
-
-  protected abstract boolean isSorted();
+public final class CountingIteratorTest extends MahoutTestCase {
 
   @Test
   public void testEmptyCase() {
-    assertFalse(createSampler(100, Integers.iterator(0)).hasNext());
+    assertFalse(new CountingIterator(0).hasNext());
   }
 
   @Test
-  public void testSmallInput() {
-    Iterator<Integer> t = createSampler(10, Integers.iterator(1));
-    assertTrue(t.hasNext());
-    assertEquals(0, t.next().intValue());
-    assertFalse(t.hasNext());
-
-    t = createSampler(10, Integers.iterator(1));
-    assertTrue(t.hasNext());
-    assertEquals(0, t.next().intValue());
-    assertFalse(t.hasNext());
+  public void testCount() {
+    Iterator<Integer> it = new CountingIterator(3);
+    assertTrue(it.hasNext());
+    assertEquals(0, (int) it.next());
+    assertTrue(it.hasNext());
+    assertEquals(1, (int) it.next());
+    assertTrue(it.hasNext());
+    assertEquals(2, (int) it.next());
+    assertFalse(it.hasNext());
   }
 
-  @Test
-  public void testAbsurdSize() {
-    Iterator<Integer> t = createSampler(0, Integers.iterator(2));
-    assertFalse(t.hasNext());
-  }
-
-  @Test
-  public void testExactSizeMatch() {
-    Iterator<Integer> t = createSampler(10, Integers.iterator(10));
-    for (int i = 0; i < 10; i++) {
-      assertTrue(t.hasNext());
-      assertEquals(i, t.next().intValue());
-    }
-    assertFalse(t.hasNext());
-  }
-
-  @Test
-  public void testSample() {
-    Iterator<Integer> source = Integers.iterator(100);
-    Iterator<Integer> t = createSampler(15, source);
-
-    // this is just a regression test, not a real test
-    List<Integer> expectedValues = Arrays.asList(16, 23, 2, 3, 32, 85, 6, 53, 8, 75, 15, 81, 12, 59, 14);
-    if (isSorted()) {
-      Collections.sort(expectedValues);
-    }
-    Iterator<Integer> expected = expectedValues.iterator();
-    int last = Integer.MIN_VALUE;
-    for (int i = 0; i < 15; i++) {
-      assertTrue(t.hasNext());
-      int actual = t.next();
-      if (isSorted()) {
-        assertTrue(actual >= last);
-        last = actual;
-      } else {
-        // any of the first few values should be in the original places
-        if (actual < 15) {
-          assertEquals(i, actual);
-        }
-      }
-
-      assertTrue(actual >= 0 && actual < 100);
-
-      // this is just a regression test, but still of some value
-      assertEquals(expected.next().intValue(), actual);
-      assertFalse(source.hasNext());
-    }
-    assertFalse(t.hasNext());
-  }
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/SamplerCase.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/SamplerCase.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/SamplerCase.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/SamplerCase.java Sun Apr 10 18:30:05 2011
@@ -33,17 +33,17 @@ public abstract class SamplerCase extend
 
   @Test
   public void testEmptyCase() {
-    assertFalse(createSampler(100, Integers.iterator(0)).hasNext());
+    assertFalse(createSampler(100, new CountingIterator(0)).hasNext());
   }
 
   @Test
   public void testSmallInput() {
-    Iterator<Integer> t = createSampler(10, Integers.iterator(1));
+    Iterator<Integer> t = createSampler(10, new CountingIterator(1));
     assertTrue(t.hasNext());
     assertEquals(0, t.next().intValue());
     assertFalse(t.hasNext());
 
-    t = createSampler(10, Integers.iterator(1));
+    t = createSampler(10, new CountingIterator(1));
     assertTrue(t.hasNext());
     assertEquals(0, t.next().intValue());
     assertFalse(t.hasNext());
@@ -51,13 +51,13 @@ public abstract class SamplerCase extend
 
   @Test
   public void testAbsurdSize() {
-    Iterator<Integer> t = createSampler(0, Integers.iterator(2));
+    Iterator<Integer> t = createSampler(0, new CountingIterator(2));
     assertFalse(t.hasNext());
   }
 
   @Test
   public void testExactSizeMatch() {
-    Iterator<Integer> t = createSampler(10, Integers.iterator(10));
+    Iterator<Integer> t = createSampler(10, new CountingIterator(10));
     for (int i = 0; i < 10; i++) {
       assertTrue(t.hasNext());
       assertEquals(i, t.next().intValue());
@@ -67,11 +67,11 @@ public abstract class SamplerCase extend
 
   @Test
   public void testSample() {
-    Iterator<Integer> source = Integers.iterator(100);
+    Iterator<Integer> source = new CountingIterator(100);
     Iterator<Integer> t = createSampler(15, source);
 
     // this is just a regression test, not a real test
-    List<Integer> expectedValues = Arrays.asList(16, 23, 2, 3, 32, 85, 6, 53, 8, 75, 15, 81, 12, 59, 14);
+    List<Integer> expectedValues = Arrays.asList(83, 56, 69, 96, 4, 59, 70, 7, 93, 52, 39, 11, 16, 67, 26);
     if (isSorted()) {
       Collections.sort(expectedValues);
     }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/TestSamplingIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/TestSamplingIterator.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/TestSamplingIterator.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/iterator/TestSamplingIterator.java Sun Apr 10 18:30:05 2011
@@ -26,14 +26,14 @@ public final class TestSamplingIterator 
 
   @Test
   public void testEmptyCase() {
-    assertFalse(new SamplingIterator<Integer>(Integers.iterator(0), 0.9999).hasNext());
-    assertFalse(new SamplingIterator<Integer>(Integers.iterator(0), 1).hasNext());
-    assertFalse(new SamplingIterator<Integer>(Integers.iterator(0), 2).hasNext());
+    assertFalse(new SamplingIterator<Integer>(new CountingIterator(0), 0.9999).hasNext());
+    assertFalse(new SamplingIterator<Integer>(new CountingIterator(0), 1).hasNext());
+    assertFalse(new SamplingIterator<Integer>(new CountingIterator(0), 2).hasNext());
   }
 
   @Test
   public void testSmallInput() {
-    Iterator<Integer> t = new SamplingIterator<Integer>(Integers.iterator(1), 0.9999);
+    Iterator<Integer> t = new SamplingIterator<Integer>(new CountingIterator(1), 0.9999);
     assertTrue(t.hasNext());
     assertEquals(0, t.next().intValue());
     assertFalse(t.hasNext());
@@ -41,13 +41,13 @@ public final class TestSamplingIterator 
 
   @Test
   public void testAbsurdSampleRate() {
-    Iterator<Integer> t = new SamplingIterator<Integer>(Integers.iterator(2), 0);
+    Iterator<Integer> t = new SamplingIterator<Integer>(new CountingIterator(2), 0);
     assertFalse(t.hasNext());
   }
 
   @Test
   public void testExactSizeMatch() {
-    Iterator<Integer> t = new SamplingIterator<Integer>(Integers.iterator(10), 1);
+    Iterator<Integer> t = new SamplingIterator<Integer>(new CountingIterator(10), 1);
     for (int i = 0; i < 10; i++) {
       assertTrue(t.hasNext());
       assertEquals(i, t.next().intValue());
@@ -58,7 +58,7 @@ public final class TestSamplingIterator 
   @Test
   public void testSample() {
     for (int i = 0; i < 100; i++) {
-      Iterator<Integer> t = new SamplingIterator<Integer>(Integers.iterator(1000), 0.1);
+      Iterator<Integer> t = new SamplingIterator<Integer>(new CountingIterator(1000), 0.1);
       int k = 0;
       while (t.hasNext()) {
         int v = t.next();

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/df/data/Utils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/Utils.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/df/data/Utils.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/df/data/Utils.java Sun Apr 10 18:30:05 2011
@@ -25,6 +25,7 @@ import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.Random;
 
+import com.google.common.base.Charsets;
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -247,7 +248,7 @@ public final class Utils {
 
   private static void writeDataToFile(String[] sData, Path path) throws IOException {
     BufferedWriter output = new BufferedWriter(new OutputStreamWriter(
-        new FileOutputStream(path.toString()), Charset.forName("UTF-8")));
+        new FileOutputStream(path.toString()), Charsets.UTF_8));
     try {
       for (String line : sData) {
         output.write(line);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java Sun Apr 10 18:30:05 2011
@@ -21,7 +21,6 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
-import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -32,6 +31,7 @@ import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
 
+import com.google.common.base.Charsets;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.common.Pair;
 import org.apache.mahout.common.Parameters;
@@ -62,7 +62,7 @@ public class PFPGrowthRetailDataTest ext
     File input = new File(inputDir, "test.txt");
     params.set(PFPGrowth.INPUT, input.getAbsolutePath());
     params.set(PFPGrowth.OUTPUT, outputDir.getAbsolutePath());
-    Writer writer = new OutputStreamWriter(new FileOutputStream(input), Charset.forName("UTF-8"));
+    Writer writer = new OutputStreamWriter(new FileOutputStream(input), Charsets.UTF_8);
     try {
       StringRecordIterator it = new StringRecordIterator(new FileLineIterable(Resources.getResource(
         "retail.dat").openStream()), "\\s+");

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java Sun Apr 10 18:30:05 2011
@@ -28,6 +28,7 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
+import com.google.common.base.Charsets;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.common.Pair;
@@ -56,7 +57,7 @@ public final class PFPGrowthTest extends
     File input = new File(inputDir, "test.txt");
     params.set(PFPGrowth.INPUT, input.getAbsolutePath());
     params.set(PFPGrowth.OUTPUT, outputDir.getAbsolutePath());
-    Writer writer = new OutputStreamWriter(new FileOutputStream(input), Charset.forName("UTF-8"));
+    Writer writer = new OutputStreamWriter(new FileOutputStream(input), Charsets.UTF_8);
     try {
       Collection<List<String>> transactions = new ArrayList<List<String>>();
       transactions.add(Arrays.asList("E", "A", "D", "B"));

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeTest.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeTest.java Sun Apr 10 18:30:05 2011
@@ -64,6 +64,7 @@ public final class TransactionTreeTest e
 
   @Test
   public void testTransactionTree() {
+    
     TransactionTree tree = new TransactionTree();
     int nodes = 0;
     int total = 0;
@@ -74,51 +75,46 @@ public final class TransactionTreeTest e
     }
 
     log.info("Input integers: {}", total);
-    log.info("Input data Size: P{", total * SIZE_INT / (double) MEGABYTE);
+    log.info("Input data Size: {}", total * SIZE_INT / (double) MEGABYTE);
     log.info("Nodes in Tree: {}", nodes);
     log.info("Size of Tree: {}", (nodes * SIZE_INT * NUM_OF_FPTREE_FIELDS + tree.childCount() * SIZE_INT)
         / (double) MEGABYTE);
+
     TransactionTree vtree = new TransactionTree();
-    Iterator<Pair<List<Integer>, Long>> it = tree.getIterator();
     StringBuilder sb = new StringBuilder();
     int count = 0;
     int items = 0;
+    Iterator<Pair<List<Integer>,Long>> it = tree.iterator();
     while (it.hasNext()) {
-      Pair<List<Integer>, Long> p = it.next();
+      Pair<List<Integer>,Long> p = it.next();
       vtree.addPattern(p.getFirst(), p.getSecond());
       items += p.getFirst().size();
       count++;
-      String s = p.toString();
-      sb.append(s);
-
+      sb.append(p);
     }
+
     log.info("Number of transaction integers: {}", items);
     log.info("Size of Transactions: {}", (items * SIZE_INT + count * SIZE_LONG) / (double) MEGABYTE);
     log.info("Number of Transactions: {}", count);
+
     tree.getCompressedTree();
-    it = vtree.getIterator();
+    it = vtree.iterator();
     StringBuilder sb1 = new StringBuilder();
     while (it.hasNext()) {
-      Pair<List<Integer>, Long> p = it.next();
-      String s = p.toString();
-      sb1.append(s);
-
+      sb1.append(it.next());
     }
     assertEquals(sb.toString(), sb1.toString());
 
     TransactionTree mtree = new TransactionTree();
-    MultiTransactionTreeIterator mt = new MultiTransactionTreeIterator(vtree.getIterator());
+    MultiTransactionTreeIterator mt = new MultiTransactionTreeIterator(vtree.iterator());
     while (mt.hasNext()) {
       mtree.addPattern(mt.next(), 1);
     }
 
-    it = mtree.getIterator();
+    it = mtree.iterator();
     StringBuilder sb2 = new StringBuilder();
     while (it.hasNext()) {
-      Pair<List<Integer>, Long> p = it.next();
-      String s = p.toString();
-      sb2.append(s);
-
+      sb2.append(it.next());
     }
     assertEquals(sb.toString(), sb2.toString());
   }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/bookcrossing/BookCrossingDataModel.java Sun Apr 10 18:30:05 2011
@@ -23,9 +23,9 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
-import java.nio.charset.Charset;
 import java.util.regex.Pattern;
 
+import com.google.common.base.Charsets;
 import org.apache.mahout.cf.taste.example.grouplens.GroupLensDataModel;
 import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
 import org.apache.mahout.common.iterator.FileLineIterable;
@@ -61,7 +61,7 @@ public final class BookCrossingDataModel
     resultFile.delete();
     PrintWriter writer = null;
     try {
-      writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
+      writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charsets.UTF_8));
       for (String line : new FileLineIterable(originalFile, true)) {
         // 0 ratings are basically "no rating", ignore them (thanks h.9000)
         if (line.endsWith("\"0\"")) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/grouplens/GroupLensDataModel.java Sun Apr 10 18:30:05 2011
@@ -24,9 +24,9 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
-import java.nio.charset.Charset;
 import java.util.regex.Pattern;
 
+import com.google.common.base.Charsets;
 import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
 import org.apache.mahout.common.iterator.FileLineIterable;
 import org.apache.mahout.common.IOUtils;
@@ -56,7 +56,7 @@ public final class GroupLensDataModel ex
     }
     PrintWriter writer = null;
     try {
-      writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charset.forName("UTF-8")));
+      writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(resultFile), Charsets.UTF_8));
       for (String line : new FileLineIterable(originalFile, false)) {
         int lastDelimiterStart = line.lastIndexOf(COLON_DELIMTER);
         if (lastDelimiterStart < 0) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java Sun Apr 10 18:30:05 2011
@@ -20,9 +20,9 @@ package org.apache.mahout.cf.taste.examp
 import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
-import java.util.NoSuchElementException;
 import java.util.regex.Pattern;
 
+import com.google.common.collect.AbstractIterator;
 import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
 import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
 import org.apache.mahout.cf.taste.model.PreferenceArray;
@@ -38,7 +38,9 @@ import org.apache.mahout.common.Pair;
  * <p>Timestamps in the data set are relative to some unknown point in time, for anonymity. They are assumed
  * to be relative to the epoch, time 0, or January 1 1970, for purposes here.</p>
  */
-public final class DataFileIterator implements SkippingIterator<Pair<PreferenceArray,long[]>>, Closeable {
+public final class DataFileIterator
+    extends AbstractIterator<Pair<PreferenceArray,long[]>>
+    implements SkippingIterator<Pair<PreferenceArray,long[]>>, Closeable {
 
   private static final Pattern COLON_PATTERN = Pattern.compile(":");
   private static final Pattern PIPE_PATTERN = Pattern.compile("\\|");
@@ -54,15 +56,10 @@ public final class DataFileIterator impl
   }
 
   @Override
-  public boolean hasNext() {
-    return lineIterator.hasNext();
-  }
-
-  @Override
-  public Pair<PreferenceArray,long[]> next() {
+  protected Pair<PreferenceArray, long[]> computeNext() {
 
-    if (!hasNext()) {
-      throw new NoSuchElementException();
+    if (!lineIterator.hasNext()) {
+      return endOfData();
     }
 
     String line = lineIterator.next();
@@ -113,14 +110,6 @@ public final class DataFileIterator impl
     return new Pair<PreferenceArray,long[]>(currentUserPrefs, timestamps);
   }
 
-  /**
-   * @throws UnsupportedOperationException
-   */
-  @Override
-  public void remove() {
-    throw new UnsupportedOperationException();
-  }
-
   @Override
   public void skip(int n) {
     for (int i = 0; i < n; i++) {
@@ -138,6 +127,7 @@ public final class DataFileIterator impl
 
   @Override
   public void close() {
+    endOfData();
     lineIterator.close();
   }
 

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java Sun Apr 10 18:30:05 2011
@@ -23,10 +23,10 @@ import java.io.FileOutputStream;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
-import java.nio.charset.Charset;
 import java.util.zip.GZIPOutputStream;
 
 
+import com.google.common.base.Charsets;
 import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.common.Pair;
 
@@ -52,7 +52,7 @@ public final class ToCSV {
     }
 
     OutputStream outStream = new GZIPOutputStream(new FileOutputStream(outputFile));
-    Writer outWriter = new BufferedWriter(new OutputStreamWriter(outStream, Charset.forName("UTF-8")));
+    Writer outWriter = new BufferedWriter(new OutputStreamWriter(outStream, Charsets.UTF_8));
 
     for (Pair<PreferenceArray,long[]> user : new DataFileIterable(inputFile)) {
       PreferenceArray prefs = user.getFirst();

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/KDDCupFactorizablePreferences.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/KDDCupFactorizablePreferences.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/KDDCupFactorizablePreferences.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/svd/KDDCupFactorizablePreferences.java Sun Apr 10 18:30:05 2011
@@ -17,13 +17,16 @@
 
 package org.apache.mahout.cf.taste.example.kddcup.track1.svd;
 
-import org.apache.mahout.cf.taste.example.kddcup.DataFileIterator;
+import com.google.common.base.Function;
+import com.google.common.collect.Iterables;
+import org.apache.mahout.cf.taste.example.kddcup.DataFileIterable;
+import org.apache.mahout.cf.taste.impl.common.AbstractLongPrimitiveIterator;
 import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
 import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.common.Pair;
 
 import java.io.File;
-import java.io.IOException;
-import java.util.Iterator;
 
 public class KDDCupFactorizablePreferences implements FactorizablePreferences {
 
@@ -45,16 +48,15 @@ public class KDDCupFactorizablePreferenc
 
   @Override
   public Iterable<Preference> getPreferences() {
-    return new Iterable<Preference>() {
-      @Override
-      public Iterator<Preference> iterator() {
-        try {
-          return new DataFilePreferencesIterator(new DataFileIterator(dataFile));
-        } catch (IOException e) {
-          throw new IllegalStateException("Cannot iterate over datafile!", e);
-        }
-      }
-    };
+    Iterable<Iterable<Preference>> prefIterators =
+        Iterables.transform(new DataFileIterable(dataFile),
+                            new Function<Pair<PreferenceArray,long[]>,Iterable<Preference>>() {
+                              @Override
+                              public Iterable<Preference> apply(Pair<PreferenceArray,long[]> from) {
+                                return from.getFirst();
+                              }
+                            });
+    return Iterables.concat(prefIterators);
   }
 
   @Override
@@ -82,45 +84,12 @@ public class KDDCupFactorizablePreferenc
     return 252800275;
   }
 
-  static class DataFilePreferencesIterator implements Iterator<Preference> {
-
-    private final DataFileIterator dataFileIterator;
-
-    Iterator<Preference> currentUserPrefsIterator;
-
-    public DataFilePreferencesIterator(DataFileIterator dataFileIterator) {
-      this.dataFileIterator = dataFileIterator;
-    }
-
-    @Override
-    public boolean hasNext() {
-      if (currentUserPrefsIterator != null && currentUserPrefsIterator.hasNext()) {
-        return true;
-      } else {
-        return dataFileIterator.hasNext();
-      }
-    }
-
-    @Override
-    public Preference next() {
-      if (currentUserPrefsIterator == null || !currentUserPrefsIterator.hasNext()) {
-        currentUserPrefsIterator = dataFileIterator.next().getFirst().iterator();
-      }
-      return currentUserPrefsIterator.next();
-    }
-
-    @Override
-    public void remove() {
-      throw new UnsupportedOperationException();
-    }
-  }
-
-  static class FixedSizeLongIterator implements LongPrimitiveIterator {
+  static class FixedSizeLongIterator extends AbstractLongPrimitiveIterator {
 
     private long currentValue;
     private final long maximum;
 
-    public FixedSizeLongIterator(long maximum) {
+    FixedSizeLongIterator(long maximum) {
       this.maximum = maximum;
       currentValue = 0;
     }
@@ -146,11 +115,6 @@ public class KDDCupFactorizablePreferenc
     }
 
     @Override
-    public Long next() {
-      return ++currentValue;
-    }
-
-    @Override
     public void remove() {
       throw new UnsupportedOperationException();
     }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java Sun Apr 10 18:30:05 2011
@@ -22,12 +22,12 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
-import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
+import com.google.common.base.Charsets;
 import com.google.common.base.Preconditions;
 import org.apache.commons.cli2.OptionException;
 import org.apache.mahout.cf.taste.example.TasteOptionParser;
@@ -102,7 +102,7 @@ public final class TransposeToByUser {
   
   private static void appendStringsToFile(Iterable<String> strings, File file) throws IOException {
     PrintWriter outputStreamWriter =
-      new PrintWriter(new OutputStreamWriter(new FileOutputStream(file, true), Charset.forName("UTF-8")));
+      new PrintWriter(new OutputStreamWriter(new FileOutputStream(file, true), Charsets.UTF_8));
     try {
       for (String s : strings) {
         outputStreamWriter.println(s);

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/SplitBayesInput.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/SplitBayesInput.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/SplitBayesInput.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/SplitBayesInput.java Sun Apr 10 18:30:05 2011
@@ -25,6 +25,7 @@ import java.io.Writer;
 import java.nio.charset.Charset;
 import java.util.BitSet;
 
+import com.google.common.base.Charsets;
 import com.google.common.base.Preconditions;
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
@@ -116,7 +117,7 @@ public class SplitBayesInput {
   private int splitLocation = 100;
   private int testRandomSelectionSize = -1;
   private int testRandomSelectionPct = -1;
-  private Charset charset = Charset.forName("UTF-8");
+  private Charset charset = Charsets.UTF_8;
 
   private final FileSystem fs;
   private Path inputDirectory;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/XmlInputFormat.java Sun Apr 10 18:30:05 2011
@@ -18,8 +18,8 @@
 package org.apache.mahout.classifier.bayes;
 
 import java.io.IOException;
-import java.nio.charset.Charset;
 
+import com.google.common.base.Charsets;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -44,7 +44,6 @@ public class XmlInputFormat extends Text
 
   public static final String START_TAG_KEY = "xmlinput.start";
   public static final String END_TAG_KEY = "xmlinput.end";
-  private static final Charset UTF8 = Charset.forName("UTF-8");
 
   @Override
   public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
@@ -73,8 +72,8 @@ public class XmlInputFormat extends Text
     private Text currentValue;
 
     public XmlRecordReader(FileSplit split, Configuration conf) throws IOException {
-      startTag = conf.get(START_TAG_KEY).getBytes(UTF8);
-      endTag = conf.get(END_TAG_KEY).getBytes(UTF8);
+      startTag = conf.get(START_TAG_KEY).getBytes(Charsets.UTF_8);
+      endTag = conf.get(END_TAG_KEY).getBytes(Charsets.UTF_8);
 
       // open the file and seek to the start of the split
       start = split.getStart();

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java Sun Apr 10 18:30:05 2011
@@ -22,7 +22,6 @@ import java.io.IOException;
 import java.io.InputStreamReader;
 import java.net.URL;
 import java.net.URLConnection;
-import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.LinkedList;
@@ -30,6 +29,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.regex.Pattern;
 
+import com.google.common.base.Charsets;
 import org.apache.mahout.common.IOUtils;
 import org.apache.mahout.math.Matrix;
 import org.slf4j.Logger;
@@ -126,7 +126,7 @@ public final class PosTagger {
     List<Integer> hiddenSequence = new LinkedList<Integer>();
 
     BufferedReader input =
-        new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8")));
+        new BufferedReader(new InputStreamReader(connection.getInputStream(), Charsets.UTF_8));
     try {
       String line;
       while ((line = input.readLine()) != null) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java Sun Apr 10 18:30:05 2011
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.classifier.sgd;
 
+import com.google.common.base.Charsets;
 import com.google.common.base.Joiner;
 import com.google.common.base.Splitter;
 import com.google.common.collect.Lists;
@@ -38,7 +39,6 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.PrintWriter;
 import java.nio.ByteBuffer;
-import java.nio.charset.Charset;
 import java.util.List;
 import java.util.Random;
 
@@ -91,7 +91,7 @@ public final class SimpleCsvExamples {
       }
     } else if ("--parse".equals(args[0])) {
       BufferedReader in = new BufferedReader(
-          new InputStreamReader(new FileInputStream(new File(args[1])), Charset.forName("UTF-8")));
+          new InputStreamReader(new FileInputStream(new File(args[1])), Charsets.UTF_8));
       try {
         String line = in.readLine();
         while (line != null) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java Sun Apr 10 18:30:05 2011
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.classifier.sgd;
 
+import com.google.common.base.Charsets;
 import com.google.common.collect.Lists;
 import com.google.common.io.Resources;
 import org.apache.commons.cli2.CommandLine;
@@ -39,7 +40,6 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
 import java.io.PrintStream;
-import java.nio.charset.Charset;
 import java.util.List;
 import java.util.Locale;
 
@@ -308,6 +308,6 @@ public final class TrainLogistic {
     } catch (IllegalArgumentException e) {
       in = new FileInputStream(new File(inputFile));
     }
-    return new BufferedReader(new InputStreamReader(in, Charset.forName("UTF-8")));
+    return new BufferedReader(new InputStreamReader(in, Charsets.UTF_8));
   }
 }

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java Sun Apr 10 18:30:05 2011
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.classifier.sgd;
 
+import com.google.common.base.Charsets;
 import com.google.common.collect.ConcurrentHashMultiset;
 import com.google.common.collect.HashMultiset;
 import com.google.common.collect.Lists;
@@ -47,7 +48,6 @@ import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.StringReader;
-import java.nio.charset.Charset;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.Collection;
@@ -285,7 +285,7 @@ public final class TrainNewsGroups {
     Multiset<String> words = ConcurrentHashMultiset.create();
 
     BufferedReader reader =
-        new BufferedReader(new InputStreamReader(new FileInputStream(file), Charset.forName("UTF-8")));
+        new BufferedReader(new InputStreamReader(new FileInputStream(file), Charsets.UTF_8));
     try {
       String line = reader.readLine();
       Reader dateString = new StringReader(DATE_FORMATS[leakType % 3].format(new Date(date)));

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplaySpectralKMeans.java Sun Apr 10 18:30:05 2011
@@ -23,8 +23,8 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
-import java.nio.charset.Charset;
 
+import com.google.common.base.Charsets;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -58,7 +58,7 @@ class DisplaySpectralKMeans extends Disp
       fs.mkdirs(output);
     }
     Writer writer = new OutputStreamWriter(
-        new FileOutputStream(new File(affinities.toString())), Charset.forName("UTF-8"));
+        new FileOutputStream(new File(affinities.toString())), Charsets.UTF_8);
     try {
       for (int i = 0; i < SAMPLE_DATA.size(); i++) {
         for (int j = 0; j < SAMPLE_DATA.size(); j++) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/minhash/LastfmDataConverter.java Sun Apr 10 18:30:05 2011
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.clustering.minhash;
 
+import com.google.common.base.Charsets;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -31,7 +32,6 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
-import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -101,7 +101,7 @@ public final class LastfmDataConverter {
     Map<String, List<Integer>> itemFeaturesMap = new HashMap<String, List<Integer>>();
     String msg = usedMemory() + "Converting data to internal vector format: ";
     BufferedReader br = new BufferedReader(
-        new InputStreamReader(new FileInputStream(new File(inputFile)), Charset.forName("UTF-8")));
+        new InputStreamReader(new FileInputStream(new File(inputFile)), Charsets.UTF_8));
     try {
       System.out.print(msg);
       int prevPercentDone = 1;

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java?rev=1090865&r1=1090864&r2=1090865&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java Sun Apr 10 18:30:05 2011
@@ -184,7 +184,7 @@ public class TestForest extends Configur
     classifier.run();
 
     if (analyze) {
-      log.info(classifier.getAnalyzer().summarize());
+      log.info("{}", classifier.getAnalyzer());
     }
   }
 
@@ -220,7 +220,7 @@ public class TestForest extends Configur
     log.info("Classification Time: {}", DFUtils.elapsedTime(time));
 
     if (analyzer != null) {
-      log.info(analyzer.summarize());
+      log.info("{}", analyzer);
     }
   }
 



Mime
View raw message