mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From robina...@apache.org
Subject svn commit: r1469528 - in /mahout/trunk: core/src/main/java/org/apache/mahout/common/ integration/src/main/java/org/apache/mahout/benchmark/
Date Thu, 18 Apr 2013 19:06:28 GMT
Author: robinanil
Date: Thu Apr 18 19:06:27 2013
New Revision: 1469528

URL: http://svn.apache.org/r1469528
Log:
MAHOUT-1191 Cleans up the vector benchmarks to be faster and more consistent. Results cannot be compared with values produced by earlier versions of this code.

Added:
    mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java
Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java?rev=1469528&r1=1469527&r2=1469528&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java Thu Apr 18 19:06:27 2013
@@ -18,18 +18,21 @@
 package org.apache.mahout.common;
 
 import java.io.Serializable;
+import java.text.DecimalFormat;
 
 public final class TimingStatistics implements Serializable {
-  
+  private static final DecimalFormat DF = new DecimalFormat("#.##");
   private int nCalls;
   private long minTime;
   private long maxTime;
   private long sumTime;
+  private long leadSumTime;
   private double sumSquaredTime;
-  
+
+
   /** Creates a new instance of CallStats */
   public TimingStatistics() { }
-  
+
   public TimingStatistics(int nCalls, long minTime, long maxTime, long sumTime, double sumSquaredTime) {
     this.nCalls = nCalls;
     this.minTime = minTime;
@@ -37,31 +40,31 @@ public final class TimingStatistics impl
     this.sumTime = sumTime;
     this.sumSquaredTime = sumSquaredTime;
   }
-  
+
   public synchronized int getNCalls() {
     return nCalls;
   }
-  
+
   public synchronized long getMinTime() {
     return Math.max(0, minTime);
   }
-  
+
   public synchronized long getMaxTime() {
     return maxTime;
   }
-  
+
   public synchronized long getSumTime() {
     return sumTime;
   }
-  
+
   public synchronized double getSumSquaredTime() {
     return sumSquaredTime;
   }
-  
+
   public synchronized long getMeanTime() {
     return nCalls == 0 ? 0 : sumTime / nCalls;
   }
-  
+
   public synchronized long getStdDevTime() {
     if (nCalls == 0) {
       return 0;
@@ -75,24 +78,59 @@ public final class TimingStatistics impl
     }
     return (long) Math.sqrt(variance);
   }
-  
+
   @Override
   public synchronized String toString() {
-    return '\n' + "nCalls = " + nCalls + ";\n" + "sum = " + sumTime / 1000000000.0 + "s;\n"
-           + "min = " + minTime / 1000000.0 + "ms;\n" + "max = " + maxTime / 1000000.0 + "ms;\n"
-           + "mean = " + getMeanTime() / 1000000.0 + "ms;\n" + "stdDev = " + getStdDevTime()
-           / 1000000.0 + "ms;";
+    return '\n'
+        + "nCalls = " + nCalls + ";\n"
+        + "sum    = " + DF.format(sumTime / 1000000000.0) + "s;\n"
+        + "min    = " + DF.format(minTime / 1000000.0) + "ms;\n"
+        + "max    = " + DF.format(maxTime / 1000000.0) + "ms;\n"
+        + "mean   = " + DF.format(getMeanTime() / 1000.0) + "us;\n"
+        + "stdDev = " + DF.format(getStdDevTime() / 1000.0) + "us;";
   }
-  
+
   public Call newCall() {
     return new Call();
   }
-  
-  public final class Call {
-    private final long startTime = System.nanoTime();
-    
+
+  /** Ignores counting the performance metrics until the lead time is finished. The caller should allow enough time for the JIT to warm up. */
+  public Call newCall(long leadTimeUsec) {
+    if (leadSumTime > leadTimeUsec) {
+      return new Call();
+    } else {
+      return new LeadTimeCall();
+    }
+  }
+
+  /** Ignores counting the performance metrics. The caller should allow enough time for the JIT to warm up. */
+  public class LeadTimeCall extends Call {
+
+    private LeadTimeCall() { }
+
+    @Override
+    public void end() {
+      long elapsed = System.nanoTime() - startTime;
+      synchronized (TimingStatistics.this) {
+        leadSumTime += elapsed;
+      }
+    }
+
+    @Override
+    public boolean end(long sumMaxUsec) {
+      end();
+      return false;
+    }
+  }
+
+  /**
+   * A call object that can update performance metrics.
+   */
+  public class Call {
+    protected final long startTime = System.nanoTime();
+
     private Call() { }
-    
+
     public void end() {
       long elapsed = System.nanoTime() - startTime;
       synchronized (TimingStatistics.this) {
@@ -107,5 +145,13 @@ public final class TimingStatistics impl
         sumSquaredTime += elapsed * elapsed;
       }
     }
+
+    /**
+     * Returns true if the sumTime has reached this limit.
+     */
+    public boolean end(long sumMaxUsec) {
+      end();
+      return sumMaxUsec < sumTime;
+    }
   }
 }

Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java?rev=1469528&view=auto
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java (added)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java Thu Apr 18 19:06:27 2013
@@ -0,0 +1,94 @@
+package org.apache.mahout.benchmark;
+
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.common.TimingStatistics;
+import org.apache.mahout.math.Vector;
+
+import com.google.common.base.Function;
+
+public final class BenchmarkRunner {
+  private static final int BUCKET_SIZE = 10000;
+  private static final Random R = RandomUtils.getRandom();
+  private final long maxTimeUsec;
+  private final long leadTimeUsec;
+
+  public BenchmarkRunner(long leadTimeMs, long maxTimeMs) {
+    maxTimeUsec = TimeUnit.MILLISECONDS.toNanos(maxTimeMs);
+    leadTimeUsec = TimeUnit.MILLISECONDS.toNanos(leadTimeMs);
+  }
+
+  public static abstract class BenchmarkFn implements Function<Integer, Boolean> {
+    protected int randIndex() {
+      return BenchmarkRunner.randIndex();
+    }
+
+    protected boolean randBool() {
+      return BenchmarkRunner.randBool();
+    }
+
+    /**
+     * Adds a random data dependency so that JVM does not remove dead code.
+     */
+    protected boolean depends(Vector v) {
+      return randIndex() < v.getNumNondefaultElements();
+    }
+  }
+
+  public static abstract class BenchmarkFnD implements Function<Integer, Double> {
+    protected int randIndex() {
+      return BenchmarkRunner.randIndex();
+    }
+
+    protected boolean randBool() {
+      return BenchmarkRunner.randBool();
+    }
+
+    /**
+     * Adds a random data dependency so that JVM does not remove dead code.
+     */
+    protected boolean depends(Vector v) {
+      return randIndex() < v.getNumNondefaultElements();
+    }
+  }
+
+  private static int randIndex() {
+    return R.nextInt(BUCKET_SIZE);
+  }
+
+  private static boolean randBool() {
+    return R.nextBoolean();
+  }
+
+  public TimingStatistics benchmark(BenchmarkFn function) {
+    TimingStatistics stats = new TimingStatistics();
+    boolean result = false;
+    while (true) {
+      int i = R.nextInt(BUCKET_SIZE);
+      TimingStatistics.Call call = stats.newCall(leadTimeUsec);
+      result = result ^ function.apply(i);
+      if (call.end(maxTimeUsec)) {
+        break;
+      }
+    }
+    return stats;
+  }
+
+  public TimingStatistics benchmarkD(BenchmarkFnD function) {
+    TimingStatistics stats = new TimingStatistics();
+    double result = 0;
+    while (true) {
+      int i = R.nextInt(BUCKET_SIZE);
+      TimingStatistics.Call call = stats.newCall(leadTimeUsec);
+      result += function.apply(i);
+      if (call.end(maxTimeUsec)) {
+        break;
+      }
+    }
+    // print result to prevent HotSpot from eliminating dead code
+    System.err.println("Result = " + result);
+    return stats;
+  }
+}

Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java?rev=1469528&view=auto
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java (added)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java Thu Apr 18 19:06:27 2013
@@ -0,0 +1,45 @@
+package org.apache.mahout.benchmark;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn;
+
+public class CloneBenchmark {
+  public static final String CLONE = "Clone";
+  private final VectorBenchmarks mark;
+
+  public CloneBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  public void benchmark() {
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        mark.vectors[0][mark.vIndex(i)] = mark.vectors[0][mark.vIndex(i)].clone();
+
+        return depends(mark.vectors[0][mark.vIndex(i)]);
+      }
+    }), CLONE, DENSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        mark.vectors[1][mark.vIndex(i)] = mark.vectors[1][mark.vIndex(i)].clone();
+
+        return depends(mark.vectors[1][mark.vIndex(i)]);
+      }
+    }), CLONE, RAND_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        mark.vectors[2][mark.vIndex(i)] = mark.vectors[2][mark.vIndex(i)].clone();
+
+        return depends(mark.vectors[2][mark.vIndex(i)]);
+      }
+    }), CLONE, SEQ_SPARSE_VECTOR);
+  }
+}

Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java?rev=1469528&view=auto
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java (added)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java Thu Apr 18 19:06:27 2013
@@ -0,0 +1,83 @@
+package org.apache.mahout.benchmark;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.common.TimingStatistics;
+import org.apache.mahout.common.distance.DistanceMeasure;
+import org.apache.mahout.math.SparseMatrix;
+import org.apache.mahout.math.Vector;
+
+public class ClosestCentroidBenchmark {
+  public static final String SERIALIZE = "Serialize";
+  public static final String DESERIALIZE = "Deserialize";
+  private final VectorBenchmarks mark;
+
+  public ClosestCentroidBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  public void benchmark(DistanceMeasure measure) throws IOException {
+    SparseMatrix clusterDistances = new SparseMatrix(mark.numClusters, mark.numClusters);
+    for (int i = 0; i < mark.numClusters; i++) {
+      for (int j = 0; j < mark.numClusters; j++) {
+        double distance = Double.POSITIVE_INFINITY;
+        if (i != j) {
+          distance = measure.distance(mark.clusters[i], mark.clusters[j]);
+        }
+        clusterDistances.setQuick(i, j, distance);
+      }
+    }
+
+    long distanceCalculations = 0;
+    TimingStatistics stats = new TimingStatistics();
+    for (int l = 0; l < mark.loop; l++) {
+      TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
+      for (int i = 0; i < mark.numVectors; i++) {
+        Vector vector = mark.vectors[1][mark.vIndex(i)];
+        double minDistance = Double.MAX_VALUE;
+        for (int k = 0; k < mark.numClusters; k++) {
+          double distance = measure.distance(vector, mark.clusters[k]);
+          distanceCalculations++;
+          if (distance < minDistance) {
+            minDistance = distance;
+          }
+        }
+      }
+      if (call.end(mark.maxTimeUsec)) {
+        break;
+      }
+    }
+    mark.printStats(stats, measure.getClass().getName(), "Closest C w/o Elkan's trick", "distanceCalculations = "
+        + distanceCalculations);
+
+    distanceCalculations = 0;
+    stats = new TimingStatistics();
+    Random rand = RandomUtils.getRandom();
+    for (int l = 0; l < mark.loop; l++) {
+      TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
+      for (int i = 0; i < mark.numVectors; i++) {
+        Vector vector = mark.vectors[1][mark.vIndex(i)];
+        int closestCentroid = rand.nextInt(mark.numClusters);
+        double dist = measure.distance(vector, mark.clusters[closestCentroid]);
+        distanceCalculations++;
+        for (int k = 0; k < mark.numClusters; k++) {
+          if (closestCentroid != k) {
+            double centroidDist = clusterDistances.getQuick(k, closestCentroid);
+            if (centroidDist < 2 * dist) {
+              dist = measure.distance(vector, mark.clusters[k]);
+              closestCentroid = k;
+              distanceCalculations++;
+            }
+          }
+        }
+      }
+      if (call.end(mark.maxTimeUsec)) {
+        break;
+      }
+    }
+    mark.printStats(stats, measure.getClass().getName(), "Closest C w/ Elkan's trick", "distanceCalculations = "
+        + distanceCalculations);
+  }
+}

Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java?rev=1469528&view=auto
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java (added)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java Thu Apr 18 19:06:27 2013
@@ -0,0 +1,87 @@
+package org.apache.mahout.benchmark;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFnD;
+import org.apache.mahout.common.distance.DistanceMeasure;
+
+public class DistanceBenchmark {
+  private final VectorBenchmarks mark;
+
+  public DistanceBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  public void benchmark(final DistanceMeasure measure) {
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return measure.distance(mark.vectors[0][mark.vIndex(i)], mark.vectors[0][mark.vIndex(randIndex())]);
+      }
+    }), measure.getClass().getName(), DENSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return measure.distance(mark.vectors[1][mark.vIndex(i)], mark.vectors[1][mark.vIndex(randIndex())]);
+      }
+    }), measure.getClass().getName(), RAND_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return measure.distance(mark.vectors[2][mark.vIndex(i)], mark.vectors[2][mark.vIndex(randIndex())]);
+      }
+    }), measure.getClass().getName(), SEQ_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return measure.distance(mark.vectors[0][mark.vIndex(i)], mark.vectors[1][mark.vIndex(randIndex())]);
+      }
+    }), measure.getClass().getName(), DENSE_FN_RAND);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return measure.distance(mark.vectors[0][mark.vIndex(i)], mark.vectors[2][mark.vIndex(randIndex())]);
+      }
+    }), measure.getClass().getName(), DENSE_FN_SEQ);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return measure.distance(mark.vectors[1][mark.vIndex(i)], mark.vectors[0][mark.vIndex(randIndex())]);
+      }
+    }), measure.getClass().getName(), RAND_FN_DENSE);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return measure.distance(mark.vectors[1][mark.vIndex(i)], mark.vectors[2][mark.vIndex(randIndex())]);
+      }
+    }), measure.getClass().getName(), RAND_FN_SEQ);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return measure.distance(mark.vectors[2][mark.vIndex(i)], mark.vectors[0][mark.vIndex(randIndex())]);
+      }
+    }), measure.getClass().getName(), SEQ_FN_DENSE);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return measure.distance(mark.vectors[2][mark.vIndex(i)], mark.vectors[1][mark.vIndex(randIndex())]);
+      }
+    }), measure.getClass().getName(), SEQ_FN_RAND);
+  }
+}

Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java?rev=1469528&view=auto
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java (added)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java Thu Apr 18 19:06:27 2013
@@ -0,0 +1,142 @@
+package org.apache.mahout.benchmark;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn;
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFnD;
+
+public class DotBenchmark {
+  private static final String DOT_PRODUCT = "DotProduct";
+  private static final String NORM1 = "Norm1";
+  private static final String LOG_NORMALIZE = "LogNormalize";
+  private final VectorBenchmarks mark;
+
+  public DotBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  public void benchmark() {
+    benchmarkDot();
+    benchmarkNorm1();
+    benchmarkLogNormalize();
+  }
+
+  private void benchmarkLogNormalize() {
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        return depends(mark.vectors[0][mark.vIndex(i)].logNormalize());
+      }
+    }), LOG_NORMALIZE, DENSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        return depends(mark.vectors[1][mark.vIndex(i)].logNormalize());
+      }
+    }), LOG_NORMALIZE, RAND_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        return depends(mark.vectors[2][mark.vIndex(i)].logNormalize());
+      }
+    }), LOG_NORMALIZE, SEQ_SPARSE_VECTOR);
+  }
+
+  private void benchmarkNorm1() {
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[0][mark.vIndex(i)].norm(1);
+      }
+    }), NORM1, DENSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[1][mark.vIndex(i)].norm(1);
+      }
+    }), NORM1, RAND_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[2][mark.vIndex(i)].norm(1);
+      }
+    }), NORM1, SEQ_SPARSE_VECTOR);
+  }
+
+  private void benchmarkDot() {
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[0][mark.vIndex(i)].dot(mark.vectors[0][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, DENSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[1][mark.vIndex(i)].dot(mark.vectors[1][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, RAND_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[2][mark.vIndex(i)].dot(mark.vectors[2][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, SEQ_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[0][mark.vIndex(i)].dot(mark.vectors[1][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, DENSE_FN_RAND);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[0][mark.vIndex(i)].dot(mark.vectors[2][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, DENSE_FN_SEQ);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[1][mark.vIndex(i)].dot(mark.vectors[0][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, RAND_FN_DENSE);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[1][mark.vIndex(i)].dot(mark.vectors[2][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, RAND_FN_SEQ);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[2][mark.vIndex(i)].dot(mark.vectors[0][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, SEQ_FN_DENSE);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[2][mark.vIndex(i)].dot(mark.vectors[1][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, SEQ_FN_RAND);
+  }
+}

Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java?rev=1469528&view=auto
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java (added)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java Thu Apr 18 19:06:27 2013
@@ -0,0 +1,98 @@
+package org.apache.mahout.benchmark;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn;
+import org.apache.mahout.math.Vector;
+
+public class MinusBenchmark {
+
+  private static final String MINUS = "Minus";
+  private final VectorBenchmarks mark;
+
+  public MinusBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  public void benchmark() {
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector v = mark.vectors[0][mark.vIndex(i)].minus(mark.vectors[0][mark.vIndex(randIndex())]);
+        return depends(v);
+      }
+    }), MINUS, DENSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector v = mark.vectors[1][mark.vIndex(i)].minus(mark.vectors[1][mark.vIndex(randIndex())]);
+        return depends(v);
+      }
+    }), MINUS, RAND_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector v = mark.vectors[2][mark.vIndex(i)].minus(mark.vectors[2][mark.vIndex(randIndex())]);
+        return depends(v);
+      }
+    }), MINUS, SEQ_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector v = mark.vectors[0][mark.vIndex(i)].minus(mark.vectors[1][mark.vIndex(randIndex())]);
+        return depends(v);
+      }
+    }), MINUS, DENSE_FN_RAND);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector v = mark.vectors[0][mark.vIndex(i)].minus(mark.vectors[2][mark.vIndex(randIndex())]);
+        return depends(v);
+      }
+    }), MINUS, DENSE_FN_SEQ);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector v = mark.vectors[1][mark.vIndex(i)].minus(mark.vectors[0][mark.vIndex(randIndex())]);
+        return depends(v);
+      }
+    }), MINUS, RAND_FN_DENSE);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector v = mark.vectors[1][mark.vIndex(i)].minus(mark.vectors[2][mark.vIndex(randIndex())]);
+        return depends(v);
+      }
+    }), MINUS, RAND_FN_SEQ);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector v = mark.vectors[2][mark.vIndex(i)].minus(mark.vectors[0][mark.vIndex(randIndex())]);
+        return depends(v);
+      }
+    }), MINUS, SEQ_FN_DENSE);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector v = mark.vectors[2][mark.vIndex(i)].minus(mark.vectors[1][mark.vIndex(randIndex())]);
+        return depends(v);
+      }
+    }), MINUS, SEQ_FN_RAND);
+  }
+}

Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java?rev=1469528&view=auto
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java (added)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java Thu Apr 18 19:06:27 2013
@@ -0,0 +1,65 @@
+package org.apache.mahout.benchmark;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn;
+import org.apache.mahout.math.Vector;
+
+/**
+ * Benchmarks {@code Vector.plus(Vector)} for every pairing of the three vector implementations
+ * held by {@link VectorBenchmarks}.
+ */
+public class PlusBenchmark {
+
+  private static final String PLUS = "Plus";
+
+  // Rows of mark.vectors: dense, random-access sparse and sequential-access sparse.
+  private static final int DENSE = 0;
+  private static final int RAND = 1;
+  private static final int SEQ = 2;
+
+  private final VectorBenchmarks mark;
+
+  public PlusBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  /** Runs and reports all nine (receiver, argument) implementation pairings, in a fixed order. */
+  public void benchmark() {
+    benchmarkPair(DENSE, DENSE, DENSE_VECTOR);
+    benchmarkPair(RAND, RAND, RAND_SPARSE_VECTOR);
+    benchmarkPair(SEQ, SEQ, SEQ_SPARSE_VECTOR);
+    benchmarkPair(DENSE, RAND, DENSE_FN_RAND);
+    benchmarkPair(DENSE, SEQ, DENSE_FN_SEQ);
+    benchmarkPair(RAND, DENSE, RAND_FN_DENSE);
+    benchmarkPair(RAND, SEQ, RAND_FN_SEQ);
+    benchmarkPair(SEQ, DENSE, SEQ_FN_DENSE);
+    benchmarkPair(SEQ, RAND, SEQ_FN_RAND);
+  }
+
+  /**
+   * Benchmarks adding a vector from row {@code rhs} to a vector from row {@code lhs} and prints the
+   * statistics under {@code implName}.
+   *
+   * @param lhs row of {@code mark.vectors} supplying the receiver, selected by the iteration index
+   * @param rhs row of {@code mark.vectors} supplying the argument, selected by {@code randIndex()}
+   * @param implName label printed next to the statistics
+   */
+  private void benchmarkPair(final int lhs, final int rhs, String implName) {
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector v = mark.vectors[lhs][mark.vIndex(i)].plus(mark.vectors[rhs][mark.vIndex(randIndex())]);
+        return depends(v);
+      }
+    }), PLUS, implName);
+  }
+}

Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java?rev=1469528&view=auto
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java (added)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java Thu Apr 18 19:06:27 2013
@@ -0,0 +1,117 @@
+package org.apache.mahout.benchmark;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.common.TimingStatistics;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
+import org.apache.mahout.math.VectorWritable;
+
+import com.google.common.io.Closeables;
+
+/**
+ * Benchmarks writing the three vector implementations held by {@link VectorBenchmarks} to Hadoop
+ * sequence files under {@code /tmp}, and then reading those files back.
+ */
+public class SerializationBenchmark {
+  public static final String SERIALIZE = "Serialize";
+  public static final String DESERIALIZE = "Deserialize";
+
+  // Files written by the serialize pass and read back by the deserialize pass.
+  private static final String DENSE_PATH = "/tmp/dense-vector";
+  private static final String RAND_SPARSE_PATH = "/tmp/randsparse-vector";
+  private static final String SEQ_SPARSE_PATH = "/tmp/seqsparse-vector";
+
+  private final VectorBenchmarks mark;
+
+  public SerializationBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  /** Runs the serialize pass and then the deserialize pass, which reads what the first one wrote. */
+  public void benchmark() throws IOException {
+    serializeBenchmark();
+    deserializeBenchmark();
+  }
+
+  /** Times appending each vector implementation (rows 0, 1 and 2 of {@code mark.vectors}) to its own file. */
+  public void serializeBenchmark() throws IOException {
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+    doSerializeBenchmark(DENSE_VECTOR, DENSE_PATH, 0, fs, conf);
+    doSerializeBenchmark(RAND_SPARSE_VECTOR, RAND_SPARSE_PATH, 1, fs, conf);
+    doSerializeBenchmark(SEQ_SPARSE_VECTOR, SEQ_SPARSE_PATH, 2, fs, conf);
+  }
+
+  /** Times reading back each of the files produced by {@link #serializeBenchmark()}. */
+  public void deserializeBenchmark() throws IOException {
+    doDeserializeBenchmark(DENSE_VECTOR, DENSE_PATH);
+    doDeserializeBenchmark(RAND_SPARSE_VECTOR, RAND_SPARSE_PATH);
+    doDeserializeBenchmark(SEQ_SPARSE_VECTOR, SEQ_SPARSE_PATH);
+  }
+
+  /**
+   * Appends vectors from {@code mark.vectors[type]} to a new sequence file at {@code pathString},
+   * timing each append, until {@code call.end(mark.maxTimeUsec)} returns {@code true}.
+   *
+   * @param name label printed next to the statistics
+   * @param pathString file to create
+   * @param type row of {@code mark.vectors} to serialize
+   * @param fs file system the writer is opened on
+   * @param conf configuration handed to the writer
+   */
+  private void doSerializeBenchmark(String name, String pathString, int type, FileSystem fs,
+      Configuration conf) throws IOException {
+    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(pathString), IntWritable.class,
+        VectorWritable.class);
+    Writable one = new IntWritable(0);
+    VectorWritable vec = new VectorWritable();
+    TimingStatistics stats = new TimingStatistics();
+    try {
+      for (int i = 0; i < mark.loop; i++) {
+        TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
+        vec.set(mark.vectors[type][mark.vIndex(i)]);
+        writer.append(one, vec);
+        if (call.end(mark.maxTimeUsec)) {
+          break;
+        }
+      }
+    } finally {
+      Closeables.close(writer, true);
+    }
+    mark.printStats(stats, SERIALIZE, name);
+  }
+
+  /**
+   * Iterates over every value in the sequence file at {@code pathString}, timing each step.
+   * The iterator is closed even when reading fails part-way through.
+   */
+  private void doDeserializeBenchmark(String name, String pathString) throws IOException {
+    TimingStatistics stats = new TimingStatistics();
+    TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
+    SequenceFileValueIterator<Writable> iterator = new SequenceFileValueIterator<Writable>(new Path(pathString), true,
+        new Configuration());
+    boolean threw = true;
+    try {
+      while (iterator.hasNext()) {
+        iterator.next();
+        call.end();
+        call = stats.newCall(mark.leadTimeUsec);
+      }
+      threw = false;
+    } finally {
+      // Swallow a close() failure only when it would otherwise mask the exception already in flight.
+      Closeables.close(iterator, threw);
+    }
+    mark.printStats(stats, DESERIALIZE, name);
+  }
+}

Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java?rev=1469528&view=auto
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java (added)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java Thu Apr 18 19:06:27 2013
@@ -0,0 +1,65 @@
+package org.apache.mahout.benchmark;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn;
+import org.apache.mahout.math.Vector;
+
+/**
+ * Benchmarks {@code Vector.times(Vector)} for every pairing of the three vector implementations
+ * held by {@link VectorBenchmarks}.
+ */
+public class TimesBenchmark {
+
+  private static final String TIMES = "Times";
+
+  // Rows of mark.vectors: dense, random-access sparse and sequential-access sparse.
+  private static final int DENSE = 0;
+  private static final int RAND = 1;
+  private static final int SEQ = 2;
+
+  private final VectorBenchmarks mark;
+
+  public TimesBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  /** Runs and reports all nine (receiver, argument) implementation pairings, in a fixed order. */
+  public void benchmark() {
+    benchmarkPair(DENSE, DENSE, DENSE_VECTOR);
+    benchmarkPair(RAND, RAND, RAND_SPARSE_VECTOR);
+    benchmarkPair(SEQ, SEQ, SEQ_SPARSE_VECTOR);
+    benchmarkPair(DENSE, RAND, DENSE_FN_RAND);
+    benchmarkPair(DENSE, SEQ, DENSE_FN_SEQ);
+    benchmarkPair(RAND, DENSE, RAND_FN_DENSE);
+    benchmarkPair(RAND, SEQ, RAND_FN_SEQ);
+    benchmarkPair(SEQ, DENSE, SEQ_FN_DENSE);
+    benchmarkPair(SEQ, RAND, SEQ_FN_RAND);
+  }
+
+  /**
+   * Benchmarks calling {@code times} on a vector from row {@code lhs} with a vector from row
+   * {@code rhs} as its argument, and prints the statistics under {@code implName}.
+   *
+   * @param lhs row of {@code mark.vectors} supplying the receiver, selected by the iteration index
+   * @param rhs row of {@code mark.vectors} supplying the argument, selected by {@code randIndex()}
+   * @param implName label printed next to the statistics
+   */
+  private void benchmarkPair(final int lhs, final int rhs, String implName) {
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector v = mark.vectors[lhs][mark.vIndex(i)].times(mark.vectors[rhs][mark.vIndex(randIndex())]);
+        return depends(v);
+      }
+    }), TIMES, implName);
+  }
+}

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java?rev=1469528&r1=1469527&r2=1469528&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java Thu Apr 18 19:06:27 2013
@@ -18,18 +18,16 @@
 package org.apache.mahout.benchmark;
 
 import java.io.IOException;
+import java.text.DecimalFormat;
 import java.util.BitSet;
 import java.util.Collections;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.Random;
 import java.util.Map.Entry;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
 import java.util.regex.Pattern;
 
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -39,72 +37,99 @@ import org.apache.commons.cli2.builder.D
 import org.apache.commons.cli2.builder.GroupBuilder;
 import org.apache.commons.cli2.commandline.Parser;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Writable;
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn;
 import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.TimingStatistics;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.common.distance.ChebyshevDistanceMeasure;
 import org.apache.mahout.common.distance.CosineDistanceMeasure;
-import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
 import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
+import org.apache.mahout.common.distance.MinkowskiDistanceMeasure;
 import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
 import org.apache.mahout.common.distance.TanimotoDistanceMeasure;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.SequentialAccessSparseVector;
-import org.apache.mahout.math.SparseMatrix;
 import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
 public class VectorBenchmarks {
+  private static final int MAX_TIME_MS = 500;
+  private static final int LEAD_TIME_MS = 100;
+  public static final String CLUSTERS = "Clusters";
+  public static final String CREATE_INCREMENTALLY = "Create (incrementally)";
+  public static final String CREATE_COPY = "Create (copy)";
+
+  public static final String DENSE_FN_SEQ = "Dense.fn(Seq)";
+  public static final String RAND_FN_DENSE = "Rand.fn(Dense)";
+  public static final String SEQ_FN_RAND = "Seq.fn(Rand)";
+  public static final String RAND_FN_SEQ = "Rand.fn(Seq)";
+  public static final String SEQ_FN_DENSE = "Seq.fn(Dense)";
+  public static final String DENSE_FN_RAND = "Dense.fn(Rand)";
+  public static final String SEQ_SPARSE_VECTOR = "SeqSparseVector";
+  public static final String RAND_SPARSE_VECTOR = "RandSparseVector";
+  public static final String DENSE_VECTOR = "DenseVector";
 
   private static final Logger log = LoggerFactory.getLogger(VectorBenchmarks.class);
-
   private static final Pattern TAB_NEWLINE_PATTERN = Pattern.compile("[\n\t]");
   private static final String[] EMPTY = new String[0];
+  private static final DecimalFormat DF = new DecimalFormat("#.##");
+
+  /* package private */
+  final Vector[][] vectors;
+  final Vector[] clusters;
+  final int cardinality;
+  final int numNonZeros;
+  final int numVectors;
+  final int numClusters;
+  final int loop = Integer.MAX_VALUE;
+  final int opsPerUnit;
+  final long maxTimeUsec;
+  final long leadTimeUsec;
 
-  private final Vector[][] vectors;
-  private final Vector[] clusters;
-  private final SparseMatrix clusterDistances;
   private final List<Vector> randomVectors = Lists.newArrayList();
   private final List<int[]> randomVectorIndices = Lists.newArrayList();
   private final List<double[]> randomVectorValues = Lists.newArrayList();
-  private final int cardinality;
-  private final int sparsity;
-  private final int numVectors;
-  private final int loop;
-  private final int opsPerUnit;
-  private final Map<String,Integer> implType = Maps.newHashMap();
-  private final Map<String,List<String[]>> statsMap = Maps.newHashMap();
-  private final int numClusters;
-  
-  public VectorBenchmarks(int cardinality, int sparsity, int numVectors, int numClusters, int loop, int opsPerUnit) {
-    Random r = RandomUtils.getRandom();
+  private final Map<String, Integer> implType = Maps.newHashMap();
+  private final Map<String, List<String[]>> statsMap = Maps.newHashMap();
+  private final BenchmarkRunner runner;
+  private final Random r = RandomUtils.getRandom();
+
+  public VectorBenchmarks(int cardinality, int numNonZeros, int numVectors, int numClusters,
+      int opsPerUnit) {
+    runner = new BenchmarkRunner(LEAD_TIME_MS, MAX_TIME_MS);
+    maxTimeUsec = TimeUnit.MILLISECONDS.toNanos(MAX_TIME_MS);
+    leadTimeUsec = TimeUnit.MILLISECONDS.toNanos(LEAD_TIME_MS);
+
     this.cardinality = cardinality;
-    this.sparsity = sparsity;
+    this.numNonZeros = numNonZeros;
     this.numVectors = numVectors;
     this.numClusters = numClusters;
-    this.loop = loop;
     this.opsPerUnit = opsPerUnit;
+
+    setUpVectors(cardinality, numNonZeros, numVectors);
+
+    vectors = new Vector[3][numVectors];
+    clusters = new Vector[numClusters];
+  }
+
+  private void setUpVectors(int cardinality, int numNonZeros, int numVectors) {
     for (int i = 0; i < numVectors; i++) {
-      Vector v = new SequentialAccessSparseVector(cardinality, sparsity); // sparsity!
+      Vector v = new SequentialAccessSparseVector(cardinality, numNonZeros); // sparsity!
       BitSet featureSpace = new BitSet(cardinality);
-      int[] indexes = new int[sparsity];
-      double[] values = new double[sparsity];
+      int[] indexes = new int[numNonZeros];
+      double[] values = new double[numNonZeros];
       int j = 0;
-      while (j < sparsity) {
+      while (j < numNonZeros) {
         double value = r.nextGaussian();
         int index = r.nextInt(cardinality);
-        if (!featureSpace.get(index)) {
+        if (!featureSpace.get(index) && value != 0) {
           featureSpace.set(index);
           indexes[j] = index;
           values[j++] = value;
@@ -115,28 +140,22 @@ public class VectorBenchmarks {
       randomVectorValues.add(values);
       randomVectors.add(v);
     }
-    vectors = new Vector[3][numVectors];
-    clusters = new Vector[numClusters];
-    clusterDistances = new SparseMatrix(numClusters, numClusters);
   }
-  
-  private void printStats(TimingStatistics stats, String benchmarkName, String implName, String content) {
+
+  void printStats(TimingStatistics stats, String benchmarkName, String implName, String content) {
     printStats(stats, benchmarkName, implName, content, 1);
   }
-  
-  private void printStats(TimingStatistics stats, String benchmarkName, String implName) {
+
+  void printStats(TimingStatistics stats, String benchmarkName, String implName) {
     printStats(stats, benchmarkName, implName, "", 1);
   }
-  
-  private void printStats(TimingStatistics stats,
-                          String benchmarkName,
-                          String implName,
-                          String content,
-                          int multiplier) {
-    float speed = multiplier * loop * numVectors * sparsity * 1000.0f * 12 / stats.getSumTime();
-    float opsPerSec = loop * numVectors * 1000000000.0f / stats.getSumTime();
-    log.info("{} {} \n{} {} \nSpeed: {} UnitsProcessed/sec {} MBytes/sec",
-             benchmarkName, implName, content, stats.toString(), opsPerSec, speed);
+
+  private void printStats(TimingStatistics stats, String benchmarkName, String implName,
+      String content, int multiplier) {
+    float speed = multiplier * stats.getNCalls() * numNonZeros * 1000.0f * 12 / stats.getSumTime();
+    float opsPerSec = stats.getNCalls() * 1000000000.0f / stats.getSumTime();
+    log.info("{} {} \n{} {} \nOps    = {} Units/sec\nIOps   = {} MBytes/sec", benchmarkName,
+        implName, content, stats.toString(), DF.format(opsPerSec), DF.format(speed));
 
     if (!implType.containsKey(implName)) {
       implType.put(implName, implType.size());
@@ -149,44 +168,56 @@ public class VectorBenchmarks {
     while (implStats.size() < implId + 1) {
       implStats.add(EMPTY);
     }
-    implStats.set(implId,
-                  TAB_NEWLINE_PATTERN.split(stats + "\tSpeed = " + opsPerSec + " /sec\tRate = " + speed + " MB/s"));
+    implStats.set(
+        implId,
+        TAB_NEWLINE_PATTERN.split(stats + "\tSpeed  = " + DF.format(opsPerSec) + " /sec\tRate   = "
+            + DF.format(speed) + " MB/s"));
+  }
+
+  public void createData() {
+    for (int i = 0; i < Math.max(numVectors, numClusters); ++i) {
+      vectors[0][vIndex(i)] = new DenseVector(randomVectors.get(vIndex(i)));
+      vectors[1][vIndex(i)] = new RandomAccessSparseVector(randomVectors.get(vIndex(i)));
+      vectors[2][vIndex(i)] = new SequentialAccessSparseVector(randomVectors.get(vIndex(i)));
+      clusters[cIndex(i)] = new RandomAccessSparseVector(randomVectors.get(vIndex(i)));
+    }
   }
-  
+
   public void createBenchmark() {
-    TimingStatistics stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        vectors[0][i] = new DenseVector(randomVectors.get(i));
-        call.end();
+    printStats(runner.benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        vectors[0][vIndex(i)] = new DenseVector(randomVectors.get(vIndex(i)));
+        return depends(vectors[0][vIndex(i)]);
       }
-    }
-    printStats(stats, "Create (copy)", "DenseVector");
-    
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        vectors[1][i] = new RandomAccessSparseVector(randomVectors.get(i));
-        call.end();
+    }), CREATE_COPY, DENSE_VECTOR);
+
+    printStats(runner.benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        vectors[1][vIndex(i)] = new RandomAccessSparseVector(randomVectors.get(vIndex(i)));
+        return depends(vectors[1][vIndex(i)]);
       }
-    }
-    printStats(stats, "Create (copy)", "RandSparseVector");
-    
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        vectors[2][i] = new SequentialAccessSparseVector(randomVectors.get(i));
-        call.end();
+    }), CREATE_COPY, RAND_SPARSE_VECTOR);
+
+    printStats(runner.benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        vectors[2][vIndex(i)] = new SequentialAccessSparseVector(randomVectors.get(vIndex(i)));
+        return depends(vectors[2][vIndex(i)]);
       }
-    }
-    printStats(stats, "Create (copy)", "SeqSparseVector");
-    
+    }), CREATE_COPY, SEQ_SPARSE_VECTOR);
+
+    printStats(runner.benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        clusters[cIndex(i)] = new RandomAccessSparseVector(randomVectors.get(vIndex(i)));
+        return depends(clusters[cIndex(i)]);
+      }
+    }), CREATE_COPY, CLUSTERS);
   }
 
-  private void buildVectorIncrementally(TimingStatistics stats, int randomIndex, Vector v, boolean useSetQuick) {
+  private boolean buildVectorIncrementally(TimingStatistics stats, int randomIndex, Vector v, boolean useSetQuick) {
     int[] indexes = randomVectorIndices.get(randomIndex);
     double[] values = randomVectorValues.get(randomIndex);
     List<Integer> randomOrder = Lists.newArrayList();
@@ -199,7 +230,7 @@ public class VectorBenchmarks {
       permutation[i] = randomOrder.get(i);
     }
 
-    TimingStatistics.Call call = stats.newCall();
+    TimingStatistics.Call call = stats.newCall(leadTimeUsec);
     if (useSetQuick) {
       for (int i : permutation) {
         v.setQuick(indexes[i], values[i]);
@@ -209,599 +240,176 @@ public class VectorBenchmarks {
         v.set(indexes[i], values[i]);
       }
     }
-    call.end();
+    return call.end(maxTimeUsec);
   }
 
   public void incrementalCreateBenchmark() {
     TimingStatistics stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        vectors[0][i] = new DenseVector(cardinality);
-        buildVectorIncrementally(stats, i, vectors[0][i], false);
+    for (int i = 0; i < loop; i++) {
+      vectors[0][vIndex(i)] = new DenseVector(cardinality);
+      if (buildVectorIncrementally(stats, vIndex(i), vectors[0][vIndex(i)], false)) {
+        break;
       }
     }
-    printStats(stats, "Create (incrementally)", "DenseVector");
+    printStats(stats, CREATE_INCREMENTALLY, DENSE_VECTOR);
 
     stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        vectors[1][i] = new RandomAccessSparseVector(cardinality);
-        buildVectorIncrementally(stats, i, vectors[1][i], false);
-      }
-    }
-    printStats(stats, "Create (incrementally)", "RandSparseVector");
-
-//    stats = new TimingStatistics();
-//    for (int l = 0; l < loop; l++) {
-//      for (int i = 0; i < numVectors; i++) {
-//        vectors[2][i] = new SequentialAccessSparseVector(cardinality);
-//        buildVectorIncrementally(stats, i, vectors[2][i], false);
-//      }
-//    }
-//    printStats(stats, "Create (incrementally)", "SeqSparseVector");
-    
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numClusters; i++) {
-        clusters[i] = new RandomAccessSparseVector(cardinality);
-        buildVectorIncrementally(stats, i, clusters[i], false);
+    for (int i = 0; i < loop; i++) {
+      vectors[1][vIndex(i)] = new RandomAccessSparseVector(cardinality);
+      if (buildVectorIncrementally(stats, vIndex(i), vectors[1][vIndex(i)], false)) {
+        break;
       }
     }
-    printStats(stats, "Create (incrementally)", "Clusters");
-  }
-  
-  public void cloneBenchmark() {
-    TimingStatistics stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        vectors[0][i] = vectors[0][i].clone();
-        call.end();
-      }
-    }
-    printStats(stats, "Clone", "DenseVector");
-    
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        vectors[1][i] = vectors[1][i].clone();
-        call.end();
-      }
-    }
-    printStats(stats, "Clone", "RandSparseVector");
-    
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        vectors[2][i] = vectors[2][i].clone();
-        call.end();
-      }
-    }
-    printStats(stats, "Clone", "SeqSparseVector");
-    
-  }
-  
-  public void serializeBenchmark() throws IOException {
-    Configuration conf = new Configuration();
-    FileSystem fs = FileSystem.get(conf);
-    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
-      new Path("/tmp/dense-vector"), IntWritable.class, VectorWritable.class);
+    printStats(stats, CREATE_INCREMENTALLY, RAND_SPARSE_VECTOR);
 
-    Writable one = new IntWritable(0);
-    VectorWritable vec = new VectorWritable();
-    TimingStatistics stats = new TimingStatistics();
-
-    try {
-      for (int l = 0; l < loop; l++) {
-        for (int i = 0; i < numVectors; i++) {
-          TimingStatistics.Call call = stats.newCall();
-          vec.set(vectors[0][i]);
-          writer.append(one, vec);
-          call.end();
-        }
-      }
-    } finally {
-      Closeables.closeQuietly(writer);
-    }
-    printStats(stats, "Serialize", "DenseVector");
-    
-    writer = new SequenceFile.Writer(fs, conf,
-      new Path("/tmp/randsparse-vector"), IntWritable.class, VectorWritable.class);
-    stats = new TimingStatistics();
-    try {
-      for (int l = 0; l < loop; l++) {
-        for (int i = 0; i < numVectors; i++) {
-          TimingStatistics.Call call = stats.newCall();
-          vec.set(vectors[1][i]);
-          writer.append(one, vec);
-          call.end();
-        }
-      }
-    } finally {
-      Closeables.closeQuietly(writer);
-    }
-    printStats(stats, "Serialize", "RandSparseVector");
-    
-    writer = new SequenceFile.Writer(fs, conf,
-      new Path("/tmp/seqsparse-vector"), IntWritable.class, VectorWritable.class);
     stats = new TimingStatistics();
-    try {
-      for (int l = 0; l < loop; l++) {
-        for (int i = 0; i < numVectors; i++) {
-          TimingStatistics.Call call = stats.newCall();
-          vec.set(vectors[2][i]);
-          writer.append(one, vec);
-          call.end();
-        }
+    for (int i = 0; i < loop; i++) {
+      vectors[2][vIndex(i)] = new SequentialAccessSparseVector(cardinality);
+      if (buildVectorIncrementally(stats, vIndex(i), vectors[2][vIndex(i)], false)) {
+        break;
       }
-    } finally {
-      Closeables.closeQuietly(writer);
     }
-    printStats(stats, "Serialize", "SeqSparseVector");
-    
-  }
-  
-  public void deserializeBenchmark() throws IOException {
-    doDeserializeBenchmark("DenseVector", "/tmp/dense-vector");
-    doDeserializeBenchmark("RandSparseVector", "/tmp/randsparse-vector");
-    doDeserializeBenchmark("SeqSparseVector", "/tmp/seqsparse-vector");
-  }
+    printStats(stats, CREATE_INCREMENTALLY, SEQ_SPARSE_VECTOR);
 
-  private void doDeserializeBenchmark(String name, String pathString) throws IOException {
-    TimingStatistics stats = new TimingStatistics();
-    TimingStatistics.Call call = stats.newCall();
-    Iterator<?> iterator = new SequenceFileValueIterator<Writable>(new Path(pathString), true, new Configuration());
-    while (iterator.hasNext()) {
-      iterator.next();
-      call.end();
-      call = stats.newCall();
-    }
-    printStats(stats, "Deserialize", name);
-  }
-  
-  public void dotBenchmark() {
-    double result = 0;
-    TimingStatistics stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        result += vectors[0][i].dot(vectors[0][(i + 1) % numVectors]);
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, "DotProduct", "DenseVector", "sum = " + result + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        result += vectors[1][i].dot(vectors[1][(i + 1) % numVectors]);
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, "DotProduct", "RandSparseVector", "sum = " + result + ' ');
-    result = 0;
     stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        result += vectors[2][i].dot(vectors[2][(i + 1) % numVectors]);
-        call.end();
+    for (int i = 0; i < loop; i++) {
+      clusters[cIndex(i)] = new RandomAccessSparseVector(cardinality);
+      if (buildVectorIncrementally(stats, vIndex(i), clusters[cIndex(i)], false)) {
+        break;
       }
     }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, "DotProduct", "SeqSparseVector", "sum = " + result + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        result += vectors[0][i].dot(vectors[1][(i + 1) % numVectors]);
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, "DotProduct", "Dense.fn(Rand)", "sum = " + result + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        result += vectors[0][i].dot(vectors[2][(i + 1) % numVectors]);
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, "DotProduct", "Dense.fn(Seq)", "sum = " + result + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        result += vectors[1][i].dot(vectors[0][(i + 1) % numVectors]);
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, "DotProduct", "Rand.fn(Dense)", "sum = " + result + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        result += vectors[1][i].dot(vectors[2][(i + 1) % numVectors]);
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, "DotProduct", "Rand.fn(Seq)", "sum = " + result + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        result += vectors[2][i].dot(vectors[0][(i + 1) % numVectors]);
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, "DotProduct", "Seq.fn(Dense)", "sum = " + result + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        result += vectors[2][i].dot(vectors[1][(i + 1) % numVectors]);
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, "DotProduct", "Seq.fn(Rand)", "sum = " + result + ' ');
-
-
+    printStats(stats, CREATE_INCREMENTALLY, CLUSTERS);
   }
 
-
-  public void closestCentroidBenchmark(DistanceMeasure measure) {
-
-    for (int i = 0; i < numClusters; i++) {
-      for (int j = 0; j < numClusters; j++) {
-        double distance = Double.POSITIVE_INFINITY;
-        if (i != j) {
-          distance = measure.distance(clusters[i], clusters[j]);
-        }
-        clusterDistances.setQuick(i, j, distance);
-      }
-    }
-
-    long distanceCalculations = 0;
-    TimingStatistics stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      TimingStatistics.Call call = stats.newCall();
-      for (int i = 0; i < numVectors; i++) {
-        Vector vector = vectors[1][i];
-        double minDistance = Double.MAX_VALUE;
-        for (int k = 0; k < numClusters; k++) {
-          double distance = measure.distance(vector, clusters[k]);
-          distanceCalculations++;
-          if (distance < minDistance) {
-            minDistance = distance;
-          }
-        }
-      }
-      call.end();
-    }
-    printStats(stats,
-               measure.getClass().getName(),
-               "Closest center without Elkan's trick",
-               "distanceCalculations = " + distanceCalculations);
-
-
-    distanceCalculations = 0;
-    stats = new TimingStatistics();
-    Random rand = RandomUtils.getRandom();
-    //rand.setSeed(System.currentTimeMillis());
-    for (int l = 0; l < loop; l++) {
-      TimingStatistics.Call call = stats.newCall();
-      for (int i = 0; i < numVectors; i++) {
-        Vector vector = vectors[1][i];
-        int closestCentroid = rand.nextInt(numClusters);
-        double dist = measure.distance(vector, clusters[closestCentroid]);
-        distanceCalculations++;
-        for (int k = 0; k < numClusters; k++) {
-          if (closestCentroid != k) {
-            double centroidDist = clusterDistances.getQuick(k, closestCentroid);
-            if (centroidDist < 2 * dist) {
-              dist = measure.distance(vector, clusters[k]);
-              closestCentroid = k;
-              distanceCalculations++;
-            }
-          }
-        }
-      }
-      call.end();
-    }
-    printStats(stats,
-               measure.getClass().getName(),
-               "Closest center with Elkan's trick",
-               "distanceCalculations = " + distanceCalculations);
+  public int vIndex(int i) {
+    return i % numVectors;
   }
 
-  public void distanceMeasureBenchmark(DistanceMeasure measure) {
-    double result = 0;
-    TimingStatistics stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        double minDistance = Double.MAX_VALUE;
-        for (int u = 0; u < opsPerUnit; u++) {
-          double distance = measure.distance(vectors[0][i], vectors[0][u]);
-          if (distance < minDistance) {
-            minDistance = distance;
-          }
-        }
-        result += minDistance;
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, measure.getClass().getName(), "DenseVector", "minDistance = " + result + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        double minDistance = Double.MAX_VALUE;
-        for (int u = 0; u < opsPerUnit; u++) {
-          double distance = measure.distance(vectors[1][i], vectors[1][u]);
-          if (distance < minDistance) {
-            minDistance = distance;
-          }
-        }
-        result += minDistance;
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, measure.getClass().getName(), "RandSparseVector", "minDistance = " + result
-                                                                                + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        double minDistance = Double.MAX_VALUE;
-        for (int u = 0; u < opsPerUnit; u++) {
-          double distance = measure.distance(vectors[2][i], vectors[2][u]);
-          if (distance < minDistance) {
-            minDistance = distance;
-          }
-        }
-        result += minDistance;
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, measure.getClass().getName(), "SeqSparseVector", "minDistance = " + result
-                                                                                    + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        double minDistance = Double.MAX_VALUE;
-        for (int u = 0; u < opsPerUnit; u++) {
-          double distance = measure.distance(vectors[0][i], vectors[1][u]);
-          if (distance < minDistance) {
-            minDistance = distance;
-          }
-        }
-        result += minDistance;
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, measure.getClass().getName(), "Dense.fn(Rand)", "minDistance = " + result + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        double minDistance = Double.MAX_VALUE;
-        for (int u = 0; u < opsPerUnit; u++) {
-          double distance = measure.distance(vectors[0][i], vectors[2][u]);
-          if (distance < minDistance) {
-            minDistance = distance;
-          }
-        }
-        result += minDistance;
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, measure.getClass().getName(), "Dense.fn(Seq)", "minDistance = " + result
-                                                                                + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        double minDistance = Double.MAX_VALUE;
-        for (int u = 0; u < opsPerUnit; u++) {
-          double distance = measure.distance(vectors[1][i], vectors[0][u]);
-          if (distance < minDistance) {
-            minDistance = distance;
-          }
-        }
-        result += minDistance;
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, measure.getClass().getName(), "Rand.fn(Dense)", "minDistance = " + result
-                                                                                    + ' ');
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        double minDistance = Double.MAX_VALUE;
-        for (int u = 0; u < opsPerUnit; u++) {
-          double distance = measure.distance(vectors[1][i], vectors[2][u]);
-          if (distance < minDistance) {
-            minDistance = distance;
-          }
-        }
-        result += minDistance;
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, measure.getClass().getName(), "Rand.fn(Seq)", "minDistance = " + result + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        double minDistance = Double.MAX_VALUE;
-        for (int u = 0; u < opsPerUnit; u++) {
-          double distance = measure.distance(vectors[2][i], vectors[0][u]);
-          if (distance < minDistance) {
-            minDistance = distance;
-          }
-        }
-        result += minDistance;
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, measure.getClass().getName(), "Seq.fn(Dense)", "minDistance = " + result
-                                                                                + ' ');
-    result = 0;
-    stats = new TimingStatistics();
-    for (int l = 0; l < loop; l++) {
-      for (int i = 0; i < numVectors; i++) {
-        TimingStatistics.Call call = stats.newCall();
-        double minDistance = Double.MAX_VALUE;
-        for (int u = 0; u < opsPerUnit; u++) {
-          double distance = measure.distance(vectors[2][i], vectors[1][u]);
-          if (distance < minDistance) {
-            minDistance = distance;
-          }
-        }
-        result += minDistance;
-        call.end();
-      }
-    }
-    // print result to prevent hotspot from eliminating deadcode
-    printStats(stats, measure.getClass().getName(), "Seq.fn(Rand)", "minDistance = " + result
-                                                                                    + ' ');
-    
+  public int cIndex(int i) {
+    return i % numClusters;
   }
-  
+
   public static void main(String[] args) throws IOException {
-    
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
-    
-    Option vectorSizeOpt = obuilder.withLongName("vectorSize").withRequired(false).withArgument(
-      abuilder.withName("vs").withMinimum(1).withMaximum(1).create()).withDescription(
-      "Cardinality of the vector. Default 1000").withShortName("vs").create();
-    
-    Option vectorSparsityOpt = obuilder.withLongName("sparsity").withRequired(false).withArgument(
-      abuilder.withName("sp").withMinimum(1).withMaximum(1).create()).withDescription(
-      "Sparsity of the vector. Default 1000").withShortName("sp").create();
-    Option numVectorsOpt = obuilder.withLongName("numVectors").withRequired(false).withArgument(
-      abuilder.withName("nv").withMinimum(1).withMaximum(1).create()).withDescription(
-      "Number of Vectors to create. Default: 100").withShortName("nv").create();
-    Option numClustersOpt = obuilder.withLongName("numClusters").withRequired(false).withArgument(
-          abuilder.withName("vs").withMinimum(1).withMaximum(1).create()).withDescription(
-          "Number of Vectors to create. Default: 10").withShortName("vs").create();
-    Option loopOpt = obuilder.withLongName("loop").withRequired(false).withArgument(
-      abuilder.withName("loop").withMinimum(1).withMaximum(1).create()).withDescription(
-      "Number of times to loop. Default: 200").withShortName("l").create();
-    Option numOpsOpt = obuilder.withLongName("numOps").withRequired(false).withArgument(
-      abuilder.withName("numOps").withMinimum(1).withMaximum(1).create()).withDescription(
-      "Number of operations to do per timer. "
-          + "E.g In distance measure, the distance is calculated numOps times"
-          + " and the total time is measured. Default: 10").withShortName("no").create();
-    
+
+    Option vectorSizeOpt = obuilder
+        .withLongName("vectorSize")
+        .withRequired(false)
+        .withArgument(abuilder.withName("vs").withDefault(1000000).create())
+        .withDescription("Cardinality of the vector. Default: 1000000").withShortName("vs").create();
+    Option numNonZeroOpt = obuilder
+        .withLongName("numNonZero")
+        .withRequired(false)
+        .withArgument(abuilder.withName("nz").withDefault(1000).create())
+        .withDescription("Size of the vector. Default: 1000").withShortName("nz").create();
+    Option numVectorsOpt = obuilder
+        .withLongName("numVectors")
+        .withRequired(false)
+        .withArgument(abuilder.withName("nv").withDefault(25).create())
+        .withDescription("Number of Vectors to create. Default: 25").withShortName("nv").create();
+    Option numClustersOpt = obuilder
+        .withLongName("numClusters")
+        .withRequired(false)
+        .withArgument(abuilder.withName("nc").withDefault(25).create())
+        .withDescription("Number of clusters to create. Default: 25").withShortName("nc").create();
+    Option numOpsOpt = obuilder
+        .withLongName("numOps")
+        .withRequired(false)
+        .withArgument(abuilder.withName("numOps").withDefault(10).create())
+        .withDescription(
+            "Number of operations to do per timer. "
+                + "E.g In distance measure, the distance is calculated numOps times"
+                + " and the total time is measured. Default: 10").withShortName("no").create();
+
     Option helpOpt = DefaultOptionCreator.helpOption();
-    
-    Group group = gbuilder.withName("Options").withOption(vectorSizeOpt).withOption(vectorSparsityOpt)
-        .withOption(numVectorsOpt).withOption(loopOpt).withOption(numOpsOpt).withOption(helpOpt).create();
-    
+
+    Group group = gbuilder.withName("Options").withOption(vectorSizeOpt).withOption(numNonZeroOpt)
+        .withOption(numVectorsOpt).withOption(numOpsOpt).withOption(numClustersOpt).withOption(helpOpt).create();
+
     try {
       Parser parser = new Parser();
       parser.setGroup(group);
       CommandLine cmdLine = parser.parse(args);
-      
+
       if (cmdLine.hasOption(helpOpt)) {
-        CommandLineUtil.printHelp(group);
+        CommandLineUtil.printHelpWithGenericOptions(group);
         return;
       }
-      
-      int cardinality = 1000;
+
+      int cardinality = 1000000;
       if (cmdLine.hasOption(vectorSizeOpt)) {
         cardinality = Integer.parseInt((String) cmdLine.getValue(vectorSizeOpt));
-        
-      }    
-      
+
+      }
+
       int numClusters = 25;
       if (cmdLine.hasOption(numClustersOpt)) {
         numClusters = Integer.parseInt((String) cmdLine.getValue(numClustersOpt));
       }
 
-      int sparsity = 1000;
-      if (cmdLine.hasOption(vectorSparsityOpt)) {
-        sparsity = Integer.parseInt((String) cmdLine.getValue(vectorSparsityOpt));
+      int numNonZero = 1000;
+      if (cmdLine.hasOption(numNonZeroOpt)) {
+        numNonZero = Integer.parseInt((String) cmdLine.getValue(numNonZeroOpt));
       }
 
-      int numVectors = 100;
+      int numVectors = 25;
       if (cmdLine.hasOption(numVectorsOpt)) {
         numVectors = Integer.parseInt((String) cmdLine.getValue(numVectorsOpt));
-        
-      }
-      int loop = 200;
-      if (cmdLine.hasOption(loopOpt)) {
-        loop = Integer.parseInt((String) cmdLine.getValue(loopOpt));
-        
+
       }
+
       int numOps = 10;
       if (cmdLine.hasOption(numOpsOpt)) {
         numOps = Integer.parseInt((String) cmdLine.getValue(numOpsOpt));
-        
+
       }
-      VectorBenchmarks mark = new VectorBenchmarks(cardinality, sparsity, numVectors, numClusters, loop, numOps);
-      mark.createBenchmark();
-      mark.incrementalCreateBenchmark();
-      mark.cloneBenchmark();
-      mark.dotBenchmark();
-      mark.serializeBenchmark();
-      mark.deserializeBenchmark();
-      mark.distanceMeasureBenchmark(new CosineDistanceMeasure());
-      mark.distanceMeasureBenchmark(new SquaredEuclideanDistanceMeasure());
-      mark.distanceMeasureBenchmark(new EuclideanDistanceMeasure());
-      mark.distanceMeasureBenchmark(new ManhattanDistanceMeasure());
-      mark.distanceMeasureBenchmark(new TanimotoDistanceMeasure());
-      
-      mark.closestCentroidBenchmark(new CosineDistanceMeasure());
-      mark.closestCentroidBenchmark(new SquaredEuclideanDistanceMeasure());
-      mark.closestCentroidBenchmark(new EuclideanDistanceMeasure());
-      mark.closestCentroidBenchmark(new ManhattanDistanceMeasure());
-      mark.closestCentroidBenchmark(new TanimotoDistanceMeasure());
-      
+      VectorBenchmarks mark = new VectorBenchmarks(cardinality, numNonZero, numVectors, numClusters, numOps);
+      runBenchmark(mark);
+
       log.info("\n{}", mark);
     } catch (OptionException e) {
       CommandLineUtil.printHelp(group);
     }
-    
   }
-  
+
+  private static void runBenchmark(VectorBenchmarks mark) throws IOException {
+    // Required to set up data.
+    mark.createData();
+
+    mark.createBenchmark();
+    if (mark.cardinality < 200000) {
+      // Too slow.
+      mark.incrementalCreateBenchmark();
+    }
+
+    new CloneBenchmark(mark).benchmark();
+    new DotBenchmark(mark).benchmark();
+    new PlusBenchmark(mark).benchmark();
+    new MinusBenchmark(mark).benchmark();
+    new TimesBenchmark(mark).benchmark();
+    new SerializationBenchmark(mark).benchmark();
+
+    DistanceBenchmark distanceBenchmark = new DistanceBenchmark(mark);
+    distanceBenchmark.benchmark(new CosineDistanceMeasure());
+    distanceBenchmark.benchmark(new SquaredEuclideanDistanceMeasure());
+    distanceBenchmark.benchmark(new EuclideanDistanceMeasure());
+    distanceBenchmark.benchmark(new ManhattanDistanceMeasure());
+    distanceBenchmark.benchmark(new TanimotoDistanceMeasure());
+    distanceBenchmark.benchmark(new ChebyshevDistanceMeasure());
+    distanceBenchmark.benchmark(new MinkowskiDistanceMeasure());
+
+    ClosestCentroidBenchmark centroidBenchmark = new ClosestCentroidBenchmark(mark);
+    centroidBenchmark.benchmark(new CosineDistanceMeasure());
+    centroidBenchmark.benchmark(new SquaredEuclideanDistanceMeasure());
+    centroidBenchmark.benchmark(new EuclideanDistanceMeasure());
+    centroidBenchmark.benchmark(new ManhattanDistanceMeasure());
+    centroidBenchmark.benchmark(new TanimotoDistanceMeasure());
+    centroidBenchmark.benchmark(new ChebyshevDistanceMeasure());
+    centroidBenchmark.benchmark(new MinkowskiDistanceMeasure());
+  }
+
   @Override
   public String toString() {
     int pad = 24;
@@ -824,7 +432,7 @@ public class VectorBenchmarks {
       for (String[] stat : implTokenizedStats) {
         maxStats = Math.max(maxStats, stat.length);
       }
-      
+
       for (int i = 0; i < maxStats; i++) {
         boolean printedName = false;
         for (String[] stats : implTokenizedStats) {
@@ -848,5 +456,8 @@ public class VectorBenchmarks {
     }
     return sb.toString();
   }
-  
+
+  public BenchmarkRunner getRunner() {
+    return runner;
+  }
 }



Mime
View raw message