Return-Path: X-Original-To: apmail-mahout-commits-archive@www.apache.org Delivered-To: apmail-mahout-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 91228F951 for ; Thu, 18 Apr 2013 19:06:54 +0000 (UTC) Received: (qmail 32428 invoked by uid 500); 18 Apr 2013 19:06:54 -0000 Delivered-To: apmail-mahout-commits-archive@mahout.apache.org Received: (qmail 32306 invoked by uid 500); 18 Apr 2013 19:06:53 -0000 Mailing-List: contact commits-help@mahout.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@mahout.apache.org Delivered-To: mailing list commits@mahout.apache.org Received: (qmail 32287 invoked by uid 99); 18 Apr 2013 19:06:53 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 18 Apr 2013 19:06:53 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 18 Apr 2013 19:06:49 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id B7EBC2388847; Thu, 18 Apr 2013 19:06:28 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1469528 - in /mahout/trunk: core/src/main/java/org/apache/mahout/common/ integration/src/main/java/org/apache/mahout/benchmark/ Date: Thu, 18 Apr 2013 19:06:28 -0000 To: commits@mahout.apache.org From: robinanil@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130418190628.B7EBC2388847@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: robinanil Date: Thu Apr 18 19:06:27 2013 New Revision: 1469528 URL: http://svn.apache.org/r1469528 Log: MAHOUT-1191 Cleans Up vector benchmarks to be faster and more consistent. 
Cannot compare the values across earlier versions of this code Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java?rev=1469528&r1=1469527&r2=1469528&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java Thu Apr 18 19:06:27 2013 @@ -18,18 +18,21 @@ package org.apache.mahout.common; import java.io.Serializable; +import java.text.DecimalFormat; public final class TimingStatistics implements Serializable { - + private static final DecimalFormat DF = new DecimalFormat("#.##"); private int nCalls; private long minTime; private long maxTime; private long sumTime; + private long leadSumTime; private double sumSquaredTime; - + + /** Creates a new 
instance of CallStats */ public TimingStatistics() { } - + public TimingStatistics(int nCalls, long minTime, long maxTime, long sumTime, double sumSquaredTime) { this.nCalls = nCalls; this.minTime = minTime; @@ -37,31 +40,31 @@ public final class TimingStatistics impl this.sumTime = sumTime; this.sumSquaredTime = sumSquaredTime; } - + public synchronized int getNCalls() { return nCalls; } - + public synchronized long getMinTime() { return Math.max(0, minTime); } - + public synchronized long getMaxTime() { return maxTime; } - + public synchronized long getSumTime() { return sumTime; } - + public synchronized double getSumSquaredTime() { return sumSquaredTime; } - + public synchronized long getMeanTime() { return nCalls == 0 ? 0 : sumTime / nCalls; } - + public synchronized long getStdDevTime() { if (nCalls == 0) { return 0; @@ -75,24 +78,59 @@ public final class TimingStatistics impl } return (long) Math.sqrt(variance); } - + @Override public synchronized String toString() { - return '\n' + "nCalls = " + nCalls + ";\n" + "sum = " + sumTime / 1000000000.0 + "s;\n" - + "min = " + minTime / 1000000.0 + "ms;\n" + "max = " + maxTime / 1000000.0 + "ms;\n" - + "mean = " + getMeanTime() / 1000000.0 + "ms;\n" + "stdDev = " + getStdDevTime() - / 1000000.0 + "ms;"; + return '\n' + + "nCalls = " + nCalls + ";\n" + + "sum = " + DF.format(sumTime / 1000000000.0) + "s;\n" + + "min = " + DF.format(minTime / 1000000.0) + "ms;\n" + + "max = " + DF.format(maxTime / 1000000.0) + "ms;\n" + + "mean = " + DF.format(getMeanTime() / 1000.0) + "us;\n" + + "stdDev = " + DF.format(getStdDevTime() / 1000.0) + "us;"; } - + public Call newCall() { return new Call(); } - - public final class Call { - private final long startTime = System.nanoTime(); - + + /** Ignores counting the performance metrics until the lead time has elapsed. The caller should allow enough time for the JIT to warm up. 
*/ + public Call newCall(long leadTimeUsec) { + if (leadSumTime > leadTimeUsec) { + return new Call(); + } else { + return new LeadTimeCall(); + } + } + + /** Ignores counting the performance metrics. The caller should allow enough time for the JIT to warm up. */ + public class LeadTimeCall extends Call { + + private LeadTimeCall() { } + + @Override + public void end() { + long elapsed = System.nanoTime() - startTime; + synchronized (TimingStatistics.this) { + leadSumTime += elapsed; + } + } + + @Override + public boolean end(long sumMaxUsec) { + end(); + return false; + } + } + + /** + * A call object that can update performance metrics. + */ + public class Call { + protected final long startTime = System.nanoTime(); + private Call() { } - + public void end() { long elapsed = System.nanoTime() - startTime; synchronized (TimingStatistics.this) { @@ -107,5 +145,13 @@ public final class TimingStatistics impl sumSquaredTime += elapsed * elapsed; } } + + /** + * Returns true if the sumTime has reached this limit. + */ + public boolean end(long sumMaxUsec) { + end(); + return sumMaxUsec < sumTime; + } } } Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java?rev=1469528&view=auto ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java (added) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java Thu Apr 18 19:06:27 2013 @@ -0,0 +1,94 @@ +package org.apache.mahout.benchmark; + +import java.util.Random; +import java.util.concurrent.TimeUnit; + +import org.apache.mahout.common.RandomUtils; +import org.apache.mahout.common.TimingStatistics; +import org.apache.mahout.math.Vector; + +import com.google.common.base.Function; + +public final class BenchmarkRunner { + private 
static final int BUCKET_SIZE = 10000; + private static final Random R = RandomUtils.getRandom(); + private final long maxTimeUsec; + private final long leadTimeUsec; + + public BenchmarkRunner(long leadTimeMs, long maxTimeMs) { + maxTimeUsec = TimeUnit.MILLISECONDS.toNanos(maxTimeMs); + leadTimeUsec = TimeUnit.MILLISECONDS.toNanos(leadTimeMs); + } + + public static abstract class BenchmarkFn implements Function { + protected int randIndex() { + return BenchmarkRunner.randIndex(); + } + + protected boolean randBool() { + return BenchmarkRunner.randBool(); + } + + /** + * Adds a random data dependency so that JVM does not remove dead code. + */ + protected boolean depends(Vector v) { + return randIndex() < v.getNumNondefaultElements(); + } + } + + public static abstract class BenchmarkFnD implements Function { + protected int randIndex() { + return BenchmarkRunner.randIndex(); + } + + protected boolean randBool() { + return BenchmarkRunner.randBool(); + } + + /** + * Adds a random data dependency so that JVM does not remove dead code. 
+ */ + protected boolean depends(Vector v) { + return randIndex() < v.getNumNondefaultElements(); + } + } + + private static int randIndex() { + return R.nextInt(BUCKET_SIZE); + } + + private static boolean randBool() { + return R.nextBoolean(); + } + + public TimingStatistics benchmark(BenchmarkFn function) { + TimingStatistics stats = new TimingStatistics(); + boolean result = false; + while (true) { + int i = R.nextInt(BUCKET_SIZE); + TimingStatistics.Call call = stats.newCall(leadTimeUsec); + result = result ^ function.apply(i); + if (call.end(maxTimeUsec)) { + break; + } + } + return stats; + } + + public TimingStatistics benchmarkD(BenchmarkFnD function) { + TimingStatistics stats = new TimingStatistics(); + double result = 0; + while (true) { + int i = R.nextInt(BUCKET_SIZE); + TimingStatistics.Call call = stats.newCall(leadTimeUsec); + result += function.apply(i); + if (call.end(maxTimeUsec)) { + break; + } + } + // print result to prevent hotspot from eliminating deadcode + System.err.println("Result = " + result); + return stats; + } +} Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java?rev=1469528&view=auto ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java (added) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java Thu Apr 18 19:06:27 2013 @@ -0,0 +1,45 @@ +package org.apache.mahout.benchmark; + +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR; + +import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn; + +public class CloneBenchmark { + public static 
final String CLONE = "Clone"; + private final VectorBenchmarks mark; + + public CloneBenchmark(VectorBenchmarks mark) { + this.mark = mark; + } + + public void benchmark() { + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + mark.vectors[0][mark.vIndex(i)] = mark.vectors[0][mark.vIndex(i)].clone(); + + return depends(mark.vectors[0][mark.vIndex(i)]); + } + }), CLONE, DENSE_VECTOR); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + mark.vectors[1][mark.vIndex(i)] = mark.vectors[1][mark.vIndex(i)].clone(); + + return depends(mark.vectors[1][mark.vIndex(i)]); + } + }), CLONE, RAND_SPARSE_VECTOR); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + mark.vectors[2][mark.vIndex(i)] = mark.vectors[2][mark.vIndex(i)].clone(); + + return depends(mark.vectors[2][mark.vIndex(i)]); + } + }), CLONE, SEQ_SPARSE_VECTOR); + } +} Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java?rev=1469528&view=auto ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java (added) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java Thu Apr 18 19:06:27 2013 @@ -0,0 +1,83 @@ +package org.apache.mahout.benchmark; + +import java.io.IOException; +import java.util.Random; + +import org.apache.mahout.common.RandomUtils; +import org.apache.mahout.common.TimingStatistics; +import org.apache.mahout.common.distance.DistanceMeasure; +import org.apache.mahout.math.SparseMatrix; +import org.apache.mahout.math.Vector; + +public class ClosestCentroidBenchmark { + public 
static final String SERIALIZE = "Serialize"; + public static final String DESERIALIZE = "Deserialize"; + private final VectorBenchmarks mark; + + public ClosestCentroidBenchmark(VectorBenchmarks mark) { + this.mark = mark; + } + + public void benchmark(DistanceMeasure measure) throws IOException { + SparseMatrix clusterDistances = new SparseMatrix(mark.numClusters, mark.numClusters); + for (int i = 0; i < mark.numClusters; i++) { + for (int j = 0; j < mark.numClusters; j++) { + double distance = Double.POSITIVE_INFINITY; + if (i != j) { + distance = measure.distance(mark.clusters[i], mark.clusters[j]); + } + clusterDistances.setQuick(i, j, distance); + } + } + + long distanceCalculations = 0; + TimingStatistics stats = new TimingStatistics(); + for (int l = 0; l < mark.loop; l++) { + TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec); + for (int i = 0; i < mark.numVectors; i++) { + Vector vector = mark.vectors[1][mark.vIndex(i)]; + double minDistance = Double.MAX_VALUE; + for (int k = 0; k < mark.numClusters; k++) { + double distance = measure.distance(vector, mark.clusters[k]); + distanceCalculations++; + if (distance < minDistance) { + minDistance = distance; + } + } + } + if (call.end(mark.maxTimeUsec)) { + break; + } + } + mark.printStats(stats, measure.getClass().getName(), "Closest C w/o Elkan's trick", "distanceCalculations = " + + distanceCalculations); + + distanceCalculations = 0; + stats = new TimingStatistics(); + Random rand = RandomUtils.getRandom(); + for (int l = 0; l < mark.loop; l++) { + TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec); + for (int i = 0; i < mark.numVectors; i++) { + Vector vector = mark.vectors[1][mark.vIndex(i)]; + int closestCentroid = rand.nextInt(mark.numClusters); + double dist = measure.distance(vector, mark.clusters[closestCentroid]); + distanceCalculations++; + for (int k = 0; k < mark.numClusters; k++) { + if (closestCentroid != k) { + double centroidDist = clusterDistances.getQuick(k, 
closestCentroid); + if (centroidDist < 2 * dist) { + dist = measure.distance(vector, mark.clusters[k]); + closestCentroid = k; + distanceCalculations++; + } + } + } + } + if (call.end(mark.maxTimeUsec)) { + break; + } + } + mark.printStats(stats, measure.getClass().getName(), "Closest C w/ Elkan's trick", "distanceCalculations = " + + distanceCalculations); + } +} Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java?rev=1469528&view=auto ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java (added) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java Thu Apr 18 19:06:27 2013 @@ -0,0 +1,87 @@ +package org.apache.mahout.benchmark; + +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND; +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ; +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR; + +import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFnD; +import org.apache.mahout.common.distance.DistanceMeasure; + +public class DistanceBenchmark { + private final VectorBenchmarks mark; + + public DistanceBenchmark(VectorBenchmarks mark) { + this.mark = mark; + } + + public void benchmark(final DistanceMeasure measure) { + 
mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return measure.distance(mark.vectors[0][mark.vIndex(i)], mark.vectors[0][mark.vIndex(randIndex())]); + } + }), measure.getClass().getName(), DENSE_VECTOR); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return measure.distance(mark.vectors[1][mark.vIndex(i)], mark.vectors[1][mark.vIndex(randIndex())]); + } + }), measure.getClass().getName(), RAND_SPARSE_VECTOR); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return measure.distance(mark.vectors[2][mark.vIndex(i)], mark.vectors[2][mark.vIndex(randIndex())]); + } + }), measure.getClass().getName(), SEQ_SPARSE_VECTOR); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return measure.distance(mark.vectors[0][mark.vIndex(i)], mark.vectors[1][mark.vIndex(randIndex())]); + } + }), measure.getClass().getName(), DENSE_FN_RAND); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return measure.distance(mark.vectors[0][mark.vIndex(i)], mark.vectors[2][mark.vIndex(randIndex())]); + } + }), measure.getClass().getName(), DENSE_FN_SEQ); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return measure.distance(mark.vectors[1][mark.vIndex(i)], mark.vectors[0][mark.vIndex(randIndex())]); + } + }), measure.getClass().getName(), RAND_FN_DENSE); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return measure.distance(mark.vectors[1][mark.vIndex(i)], mark.vectors[2][mark.vIndex(randIndex())]); + } + }), measure.getClass().getName(), RAND_FN_SEQ); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + 
@Override + public Double apply(Integer i) { + return measure.distance(mark.vectors[2][mark.vIndex(i)], mark.vectors[0][mark.vIndex(randIndex())]); + } + }), measure.getClass().getName(), SEQ_FN_DENSE); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return measure.distance(mark.vectors[2][mark.vIndex(i)], mark.vectors[1][mark.vIndex(randIndex())]); + } + }), measure.getClass().getName(), SEQ_FN_RAND); + } +} Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java?rev=1469528&view=auto ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java (added) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java Thu Apr 18 19:06:27 2013 @@ -0,0 +1,142 @@ +package org.apache.mahout.benchmark; + +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND; +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ; +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR; + +import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn; +import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFnD; + +public class DotBenchmark { + private static final String DOT_PRODUCT = "DotProduct"; + private static final String NORM1 
= "Norm1"; + private static final String LOG_NORMALIZE = "LogNormalize"; + private final VectorBenchmarks mark; + + public DotBenchmark(VectorBenchmarks mark) { + this.mark = mark; + } + + public void benchmark() { + benchmarkDot(); + benchmarkNorm1(); + benchmarkLogNormalize(); + } + + private void benchmarkLogNormalize() { + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + return depends(mark.vectors[0][mark.vIndex(i)].logNormalize()); + } + }), LOG_NORMALIZE, DENSE_VECTOR); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + return depends(mark.vectors[1][mark.vIndex(i)].logNormalize()); + } + }), LOG_NORMALIZE, RAND_SPARSE_VECTOR); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + return depends(mark.vectors[2][mark.vIndex(i)].logNormalize()); + } + }), LOG_NORMALIZE, SEQ_SPARSE_VECTOR); + } + + private void benchmarkNorm1() { + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return mark.vectors[0][mark.vIndex(i)].norm(1); + } + }), NORM1, DENSE_VECTOR); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return mark.vectors[1][mark.vIndex(i)].norm(1); + } + }), NORM1, RAND_SPARSE_VECTOR); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return mark.vectors[2][mark.vIndex(i)].norm(1); + } + }), NORM1, SEQ_SPARSE_VECTOR); + } + + private void benchmarkDot() { + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return mark.vectors[0][mark.vIndex(i)].dot(mark.vectors[0][mark.vIndex(randIndex())]); + } + }), DOT_PRODUCT, DENSE_VECTOR); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + 
@Override + public Double apply(Integer i) { + return mark.vectors[1][mark.vIndex(i)].dot(mark.vectors[1][mark.vIndex(randIndex())]); + } + }), DOT_PRODUCT, RAND_SPARSE_VECTOR); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return mark.vectors[2][mark.vIndex(i)].dot(mark.vectors[2][mark.vIndex(randIndex())]); + } + }), DOT_PRODUCT, SEQ_SPARSE_VECTOR); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return mark.vectors[0][mark.vIndex(i)].dot(mark.vectors[1][mark.vIndex(randIndex())]); + } + }), DOT_PRODUCT, DENSE_FN_RAND); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return mark.vectors[0][mark.vIndex(i)].dot(mark.vectors[2][mark.vIndex(randIndex())]); + } + }), DOT_PRODUCT, DENSE_FN_SEQ); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return mark.vectors[1][mark.vIndex(i)].dot(mark.vectors[0][mark.vIndex(randIndex())]); + } + }), DOT_PRODUCT, RAND_FN_DENSE); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return mark.vectors[1][mark.vIndex(i)].dot(mark.vectors[2][mark.vIndex(randIndex())]); + } + }), DOT_PRODUCT, RAND_FN_SEQ); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return mark.vectors[2][mark.vIndex(i)].dot(mark.vectors[0][mark.vIndex(randIndex())]); + } + }), DOT_PRODUCT, SEQ_FN_DENSE); + + mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() { + @Override + public Double apply(Integer i) { + return mark.vectors[2][mark.vIndex(i)].dot(mark.vectors[1][mark.vIndex(randIndex())]); + } + }), DOT_PRODUCT, SEQ_FN_RAND); + } +} Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java URL: 
http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java?rev=1469528&view=auto ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java (added) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java Thu Apr 18 19:06:27 2013 @@ -0,0 +1,98 @@ +package org.apache.mahout.benchmark; + +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND; +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ; +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR; + +import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn; +import org.apache.mahout.math.Vector; + +public class MinusBenchmark { + + private static final String MINUS = "Minus"; + private final VectorBenchmarks mark; + + public MinusBenchmark(VectorBenchmarks mark) { + this.mark = mark; + } + + public void benchmark() { + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[0][mark.vIndex(i)].minus(mark.vectors[0][mark.vIndex(randIndex())]); + return depends(v); + } + }), MINUS, DENSE_VECTOR); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[1][mark.vIndex(i)].minus(mark.vectors[1][mark.vIndex(randIndex())]); + return depends(v); + } + }), 
MINUS, RAND_SPARSE_VECTOR); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[2][mark.vIndex(i)].minus(mark.vectors[2][mark.vIndex(randIndex())]); + return depends(v); + } + }), MINUS, SEQ_SPARSE_VECTOR); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[0][mark.vIndex(i)].minus(mark.vectors[1][mark.vIndex(randIndex())]); + return depends(v); + } + }), MINUS, DENSE_FN_RAND); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[0][mark.vIndex(i)].minus(mark.vectors[2][mark.vIndex(randIndex())]); + return depends(v); + } + }), MINUS, DENSE_FN_SEQ); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[1][mark.vIndex(i)].minus(mark.vectors[0][mark.vIndex(randIndex())]); + return depends(v); + } + }), MINUS, RAND_FN_DENSE); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[1][mark.vIndex(i)].minus(mark.vectors[2][mark.vIndex(randIndex())]); + return depends(v); + } + }), MINUS, RAND_FN_SEQ); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[2][mark.vIndex(i)].minus(mark.vectors[0][mark.vIndex(randIndex())]); + return depends(v); + } + }), MINUS, SEQ_FN_DENSE); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[2][mark.vIndex(i)].minus(mark.vectors[1][mark.vIndex(randIndex())]); + return depends(v); + } + }), MINUS, SEQ_FN_RAND); + } +} Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java URL: 
http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java?rev=1469528&view=auto ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java (added) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java Thu Apr 18 19:06:27 2013 @@ -0,0 +1,98 @@ +package org.apache.mahout.benchmark; + +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND; +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ; +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR; + +import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn; +import org.apache.mahout.math.Vector; + +public class PlusBenchmark { + + private static final String PLUS = "Plus"; + private final VectorBenchmarks mark; + + public PlusBenchmark(VectorBenchmarks mark) { + this.mark = mark; + } + + public void benchmark() { + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[0][mark.vIndex(i)].plus(mark.vectors[0][mark.vIndex(randIndex())]); + return depends(v); + } + }), PLUS, DENSE_VECTOR); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[1][mark.vIndex(i)].plus(mark.vectors[1][mark.vIndex(randIndex())]); + return depends(v); + } + }), PLUS, 
RAND_SPARSE_VECTOR); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[2][mark.vIndex(i)].plus(mark.vectors[2][mark.vIndex(randIndex())]); + return depends(v); + } + }), PLUS, SEQ_SPARSE_VECTOR); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[0][mark.vIndex(i)].plus(mark.vectors[1][mark.vIndex(randIndex())]); + return depends(v); + } + }), PLUS, DENSE_FN_RAND); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[0][mark.vIndex(i)].plus(mark.vectors[2][mark.vIndex(randIndex())]); + return depends(v); + } + }), PLUS, DENSE_FN_SEQ); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[1][mark.vIndex(i)].plus(mark.vectors[0][mark.vIndex(randIndex())]); + return depends(v); + } + }), PLUS, RAND_FN_DENSE); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[1][mark.vIndex(i)].plus(mark.vectors[2][mark.vIndex(randIndex())]); + return depends(v); + } + }), PLUS, RAND_FN_SEQ); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[2][mark.vIndex(i)].plus(mark.vectors[0][mark.vIndex(randIndex())]); + return depends(v); + } + }), PLUS, SEQ_FN_DENSE); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[2][mark.vIndex(i)].plus(mark.vectors[1][mark.vIndex(randIndex())]); + return depends(v); + } + }), PLUS, SEQ_FN_RAND); + } +} Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java URL: 
http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java?rev=1469528&view=auto ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java (added) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java Thu Apr 18 19:06:27 2013 @@ -0,0 +1,115 @@ +package org.apache.mahout.benchmark; + +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Writable; +import org.apache.mahout.common.TimingStatistics; +import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator; +import org.apache.mahout.math.VectorWritable; + +import com.google.common.io.Closeables; + +public class SerializationBenchmark { + public static final String SERIALIZE = "Serialize"; + public static final String DESERIALIZE = "Deserialize"; + private final VectorBenchmarks mark; + + public SerializationBenchmark(VectorBenchmarks mark) { + this.mark = mark; + } + + public void benchmark() throws IOException { + serializeBenchmark(); + deserializeBenchmark(); + } + + public void serializeBenchmark() throws IOException { + Configuration conf = new Configuration(); + FileSystem fs = FileSystem.get(conf); + SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path("/tmp/dense-vector"), IntWritable.class, + VectorWritable.class); + + Writable one = new IntWritable(0); + VectorWritable vec = new VectorWritable(); + 
TimingStatistics stats = new TimingStatistics(); + + try { + for (int i = 0; i < mark.loop; i++) { + TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec); + vec.set(mark.vectors[0][mark.vIndex(i)]); + writer.append(one, vec); + if (call.end(mark.maxTimeUsec)) { + break; + } + } + } finally { + Closeables.close(writer, true); + } + mark.printStats(stats, SERIALIZE, DENSE_VECTOR); + + writer = new SequenceFile.Writer(fs, conf, new Path("/tmp/randsparse-vector"), IntWritable.class, + VectorWritable.class); + stats = new TimingStatistics(); + try { + for (int i = 0; i < mark.loop; i++) { + TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec); + vec.set(mark.vectors[1][mark.vIndex(i)]); + writer.append(one, vec); + if (call.end(mark.maxTimeUsec)) { + break; + } + } + } finally { + Closeables.close(writer, true); + } + mark.printStats(stats, SERIALIZE, RAND_SPARSE_VECTOR); + + writer = new SequenceFile.Writer(fs, conf, new Path("/tmp/seqsparse-vector"), IntWritable.class, + VectorWritable.class); + stats = new TimingStatistics(); + try { + for (int i = 0; i < mark.loop; i++) { + TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec); + vec.set(mark.vectors[2][mark.vIndex(i)]); + writer.append(one, vec); + if (call.end(mark.maxTimeUsec)) { + break; + } + } + } finally { + Closeables.close(writer, true); + } + mark.printStats(stats, SERIALIZE, SEQ_SPARSE_VECTOR); + + } + + public void deserializeBenchmark() throws IOException { + doDeserializeBenchmark(DENSE_VECTOR, "/tmp/dense-vector"); + doDeserializeBenchmark(RAND_SPARSE_VECTOR, "/tmp/randsparse-vector"); + doDeserializeBenchmark(SEQ_SPARSE_VECTOR, "/tmp/seqsparse-vector"); + } + + private void doDeserializeBenchmark(String name, String pathString) throws IOException { + TimingStatistics stats = new TimingStatistics(); + TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec); + SequenceFileValueIterator iterator = new SequenceFileValueIterator(new Path(pathString), true, + new 
Configuration()); + while (iterator.hasNext()) { + iterator.next(); + call.end(); + call = stats.newCall(mark.leadTimeUsec); + } + iterator.close(); + mark.printStats(stats, DESERIALIZE, name); + } + +} Added: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java?rev=1469528&view=auto ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java (added) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java Thu Apr 18 19:06:27 2013 @@ -0,0 +1,98 @@ +package org.apache.mahout.benchmark; + +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND; +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ; +import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ; +import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND; +import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR; + +import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn; +import org.apache.mahout.math.Vector; + +public class TimesBenchmark { + + private static final String TIMES = "Times"; + private final VectorBenchmarks mark; + + public TimesBenchmark(VectorBenchmarks mark) { + this.mark = mark; + } + + public void benchmark() { + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = 
mark.vectors[0][mark.vIndex(i)].times(mark.vectors[0][mark.vIndex(randIndex())]); + return depends(v); + } + }), TIMES, DENSE_VECTOR); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[1][mark.vIndex(i)].times(mark.vectors[1][mark.vIndex(randIndex())]); + return depends(v); + } + }), TIMES, RAND_SPARSE_VECTOR); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[2][mark.vIndex(i)].times(mark.vectors[2][mark.vIndex(randIndex())]); + return depends(v); + } + }), TIMES, SEQ_SPARSE_VECTOR); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[0][mark.vIndex(i)].times(mark.vectors[1][mark.vIndex(randIndex())]); + return depends(v); + } + }), TIMES, DENSE_FN_RAND); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[0][mark.vIndex(i)].times(mark.vectors[2][mark.vIndex(randIndex())]); + return depends(v); + } + }), TIMES, DENSE_FN_SEQ); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[1][mark.vIndex(i)].times(mark.vectors[0][mark.vIndex(randIndex())]); + return depends(v); + } + }), TIMES, RAND_FN_DENSE); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[1][mark.vIndex(i)].times(mark.vectors[2][mark.vIndex(randIndex())]); + return depends(v); + } + }), TIMES, RAND_FN_SEQ); + + mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[2][mark.vIndex(i)].times(mark.vectors[0][mark.vIndex(randIndex())]); + return depends(v); + } + }), TIMES, SEQ_FN_DENSE); + + 
mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + Vector v = mark.vectors[2][mark.vIndex(i)].times(mark.vectors[1][mark.vIndex(randIndex())]); + return depends(v); + } + }), TIMES, SEQ_FN_RAND); + } +} Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java?rev=1469528&r1=1469527&r2=1469528&view=diff ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java (original) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java Thu Apr 18 19:06:27 2013 @@ -18,18 +18,16 @@ package org.apache.mahout.benchmark; import java.io.IOException; +import java.text.DecimalFormat; import java.util.BitSet; import java.util.Collections; -import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.Map.Entry; +import java.util.Random; +import java.util.concurrent.TimeUnit; import java.util.regex.Pattern; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.io.Closeables; import org.apache.commons.cli2.CommandLine; import org.apache.commons.cli2.Group; import org.apache.commons.cli2.Option; @@ -39,72 +37,99 @@ import org.apache.commons.cli2.builder.D import org.apache.commons.cli2.builder.GroupBuilder; import org.apache.commons.cli2.commandline.Parser; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.io.Writable; +import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn; import 
org.apache.mahout.common.CommandLineUtil; import org.apache.mahout.common.RandomUtils; import org.apache.mahout.common.TimingStatistics; import org.apache.mahout.common.commandline.DefaultOptionCreator; +import org.apache.mahout.common.distance.ChebyshevDistanceMeasure; import org.apache.mahout.common.distance.CosineDistanceMeasure; -import org.apache.mahout.common.distance.DistanceMeasure; import org.apache.mahout.common.distance.EuclideanDistanceMeasure; import org.apache.mahout.common.distance.ManhattanDistanceMeasure; +import org.apache.mahout.common.distance.MinkowskiDistanceMeasure; import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure; import org.apache.mahout.common.distance.TanimotoDistanceMeasure; -import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator; import org.apache.mahout.math.DenseVector; import org.apache.mahout.math.RandomAccessSparseVector; import org.apache.mahout.math.SequentialAccessSparseVector; -import org.apache.mahout.math.SparseMatrix; import org.apache.mahout.math.Vector; -import org.apache.mahout.math.VectorWritable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + public class VectorBenchmarks { + private static final int MAX_TIME_MS = 500; + private static final int LEAD_TIME_MS = 100; + public static final String CLUSTERS = "Clusters"; + public static final String CREATE_INCREMENTALLY = "Create (incrementally)"; + public static final String CREATE_COPY = "Create (copy)"; + + public static final String DENSE_FN_SEQ = "Dense.fn(Seq)"; + public static final String RAND_FN_DENSE = "Rand.fn(Dense)"; + public static final String SEQ_FN_RAND = "Seq.fn(Rand)"; + public static final String RAND_FN_SEQ = "Rand.fn(Seq)"; + public static final String SEQ_FN_DENSE = "Seq.fn(Dense)"; + public static final String DENSE_FN_RAND = "Dense.fn(Rand)"; + public static final String SEQ_SPARSE_VECTOR = "SeqSparseVector"; + 
public static final String RAND_SPARSE_VECTOR = "RandSparseVector"; + public static final String DENSE_VECTOR = "DenseVector"; private static final Logger log = LoggerFactory.getLogger(VectorBenchmarks.class); - private static final Pattern TAB_NEWLINE_PATTERN = Pattern.compile("[\n\t]"); private static final String[] EMPTY = new String[0]; + private static final DecimalFormat DF = new DecimalFormat("#.##"); + + /* package private */ + final Vector[][] vectors; + final Vector[] clusters; + final int cardinality; + final int numNonZeros; + final int numVectors; + final int numClusters; + final int loop = Integer.MAX_VALUE; + final int opsPerUnit; + final long maxTimeUsec; + final long leadTimeUsec; - private final Vector[][] vectors; - private final Vector[] clusters; - private final SparseMatrix clusterDistances; private final List randomVectors = Lists.newArrayList(); private final List randomVectorIndices = Lists.newArrayList(); private final List randomVectorValues = Lists.newArrayList(); - private final int cardinality; - private final int sparsity; - private final int numVectors; - private final int loop; - private final int opsPerUnit; - private final Map implType = Maps.newHashMap(); - private final Map> statsMap = Maps.newHashMap(); - private final int numClusters; - - public VectorBenchmarks(int cardinality, int sparsity, int numVectors, int numClusters, int loop, int opsPerUnit) { - Random r = RandomUtils.getRandom(); + private final Map implType = Maps.newHashMap(); + private final Map> statsMap = Maps.newHashMap(); + private final BenchmarkRunner runner; + private final Random r = RandomUtils.getRandom(); + + public VectorBenchmarks(int cardinality, int numNonZeros, int numVectors, int numClusters, + int opsPerUnit) { + runner = new BenchmarkRunner(LEAD_TIME_MS, MAX_TIME_MS); + maxTimeUsec = TimeUnit.MILLISECONDS.toNanos(MAX_TIME_MS); + leadTimeUsec = TimeUnit.MILLISECONDS.toNanos(LEAD_TIME_MS); + this.cardinality = cardinality; - this.sparsity = 
sparsity; + this.numNonZeros = numNonZeros; this.numVectors = numVectors; this.numClusters = numClusters; - this.loop = loop; this.opsPerUnit = opsPerUnit; + + setUpVectors(cardinality, numNonZeros, numVectors); + + vectors = new Vector[3][numVectors]; + clusters = new Vector[numClusters]; + } + + private void setUpVectors(int cardinality, int numNonZeros, int numVectors) { for (int i = 0; i < numVectors; i++) { - Vector v = new SequentialAccessSparseVector(cardinality, sparsity); // sparsity! + Vector v = new SequentialAccessSparseVector(cardinality, numNonZeros); // sparsity! BitSet featureSpace = new BitSet(cardinality); - int[] indexes = new int[sparsity]; - double[] values = new double[sparsity]; + int[] indexes = new int[numNonZeros]; + double[] values = new double[numNonZeros]; int j = 0; - while (j < sparsity) { + while (j < numNonZeros) { double value = r.nextGaussian(); int index = r.nextInt(cardinality); - if (!featureSpace.get(index)) { + if (!featureSpace.get(index) && value != 0) { featureSpace.set(index); indexes[j] = index; values[j++] = value; @@ -115,28 +140,22 @@ public class VectorBenchmarks { randomVectorValues.add(values); randomVectors.add(v); } - vectors = new Vector[3][numVectors]; - clusters = new Vector[numClusters]; - clusterDistances = new SparseMatrix(numClusters, numClusters); } - - private void printStats(TimingStatistics stats, String benchmarkName, String implName, String content) { + + void printStats(TimingStatistics stats, String benchmarkName, String implName, String content) { printStats(stats, benchmarkName, implName, content, 1); } - - private void printStats(TimingStatistics stats, String benchmarkName, String implName) { + + void printStats(TimingStatistics stats, String benchmarkName, String implName) { printStats(stats, benchmarkName, implName, "", 1); } - - private void printStats(TimingStatistics stats, - String benchmarkName, - String implName, - String content, - int multiplier) { - float speed = multiplier * loop * 
numVectors * sparsity * 1000.0f * 12 / stats.getSumTime(); - float opsPerSec = loop * numVectors * 1000000000.0f / stats.getSumTime(); - log.info("{} {} \n{} {} \nSpeed: {} UnitsProcessed/sec {} MBytes/sec", - benchmarkName, implName, content, stats.toString(), opsPerSec, speed); + + private void printStats(TimingStatistics stats, String benchmarkName, String implName, + String content, int multiplier) { + float speed = multiplier * stats.getNCalls() * numNonZeros * 1000.0f * 12 / stats.getSumTime(); + float opsPerSec = stats.getNCalls() * 1000000000.0f / stats.getSumTime(); + log.info("{} {} \n{} {} \nOps = {} Units/sec\nIOps = {} MBytes/sec", benchmarkName, + implName, content, stats.toString(), DF.format(opsPerSec), DF.format(speed)); if (!implType.containsKey(implName)) { implType.put(implName, implType.size()); @@ -149,44 +168,56 @@ public class VectorBenchmarks { while (implStats.size() < implId + 1) { implStats.add(EMPTY); } - implStats.set(implId, - TAB_NEWLINE_PATTERN.split(stats + "\tSpeed = " + opsPerSec + " /sec\tRate = " + speed + " MB/s")); + implStats.set( + implId, + TAB_NEWLINE_PATTERN.split(stats + "\tSpeed = " + DF.format(opsPerSec) + " /sec\tRate = " + + DF.format(speed) + " MB/s")); + } + + public void createData() { + for (int i = 0; i < Math.max(numVectors, numClusters); ++i) { + vectors[0][vIndex(i)] = new DenseVector(randomVectors.get(vIndex(i))); + vectors[1][vIndex(i)] = new RandomAccessSparseVector(randomVectors.get(vIndex(i))); + vectors[2][vIndex(i)] = new SequentialAccessSparseVector(randomVectors.get(vIndex(i))); + clusters[cIndex(i)] = new RandomAccessSparseVector(randomVectors.get(vIndex(i))); + } } - + public void createBenchmark() { - TimingStatistics stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - vectors[0][i] = new DenseVector(randomVectors.get(i)); - call.end(); + printStats(runner.benchmark(new BenchmarkFn() { + 
@Override + public Boolean apply(Integer i) { + vectors[0][vIndex(i)] = new DenseVector(randomVectors.get(vIndex(i))); + return depends(vectors[0][vIndex(i)]); } - } - printStats(stats, "Create (copy)", "DenseVector"); - - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - vectors[1][i] = new RandomAccessSparseVector(randomVectors.get(i)); - call.end(); + }), CREATE_COPY, DENSE_VECTOR); + + printStats(runner.benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + vectors[1][vIndex(i)] = new RandomAccessSparseVector(randomVectors.get(vIndex(i))); + return depends(vectors[1][vIndex(i)]); } - } - printStats(stats, "Create (copy)", "RandSparseVector"); - - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - vectors[2][i] = new SequentialAccessSparseVector(randomVectors.get(i)); - call.end(); + }), CREATE_COPY, RAND_SPARSE_VECTOR); + + printStats(runner.benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + vectors[2][vIndex(i)] = new SequentialAccessSparseVector(randomVectors.get(vIndex(i))); + return depends(vectors[2][vIndex(i)]); } - } - printStats(stats, "Create (copy)", "SeqSparseVector"); - + }), CREATE_COPY, SEQ_SPARSE_VECTOR); + + printStats(runner.benchmark(new BenchmarkFn() { + @Override + public Boolean apply(Integer i) { + clusters[cIndex(i)] = new RandomAccessSparseVector(randomVectors.get(vIndex(i))); + return depends(clusters[cIndex(i)]); + } + }), CREATE_COPY, CLUSTERS); } - private void buildVectorIncrementally(TimingStatistics stats, int randomIndex, Vector v, boolean useSetQuick) { + private boolean buildVectorIncrementally(TimingStatistics stats, int randomIndex, Vector v, boolean useSetQuick) { int[] indexes = randomVectorIndices.get(randomIndex); double[] values = 
randomVectorValues.get(randomIndex); List randomOrder = Lists.newArrayList(); @@ -199,7 +230,7 @@ public class VectorBenchmarks { permutation[i] = randomOrder.get(i); } - TimingStatistics.Call call = stats.newCall(); + TimingStatistics.Call call = stats.newCall(leadTimeUsec); if (useSetQuick) { for (int i : permutation) { v.setQuick(indexes[i], values[i]); @@ -209,599 +240,176 @@ public class VectorBenchmarks { v.set(indexes[i], values[i]); } } - call.end(); + return call.end(maxTimeUsec); } public void incrementalCreateBenchmark() { TimingStatistics stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - vectors[0][i] = new DenseVector(cardinality); - buildVectorIncrementally(stats, i, vectors[0][i], false); + for (int i = 0; i < loop; i++) { + vectors[0][vIndex(i)] = new DenseVector(cardinality); + if (buildVectorIncrementally(stats, vIndex(i), vectors[0][vIndex(i)], false)) { + break; } } - printStats(stats, "Create (incrementally)", "DenseVector"); + printStats(stats, CREATE_INCREMENTALLY, DENSE_VECTOR); stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - vectors[1][i] = new RandomAccessSparseVector(cardinality); - buildVectorIncrementally(stats, i, vectors[1][i], false); - } - } - printStats(stats, "Create (incrementally)", "RandSparseVector"); - -// stats = new TimingStatistics(); -// for (int l = 0; l < loop; l++) { -// for (int i = 0; i < numVectors; i++) { -// vectors[2][i] = new SequentialAccessSparseVector(cardinality); -// buildVectorIncrementally(stats, i, vectors[2][i], false); -// } -// } -// printStats(stats, "Create (incrementally)", "SeqSparseVector"); - - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numClusters; i++) { - clusters[i] = new RandomAccessSparseVector(cardinality); - buildVectorIncrementally(stats, i, clusters[i], false); + for (int i = 0; i < loop; i++) { + vectors[1][vIndex(i)] = 
new RandomAccessSparseVector(cardinality); + if (buildVectorIncrementally(stats, vIndex(i), vectors[1][vIndex(i)], false)) { + break; } } - printStats(stats, "Create (incrementally)", "Clusters"); - } - - public void cloneBenchmark() { - TimingStatistics stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - vectors[0][i] = vectors[0][i].clone(); - call.end(); - } - } - printStats(stats, "Clone", "DenseVector"); - - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - vectors[1][i] = vectors[1][i].clone(); - call.end(); - } - } - printStats(stats, "Clone", "RandSparseVector"); - - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - vectors[2][i] = vectors[2][i].clone(); - call.end(); - } - } - printStats(stats, "Clone", "SeqSparseVector"); - - } - - public void serializeBenchmark() throws IOException { - Configuration conf = new Configuration(); - FileSystem fs = FileSystem.get(conf); - SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, - new Path("/tmp/dense-vector"), IntWritable.class, VectorWritable.class); + printStats(stats, CREATE_INCREMENTALLY, RAND_SPARSE_VECTOR); - Writable one = new IntWritable(0); - VectorWritable vec = new VectorWritable(); - TimingStatistics stats = new TimingStatistics(); - - try { - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - vec.set(vectors[0][i]); - writer.append(one, vec); - call.end(); - } - } - } finally { - Closeables.closeQuietly(writer); - } - printStats(stats, "Serialize", "DenseVector"); - - writer = new SequenceFile.Writer(fs, conf, - new Path("/tmp/randsparse-vector"), IntWritable.class, VectorWritable.class); - stats = new 
TimingStatistics(); - try { - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - vec.set(vectors[1][i]); - writer.append(one, vec); - call.end(); - } - } - } finally { - Closeables.closeQuietly(writer); - } - printStats(stats, "Serialize", "RandSparseVector"); - - writer = new SequenceFile.Writer(fs, conf, - new Path("/tmp/seqsparse-vector"), IntWritable.class, VectorWritable.class); stats = new TimingStatistics(); - try { - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - vec.set(vectors[2][i]); - writer.append(one, vec); - call.end(); - } + for (int i = 0; i < loop; i++) { + vectors[2][vIndex(i)] = new SequentialAccessSparseVector(cardinality); + if (buildVectorIncrementally(stats, vIndex(i), vectors[2][vIndex(i)], false)) { + break; } - } finally { - Closeables.closeQuietly(writer); } - printStats(stats, "Serialize", "SeqSparseVector"); - - } - - public void deserializeBenchmark() throws IOException { - doDeserializeBenchmark("DenseVector", "/tmp/dense-vector"); - doDeserializeBenchmark("RandSparseVector", "/tmp/randsparse-vector"); - doDeserializeBenchmark("SeqSparseVector", "/tmp/seqsparse-vector"); - } + printStats(stats, CREATE_INCREMENTALLY, SEQ_SPARSE_VECTOR); - private void doDeserializeBenchmark(String name, String pathString) throws IOException { - TimingStatistics stats = new TimingStatistics(); - TimingStatistics.Call call = stats.newCall(); - Iterator iterator = new SequenceFileValueIterator(new Path(pathString), true, new Configuration()); - while (iterator.hasNext()) { - iterator.next(); - call.end(); - call = stats.newCall(); - } - printStats(stats, "Deserialize", name); - } - - public void dotBenchmark() { - double result = 0; - TimingStatistics stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - 
result += vectors[0][i].dot(vectors[0][(i + 1) % numVectors]); - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, "DotProduct", "DenseVector", "sum = " + result + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - result += vectors[1][i].dot(vectors[1][(i + 1) % numVectors]); - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, "DotProduct", "RandSparseVector", "sum = " + result + ' '); - result = 0; stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - result += vectors[2][i].dot(vectors[2][(i + 1) % numVectors]); - call.end(); + for (int i = 0; i < loop; i++) { + clusters[cIndex(i)] = new RandomAccessSparseVector(cardinality); + if (buildVectorIncrementally(stats, vIndex(i), clusters[cIndex(i)], false)) { + break; } } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, "DotProduct", "SeqSparseVector", "sum = " + result + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - result += vectors[0][i].dot(vectors[1][(i + 1) % numVectors]); - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, "DotProduct", "Dense.fn(Rand)", "sum = " + result + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - result += vectors[0][i].dot(vectors[2][(i + 1) % numVectors]); - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, "DotProduct", "Dense.fn(Seq)", "sum = " + 
result + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - result += vectors[1][i].dot(vectors[0][(i + 1) % numVectors]); - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, "DotProduct", "Rand.fn(Dense)", "sum = " + result + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - result += vectors[1][i].dot(vectors[2][(i + 1) % numVectors]); - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, "DotProduct", "Rand.fn(Seq)", "sum = " + result + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - result += vectors[2][i].dot(vectors[0][(i + 1) % numVectors]); - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, "DotProduct", "Seq.fn(Dense)", "sum = " + result + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - result += vectors[2][i].dot(vectors[1][(i + 1) % numVectors]); - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, "DotProduct", "Seq.fn(Rand)", "sum = " + result + ' '); - - + printStats(stats, CREATE_INCREMENTALLY, CLUSTERS); } - - public void closestCentroidBenchmark(DistanceMeasure measure) { - - for (int i = 0; i < numClusters; i++) { - for (int j = 0; j < numClusters; j++) { - double distance = Double.POSITIVE_INFINITY; - if (i != j) { - distance = measure.distance(clusters[i], clusters[j]); - } - clusterDistances.setQuick(i, j, distance); - } - } 
- - long distanceCalculations = 0; - TimingStatistics stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - TimingStatistics.Call call = stats.newCall(); - for (int i = 0; i < numVectors; i++) { - Vector vector = vectors[1][i]; - double minDistance = Double.MAX_VALUE; - for (int k = 0; k < numClusters; k++) { - double distance = measure.distance(vector, clusters[k]); - distanceCalculations++; - if (distance < minDistance) { - minDistance = distance; - } - } - } - call.end(); - } - printStats(stats, - measure.getClass().getName(), - "Closest center without Elkan's trick", - "distanceCalculations = " + distanceCalculations); - - - distanceCalculations = 0; - stats = new TimingStatistics(); - Random rand = RandomUtils.getRandom(); - //rand.setSeed(System.currentTimeMillis()); - for (int l = 0; l < loop; l++) { - TimingStatistics.Call call = stats.newCall(); - for (int i = 0; i < numVectors; i++) { - Vector vector = vectors[1][i]; - int closestCentroid = rand.nextInt(numClusters); - double dist = measure.distance(vector, clusters[closestCentroid]); - distanceCalculations++; - for (int k = 0; k < numClusters; k++) { - if (closestCentroid != k) { - double centroidDist = clusterDistances.getQuick(k, closestCentroid); - if (centroidDist < 2 * dist) { - dist = measure.distance(vector, clusters[k]); - closestCentroid = k; - distanceCalculations++; - } - } - } - } - call.end(); - } - printStats(stats, - measure.getClass().getName(), - "Closest center with Elkan's trick", - "distanceCalculations = " + distanceCalculations); + public int vIndex(int i) { + return i % numVectors; } - public void distanceMeasureBenchmark(DistanceMeasure measure) { - double result = 0; - TimingStatistics stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - double minDistance = Double.MAX_VALUE; - for (int u = 0; u < opsPerUnit; u++) { - double distance = 
measure.distance(vectors[0][i], vectors[0][u]); - if (distance < minDistance) { - minDistance = distance; - } - } - result += minDistance; - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, measure.getClass().getName(), "DenseVector", "minDistance = " + result + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - double minDistance = Double.MAX_VALUE; - for (int u = 0; u < opsPerUnit; u++) { - double distance = measure.distance(vectors[1][i], vectors[1][u]); - if (distance < minDistance) { - minDistance = distance; - } - } - result += minDistance; - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, measure.getClass().getName(), "RandSparseVector", "minDistance = " + result - + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - double minDistance = Double.MAX_VALUE; - for (int u = 0; u < opsPerUnit; u++) { - double distance = measure.distance(vectors[2][i], vectors[2][u]); - if (distance < minDistance) { - minDistance = distance; - } - } - result += minDistance; - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, measure.getClass().getName(), "SeqSparseVector", "minDistance = " + result - + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - double minDistance = Double.MAX_VALUE; - for (int u = 0; u < opsPerUnit; u++) { - double distance = measure.distance(vectors[0][i], vectors[1][u]); - if (distance < minDistance) { - minDistance = distance; - } - } - result += minDistance; - call.end(); - } - } - // print result to prevent 
hotspot from eliminating deadcode - printStats(stats, measure.getClass().getName(), "Dense.fn(Rand)", "minDistance = " + result + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - double minDistance = Double.MAX_VALUE; - for (int u = 0; u < opsPerUnit; u++) { - double distance = measure.distance(vectors[0][i], vectors[2][u]); - if (distance < minDistance) { - minDistance = distance; - } - } - result += minDistance; - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, measure.getClass().getName(), "Dense.fn(Seq)", "minDistance = " + result - + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - double minDistance = Double.MAX_VALUE; - for (int u = 0; u < opsPerUnit; u++) { - double distance = measure.distance(vectors[1][i], vectors[0][u]); - if (distance < minDistance) { - minDistance = distance; - } - } - result += minDistance; - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, measure.getClass().getName(), "Rand.fn(Dense)", "minDistance = " + result - + ' '); - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - double minDistance = Double.MAX_VALUE; - for (int u = 0; u < opsPerUnit; u++) { - double distance = measure.distance(vectors[1][i], vectors[2][u]); - if (distance < minDistance) { - minDistance = distance; - } - } - result += minDistance; - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, measure.getClass().getName(), "Rand.fn(Seq)", "minDistance = " + result + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; 
l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - double minDistance = Double.MAX_VALUE; - for (int u = 0; u < opsPerUnit; u++) { - double distance = measure.distance(vectors[2][i], vectors[0][u]); - if (distance < minDistance) { - minDistance = distance; - } - } - result += minDistance; - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, measure.getClass().getName(), "Seq.fn(Dense)", "minDistance = " + result - + ' '); - result = 0; - stats = new TimingStatistics(); - for (int l = 0; l < loop; l++) { - for (int i = 0; i < numVectors; i++) { - TimingStatistics.Call call = stats.newCall(); - double minDistance = Double.MAX_VALUE; - for (int u = 0; u < opsPerUnit; u++) { - double distance = measure.distance(vectors[2][i], vectors[1][u]); - if (distance < minDistance) { - minDistance = distance; - } - } - result += minDistance; - call.end(); - } - } - // print result to prevent hotspot from eliminating deadcode - printStats(stats, measure.getClass().getName(), "Seq.fn(Rand)", "minDistance = " + result - + ' '); - + public int cIndex(int i) { + return i % numClusters; } - + public static void main(String[] args) throws IOException { - DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); - - Option vectorSizeOpt = obuilder.withLongName("vectorSize").withRequired(false).withArgument( - abuilder.withName("vs").withMinimum(1).withMaximum(1).create()).withDescription( - "Cardinality of the vector. Default 1000").withShortName("vs").create(); - - Option vectorSparsityOpt = obuilder.withLongName("sparsity").withRequired(false).withArgument( - abuilder.withName("sp").withMinimum(1).withMaximum(1).create()).withDescription( - "Sparsity of the vector. 
Default 1000").withShortName("sp").create(); - Option numVectorsOpt = obuilder.withLongName("numVectors").withRequired(false).withArgument( - abuilder.withName("nv").withMinimum(1).withMaximum(1).create()).withDescription( - "Number of Vectors to create. Default: 100").withShortName("nv").create(); - Option numClustersOpt = obuilder.withLongName("numClusters").withRequired(false).withArgument( - abuilder.withName("vs").withMinimum(1).withMaximum(1).create()).withDescription( - "Number of Vectors to create. Default: 10").withShortName("vs").create(); - Option loopOpt = obuilder.withLongName("loop").withRequired(false).withArgument( - abuilder.withName("loop").withMinimum(1).withMaximum(1).create()).withDescription( - "Number of times to loop. Default: 200").withShortName("l").create(); - Option numOpsOpt = obuilder.withLongName("numOps").withRequired(false).withArgument( - abuilder.withName("numOps").withMinimum(1).withMaximum(1).create()).withDescription( - "Number of operations to do per timer. " - + "E.g In distance measure, the distance is calculated numOps times" - + " and the total time is measured. Default: 10").withShortName("no").create(); - + + Option vectorSizeOpt = obuilder + .withLongName("vectorSize") + .withRequired(false) + .withArgument(abuilder.withName("vs").withDefault(1000000).create()) + .withDescription("Cardinality of the vector. Default: 1000000").withShortName("vs").create(); + Option numNonZeroOpt = obuilder + .withLongName("numNonZero") + .withRequired(false) + .withArgument(abuilder.withName("nz").withDefault(1000).create()) + .withDescription("Size of the vector. Default: 1000").withShortName("nz").create(); + Option numVectorsOpt = obuilder + .withLongName("numVectors") + .withRequired(false) + .withArgument(abuilder.withName("nv").withDefault(25).create()) + .withDescription("Number of Vectors to create. 
Default: 25").withShortName("nv").create(); + Option numClustersOpt = obuilder + .withLongName("numClusters") + .withRequired(false) + .withArgument(abuilder.withName("nc").withDefault(25).create()) + .withDescription("Number of clusters to create. Default: 25").withShortName("nc").create(); + Option numOpsOpt = obuilder + .withLongName("numOps") + .withRequired(false) + .withArgument(abuilder.withName("numOps").withDefault(10).create()) + .withDescription( + "Number of operations to do per timer. " + + "E.g In distance measure, the distance is calculated numOps times" + + " and the total time is measured. Default: 10").withShortName("no").create(); + Option helpOpt = DefaultOptionCreator.helpOption(); - - Group group = gbuilder.withName("Options").withOption(vectorSizeOpt).withOption(vectorSparsityOpt) - .withOption(numVectorsOpt).withOption(loopOpt).withOption(numOpsOpt).withOption(helpOpt).create(); - + + Group group = gbuilder.withName("Options").withOption(vectorSizeOpt).withOption(numNonZeroOpt) + .withOption(numVectorsOpt).withOption(numOpsOpt).withOption(numClustersOpt).withOption(helpOpt).create(); + try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); - + if (cmdLine.hasOption(helpOpt)) { - CommandLineUtil.printHelp(group); + CommandLineUtil.printHelpWithGenericOptions(group); return; } - - int cardinality = 1000; + + int cardinality = 1000000; if (cmdLine.hasOption(vectorSizeOpt)) { cardinality = Integer.parseInt((String) cmdLine.getValue(vectorSizeOpt)); - - } - + + } + int numClusters = 25; if (cmdLine.hasOption(numClustersOpt)) { numClusters = Integer.parseInt((String) cmdLine.getValue(numClustersOpt)); } - int sparsity = 1000; - if (cmdLine.hasOption(vectorSparsityOpt)) { - sparsity = Integer.parseInt((String) cmdLine.getValue(vectorSparsityOpt)); + int numNonZero = 1000; + if (cmdLine.hasOption(numNonZeroOpt)) { + numNonZero = Integer.parseInt((String) cmdLine.getValue(numNonZeroOpt)); } - int 
numVectors = 100; + int numVectors = 25; if (cmdLine.hasOption(numVectorsOpt)) { numVectors = Integer.parseInt((String) cmdLine.getValue(numVectorsOpt)); - - } - int loop = 200; - if (cmdLine.hasOption(loopOpt)) { - loop = Integer.parseInt((String) cmdLine.getValue(loopOpt)); - + } + int numOps = 10; if (cmdLine.hasOption(numOpsOpt)) { numOps = Integer.parseInt((String) cmdLine.getValue(numOpsOpt)); - + } - VectorBenchmarks mark = new VectorBenchmarks(cardinality, sparsity, numVectors, numClusters, loop, numOps); - mark.createBenchmark(); - mark.incrementalCreateBenchmark(); - mark.cloneBenchmark(); - mark.dotBenchmark(); - mark.serializeBenchmark(); - mark.deserializeBenchmark(); - mark.distanceMeasureBenchmark(new CosineDistanceMeasure()); - mark.distanceMeasureBenchmark(new SquaredEuclideanDistanceMeasure()); - mark.distanceMeasureBenchmark(new EuclideanDistanceMeasure()); - mark.distanceMeasureBenchmark(new ManhattanDistanceMeasure()); - mark.distanceMeasureBenchmark(new TanimotoDistanceMeasure()); - - mark.closestCentroidBenchmark(new CosineDistanceMeasure()); - mark.closestCentroidBenchmark(new SquaredEuclideanDistanceMeasure()); - mark.closestCentroidBenchmark(new EuclideanDistanceMeasure()); - mark.closestCentroidBenchmark(new ManhattanDistanceMeasure()); - mark.closestCentroidBenchmark(new TanimotoDistanceMeasure()); - + VectorBenchmarks mark = new VectorBenchmarks(cardinality, numNonZero, numVectors, numClusters, numOps); + runBenchmark(mark); + log.info("\n{}", mark); } catch (OptionException e) { CommandLineUtil.printHelp(group); } - } - + + private static void runBenchmark(VectorBenchmarks mark) throws IOException { + // Required to set up data. + mark.createData(); + + mark.createBenchmark(); + if (mark.cardinality < 200000) { + // Too slow. 
+ mark.incrementalCreateBenchmark(); + } + + new CloneBenchmark(mark).benchmark(); + new DotBenchmark(mark).benchmark(); + new PlusBenchmark(mark).benchmark(); + new MinusBenchmark(mark).benchmark(); + new TimesBenchmark(mark).benchmark(); + new SerializationBenchmark(mark).benchmark(); + + DistanceBenchmark distanceBenchmark = new DistanceBenchmark(mark); + distanceBenchmark.benchmark(new CosineDistanceMeasure()); + distanceBenchmark.benchmark(new SquaredEuclideanDistanceMeasure()); + distanceBenchmark.benchmark(new EuclideanDistanceMeasure()); + distanceBenchmark.benchmark(new ManhattanDistanceMeasure()); + distanceBenchmark.benchmark(new TanimotoDistanceMeasure()); + distanceBenchmark.benchmark(new ChebyshevDistanceMeasure()); + distanceBenchmark.benchmark(new MinkowskiDistanceMeasure()); + + ClosestCentroidBenchmark centroidBenchmark = new ClosestCentroidBenchmark(mark); + centroidBenchmark.benchmark(new CosineDistanceMeasure()); + centroidBenchmark.benchmark(new SquaredEuclideanDistanceMeasure()); + centroidBenchmark.benchmark(new EuclideanDistanceMeasure()); + centroidBenchmark.benchmark(new ManhattanDistanceMeasure()); + centroidBenchmark.benchmark(new TanimotoDistanceMeasure()); + centroidBenchmark.benchmark(new ChebyshevDistanceMeasure()); + centroidBenchmark.benchmark(new MinkowskiDistanceMeasure()); + } + @Override public String toString() { int pad = 24; @@ -824,7 +432,7 @@ public class VectorBenchmarks { for (String[] stat : implTokenizedStats) { maxStats = Math.max(maxStats, stat.length); } - + for (int i = 0; i < maxStats; i++) { boolean printedName = false; for (String[] stats : implTokenizedStats) { @@ -848,5 +456,8 @@ public class VectorBenchmarks { } return sb.toString(); } - + + public BenchmarkRunner getRunner() { + return runner; + } }