mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tdunn...@apache.org
Subject svn commit: r1095871 - in /mahout/trunk/utils/src: main/java/org/apache/mahout/utils/Bump125.java test/java/org/apache/mahout/utils/Bump125Test.java
Date Fri, 22 Apr 2011 06:13:36 GMT
Author: tdunning
Date: Fri Apr 22 06:13:36 2011
New Revision: 1095871

URL: http://svn.apache.org/viewvc?rev=1095871&view=rev
Log:
MAHOUT-672 - the forgotten files

Added:
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/Bump125.java
    mahout/trunk/utils/src/test/java/org/apache/mahout/utils/Bump125Test.java

Added: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/Bump125.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/Bump125.java?rev=1095871&view=auto
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/Bump125.java (added)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/Bump125.java Fri Apr 22 06:13:36 2011
@@ -0,0 +1,45 @@
+package org.apache.mahout.utils;
+
+/**
+ * Helps with making nice intervals at arbitrary scale.
+ *
+ * One use case is where we are producing progress or error messages every time an incoming
+ * record is received.  It is generally bad form to produce a message for <i>every</i> input
+ * so it would be better to produce a message for each of the first 10 records, then every
+ * other record up to 20 and then every 5 records up to 50 and then every 10 records up to 100,
+ * more or less. The pattern can now repeat scaled up by 100.  The total number of messages will scale
+ * with the log of the number of input lines which is much more survivable than direct output
+ * and because early records all get messages, we get indications early.
+ */
+public class Bump125 {
+  private static final int[] bumps = {1, 2, 5};
+
+  static int scale(double value, double base) {
+    double scale = value / base;
+    // scan for correct step
+    int i = 0;
+    while (i < bumps.length - 1 && bumps[i + 1] <= scale) {
+      i++;
+    }
+    return bumps[i];
+  }
+
+  static long base(double value) {
+    return Math.max(1, (long) Math.pow(10, (int) Math.floor(Math.log10(value))));
+  }
+
+  private long counter = 0;
+
+  public long increment() {
+    long delta;
+    if (counter >= 10) {
+      final long base = base(counter / 4.0);
+      int scale = scale(counter / 4.0, base);
+      delta = (long) (base * scale);
+    } else {
+      delta = 1;
+    }
+    counter += delta;
+    return counter;
+  }
+}

Added: mahout/trunk/utils/src/test/java/org/apache/mahout/utils/Bump125Test.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/Bump125Test.java?rev=1095871&view=auto
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/utils/Bump125Test.java (added)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/utils/Bump125Test.java Fri Apr 22 06:13:36 2011
@@ -0,0 +1,23 @@
+package org.apache.mahout.utils;
+
+import com.google.common.collect.Lists;
+import org.junit.Test;
+
+import java.util.Iterator;
+
+public class Bump125Test extends MahoutTestCase {
+  @Test
+  public void testIncrement() throws Exception {
+    Iterator<Integer> ref = Lists.newArrayList(1, 2, 3, 4, 5, 6, 7,
+            8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 50, 60,
+            70, 80, 100, 120, 140, 160, 180, 200, 250, 300, 350,
+            400, 500, 600, 700, 800, 1000, 1200, 1400, 1600, 1800,
+            2000, 2500, 3000, 3500, 4000, 5000, 6000, 7000)
+            .iterator();
+    Bump125 b = new Bump125();
+    for (int i = 0; i < 50; i++) {
+      final long x = b.increment();
+      assertEquals(ref.next().longValue(), x);
+    }
+  }
+}



Mime
View raw message