hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From z..@apache.org
Subject hadoop git commit: HADOOP-13804. MutableStat mean loses accuracy if add(long, long) is used. Contributed by Erik Krogen.
Date Tue, 08 Nov 2016 00:22:49 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 c1a6f4f2e -> 64657cd67


HADOOP-13804. MutableStat mean loses accuracy if add(long, long) is used. Contributed by Erik Krogen.

(cherry picked from commit 3dbad5d823b8bf61b643dd1057165044138b99e0)
(cherry picked from commit b245e9ce2f20bb84690bffe902a60d5e96130cdb)
(cherry picked from commit 384b7b71a3d22bcc27bbdb9002ad700015b86eab)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/64657cd6
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/64657cd6
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/64657cd6

Branch: refs/heads/branch-2.7
Commit: 64657cd675b6a4f5b77ef4d4237ab569ecd5e514
Parents: c1a6f4f
Author: Zhe Zhang <zhz@apache.org>
Authored: Mon Nov 7 16:08:10 2016 -0800
Committer: Zhe Zhang <zhz@apache.org>
Committed: Mon Nov 7 16:22:16 2016 -0800

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt  |  3 +++
 .../apache/hadoop/metrics2/lib/MutableStat.java  |  4 ++++
 .../apache/hadoop/metrics2/util/SampleStat.java  | 19 +++++++++++++++----
 .../hadoop/metrics2/lib/TestMutableMetrics.java  | 17 +++++++++++++++++
 4 files changed, 39 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/64657cd6/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index c869571..9ed18a7 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -90,6 +90,9 @@ Release 2.7.4 - UNRELEASED
     HADOOP-12483. Maintain wrapped SASL ordering for postponed IPC responses.
     (Daryn Sharp via yliu)
 
+    HADOOP-13804. MutableStat mean loses accuracy if add(long, long) is used.
+    (Erik Krogen via zhz)
+
 Release 2.7.3 - 2016-08-25
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/64657cd6/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java
index f104420..b5d9929 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java
@@ -99,6 +99,10 @@ public class MutableStat extends MutableMetric {
 
   /**
    * Add a number of samples and their sum to the running stat
+   *
+   * Note that although use of this method will preserve accurate mean values,
+   * large values for numSamples may result in inaccurate variance values due
+   * to the use of a single step of the Welford method for variance calculation.
    * @param numSamples  number of samples
    * @param sum of the samples
    */

http://git-wip-us.apache.org/repos/asf/hadoop/blob/64657cd6/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java
index 589062a..be00a65 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java
@@ -27,29 +27,32 @@ import org.apache.hadoop.classification.InterfaceAudience;
 public class SampleStat {
   private final MinMax minmax = new MinMax();
   private long numSamples = 0;
-  private double a0, a1, s0, s1;
+  private double a0, a1, s0, s1, total;
 
   /**
    * Construct a new running sample stat
    */
   public SampleStat() {
     a0 = s0 = 0.0;
+    total = 0.0;
   }
 
   public void reset() {
     numSamples = 0;
     a0 = s0 = 0.0;
+    total = 0.0;
     minmax.reset();
   }
 
   // We want to reuse the object, sometimes.
   void reset(long numSamples, double a0, double a1, double s0, double s1,
-             MinMax minmax) {
+      double total, MinMax minmax) {
     this.numSamples = numSamples;
     this.a0 = a0;
     this.a1 = a1;
     this.s0 = s0;
     this.s1 = s1;
+    this.total = total;
     this.minmax.reset(minmax);
   }
 
@@ -58,7 +61,7 @@ public class SampleStat {
    * @param other the destination to hold our values
    */
   public void copyTo(SampleStat other) {
-    other.reset(numSamples, a0, a1, s0, s1, minmax);
+    other.reset(numSamples, a0, a1, s0, s1, total, minmax);
   }
 
   /**
@@ -80,6 +83,7 @@ public class SampleStat {
    */
   public SampleStat add(long nSamples, double x) {
     numSamples += nSamples;
+    total += x;
 
     if (numSamples == 1) {
       a0 = a1 = x;
@@ -103,10 +107,17 @@ public class SampleStat {
   }
 
   /**
+   * @return the total of all samples added
+   */
+  public double total() {
+    return total;
+  }
+
+  /**
    * @return  the arithmetic mean of the samples
    */
   public double mean() {
-    return numSamples > 0 ? a1 : 0.0;
+    return numSamples > 0 ? (total / numSamples) : 0.0;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hadoop/blob/64657cd6/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java
index ed83000..9982d4e 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java
@@ -116,6 +116,23 @@ public class TestMutableMetrics {
   }
 
   /**
+   * Tests that when using {@link MutableStat#add(long, long)}, even with a high
+   * sample count, the mean does not lose accuracy.
+   */
+  @Test public void testMutableStatWithBulkAdd() {
+    MetricsRecordBuilder rb = mockMetricsRecordBuilder();
+    MetricsRegistry registry = new MetricsRegistry("test");
+    MutableStat stat = registry.newStat("Test", "Test", "Ops", "Val", false);
+
+    stat.add(1000, 1000);
+    stat.add(1000, 2000);
+    registry.snapshot(rb, false);
+
+    assertCounter("TestNumOps", 2000L, rb);
+    assertGauge("TestAvgVal", 1.5, rb);
+  }
+
+  /**
    * Ensure that quantile estimates from {@link MutableQuantiles} are within
    * specified error bounds.
    */


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message