Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 1F75A200BC5 for ; Tue, 8 Nov 2016 01:22:51 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 1E1B5160AF9; Tue, 8 Nov 2016 00:22:51 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 48A03160AEC for ; Tue, 8 Nov 2016 01:22:50 +0100 (CET) Received: (qmail 21579 invoked by uid 500); 8 Nov 2016 00:22:49 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 21570 invoked by uid 99); 8 Nov 2016 00:22:49 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 08 Nov 2016 00:22:49 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 47184E094C; Tue, 8 Nov 2016 00:22:49 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: zhz@apache.org To: common-commits@hadoop.apache.org Message-Id: <71a0182675504b42afffe3b9b43e2855@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hadoop git commit: HADOOP-13804. MutableStat mean loses accuracy if add(long, long) is used. Contributed by Erik Krogen. Date: Tue, 8 Nov 2016 00:22:49 +0000 (UTC) archived-at: Tue, 08 Nov 2016 00:22:51 -0000 Repository: hadoop Updated Branches: refs/heads/branch-2.7 c1a6f4f2e -> 64657cd67 HADOOP-13804. MutableStat mean loses accuracy if add(long, long) is used. Contributed by Erik Krogen. (cherry picked from commit 3dbad5d823b8bf61b643dd1057165044138b99e0) (cherry picked from commit b245e9ce2f20bb84690bffe902a60d5e96130cdb) (cherry picked from commit 384b7b71a3d22bcc27bbdb9002ad700015b86eab) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/64657cd6 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/64657cd6 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/64657cd6 Branch: refs/heads/branch-2.7 Commit: 64657cd675b6a4f5b77ef4d4237ab569ecd5e514 Parents: c1a6f4f Author: Zhe Zhang Authored: Mon Nov 7 16:08:10 2016 -0800 Committer: Zhe Zhang Committed: Mon Nov 7 16:22:16 2016 -0800 ---------------------------------------------------------------------- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../apache/hadoop/metrics2/lib/MutableStat.java | 4 ++++ .../apache/hadoop/metrics2/util/SampleStat.java | 19 +++++++++++++++---- .../hadoop/metrics2/lib/TestMutableMetrics.java | 17 +++++++++++++++++ 4 files changed, 39 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/64657cd6/hadoop-common-project/hadoop-common/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index c869571..9ed18a7 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -90,6 +90,9 @@ Release 2.7.4 - UNRELEASED HADOOP-12483. Maintain wrapped SASL ordering for postponed IPC responses. (Daryn Sharp via yliu) + HADOOP-13804. MutableStat mean loses accuracy if add(long, long) is used. + (Erik Krogen via zhz) + Release 2.7.3 - 2016-08-25 INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/64657cd6/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java index f104420..b5d9929 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java @@ -99,6 +99,10 @@ public class MutableStat extends MutableMetric { /** * Add a number of samples and their sum to the running stat + * + * Note that although use of this method will preserve accurate mean values, + * large values for numSamples may result in inaccurate variance values due + * to the use of a single step of the Welford method for variance calculation. * @param numSamples number of samples * @param sum of the samples */ http://git-wip-us.apache.org/repos/asf/hadoop/blob/64657cd6/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java index 589062a..be00a65 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java @@ -27,29 +27,32 @@ import org.apache.hadoop.classification.InterfaceAudience; public class SampleStat { private final MinMax minmax = new MinMax(); private long numSamples = 0; - private double a0, a1, s0, s1; + private double a0, a1, s0, s1, total; /** * Construct a new running sample stat */ public SampleStat() { a0 = s0 = 0.0; + total = 0.0; } public void reset() { numSamples = 0; a0 = s0 = 0.0; + total = 0.0; minmax.reset(); } // We want to reuse the object, sometimes. void reset(long numSamples, double a0, double a1, double s0, double s1, - MinMax minmax) { + double total, MinMax minmax) { this.numSamples = numSamples; this.a0 = a0; this.a1 = a1; this.s0 = s0; this.s1 = s1; + this.total = total; this.minmax.reset(minmax); } @@ -58,7 +61,7 @@ public class SampleStat { * @param other the destination to hold our values */ public void copyTo(SampleStat other) { - other.reset(numSamples, a0, a1, s0, s1, minmax); + other.reset(numSamples, a0, a1, s0, s1, total, minmax); } /** @@ -80,6 +83,7 @@ public class SampleStat { */ public SampleStat add(long nSamples, double x) { numSamples += nSamples; + total += x; if (numSamples == 1) { a0 = a1 = x; @@ -103,10 +107,17 @@ public class SampleStat { } /** + * @return the total of all samples added + */ + public double total() { + return total; + } + + /** * @return the arithmetic mean of the samples */ public double mean() { - return numSamples > 0 ? a1 : 0.0; + return numSamples > 0 ? (total / numSamples) : 0.0; } /** http://git-wip-us.apache.org/repos/asf/hadoop/blob/64657cd6/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java index ed83000..9982d4e 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java @@ -116,6 +116,23 @@ public class TestMutableMetrics { } /** + * Tests that when using {@link MutableStat#add(long, long)}, even with a high + * sample count, the mean does not lose accuracy. + */ + @Test public void testMutableStatWithBulkAdd() { + MetricsRecordBuilder rb = mockMetricsRecordBuilder(); + MetricsRegistry registry = new MetricsRegistry("test"); + MutableStat stat = registry.newStat("Test", "Test", "Ops", "Val", false); + + stat.add(1000, 1000); + stat.add(1000, 2000); + registry.snapshot(rb, false); + + assertCounter("TestNumOps", 2000L, rb); + assertGauge("TestAvgVal", 1.5, rb); + } + + /** * Ensure that quantile estimates from {@link MutableQuantiles} are within * specified error bounds. */ --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org