From commits-return-7657-archive-asf-public=cust-asf.ponee.io@zookeeper.apache.org Mon Mar 4 23:04:16 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 9F6B5180627 for ; Tue, 5 Mar 2019 00:04:15 +0100 (CET) Received: (qmail 73869 invoked by uid 500); 4 Mar 2019 23:04:14 -0000 Mailing-List: contact commits-help@zookeeper.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@zookeeper.apache.org Delivered-To: mailing list commits@zookeeper.apache.org Received: (qmail 73858 invoked by uid 99); 4 Mar 2019 23:04:14 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 04 Mar 2019 23:04:14 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 00AE087792; Mon, 4 Mar 2019 23:04:13 +0000 (UTC) Date: Mon, 04 Mar 2019 23:04:13 +0000 To: "commits@zookeeper.apache.org" Subject: [zookeeper] branch master updated: ZOOKEEPER-3267: Add watcher metrics MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <155174065388.20272.15916885505137020103@gitbox.apache.org> From: andor@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: zookeeper X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Oldrev: c4303e2ff6781b030ab84d94a30494a4048dc6a0 X-Git-Newrev: dafc40d59f3d4ed1d2586bf93fc340d20fb6f6c4 X-Git-Rev: dafc40d59f3d4ed1d2586bf93fc340d20fb6f6c4 X-Git-NotificationType: ref_changed_plus_diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated This is an automated email from the ASF dual-hosted git repository. andor pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/zookeeper.git The following commit(s) were added to refs/heads/master by this push: new dafc40d ZOOKEEPER-3267: Add watcher metrics dafc40d is described below commit dafc40d59f3d4ed1d2586bf93fc340d20fb6f6c4 Author: Jie Huang AuthorDate: Mon Mar 4 17:04:10 2019 -0600 ZOOKEEPER-3267: Add watcher metrics Author: Jie Huang Reviewers: fangmin@apache.org, andor@apache.org Closes #796 from jhuan31/ZOOKEEPER-3267 and squashes the following commits: 4dafb7e94 [Jie Huang] refactor/de-flaky unit tests 9bdc93a04 [Jie Huang] fix ADD_DEAD_WATCHERS_STALL_TIME measurement 8e45fc253 [Jie Huang] add unit tests for the watcher metrics 2d786e455 [Jie Huang] ZOOKEEPER-3267: Add watcher metrics --- .../org/apache/zookeeper/server/ServerMetrics.java | 17 ++++++ .../zookeeper/server/watch/WatchManager.java | 23 +++++++ .../server/watch/WatchManagerOptimized.java | 28 +++++++++ .../zookeeper/server/watch/WatcherCleaner.java | 7 +++ .../zookeeper/server/watch/WatchManagerTest.java | 71 +++++++++++++++++++++- .../zookeeper/server/watch/WatcherCleanerTest.java | 41 +++++++++++++ 6 files changed, 186 insertions(+), 1 deletion(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ServerMetrics.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ServerMetrics.java index ffa1317..bc66d66 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ServerMetrics.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ServerMetrics.java @@ -76,6 +76,23 @@ public enum ServerMetrics { BYTES_RECEIVED_COUNT(new SimpleCounter("bytes_received_count")), + /** + * Fired watcher stats. + */ + NODE_CREATED_WATCHER(new AvgMinMaxCounter("node_created_watch_count")), + NODE_DELETED_WATCHER(new AvgMinMaxCounter("node_deleted_watch_count")), + NODE_CHANGED_WATCHER(new AvgMinMaxCounter("node_changed_watch_count")), + NODE_CHILDREN_WATCHER(new AvgMinMaxCounter("node_children_watch_count")), + + + /* + * Number of dead watchers in DeadWatcherListener + */ + ADD_DEAD_WATCHER_STALL_TIME(new SimpleCounter("add_dead_watcher_stall_time")), + DEAD_WATCHERS_QUEUED(new SimpleCounter("dead_watchers_queued")), + DEAD_WATCHERS_CLEARED(new SimpleCounter("dead_watchers_cleared")), + DEAD_WATCHERS_CLEANER_LATENCY(new AvgMinMaxPercentileCounter("dead_watchers_cleaner_latency")), + RESPONSE_PACKET_CACHE_HITS(new SimpleCounter("response_packet_cache_hits")), RESPONSE_PACKET_CACHE_MISSING(new SimpleCounter("response_packet_cache_misses")), diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/watch/WatchManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/watch/WatchManager.java index 3e14f6e..3ba866d 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/watch/WatchManager.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/watch/WatchManager.java @@ -31,6 +31,7 @@ import org.apache.zookeeper.Watcher; import org.apache.zookeeper.Watcher.Event.EventType; import org.apache.zookeeper.Watcher.Event.KeeperState; import org.apache.zookeeper.server.ServerCnxn; +import org.apache.zookeeper.server.ServerMetrics; import org.apache.zookeeper.server.ZooTrace; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -138,6 +139,28 @@ public class WatchManager implements IWatchManager { } w.process(e); } + + switch (type) { + case NodeCreated: + ServerMetrics.NODE_CREATED_WATCHER.add(watchers.size()); + break; + + case NodeDeleted: + ServerMetrics.NODE_DELETED_WATCHER.add(watchers.size()); + break; + + case NodeDataChanged: + ServerMetrics.NODE_CHANGED_WATCHER.add(watchers.size()); + break; + + case NodeChildrenChanged: + ServerMetrics.NODE_CHILDREN_WATCHER.add(watchers.size()); + break; + default: + // Other types not logged. + break; + } + return new WatcherOrBitSet(watchers); } diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/watch/WatchManagerOptimized.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/watch/WatchManagerOptimized.java index 6abb760..f0c8fe4 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/watch/WatchManagerOptimized.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/watch/WatchManagerOptimized.java @@ -33,6 +33,7 @@ import org.apache.zookeeper.Watcher; import org.apache.zookeeper.Watcher.Event.EventType; import org.apache.zookeeper.Watcher.Event.KeeperState; import org.apache.zookeeper.server.ServerCnxn; +import org.apache.zookeeper.server.ServerMetrics; import org.apache.zookeeper.server.util.BitHashSet; import org.apache.zookeeper.server.util.BitMap; import org.slf4j.Logger; @@ -222,6 +223,8 @@ public class WatchManagerOptimized return null; } + int triggeredWatches = 0; + // Avoid race condition between dead watcher cleaner in // WatcherCleaner and iterating here synchronized (watchers) { @@ -238,9 +241,11 @@ public class WatchManagerOptimized } w.process(e); + triggeredWatches++; } } + updateMetrics(type, triggeredWatches); return new WatcherOrBitSet(watchers); } @@ -269,6 +274,29 @@ public class WatchManagerOptimized } } + void updateMetrics(final EventType type, int size) { + switch (type) { + case NodeCreated: + ServerMetrics.NODE_CREATED_WATCHER.add(size); + break; + + case NodeDeleted: + ServerMetrics.NODE_DELETED_WATCHER.add(size); + break; + + case NodeDataChanged: + ServerMetrics.NODE_CHANGED_WATCHER.add(size); + break; + + case NodeChildrenChanged: + ServerMetrics.NODE_CHILDREN_WATCHER.add(size); + break; + default: + // Other types not logged. + break; + } + } + boolean isDeadWatcher(Watcher watcher) { return watcher instanceof ServerCnxn && ((ServerCnxn) watcher).isStale(); } diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/watch/WatcherCleaner.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/watch/WatcherCleaner.java index 9648848..6b67877 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/watch/WatcherCleaner.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/watch/WatcherCleaner.java @@ -25,6 +25,7 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.zookeeper.common.Time; import org.apache.zookeeper.server.RateLogger; +import org.apache.zookeeper.server.ServerMetrics; import org.apache.zookeeper.server.WorkerService; import org.apache.zookeeper.server.WorkerService.WorkRequest; @@ -103,9 +104,12 @@ public class WatcherCleaner extends Thread { totalDeadWatchers.get() >= maxInProcessingDeadWatchers) { try { RATE_LOGGER.rateLimitLog("Waiting for dead watchers cleaning"); + long startTime = Time.currentElapsedTime(); synchronized(processingCompletedEvent) { processingCompletedEvent.wait(100); } + long latency = Time.currentElapsedTime() - startTime; + ServerMetrics.ADD_DEAD_WATCHER_STALL_TIME.add(latency); } catch (InterruptedException e) { LOG.info("Got interrupted while waiting for dead watches " + "queue size"); @@ -115,6 +119,7 @@ public class WatcherCleaner extends Thread { synchronized (this) { if (deadWatchers.add(watcherBit)) { totalDeadWatchers.incrementAndGet(); + ServerMetrics.DEAD_WATCHERS_QUEUED.add(1); if (deadWatchers.size() >= watcherCleanThreshold) { synchronized (cleanEvent) { cleanEvent.notifyAll(); @@ -164,6 +169,8 @@ public class WatcherCleaner extends Thread { listener.processDeadWatchers(snapshot); long latency = Time.currentElapsedTime() - startTime; LOG.info("Takes {} to process {} watches", latency, total); + ServerMetrics.DEAD_WATCHERS_CLEANER_LATENCY.add(latency); + ServerMetrics.DEAD_WATCHERS_CLEARED.add(total); totalDeadWatchers.addAndGet(-total); synchronized(processingCompletedEvent) { processingCompletedEvent.notifyAll(); diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/watch/WatchManagerTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/watch/WatchManagerTest.java index f6a229b..166cfff 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/watch/WatchManagerTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/watch/WatchManagerTest.java @@ -33,6 +33,10 @@ import org.apache.zookeeper.server.DumbWatcher; import org.apache.zookeeper.server.ServerCnxn; import org.apache.zookeeper.ZKTestCase; +import org.apache.zookeeper.server.ServerMetrics; +import org.apache.zookeeper.server.metric.AvgMinMaxCounter; +import org.apache.zookeeper.server.metric.Metric; +import org.eclipse.jetty.util.IO; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -390,8 +394,10 @@ public class WatchManagerTest extends ZKTestCase { } // 4. sleep for a while to make sure all the thread exited + // the cleaner may wait as long as CleanerInterval+CleanerInterval/2+1 + // So need to sleep as least that long try { - Thread.sleep(1000); + Thread.sleep(2000); } catch (InterruptedException e) {} // 5. make sure the dead watchers are not in the existing watchers @@ -401,4 +407,67 @@ public class WatchManagerTest extends ZKTestCase { existingWatchers.hasPaths(((ServerCnxn) w).getSessionId())); } } + + private void checkMetrics(String metricName, long min, long max, double avg, long cnt, long sum){ + Map values = ServerMetrics.getAllValues(); + + Assert.assertEquals(min, values.get("min_" + metricName)); + Assert.assertEquals(max, values.get("max_" + metricName)); + Assert.assertEquals(avg, (Double)values.get("avg_" + metricName), 0.000001); + Assert.assertEquals(cnt, values.get("cnt_" + metricName)); + Assert.assertEquals(sum, values.get("sum_" + metricName)); + } + + @Test + public void testWatcherMetrics() throws IOException { + IWatchManager manager = getWatchManager(); + ServerMetrics.resetAll(); + + DumbWatcher watcher1 = new DumbWatcher(1); + DumbWatcher watcher2 = new DumbWatcher(2); + + final String path1 = "/path1"; + final String path2 = "/path2"; + + final String path3 = "/path3"; + + //both wather1 and wather2 are watching path1 + manager.addWatch(path1, watcher1); + manager.addWatch(path1, watcher2); + + //path2 is watched by watcher1 + manager.addWatch(path2, watcher1); + + manager.triggerWatch(path3, EventType.NodeCreated); + //path3 is not being watched so metric is 0 + checkMetrics("node_created_watch_count", 0L, 0L, 0D, 0L, 0L); + + //path1 is watched by two watchers so two fired + manager.triggerWatch(path1, EventType.NodeCreated); + checkMetrics("node_created_watch_count", 2L, 2L, 2D, 1L, 2L); + + //path2 is watched by one watcher so one fired now total is 3 + manager.triggerWatch(path2, EventType.NodeCreated); + checkMetrics("node_created_watch_count", 1L, 2L, 1.5D, 2L, 3L); + + //watches on path1 are no longer there so zero fired + manager.triggerWatch(path1, EventType.NodeDataChanged); + checkMetrics("node_changed_watch_count", 0L, 0L, 0D, 0L, 0L); + + //both wather1 and wather2 are watching path1 + manager.addWatch(path1, watcher1); + manager.addWatch(path1, watcher2); + + //path2 is watched by watcher1 + manager.addWatch(path2, watcher1); + + manager.triggerWatch(path1, EventType.NodeDataChanged); + checkMetrics("node_changed_watch_count", 2L, 2L, 2D, 1L, 2L); + + manager.triggerWatch(path2, EventType.NodeDeleted); + checkMetrics("node_deleted_watch_count", 1L, 1L, 1D, 1L, 1L); + + //make sure that node created watch count is not impacted by the fire of other event types + checkMetrics("node_created_watch_count", 1L, 2L, 1.5D, 2L, 3L); + } } diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/watch/WatcherCleanerTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/watch/WatcherCleanerTest.java index d315232..64989ce 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/watch/WatcherCleanerTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/watch/WatcherCleanerTest.java @@ -16,6 +16,7 @@ */ package org.apache.zookeeper.server.watch; +import java.util.Map; import java.util.Set; import java.util.HashSet; import java.util.concurrent.CountDownLatch; @@ -23,10 +24,16 @@ import java.util.concurrent.TimeUnit; import org.apache.zookeeper.ZKTestCase; import org.apache.zookeeper.common.Time; +import org.apache.zookeeper.server.ServerMetrics; import org.junit.Test; import org.junit.Assert; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.hamcrest.number.OrderingComparison.greaterThan; public class WatcherCleanerTest extends ZKTestCase { + private static final Logger LOG = LoggerFactory.getLogger(WatcherCleanerTest.class); public static class MyDeadWatcherListener implements IDeadWatcherListener { @@ -124,4 +131,38 @@ public class WatcherCleanerTest extends ZKTestCase { Assert.assertTrue(Time.currentElapsedTime() - startTime >= delayMs); Assert.assertTrue(listener.wait(5000)); } + + @Test + public void testDeadWatcherMetrics() { + MyDeadWatcherListener listener = new MyDeadWatcherListener(); + WatcherCleaner cleaner = new WatcherCleaner(listener, 1, 1, 1, 1); + listener.setDelayMs(20); + cleaner.start(); + listener.setCountDownLatch(new CountDownLatch(3)); + //the dead watchers will be added one by one and cleared one by one because we set both watchCleanThreshold and + //maxInProcessingDeadWatchers to 1 + cleaner.addDeadWatcher(1); + cleaner.addDeadWatcher(2); + cleaner.addDeadWatcher(3); + + Assert.assertTrue(listener.wait(5000)); + + Map values = ServerMetrics.getAllValues(); + + Assert.assertThat("Adding dead watcher should be stalled twice", + (Long)values.get("add_dead_watcher_stall_time"), + greaterThan(0L)); + Assert.assertEquals("Total dead watchers added to the queue should be 3", 3L, values.get("dead_watchers_queued")); + Assert.assertEquals("Total dead watchers cleared should be 3", 3L, values.get("dead_watchers_cleared")); + + Assert.assertEquals(3L, values.get("cnt_dead_watchers_cleaner_latency")); + + //Each latency should be a little over 20 ms, allow 5 ms deviation + Assert.assertEquals(20D, (Double)values.get("avg_dead_watchers_cleaner_latency"), 5); + Assert.assertEquals(20D, ((Long)values.get("min_dead_watchers_cleaner_latency")).doubleValue(), 5); + Assert.assertEquals(20D, ((Long)values.get("max_dead_watchers_cleaner_latency")).doubleValue(), 5); + Assert.assertEquals(20D, ((Long)values.get("p50_dead_watchers_cleaner_latency")).doubleValue(), 5); + Assert.assertEquals(20D, ((Long)values.get("p95_dead_watchers_cleaner_latency")).doubleValue(), 5); + Assert.assertEquals(20D, ((Long)values.get("p99_dead_watchers_cleaner_latency")).doubleValue(), 5); + } }