zookeeper-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From an...@apache.org
Subject [zookeeper] branch master updated: ZOOKEEPER-3470: fix flaky LearnerMetricsTest
Date Tue, 06 Aug 2019 15:11:34 GMT
This is an automated email from the ASF dual-hosted git repository.

andor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zookeeper.git


The following commit(s) were added to refs/heads/master by this push:
     new 9fa18ab  ZOOKEEPER-3470: fix flaky LearnerMetricsTest
9fa18ab is described below

commit 9fa18ab0e1ce07b3cc61e8758e27d393253b81fb
Author: Mate Szalay-Beko <szalay.beko.mate@gmail.com>
AuthorDate: Tue Aug 6 17:11:26 2019 +0200

    ZOOKEEPER-3470: fix flaky LearnerMetricsTest
    
    This PR is about fixing the flaky unit test: `LearnerMetricsTest`.
    The code changes are:
    - Replacing the `Thread.sleep(200)` with polling the metrics until the assertion succeed
or timeout (this seems help in some flaky scenario)
    - Fixing the `ZKTestCase.waitFor` method
    - We are also logging the 0 millisec latency values now on the server side. We calculate
these latency values between different hosts (as the difference between the server wall time
and the wall time in the request header), so 0 values are possible. And in case of unittests
(where all the hosts are using the same clock) it can happen relatively frequently. This situation
also caused some of the flakiness. In the previous implementation the 0 latency values were
not counted, so the ass [...]
    
    Author: Mate Szalay-Beko <szalay.beko.mate@gmail.com>
    
    Reviewers: eolivelli@apache.org, andor@apache.org
    
    Closes #1040 from symat/ZOOKEEPER-3470 and squashes the following commits:
    
    65a8b5795 [Mate Szalay-Beko] ZOOKEEPER-3470: implement PR comments
    7fdf78041 [Mate Szalay-Beko] ZOOKEEPER-3470: fix flaky LearnerMetricsTest
---
 .../zookeeper/server/FinalRequestProcessor.java    |  2 +-
 .../java/org/apache/zookeeper/server/Request.java  |  4 +-
 .../apache/zookeeper/server/quorum/Follower.java   |  2 +-
 .../test/java/org/apache/zookeeper/ZKTestCase.java |  9 +--
 .../server/quorum/LearnerMetricsTest.java          | 68 ++++++++++++++--------
 5 files changed, 54 insertions(+), 31 deletions(-)

diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/FinalRequestProcessor.java
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/FinalRequestProcessor.java
index 528c4ba..3d393a8 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/FinalRequestProcessor.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/FinalRequestProcessor.java
@@ -170,7 +170,7 @@ public class FinalRequestProcessor implements RequestProcessor {
              * CLOCK_MONOTONIC.
              */
             long propagationLatency = Time.currentWallTime() - request.getHdr().getTime();
-            if (propagationLatency > 0) {
+            if (propagationLatency >= 0) {
                 ServerMetrics.getMetrics().PROPAGATION_LATENCY.add(propagationLatency);
             }
         }
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/Request.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/Request.java
index 769fc87..5eb94f5 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/Request.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/Request.java
@@ -402,7 +402,7 @@ public class Request {
              * CLOCK_MONOTONIC.
              */
             long latency = currentTime - hdr.getTime();
-            if (latency > 0) {
+            if (latency >= 0) {
                 metric.add(latency);
             }
         }
@@ -415,7 +415,7 @@ public class Request {
              * CLOCK_MONOTONIC.
              */
             long latency = currentTime - hdr.getTime();
-            if (latency > 0) {
+            if (latency >= 0) {
                 metric.add(key, latency);
             }
         }
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Follower.java
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Follower.java
index 0eeac83..3d684d1 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Follower.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Follower.java
@@ -169,7 +169,7 @@ public class Follower extends Learner{
                  */
                 long now = Time.currentWallTime();
                 long latency = now - hdr.getTime();
-                if (latency > 0) {
+                if (latency >= 0) {
                     ServerMetrics.getMetrics().PROPOSAL_LATENCY.add(latency);
                 }
             }
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/ZKTestCase.java b/zookeeper-server/src/test/java/org/apache/zookeeper/ZKTestCase.java
index b8d24fa..00eeef4 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/ZKTestCase.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/ZKTestCase.java
@@ -25,7 +25,8 @@ import org.junit.Rule;
 import org.junit.rules.TestWatcher;
 import org.junit.runner.Description;
 import org.junit.runner.RunWith;
-import org.junit.runners.model.FrameworkMethod;
+
+import java.time.LocalDateTime;
 
 /**
  * Base class for a non-parameterized ZK test.
@@ -92,9 +93,9 @@ public class ZKTestCase {
      * @param timeout   timeout in seconds
      * @throws InterruptedException
      */
-    public void waitFor(String msg, WaitForCondition condition, int timeout)
-            throws InterruptedException {
-        for (int i = 0; i < timeout; ++i) {
+    public void waitFor(String msg, WaitForCondition condition, int timeout) throws InterruptedException
{
+        final LocalDateTime deadline = LocalDateTime.now().plusSeconds(timeout);
+        while (LocalDateTime.now().isBefore(deadline)) {
             if (condition.evaluate()) {
                 return;
             }
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/LearnerMetricsTest.java
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/LearnerMetricsTest.java
index 85bbe31..b3df3ca 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/LearnerMetricsTest.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/LearnerMetricsTest.java
@@ -8,9 +8,9 @@
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
- *uuuuu
+ *
  * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "/RequuuAS IS" BASIS,
+ * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
@@ -25,25 +25,31 @@ import org.apache.zookeeper.ZooKeeper;
 import org.apache.zookeeper.metrics.MetricsUtils;
 import org.apache.zookeeper.server.ServerMetrics;
 import org.apache.zookeeper.test.ClientBase;
-import org.junit.Assert;
+import org.hamcrest.Matcher;
+import org.junit.After;
 import org.junit.Test;
 
 import java.util.HashMap;
 import java.util.Map;
 
-import static org.hamcrest.number.OrderingComparison.greaterThan;
+import static org.hamcrest.core.Is.is;
+import static org.hamcrest.number.OrderingComparison.greaterThanOrEqualTo;
 
 public class LearnerMetricsTest extends QuorumPeerTestBase {
 
+    private static final int TIMEOUT_SECONDS = 30;
+    private static final int SERVER_COUNT = 6; // 5 participants, 1 observer
+    private final QuorumPeerTestBase.MainThread[] mt = new QuorumPeerTestBase.MainThread[SERVER_COUNT];
+    private ZooKeeper zk_client;
+
     @Test
     public void testLearnerMetricsTest() throws Exception {
         ServerMetrics.getMetrics().resetAll();
         ClientBase.setupTestEnv();
 
-        final int SERVER_COUNT = 6; // 5 participants, 1 observer
         final String path = "/zk-testLeanerMetrics";
         final byte[] data = new byte[512];
-        final int clientPorts[] = new int[SERVER_COUNT];
+        final int[] clientPorts = new int[SERVER_COUNT];
         StringBuilder sb = new StringBuilder();
         int observer = 0 ;
         clientPorts[observer] = PortAssignment.unique();
@@ -55,7 +61,6 @@ public class LearnerMetricsTest extends QuorumPeerTestBase {
 
         // start the participants
         String quorumCfgSection = sb.toString();
-        QuorumPeerTestBase.MainThread mt[] = new QuorumPeerTestBase.MainThread[SERVER_COUNT];
         for(int i = 1; i < SERVER_COUNT; i++) {
             mt[i] = new QuorumPeerTestBase.MainThread(i, clientPorts[i], quorumCfgSection);
             mt[i].start();
@@ -67,26 +72,43 @@ public class LearnerMetricsTest extends QuorumPeerTestBase {
         mt[observer] = new QuorumPeerTestBase.MainThread(observer, clientPorts[observer],
quorumCfgSection, observerConfig);
         mt[observer].start();
 
-        ZooKeeper zk = new ZooKeeper("127.0.0.1:" + clientPorts[1], ClientBase.CONNECTION_TIMEOUT,
this);
-
-        waitForOne(zk, ZooKeeper.States.CONNECTED);
-
         // send one create request
-        zk.create(path, data, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
-
-        Thread.sleep(200);
+        zk_client = new ZooKeeper("127.0.0.1:" + clientPorts[1], ClientBase.CONNECTION_TIMEOUT,
this);
+        waitForOne(zk_client, ZooKeeper.States.CONNECTED);
+        zk_client.create(path, data, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
 
-        Map<String, Object> values = MetricsUtils.currentServerMetrics();
         // there are 4 followers, each received two proposals, one for leader election, one
for the create request
-        Assert.assertEquals(8L, values.get("learner_proposal_received_count"));
-        Assert.assertEquals(8L, values.get("cnt_proposal_latency"));
-        Assert.assertThat((long)values.get("min_proposal_latency"), greaterThan(0L));
-        Assert.assertEquals(10L, values.get("cnt_proposal_ack_creation_latency"));
-        Assert.assertThat((long)values.get("min_proposal_ack_creation_latency"), greaterThan(0L));
+        waitForMetric("learner_proposal_received_count", is(8L));
+        waitForMetric("cnt_proposal_latency", is(8L));
+        waitForMetric("min_proposal_latency", greaterThanOrEqualTo(0L));
+        waitForMetric("cnt_proposal_ack_creation_latency", is(10L));
+        waitForMetric("min_proposal_ack_creation_latency", greaterThanOrEqualTo(0L));
 
         // there are five learners, each received two commits, one for leader election, one
for the create request
-        Assert.assertEquals(10L, values.get("learner_commit_received_count"));
-        Assert.assertEquals(10L, values.get("cnt_commit_propagation_latency"));
-        Assert.assertThat((long)values.get("min_commit_propagation_latency"), greaterThan(0L));
+        waitForMetric("learner_commit_received_count", is(10L));
+        waitForMetric("cnt_commit_propagation_latency", is(10L));
+        waitForMetric("min_commit_propagation_latency", greaterThanOrEqualTo(0L));
+    }
+
+    private void waitForMetric(final String metricKey, final Matcher<Long> matcher)
throws InterruptedException {
+        final String errorMessage = String.format("unable to match on metric: %s", metricKey);
+        waitFor(errorMessage,
+                () -> {
+                    long actual = (long) MetricsUtils.currentServerMetrics().get(metricKey);
+                    if(!matcher.matches(actual)) {
+                        LOG.info(String.format("match failed on %s, actual value: %d", metricKey,
actual));
+                        return false;
+                    }
+                    return true;
+                },
+                TIMEOUT_SECONDS);
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        zk_client.close();
+        for(int i = 0; i < SERVER_COUNT; i++) {
+            mt[i].shutdown();
+        }
     }
 }


Mime
View raw message