zookeeper-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From afine <...@git.apache.org>
Subject [GitHub] zookeeper pull request #453: ZOOKEEPER-2845: Apply commit log when restartin...
Date Fri, 16 Feb 2018 22:40:04 GMT
Github user afine commented on a diff in the pull request:

    https://github.com/apache/zookeeper/pull/453#discussion_r168887935
  
    --- Diff: src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java ---
    @@ -888,4 +923,103 @@ public void testWithOnlyMinSessionTimeout() throws Exception {
                     maxSessionTimeOut, quorumPeer.getMaxSessionTimeout());
         }
     
    +    @Test
    +    public void testFailedTxnAsPartOfQuorumLoss() throws Exception {
    +        // 1. start up server and wait for leader election to finish
    +        ClientBase.setupTestEnv();
    +        final int SERVER_COUNT = 3;
    +        servers = LaunchServers(SERVER_COUNT);
    +
    +        waitForAll(servers, States.CONNECTED);
    +
    +        // we need to shutdown and start back up to make sure that the create session isn't the first transaction since
    +        // that is rather innocuous.
    +        servers.shutDownAllServers();
    +        waitForAll(servers, States.CONNECTING);
    +        servers.restartAllServersAndClients(this);
    +        waitForAll(servers, States.CONNECTED);
    +
    +        // 2. kill all followers
    +        int leader = servers.findLeader();
    +        Map<Long, Proposal> outstanding =  servers.mt[leader].main.quorumPeer.leader.outstandingProposals;
    +        // increase the tick time to delay the leader going to looking
    +        servers.mt[leader].main.quorumPeer.tickTime = 10000;
    +        LOG.warn("LEADER {}", leader);
    +
    +        for (int i = 0; i < SERVER_COUNT; i++) {
    +            if (i != leader) {
    +                servers.mt[i].shutdown();
    +            }
    +        }
    +
    +        // 3. start up the followers to form a new quorum
    +        for (int i = 0; i < SERVER_COUNT; i++) {
    +            if (i != leader) {
    +                servers.mt[i].start();
    +            }
    +        }
    +
    +        // 4. wait one of the follower to be the new leader
    +        for (int i = 0; i < SERVER_COUNT; i++) {
    +            if (i != leader) {
    +                // Recreate a client session since the previous session was not persisted.
    +                servers.restartClient(i, this);
    +                waitForOne(servers.zk[i], States.CONNECTED);
    +            }
    +        }
    +
    +        // 5. send a create request to old leader and make sure it's synced to disk,
    +        //    which means it acked from itself
    +        try {
    +            servers.zk[leader].create("/zk" + leader, "zk".getBytes(), Ids.OPEN_ACL_UNSAFE,
    +                CreateMode.PERSISTENT);
    +            Assert.fail("create /zk" + leader + " should have failed");
    +        } catch (KeeperException e) {
    +        }
    +
    +        // just make sure that we actually did get it in process at the
    +        // leader
    +        Assert.assertEquals(1, outstanding.size());
    +        Proposal p = outstanding.values().iterator().next();
    +        Assert.assertEquals(OpCode.create, p.request.getHdr().getType());
    +
    +        // make sure it has a chance to write it to disk
    +        int sleepTime = 0;
    +        Long longLeader = new Long(leader);
    +        while (!p.qvAcksetPairs.get(0).getAckset().contains(longLeader)) {
    +            if (sleepTime > 2000) {
    +                Assert.fail("Transaction not synced to disk within 1 second " + p.qvAcksetPairs.get(0).getAckset()
    +                    + " expected " + leader);
    +            }
    +            Thread.sleep(100);
    +            sleepTime += 100;
    +        }
    +
    +        // 6. wait for the leader to quit due to not enough followers and come back up as a part of the new quorum
    +        sleepTime = 0;
    +        Follower f = servers.mt[leader].main.quorumPeer.follower;
    +        while (f == null || !f.isRunning()) {
    +            if (sleepTime > 10_000) {
    +                Assert.fail("Took too long for old leader to time out " + servers.mt[leader].main.quorumPeer.getPeerState());
    +            }
    +            Thread.sleep(100);
    +            sleepTime += 100;
    +            f = servers.mt[leader].main.quorumPeer.follower;
    +        }
    +        servers.mt[leader].shutdown();
    --- End diff --
    
    why do we need this?


---

Mime
View raw message