From common-commits-return-90422-archive-asf-public=cust-asf.ponee.io@hadoop.apache.org Tue Nov 6 19:05:51 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 6F3B718067E for ; Tue, 6 Nov 2018 19:05:50 +0100 (CET) Received: (qmail 64137 invoked by uid 500); 6 Nov 2018 18:05:37 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 62491 invoked by uid 99); 6 Nov 2018 18:05:37 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 06 Nov 2018 18:05:37 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id E9876E1233; Tue, 6 Nov 2018 18:05:35 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: brahma@apache.org To: common-commits@hadoop.apache.org Date: Tue, 06 Nov 2018 18:06:07 -0000 Message-Id: <33588ba2ad924905ac93cf9c47dce328@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [33/48] hadoop git commit: HDDS-797. If DN is started before SCM, it does not register. Contributed by Hanisha Koneru. HDDS-797. If DN is started before SCM, it does not register. Contributed by Hanisha Koneru. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c8ca1747 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c8ca1747 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c8ca1747 Branch: refs/heads/HDFS-13532 Commit: c8ca1747c08d905cdefaa5566dd58d770a6b71bd Parents: 15df2e7 Author: Arpit Agarwal Authored: Mon Nov 5 09:40:00 2018 -0800 Committer: Arpit Agarwal Committed: Mon Nov 5 09:40:00 2018 -0800 ---------------------------------------------------------------------- .../states/endpoint/VersionEndpointTask.java | 79 +++++++++++--------- .../hadoop/ozone/TestMiniOzoneCluster.java | 52 ++++++++++++- 2 files changed, 94 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/c8ca1747/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java index 79fa174..2d00da8 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java @@ -64,50 +64,57 @@ public class VersionEndpointTask implements public EndpointStateMachine.EndPointStates call() throws Exception { rpcEndPoint.lock(); try{ - SCMVersionResponseProto versionResponse = - rpcEndPoint.getEndPoint().getVersion(null); - VersionResponse response = VersionResponse.getFromProtobuf( - versionResponse); - rpcEndPoint.setVersion(response); + if (rpcEndPoint.getState().equals( + EndpointStateMachine.EndPointStates.GETVERSION)) { + SCMVersionResponseProto versionResponse = + rpcEndPoint.getEndPoint().getVersion(null); + VersionResponse response = VersionResponse.getFromProtobuf( + versionResponse); + rpcEndPoint.setVersion(response); - String scmId = response.getValue(OzoneConsts.SCM_ID); - String clusterId = response.getValue(OzoneConsts.CLUSTER_ID); + String scmId = response.getValue(OzoneConsts.SCM_ID); + String clusterId = response.getValue(OzoneConsts.CLUSTER_ID); - // Check volumes - VolumeSet volumeSet = ozoneContainer.getVolumeSet(); - volumeSet.writeLock(); - try { - Map volumeMap = volumeSet.getVolumeMap(); + // Check volumes + VolumeSet volumeSet = ozoneContainer.getVolumeSet(); + volumeSet.writeLock(); + try { + Map volumeMap = volumeSet.getVolumeMap(); - Preconditions.checkNotNull(scmId, "Reply from SCM: scmId cannot be " + - "null"); - Preconditions.checkNotNull(clusterId, "Reply from SCM: clusterId " + - "cannot be null"); + Preconditions.checkNotNull(scmId, "Reply from SCM: scmId cannot be " + + "null"); + Preconditions.checkNotNull(clusterId, "Reply from SCM: clusterId " + + "cannot be null"); - // If version file does not exist create version file and also set scmId - for (Map.Entry entry : volumeMap.entrySet()) { - HddsVolume hddsVolume = entry.getValue(); - boolean result = HddsVolumeUtil.checkVolume(hddsVolume, scmId, - clusterId, LOG); - if (!result) { - volumeSet.failVolume(hddsVolume.getHddsRootDir().getPath()); + // If version file does not exist create version file and also set scmId + + for (Map.Entry entry : volumeMap.entrySet()) { + HddsVolume hddsVolume = entry.getValue(); + boolean result = HddsVolumeUtil.checkVolume(hddsVolume, scmId, + clusterId, LOG); + if (!result) { + volumeSet.failVolume(hddsVolume.getHddsRootDir().getPath()); + } } + if (volumeSet.getVolumesList().size() == 0) { + // All volumes are in inconsistent state + throw new DiskOutOfSpaceException("All configured Volumes are in " + + "Inconsistent State"); + } + } finally { + volumeSet.writeUnlock(); } - if (volumeSet.getVolumesList().size() == 0) { - // All volumes are in inconsistent state - throw new DiskOutOfSpaceException("All configured Volumes are in " + - "Inconsistent State"); - } - } finally { - volumeSet.writeUnlock(); - } - ozoneContainer.getDispatcher().setScmId(scmId); + ozoneContainer.getDispatcher().setScmId(scmId); - EndpointStateMachine.EndPointStates nextState = - rpcEndPoint.getState().getNextState(); - rpcEndPoint.setState(nextState); - rpcEndPoint.zeroMissedCount(); + EndpointStateMachine.EndPointStates nextState = + rpcEndPoint.getState().getNextState(); + rpcEndPoint.setState(nextState); + rpcEndPoint.zeroMissedCount(); + } else { + LOG.debug("Cannot execute GetVersion task as endpoint state machine " + + "is in {} state", rpcEndPoint.getState()); + } } catch (DiskOutOfSpaceException ex) { rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN); } catch(IOException ex) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/c8ca1747/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneCluster.java ---------------------------------------------------------------------- diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneCluster.java index 0ca4749..3083660 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniOzoneCluster.java @@ -23,9 +23,13 @@ import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.ozone.container.common.SCMTestUtils; import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; -import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; +import org.apache.hadoop.ozone.container.common.statemachine + .DatanodeStateMachine; +import org.apache.hadoop.ozone.container.common.statemachine + .EndpointStateMachine; import org.apache.hadoop.ozone.container.ozoneimpl.TestOzoneContainer; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.TestUtils; @@ -214,4 +218,50 @@ public class TestMiniOzoneCluster { out.write("malformed".getBytes()); out.close(); } + + /** + * Test that a DN can register with SCM even if it was started before the SCM. + * @throws Exception + */ + @Test (timeout = 300_000) + public void testDNstartAfterSCM() throws Exception { + // Start a cluster with 1 DN + cluster = MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(1) + .build(); + cluster.waitForClusterToBeReady(); + + // Stop the SCM + StorageContainerManager scm = cluster.getStorageContainerManager(); + scm.stop(); + + // Restart DN + cluster.restartHddsDatanode(0, false); + + // DN should be in GETVERSION state till the SCM is restarted. + // Check DN endpoint state for 20 seconds + DatanodeStateMachine dnStateMachine = cluster.getHddsDatanodes().get(0) + .getDatanodeStateMachine(); + for (int i = 0; i < 20; i++) { + for (EndpointStateMachine endpoint : + dnStateMachine.getConnectionManager().getValues()) { + Assert.assertEquals( + EndpointStateMachine.EndPointStates.GETVERSION, + endpoint.getState()); + } + Thread.sleep(1000); + } + + // DN should successfully register with the SCM after SCM is restarted. + // Restart the SCM + cluster.restartStorageContainerManager(); + // Wait for DN to register + cluster.waitForClusterToBeReady(); + // DN should be in HEARTBEAT state after registering with the SCM + for (EndpointStateMachine endpoint : + dnStateMachine.getConnectionManager().getValues()) { + Assert.assertEquals(EndpointStateMachine.EndPointStates.HEARTBEAT, + endpoint.getState()); + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org