hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bha...@apache.org
Subject [hadoop] branch trunk updated: HDDS-1187. Healthy pipeline Chill Mode rule to consider only pipelines with replication factor three.
Date Fri, 01 Mar 2019 01:03:45 GMT
This is an automated email from the ASF dual-hosted git repository.

bharat pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new eae8819  HDDS-1187.  Healthy pipeline Chill Mode rule to consider only pipelines with replication factor three.
eae8819 is described below

commit eae8819fd2acb7474190b64340b82cba09577810
Author: Bharat Viswanadham <bharat@apache.org>
AuthorDate: Thu Feb 28 17:03:39 2019 -0800

    HDDS-1187.  Healthy pipeline Chill Mode rule to consider only pipelines with replication factor three.
---
 .../chillmode/HealthyPipelineChillModeRule.java    | 39 ++++++++---
 .../TestHealthyPipelineChillModeRule.java          | 77 ++++++++++++++++++++++
 .../scm/chillmode/TestSCMChillModeManager.java     | 26 ++++----
 3 files changed, 120 insertions(+), 22 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java
index 07088ca..3f475b8 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hdds.scm.chillmode;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdds.HddsConfigKeys;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
@@ -34,6 +35,9 @@ import com.google.common.base.Preconditions;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.HashSet;
+import java.util.Set;
+
 /**
  * Class defining Chill mode exit criteria for Pipelines.
  *
@@ -45,12 +49,14 @@ public class HealthyPipelineChillModeRule
     implements ChillModeExitRule<PipelineReportFromDatanode>,
     EventHandler<PipelineReportFromDatanode> {
 
-  private static final Logger LOG =
+  public static final Logger LOG =
       LoggerFactory.getLogger(HealthyPipelineChillModeRule.class);
   private final PipelineManager pipelineManager;
   private final SCMChillModeManager chillModeManager;
   private final int healthyPipelineThresholdCount;
   private int currentHealthyPipelineCount = 0;
+  private final Set<DatanodeDetails> processedDatanodeDetails =
+      new HashSet<>();
 
   HealthyPipelineChillModeRule(PipelineManager pipelineManager,
       SCMChillModeManager manager, Configuration configuration) {
@@ -71,7 +77,7 @@ public class HealthyPipelineChillModeRule
     // On a fresh installed cluster, there will be zero pipelines in the SCM
     // pipeline DB.
     healthyPipelineThresholdCount =
-        (int) Math.ceil((healthyPipelinesPercent / 100) *  pipelineCount);
+        (int) Math.ceil(healthyPipelinesPercent * pipelineCount);
 
     LOG.info(" Total pipeline count is {}, healthy pipeline " +
         "threshold count is {}", pipelineCount, healthyPipelineThresholdCount);
@@ -101,7 +107,8 @@ public class HealthyPipelineChillModeRule
         continue;
       }
 
-      if (pipeline.getPipelineState() == Pipeline.PipelineState.OPEN) {
+      if (pipeline.getFactor() == HddsProtos.ReplicationFactor.THREE &&
+          pipeline.getPipelineState() == Pipeline.PipelineState.OPEN) {
         // If the pipeline is open state mean, all 3 datanodes are reported
         // for this pipeline.
         currentHealthyPipelineCount++;
@@ -125,14 +132,26 @@ public class HealthyPipelineChillModeRule
       return;
     }
 
-    // Process pipeline report from datanode
-    process(pipelineReportFromDatanode);
 
-    if (chillModeManager.getInChillMode()) {
-      SCMChillModeManager.getLogger().info(
-          "SCM in chill mode. Healthy pipelines reported count is {}, " +
-              "required healthy pipeline reported count is {}",
-          currentHealthyPipelineCount, healthyPipelineThresholdCount);
+    // When SCM is in chill mode for long time, already registered
+    // datanode can send pipeline report again, then pipeline handler fires
+    // processed report event, we should not consider this pipeline report
+    // from datanode again during threshold calculation.
+    DatanodeDetails dnDetails = pipelineReportFromDatanode.getDatanodeDetails();
+    if (!processedDatanodeDetails.contains(
+        pipelineReportFromDatanode.getDatanodeDetails())) {
+
+      // Process pipeline report from datanode
+      process(pipelineReportFromDatanode);
+
+      if (chillModeManager.getInChillMode()) {
+        SCMChillModeManager.getLogger().info(
+            "SCM in chill mode. Healthy pipelines reported count is {}, " +
+                "required healthy pipeline reported count is {}",
+            currentHealthyPipelineCount, healthyPipelineThresholdCount);
+      }
+
+      processedDatanodeDetails.add(dnDetails);
     }
 
     if (validate()) {
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java
index adfa73f..61fbf19 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.hdds.server.events.EventQueue;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.Assert;
 import org.junit.Test;
+import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.util.ArrayList;
@@ -153,6 +154,82 @@ public class TestHealthyPipelineChillModeRule {
   }
 
 
+  @Test
+  public void testHealthyPipelineChillModeRuleWithMixedPipelines()
+      throws Exception {
+
+    String storageDir = GenericTestUtils.getTempPath(
+        TestHealthyPipelineChillModeRule.class.getName() + UUID.randomUUID());
+
+    try {
+      EventQueue eventQueue = new EventQueue();
+      List<ContainerInfo> containers = new ArrayList<>();
+      containers.addAll(HddsTestUtils.getContainerInfo(1));
+
+      OzoneConfiguration config = new OzoneConfiguration();
+
+      // In Mock Node Manager, first 8 nodes are healthy, next 2 nodes are
+      // stale and last one is dead, and this repeats. So for a 12 node, 9
+      // healthy, 2 stale and one dead.
+      MockNodeManager nodeManager = new MockNodeManager(true, 12);
+      config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
+      // enable pipeline check
+      config.setBoolean(
+          HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, true);
+
+
+      PipelineManager pipelineManager = new SCMPipelineManager(config,
+          nodeManager, eventQueue);
+
+      // Create 3 pipelines
+      Pipeline pipeline1 =
+          pipelineManager.createPipeline(HddsProtos.ReplicationType.RATIS,
+              HddsProtos.ReplicationFactor.ONE);
+      Pipeline pipeline2 =
+          pipelineManager.createPipeline(HddsProtos.ReplicationType.RATIS,
+              HddsProtos.ReplicationFactor.THREE);
+      Pipeline pipeline3 =
+          pipelineManager.createPipeline(HddsProtos.ReplicationType.RATIS,
+              HddsProtos.ReplicationFactor.THREE);
+
+
+      SCMChillModeManager scmChillModeManager = new SCMChillModeManager(
+          config, containers, pipelineManager, eventQueue);
+
+      HealthyPipelineChillModeRule healthyPipelineChillModeRule =
+          scmChillModeManager.getHealthyPipelineChillModeRule();
+
+
+      // No datanodes have sent pipelinereport from datanode
+      Assert.assertFalse(healthyPipelineChillModeRule.validate());
+
+
+      GenericTestUtils.LogCapturer logCapturer =
+          GenericTestUtils.LogCapturer.captureLogs(LoggerFactory.getLogger(
+              SCMChillModeManager.class));
+
+      // fire event with pipeline report with ratis type and factor 1
+      // pipeline, validate() should return false
+      firePipelineEvent(pipeline1, eventQueue);
+
+      GenericTestUtils.waitFor(() -> logCapturer.getOutput().contains(
+          "reported count is 0"),
+          1000, 5000);
+      Assert.assertFalse(healthyPipelineChillModeRule.validate());
+
+      firePipelineEvent(pipeline2, eventQueue);
+      firePipelineEvent(pipeline3, eventQueue);
+
+      GenericTestUtils.waitFor(() -> healthyPipelineChillModeRule.validate(),
+          1000, 5000);
+
+    } finally {
+      FileUtil.fullyDelete(new File(storageDir));
+    }
+
+  }
+
+
   private void firePipelineEvent(Pipeline pipeline, EventQueue eventQueue) {
     PipelineReportsProto.Builder reportBuilder = PipelineReportsProto
         .newBuilder();
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestSCMChillModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestSCMChillModeManager.java
index 7c8cafa..faf8fee 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestSCMChillModeManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestSCMChillModeManager.java
@@ -237,7 +237,7 @@ public class TestSCMChillModeManager {
     String storageDir = GenericTestUtils.getTempPath(
         TestSCMChillModeManager.class.getName() + UUID.randomUUID());
     try{
-      MockNodeManager nodeManager = new MockNodeManager(true, 1);
+      MockNodeManager nodeManager = new MockNodeManager(true, 3);
       config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
       // enable pipeline check
       config.setBoolean(
@@ -245,6 +245,15 @@ public class TestSCMChillModeManager {
 
       PipelineManager pipelineManager = new SCMPipelineManager(config,
           nodeManager, queue);
+
+      Pipeline pipeline = pipelineManager.createPipeline(
+          HddsProtos.ReplicationType.RATIS,
+          HddsProtos.ReplicationFactor.THREE);
+      PipelineReportsProto.Builder reportBuilder = PipelineReportsProto
+          .newBuilder();
+      reportBuilder.addPipelineReport(PipelineReport.newBuilder()
+          .setPipelineID(pipeline.getId().getProtobuf()));
+
       scmChillModeManager = new SCMChillModeManager(
           config, containers, pipelineManager, queue);
       queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
@@ -254,17 +263,10 @@ public class TestSCMChillModeManager {
           HddsTestUtils.createNodeRegistrationContainerReport(containers));
       assertTrue(scmChillModeManager.getInChillMode());
 
-      // simulation a pipeline report to trigger the rule check
-      Pipeline pipeline = pipelineManager.createPipeline(
-          HddsProtos.ReplicationType.STAND_ALONE,
-          HddsProtos.ReplicationFactor.ONE);
-      PipelineReportsProto.Builder reportBuilder = PipelineReportsProto
-          .newBuilder();
-      reportBuilder.addPipelineReport(PipelineReport.newBuilder()
-          .setPipelineID(pipeline.getId().getProtobuf()));
-
-      queue.fireEvent(SCMEvents.PIPELINE_REPORT, new PipelineReportFromDatanode(
-          pipeline.getNodes().get(0), reportBuilder.build()));
+      // Trigger the processed pipeline report event
+      queue.fireEvent(SCMEvents.PROCESSED_PIPELINE_REPORT,
+          new PipelineReportFromDatanode(pipeline.getNodes().get(0),
+              reportBuilder.build()));
 
       GenericTestUtils.waitFor(() -> {
         return !scmChillModeManager.getInChillMode();


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message