hadoop-ozone-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sodonn...@apache.org
Subject [hadoop-ozone] branch HDDS-1880-Decom updated: HDDS-2728. Remove methods of internal representation from DatanodeAdminMontor interface (#355)
Date Fri, 13 Dec 2019 20:17:45 GMT
This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch HDDS-1880-Decom
in repository https://gitbox.apache.org/repos/asf/hadoop-ozone.git


The following commit(s) were added to refs/heads/HDDS-1880-Decom by this push:
     new f66c40c  HDDS-2728. Remove methods of internal representation from DatanodeAdminMontor interface  (#355)
f66c40c is described below

commit f66c40cce184dfaf099c6f1ffd3f3ec4d306d923
Author: Elek, Márton <elek@users.noreply.github.com>
AuthorDate: Fri Dec 13 21:17:34 2019 +0100

    HDDS-2728. Remove methods of internal representation from DatanodeAdminMontor interface (#355)
---
 .../hadoop/hdds/scm/node/DatanodeAdminMonitor.java | 382 +--------------------
 ...nMonitor.java => DatanodeAdminMonitorImpl.java} | 102 +++---
 .../scm/node/DatanodeAdminMonitorInterface.java    |  43 ---
 .../hdds/scm/node/NodeDecommissionManager.java     |  17 +-
 .../hdds/scm/server/StorageContainerManager.java   |   2 +-
 .../hdds/scm/node/TestDatanodeAdminMonitor.java    |   8 +-
 .../scm/node/TestDatanodeAdminNodeDetails.java     |   2 -
 .../hdds/scm/node/TestNodeDecommissionManager.java |   2 +-
 8 files changed, 61 insertions(+), 497 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
index 6aabd62..9ead66f 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,380 +17,16 @@
  */
 package org.apache.hadoop.hdds.scm.node;
 
-import com.google.common.annotations.VisibleForTesting;
-import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
-import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState;
-import org.apache.hadoop.hdds.scm.container.ContainerID;
-import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
-import org.apache.hadoop.hdds.scm.container.ContainerReplicaCount;
-import org.apache.hadoop.hdds.scm.container.ReplicationManager;
-import org.apache.hadoop.hdds.scm.events.SCMEvents;
-import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
-import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
-import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
-import org.apache.hadoop.hdds.server.events.EventPublisher;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.*;
 
 /**
- * Monitor thread which watches for nodes to be decommissioned, recommissioned
- * or placed into maintenance. Newly added nodes are queued in pendingNodes
- * and recommissoned nodes are queued in cancelled nodes. On each monitor
- * 'tick', the cancelled nodes are processed and removed from the monitor.
- * Then any pending nodes are added to the trackedNodes set, where they stay
- * until decommission or maintenance has ended.
- *
- * Once an node is placed into tracked nodes, it goes through a workflow where
- * the following happens:
- *
- * 1. First an event is fired to close any pipelines on the node, which will
- *    also close any containers.
- * 2. Next the containers on the node are obtained and checked to see if new
- *    replicas are needed. If so, the new replicas are scheduled.
- * 3. After scheduling replication, the node remains pending until replication
- *    has completed.
- * 4. At this stage the node will complete decommission or enter maintenance.
- * 5. Maintenance nodes will remain tracked by this monitor until maintenance
- *    is manually ended, or the maintenance window expires.
+ * Interface used by the DatanodeAdminMonitor, which can be used to
+ * decommission or recommission nodes and take them in and out of maintenance.
  */
-public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
-
-  private OzoneConfiguration conf;
-  private EventPublisher eventQueue;
-  private NodeManager nodeManager;
-  private PipelineManager pipelineManager;
-  private ReplicationManager replicationManager;
-  private Queue<DatanodeAdminNodeDetails> pendingNodes = new ArrayDeque();
-  private Queue<DatanodeAdminNodeDetails> cancelledNodes = new ArrayDeque();
-  private Set<DatanodeAdminNodeDetails> trackedNodes = new HashSet<>();
-
-  private static final Logger LOG =
-      LoggerFactory.getLogger(DatanodeAdminMonitor.class);
-
-  public DatanodeAdminMonitor(OzoneConfiguration config) {
-    conf = config;
-  }
-
-  @Override
-  public void setConf(OzoneConfiguration config) {
-    conf = config;
-  }
-
-  @Override
-  public void setEventQueue(EventPublisher eventQueue) {
-    this.eventQueue = eventQueue;
-  }
-
-  @Override
-  public void setNodeManager(NodeManager nm) {
-    nodeManager = nm;
-  }
-
-  @Override
-  public void setPipelineManager(PipelineManager pm) {
-    pipelineManager = pm;
-  }
-
-  @Override
-  public void setReplicationManager(ReplicationManager rm) {
-    replicationManager = rm;
-  }
-
-  /**
-   * Add a node to the decommission or maintenance workflow. The node will be
-   * queued and added to the workflow after a defined interval.
-   *
-   * @param dn The datanode to move into an admin state
-   * @param endInHours For nodes going into maintenance, the number of hours
-   *                   from now for maintenance to automatically end. Ignored
-   *                   for decommissioning nodes.
-   */
-  @Override
-  public synchronized void startMonitoring(DatanodeDetails dn, int endInHours) {
-    DatanodeAdminNodeDetails nodeDetails =
-        new DatanodeAdminNodeDetails(dn, endInHours);
-    cancelledNodes.remove(nodeDetails);
-    pendingNodes.add(nodeDetails);
-  }
-
-  /**
-   * Remove a node from the decommission or maintenance workflow, and return it
-   * to service. The node will be queued and removed from decommission or
-   * maintenance after a defined interval.
-   * @param dn The datanode for which to stop decommission or maintenance.
-   */
-  @Override
-  public synchronized void stopMonitoring(DatanodeDetails dn) {
-    DatanodeAdminNodeDetails nodeDetails = new DatanodeAdminNodeDetails(dn, 0);
-    pendingNodes.remove(nodeDetails);
-    cancelledNodes.add(nodeDetails);
-  }
-
-  /**
-   * Run an iteration of the monitor. This is the main run loop, and performs
-   * the following checks:
-   *
-   * 1. Check for any cancelled nodes and process them
-   * 2. Check for any newly added nodes and add them to the workflow
-   * 3. Perform checks on the transitioning nodes and move them through the
-   *    workflow until they have completed decommission or maintenance
-   */
-  @Override
-  public void run() {
-    try {
-      synchronized (this) {
-        processCancelledNodes();
-        processPendingNodes();
-      }
-      processTransitioningNodes();
-      if (trackedNodes.size() > 0 || pendingNodes.size() > 0) {
-        LOG.info("There are {} nodes tracked for decommission and "+
-            "maintenance. {} pending nodes.",
-            trackedNodes.size(), pendingNodes.size());
-      }
-    } catch (Exception e) {
-      LOG.error("Caught an error in the DatanodeAdminMonitor", e);
-      // Intentionally do not re-throw, as if we do the monitor thread
-      // will not get rescheduled.
-    }
-  }
-
-  @Override
-  public int getPendingCount() {
-    return pendingNodes.size();
-  }
-
-  @Override
-  public int getCancelledCount() {
-    return cancelledNodes.size();
-  }
-
-  @Override
-  public int getTrackedNodeCount() {
-    return trackedNodes.size();
-  }
-
-  @VisibleForTesting
-  public Set<DatanodeAdminNodeDetails> getTrackedNodes() {
-    return trackedNodes;
-  }
-
-  private void processCancelledNodes() {
-    while(!cancelledNodes.isEmpty()) {
-      DatanodeAdminNodeDetails dn = cancelledNodes.poll();
-      try {
-        stopTrackingNode(dn);
-        putNodeBackInService(dn);
-        LOG.info("Recommissioned node {}", dn.getDatanodeDetails());
-      } catch (NodeNotFoundException e) {
-        LOG.warn("Failed processing the cancel admin request for {}",
-            dn.getDatanodeDetails(), e);
-      }
-      // TODO - fire event to bring node back into service?
-    }
-  }
-
-  private void processPendingNodes() {
-    while(!pendingNodes.isEmpty()) {
-      startTrackingNode(pendingNodes.poll());
-    }
-  }
-
-  private void processTransitioningNodes() {
-    Iterator<DatanodeAdminNodeDetails> iterator = trackedNodes.iterator();
-    while (iterator.hasNext()) {
-      DatanodeAdminNodeDetails dn = iterator.next();
-      try {
-        NodeStatus status = getNodeStatus(dn.getDatanodeDetails());
-
-        if (!shouldContinueWorkflow(dn, status)) {
-          abortWorkflow(dn);
-          iterator.remove();
-          continue;
-        }
-
-        if (status.isMaintenance()) {
-          if (dn.shouldMaintenanceEnd()) {
-            completeMaintenance(dn);
-            iterator.remove();
-            continue;
-          }
-        }
-
-        if (status.isDecommissioning() || status.isEnteringMaintenance()) {
-          if (checkPipelinesClosedOnNode(dn)
-              && checkContainersReplicatedOnNode(dn)) {
-            // CheckContainersReplicatedOnNode may take a short time to run
-            // so after it completes, re-get the nodestatus to check the health
-            // and ensure the state is still good to continue
-            status = getNodeStatus(dn.getDatanodeDetails());
-            if (status.isDead()) {
-              LOG.warn("Datanode {} is dead and the admin workflow cannot "+
-                  "continue. The node will be put back to IN_SERVICE and "+
-                  "handled as a dead node", dn);
-              putNodeBackInService(dn);
-              iterator.remove();
-            } else if (status.isDecommissioning()) {
-              completeDecommission(dn);
-              iterator.remove();
-            } else if (status.isEnteringMaintenance()) {
-              putIntoMaintenance(dn);
-            }
-          }
-        }
-
-      } catch (NodeNotFoundException e) {
-        LOG.error("An unexpected error occurred processing datanode {}. " +
-            "Aborting the admin workflow", dn.getDatanodeDetails(), e);
-        abortWorkflow(dn);
-        iterator.remove();
-      }
-    }
-  }
-
-  /**
-   * Checks if a node is in an unexpected state or has gone dead while
-   * decommissioning or entering maintenance. If the node is not in a valid
-   * state to continue the admin workflow, return false, otherwise return true.
-   * @param dn The Datanode for which to check the current state
-   * @param nodeStatus The current NodeStatus for the datanode
-   * @return True if admin can continue, false otherwise
-   */
-  private boolean shouldContinueWorkflow(DatanodeAdminNodeDetails dn,
-      NodeStatus nodeStatus) {
-    if (!nodeStatus.isDecommission() && !nodeStatus.isMaintenance()) {
-      LOG.warn("Datanode {} has an operational state of {} when it should "+
-              "be undergoing decommission or maintenance. Aborting admin for "+
-              "this node.",
-          dn.getDatanodeDetails(), nodeStatus.getOperationalState());
-      return false;
-    }
-    if (nodeStatus.isDead() && !nodeStatus.isInMaintenance()) {
-      LOG.error("Datanode {} is dead but is not IN_MAINTENANCE. Aborting the "+
-          "admin workflow for this node", dn.getDatanodeDetails());
-      return false;
-    }
-    return true;
-  }
-
-  private boolean checkPipelinesClosedOnNode(DatanodeAdminNodeDetails dn) {
-    DatanodeDetails dnd = dn.getDatanodeDetails();
-    Set<PipelineID> pipelines = nodeManager.getPipelines(dnd);
-    if (pipelines == null || pipelines.size() == 0
-        || dn.shouldMaintenanceEnd()) {
-      return true;
-    } else {
-      LOG.info("Waiting for pipelines to close for {}. There are {} "+
-          "pipelines", dnd, pipelines.size());
-      return false;
-    }
-  }
-
-  private boolean checkContainersReplicatedOnNode(DatanodeAdminNodeDetails dn)
-      throws NodeNotFoundException {
-    int sufficientlyReplicated = 0;
-    int underReplicated = 0;
-    int unhealthy = 0;
-    Set<ContainerID> containers =
-        nodeManager.getContainers(dn.getDatanodeDetails());
-    for(ContainerID cid : containers) {
-      try {
-        ContainerReplicaCount replicaSet =
-            replicationManager.getContainerReplicaCount(cid);
-        if (replicaSet.isSufficientlyReplicated()) {
-          sufficientlyReplicated++;
-        } else {
-          underReplicated++;
-        }
-        if (!replicaSet.isHealthy()) {
-          unhealthy++;
-        }
-      } catch (ContainerNotFoundException e) {
-        LOG.warn("ContainerID {} present in node list for {} but not found "+
-            "in containerManager", cid, dn.getDatanodeDetails());
-      }
-    }
-    dn.setSufficientlyReplicatedContainers(sufficientlyReplicated);
-    dn.setUnderReplicatedContainers(underReplicated);
-    dn.setUnHealthyContainers(unhealthy);
-
-    return underReplicated == 0 && unhealthy == 0;
-  }
-
-  private void completeDecommission(DatanodeAdminNodeDetails dn)
-      throws NodeNotFoundException{
-    setNodeOpState(dn, NodeOperationalState.DECOMMISSIONED);
-    LOG.info("Datanode {} has completed the admin workflow. The operational "+
-        "state has been set to {}", dn.getDatanodeDetails(),
-        NodeOperationalState.DECOMMISSIONED);
-  }
-
-  private void putIntoMaintenance(DatanodeAdminNodeDetails dn)
-      throws NodeNotFoundException {
-    LOG.info("Datanode {} has entered maintenance", dn.getDatanodeDetails());
-    setNodeOpState(dn, NodeOperationalState.IN_MAINTENANCE);
-  }
-
-  private void completeMaintenance(DatanodeAdminNodeDetails dn)
-      throws NodeNotFoundException {
-    // The end state of Maintenance is to put the node back IN_SERVICE, whether
-    // it is dead or not.
-    // TODO - if the node is dead do we trigger a dead node event here or leave
-    //        it to the heartbeat manager?
-    LOG.info("Datanode {} has ended maintenance automatically",
-        dn.getDatanodeDetails());
-    putNodeBackInService(dn);
-  }
-
-  private void startTrackingNode(DatanodeAdminNodeDetails dn) {
-    eventQueue.fireEvent(SCMEvents.START_ADMIN_ON_NODE,
-        dn.getDatanodeDetails());
-    trackedNodes.add(dn);
-  }
-
-  private void stopTrackingNode(DatanodeAdminNodeDetails dn) {
-    trackedNodes.remove(dn);
-  }
-
-  /**
-   * If we encounter an unexpected condition in maintenance, we must abort the
-   * workflow by setting the node operationalState back to IN_SERVICE and then
-   * remove the node from tracking.
-   * @param dn The datanode for which to abort tracking
-   */
-  private void abortWorkflow(DatanodeAdminNodeDetails dn) {
-    try {
-      putNodeBackInService(dn);
-    } catch (NodeNotFoundException e) {
-      LOG.error("Unable to set the node OperationalState for {} while "+
-          "aborting the datanode admin workflow", dn.getDatanodeDetails());
-    }
-  }
-
-  private void putNodeBackInService(DatanodeAdminNodeDetails dn)
-      throws NodeNotFoundException {
-    setNodeOpState(dn, NodeOperationalState.IN_SERVICE);
-  }
+public interface DatanodeAdminMonitor extends Runnable {
 
-  private void setNodeOpState(DatanodeAdminNodeDetails dn,
-      HddsProtos.NodeOperationalState state) throws NodeNotFoundException {
-    nodeManager.setNodeOperationalState(dn.getDatanodeDetails(), state);
-  }
+  void startMonitoring(DatanodeDetails dn, int endInHours);
 
-  // TODO - The nodeManager.getNodeStatus call should really throw
-  //        NodeNotFoundException rather than having to handle it here as all
-  //        registered nodes must have a status.
-  private NodeStatus getNodeStatus(DatanodeDetails dnd)
-      throws NodeNotFoundException {
-    NodeStatus nodeStatus = nodeManager.getNodeStatus(dnd);
-    if (nodeStatus == null) {
-      throw new NodeNotFoundException("Unable to retrieve the nodeStatus");
-    }
-    return nodeStatus;
-  }
+  void stopMonitoring(DatanodeDetails dn);
 
-}
\ No newline at end of file
+}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
similarity index 89%
copy from hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
copy to hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
index 6aabd62..931a45e 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdds.scm.node;
 
 import com.google.common.annotations.VisibleForTesting;
+
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
@@ -29,8 +30,8 @@ import org.apache.hadoop.hdds.scm.container.ReplicationManager;
 import org.apache.hadoop.hdds.scm.events.SCMEvents;
 import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
 import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
-import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
 import org.apache.hadoop.hdds.server.events.EventPublisher;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -43,68 +44,49 @@ import java.util.*;
  * 'tick', the cancelled nodes are processed and removed from the monitor.
  * Then any pending nodes are added to the trackedNodes set, where they stay
  * until decommission or maintenance has ended.
- *
+ * <p>
  * Once an node is placed into tracked nodes, it goes through a workflow where
  * the following happens:
- *
+ * <p>
  * 1. First an event is fired to close any pipelines on the node, which will
- *    also close any containers.
+ * also close any containers.
  * 2. Next the containers on the node are obtained and checked to see if new
- *    replicas are needed. If so, the new replicas are scheduled.
+ * replicas are needed. If so, the new replicas are scheduled.
  * 3. After scheduling replication, the node remains pending until replication
- *    has completed.
+ * has completed.
  * 4. At this stage the node will complete decommission or enter maintenance.
  * 5. Maintenance nodes will remain tracked by this monitor until maintenance
- *    is manually ended, or the maintenance window expires.
+ * is manually ended, or the maintenance window expires.
  */
-public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
+public class DatanodeAdminMonitorImpl implements DatanodeAdminMonitor {
 
   private OzoneConfiguration conf;
   private EventPublisher eventQueue;
   private NodeManager nodeManager;
-  private PipelineManager pipelineManager;
   private ReplicationManager replicationManager;
   private Queue<DatanodeAdminNodeDetails> pendingNodes = new ArrayDeque();
   private Queue<DatanodeAdminNodeDetails> cancelledNodes = new ArrayDeque();
   private Set<DatanodeAdminNodeDetails> trackedNodes = new HashSet<>();
 
   private static final Logger LOG =
-      LoggerFactory.getLogger(DatanodeAdminMonitor.class);
-
-  public DatanodeAdminMonitor(OzoneConfiguration config) {
-    conf = config;
-  }
-
-  @Override
-  public void setConf(OzoneConfiguration config) {
-    conf = config;
-  }
-
-  @Override
-  public void setEventQueue(EventPublisher eventQueue) {
+      LoggerFactory.getLogger(DatanodeAdminMonitorImpl.class);
+
+  public DatanodeAdminMonitorImpl(
+      OzoneConfiguration conf,
+      EventPublisher eventQueue,
+      NodeManager nodeManager,
+      ReplicationManager replicationManager) {
+    this.conf = conf;
     this.eventQueue = eventQueue;
-  }
-
-  @Override
-  public void setNodeManager(NodeManager nm) {
-    nodeManager = nm;
-  }
-
-  @Override
-  public void setPipelineManager(PipelineManager pm) {
-    pipelineManager = pm;
-  }
-
-  @Override
-  public void setReplicationManager(ReplicationManager rm) {
-    replicationManager = rm;
+    this.nodeManager = nodeManager;
+    this.replicationManager = replicationManager;
   }
 
   /**
    * Add a node to the decommission or maintenance workflow. The node will be
    * queued and added to the workflow after a defined interval.
    *
-   * @param dn The datanode to move into an admin state
+   * @param dn         The datanode to move into an admin state
    * @param endInHours For nodes going into maintenance, the number of hours
    *                   from now for maintenance to automatically end. Ignored
    *                   for decommissioning nodes.
@@ -121,6 +103,7 @@ public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
    * Remove a node from the decommission or maintenance workflow, and return it
    * to service. The node will be queued and removed from decommission or
    * maintenance after a defined interval.
+   *
    * @param dn The datanode for which to stop decommission or maintenance.
    */
   @Override
@@ -133,11 +116,11 @@ public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
   /**
    * Run an iteration of the monitor. This is the main run loop, and performs
    * the following checks:
-   *
+   * <p>
    * 1. Check for any cancelled nodes and process them
    * 2. Check for any newly added nodes and add them to the workflow
    * 3. Perform checks on the transitioning nodes and move them through the
-   *    workflow until they have completed decommission or maintenance
+   * workflow until they have completed decommission or maintenance
    */
   @Override
   public void run() {
@@ -148,8 +131,8 @@ public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
       }
       processTransitioningNodes();
       if (trackedNodes.size() > 0 || pendingNodes.size() > 0) {
-        LOG.info("There are {} nodes tracked for decommission and "+
-            "maintenance. {} pending nodes.",
+        LOG.info("There are {} nodes tracked for decommission and " +
+                "maintenance. {} pending nodes.",
             trackedNodes.size(), pendingNodes.size());
       }
     } catch (Exception e) {
@@ -159,17 +142,14 @@ public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
     }
   }
 
-  @Override
   public int getPendingCount() {
     return pendingNodes.size();
   }
 
-  @Override
   public int getCancelledCount() {
     return cancelledNodes.size();
   }
 
-  @Override
   public int getTrackedNodeCount() {
     return trackedNodes.size();
   }
@@ -180,7 +160,7 @@ public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
   }
 
   private void processCancelledNodes() {
-    while(!cancelledNodes.isEmpty()) {
+    while (!cancelledNodes.isEmpty()) {
       DatanodeAdminNodeDetails dn = cancelledNodes.poll();
       try {
         stopTrackingNode(dn);
@@ -195,7 +175,7 @@ public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
   }
 
   private void processPendingNodes() {
-    while(!pendingNodes.isEmpty()) {
+    while (!pendingNodes.isEmpty()) {
       startTrackingNode(pendingNodes.poll());
     }
   }
@@ -229,8 +209,8 @@ public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
             // and ensure the state is still good to continue
             status = getNodeStatus(dn.getDatanodeDetails());
             if (status.isDead()) {
-              LOG.warn("Datanode {} is dead and the admin workflow cannot "+
-                  "continue. The node will be put back to IN_SERVICE and "+
+              LOG.warn("Datanode {} is dead and the admin workflow cannot " +
+                  "continue. The node will be put back to IN_SERVICE and " +
                   "handled as a dead node", dn);
               putNodeBackInService(dn);
               iterator.remove();
@@ -256,21 +236,22 @@ public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
    * Checks if a node is in an unexpected state or has gone dead while
    * decommissioning or entering maintenance. If the node is not in a valid
    * state to continue the admin workflow, return false, otherwise return true.
-   * @param dn The Datanode for which to check the current state
+   *
+   * @param dn         The Datanode for which to check the current state
    * @param nodeStatus The current NodeStatus for the datanode
    * @return True if admin can continue, false otherwise
    */
   private boolean shouldContinueWorkflow(DatanodeAdminNodeDetails dn,
       NodeStatus nodeStatus) {
     if (!nodeStatus.isDecommission() && !nodeStatus.isMaintenance()) {
-      LOG.warn("Datanode {} has an operational state of {} when it should "+
-              "be undergoing decommission or maintenance. Aborting admin for "+
+      LOG.warn("Datanode {} has an operational state of {} when it should " +
+              "be undergoing decommission or maintenance. Aborting admin for " +
               "this node.",
           dn.getDatanodeDetails(), nodeStatus.getOperationalState());
       return false;
     }
     if (nodeStatus.isDead() && !nodeStatus.isInMaintenance()) {
-      LOG.error("Datanode {} is dead but is not IN_MAINTENANCE. Aborting the "+
+      LOG.error("Datanode {} is dead but is not IN_MAINTENANCE. Aborting the " +
           "admin workflow for this node", dn.getDatanodeDetails());
       return false;
     }
@@ -284,7 +265,7 @@ public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
         || dn.shouldMaintenanceEnd()) {
       return true;
     } else {
-      LOG.info("Waiting for pipelines to close for {}. There are {} "+
+      LOG.info("Waiting for pipelines to close for {}. There are {} " +
           "pipelines", dnd, pipelines.size());
       return false;
     }
@@ -297,7 +278,7 @@ public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
     int unhealthy = 0;
     Set<ContainerID> containers =
         nodeManager.getContainers(dn.getDatanodeDetails());
-    for(ContainerID cid : containers) {
+    for (ContainerID cid : containers) {
       try {
         ContainerReplicaCount replicaSet =
             replicationManager.getContainerReplicaCount(cid);
@@ -310,7 +291,7 @@ public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
           unhealthy++;
         }
       } catch (ContainerNotFoundException e) {
-        LOG.warn("ContainerID {} present in node list for {} but not found "+
+        LOG.warn("ContainerID {} present in node list for {} but not found " +
             "in containerManager", cid, dn.getDatanodeDetails());
       }
     }
@@ -322,10 +303,10 @@ public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
   }
 
   private void completeDecommission(DatanodeAdminNodeDetails dn)
-      throws NodeNotFoundException{
+      throws NodeNotFoundException {
     setNodeOpState(dn, NodeOperationalState.DECOMMISSIONED);
-    LOG.info("Datanode {} has completed the admin workflow. The operational "+
-        "state has been set to {}", dn.getDatanodeDetails(),
+    LOG.info("Datanode {} has completed the admin workflow. The operational " +
+            "state has been set to {}", dn.getDatanodeDetails(),
         NodeOperationalState.DECOMMISSIONED);
   }
 
@@ -360,13 +341,14 @@ public class DatanodeAdminMonitor implements DatanodeAdminMonitorInterface {
    * If we encounter an unexpected condition in maintenance, we must abort the
    * workflow by setting the node operationalState back to IN_SERVICE and then
    * remove the node from tracking.
+   *
    * @param dn The datanode for which to abort tracking
    */
   private void abortWorkflow(DatanodeAdminNodeDetails dn) {
     try {
       putNodeBackInService(dn);
     } catch (NodeNotFoundException e) {
-      LOG.error("Unable to set the node OperationalState for {} while "+
+      LOG.error("Unable to set the node OperationalState for {} while " +
           "aborting the datanode admin workflow", dn.getDatanodeDetails());
     }
   }
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorInterface.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorInterface.java
deleted file mode 100644
index cfbbffb..0000000
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorInterface.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hdds.scm.node;
-
-import org.apache.hadoop.hdds.conf.OzoneConfiguration;
-import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.scm.container.ReplicationManager;
-import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
-import org.apache.hadoop.hdds.server.events.EventPublisher;
-
-/**
- * Interface used by the DatanodeAdminMonitor, which can be used to
- * decommission or recommission nodes and take them in and out of maintenance.
- */
-public interface DatanodeAdminMonitorInterface extends Runnable {
-
-  void setConf(OzoneConfiguration conf);
-  void setEventQueue(EventPublisher scm);
-  void setNodeManager(NodeManager nm);
-  void setPipelineManager(PipelineManager pm);
-  void setReplicationManager(ReplicationManager rm);
-  void startMonitoring(DatanodeDetails dn, int endInHours);
-  void stopMonitoring(DatanodeDetails dn);
-
-  int getPendingCount();
-  int getCancelledCount();
-  int getTrackedNodeCount();
-}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
index f76929e..4c9bf9c 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
@@ -25,7 +25,6 @@ import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.hdds.scm.container.ContainerManager;
 import org.apache.hadoop.hdds.scm.container.ReplicationManager;
 import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
-import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
 import org.apache.hadoop.hdds.server.events.EventPublisher;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.slf4j.Logger;
@@ -47,10 +46,9 @@ import java.util.concurrent.TimeUnit;
 public class NodeDecommissionManager {
 
   private ScheduledExecutorService executor;
-  private DatanodeAdminMonitorInterface monitor;
+  private DatanodeAdminMonitor monitor;
 
   private NodeManager nodeManager;
-  private PipelineManager pipelineManager;
   //private ContainerManager containerManager;
   private EventPublisher eventQueue;
   private ReplicationManager replicationManager;
@@ -168,11 +166,10 @@ public class NodeDecommissionManager {
   }
 
   public NodeDecommissionManager(OzoneConfiguration config, NodeManager nm,
-      PipelineManager pm, ContainerManager containerManager,
+      ContainerManager containerManager,
       EventPublisher eventQueue, ReplicationManager rm) {
     this.nodeManager = nm;
     conf = config;
-    this.pipelineManager = pm;
     //this.containerManager = containerManager;
     this.eventQueue = eventQueue;
     this.replicationManager = rm;
@@ -201,19 +198,15 @@ public class NodeDecommissionManager {
           TimeUnit.SECONDS);
     }
 
-    monitor = new DatanodeAdminMonitor(conf);
-    monitor.setConf(conf);
-    monitor.setEventQueue(this.eventQueue);
-    monitor.setNodeManager(nodeManager);
-    monitor.setPipelineManager(pipelineManager);
-    monitor.setReplicationManager(replicationManager);
+    monitor = new DatanodeAdminMonitorImpl(conf, eventQueue, nodeManager,
+        replicationManager);
 
     executor.scheduleAtFixedRate(monitor, monitorInterval, monitorInterval,
         TimeUnit.SECONDS);
   }
 
   @VisibleForTesting
-  public DatanodeAdminMonitorInterface getMonitor() {
+  public DatanodeAdminMonitor getMonitor() {
     return monitor;
   }
 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index 651f259..a7a75eb 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -340,7 +340,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
         pipelineManager);
 
     scmDecommissionManager = new NodeDecommissionManager(conf, scmNodeManager,
-        pipelineManager, containerManager, eventQueue, replicationManager);
+        containerManager, eventQueue, replicationManager);
 
     eventQueue.addHandler(SCMEvents.DATANODE_COMMAND, scmNodeManager);
     eventQueue.addHandler(SCMEvents.RETRIABLE_DATANODE_COMMAND, scmNodeManager);
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
index 98ad037..5b74750 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
@@ -52,7 +52,7 @@ public class TestDatanodeAdminMonitor {
 
   private SimpleMockNodeManager nodeManager;
   private OzoneConfiguration conf;
-  private DatanodeAdminMonitor monitor;
+  private DatanodeAdminMonitorImpl monitor;
   private DatanodeAdminHandler startAdminHandler;
   private ReplicationManager repManager;
   private EventQueue eventQueue;
@@ -69,10 +69,8 @@ public class TestDatanodeAdminMonitor {
 
     repManager = Mockito.mock(ReplicationManager.class);
 
-    monitor = new DatanodeAdminMonitor(conf);
-    monitor.setEventQueue(eventQueue);
-    monitor.setNodeManager(nodeManager);
-    monitor.setReplicationManager(repManager);
+    monitor =
+        new DatanodeAdminMonitorImpl(conf, eventQueue, nodeManager, repManager);
   }
 
   @After
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminNodeDetails.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminNodeDetails.java
index 916e383..c5310b9 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminNodeDetails.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminNodeDetails.java
@@ -35,12 +35,10 @@ import static org.junit.Assert.assertNotEquals;
 public class TestDatanodeAdminNodeDetails {
 
   private OzoneConfiguration conf;
-  private DatanodeAdminMonitor monitor;
 
   @Before
   public void setup() {
     conf = new OzoneConfiguration();
-    monitor = new DatanodeAdminMonitor(conf);
   }
 
   @After
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
index 2da7a61..47055f5 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeDecommissionManager.java
@@ -56,7 +56,7 @@ public class TestNodeDecommissionManager {
     conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
     nodeManager = createNodeManager(conf);
     decom = new NodeDecommissionManager(
-        conf, nodeManager, null, null, null, null);
+        conf, nodeManager, null, null, null);
   }
 
   @Test


---------------------------------------------------------------------
To unsubscribe, e-mail: ozone-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: ozone-commits-help@hadoop.apache.org


Mime
View raw message