hadoop-ozone-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a..@apache.org
Subject [hadoop-ozone] branch ozone-0.5.0 updated: HDDS-3116. Datanode sometimes fails to start with NPE when starting Ratis xceiver server (#630)
Date Fri, 13 Mar 2020 15:29:53 GMT
This is an automated email from the ASF dual-hosted git repository.

arp pushed a commit to branch ozone-0.5.0
in repository https://gitbox.apache.org/repos/asf/hadoop-ozone.git


The following commit(s) were added to refs/heads/ozone-0.5.0 by this push:
     new cf3f6ed  HDDS-3116. Datanode sometimes fails to start with NPE when starting Ratis
xceiver server (#630)
cf3f6ed is described below

commit cf3f6ed0da13ba7d71c2ae181e369eaf5e51aa6b
Author: Stephen O'Donnell <stephen.odonnell@gmail.com>
AuthorDate: Thu Mar 12 08:40:31 2020 +0000

    HDDS-3116. Datanode sometimes fails to start with NPE when starting Ratis xceiver server
(#630)
    
    
    (cherry picked from commit c1997218a4e1a6695a275c73cf85360cd046329c)
---
 .../common/statemachine/DatanodeStateMachine.java  | 27 +++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
index 5229ae8..dc39025 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
@@ -21,6 +21,8 @@ import java.io.IOException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
@@ -90,6 +92,11 @@ public class DatanodeStateMachine implements Closeable {
   private JvmPauseMonitor jvmPauseMonitor;
   private CertificateClient dnCertClient;
   private final HddsDatanodeStopService hddsDatanodeStopService;
+  /**
+   * Used to synchronize access to the OzoneContainer object, which is
+   * created in the constructor in a non-thread-safe way - see HDDS-3116.
+   */
+  private final ReadWriteLock constructionLock = new ReentrantReadWriteLock();
 
   /**
    * Constructs a a datanode state machine.
@@ -113,8 +120,16 @@ public class DatanodeStateMachine implements Closeable {
             .setNameFormat("Datanode State Machine Thread - %d").build());
     connectionManager = new SCMConnectionManager(conf);
     context = new StateContext(this.conf, DatanodeStates.getInitState(), this);
-    container = new OzoneContainer(this.datanodeDetails,
-        ozoneConf, context, certClient);
+    // OzoneContainer instance is used in a non-thread safe way by the context
+    // passed to its constructor, so we must synchronize its access. See
+    // HDDS-3116 for more details.
+    constructionLock.writeLock().lock();
+    try {
+      container = new OzoneContainer(this.datanodeDetails,
+          ozoneConf, context, certClient);
+    } finally {
+      constructionLock.writeLock().unlock();
+    }
     dnCertClient = certClient;
     nextHB = new AtomicLong(Time.monotonicNow());
 
@@ -173,7 +188,13 @@ public class DatanodeStateMachine implements Closeable {
   }
 
   public OzoneContainer getContainer() {
-    return this.container;
+    // See HDDS-3116 for an explanation of why this lock is needed
+    constructionLock.readLock().lock();
+    try {
+      return this.container;
+    } finally {
+      constructionLock.readLock().unlock();
+    }
   }
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: ozone-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: ozone-commits-help@hadoop.apache.org


Mime
View raw message