hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a..@apache.org
Subject [2/3] hadoop git commit: HDFS-10830. FsDatasetImpl#removeVolumes crashes with IllegalMonitorStateException when vol being removed is in use. (Arpit Agarwal and Manoj Govindassamy)
Date Sun, 11 Sep 2016 01:38:07 GMT
HDFS-10830. FsDatasetImpl#removeVolumes crashes with IllegalMonitorStateException when vol
being removed is in use. (Arpit Agarwal and Manoj Govindassamy)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a99bf26a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a99bf26a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a99bf26a

Branch: refs/heads/trunk
Commit: a99bf26a0899bcc4307c3a242c8414eaef555aa7
Parents: bee9f57
Author: Arpit Agarwal <arp@apache.org>
Authored: Sat Sep 10 18:22:25 2016 -0700
Committer: Arpit Agarwal <arp@apache.org>
Committed: Sat Sep 10 18:22:25 2016 -0700

----------------------------------------------------------------------
 .../apache/hadoop/util/AutoCloseableLock.java   |  8 ++++++++
 .../datanode/fsdataset/impl/FsDatasetImpl.java  |  6 +++++-
 .../datanode/fsdataset/impl/FsVolumeList.java   | 18 +++++++++++------
 .../fsdataset/impl/TestFsDatasetImpl.java       | 21 ++++++++------------
 4 files changed, 33 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/a99bf26a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java
index d920bc6..d7fe93d 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/AutoCloseableLock.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.util;
 
 import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
 
 import com.google.common.annotations.VisibleForTesting;
@@ -135,4 +136,11 @@ public class AutoCloseableLock implements AutoCloseable {
     throw new UnsupportedOperationException();
   }
 
+  /**
+   * See {@link ReentrantLock#newCondition()}.
+   * @return the Condition object
+   */
+  public Condition newCondition() {
+    return lock.newCondition();
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a99bf26a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
index e5da0e5..e9f1dc1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -40,6 +40,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.Executor;
+import java.util.concurrent.locks.Condition;
 import java.util.concurrent.TimeUnit;
 
 import javax.management.NotCompliantMBeanException;
@@ -269,6 +270,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
   private final int maxDataLength;
 
   private final AutoCloseableLock datasetLock;
+  private final Condition datasetLockCondition;
   
   /**
    * An FSDataset has a directory where it loads its data files.
@@ -287,6 +289,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
             DFSConfigKeys.DFS_LOCK_SUPPRESS_WARNING_INTERVAL_DEFAULT,
             TimeUnit.MILLISECONDS),
           300));
+    this.datasetLockCondition = datasetLock.newCondition();
+
     // The number of volumes required for operation is the total number
     // of volumes minus the number of failed volumes we can tolerate.
     volFailuresTolerated = datanode.getDnConf().getVolFailuresTolerated();
@@ -523,7 +527,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
           // Disable the volume from the service.
           asyncDiskService.removeVolume(sd.getCurrentDir());
           volumes.removeVolume(absRoot, clearFailure);
-          volumes.waitVolumeRemoved(5000, this);
+          volumes.waitVolumeRemoved(5000, datasetLockCondition);
 
           // Removed all replica information for the blocks on the volume.
           // Unlike updating the volumeMap in addVolume(), this operation does

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a99bf26a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
index ea4d597..634ad42 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
@@ -31,6 +31,8 @@ import java.util.Map;
 import java.util.TreeMap;
 import java.util.Set;
 import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.Condition;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.StorageType;
@@ -41,6 +43,7 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.VolumeChoosingPolicy;
 import org.apache.hadoop.hdfs.server.datanode.BlockScanner;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.util.AutoCloseableLock;
 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.apache.hadoop.util.Time;
 
@@ -52,7 +55,8 @@ class FsVolumeList {
       Collections.synchronizedMap(new TreeMap<String, VolumeFailureInfo>());
   private final ConcurrentLinkedQueue<FsVolumeImpl> volumesBeingRemoved =
       new ConcurrentLinkedQueue<>();
-  private Object checkDirsMutex = new Object();
+  private final AutoCloseableLock checkDirsLock;
+  private final Condition checkDirsLockCondition;
 
   private final VolumeChoosingPolicy<FsVolumeImpl> blockChooser;
   private final BlockScanner blockScanner;
@@ -62,6 +66,8 @@ class FsVolumeList {
       VolumeChoosingPolicy<FsVolumeImpl> blockChooser) {
     this.blockChooser = blockChooser;
     this.blockScanner = blockScanner;
+    this.checkDirsLock = new AutoCloseableLock();
+    this.checkDirsLockCondition = checkDirsLock.newCondition();
     for (VolumeFailureInfo volumeFailureInfo: initialVolumeFailureInfos) {
       volumeFailureInfos.put(volumeFailureInfo.getFailedStorageLocation(),
           volumeFailureInfo);
@@ -224,12 +230,12 @@ class FsVolumeList {
   /**
    * Calls {@link FsVolumeImpl#checkDirs()} on each volume.
    * 
-   * Use checkDirsMutext to allow only one instance of checkDirs() call
+   * Use {@link checkDirsLock} to allow only one instance of checkDirs() call.
    *
    * @return list of all the failed volumes.
    */
   Set<File> checkDirs() {
-    synchronized(checkDirsMutex) {
+    try (AutoCloseableLock lock = checkDirsLock.acquire()) {
       Set<File> failedVols = null;
       
       // Make a copy of volumes for performing modification 
@@ -260,7 +266,7 @@ class FsVolumeList {
             + " failure volumes.");
       }
 
-      waitVolumeRemoved(5000, checkDirsMutex);
+      waitVolumeRemoved(5000, checkDirsLockCondition);
       return failedVols;
     }
   }
@@ -271,13 +277,13 @@ class FsVolumeList {
    *
    * @param sleepMillis interval to recheck.
    */
-  void waitVolumeRemoved(int sleepMillis, Object monitor) {
+  void waitVolumeRemoved(int sleepMillis, Condition condition) {
     while (!checkVolumesRemoved()) {
       if (FsDatasetImpl.LOG.isDebugEnabled()) {
         FsDatasetImpl.LOG.debug("Waiting for volume reference to be released.");
       }
       try {
-        monitor.wait(sleepMillis);
+        condition.await(sleepMillis, TimeUnit.MILLISECONDS);
       } catch (InterruptedException e) {
         FsDatasetImpl.LOG.info("Thread interrupted when waiting for "
             + "volume reference to be released.");

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a99bf26a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
index b3f04d2..a330fbf 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
@@ -636,24 +636,19 @@ public class TestFsDatasetImpl {
 
     class VolRemoveThread extends Thread {
       public void run() {
+        Set<File> volumesToRemove = new HashSet<>();
         try {
-          Set<File> volumesToRemove = new HashSet<>();
           volumesToRemove.add(StorageLocation.parse(
               dataset.getVolume(eb).getBasePath()).getFile());
-          /**
-           * TODO: {@link FsDatasetImpl#removeVolumes(Set, boolean)} is throwing
-           * IllegalMonitorStateException when there is a parallel reader/writer
-           * to the volume. Remove below exception handling block after fixing
-           * HDFS-10830.
-           */
-          LOG.info("Removing volume " + volumesToRemove);
-          dataset.removeVolumes(volumesToRemove, true);
-          volRemoveCompletedLatch.countDown();
-          LOG.info("Removed volume " + volumesToRemove);
         } catch (Exception e) {
-          LOG.info("Unexpected issue while removing volume: ", e);
-          volRemoveCompletedLatch.countDown();
+          LOG.info("Problem preparing volumes to remove: ", e);
+          Assert.fail("Exception in remove volume thread, check log for " +
+              "details.");
         }
+        LOG.info("Removing volume " + volumesToRemove);
+        dataset.removeVolumes(volumesToRemove, true);
+        volRemoveCompletedLatch.countDown();
+        LOG.info("Removed volume " + volumesToRemove);
       }
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message