Return-Path: X-Original-To: apmail-hadoop-common-commits-archive@www.apache.org Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 0447110355 for ; Tue, 9 Dec 2014 18:56:54 +0000 (UTC) Received: (qmail 7294 invoked by uid 500); 9 Dec 2014 18:56:53 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 7220 invoked by uid 500); 9 Dec 2014 18:56:53 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 7211 invoked by uid 99); 9 Dec 2014 18:56:53 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 09 Dec 2014 18:56:53 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 7DA62A2153B; Tue, 9 Dec 2014 18:56:53 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: cmccabe@apache.org To: common-commits@hadoop.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: hadoop git commit: Incorrect locking in FsVolumeList#checkDirs can hang datanodes (Noah Lorang via Colin P. McCabe) Date: Tue, 9 Dec 2014 18:56:53 +0000 (UTC) Repository: hadoop Updated Branches: refs/heads/trunk be86237c0 -> d8352b9b2 Incorrect locking in FsVolumeList#checkDirs can hang datanodes (Noah Lorang via Colin P. McCabe) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d8352b9b Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d8352b9b Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d8352b9b Branch: refs/heads/trunk Commit: d8352b9b2b99aa46679c5880a724ba3f0ceb41ff Parents: be86237 Author: Colin Patrick Mccabe Authored: Tue Dec 9 10:55:17 2014 -0800 Committer: Colin Patrick Mccabe Committed: Tue Dec 9 10:56:46 2014 -0800 ---------------------------------------------------------------------- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../datanode/fsdataset/impl/FsVolumeList.java | 56 ++++++++++---------- 2 files changed, 31 insertions(+), 28 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/d8352b9b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 55026a2..626d90a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -574,6 +574,9 @@ Release 2.6.1 - UNRELEASED HDFS-4882. Prevent the Namenode's LeaseManager from looping forever in checkLeases (Ravi Prakash via Colin P. McCabe) + HDFS-7489. Incorrect locking in FsVolumeList#checkDirs can hang datanodes + (Noah Lorang via Colin P. McCabe) + Release 2.6.0 - 2014-11-18 INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/d8352b9b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java index 837ddf7..55329ae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java @@ -36,6 +36,7 @@ class FsVolumeList { * This list is replaced on modification holding "this" lock. */ volatile List volumes = null; + private Object checkDirsMutex = new Object(); private final VolumeChoosingPolicy blockChooser; private volatile int numFailedVolumes; @@ -167,40 +168,39 @@ class FsVolumeList { * Calls {@link FsVolumeImpl#checkDirs()} on each volume, removing any * volumes from the active list that result in a DiskErrorException. * - * This method is synchronized to allow only one instance of checkDirs() - * call + * Use checkDirsMutext to allow only one instance of checkDirs() call + * * @return list of all the removed volumes. */ - synchronized List checkDirs() { - ArrayList removedVols = null; - - // Make a copy of volumes for performing modification - final List volumeList = new ArrayList(volumes); + List checkDirs() { + synchronized(checkDirsMutex) { + ArrayList removedVols = null; + + // Make a copy of volumes for performing modification + final List volumeList = new ArrayList(volumes); - for(Iterator i = volumeList.iterator(); i.hasNext(); ) { - final FsVolumeImpl fsv = i.next(); - try { - fsv.checkDirs(); - } catch (DiskErrorException e) { - FsDatasetImpl.LOG.warn("Removing failed volume " + fsv + ": ",e); - if (removedVols == null) { - removedVols = new ArrayList(1); + for(Iterator i = volumeList.iterator(); i.hasNext(); ) { + final FsVolumeImpl fsv = i.next(); + try { + fsv.checkDirs(); + } catch (DiskErrorException e) { + FsDatasetImpl.LOG.warn("Removing failed volume " + fsv + ": ",e); + if (removedVols == null) { + removedVols = new ArrayList(1); + } + removedVols.add(fsv); + removeVolume(fsv.getBasePath()); + numFailedVolumes++; } - removedVols.add(fsv); - fsv.shutdown(); - i.remove(); // Remove the volume - numFailedVolumes++; } - } - - if (removedVols != null && removedVols.size() > 0) { - // Replace volume list - volumes = Collections.unmodifiableList(volumeList); - FsDatasetImpl.LOG.warn("Completed checkDirs. Removed " + removedVols.size() - + " volumes. Current volumes: " + this); - } + + if (removedVols != null && removedVols.size() > 0) { + FsDatasetImpl.LOG.warn("Completed checkDirs. Removed " + removedVols.size() + + " volumes. Current volumes: " + this); + } - return removedVols; + return removedVols; + } } @Override