Return-Path: X-Original-To: apmail-hadoop-hdfs-commits-archive@minotaur.apache.org Delivered-To: apmail-hadoop-hdfs-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id A4E3771F0 for ; Thu, 11 Aug 2011 22:16:39 +0000 (UTC) Received: (qmail 31103 invoked by uid 500); 11 Aug 2011 22:16:39 -0000 Delivered-To: apmail-hadoop-hdfs-commits-archive@hadoop.apache.org Received: (qmail 31040 invoked by uid 500); 11 Aug 2011 22:16:38 -0000 Mailing-List: contact hdfs-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hdfs-dev@hadoop.apache.org Delivered-To: mailing list hdfs-commits@hadoop.apache.org Received: (qmail 31027 invoked by uid 99); 11 Aug 2011 22:16:38 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 11 Aug 2011 22:16:38 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 11 Aug 2011 22:16:36 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 881ED238897D; Thu, 11 Aug 2011 22:16:17 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1156847 - in /hadoop/common/trunk/hdfs: CHANGES.txt src/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Date: Thu, 11 Aug 2011 22:16:17 -0000 To: hdfs-commits@hadoop.apache.org From: szetszwo@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110811221617.881ED238897D@eris.apache.org> Author: szetszwo Date: Thu Aug 11 22:16:16 2011 New Revision: 1156847 URL: http://svn.apache.org/viewvc?rev=1156847&view=rev Log: HDFS-2229. Fix a deadlock in namenode by enforcing lock acquisition ordering. Modified: hadoop/common/trunk/hdfs/CHANGES.txt hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Modified: hadoop/common/trunk/hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hdfs/CHANGES.txt?rev=1156847&r1=1156846&r2=1156847&view=diff ============================================================================== --- hadoop/common/trunk/hdfs/CHANGES.txt (original) +++ hadoop/common/trunk/hdfs/CHANGES.txt Thu Aug 11 22:16:16 2011 @@ -954,6 +954,9 @@ Trunk (unreleased changes) HDFS-2245. Fix a NullPointerException in BlockManager.chooseTarget(..). (szetszwo) + HDFS-2229. Fix a deadlock in namenode by enforcing lock acquisition + ordering. (szetszwo) + BREAKDOWN OF HDFS-1073 SUBTASKS HDFS-1521. Persist transaction ID on disk between NN restarts. Modified: hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java?rev=1156847&r1=1156846&r2=1156847&view=diff ============================================================================== --- hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java (original) +++ hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java Thu Aug 11 22:16:16 2011 @@ -1829,39 +1829,37 @@ public class BlockManager { * over or under replicated. Place it into the respective queue. */ public void processMisReplicatedBlocks() { + assert namesystem.hasWriteLock(); + long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0; - namesystem.writeLock(); - try { - neededReplications.clear(); - for (BlockInfo block : blocksMap.getBlocks()) { - INodeFile fileINode = block.getINode(); - if (fileINode == null) { - // block does not belong to any file - nrInvalid++; - addToInvalidates(block); - continue; - } - // calculate current replication - short expectedReplication = fileINode.getReplication(); - NumberReplicas num = countNodes(block); - int numCurrentReplica = num.liveReplicas(); - // add to under-replicated queue if need to be - if (isNeededReplication(block, expectedReplication, numCurrentReplica)) { - if (neededReplications.add(block, numCurrentReplica, num - .decommissionedReplicas(), expectedReplication)) { - nrUnderReplicated++; - } + neededReplications.clear(); + for (BlockInfo block : blocksMap.getBlocks()) { + INodeFile fileINode = block.getINode(); + if (fileINode == null) { + // block does not belong to any file + nrInvalid++; + addToInvalidates(block); + continue; + } + // calculate current replication + short expectedReplication = fileINode.getReplication(); + NumberReplicas num = countNodes(block); + int numCurrentReplica = num.liveReplicas(); + // add to under-replicated queue if need to be + if (isNeededReplication(block, expectedReplication, numCurrentReplica)) { + if (neededReplications.add(block, numCurrentReplica, num + .decommissionedReplicas(), expectedReplication)) { + nrUnderReplicated++; } + } - if (numCurrentReplica > expectedReplication) { - // over-replicated block - nrOverReplicated++; - processOverReplicatedBlock(block, expectedReplication, null, null); - } + if (numCurrentReplica > expectedReplication) { + // over-replicated block + nrOverReplicated++; + processOverReplicatedBlock(block, expectedReplication, null, null); } - } finally { - namesystem.writeUnlock(); } + LOG.info("Total number of blocks = " + blocksMap.size()); LOG.info("Number of invalid blocks = " + nrInvalid); LOG.info("Number of under-replicated blocks = " + nrUnderReplicated); Modified: hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1156847&r1=1156846&r2=1156847&view=diff ============================================================================== --- hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original) +++ hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Thu Aug 11 22:16:16 2011 @@ -313,14 +313,19 @@ public class FSNamesystem implements RwL * Activate FSNamesystem daemons. */ void activate(Configuration conf) throws IOException { - setBlockTotal(); - blockManager.activate(conf); - this.lmthread = new Daemon(leaseManager.new Monitor()); - lmthread.start(); - - this.nnrmthread = new Daemon(new NameNodeResourceMonitor()); - nnrmthread.start(); + writeLock(); + try { + setBlockTotal(); + blockManager.activate(conf); + this.lmthread = new Daemon(leaseManager.new Monitor()); + lmthread.start(); + this.nnrmthread = new Daemon(new NameNodeResourceMonitor()); + nnrmthread.start(); + } finally { + writeUnlock(); + } + registerMXBean(); DefaultMetricsSystem.instance().register(this); }