Return-Path: Delivered-To: apmail-lucene-hadoop-commits-archive@locus.apache.org Received: (qmail 52580 invoked from network); 18 Dec 2006 22:40:03 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 18 Dec 2006 22:40:03 -0000 Received: (qmail 69657 invoked by uid 500); 18 Dec 2006 22:40:11 -0000 Delivered-To: apmail-lucene-hadoop-commits-archive@lucene.apache.org Received: (qmail 69629 invoked by uid 500); 18 Dec 2006 22:40:11 -0000 Mailing-List: contact hadoop-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hadoop-dev@lucene.apache.org Delivered-To: mailing list hadoop-commits@lucene.apache.org Received: (qmail 69619 invoked by uid 99); 18 Dec 2006 22:40:11 -0000 Received: from herse.apache.org (HELO herse.apache.org) (140.211.11.133) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 18 Dec 2006 14:40:10 -0800 X-ASF-Spam-Status: No, hits=-8.6 required=10.0 tests=ALL_TRUSTED,INFO_TLD,NO_REAL_NAME X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO eris.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 18 Dec 2006 14:40:02 -0800 Received: by eris.apache.org (Postfix, from userid 65534) id 038871A981A; Mon, 18 Dec 2006 14:39:14 -0800 (PST) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r488444 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/dfs/FSNamesystem.java src/java/org/apache/hadoop/dfs/NameNode.java Date: Mon, 18 Dec 2006 22:39:13 -0000 To: hadoop-commits@lucene.apache.org From: cutting@apache.org X-Mailer: svnmailer-1.1.0 Message-Id: <20061218223914.038871A981A@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: cutting Date: Mon Dec 18 14:39:13 2006 New Revision: 488444 URL: http://svn.apache.org/viewvc?view=rev&rev=488444 Log: HADOOP-814. Optimize locking in namenode. Contributed by Dhruba. 
Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=488444&r1=488443&r2=488444 ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Mon Dec 18 14:39:13 2006 @@ -128,6 +128,8 @@ longer passed in several methods, input validation has changed, etc. (omalley via cutting) +36. HADOOP-814. Optimize locking in namenode. (Dhruba Borthakur via cutting) + Release 0.9.2 - 2006-12-15 Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?view=diff&rev=488444&r1=488443&r2=488444 ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Mon Dec 18 14:39:13 2006 @@ -122,11 +122,11 @@ // Stats on overall usage // long totalCapacity = 0, totalRemaining = 0; - + // total number of connections per live datanode int totalLoad = 0; - + // // For the HTTP browsing interface // @@ -1296,20 +1296,21 @@ * @return true if block report is required or false otherwise. 
* @throws IOException */ - public synchronized boolean gotHeartbeat( DatanodeID nodeID, - long capacity, - long remaining, - int xceiverCount - ) throws IOException { + public boolean gotHeartbeat( DatanodeID nodeID, + long capacity, + long remaining, + int xceiverCount + ) throws IOException { boolean needBlockReport; synchronized (heartbeats) { synchronized (datanodeMap) { DatanodeDescriptor nodeinfo = getDatanode( nodeID ); needBlockReport = isDatanodeDead(nodeinfo); - if (nodeinfo == null) + if (nodeinfo == null) { // We do not accept unregistered guests throw new UnregisteredDatanodeException( nodeID ); + } if (nodeinfo.isAlive) { updateStats(nodeinfo, false); } @@ -1325,6 +1326,10 @@ } private void updateStats(DatanodeDescriptor node, boolean isAdded) { + // + // The statistics are protected by the heartbeat lock + // + assert(Thread.holdsLock(heartbeats)); if (isAdded) { totalCapacity += node.getCapacity(); totalRemaining += node.getRemaining(); @@ -1431,19 +1436,39 @@ boolean allAlive = false; while (!allAlive) { boolean foundDead = false; - synchronized(this) { - synchronized (heartbeats) { + DatanodeID nodeID = null; + + // locate the first dead node. + synchronized(heartbeats) { for (Iterator<DatanodeDescriptor> it = heartbeats.iterator(); it.hasNext();) { DatanodeDescriptor nodeInfo = it.next(); if (isDatanodeDead(nodeInfo)) { - NameNode.stateChangeLog.info("BLOCK* NameSystem.heartbeatCheck: " - + "lost heartbeat from " + nodeInfo.getName()); - removeDatanode( nodeInfo ); foundDead = true; + nodeID = nodeInfo; break; } } + } + + // acquire the fsnamesystem lock, and then remove the dead node. 
+ if (foundDead) { + synchronized (this) { + synchronized(heartbeats) { + synchronized (datanodeMap) { + DatanodeDescriptor nodeInfo = null; + try { + nodeInfo = getDatanode(nodeID); + } catch (IOException e) { + nodeInfo = null; + } + if (nodeInfo != null && isDatanodeDead(nodeInfo)) { + NameNode.stateChangeLog.info("BLOCK* NameSystem.heartbeatCheck: " + + "lost heartbeat from " + nodeInfo.getName()); + removeDatanode(nodeInfo); + } + } + } } } allAlive = ! foundDead; @@ -1735,30 +1760,39 @@ } /** - * Total raw bytes + * Total raw bytes. */ public long totalCapacity() { + synchronized (heartbeats) { return totalCapacity; + } } /** - * Total non-used raw bytes + * Total non-used raw bytes. */ public long totalRemaining() { + synchronized (heartbeats) { return totalRemaining; + } } /** + * Total number of connections. */ + public int totalLoad() { + synchronized (heartbeats) { + return totalLoad; + } + } + public synchronized DatanodeInfo[] datanodeReport() { DatanodeInfo results[] = null; - synchronized (heartbeats) { - synchronized (datanodeMap) { + synchronized (datanodeMap) { results = new DatanodeInfo[datanodeMap.size()]; int i = 0; for(Iterator<DatanodeDescriptor> it = datanodeMap.values().iterator(); it.hasNext(); ) results[i++] = new DatanodeInfo( it.next() ); - } } return results; } @@ -1767,8 +1801,7 @@ */ public synchronized void DFSNodesStatus( ArrayList<DatanodeDescriptor> live, ArrayList<DatanodeDescriptor> dead ) { - synchronized (heartbeats) { - synchronized (datanodeMap) { + synchronized (datanodeMap) { for(Iterator<DatanodeDescriptor> it = datanodeMap.values().iterator(); it.hasNext(); ) { DatanodeDescriptor node = it.next(); if( isDatanodeDead(node)) @@ -1776,7 +1809,6 @@ else live.add( node ); } - } } } /** @@ -2018,7 +2050,7 @@ double avgLoad = 0.0; if (heartbeats.size() != 0) { - avgLoad = (double) totalLoad / heartbeats.size(); + avgLoad = (double) totalLoad() / heartbeats.size(); } // choose local replica first if (desiredReplicates != 0) { Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java?view=diff&rev=488444&r1=488443&r2=488444 ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java Mon Dec 18 14:39:13 2006 @@ -427,8 +427,9 @@ */ public long[] getStats() throws IOException { long results[] = new long[2]; - results[0] = namesystem.totalCapacity(); - results[1] = namesystem.totalCapacity() - namesystem.totalRemaining(); + long totalCapacity = namesystem.totalCapacity(); + results[0] = totalCapacity; + results[1] = totalCapacity - namesystem.totalRemaining(); return results; }