Return-Path: Delivered-To: apmail-lucene-hadoop-commits-archive@locus.apache.org Received: (qmail 2497 invoked from network); 11 Apr 2007 16:14:40 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 11 Apr 2007 16:14:40 -0000 Received: (qmail 37130 invoked by uid 500); 11 Apr 2007 16:14:46 -0000 Delivered-To: apmail-lucene-hadoop-commits-archive@lucene.apache.org Received: (qmail 37118 invoked by uid 500); 11 Apr 2007 16:14:46 -0000 Mailing-List: contact hadoop-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hadoop-dev@lucene.apache.org Delivered-To: mailing list hadoop-commits@lucene.apache.org Received: (qmail 37105 invoked by uid 99); 11 Apr 2007 16:14:46 -0000 Received: from herse.apache.org (HELO herse.apache.org) (140.211.11.133) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 11 Apr 2007 09:14:46 -0700 X-ASF-Spam-Status: No, hits=-98.6 required=10.0 tests=ALL_TRUSTED,INFO_TLD,NO_REAL_NAME X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO eris.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 11 Apr 2007 09:14:38 -0700 Received: by eris.apache.org (Postfix, from userid 65534) id 8C5CA1A9838; Wed, 11 Apr 2007 09:14:18 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r527563 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/dfs/FSNamesystem.java src/test/org/apache/hadoop/dfs/TestHost2NodesMap.java Date: Wed, 11 Apr 2007 16:14:18 -0000 To: hadoop-commits@lucene.apache.org From: tomwhite@apache.org X-Mailer: svnmailer-1.1.0 Message-Id: <20070411161418.8C5CA1A9838@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: tomwhite Date: Wed Apr 11 09:14:17 2007 New Revision: 527563 URL: http://svn.apache.org/viewvc?view=rev&rev=527563 Log: HADOOP-971. Improve DFS Scalability: Improve name node performance by adding a hostname to datanodes map. Contributed by Hairong Kuang. Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestHost2NodesMap.java Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=527563&r1=527562&r2=527563 ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Wed Apr 11 09:14:17 2007 @@ -159,6 +159,9 @@ mapper against the types specified in JobConf. (Tahir Hashmi via tomwhite) +49. HADOOP-971. Improve DFS Scalability: Improve name node performance + by adding a hostname to datanodes map. (Hairong Kuang via tomwhite) + Release 0.12.3 - 2007-04-06 Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?view=diff&rev=527563&r1=527562&r2=527563 ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Wed Apr 11 09:14:17 2007 @@ -612,6 +612,8 @@ if (blocks != null) { results = new Object[2]; DatanodeDescriptor machineSets[][] = new DatanodeDescriptor[blocks.length][]; + DatanodeDescriptor client = + host2DataNodeMap.getDatanodeByHost(clientMachine); for (int i = 0; i < blocks.length; i++) { int numNodes = blocksMap.numNodes( blocks[i] ); @@ -624,8 +626,7 @@ blocksMap.nodeIterator( blocks[i] ); it.hasNext(); ) { machineSets[i][ numNodes++ ] = it.next(); } - clusterMap.sortByDistance( getDatanodeByHost(clientMachine), - machineSets[i] ); + clusterMap.sortByDistance( client, machineSets[i] ); } } @@ -799,7 +800,8 @@ } // Get the array of replication targets - DatanodeDescriptor clientNode = getDatanodeByHost(clientMachine.toString()); + DatanodeDescriptor clientNode = + host2DataNodeMap.getDatanodeByHost(clientMachine.toString()); DatanodeDescriptor targets[] = replicator.chooseTarget(replication, clientNode, null, blockSize); if (targets.length < this.minReplication) { @@ -1603,7 +1605,7 @@ + " storage " + nodeReg.getStorageID() ); DatanodeDescriptor nodeS = datanodeMap.get(nodeReg.getStorageID()); - DatanodeDescriptor nodeN = getDatanodeByName( nodeReg.getName() ); + DatanodeDescriptor nodeN = host2DataNodeMap.getDatanodeByName( nodeReg.getName() ); if( nodeN != null && nodeN != nodeS ) { NameNode.LOG.info( "BLOCK* NameSystem.registerDatanode: " @@ -1995,6 +1997,8 @@ void unprotectedAddDatanode( DatanodeDescriptor nodeDescr ) { datanodeMap.put( nodeDescr.getStorageID(), nodeDescr ); + host2DataNodeMap.add(nodeDescr); + NameNode.stateChangeLog.debug( "BLOCK* NameSystem.unprotectedAddDatanode: " + "node " + nodeDescr.getName() + " is added to datanodeMap." ); @@ -2006,12 +2010,13 @@ * * @param nodeID node */ - void wipeDatanode( DatanodeID nodeID ) { + void wipeDatanode( DatanodeID nodeID ) throws IOException { String key = nodeID.getStorageID(); datanodeMap.remove(key); + host2DataNodeMap.remove(getDatanode( nodeID )); NameNode.stateChangeLog.debug( "BLOCK* NameSystem.wipeDatanode: " - + nodeID.getName() + " storage " + nodeID.getStorageID() + + nodeID.getName() + " storage " + key + " is removed from datanodeMap."); } @@ -3460,37 +3465,136 @@ return node; } - /** - * Find data node by its name. - * - * This method is called when the node is registering. - * Not performance critical. - * Otherwise an additional tree-like structure will be required. - * - * @param name - * @return DatanodeDescriptor if found or null otherwise - * @throws IOException - */ - public DatanodeDescriptor getDatanodeByName( String name ) throws IOException { - for (Iterator it = datanodeMap.values().iterator(); it.hasNext(); ) { - DatanodeDescriptor node = it.next(); - if( node.getName().equals(name) ) - return node; + static class Host2NodesMap { + private HashMap map + = new HashMap(); + private Random r = new Random(); + + /** Check if node is already in the map */ + synchronized boolean contains(DatanodeDescriptor node) { + if( node==null ) return false; + + String host = node.getHost(); + DatanodeDescriptor[] nodes = map.get(host); + if( nodes != null ) { + for(DatanodeDescriptor containedNode:nodes) { + if(node==containedNode) + return true; + } + } + return false; } - return null; - } - - /* Find data node by its host name. */ - private DatanodeDescriptor getDatanodeByHost( String name ) { - for (Iterator it = datanodeMap.values().iterator(); - it.hasNext(); ) { - DatanodeDescriptor node = it.next(); - if( node.getHost().equals(name) ) - return node; + + /** add to the map + * return true if the node is added; false otherwise + */ + synchronized boolean add(DatanodeDescriptor node) { + if(node==null || contains(node)) return false; + + String host = node.getHost(); + DatanodeDescriptor[] nodes = map.get(host); + DatanodeDescriptor[] newNodes; + if(nodes==null) { + newNodes = new DatanodeDescriptor[1]; + newNodes[0]=node; + } else { // rare case: more than one datanode on the host + newNodes = new DatanodeDescriptor[nodes.length+1]; + System.arraycopy(nodes, 0, newNodes, 0, nodes.length); + newNodes[nodes.length] = node; + } + map.put(host, newNodes); + return true; + } + + /** remove node from the map + * return true if the node is removed; false otherwise + */ + synchronized boolean remove(DatanodeDescriptor node) { + if(node==null) return false; + + String host = node.getHost(); + DatanodeDescriptor[] nodes = map.get(host); + if(nodes==null) { + return false; + } + if( nodes.length==1 ) { + if( nodes[0]==node ) { + map.remove(host); + return true; + } else { + return false; + } + } + //rare case + int i=0; + for(; i