hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tomwh...@apache.org
Subject svn commit: r527563 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/dfs/FSNamesystem.java src/test/org/apache/hadoop/dfs/TestHost2NodesMap.java
Date Wed, 11 Apr 2007 16:14:18 GMT
Author: tomwhite
Date: Wed Apr 11 09:14:17 2007
New Revision: 527563

URL: http://svn.apache.org/viewvc?view=rev&rev=527563
Log:
HADOOP-971.  Improve DFS Scalability: Improve name node performance by adding a hostname to
datanodes map.  Contributed by Hairong Kuang.

Added:
    lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestHost2NodesMap.java
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=527563&r1=527562&r2=527563
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed Apr 11 09:14:17 2007
@@ -159,6 +159,9 @@
     mapper against the types specified in JobConf.  
     (Tahir Hashmi via tomwhite)
 
+49. HADOOP-971.  Improve DFS Scalability: Improve name node performance
+    by adding a hostname to datanodes map.  (Hairong Kuang via tomwhite)
+
 
 Release 0.12.3 - 2007-04-06
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?view=diff&rev=527563&r1=527562&r2=527563
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Wed Apr 11 09:14:17 2007
@@ -612,6 +612,8 @@
         if (blocks != null) {
             results = new Object[2];
             DatanodeDescriptor machineSets[][] = new DatanodeDescriptor[blocks.length][];
+            DatanodeDescriptor client = 
+              host2DataNodeMap.getDatanodeByHost(clientMachine);
 
             for (int i = 0; i < blocks.length; i++) {
                 int numNodes = blocksMap.numNodes( blocks[i] );
@@ -624,8 +626,7 @@
                          blocksMap.nodeIterator( blocks[i] ); it.hasNext(); ) {
                         machineSets[i][ numNodes++ ] = it.next();
                     }
-                    clusterMap.sortByDistance( getDatanodeByHost(clientMachine),
-                                               machineSets[i] );
+                    clusterMap.sortByDistance( client, machineSets[i] );
                 }
             }
 
@@ -799,7 +800,8 @@
         }
 
         // Get the array of replication targets
-        DatanodeDescriptor clientNode = getDatanodeByHost(clientMachine.toString());
+        DatanodeDescriptor clientNode = 
+          host2DataNodeMap.getDatanodeByHost(clientMachine.toString());
         DatanodeDescriptor targets[] = replicator.chooseTarget(replication,
                                                       clientNode, null, blockSize);
         if (targets.length < this.minReplication) {
@@ -1603,7 +1605,7 @@
           + " storage " + nodeReg.getStorageID() );
 
       DatanodeDescriptor nodeS = datanodeMap.get(nodeReg.getStorageID());
-      DatanodeDescriptor nodeN = getDatanodeByName( nodeReg.getName() );
+      DatanodeDescriptor nodeN = host2DataNodeMap.getDatanodeByName( nodeReg.getName() );
       
       if( nodeN != null && nodeN != nodeS ) {
           NameNode.LOG.info( "BLOCK* NameSystem.registerDatanode: "
@@ -1995,6 +1997,8 @@
     
     void unprotectedAddDatanode( DatanodeDescriptor nodeDescr ) {
       datanodeMap.put( nodeDescr.getStorageID(), nodeDescr );
+      host2DataNodeMap.add(nodeDescr);
+      
       NameNode.stateChangeLog.debug(
           "BLOCK* NameSystem.unprotectedAddDatanode: "
           + "node " + nodeDescr.getName() + " is added to datanodeMap." );
@@ -2006,12 +2010,13 @@
      * 
      * @param nodeID node
      */
-    void wipeDatanode( DatanodeID nodeID ) {
+    void wipeDatanode( DatanodeID nodeID ) throws IOException {
       String key = nodeID.getStorageID();
       datanodeMap.remove(key);
+      host2DataNodeMap.remove(getDatanode( nodeID ));
       NameNode.stateChangeLog.debug(
           "BLOCK* NameSystem.wipeDatanode: "
-          + nodeID.getName() + " storage " + nodeID.getStorageID() 
+          + nodeID.getName() + " storage " + key 
           + " is removed from datanodeMap.");
     }
     
@@ -3460,37 +3465,136 @@
       return node;
     }
     
-    /**
-     * Find data node by its name.
-     * 
-     * This method is called when the node is registering.
-     * Not performance critical.
-     * Otherwise an additional tree-like structure will be required.
-     * 
-     * @param name
-     * @return DatanodeDescriptor if found or null otherwise 
-     * @throws IOException
-     */
-    public DatanodeDescriptor getDatanodeByName( String name ) throws IOException {
-      for (Iterator<DatanodeDescriptor> it = datanodeMap.values().iterator(); it.hasNext(); ) {
-        DatanodeDescriptor node = it.next();
-        if( node.getName().equals(name) )
-           return node;
+    static class Host2NodesMap {
+      private HashMap<String, DatanodeDescriptor[]> map
+                        = new HashMap<String, DatanodeDescriptor[]>();
+      private Random r = new Random();
+                        
+      /** Check if node is already in the map */
+      synchronized boolean contains(DatanodeDescriptor node) {
+        if( node==null ) return false;
+        
+        String host = node.getHost();
+        DatanodeDescriptor[] nodes = map.get(host);
+        if( nodes != null ) {
+          for(DatanodeDescriptor containedNode:nodes) {
+            if(node==containedNode)
+              return true;
+          }
+        }
+        return false;
       }
-      return null;
-    }
-    
-    /* Find data node by its host name. */
-    private DatanodeDescriptor getDatanodeByHost( String name ) {
-        for (Iterator<DatanodeDescriptor> it = datanodeMap.values().iterator(); 
-        it.hasNext(); ) {
-            DatanodeDescriptor node = it.next();
-            if( node.getHost().equals(name) )
-                return node;
+      
+      /** add <node.getHost(), node> to the map 
+       * return true if the node is added; false otherwise
+       */
+      synchronized boolean add(DatanodeDescriptor node) {
+        if(node==null || contains(node)) return false;
+        
+        String host = node.getHost();
+        DatanodeDescriptor[] nodes = map.get(host);
+        DatanodeDescriptor[] newNodes;
+        if(nodes==null) {
+          newNodes = new DatanodeDescriptor[1];
+          newNodes[0]=node;
+        } else { // rare case: more than one datanode on the host
+          newNodes = new DatanodeDescriptor[nodes.length+1];
+          System.arraycopy(nodes, 0, newNodes, 0, nodes.length);
+          newNodes[nodes.length] = node;
+        }
+        map.put(host, newNodes);
+        return true;
+      }
+      
+      /** remove node from the map 
+       * return true if the node is removed; false otherwise
+       */
+      synchronized boolean remove(DatanodeDescriptor node) {
+        if(node==null) return false;
+        
+        String host = node.getHost();
+        DatanodeDescriptor[] nodes = map.get(host);
+        if(nodes==null) {
+          return false;
+        }
+        if( nodes.length==1 ) {
+          if( nodes[0]==node ) {
+            map.remove(host);
+            return true;
+          } else {
+            return false;
+          }
+        }
+        //rare case
+        int i=0;
+        for(; i<nodes.length; i++) {
+          if(nodes[i]==node) {
+            break;
+          }
+        }
+        if( i==nodes.length ) {
+          return false;
+        } else {
+          DatanodeDescriptor[] newNodes;
+          newNodes = new DatanodeDescriptor[nodes.length-1];
+          System.arraycopy(nodes, 0, newNodes, 0, i);
+          System.arraycopy(nodes, i+1, newNodes, i, nodes.length-i-1);
+          map.put(host, newNodes);
+          return true;
+        }
+      }
+      
+      /** get a data node by its host
+      * @return DatanodeDescriptor if found; otherwise null
+      */
+      synchronized DatanodeDescriptor getDatanodeByHost( String host ) {
+        if(host==null) return null;
+        
+        DatanodeDescriptor[] nodes = map.get(host);
+        // no entry
+        if( nodes== null ) {
+          return null;
+        }
+        // one node
+        if (nodes.length == 1) {
+          return nodes[0];
+        }
+        // more than one node
+        return nodes[r.nextInt(nodes.length)];
+      }
+      
+      /**
+       * Find data node by its name.
+       * 
+       * @return DatanodeDescriptor if found or null otherwise 
+       */
+      public DatanodeDescriptor getDatanodeByName( String name ) {
+        if(name==null) return null;
+        
+        int colon = name.indexOf(":");
+        String host;
+        if (colon < 0) {
+          host = name;
+        } else {
+          host = name.substring(0, colon);
+        }
+
+        DatanodeDescriptor[] nodes = map.get(host);
+        // no entry
+        if( nodes== null ) {
+          return null;
+        }
+        for(DatanodeDescriptor containedNode:nodes) {
+          if(name.equals(containedNode.getName())) {
+            return containedNode;
+          }
         }
         return null;
+      }
     }
     
+    private Host2NodesMap host2DataNodeMap = new Host2NodesMap();
+       
     /** Stop at and return the datanode at index (used for content browsing)*/
     private DatanodeInfo getDatanodeByIndex( int index ) {
       int i = 0;

Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestHost2NodesMap.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestHost2NodesMap.java?view=auto&rev=527563
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestHost2NodesMap.java (added)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestHost2NodesMap.java Wed Apr 11 09:14:17 2007
@@ -0,0 +1,77 @@
+package org.apache.hadoop.dfs;
+
+import junit.framework.TestCase;
+
+public class TestHost2NodesMap extends TestCase {
+  static private FSNamesystem.Host2NodesMap map = new FSNamesystem.Host2NodesMap();
+  private final static DatanodeDescriptor dataNodes[] = new DatanodeDescriptor[] {
+    new DatanodeDescriptor(new DatanodeID("h1:5020", "0", -1), "/d1/r1"),
+    new DatanodeDescriptor(new DatanodeID("h2:5020", "0", -1), "/d1/r1"),
+    new DatanodeDescriptor(new DatanodeID("h3:5020", "0", -1), "/d1/r2"),
+    new DatanodeDescriptor(new DatanodeID("h3:5030", "0", -1), "/d1/r2"),
+  };
+  private final static DatanodeDescriptor NULL_NODE = null; 
+  private final static DatanodeDescriptor NODE = 
+    new DatanodeDescriptor(new DatanodeID("h3:5040", "0", -1), "/d1/r4");
+
+  static {
+    for(DatanodeDescriptor node:dataNodes) {
+      map.add( node );
+    }
+    map.add(NULL_NODE);
+  }
+  
+  public void testContains() throws Exception {
+    for(int i=0; i<dataNodes.length; i++) {
+      assertTrue( map.contains(dataNodes[i]) );
+    }
+    assertFalse( map.contains( NULL_NODE ) );
+    assertFalse( map.contains( NODE ) );
+  }
+
+  public void testGetDatanodeByHost() throws Exception {
+    assertTrue(map.getDatanodeByHost("h1")==dataNodes[0]);
+    assertTrue(map.getDatanodeByHost("h2")==dataNodes[1]);
+    DatanodeDescriptor node = map.getDatanodeByHost("h3");
+    assertTrue(node==dataNodes[2] || node==dataNodes[3]);
+    assertTrue(null==map.getDatanodeByHost("h4"));
+  }
+
+  public void testGetDatanodeByName() throws Exception {
+    assertTrue(map.getDatanodeByName("h1:5020")==dataNodes[0]);
+    assertTrue(map.getDatanodeByName("h1:5030")==null);
+    assertTrue(map.getDatanodeByName("h2:5020")==dataNodes[1]);
+    assertTrue(map.getDatanodeByName("h2:5030")==null);
+    assertTrue(map.getDatanodeByName("h3:5020")==dataNodes[2]);
+    assertTrue(map.getDatanodeByName("h3:5030")==dataNodes[3]);
+    assertTrue(map.getDatanodeByName("h3:5040")==null);
+    assertTrue(map.getDatanodeByName("h4")==null);
+    assertTrue(map.getDatanodeByName(null)==null);
+  }
+
+  public void testRemove() throws Exception {
+    assertFalse(map.remove(NODE));
+    
+    assertTrue(map.remove(dataNodes[0]));
+    assertTrue(map.getDatanodeByHost("h1")==null);
+    assertTrue(map.getDatanodeByHost("h2")==dataNodes[1]);
+    DatanodeDescriptor node = map.getDatanodeByHost("h3");
+    assertTrue(node==dataNodes[2] || node==dataNodes[3]);
+    assertTrue(null==map.getDatanodeByHost("h4"));
+    
+    assertTrue(map.remove(dataNodes[2]));
+    assertTrue(map.getDatanodeByHost("h1")==null);
+    assertTrue(map.getDatanodeByHost("h2")==dataNodes[1]);
+    assertTrue(map.getDatanodeByHost("h3")==dataNodes[3]);
+    
+    assertTrue(map.remove(dataNodes[3]));
+    assertTrue(map.getDatanodeByHost("h1")==null);
+    assertTrue(map.getDatanodeByHost("h2")==dataNodes[1]);
+    assertTrue(map.getDatanodeByHost("h3")==null);
+    
+    assertFalse(map.remove(NULL_NODE));
+    assertTrue(map.remove(dataNodes[1]));
+    assertFalse(map.remove(dataNodes[1]));
+  }
+
+}



Mime
View raw message