hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tomwh...@apache.org
Subject svn commit: r526411 - in /lucene/hadoop/trunk: ./ src/java/org/apache/hadoop/dfs/ src/java/org/apache/hadoop/net/ src/test/org/apache/hadoop/net/
Date Sat, 07 Apr 2007 08:46:55 GMT
Author: tomwhite
Date: Sat Apr  7 01:46:54 2007
New Revision: 526411

URL: http://svn.apache.org/viewvc?view=rev&rev=526411
Log:
HADOOP-1149.  Improve DFS Scalability: optimize getDistance(), contains(), and isOnSameRack()
in NetworkTopology.  Contributed by Hairong Kuang.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NetworkTopology.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/net/Node.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NodeBase.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/net/TestNetworkTopology.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=526411&r1=526410&r2=526411
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Sat Apr  7 01:46:54 2007
@@ -127,6 +127,10 @@
     processOverReplicatedBlock() a no-op if blocks are not 
     over-replicated.  (Raghu Angadi via tomwhite)
 
+40. HADOOP-1149.  Improve DFS Scalability: optimize getDistance(), 
+    contains(), and isOnSameRack() in NetworkTopology.  
+    (Hairong Kuang via tomwhite)
+
 
 Release 0.12.3 - 2007-04-06
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java?view=diff&rev=526411&r1=526410&r2=526411
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java Sat Apr  7 01:46:54
2007
@@ -255,6 +255,19 @@
       }
     }
 
+  private int level; //which level of the tree the node resides
+  private Node parent; //its parent
+
+  /** Return this node's parent */
+  public Node getParent() { return parent; }
+  public void setParent( Node parent ) {this.parent = parent;}
+   
+  /** Return this node's level in the tree.
+   * E.g. the root of a tree returns 0 and its children return 1
+   */
+  public int getLevel() { return level; }
+  public void setLevel( int level) {this.level = level;}
+
   /////////////////////////////////////////////////
   // Writable
   /////////////////////////////////////////////////

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?view=diff&rev=526411&r1=526410&r2=526411
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Sat Apr  7 01:46:54
2007
@@ -2916,7 +2916,8 @@
         results.removeAll(choosenNodes);
         
         // sorting nodes to form a pipeline
-        return getPipeline((writer==null)?localNode:writer, results);
+        return getPipeline((writer==null)?localNode:writer,
+                 results.toArray(new DatanodeDescriptor[results.size()]));
       }
       
       /* choose <i>numOfReplicas</i> from all data nodes */
@@ -3225,22 +3226,20 @@
        */
       private DatanodeDescriptor[] getPipeline(
           DatanodeDescriptor writer,
-          List<DatanodeDescriptor> nodes ) {
-        int numOfNodes = nodes.size();
-        DatanodeDescriptor[] results = new DatanodeDescriptor[numOfNodes];
-        if( numOfNodes==0 ) return results;
+          DatanodeDescriptor[] nodes ) {
+        if( nodes.length==0 ) return nodes;
         
         synchronized( clusterMap ) {
           int index=0;
           if(writer == null || !clusterMap.contains(writer)) {
-            writer = nodes.get(0);
+            writer = nodes[0];
           }
-          for( ;index<numOfNodes; index++ ) {
+          for( ;index<nodes.length; index++ ) {
             DatanodeDescriptor shortestNode = null;
             int shortestDistance = Integer.MAX_VALUE;
             int shortestIndex = index;
-            for( int i=index; i<numOfNodes; i++ ) {
-              DatanodeDescriptor currentNode = nodes.get(i);
+            for( int i=index; i<nodes.length; i++ ) {
+              DatanodeDescriptor currentNode = nodes[i];
               int currentDistance = clusterMap.getDistance( writer, currentNode );
               if(shortestDistance>currentDistance ) {
                 shortestDistance = currentDistance;
@@ -3250,13 +3249,13 @@
             }
             //switch position index & shortestIndex
             if( index != shortestIndex ) {
-              nodes.set(shortestIndex, nodes.get(index));
-              nodes.set(index, shortestNode);
+              nodes[shortestIndex] = nodes[index];
+              nodes[index] = shortestNode;
             }
             writer = shortestNode;
           }
         }
-        return nodes.toArray( results );
+        return nodes;
       }
     } //end of Replicator
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NetworkTopology.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NetworkTopology.java?view=diff&rev=526411&r1=526410&r2=526411
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NetworkTopology.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NetworkTopology.java Sat Apr  7 01:46:54
2007
@@ -61,6 +61,12 @@
             super( name, location );
         }
         
+        /** Construct an InnerNode
+         * from its name, its network location, its parent, and its level */
+        InnerNode( String name, String location, InnerNode parent, int level ) {
+            super( name, location, parent, level );
+        }
+        
         /** Get its children */
         Collection<Node> getChildren() {return children;}
         
@@ -126,13 +132,13 @@
          * @return true if the node is added; false otherwise
          */
         boolean add( DatanodeDescriptor n ) {
-            String parent = n.getNetworkLocation();
-            String currentPath = getPath();
             if( !isAncestor( n ) )
                 throw new IllegalArgumentException( n.getName()+", which is located at "
-                        +parent+", is not a decendent of "+currentPath);
+                        +n.getNetworkLocation()+", is not a decendent of "+getPath());
             if( isParent( n ) ) {
                 // this node is the parent of n; add n directly
+                n.setParent( this );
+                n.setLevel( this.level+1 );
                 for(int i=0; i<children.size(); i++) {
                     if(children.get(i).getName().equals(n.getName())) {
                         children.set(i, n);
@@ -149,11 +155,13 @@
                 for(int i=0; i<children.size(); i++) {
                     if(children.get(i).getName().equals(parentName)) {
                         parentNode = (InnerNode)children.get(i);
+                        break;
                     }
                 }
                 if( parentNode == null ) {
                     // create a new InnerNode
-                    parentNode = new InnerNode( parentName, currentPath );
+                    parentNode = new InnerNode( parentName, getPath(),
+                        this, this.getLevel()+1 );
                     children.add(parentNode);
                 }
                 // add n to the subtree of the next ancestor node
@@ -183,6 +191,7 @@
                     if(children.get(i).getName().equals(n.getName())) {
                         children.remove(i);
                         numOfLeaves--;
+                        n.setParent(null);
                         return true;
                     }
                 }
@@ -341,10 +350,15 @@
      *          a data node
      * @return true if <i>node</i> is already in the tree; false otherwise
      */
-    public boolean contains( DatanodeDescriptor node ) {
+    public synchronized boolean contains( DatanodeDescriptor node ) {
         if( node == null ) return false;
-        Node rNode = getNode(node.getPath());
-        return (rNode == node); 
+        Node parent = node.getParent();
+        for( int level=node.getLevel(); parent!=null&&level>0;
+                 parent=parent.getParent(), level-- ) {
+          if(parent == clusterMap)
+            return true;
+        }
+        return false; 
     }
     
     /** Given a string representation of a node, return its reference
@@ -370,19 +384,6 @@
         return clusterMap.getNumOfLeaves();
     }
     
-    private void checkArgument( DatanodeDescriptor node ) {
-        if( node == null ) {
-            throw new IllegalArgumentException( 
-                    "Unexpected null pointer argument" );
-        }
-        if( !contains(node) ) {
-            String path = node.getPath();
-            LOG.warn("The cluster does not contain data node: " + path);
-            throw new IllegalArgumentException(
-                    "Unexpected non-existing data node: " +path);
-        }
-    }
-    
     /** Return the distance between two data nodes
      * It is assumed that the distance from one node to its parent is 1
      * The distance between two nodes is calculated by summing up their distances
@@ -390,27 +391,40 @@
      * @param node1 one data node
      * @param node2 another data node
      * @return the distance between node1 and node2
-     * @exception IllegalArgumentException when either node1 or node2 is null, or
      * node1 or node2 do not belong to the cluster
      */
     public int getDistance(DatanodeDescriptor node1, DatanodeDescriptor node2 ) {
-        checkArgument( node1 );
-        checkArgument( node2 );
-
-        if( node1 == node2 || node1.equals(node2)) {
+        if( node1 == node2 ) {
             return 0;
         }
-        String[] path1 = node1.getNetworkLocation().split("/");
-        String[] path2 = node2.getNetworkLocation().split("/");
-        
         int i;
-        for(i=0; i<Math.min(path1.length, path2.length); i++) {
-            if( path1[i]!=path2[i] && (path1[i]!=null 
-                    && !path1[i].equals(path2[i]))) {
-                break;
-            }
+        Node n1=node1, n2=node2;
+        int level1=node1.getLevel(), level2=node2.getLevel();
+        int dis = 0;
+        while( n1!=null && level1>level2 ) {
+          n1 = n1.getParent();
+          level1--;
+          dis++;
+        }
+        while( n2!=null && level2>level1 ) {
+          n2 = n2.getParent();
+          level2--;
+          dis++;
+        }
+        while(n1!=null && n2!=null && n1.getParent()!=n2.getParent()) {
+          n1=n1.getParent();
+          n2=n2.getParent();
+          dis+=2;
+        }
+        if (n1==null) {
+          LOG.warn("The cluster does not contain data node: "+node1.getPath());
+          return Integer.MAX_VALUE;
+        }
+        if(n2==null) {
+          LOG.warn("The cluster does not contain data node: "+node2.getPath());
+          return Integer.MAX_VALUE;
         }
-        return 2+(path1.length-i)+(path2.length-i);
+        return dis+2;
     } 
     
     /** Check if two data nodes are on the same rack
@@ -422,18 +436,15 @@
      */
     public boolean isOnSameRack(
             DatanodeDescriptor node1, DatanodeDescriptor node2) {
-        checkArgument( node1 );
-        checkArgument( node2 );
+      if( node1 == null || node2 == null ) {
+        return false;
+      }
+      
         if( node1 == node2 || node1.equals(node2)) {
             return true;
         }
         
-        String location1 = node1.getNetworkLocation();
-        String location2 = node2.getNetworkLocation();
-        
-        if(location1 == location2 ) return true;
-        if(location1 == null || location2 == null) return false;
-        return location1.equals(location2);
+        return node1.getParent()==node2.getParent();
     }
     
     final private static Random r = new Random();

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/net/Node.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/net/Node.java?view=diff&rev=526411&r1=526410&r2=526411
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/net/Node.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/net/Node.java Sat Apr  7 01:46:54 2007
@@ -34,4 +34,10 @@
   public String getNetworkLocation();
   /** Return this node's name */
   public String getName();
+  /** Return this node's parent */
+  public Node getParent();
+  /** Return this node's level in the tree.
+   * E.g. the root of a tree returns 0 and its children return 1
+   */
+  public int getLevel();
 }

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NodeBase.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NodeBase.java?view=diff&rev=526411&r1=526410&r2=526411
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NodeBase.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/net/NodeBase.java Sat Apr  7 01:46:54 2007
@@ -30,6 +30,8 @@
   
   protected String name; //host:port#
   protected String location; //string representation of this node's location
+  protected int level; //which level of the tree the node resides
+  protected Node parent; //its parent
   
   /** Default constructor */
   public NodeBase( ) {
@@ -57,6 +59,18 @@
     set(name, normalize(location));
   }
   
+  /** Construct a node from its name and its location
+   * @param name this node's name 
+   * @param location this node's location 
+   * @param parent this node's parent node
+   * @param level this node's level in the tree
+   */
+  public NodeBase( String name, String location, Node parent, int level ) {
+    set(name, normalize(location));
+    this.parent = parent;
+    this.level = level;
+  }
+
   /* set this node's name and location */
   private void set( String name, String location ) {
     if(name != null && name.contains(PATH_SEPARATOR_STR))
@@ -98,4 +112,12 @@
     }
     return path;
   }
+  
+  /** Return this node's parent */
+  public Node getParent() { return parent; }
+  
+  /** Return this node's level in the tree.
+   * E.g. the root of a tree returns 0 and its children return 1
+   */
+  public int getLevel() { return level; }
 }

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/net/TestNetworkTopology.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/net/TestNetworkTopology.java?view=diff&rev=526411&r1=526410&r2=526411
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/net/TestNetworkTopology.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/net/TestNetworkTopology.java Sat Apr  7
01:46:54 2007
@@ -6,7 +6,7 @@
 import junit.framework.TestCase;
 
 public class TestNetworkTopology extends TestCase {
-  private NetworkTopology cluster = new NetworkTopology();
+  private final static NetworkTopology cluster = new NetworkTopology();
   private final static DatanodeDescriptor dataNodes[] = new DatanodeDescriptor[] {
       new DatanodeDescriptor(new DatanodeID("h1:5020", "0", -1), "/d1/r1"),
       new DatanodeDescriptor(new DatanodeID("h2:5020", "0", -1), "/d1/r1"),
@@ -19,10 +19,10 @@
   private final static DatanodeDescriptor NODE = 
     new DatanodeDescriptor(new DatanodeID("h8:5020", "0", -1), "/d2/r4");
   
-  public TestNetworkTopology() {
+  static {
     for(int i=0; i<dataNodes.length; i++) {
       cluster.add( dataNodes[i] );
-    }    
+    }
   }
   
   public void testContains() {
@@ -36,8 +36,14 @@
     assertEquals(cluster.getNumOfLeaves(), dataNodes.length);
   }
 
-  public void testNumOfRacks() throws Exception {
+  public void testRacks() throws Exception {
     assertEquals(cluster.getNumOfRacks(), 3);
+    assertTrue(cluster.isOnSameRack(dataNodes[0], dataNodes[1]));
+    assertFalse(cluster.isOnSameRack(dataNodes[1], dataNodes[2]));
+    assertTrue(cluster.isOnSameRack(dataNodes[2], dataNodes[3]));
+    assertTrue(cluster.isOnSameRack(dataNodes[3], dataNodes[4]));
+    assertFalse(cluster.isOnSameRack(dataNodes[4], dataNodes[5]));
+    assertTrue(cluster.isOnSameRack(dataNodes[5], dataNodes[6]));
   }
   
   public void testGetDistance() throws Exception {



Mime
View raw message