hadoop-mapreduce-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cdoug...@apache.org
Subject svn commit: r889788 - in /hadoop/mapreduce/branches/branch-0.21: ./ src/contrib/mumak/src/java/org/apache/hadoop/mapred/ src/contrib/mumak/src/test/data/ src/contrib/mumak/src/test/org/apache/hadoop/mapred/ src/tools/org/apache/hadoop/tools/rumen/
Date Fri, 11 Dec 2009 19:51:05 GMT
Author: cdouglas
Date: Fri Dec 11 19:51:04 2009
New Revision: 889788

URL: http://svn.apache.org/viewvc?rev=889788&view=rev
Log:
MAPREDUCE-1222. Add an option to exclude numeric IP addresses in topologies
processed by Mumak. Contributed by Hong Tang

Added:
    hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/data/topo-with-numeric-ips.json
    hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/data/topo-without-numeric-ips.json
    hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/org/apache/hadoop/mapred/TestRemoveIpsFromLoggedNetworkTopology.java
Modified:
    hadoop/mapreduce/branches/branch-0.21/CHANGES.txt
    hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/java/org/apache/hadoop/mapred/SimulatorEngine.java
    hadoop/mapreduce/branches/branch-0.21/src/tools/org/apache/hadoop/tools/rumen/ZombieCluster.java

Modified: hadoop/mapreduce/branches/branch-0.21/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/CHANGES.txt?rev=889788&r1=889787&r2=889788&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/CHANGES.txt (original)
+++ hadoop/mapreduce/branches/branch-0.21/CHANGES.txt Fri Dec 11 19:51:04 2009
@@ -877,3 +877,6 @@
     (Sreekanth Ramakrishnan via yhemanth)
 
     MAPREDUCE-1124. Fix imprecise byte counts in Gridmix. (cdouglas)
+
+    MAPREDUCE-1222. Add an option to exclude numeric IP addresses in topologies
+    processed by Mumak. (Hong Tang via cdouglas)

Modified: hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/java/org/apache/hadoop/mapred/SimulatorEngine.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/java/org/apache/hadoop/mapred/SimulatorEngine.java?rev=889788&r1=889787&r2=889788&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/java/org/apache/hadoop/mapred/SimulatorEngine.java (original)
+++ hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/java/org/apache/hadoop/mapred/SimulatorEngine.java Fri Dec 11 19:51:04 2009
@@ -20,7 +20,9 @@
 import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
+import java.util.regex.Pattern;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
@@ -35,7 +37,9 @@
 import org.apache.hadoop.net.DNSToSwitchMapping;
 import org.apache.hadoop.net.StaticMapping;
 import org.apache.hadoop.tools.rumen.ClusterStory;
+import org.apache.hadoop.tools.rumen.ClusterTopologyReader;
 import org.apache.hadoop.tools.rumen.JobStoryProducer;
+import org.apache.hadoop.tools.rumen.LoggedNetworkTopology;
 import org.apache.hadoop.tools.rumen.MachineNode;
 import org.apache.hadoop.tools.rumen.RackNode;
 import org.apache.hadoop.tools.rumen.ZombieCluster;
@@ -77,8 +81,6 @@
 
     for (MachineNode node : clusterStory.getMachines()) {
       String hostname = node.getName();
-      RackNode rackNode = node.getRackNode();
-      StaticMapping.addNodeToRack(hostname, rackNode.getName());
       String taskTrackerName = "tracker_" + hostname + ":localhost/127.0.0.1:"
           + port;
       port++;
@@ -131,8 +133,15 @@
 
     MachineNode defaultNode = new MachineNode.Builder("default", 2)
         .setMapSlots(maxMaps).setReduceSlots(maxReduces).build();
-    ZombieCluster cluster = new ZombieCluster(new Path(topologyFile), 
-        defaultNode, jobConf);
+    
+    LoggedNetworkTopology topology = new ClusterTopologyReader(new Path(
+        topologyFile), jobConf).get();
+    // Setting the static mapping before removing numeric IP hosts.
+    setStaticMapping(topology);
+    if (getConf().getBoolean("mumak.topology.filter-numeric-ips", true)) {
+      removeIpHosts(topology);
+    }
+    ZombieCluster cluster = new ZombieCluster(topology, defaultNode);
     long firstJobStartTime = now + 60000;
     JobStoryProducer jobStoryProducer = new SimulatorJobStoryProducer(
         new Path(traceFile), cluster, firstJobStartTime, jobConf);
@@ -238,4 +247,58 @@
   long getCurrentTime() {
     return currentTime;
   }
+  
+  // Due to HDFS-778, a node may appear in job history logs as both numeric
+  // ips and as host names. We remove them from the parsed network topology
+  // before feeding it to ZombieCluster.
+  static void removeIpHosts(LoggedNetworkTopology topology) {
+    for (Iterator<LoggedNetworkTopology> rackIt = topology.getChildren()
+        .iterator(); rackIt.hasNext();) {
+      LoggedNetworkTopology rack = rackIt.next();
+      List<LoggedNetworkTopology> nodes = rack.getChildren();
+      for (Iterator<LoggedNetworkTopology> it = nodes.iterator(); it.hasNext();) {
+        LoggedNetworkTopology node = it.next();
+        if (isIPAddress(node.getName())) {
+          it.remove();
+        }
+      }
+      if (nodes.isEmpty()) {
+        rackIt.remove();
+      }
+    }
+  }
+
+  static Pattern IP_PATTERN;
+  
+  static {
+    // 0-255
+    String IPV4BK1 = "(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)";
+    // .b.c.d - where b/c/d are 0-255, and optionally adding two more
+    // backslashes before each period
+    String IPV4BKN = "(?:\\\\?\\." + IPV4BK1 + "){3}";
+    String IPV4_PATTERN = IPV4BK1 + IPV4BKN;
+    
+    // first hexadecimal number
+    String IPV6BK1 = "(?:[0-9a-fA-F]{1,4})";
+    // remaining 7 hexadecimal numbers, each preceded with ":".
+    String IPV6BKN = "(?::" + IPV6BK1 + "){7}";
+    String IPV6_PATTERN = IPV6BK1 + IPV6BKN;
+
+    IP_PATTERN = Pattern.compile(
+        "^(?:" + IPV4_PATTERN + "|" + IPV6_PATTERN + ")$");
+  }
+
+ 
+  static boolean isIPAddress(String hostname) {
+    return IP_PATTERN.matcher(hostname).matches();
+  }
+  
+  static void setStaticMapping(LoggedNetworkTopology topology) {
+    for (LoggedNetworkTopology rack : topology.getChildren()) {
+      for (LoggedNetworkTopology node : rack.getChildren()) {
+        StaticMapping.addNodeToRack(node.getName(), 
+            new RackNode(rack.getName(), 1).getName());
+      }
+    }
+  }
 }

Added: hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/data/topo-with-numeric-ips.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/data/topo-with-numeric-ips.json?rev=889788&view=auto
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/data/topo-with-numeric-ips.json (added)
+++ hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/data/topo-with-numeric-ips.json Fri Dec 11 19:51:04 2009
@@ -0,0 +1,22 @@
+{
+  "name" : "<root>",
+  "children" : [ {
+    "name" : "194\\.6\\.129\\.64",
+    "children" : [ {
+      "name" : "node1817\\.megatron\\.com",
+      "children" : null
+    }, {
+      "name" : "194\\.6\\.129\\.67",
+      "children" : null
+    } ]
+  }, {
+    "name" : "192\\.30\\.63\\.64",
+    "children" : [ {
+      "name" : "192\\.30\\.63\\.69",
+      "children" : null
+    }, {
+      "name" : "192\\.30\\.63\\.81",
+      "children" : null
+    } ]
+  } ]
+}

Added: hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/data/topo-without-numeric-ips.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/data/topo-without-numeric-ips.json?rev=889788&view=auto
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/data/topo-without-numeric-ips.json (added)
+++ hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/data/topo-without-numeric-ips.json Fri Dec 11 19:51:04 2009
@@ -0,0 +1,10 @@
+{
+  "name" : "<root>",
+  "children" : [ {
+    "name" : "194\\.6\\.129\\.64",
+    "children" : [ {
+      "name" : "node1817\\.megatron\\.com",
+      "children" : null
+    } ]
+  } ]
+}

Added: hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/org/apache/hadoop/mapred/TestRemoveIpsFromLoggedNetworkTopology.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/org/apache/hadoop/mapred/TestRemoveIpsFromLoggedNetworkTopology.java?rev=889788&view=auto
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/org/apache/hadoop/mapred/TestRemoveIpsFromLoggedNetworkTopology.java (added)
+++ hadoop/mapreduce/branches/branch-0.21/src/contrib/mumak/src/test/org/apache/hadoop/mapred/TestRemoveIpsFromLoggedNetworkTopology.java Fri Dec 11 19:51:04 2009
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred;
+
+import java.io.IOException;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.tools.rumen.ClusterTopologyReader;
+import org.apache.hadoop.tools.rumen.DeepInequalityException;
+import org.apache.hadoop.tools.rumen.LoggedNetworkTopology;
+import org.apache.hadoop.tools.rumen.TreePath;
+import org.junit.Test;
+
+public class TestRemoveIpsFromLoggedNetworkTopology {
+  
+  
+  @Test
+  public void testIsIPAddress() {
+    final String[] positives = {
+        "123.13.42.255", // regular ipv4
+        "123.01.0.255", // padded 0
+        "000.001.002.020", // more padded 0
+        "123\\.13\\.42\\.255", // escaped .
+        "0.0.0.0", // all-zero
+        "255.255.255.255", // all-0xff
+        
+        "1080:0:0:0:8:800:200C:417A", // regular ipv6
+        "1080:01:020:3:8:0800:200C:417A", // padded 0
+        "1080:01:002:0003:080:0800:0200:417A", // more padded 0
+        "0:0:0:0:0:0:0:0", // all-zero
+        "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", // all-0xff
+    };
+
+    final String[] negatives = {
+        "node.megatron.com", // domain name
+        "13.42.255", // too short
+        "123.13.42.255.10", // too long
+        "123.256.42.255", // too large
+        "123.13.42.255.weird.com", // weird
+        "1080:0:0:0:8:200C:417A", // too short
+        "1080:0:0:0:1:8:800:200C:417A", // too long
+        "1080A:0:0:0:8:800:200C:417A", // too large
+        "1080:0:0:0:8:800:200G:417A", // too large
+    };
+    
+    for (String s : positives) {
+      Assert.assertTrue(s, SimulatorEngine.isIPAddress(s));
+    }
+    
+    for (String s : negatives) {
+      Assert.assertFalse(s, SimulatorEngine.isIPAddress(s));
+    }
+  }
+  
+  @Test
+  public void testIpRemoval() throws IOException {
+    final Configuration conf = new Configuration();
+    final FileSystem lfs = FileSystem.getLocal(conf);
+    final Path rootInputDir = new Path(System.getProperty("src.test.data",
+        "data")).makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
+
+    final LoggedNetworkTopology topoWithIps = new ClusterTopologyReader(new Path(
+        rootInputDir, "topo-with-numeric-ips.json"), conf).get();
+    final LoggedNetworkTopology topoWithoutIps = new ClusterTopologyReader(new Path(
+        rootInputDir, "topo-without-numeric-ips.json"), conf).get();
+    try {
+      topoWithIps.deepCompare(topoWithoutIps, new TreePath(null, "<root>"));
+      Assert.fail("Expecting two topologies to differ");
+    } catch (DeepInequalityException e) {
+    }
+    SimulatorEngine.removeIpHosts(topoWithIps);
+    try {
+      topoWithIps.deepCompare(topoWithoutIps, new TreePath(null, "<root>"));
+    } catch (DeepInequalityException e) {
+      Assert.fail("Expecting two topologies to be equal");
+    }
+  }
+}

Modified: hadoop/mapreduce/branches/branch-0.21/src/tools/org/apache/hadoop/tools/rumen/ZombieCluster.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/tools/org/apache/hadoop/tools/rumen/ZombieCluster.java?rev=889788&r1=889787&r2=889788&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/tools/org/apache/hadoop/tools/rumen/ZombieCluster.java (original)
+++ hadoop/mapreduce/branches/branch-0.21/src/tools/org/apache/hadoop/tools/rumen/ZombieCluster.java Fri Dec 11 19:51:04 2009
@@ -45,7 +45,7 @@
    * @param defaultNode
    *          The default node setting.
    */
-  ZombieCluster(LoggedNetworkTopology topology, MachineNode defaultNode) {
+  public ZombieCluster(LoggedNetworkTopology topology, MachineNode defaultNode) {
     buildCluster(topology, defaultNode);
   }
 



Mime
View raw message