tez-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hit...@apache.org
Subject tez git commit: TEZ-2397. Translation of LocalResources via Tez plan serialization can be lossy. (Siddharth Seth via hitesh)
Date Fri, 01 May 2015 18:48:19 GMT
Repository: tez
Updated Branches:
  refs/heads/branch-0.5 210359641 -> 0bceb8e49


TEZ-2397. Translation of LocalResources via Tez plan serialization can be lossy. (Siddharth Seth via hitesh)

(cherry picked from commit c924e8a25d61be50e8c99f721108772b1c97c326)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/0bceb8e4
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/0bceb8e4
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/0bceb8e4

Branch: refs/heads/branch-0.5
Commit: 0bceb8e49e4896140b977f0ae5b99dc8a728308e
Parents: 2103596
Author: Hitesh Shah <hitesh@apache.org>
Authored: Fri May 1 11:46:55 2015 -0700
Committer: Hitesh Shah <hitesh@apache.org>
Committed: Fri May 1 11:48:04 2015 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/tez/dag/api/DagTypeConverters.java   | 25 ++++++++------
 .../tez/dag/api/TestDagTypeConverters.java      | 35 ++++++++++++++++++++
 3 files changed, 51 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/0bceb8e4/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index bfac4b5..2691b6a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -4,6 +4,7 @@ Apache Tez Change Log
 Release 0.5.4: Unreleased
 
 ALL CHANGES:
+  TEZ-2397. Translation of LocalResources via Tez plan serialization can be lossy.
   TEZ-2221. VertexGroup name should be unqiue
   TEZ-1521. VertexDataMovementEventsGeneratedEvent may be logged twice in recovery log
   TEZ-1560. Invalid state machine handling for V_SOURCE_VERTEX_RECOVERED in recovery.

http://git-wip-us.apache.org/repos/asf/tez/blob/0bceb8e4/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java b/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java
index 17807d3..6c3fd0d 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java
@@ -19,6 +19,7 @@ package org.apache.tez.dag.api;
 
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.net.URISyntaxException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -210,16 +211,20 @@ public class DagTypeConverters {
     return VertexLocationHint.create(outputList);
   }
   
-  // notes re HDFS URL handling:
-  //   Resource URLs in the protobuf message are strings of the form hdfs://host:port/path
-  //   org.apache.hadoop.fs.Path.Path  is actually a URI type that allows any scheme
-  //   org.apache.hadoop.yarn.api.records.URL is a URL type used by YARN.
-  //   java.net.URL cannot be used out of the box as it rejects unknown schemes such as HDFS.
-
   public static String convertToDAGPlan(URL resource) {
-    // see above notes on HDFS URL handling
-    return resource.getScheme() + "://" + resource.getHost()
-        + ":" + resource.getPort() + resource.getFile();
+    Path p;
+    try {
+      p = ConverterUtils.getPathFromYarnURL(resource);
+    } catch (URISyntaxException e) {
+      throw new TezUncheckedException("Unable to translate resource: " + resource + " to Path");
+    }
+    String urlString = p.toString();
+    return urlString;
+  }
+
+  public static URL convertToYarnURL(String pathString) {
+    Path path = new Path(pathString);
+    return ConverterUtils.getYarnUrlFromPath(path);
   }
 
   public static Map<String, LocalResource> createLocalResourceMapFromDAGPlan(
@@ -233,7 +238,7 @@ public class DagTypeConverters {
       if(res.hasPattern()){
         r.setPattern(res.getPattern());
       }
-      r.setResource(ConverterUtils.getYarnUrlFromPath(new Path(res.getUri())));  // see above notes on HDFS URL handling
+      r.setResource(convertToYarnURL(res.getUri()));
       r.setSize(res.getSize());
       r.setTimestamp(res.getTimeStamp());
       r.setType(DagTypeConverters.convertFromDAGPlan(res.getType()));

http://git-wip-us.apache.org/repos/asf/tez/blob/0bceb8e4/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java
----------------------------------------------------------------------
diff --git a/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java b/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java
index 13347bb..64a7bd5 100644
--- a/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java
+++ b/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java
@@ -21,6 +21,10 @@ package org.apache.tez.dag.api;
 import java.io.IOException;
 
 import java.nio.ByteBuffer;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.URL;
+import org.apache.hadoop.yarn.util.ConverterUtils;
 import org.apache.tez.common.TezCommonUtils;
 import org.apache.tez.dag.api.records.DAGProtos.TezEntityDescriptorProto;
 import org.junit.Assert;
@@ -50,4 +54,35 @@ public class TestDagTypeConverters {
     Assert.assertNull(inputDescriptor.getHistoryText());
   }
 
+  @Test(timeout = 5000)
+  public void testYarnPathTranslation() {
+    // Without port
+    String p1String = "hdfs://mycluster/file";
+    Path p1Path = new Path(p1String);
+    // Users would translate this via this mechanic.
+    URL lr1Url = ConverterUtils.getYarnUrlFromPath(p1Path);
+    // Serialize to dag plan.
+    String p1StringSerialized = DagTypeConverters.convertToDAGPlan(lr1Url);
+    // Deserialize
+    URL lr1UrlDeserialized = DagTypeConverters.convertToYarnURL(p1StringSerialized);
+    Assert.assertEquals("mycluster", lr1UrlDeserialized.getHost());
+    Assert.assertEquals("/file", lr1UrlDeserialized.getFile());
+    Assert.assertEquals("hdfs", lr1UrlDeserialized.getScheme());
+
+
+    // With port
+    String p2String = "hdfs://mycluster:2311/file";
+    Path p2Path = new Path(p2String);
+    // Users would translate this via this mechanic.
+    URL lr2Url = ConverterUtils.getYarnUrlFromPath(p2Path);
+    // Serialize to dag plan.
+    String p2StringSerialized = DagTypeConverters.convertToDAGPlan(lr2Url);
+    // Deserialize
+    URL lr2UrlDeserialized = DagTypeConverters.convertToYarnURL(p2StringSerialized);
+    Assert.assertEquals("mycluster", lr2UrlDeserialized.getHost());
+    Assert.assertEquals("/file", lr2UrlDeserialized.getFile());
+    Assert.assertEquals("hdfs", lr2UrlDeserialized.getScheme());
+    Assert.assertEquals(2311, lr2UrlDeserialized.getPort());
+  }
+
 }


Mime
View raw message