hive-commits mailing list archives

From the...@apache.org
Subject svn commit: r1526025 - in /hive/branches/branch-0.12/hcatalog: src/docs/src/documentation/content/xdocs/ webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ webhcat/svr/src/te...
Date Tue, 24 Sep 2013 22:08:01 GMT
Author: thejas
Date: Tue Sep 24 22:08:00 2013
New Revision: 1526025

URL: http://svn.apache.org/r1526025
Log:
HIVE-4531: [WebHCat] Collecting task logs to hdfs (Daniel Dai via Thejas Nair)

Added:
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HiveJobIDParser.java
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JarJobIDParser.java
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobIDParser.java
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LogRetriever.java
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/PigJobIDParser.java
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/hive/
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/hive/stderr
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/jar/
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/jar/stderr
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/pig/
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/pig/stderr
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/streaming/
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/streaming/stderr
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestJobIDParser.java
Modified:
    hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/hive.xml
    hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/mapreducejar.xml
    hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/mapreducestreaming.xml
    hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/pig.xml
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JarDelegator.java
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/LauncherDelegator.java
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PigDelegator.java
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StreamingDelegator.java
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java
    hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java

Modified: hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/hive.xml
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/hive.xml?rev=1526025&r1=1526024&r2=1526025&view=diff
==============================================================================
--- hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/hive.xml (original)
+++ hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/hive.xml Tue Sep 24 22:08:00 2013
@@ -66,6 +66,13 @@
         <td>None</td>
     </tr>
 
+    <tr><td><strong>enablelog</strong></td>
+        <td>Collect the Hadoop job configuration and logs into the $statusdir/logs
+         folder. statusdir must also be set to use this feature.</td>
+        <td>Optional</td>
+        <td>None</td>
+    </tr>
+
     <tr><td><strong>callback</strong></td>
         <td>Define a URL to be called upon job completion. You may embed a specific
          job ID into this URL using <code>$jobId</code>.  This tag

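For illustration (not part of this commit): enablelog is a form parameter on the job-submission endpoints and only takes effect when statusdir is also supplied. A minimal client sketch in Java, where the host, port, user name, and statusdir value are all assumptions:

    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.net.URLEncoder;

    public class SubmitHiveJobExample {
      public static void main(String[] args) throws Exception {
        // Hypothetical WebHCat endpoint; host, port, and user name are assumptions.
        URL url = new URL("http://localhost:50111/templeton/v1/hive?user.name=alice");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("POST");
        conn.setDoOutput(true);
        // enablelog=true is honored only when statusdir is also set; without
        // statusdir the server rejects the request (see Server.java below).
        String form = "execute=" + URLEncoder.encode("show tables;", "UTF-8")
            + "&statusdir=" + URLEncoder.encode("hive.output", "UTF-8")
            + "&enablelog=true";
        OutputStream out = conn.getOutputStream();
        out.write(form.getBytes("UTF-8"));
        out.close();
        System.out.println("HTTP " + conn.getResponseCode());
      }
    }

On success, the collected task logs land under $statusdir/logs once the job completes.
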
Modified: hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/mapreducejar.xml
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/mapreducejar.xml?rev=1526025&r1=1526024&r2=1526025&view=diff
==============================================================================
--- hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/mapreducejar.xml (original)
+++ hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/mapreducejar.xml Tue Sep 24 22:08:00 2013
@@ -86,6 +86,13 @@
         <td>None</td>
     </tr>
 
+    <tr><td><strong>enablelog</strong></td>
+        <td>Collect the Hadoop job configuration and logs into the $statusdir/logs
+         folder. statusdir must also be set to use this feature.</td>
+        <td>Optional</td>
+        <td>None</td>
+    </tr>
+
     <tr><td><strong>callback</strong></td>
         <td>Define a URL to be called upon job completion. You may embed a specific
          job ID into this URL using <code>$jobId</code>.  This tag

Modified: hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/mapreducestreaming.xml
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/mapreducestreaming.xml?rev=1526025&r1=1526024&r2=1526025&view=diff
==============================================================================
--- hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/mapreducestreaming.xml (original)
+++ hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/mapreducestreaming.xml Tue Sep 24 22:08:00 2013
@@ -101,6 +101,13 @@
         <td>None</td>
     </tr>
 
+    <tr><td><strong>enablelog</strong></td>
+        <td>Collect the Hadoop job configuration and logs into the $statusdir/logs
+         folder. statusdir must also be set to use this feature.</td>
+        <td>Optional</td>
+        <td>None</td>
+    </tr>
+
     <tr><td><strong>callback</strong></td>
         <td>Define a URL to be called upon job completion. You may embed a specific
          job ID into this URL using <code>$jobId</code>.  This tag

Modified: hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/pig.xml
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/pig.xml?rev=1526025&r1=1526024&r2=1526025&view=diff
==============================================================================
--- hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/pig.xml (original)
+++ hive/branches/branch-0.12/hcatalog/src/docs/src/documentation/content/xdocs/pig.xml Tue Sep 24 22:08:00 2013
@@ -70,6 +70,13 @@
         <td>None</td>
     </tr>
 
+    <tr><td><strong>enablelog</strong></td>
+        <td>Collect the Hadoop job configuration and logs into the $statusdir/logs
+         folder. statusdir must also be set to use this feature.</td>
+        <td>Optional</td>
+        <td>None</td>
+    </tr>
+
     <tr><td><strong>callback</strong></td>
         <td>Define a URL to be called upon job completion. You may embed a specific
          job ID into this URL using <code>$jobId</code>.  This tag

Modified: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java?rev=1526025&r1=1526024&r2=1526025&view=diff
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java (original)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java Tue Sep 24 22:08:00 2013
@@ -43,25 +43,25 @@ public class HiveDelegator extends Launc
   public EnqueueBean run(String user,
                String execute, String srcFile, List<String> defines,
                List<String> hiveArgs, String otherFiles,
-               String statusdir, String callback, String completedUrl)
+               String statusdir, String callback, String completedUrl, boolean enablelog)
     throws NotAuthorizedException, BadParam, BusyException, QueueException,
     ExecuteException, IOException, InterruptedException
   {
     runAs = user;
     List<String> args = makeArgs(execute, srcFile, defines, hiveArgs, otherFiles, statusdir,
-                   completedUrl);
+                   completedUrl, enablelog);
 
     return enqueueController(user, callback, args);
   }
 
   private List<String> makeArgs(String execute, String srcFile,
              List<String> defines, List<String> hiveArgs, String otherFiles,
-             String statusdir, String completedUrl)
+             String statusdir, String completedUrl, boolean enablelog)
     throws BadParam, IOException, InterruptedException
   {
     ArrayList<String> args = new ArrayList<String>();
     try {
-      args.addAll(makeBasicArgs(execute, srcFile, otherFiles, statusdir, completedUrl));
+      args.addAll(makeBasicArgs(execute, srcFile, otherFiles, statusdir, completedUrl, enablelog));
       args.add("--");
       args.add(appConf.hivePath());
 
@@ -99,7 +99,8 @@ public class HiveDelegator extends Launc
   }
 
   private List<String> makeBasicArgs(String execute, String srcFile, String otherFiles,
-                                         String statusdir, String completedUrl)
+                                         String statusdir, String completedUrl,
+                                         boolean enablelog)
     throws URISyntaxException, FileNotFoundException, IOException,
     InterruptedException
   {
@@ -115,7 +116,8 @@ public class HiveDelegator extends Launc
       allFiles.addAll(Arrays.asList(ofs));
     }
 
-    args.addAll(makeLauncherArgs(appConf, statusdir, completedUrl, allFiles));
+    args.addAll(makeLauncherArgs(appConf, statusdir, completedUrl, allFiles,
+                enablelog, JobType.HIVE));
 
     args.add("-archives");
     args.add(appConf.hiveArchive());

Modified: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JarDelegator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JarDelegator.java?rev=1526025&r1=1526024&r2=1526025&view=diff
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JarDelegator.java (original)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JarDelegator.java Tue Sep 24 22:08:00 2013
@@ -41,13 +41,14 @@ public class JarDelegator extends Launch
   public EnqueueBean run(String user, String jar, String mainClass,
                String libjars, String files,
                List<String> jarArgs, List<String> defines,
-               String statusdir, String callback, String completedUrl)
+               String statusdir, String callback, String completedUrl,
+               boolean enablelog, JobType jobType)
     throws NotAuthorizedException, BadParam, BusyException, QueueException,
     ExecuteException, IOException, InterruptedException {
     runAs = user;
     List<String> args = makeArgs(jar, mainClass,
       libjars, files, jarArgs, defines,
-      statusdir, completedUrl);
+      statusdir, completedUrl, enablelog, jobType);
 
     return enqueueController(user, callback, args);
   }
@@ -55,7 +56,8 @@ public class JarDelegator extends Launch
   private List<String> makeArgs(String jar, String mainClass,
                   String libjars, String files,
                   List<String> jarArgs, List<String> defines,
-                  String statusdir, String completedUrl)
+                  String statusdir, String completedUrl,
+                  boolean enablelog, JobType jobType)
     throws BadParam, IOException, InterruptedException {
     ArrayList<String> args = new ArrayList<String>();
     try {
@@ -63,7 +65,7 @@ public class JarDelegator extends Launch
       allFiles.add(TempletonUtils.hadoopFsFilename(jar, appConf, runAs));
 
       args.addAll(makeLauncherArgs(appConf, statusdir,
-        completedUrl, allFiles));
+        completedUrl, allFiles, enablelog, jobType));
       args.add("--");
       args.add(appConf.clusterHadoop());
       args.add("jar");

Modified: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/LauncherDelegator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/LauncherDelegator.java?rev=1526025&r1=1526024&r2=1526025&view=diff
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/LauncherDelegator.java (original)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/LauncherDelegator.java Tue Sep 24 22:08:00 2013
@@ -43,6 +43,7 @@ import org.apache.hive.hcatalog.templeto
 public class LauncherDelegator extends TempletonDelegator {
   private static final Log LOG = LogFactory.getLog(LauncherDelegator.class);
   protected String runAs = null;
+  public static enum JobType {JAR, STREAMING, PIG, HIVE}
 
   public LauncherDelegator(AppConfig appConf) {
     super(appConf);
@@ -105,7 +106,9 @@ public class LauncherDelegator extends T
 
   public List<String> makeLauncherArgs(AppConfig appConf, String statusdir,
                      String completedUrl,
-                     List<String> copyFiles) {
+                     List<String> copyFiles,
+                     boolean enablelog,
+                     JobType jobType) {
     ArrayList<String> args = new ArrayList<String>();
 
     args.add("-libjars");
@@ -123,6 +126,10 @@ public class LauncherDelegator extends T
       TempletonUtils.encodeArray(copyFiles));
     addDef(args, TempletonControllerJob.OVERRIDE_CLASSPATH,
       makeOverrideClasspath(appConf));
+    addDef(args, TempletonControllerJob.ENABLE_LOG,
+      Boolean.toString(enablelog));
+    addDef(args, TempletonControllerJob.JOB_TYPE,
+      jobType.toString());
 
     // Hadoop queue information
     addDef(args, "mapred.job.queue.name", appConf.hadoopQueueName());

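For illustration (not part of this commit): the two new definitions travel to the controller job as Hadoop -D properties. A sketch of what the addDef calls append, with a stand-in addDef helper (the real helper lives in LauncherDelegator but is not shown in this diff; only the property names come from this commit):

    import java.util.ArrayList;
    import java.util.List;

    public class LauncherArgsSketch {
      // Stand-in for LauncherDelegator.addDef, assumed to append a
      // "-D name=value" pair when the value is non-null.
      static void addDef(List<String> args, String name, String value) {
        if (value != null) {
          args.add("-D");
          args.add(name + "=" + value);
        }
      }

      public static void main(String[] args) {
        List<String> launcherArgs = new ArrayList<String>();
        addDef(launcherArgs, "templeton.enablelog", Boolean.toString(true));
        addDef(launcherArgs, "templeton.jobtype", "HIVE");
        // Prints: [-D, templeton.enablelog=true, -D, templeton.jobtype=HIVE]
        System.out.println(launcherArgs);
      }
    }
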
Modified: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PigDelegator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PigDelegator.java?rev=1526025&r1=1526024&r2=1526025&view=diff
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PigDelegator.java (original)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PigDelegator.java Tue Sep 24 22:08:00 2013
@@ -42,20 +42,20 @@ public class PigDelegator extends Launch
   public EnqueueBean run(String user,
                String execute, String srcFile,
                List<String> pigArgs, String otherFiles,
-               String statusdir, String callback, String completedUrl)
+               String statusdir, String callback, String completedUrl, boolean enablelog)
     throws NotAuthorizedException, BadParam, BusyException, QueueException,
     ExecuteException, IOException, InterruptedException {
     runAs = user;
     List<String> args = makeArgs(execute,
       srcFile, pigArgs,
-      otherFiles, statusdir, completedUrl);
+      otherFiles, statusdir, completedUrl, enablelog);
 
     return enqueueController(user, callback, args);
   }
 
   private List<String> makeArgs(String execute, String srcFile,
                   List<String> pigArgs, String otherFiles,
-                  String statusdir, String completedUrl)
+                  String statusdir, String completedUrl, boolean enablelog)
     throws BadParam, IOException, InterruptedException {
     ArrayList<String> args = new ArrayList<String>();
     try {
@@ -68,7 +68,7 @@ public class PigDelegator extends Launch
         allFiles.addAll(Arrays.asList(ofs));
       }
 
-      args.addAll(makeLauncherArgs(appConf, statusdir, completedUrl, allFiles));
+      args.addAll(makeLauncherArgs(appConf, statusdir, completedUrl, allFiles, enablelog, JobType.PIG));
       args.add("-archives");
       args.add(appConf.pigArchive());
 

Modified: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java?rev=1526025&r1=1526024&r2=1526025&view=diff
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java (original)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java Tue Sep 24 22:08:00 2013
@@ -50,6 +50,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.authentication.client.PseudoAuthenticator;
+import org.apache.hive.hcatalog.templeton.LauncherDelegator.JobType;
 import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;
 
 /**
@@ -591,18 +592,21 @@ public class Server {
                       @FormParam("cmdenv") List<String> cmdenvs,
                       @FormParam("arg") List<String> args,
                       @FormParam("statusdir") String statusdir,
-                      @FormParam("callback") String callback)
+                      @FormParam("callback") String callback,
+                      @FormParam("enablelog") boolean enablelog)
     throws NotAuthorizedException, BusyException, BadParam, QueueException,
     ExecuteException, IOException, InterruptedException {
     verifyUser();
     verifyParam(inputs, "input");
     verifyParam(mapper, "mapper");
     verifyParam(reducer, "reducer");
+    
+    checkEnableLogPrerequisite(enablelog, statusdir);
 
     StreamingDelegator d = new StreamingDelegator(appConf);
     return d.run(getDoAsUser(), inputs, output, mapper, reducer,
       files, defines, cmdenvs, args,
-      statusdir, callback, getCompletedUrl());
+      statusdir, callback, getCompletedUrl(), enablelog, JobType.STREAMING);
   }
 
   /**
@@ -618,18 +622,21 @@ public class Server {
                   @FormParam("arg") List<String> args,
                   @FormParam("define") List<String> defines,
                   @FormParam("statusdir") String statusdir,
-                  @FormParam("callback") String callback)
+                  @FormParam("callback") String callback,
+                  @FormParam("enablelog") boolean enablelog)
     throws NotAuthorizedException, BusyException, BadParam, QueueException,
     ExecuteException, IOException, InterruptedException {
     verifyUser();
     verifyParam(jar, "jar");
     verifyParam(mainClass, "class");
+    
+    checkEnableLogPrerequisite(enablelog, statusdir);
 
     JarDelegator d = new JarDelegator(appConf);
     return d.run(getDoAsUser(),
       jar, mainClass,
       libjars, files, args, defines,
-      statusdir, callback, getCompletedUrl());
+      statusdir, callback, getCompletedUrl(), enablelog, JobType.JAR);
   }
 
   /**
@@ -643,18 +650,21 @@ public class Server {
                @FormParam("arg") List<String> pigArgs,
                @FormParam("files") String otherFiles,
                @FormParam("statusdir") String statusdir,
-               @FormParam("callback") String callback)
+               @FormParam("callback") String callback,
+               @FormParam("enablelog") boolean enablelog)
     throws NotAuthorizedException, BusyException, BadParam, QueueException,
     ExecuteException, IOException, InterruptedException {
     verifyUser();
     if (execute == null && srcFile == null)
       throw new BadParam("Either execute or file parameter required");
+    
+    checkEnableLogPrerequisite(enablelog, statusdir);
 
     PigDelegator d = new PigDelegator(appConf);
     return d.run(getDoAsUser(),
       execute, srcFile,
       pigArgs, otherFiles,
-      statusdir, callback, getCompletedUrl());
+      statusdir, callback, getCompletedUrl(), enablelog);
   }
 
   /**
@@ -670,6 +680,7 @@ public class Server {
    * @param defines    shortcut for command line arguments "--define"
    * @param statusdir  where the stderr/stdout of templeton controller job goes
    * @param callback   callback url when the hive job finishes
+   * @param enablelog  whether to collect MapReduce logs into statusdir/logs
    */
   @POST
   @Path("hive")
@@ -680,16 +691,19 @@ public class Server {
               @FormParam("files") String otherFiles,
               @FormParam("define") List<String> defines,
               @FormParam("statusdir") String statusdir,
-              @FormParam("callback") String callback)
+              @FormParam("callback") String callback,
+              @FormParam("enablelog") boolean enablelog)
     throws NotAuthorizedException, BusyException, BadParam, QueueException,
     ExecuteException, IOException, InterruptedException {
     verifyUser();
     if (execute == null && srcFile == null)
       throw new BadParam("Either execute or file parameter required");
+    
+    checkEnableLogPrerequisite(enablelog, statusdir);
 
     HiveDelegator d = new HiveDelegator(appConf);
     return d.run(getDoAsUser(), execute, srcFile, defines, hiveArgs, otherFiles,
-      statusdir, callback, getCompletedUrl());
+      statusdir, callback, getCompletedUrl(), enablelog);
   }
 
   /**
@@ -902,6 +916,7 @@ public class Server {
     return theUriInfo.getBaseUri() + VERSION
       + "/internal/complete/$jobId";
   }
+
   /**
    * Returns canonical host name from which the request is made; used for doAs validation  
    */
@@ -930,4 +945,9 @@ public class Server {
       return unkHost;
     }
   }
+  
+  private void checkEnableLogPrerequisite(boolean enablelog, String statusdir) throws BadParam {
+    if (enablelog && !TempletonUtils.isset(statusdir))
+      throw new BadParam("enablelog is only applicable when statusdir is set");
+  }
 }

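For illustration (not part of this commit): a self-contained sketch of the new prerequisite check, with IllegalArgumentException standing in for BadParam and a local isset standing in for TempletonUtils.isset (assumed to mean "non-null and non-empty", consistent with its use here):

    public class EnableLogGuardSketch {
      // Stand-in for TempletonUtils.isset(String).
      static boolean isset(String s) {
        return s != null && s.length() > 0;
      }

      // Mirrors Server.checkEnableLogPrerequisite from this commit.
      static void checkEnableLogPrerequisite(boolean enablelog, String statusdir) {
        if (enablelog && !isset(statusdir)) {
          throw new IllegalArgumentException(
              "enablelog is only applicable when statusdir is set");
        }
      }

      public static void main(String[] args) {
        checkEnableLogPrerequisite(false, null);      // OK: log collection is off
        checkEnableLogPrerequisite(true, "hive.out"); // OK: statusdir is set
        checkEnableLogPrerequisite(true, null);       // throws
      }
    }
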
Modified: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StreamingDelegator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StreamingDelegator.java?rev=1526025&r1=1526024&r2=1526025&view=diff
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StreamingDelegator.java (original)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StreamingDelegator.java Tue Sep 24 22:08:00 2013
@@ -43,7 +43,9 @@ public class StreamingDelegator extends 
                List<String> jarArgs,
                String statusdir,
                String callback,
-               String completedUrl)
+               String completedUrl,
+               boolean enableLog,
+               JobType jobType)
     throws NotAuthorizedException, BadParam, BusyException, QueueException,
     ExecuteException, IOException, InterruptedException {
     List<String> args = makeArgs(inputs, output, mapper, reducer,
@@ -53,7 +55,7 @@ public class StreamingDelegator extends 
     return d.run(user,
       appConf.streamingJar(), null,
       null, null, args, defines,
-      statusdir, callback, completedUrl);
+      statusdir, callback, completedUrl, enableLog, jobType);
   }
 
   private List<String> makeArgs(List<String> inputs,

Added: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HiveJobIDParser.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HiveJobIDParser.java?rev=1526025&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HiveJobIDParser.java (added)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HiveJobIDParser.java Tue Sep 24 22:08:00 2013
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton.tool;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+
+class HiveJobIDParser extends JobIDParser {
+  final static String jobidPattern = "Starting Job = (job_\\d+_\\d+),";
+
+  HiveJobIDParser(String statusdir, Configuration conf) {
+    super(statusdir, conf);
+  }
+
+  @Override
+  List<String> parseJobID() throws IOException {
+    return parseJobID(TempletonControllerJob.STDERR_FNAME, jobidPattern);
+  }
+}

Added: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JarJobIDParser.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JarJobIDParser.java?rev=1526025&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JarJobIDParser.java (added)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JarJobIDParser.java Tue Sep 24 22:08:00 2013
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton.tool;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+
+class JarJobIDParser extends JobIDParser {
+  final static String jobidPattern = "Running job: (job_\\d+_\\d+)";
+
+  JarJobIDParser(String statusdir, Configuration conf) {
+    super(statusdir, conf);
+  }
+
+  @Override
+  List<String> parseJobID() throws IOException {
+    return parseJobID(TempletonControllerJob.STDERR_FNAME, jobidPattern);
+  }
+
+}

Added: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobIDParser.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobIDParser.java?rev=1526025&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobIDParser.java (added)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobIDParser.java Tue Sep 24 22:08:00 2013
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton.tool;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/*
+ * This is the base class for the output parsers.
+ * An output parser reads the console output of a
+ * Pig, Hive, Hadoop, or other job and extracts the
+ * IDs of the MapReduce jobs it launched. Note that
+ * job ID extraction depends on the API the application
+ * used to submit the job: different APIs produce
+ * different console output, so the extraction logic
+ * is not guaranteed to work in every case.
+ */
+abstract class JobIDParser {
+  private String statusdir;
+  private Configuration conf;
+  
+  JobIDParser(String statusdir, Configuration conf) {
+    this.statusdir = statusdir;
+    this.conf = conf;
+  }
+  
+  private BufferedReader openStatusFile(String fname) throws IOException {
+    Path p = new Path(statusdir, fname);
+    FileSystem fs = p.getFileSystem(conf);
+    BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(p)));
+    return in;
+  }
+
+  private List<String> findJobID(BufferedReader in, String patternAsString) throws IOException {
+    Pattern pattern = Pattern.compile(patternAsString);
+    Matcher matcher;
+    String line;
+    List<String> jobs = new ArrayList<String>();
+    while ((line=in.readLine())!=null) {
+      matcher = pattern.matcher(line);
+      if (matcher.find()) {
+        String jobid = matcher.group(1);
+        jobs.add(jobid);
+      }
+    }
+    return jobs;
+  }
+
+  abstract List<String> parseJobID() throws IOException;
+
+  List<String> parseJobID(String fname, String pattern) throws IOException {
+    BufferedReader in = null;
+    try {
+      in = openStatusFile(fname);
+      return findJobID(in, pattern);
+    } finally {
+      if (in != null) {
+        in.close();
+      }
+    }
+  }
+}

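For illustration (not part of this commit): each subclass only supplies a status file name and a regex; findJobID does the scanning. A quick demonstration of the Hive pattern against the sample line from the test stderr file added below (the pattern and the sample line are verbatim from this commit):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class JobIDPatternDemo {
      public static void main(String[] args) {
        // Pattern from HiveJobIDParser; line from src/test/data/status/hive/stderr.
        Pattern p = Pattern.compile("Starting Job = (job_\\d+_\\d+),");
        String line = "Starting Job = job_201305091437_0012, Tracking URL = "
            + "http://localhost:50030/jobdetails.jsp?jobid=job_201305091437_0012";
        Matcher m = p.matcher(line);
        if (m.find()) {
          System.out.println(m.group(1)); // prints job_201305091437_0012
        }
      }
    }
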
Added: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LogRetriever.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LogRetriever.java?rev=1526025&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LogRetriever.java (added)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LogRetriever.java Tue Sep 24 22:08:00 2013
@@ -0,0 +1,405 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton.tool;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobID;
+import org.apache.hadoop.mapred.JobStatus;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hive.hcatalog.templeton.LauncherDelegator.JobType;
+
+/*
+ * This class collects MapReduce stderr/stdout/syslog output from the
+ * jobtracker and stores it in an HDFS location. The log directory layout is:
+ * <ul compact>
+ * <li>logs/$job_id (directory for $job_id)
+ * <li>logs/$job_id/job.xml.html
+ * <li>logs/$job_id/$attempt_id (directory for $attempt_id)
+ * <li>logs/$job_id/$attempt_id/stderr
+ * <li>logs/$job_id/$attempt_id/stdout
+ * <li>logs/$job_id/$attempt_id/syslog
+ * </ul>
+ * Since there is no API for retrieving MapReduce logs from the jobtracker,
+ * the code fetches the jobtracker UI pages and parses the HTML. The current
+ * parser only works with Hadoop 1; Hadoop 2 would need a different parser.
+ */
+public class LogRetriever {
+  String statusDir;
+  JobType jobType;
+  private static final String attemptDetailPatternInString = "<a href=\"(taskdetails.jsp\\?.*?)\">";
+  private static Pattern attemptDetailPattern = null;
+  private static final String attemptLogPatternInString = "Last 8KB</a><br/><a href=\"(.*?tasklog\\?attemptid=.*?)\">All</a>";
+  private static Pattern attemptLogPattern = null;
+  private static final String attemptIDPatternInString = "attemptid=(.*)?&";
+  private static Pattern attemptIDPattern = null;
+  private static final String attemptStartTimePatternInString = "<td>(\\d{1,2}-[A-Za-z]{3}-\\d{4} \\d{2}:\\d{2}:\\d{2})(<br/>)?</td>";
+  private static Pattern attemptStartTimePattern = null;
+  private static final String attemptEndTimePatternInString = "<td>(\\d{1,2}-[A-Za-z]{3}-\\d{4} \\d{2}:\\d{2}:\\d{2}) \\(.*\\)(<br/>)?</td>";
+  private static Pattern attemptEndTimePattern = null;
+  private FileSystem fs;
+  private JobClient jobClient = null;
+  private Configuration conf = null;
+
+  // Class to store necessary information for an attempt to log
+  static class AttemptInfo {
+    public String id;
+    public URL baseUrl;
+    public enum AttemptStatus {COMPLETED, FAILED};
+    AttemptStatus status;
+    public String startTime;
+    public String endTime;
+    public String type = "unknown";
+
+    @Override
+    public String toString() {
+      return id + "\t" + baseUrl.toString() + "\t" + status.toString() + "\t" + type
+          + "\t" + startTime + "\t" + endTime + "\n";
+    }
+  }
+
+  /*
+   * @param statusDir the statusdir defined for the WebHCat job; it is
+   *                  expected to contain the stdout/stderr/syslog of the
+   *                  WebHCat controller job
+   * @param jobType   Pig, Hive, streaming, and generic MapReduce jobs are
+   *                  currently supported. The type-specific parser parses the
+   *                  log of the controller job and retrieves the job_id of
+   *                  every MapReduce job it launched. The generic MapReduce
+   *                  parser works when the program uses JobClient.runJob to
+   *                  submit its jobs; if the program uses another API, the
+   *                  parser is not guaranteed to find the job_ids
+   * @param conf      configuration for WebHCat
+   */
+  public LogRetriever(String statusDir, JobType jobType, Configuration conf)
+    throws IOException {
+    this.statusDir = statusDir;
+    this.jobType = jobType;
+    attemptDetailPattern = Pattern.compile(attemptDetailPatternInString);
+    attemptLogPattern = Pattern.compile(attemptLogPatternInString);
+    attemptIDPattern = Pattern.compile(attemptIDPatternInString);
+    attemptStartTimePattern = Pattern.compile(attemptStartTimePatternInString);
+    attemptEndTimePattern = Pattern.compile(attemptEndTimePatternInString);
+    Path statusPath = new Path(statusDir);
+    fs = statusPath.getFileSystem(conf);
+    jobClient = new JobClient(new JobConf(conf));
+    this.conf = conf;
+  }
+
+  public void run() throws IOException {
+    String logDir = statusDir + "/logs";
+
+    fs.mkdirs(new Path(logDir));
+
+    // Get jobids from job status dir
+    JobIDParser jobIDParser = null;
+    switch (jobType) {
+    case PIG:
+      jobIDParser = new PigJobIDParser(statusDir, conf);
+      break;
+    case HIVE:
+      jobIDParser = new HiveJobIDParser(statusDir, conf);
+      break;
+    case JAR:
+    case STREAMING:
+      jobIDParser = new JarJobIDParser(statusDir, conf);
+      break;
+    default:
+      System.err.println("Unknown job type: "
+        + (jobType != null ? jobType.toString() : "null")
+        + ", only pig/hive/jar/streaming are supported, skipping logs");
+      return;
+    }
+    List<String> jobs = new ArrayList<String>();
+    try {
+      jobs = jobIDParser.parseJobID();
+    } catch (IOException e) {
+      System.err.println("Cannot retrieve jobid from log file");
+      e.printStackTrace();
+    }
+
+    // Log jobs
+    PrintWriter listWriter = null;
+    try {
+      listWriter = new PrintWriter(new OutputStreamWriter(
+          fs.create(new Path(logDir, "list.txt"))));
+      for (String job : jobs) {
+        try {
+          logJob(logDir, job, listWriter);
+        } catch (IOException e) {
+          System.err.println("Cannot retrieve log for " + job);
+          e.printStackTrace();
+        }
+      }
+    } finally {
+      if (listWriter!=null) {
+        listWriter.close();
+      }
+    }
+  }
+
+  private void logJob(String logDir, String jobID, PrintWriter listWriter)
+    throws IOException {
+    RunningJob rj = jobClient.getJob(JobID.forName(jobID));
+    String jobURLString = rj.getTrackingURL();
+
+    Path jobDir = new Path(logDir, jobID);
+    fs.mkdirs(jobDir);
+
+    // Log jobconf
+    try {
+      logJobConf(jobID, jobURLString, jobDir.toString());
+    } catch (IOException e) {
+      System.err.println("Cannot retrieve job.xml.html for " + jobID);
+      e.printStackTrace();
+    }
+
+    listWriter.println("job: " + jobID + "(" + "name=" + rj.getJobName() + ","
+        + "status=" + JobStatus.getJobRunState(rj.getJobState()) + ")");
+
+    // Get completed attempts
+    List<AttemptInfo> attempts = new ArrayList<AttemptInfo>();
+    for (String type : new String[] { "map", "reduce", "setup", "cleanup" }) {
+      try {
+        List<AttemptInfo> successAttempts = getCompletedAttempts(jobID,
+            jobURLString, type);
+        attempts.addAll(successAttempts);
+      } catch (IOException e) {
+        System.err.println("Cannot retrieve " + type + " tasks for " + jobID);
+        e.printStackTrace();
+      }
+    }
+
+    // Get failed attempts
+    try {
+      List<AttemptInfo> failedAttempts = getFailedAttempts(jobID, jobURLString);
+      attempts.addAll(failedAttempts);
+    } catch (IOException e) {
+      System.err.println("Cannot retrieve failed attempts for " + jobID);
+      e.printStackTrace();
+    }
+
+    // Log attempts
+    for (AttemptInfo attempt : attempts) {
+      try {
+        logAttempt(jobID, attempt, jobDir.toString());
+        listWriter.println("  attempt:" + attempt.id + "(" + "type="
+            + attempt.type + "," + "status=" + attempt.status + ","
+            + "starttime=" + attempt.startTime + "," + "endtime="
+            + attempt.endTime + ")");
+      } catch (IOException e) {
+        System.err.println("Cannot log attempt " + attempt.id);
+        e.printStackTrace();
+      }
+    }
+
+    listWriter.println();
+  }
+
+  // Utility to collect regex matches from a URL; array element i holds the
+  // group(1) matches for pattern i
+  private List<String>[] getMatches(URL url, Pattern[] pattern)
+    throws IOException {
+    List<String>[] results = new ArrayList[pattern.length];
+    for (int i = 0; i < pattern.length; i++) {
+      results[i] = new ArrayList<String>();
+    }
+
+    URLConnection urlConnection = url.openConnection();
+    BufferedReader reader = new BufferedReader(new InputStreamReader(
+        urlConnection.getInputStream()));
+    String line;
+    while ((line = reader.readLine()) != null) {
+      for (int i = 0; i < pattern.length; i++) {
+        Matcher matcher = pattern[i].matcher(line);
+        if (matcher.find()) {
+          results[i].add(matcher.group(1));
+        }
+      }
+    }
+    reader.close();
+    return results;
+  }
+
+  // Retrieve job conf into logDir
+  private void logJobConf(String job, String jobURLInString, String jobDir)
+    throws IOException {
+    URL jobURL = new URL(jobURLInString);
+    String fileInURL = "/jobconf.jsp?jobid=" + job;
+    URL jobTasksURL = new URL(jobURL.getProtocol(), jobURL.getHost(),
+        jobURL.getPort(), fileInURL);
+    URLConnection urlConnection = jobTasksURL.openConnection();
+    BufferedReader reader = null;
+    PrintWriter writer = null;
+    try {
+      reader = new BufferedReader(new InputStreamReader(
+        urlConnection.getInputStream()));
+  
+      writer = new PrintWriter(new OutputStreamWriter(
+        fs.create(new Path(jobDir, "job.xml.html"))));
+  
+      // Copy conf file
+      String line;
+      while ((line = reader.readLine()) != null) {
+        writer.println(line);
+      }
+    } finally {
+      if (reader!=null) {
+        reader.close();
+      }
+      if (writer!=null) {
+        writer.close();
+      }
+    }
+  }
+
+  // Get completed attempts from jobtasks.jsp
+  private List<AttemptInfo> getCompletedAttempts(String job,
+      String jobURLInString, String type) throws IOException {
+    // Get task detail link from the jobtask page
+    String fileInURL = "/jobtasks.jsp?jobid=" + job + "&type=" + type
+        + "&pagenum=1&state=completed";
+    URL jobURL = new URL(jobURLInString);
+    URL jobTasksURL = new URL(jobURL.getProtocol(), jobURL.getHost(),
+        jobURL.getPort(), fileInURL);
+    List<String>[] taskAttemptURLAndTimestamp = getMatches(jobTasksURL,
+      new Pattern[] { attemptDetailPattern, attemptStartTimePattern,
+        attemptEndTimePattern });
+    List<AttemptInfo> results = new ArrayList<AttemptInfo>();
+
+    // Go to task details, fetch task tracker url
+    for (int i = 0; i < taskAttemptURLAndTimestamp[0].size(); i++) {
+      String taskString = taskAttemptURLAndTimestamp[0].get(i);
+      URL taskDetailsURL = new URL(jobURL.getProtocol(), jobURL.getHost(),
+          jobURL.getPort(), "/" + taskString);
+      List<String>[] attemptLogStrings = getMatches(taskDetailsURL,
+          new Pattern[] { attemptLogPattern });
+      for (String attemptLogString : attemptLogStrings[0]) {
+        AttemptInfo attempt = new AttemptInfo();
+        attempt.baseUrl = new URL(attemptLogString);
+        attempt.startTime = taskAttemptURLAndTimestamp[1].get(i);
+        attempt.endTime = taskAttemptURLAndTimestamp[2].get(i);
+        attempt.type = type;
+        Matcher matcher = attemptIDPattern.matcher(attemptLogString);
+        if (matcher.find()) {
+          attempt.id = matcher.group(1);
+        }
+        attempt.status = AttemptInfo.AttemptStatus.COMPLETED;
+        results.add(attempt);
+      }
+    }
+
+    return results;
+  }
+
+  // Get failed attempts from jobfailures.jsp
+  private List<AttemptInfo> getFailedAttempts(String job, String jobURLInString)
+    throws IOException {
+    String fileInURL = "/jobfailures.jsp?jobid=" + job
+        + "&kind=all&cause=failed";
+    URL jobURL = new URL(jobURLInString);
+    URL url = new URL(jobURL.getProtocol(), jobURL.getHost(), jobURL.getPort(),
+        fileInURL);
+    List<String>[] attemptLogStrings = getMatches(url,
+        new Pattern[] { attemptDetailPattern });
+    List<String> failedTaskStrings = new ArrayList<String>();
+    for (String attempt : attemptLogStrings[0]) {
+      if (!failedTaskStrings.contains(attempt)) {
+        failedTaskStrings.add(attempt);
+      }
+    }
+    List<AttemptInfo> results = new ArrayList<AttemptInfo>();
+    for (String taskString : failedTaskStrings) {
+      URL taskDetailsURL = new URL(jobURL.getProtocol(), jobURL.getHost(),
+        jobURL.getPort(), "/" + taskString);
+      List<String>[] taskAttemptURLAndTimestamp = getMatches(taskDetailsURL,
+        new Pattern[] { attemptLogPattern, attemptStartTimePattern,
+          attemptEndTimePattern });
+      for (int i = 0; i < taskAttemptURLAndTimestamp[0].size(); i++) {
+        String attemptLogString = taskAttemptURLAndTimestamp[0].get(i);
+        AttemptInfo attempt = new AttemptInfo();
+        attempt.baseUrl = new URL(attemptLogString);
+        attempt.startTime = taskAttemptURLAndTimestamp[1].get(i);
+        attempt.endTime = taskAttemptURLAndTimestamp[2].get(i);
+        Matcher matcher = attemptIDPattern.matcher(attemptLogString);
+        if (matcher.find()) {
+          attempt.id = matcher.group(1);
+        }
+        if (attempt.id.contains("_r_")) {
+          attempt.type = "reduce";
+        }
+        attempt.status = AttemptInfo.AttemptStatus.FAILED;
+        results.add(attempt);
+      }
+    }
+
+    return results;
+  }
+
+  // Retrieve attempt log into logDir
+  private void logAttempt(String job, AttemptInfo attemptInfo, String logDir)
+    throws IOException {
+    Path attemptDir = new Path(logDir, attemptInfo.id);
+    fs.mkdirs(attemptDir);
+    for (String type : new String[] { "stderr", "stdout", "syslog" }) {
+      // Retrieve log from task tracker
+      String fileInURL = "tasklog?attemptid=" + attemptInfo.id
+          + "&plaintext=true&filter=" + type;
+      URL url = new URL(attemptInfo.baseUrl.getProtocol(),
+          attemptInfo.baseUrl.getHost(), attemptInfo.baseUrl.getPort(), "/"
+              + fileInURL);
+
+      URLConnection urlConnection = url.openConnection();
+      BufferedReader reader = null;
+      PrintWriter writer = null;
+      
+      try {
+        reader = new BufferedReader(new InputStreamReader(
+          urlConnection.getInputStream()));
+  
+        writer = new PrintWriter(new OutputStreamWriter(
+          fs.create(new Path(attemptDir, type))));
+  
+        // Copy log file
+        String line;
+        while ((line = reader.readLine()) != null) {
+          writer.println(line);
+        }
+      } finally {
+        if (reader!=null) {
+          reader.close();
+        }
+        if (writer!=null) {
+          writer.close();
+        }
+      }
+    }
+  }
+}

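For illustration (not part of this commit): LogRetriever discovers per-attempt log URLs by scraping Hadoop 1 jobtracker pages. A demonstration of attemptLogPattern and attemptIDPattern on a synthetic taskdetails.jsp fragment (the patterns are verbatim from this commit; the HTML line and tracker host are invented):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class AttemptLogPatternDemo {
      public static void main(String[] args) {
        Pattern logPattern = Pattern.compile(
            "Last 8KB</a><br/><a href=\"(.*?tasklog\\?attemptid=.*?)\">All</a>");
        Pattern idPattern = Pattern.compile("attemptid=(.*)?&");
        String html = "<a href=\"http://tt1:50060/tasklog"
            + "?attemptid=attempt_201305091437_0012_m_000000_0&start=-8193\">"
            + "Last 8KB</a><br/><a href=\"http://tt1:50060/tasklog"
            + "?attemptid=attempt_201305091437_0012_m_000000_0&all=true\">All</a>";
        Matcher m = logPattern.matcher(html);
        if (m.find()) {
          String logUrl = m.group(1); // full "All" log URL
          Matcher id = idPattern.matcher(logUrl);
          if (id.find()) {
            System.out.println(id.group(1)); // attempt_201305091437_0012_m_000000_0
          }
        }
      }
    }
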
Added: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/PigJobIDParser.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/PigJobIDParser.java?rev=1526025&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/PigJobIDParser.java (added)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/PigJobIDParser.java Tue Sep 24 22:08:00 2013
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton.tool;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+
+class PigJobIDParser extends JobIDParser {
+  final static String jobidPattern = "HadoopJobId: (job_\\d+_\\d+)";
+
+  PigJobIDParser(String statusdir, Configuration conf) {
+    super(statusdir, conf);
+  }
+
+  @Override
+  List<String> parseJobID() throws IOException {
+    return parseJobID(TempletonControllerJob.STDERR_FNAME, jobidPattern);
+  }
+}

Modified: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java?rev=1526025&r1=1526024&r2=1526025&view=diff
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java (original)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java Tue Sep 24 22:08:00 2013
@@ -24,6 +24,7 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
 import java.io.PrintWriter;
+import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
@@ -53,6 +54,7 @@ import org.apache.hadoop.security.UserGr
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
+import org.apache.hive.hcatalog.templeton.LauncherDelegator;
 
 /**
  * A Map Reduce job that will start another job.
@@ -69,6 +71,8 @@ import org.apache.hadoop.util.ToolRunner
 public class TempletonControllerJob extends Configured implements Tool {
   public static final String COPY_NAME = "templeton.copy";
   public static final String STATUSDIR_NAME = "templeton.statusdir";
+  public static final String ENABLE_LOG = "templeton.enablelog";
+  public static final String JOB_TYPE = "templeton.jobtype";
   public static final String JAR_ARGS_NAME = "templeton.args";
   public static final String OVERRIDE_CLASSPATH = "templeton.override-classpath";
 
@@ -155,9 +159,17 @@ public class TempletonControllerJob exte
       String statusdir = conf.get(STATUSDIR_NAME);
 
       if (statusdir != null) {
-        statusdir = TempletonUtils.addUserHomeDirectoryIfApplicable(statusdir, conf.get("user.name"), conf);
+        try {
+          statusdir = TempletonUtils.addUserHomeDirectoryIfApplicable(statusdir,
+            conf.get("user.name"));
+        } catch (URISyntaxException e) {
+          throw new IOException("Invalid status dir URI", e);
+        }
       }
 
+      boolean enablelog = Boolean.parseBoolean(conf.get(ENABLE_LOG));
+      LauncherDelegator.JobType jobType = LauncherDelegator.JobType.valueOf(conf.get(JOB_TYPE));
+
       ExecutorService pool = Executors.newCachedThreadPool();
       executeWatcher(pool, conf, context.getJobID(),
         proc.getInputStream(), statusdir, STDOUT_FNAME);
@@ -177,6 +189,13 @@ public class TempletonControllerJob exte
       state.setCompleteStatus("done");
       state.close();
 
+      if (enablelog && TempletonUtils.isset(statusdir)) {
+        System.err.println("templeton: collecting logs for " + context.getJobID().toString()
+          + " to " + statusdir + "/logs");
+        LogRetriever logRetriever = new LogRetriever(statusdir, jobType, conf);
+        logRetriever.run();
+      }
+
       if (proc.exitValue() != 0)
         System.err.println("templeton: job failed with exit code "
           + proc.exitValue());

Modified: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java?rev=1526025&r1=1526024&r2=1526025&view=diff
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java (original)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java Tue Sep 24 22:08:00 2013
@@ -33,11 +33,11 @@ import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import javax.ws.rs.core.UriBuilder;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hive.hcatalog.templeton.UgiFactory;
@@ -213,22 +213,19 @@ public class TempletonUtils {
     }
   }
 
-  public static String addUserHomeDirectoryIfApplicable(String origPathStr, String user, Configuration conf) throws IOException {
-    Path path = new Path(origPathStr);
-    String result = origPathStr;
-
-    // shortcut for s3/asv
-    // If path contains scheme, user should mean an absolute path,
-    // However, path.isAbsolute tell us otherwise.
-    // So we skip conversion for non-hdfs.
-    if (!(path.getFileSystem(conf) instanceof DistributedFileSystem)&&
-        !(path.getFileSystem(conf) instanceof LocalFileSystem)) {
-      return result;
-    }
-    if (!path.isAbsolute()) {
-      result = "/user/" + user + "/" + origPathStr;
-    }
-    return result;
+  public static String addUserHomeDirectoryIfApplicable(String origPathStr, String user)
+    throws IOException, URISyntaxException {
+    URI uri = new URI(origPathStr);
+
+    if (uri.getPath().isEmpty()) {
+      String newPath = "/user/" + user;
+      uri = UriBuilder.fromUri(uri).replacePath(newPath).build();
+    } else if (!new Path(uri.getPath()).isAbsolute()) {
+      String newPath = "/user/" + user + "/" + uri.getPath();
+      uri = UriBuilder.fromUri(uri).replacePath(newPath).build();
+    } // no work needed for absolute paths
+
+    return uri.toString();
   }
 
   public static Path hadoopFsPath(String fname, final Configuration conf, String user)
@@ -254,7 +251,7 @@ public class TempletonUtils {
           }
         });
 
-    fname = addUserHomeDirectoryIfApplicable(fname, user, conf);
+    fname = addUserHomeDirectoryIfApplicable(fname, user);
     URI u = new URI(fname);
     Path p = new Path(u).makeQualified(defaultFs);
 
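The rewrite above makes addUserHomeDirectoryIfApplicable reason about the
URI's path component instead of the filesystem type: an empty path becomes
/user/<user>, a relative path gets that prefix, and an absolute path is left
untouched. A dependency-free sketch of the same logic (plain java.net.URI
stands in here for the committed code's UriBuilder and Path.isAbsolute()):

import java.net.URI;

public class HomeDirSketch {
  static String addHome(String orig, String user) throws Exception {
    URI uri = new URI(orig);
    String path = uri.getPath();
    if (path.isEmpty()) {
      path = "/user/" + user;               // e.g. hdfs://container
    } else if (!path.startsWith("/")) {
      path = "/user/" + user + "/" + path;  // relative path
    } else {
      return orig;                          // absolute path: no work needed
    }
    // Rebuild the URI so scheme, authority, and fragment survive the rewrite.
    return new URI(uri.getScheme(), uri.getAuthority(), path,
                   uri.getQuery(), uri.getFragment()).toString();
  }

  public static void main(String[] args) throws Exception {
    System.out.println(addHome("output", "webhcat"));              // /user/webhcat/output
    System.out.println(addHome("hdfs://container", "webhcat"));    // hdfs://container/user/webhcat
    System.out.println(addHome("/user/hadoop/output", "webhcat")); // unchanged
  }
}
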

Added: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/hive/stderr
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/hive/stderr?rev=1526025&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/hive/stderr (added)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/hive/stderr Tue Sep 24 22:08:00 2013
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+WARNING: org.apache.hadoop.metrics.jvm.EventCounter is deprecated. Please use org.apache.hadoop.log.metrics.EventCounter in all the log4j.properties files.
+Logging initialized using configuration in jar:file:/Users/daijy/hadoop-1.0.3/tmp/mapred/local/taskTracker/distcache/7168149899505899073_637041239_1133292873/localhost/apps/templeton/hive-0.10.0.tar.gz/hive-0.10.0/lib/hive-common-0.10.0.jar!/hive-log4j.properties
+Hive history file=/tmp/daijy/hive_job_log_daijy_201305091500_862342848.txt
+Total MapReduce jobs = 1
+Launching Job 1 out of 1
+Number of reduce tasks is set to 0 since there's no reduce operator
+Starting Job = job_201305091437_0012, Tracking URL = http://localhost:50030/jobdetails.jsp?jobid=job_201305091437_0012
+Kill Command = /Users/daijy/hadoop-1.0.3/libexec/../bin/hadoop job  -kill job_201305091437_0012
+Hadoop job information for Stage-1: number of mappers: 0; number of reducers: 0
+2013-05-09 15:01:13,625 Stage-1 map = 0%,  reduce = 0%
+2013-05-09 15:01:19,660 Stage-1 map = 100%,  reduce = 100%
+Ended Job = job_201305091437_0012
+MapReduce Jobs Launched: 
+Job 0:  HDFS Read: 0 HDFS Write: 0 SUCCESS
+Total MapReduce CPU Time Spent: 0 msec
+OK
+Time taken: 26.187 seconds

Added: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/jar/stderr
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/jar/stderr?rev=1526025&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/jar/stderr (added)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/jar/stderr Tue Sep 24 22:08:00 2013
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+13/05/09 09:56:05 INFO input.FileInputFormat: Total input paths to process : 1
+13/05/09 09:56:05 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+13/05/09 09:56:05 WARN snappy.LoadSnappy: Snappy native library not loaded
+13/05/09 09:56:05 INFO mapred.JobClient: Running job: job_201305090950_0004
+13/05/09 09:56:06 INFO mapred.JobClient:  map 0% reduce 0%
+13/05/09 09:56:19 INFO mapred.JobClient:  map 100% reduce 0%
+13/05/09 09:56:31 INFO mapred.JobClient:  map 100% reduce 100%
+13/05/09 09:56:36 INFO mapred.JobClient: Job complete: job_201305090950_0004
+13/05/09 09:56:36 INFO mapred.JobClient: Counters: 26
+13/05/09 09:56:36 INFO mapred.JobClient:   Job Counters 
+13/05/09 09:56:36 INFO mapred.JobClient:     Launched reduce tasks=1
+13/05/09 09:56:36 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=12660
+13/05/09 09:56:36 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
+13/05/09 09:56:36 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
+13/05/09 09:56:36 INFO mapred.JobClient:     Launched map tasks=1
+13/05/09 09:56:36 INFO mapred.JobClient:     Data-local map tasks=1
+13/05/09 09:56:36 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=10247
+13/05/09 09:56:36 INFO mapred.JobClient:   File Output Format Counters 
+13/05/09 09:56:36 INFO mapred.JobClient:     Bytes Written=16
+13/05/09 09:56:36 INFO mapred.JobClient:   FileSystemCounters
+13/05/09 09:56:36 INFO mapred.JobClient:     FILE_BYTES_READ=38
+13/05/09 09:56:36 INFO mapred.JobClient:     HDFS_BYTES_READ=127
+13/05/09 09:56:36 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=45519
+13/05/09 09:56:36 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=16
+13/05/09 09:56:36 INFO mapred.JobClient:   File Input Format Counters 
+13/05/09 09:56:36 INFO mapred.JobClient:     Bytes Read=8
+13/05/09 09:56:36 INFO mapred.JobClient:   Map-Reduce Framework
+13/05/09 09:56:36 INFO mapred.JobClient:     Map output materialized bytes=38
+13/05/09 09:56:36 INFO mapred.JobClient:     Map input records=2
+13/05/09 09:56:36 INFO mapred.JobClient:     Reduce shuffle bytes=0
+13/05/09 09:56:36 INFO mapred.JobClient:     Spilled Records=8
+13/05/09 09:56:36 INFO mapred.JobClient:     Map output bytes=24
+13/05/09 09:56:36 INFO mapred.JobClient:     Total committed heap usage (bytes)=269619200
+13/05/09 09:56:36 INFO mapred.JobClient:     Combine input records=4
+13/05/09 09:56:36 INFO mapred.JobClient:     SPLIT_RAW_BYTES=119
+13/05/09 09:56:36 INFO mapred.JobClient:     Reduce input records=4
+13/05/09 09:56:36 INFO mapred.JobClient:     Reduce input groups=4
+13/05/09 09:56:36 INFO mapred.JobClient:     Combine output records=4
+13/05/09 09:56:36 INFO mapred.JobClient:     Reduce output records=4
+13/05/09 09:56:36 INFO mapred.JobClient:     Map output records=4

Added: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/pig/stderr
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/pig/stderr?rev=1526025&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/pig/stderr (added)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/pig/stderr Tue Sep 24 22:08:00 2013
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+2013-04-18 14:54:45,945 [main] INFO  org.apache.pig.Main - Apache Pig version 0.10.1 (r1426677) compiled Dec 28 2012, 16:46:13
+2013-04-18 14:54:45,946 [main] INFO  org.apache.pig.Main - Logging error messages to: /Users/daijy/hadoop-1.0.3/tmp/mapred/local/taskTracker/daijy/jobcache/job_201304181449_0003/attempt_201304181449_0003_m_000000_0/work/pig_1366322085940.log
+2013-04-18 14:54:46,381 [main] INFO  org.apache.pig.backend.hadoop.executionengine.HExecutionEngine - Connecting to hadoop file system at: hdfs://localhost:8020
+2013-04-18 14:54:46,512 [main] INFO  org.apache.pig.backend.hadoop.executionengine.HExecutionEngine - Connecting to map-reduce job tracker at: localhost:9001
+2013-04-18 14:54:46,899 [main] INFO  org.apache.pig.tools.pigstats.ScriptState - Pig features used in the script: UNKNOWN
+2013-04-18 14:54:47,059 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler - File concatenation threshold: 100 optimistic? false
+2013-04-18 14:54:47,082 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer - MR plan size before optimization: 1
+2013-04-18 14:54:47,083 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer - MR plan size after optimization: 1
+2013-04-18 14:54:47,144 [main] INFO  org.apache.pig.tools.pigstats.ScriptState - Pig script settings are added to the job
+2013-04-18 14:54:47,159 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - mapred.job.reduce.markreset.buffer.percent is not set, set to default 0.3
+2013-04-18 14:54:47,162 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - creating jar file Job4814368788682413488.jar
+2013-04-18 14:54:50,051 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - jar file Job4814368788682413488.jar created
+2013-04-18 14:54:50,065 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Setting up single store job
+2013-04-18 14:54:50,093 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 1 map-reduce job(s) waiting for submission.
+2013-04-18 14:54:50,386 [Thread-7] INFO  org.apache.hadoop.mapreduce.lib.input.FileInputFormat - Total input paths to process : 1
+2013-04-18 14:54:50,386 [Thread-7] INFO  org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil - Total input paths to process : 1
+2013-04-18 14:54:50,395 [Thread-7] WARN  org.apache.hadoop.util.NativeCodeLoader - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+2013-04-18 14:54:50,395 [Thread-7] WARN  org.apache.hadoop.io.compress.snappy.LoadSnappy - Snappy native library not loaded
+2013-04-18 14:54:50,397 [Thread-7] INFO  org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil - Total input paths (combined) to process : 1
+2013-04-18 14:54:50,594 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - HadoopJobId: job_201304181449_0004
+2013-04-18 14:54:50,595 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - More information at: http://localhost:50030/jobdetails.jsp?jobid=job_201304181449_0004
+2013-04-18 14:54:50,597 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 0% complete
+2013-04-18 14:55:12,184 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 50% complete
+2013-04-18 14:55:20,743 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 100% complete
+2013-04-18 14:55:20,744 [main] INFO  org.apache.pig.tools.pigstats.SimplePigStats - Script Statistics: 
+
+HadoopVersion	PigVersion	UserId	StartedAt	FinishedAt	Features
+1.0.3	0.10.1	daijy	2013-04-18 14:54:47	2013-04-18 14:55:20	UNKNOWN
+
+Success!
+
+Job Stats (time in seconds):
+JobId	Maps	Reduces	MaxMapTime	MinMapTIme	AvgMapTime	MaxReduceTime	MinReduceTime	AvgReduceTime	Alias	Feature	Outputs
+job_201304181449_0004	1	0	6	6	6	0	0	0	a	MAP_ONLY	hdfs://localhost:8020/tmp/temp416260498/tmp1616274076,
+
+Input(s):
+Successfully read 3 records (369 bytes) from: "hdfs://localhost:8020/user/daijy/2.txt"
+
+Output(s):
+Successfully stored 3 records (33 bytes) in: "hdfs://localhost:8020/tmp/temp416260498/tmp1616274076"
+
+Counters:
+Total records written : 3
+Total bytes written : 33
+Spillable Memory Manager spill count : 0
+Total bags proactively spilled: 0
+Total records proactively spilled: 0
+
+Job DAG:
+job_201304181449_0004
+
+
+2013-04-18 14:55:20,752 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Success!
+2013-04-18 14:55:20,759 [main] INFO  org.apache.hadoop.mapreduce.lib.input.FileInputFormat - Total input paths to process : 1
+2013-04-18 14:55:20,759 [main] INFO  org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil - Total input paths to process : 1

Added: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/streaming/stderr
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/streaming/stderr?rev=1526025&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/streaming/stderr (added)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/data/status/streaming/stderr Tue Sep 24 22:08:00 2013
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+13/05/09 09:58:26 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+13/05/09 09:58:26 WARN snappy.LoadSnappy: Snappy native library not loaded
+13/05/09 09:58:26 INFO mapred.FileInputFormat: Total input paths to process : 1
+13/05/09 09:58:26 INFO streaming.StreamJob: getLocalDirs(): [/Users/daijy/hadoop-1.0.3/tmp/mapred/local]
+13/05/09 09:58:26 INFO streaming.StreamJob: Running job: job_201305090950_0006
+13/05/09 09:58:26 INFO streaming.StreamJob: To kill this job, run:
+13/05/09 09:58:26 INFO streaming.StreamJob: /Users/daijy/hadoop-1.0.3/libexec/../bin/hadoop job  -Dmapred.job.tracker=localhost:9001 -kill job_201305090950_0006
+13/05/09 09:58:26 INFO streaming.StreamJob: Tracking URL: http://localhost:50030/jobdetails.jsp?jobid=job_201305090950_0006
+13/05/09 09:58:27 INFO streaming.StreamJob:  map 0%  reduce 0%
+13/05/09 09:58:39 INFO streaming.StreamJob:  map 50%  reduce 0%
+13/05/09 09:58:45 INFO streaming.StreamJob:  map 100%  reduce 0%
+13/05/09 09:58:48 INFO streaming.StreamJob:  map 100%  reduce 17%
+13/05/09 09:58:57 INFO streaming.StreamJob:  map 100%  reduce 100%
+13/05/09 09:59:03 INFO streaming.StreamJob: Job complete: job_201305090950_0006
+13/05/09 09:59:03 INFO streaming.StreamJob: Output: ooo4

Added: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestJobIDParser.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestJobIDParser.java?rev=1526025&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestJobIDParser.java (added)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestJobIDParser.java Tue Sep 24 22:08:00 2013
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton.tool;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import junit.framework.Assert;
+
+public class TestJobIDParser {
+  @Test
+  public void testParsePig() throws IOException {
+    String errFileName = "../../src/test/data/status/pig";
+    PigJobIDParser pigJobIDParser = new PigJobIDParser(errFileName, new Configuration());
+    List<String> jobs = pigJobIDParser.parseJobID();
+    Assert.assertEquals(1, jobs.size());
+  }
+
+  @Test
+  public void testParseHive() throws IOException {
+    String errFileName = "../../src/test/data/status/hive";
+    HiveJobIDParser hiveJobIDParser = new HiveJobIDParser(errFileName, new Configuration());
+    List<String> jobs = hiveJobIDParser.parseJobID();
+    Assert.assertEquals(1, jobs.size());
+  }
+
+  @Test
+  public void testParseJar() throws IOException {
+    String errFileName = "../../src/test/data/status/jar";
+    JarJobIDParser jarJobIDParser = new JarJobIDParser(errFileName, new Configuration());
+    List<String> jobs = jarJobIDParser.parseJobID();
+    Assert.assertEquals(1, jobs.size());
+  }
+
+  @Test
+  public void testParseStreaming() throws IOException {
+    String errFileName = "../../src/test/data/status/streaming";
+    JarJobIDParser jarJobIDParser = new JarJobIDParser(errFileName, new Configuration());
+    List<String> jobs = jarJobIDParser.parseJobID();
+    Assert.assertEquals(1, jobs.size());
+  }
+
+}
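
These tests point each parser at the stderr fixtures added above; the parser
sources (JobIDParser and its Pig/Hive/Jar subclasses) are added elsewhere in
this commit and are not reproduced in this portion of the diff. The fixtures
show what the parsers must match, e.g. "HadoopJobId: job_201304181449_0004"
in the Pig log. A hypothetical sketch of that kind of extraction (the class
name and regex below are illustrative only):

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class JobIdExtractionSketch {
  // Assumed pattern for Pig stderr; jar/streaming logs would instead match
  // a line like "Running job: job_201305090950_0004".
  private static final Pattern PIG_JOB_ID =
    Pattern.compile("HadoopJobId: (job_\\d+_\\d+)");

  public static List<String> parse(String stderr) {
    List<String> ids = new ArrayList<String>();
    Matcher m = PIG_JOB_ID.matcher(stderr);
    while (m.find()) {
      ids.add(m.group(1));
    }
    return ids;
  }

  public static void main(String[] args) {
    String line = "MapReduceLauncher - HadoopJobId: job_201304181449_0004";
    System.out.println(parse(line)); // prints [job_201304181449_0004]
  }
}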

Modified: hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java?rev=1526025&r1=1526024&r2=1526025&view=diff
==============================================================================
--- hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java (original)
+++ hive/branches/branch-0.12/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java Tue Sep 24 22:08:00 2013
@@ -20,6 +20,7 @@ package org.apache.hive.hcatalog.templet
 
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.net.URISyntaxException;
 
 import org.apache.hadoop.conf.Configuration;
@@ -245,4 +246,32 @@ public class TestTempletonUtils {
     }
   }
 
+  @Test
+  public void testConstructingUserHomeDirectory() throws Exception {
+    String[] sources = new String[] { "output+", "/user/hadoop/output",
+      "hdfs://container", "hdfs://container/", "hdfs://container/path",
+      "output#link", "hdfs://cointaner/output#link",
+      "hdfs://container@acc/test" };
+    String[] expectedResults = new String[] { "/user/webhcat/output+",
+      "/user/hadoop/output", "hdfs://container/user/webhcat",
+      "hdfs://container/", "hdfs://container/path",
+      "/user/webhcat/output#link", "hdfs://cointaner/output#link",
+      "hdfs://container@acc/test" };
+    for (int i = 0; i < sources.length; i++) {
+      String source = sources[i];
+      String expectedResult = expectedResults[i];
+      String result = TempletonUtils.addUserHomeDirectoryIfApplicable(source,
+          "webhcat");
+      Assert.assertEquals(expectedResult, result);
+    }
+
+    String badUri = "c:\\some\\path";
+    try {
+      TempletonUtils.addUserHomeDirectoryIfApplicable(badUri, "webhcat");
+      Assert.fail("addUserHomeDirectoryIfApplicable should fail for bad URI: "
+          + badUri);
+    } catch (URISyntaxException ex) { // expected: a Windows path is not a valid URI
+    }
+  }
+
 }


