hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From na...@apache.org
Subject svn commit: r1310583 - in /hive/trunk: ./ common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/src/java/org/apache/hadoop/hive/ql/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ ql/src/java/org/apache/ha...
Date Fri, 06 Apr 2012 20:56:11 GMT
Author: namit
Date: Fri Apr  6 20:56:10 2012
New Revision: 1310583

URL: http://svn.apache.org/viewvc?rev=1310583&view=rev
Log:
HIVE-2858 Cache remote map reduce job stack traces for additional
logging (Kevin Wilfong via namit)


Added:
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifySessionStateStackTracesHook.java
    hive/trunk/ql/src/test/queries/clientnegative/mapreduce_stack_trace.q
    hive/trunk/ql/src/test/queries/clientnegative/mapreduce_stack_trace_turnoff.q
    hive/trunk/ql/src/test/results/clientnegative/mapreduce_stack_trace.q.out
    hive/trunk/ql/src/test/results/clientnegative/mapreduce_stack_trace_turnoff.q.out
Modified:
    hive/trunk/build-common.xml
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/conf/hive-default.xml.template
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HadoopJobExecHelper.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JobDebugger.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java

Modified: hive/trunk/build-common.xml
URL: http://svn.apache.org/viewvc/hive/trunk/build-common.xml?rev=1310583&r1=1310582&r2=1310583&view=diff
==============================================================================
--- hive/trunk/build-common.xml (original)
+++ hive/trunk/build-common.xml Fri Apr  6 20:56:10 2012
@@ -59,7 +59,7 @@
   <property name="test.junit.output.format" value="xml"/>
   <property name="test.junit.output.usefile" value="true"/>
   <property name="minimr.query.files" value="input16_cc.q,scriptfile1.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q"/>
-  <property name="minimr.query.negative.files" value="minimr_broken_pipe.q" />
+  <property name="minimr.query.negative.files" value="minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q" />
   <property name="test.silent" value="true"/>
   <property name="hadoopVersion" value="${hadoop.version.ant-internal}"/>
   <property name="test.serialize.qplan" value="false"/>

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1310583&r1=1310582&r2=1310583&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Fri Apr  6 20:56:10 2012
@@ -185,6 +185,7 @@ public class HiveConf extends Configurat
     DEFAULT_ZOOKEEPER_PARTITION_NAME("hive.lockmgr.zookeeper.default.partition.name", "__HIVE_DEFAULT_ZOOKEEPER_PARTITION__"),
     // Whether to show a link to the most failed task + debugging tips
     SHOW_JOB_FAIL_DEBUG_INFO("hive.exec.show.job.failure.debug.info", true),
+    JOB_DEBUG_CAPTURE_STACKTRACES("hive.exec.job.debug.capture.stacktraces", true),
     JOB_DEBUG_TIMEOUT("hive.exec.job.debug.timeout", 30000),
     TASKLOG_DEBUG_TIMEOUT("hive.exec.tasklog.debug.timeout", 20000),
     OUTPUT_FILE_EXTENSION("hive.output.file.extension", null),

Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1310583&r1=1310582&r2=1310583&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Fri Apr  6 20:56:10 2012
@@ -1252,6 +1252,14 @@
 </property>
 
 <property>
+  <name>hive.exec.job.debug.capture.stacktraces</name>
+  <value>true</value>
+  <description>Whether or not stack traces parsed from the task logs of a sampled failed task for
+  			   each failed job should be stored in the SessionState
+  </description>
+</property>
+
+<property>
   <name>hive.exec.driver.run.hooks</name>
   <value></value>
  <description>A comma separated list of hooks which implement HiveDriverRunHook and will be run at the

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1310583&r1=1310582&r2=1310583&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Fri Apr  6 20:56:10 2012
@@ -1100,6 +1100,7 @@ public class Driver implements CommandPr
       DriverContext driverCxt = new DriverContext(runnable, ctx);
 
       SessionState.get().setLastMapRedStatsList(new ArrayList<MapRedStats>());
+      SessionState.get().setStackTraces(new HashMap<String, List<List<String>>>());
       SessionState.get().setLocalMapRedErrors(new HashMap<String, List<String>>());
 
       // Add root Tasks to runnable

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HadoopJobExecHelper.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HadoopJobExecHelper.java?rev=1310583&r1=1310582&r2=1310583&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HadoopJobExecHelper.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HadoopJobExecHelper.java Fri Apr  6 20:56:10 2012
@@ -701,9 +701,15 @@ public class HadoopJobExecHelper {
       statusMesg += " with errors";
       returnVal = 2;
       console.printError(statusMesg);
-      if (HiveConf.getBoolVar(job, HiveConf.ConfVars.SHOW_JOB_FAIL_DEBUG_INFO)) {
+      if (HiveConf.getBoolVar(job, HiveConf.ConfVars.SHOW_JOB_FAIL_DEBUG_INFO) ||
+          HiveConf.getBoolVar(job, HiveConf.ConfVars.JOB_DEBUG_CAPTURE_STACKTRACES)) {
         try {
-          JobDebugger jd = new JobDebugger(job, rj, console);
+          JobDebugger jd;
+          if (SessionState.get() != null) {
+            jd = new JobDebugger(job, rj, console, SessionState.get().getStackTraces());
+          } else {
+            jd = new JobDebugger(job, rj, console);
+          }
           Thread t = new Thread(jd);
           t.start();
           t.join(HiveConf.getIntVar(job, HiveConf.ConfVars.JOB_DEBUG_TIMEOUT));

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JobDebugger.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JobDebugger.java?rev=1310583&r1=1310582&r2=1310583&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JobDebugger.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JobDebugger.java Fri Apr  6 20:56:10 2012
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -42,6 +43,7 @@ public class JobDebugger implements Runn
   private final JobConf conf;
   private final RunningJob rj;
   private final LogHelper console;
+  private final Map<String, List<List<String>>> stackTraces;
   // Mapping from task ID to the number of failures
   private final Map<String, Integer> failures = new HashMap<String, Integer>();
  private final Set<String> successes = new HashSet<String>(); // Successful task ID's
@@ -74,6 +76,15 @@ public class JobDebugger implements Runn
     this.conf = conf;
     this.rj = rj;
     this.console = console;
+    this.stackTraces = null;
+  }
+
+  public JobDebugger(JobConf conf, RunningJob rj, LogHelper console,
+      Map<String, List<List<String>>> stackTraces) {
+    this.conf = conf;
+    this.rj = rj;
+    this.console = console;
+    this.stackTraces = stackTraces;
   }
 
   public void run() {
@@ -208,27 +219,37 @@ public class JobDebugger implements Runn
           tlp.addTaskAttemptLogUrl(logUrl);
         }
 
-        List<ErrorAndSolution> errors = tlp.getErrors();
+        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.JOB_DEBUG_CAPTURE_STACKTRACES) &&
+            stackTraces != null) {
+          if (!stackTraces.containsKey(jobId)) {
+            stackTraces.put(jobId, new ArrayList<List<String>>());
+          }
+          stackTraces.get(jobId).addAll(tlp.getStackTraces());
+        }
 
-        StringBuilder sb = new StringBuilder();
-        // We use a StringBuilder and then call printError only once as
-        // printError will write to both stderr and the error log file. In
-        // situations where both the stderr and the log file output is
-        // simultaneously output to a single stream, this will look cleaner.
-        sb.append("\n");
-        sb.append("Task with the most failures(" + maxFailures + "): \n");
-        sb.append("-----\n");
-        sb.append("Task ID:\n  " + task + "\n\n");
-        sb.append("URL:\n  " + taskUrl + "\n");
+        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.SHOW_JOB_FAIL_DEBUG_INFO)) {
+          List<ErrorAndSolution> errors = tlp.getErrors();
 
-        for (ErrorAndSolution e : errors) {
+          StringBuilder sb = new StringBuilder();
+          // We use a StringBuilder and then call printError only once as
+          // printError will write to both stderr and the error log file. In
+          // situations where both the stderr and the log file output is
+          // simultaneously output to a single stream, this will look cleaner.
           sb.append("\n");
-          sb.append("Possible error:\n  " + e.getError() + "\n\n");
-          sb.append("Solution:\n  " + e.getSolution() + "\n");
-        }
-        sb.append("-----\n");
+          sb.append("Task with the most failures(" + maxFailures + "): \n");
+          sb.append("-----\n");
+          sb.append("Task ID:\n  " + task + "\n\n");
+          sb.append("URL:\n  " + taskUrl + "\n");
+
+          for (ErrorAndSolution e : errors) {
+            sb.append("\n");
+            sb.append("Possible error:\n  " + e.getError() + "\n\n");
+            sb.append("Solution:\n  " + e.getSolution() + "\n");
+          }
+          sb.append("-----\n");
 
-        console.printError(sb.toString());
+          console.printError(sb.toString());
+        }
 
         // Only print out one task because that's good enough for debugging.
         break;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java?rev=1310583&r1=1310582&r2=1310583&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java Fri Apr  6 20:56:10 2012
@@ -28,6 +28,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.regex.Pattern;
 
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.mapred.JobConf;
@@ -170,4 +171,77 @@ public class TaskLogProcessor {
     return errors;
   }
 
+  /**
+   * Processes the provided task logs to extract stack traces.
+   * @return A list of lists of strings where each list of strings represents a stack trace
+   */
+  public List<List<String>> getStackTraces() {
+    List<List<String>> stackTraces = new ArrayList<List<String>>();
+
+    for(String urlString : taskLogUrls) {
+
+      // Open the log file, and read the lines, parse out stack traces
+      URL taskAttemptLogUrl;
+      try {
+        taskAttemptLogUrl = new URL(urlString);
+      } catch(MalformedURLException e) {
+        throw new RuntimeException("Bad task log url", e);
+      }
+      BufferedReader in;
+      try {
+        in = new BufferedReader(
+            new InputStreamReader(taskAttemptLogUrl.openStream()));
+        String inputLine;
+        String lastLine = null;
+        boolean lastLineMatched = false;
+        List<String> stackTrace = null;
+
+        // Patterns that match the middle/end of stack traces
+        Pattern stackTracePattern = Pattern.compile("^\tat .*", Pattern.CASE_INSENSITIVE);
+        Pattern endStackTracePattern =
+            Pattern.compile("^\t... [0-9]+ more.*", Pattern.CASE_INSENSITIVE);
+
+        while ((inputLine = in.readLine()) != null) {
+
+          if (stackTracePattern.matcher(inputLine).matches() ||
+              endStackTracePattern.matcher(inputLine).matches()) {
+            // We are in a stack trace
+
+            if (stackTrace == null) {
+              // This is the first time we have realized we are in a stack trace.  In this case,
+              // the previous line was the error message, add that to the stack trace as well
+              stackTrace = new ArrayList<String>();
+              stackTrace.add(lastLine);
+            } else if (!lastLineMatched) {
+              // The last line didn't match a pattern, it is probably an error message, part of
+              // a string of stack traces related to the same error message so add it to the stack
+              // trace
+              stackTrace.add(lastLine);
+            }
+
+            stackTrace.add(inputLine);
+            lastLineMatched = true;
+          } else {
+
+            if (!lastLineMatched && stackTrace != null) {
+              // If the last line didn't match the patterns either, the stack trace is definitely
+              // over
+              stackTraces.add(stackTrace);
+              stackTrace = null;
+            }
+
+            lastLineMatched = false;
+          }
+
+          lastLine = inputLine;
+        }
+        in.close();
+      } catch (IOException e) {
+        throw new RuntimeException("Error while reading from task log url", e);
+      }
+    }
+
+    return stackTraces;
+  }
+
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java?rev=1310583&r1=1310582&r2=1310583&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java Fri Apr  6 20:56:10 2012
@@ -117,6 +117,9 @@ public class SessionState {
 
   private Map<String, String> hiveVariables;
 
+  // A mapping from a hadoop job ID to the stack traces collected from the map reduce task logs
+  private Map<String, List<List<String>>> stackTraces;
+
   // This mapping collects all the configuration variables which have been set by the user
   // explicitely, either via SET in the CLI, the hiveconf option, or a System property.
   // It is a mapping from the variable name to its value.  Note that if a user repeatedly
@@ -711,6 +714,14 @@ public class SessionState {
     this.lastMapRedStatsList = lastMapRedStatsList;
   }
 
+  public void setStackTraces(Map<String, List<List<String>>> stackTraces) {
+    this.stackTraces = stackTraces;
+  }
+
+  public Map<String, List<List<String>>> getStackTraces() {
+    return stackTraces;
+  }
+
   public Map<String, String> getOverriddenConfigurations() {
     if (overriddenConfigurations == null) {
       overriddenConfigurations = new HashMap<String, String>();

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifySessionStateStackTracesHook.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifySessionStateStackTracesHook.java?rev=1310583&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifySessionStateStackTracesHook.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifySessionStateStackTracesHook.java Fri Apr  6 20:56:10 2012
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.hooks;
+
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+
+/**
+ *
+ * VerifySessionStateStackTracesHook.
+ *
+ * Writes the first line of each stack trace collected to the console, to either verify stack
+ * traces were or were not collected.
+ */
+public class VerifySessionStateStackTracesHook implements ExecuteWithHookContext {
+
+  public void run(HookContext hookContext) {
+    LogHelper console = SessionState.getConsole();
+
+    for (Entry<String, List<List<String>>> entry :
+        SessionState.get().getStackTraces().entrySet()) {
+
+      for (List<String> stackTrace : entry.getValue()) {
+        // Only print the first line of the stack trace as it contains the error message, and other
+        // lines may contain line numbers which are volatile
+        // Also only take the string after the first two spaces, because the prefix is a date
+        // and time stamp
+        console.printError(StringUtils.substringAfter(
+            StringUtils.substringAfter(stackTrace.get(0), " "), " "));
+      }
+    }
+  }
+}

Added: hive/trunk/ql/src/test/queries/clientnegative/mapreduce_stack_trace.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/mapreduce_stack_trace.q?rev=1310583&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/mapreduce_stack_trace.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/mapreduce_stack_trace.q Fri Apr  6 20:56:10 2012
@@ -0,0 +1,5 @@
+set hive.exec.mode.local.auto=false;
+set hive.exec.job.debug.capture.stacktraces=true;
+set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.VerifySessionStateStackTracesHook;
+
+FROM src SELECT TRANSFORM(key, value) USING 'script_does_not_exist' AS (key, value);

Added: hive/trunk/ql/src/test/queries/clientnegative/mapreduce_stack_trace_turnoff.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/mapreduce_stack_trace_turnoff.q?rev=1310583&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/mapreduce_stack_trace_turnoff.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/mapreduce_stack_trace_turnoff.q Fri Apr  6 20:56:10 2012
@@ -0,0 +1,5 @@
+set hive.exec.mode.local.auto=false;
+set hive.exec.job.debug.capture.stacktraces=false;
+set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.VerifySessionStateStackTracesHook;
+
+FROM src SELECT TRANSFORM(key, value) USING 'script_does_not_exist' AS (key, value);

Added: hive/trunk/ql/src/test/results/clientnegative/mapreduce_stack_trace.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/mapreduce_stack_trace.q.out?rev=1310583&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/mapreduce_stack_trace.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/mapreduce_stack_trace.q.out Fri Apr  6 20:56:10 2012
@@ -0,0 +1,13 @@
+PREHOOK: query: FROM src SELECT TRANSFORM(key, value) USING 'script_does_not_exist' AS (key,
value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while
processing row {"key":"238","value":"val_238"}
+Hive Runtime Error while processing row {"key":"238","value":"val_238"}
+FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while
processing row {"key":"238","value":"val_238"}
+Hive Runtime Error while processing row {"key":"238","value":"val_238"}
+FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while
processing row {"key":"238","value":"val_238"}
+Hive Runtime Error while processing row {"key":"238","value":"val_238"}
+FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while
processing row {"key":"238","value":"val_238"}
+Hive Runtime Error while processing row {"key":"238","value":"val_238"}
+FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask

Added: hive/trunk/ql/src/test/results/clientnegative/mapreduce_stack_trace_turnoff.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/mapreduce_stack_trace_turnoff.q.out?rev=1310583&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/mapreduce_stack_trace_turnoff.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/mapreduce_stack_trace_turnoff.q.out Fri Apr  6 20:56:10 2012
@@ -0,0 +1,5 @@
+PREHOOK: query: FROM src SELECT TRANSFORM(key, value) USING 'script_does_not_exist' AS (key,
value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask



Mime
View raw message