hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ser...@apache.org
Subject svn commit: r1668148 - in /hive/branches/llap/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/io/ java/org/apache/hadoop/hive/ql/optimizer/ java/org/apache/hadoop/hive/ql/parse/ java/org/apache/hadoop/hive/ql/plan/ test/resu...
Date Fri, 20 Mar 2015 21:03:02 GMT
Author: sershe
Date: Fri Mar 20 21:03:02 2015
New Revision: 1668148

URL: http://svn.apache.org/r1668148
Log:
HIVE-10014 : LLAP : investigate showing LLAP IO usage in explain (Sergey Shelukhin)

Modified:
    hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
    hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
    hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
    hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
    hive/branches/llap/ql/src/test/results/clientpositive/orc_llap.q.out

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1668148&r1=1668147&r2=1668148&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Fri Mar 20 21:03:02 2015
@@ -3653,6 +3653,11 @@ public final class Utilities {
     return false;
   }
 
+  public static boolean isVectorMode(Configuration conf, MapWork mapWork) {
+    return HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)
+        && mapWork.getVectorMode();
+  }
+
   public static void clearWorkMapForConf(Configuration conf) {
     // Remove cached query plans for the current query only
     Path mapPath = getPlanPath(conf, MAP_PLAN_NAME);

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1668148&r1=1668147&r2=1668148&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Fri Mar 20 21:03:02 2015
@@ -221,6 +221,16 @@ public class HiveInputFormat<K extends W
     return castInputFormat(llapIo.getInputFormat(inputFormat));
   }
 
+  public static boolean canWrapAnyForLlap(Configuration conf, MapWork mapWork) {
+    // Don't check IO - it needn't be initialized on client.
+    return HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ENABLED)
+        && Utilities.isVectorMode(conf, mapWork);
+  }
+
+  public static boolean canWrapForLlap(Class<? extends InputFormat> inputFormatClass) {
+    return LlapWrappableInputFormatInterface.class.isAssignableFrom(inputFormatClass);
+  }
+
   @SuppressWarnings("unchecked")
   private static <T, U, V, W> InputFormat<T, U> castInputFormat(InputFormat<V, W> from) {
     // This is ugly in two ways...

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1668148&r1=1668147&r2=1668148&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Fri Mar 20 21:03:02 2015
@@ -34,6 +34,7 @@ import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -874,6 +875,45 @@ public final class GenMapRedUtils {
     }
   }
 
+  /**
+   * Called at the end of TaskCompiler::compile to derive final
+   * explain attributes based on previous compilation.
+   */
+  public static void deriveFinalExplainAttributes(
+      Task<? extends Serializable> task, Configuration conf) {
+    // TODO: deriveExplainAttributes should be called here, code is too fragile to move it around.
+    if (task instanceof ConditionalTask) {
+      for (Task<? extends Serializable> tsk : ((ConditionalTask) task).getListTasks()) {
+        deriveFinalExplainAttributes(tsk, conf);
+      }
+    } else if (task instanceof ExecDriver) {
+      MapredWork work = (MapredWork) task.getWork();
+      work.getMapWork().deriveLlap(conf);
+    } else if (task != null && (task.getWork() instanceof TezWork)) {
+      TezWork work = (TezWork)task.getWork();
+      for (BaseWork w : work.getAllWorkUnsorted()) {
+        if (w instanceof MapWork) {
+          ((MapWork)w).deriveLlap(conf);
+        }
+      }
+    } else if (task instanceof SparkTask) {
+      SparkWork work = (SparkWork) task.getWork();
+      for (BaseWork w : work.getAllWorkUnsorted()) {
+        if (w instanceof MapWork) {
+          ((MapWork) w).deriveLlap(conf);
+        }
+      }
+    }
+
+    if (task.getChildTasks() == null) {
+      return;
+    }
+
+    for (Task<? extends Serializable> childTask : task.getChildTasks()) {
+      deriveFinalExplainAttributes(childTask, conf);
+    }
+  }
+
   public static void internTableDesc(Task<?> task, Interner<TableDesc> interner) {
 
     if (task instanceof ConditionalTask) {
@@ -1874,4 +1914,5 @@ public final class GenMapRedUtils {
   private GenMapRedUtils() {
     // prevent instantiation
   }
+
 }

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java?rev=1668148&r1=1668147&r2=1668148&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java Fri Mar 20 21:03:02 2015
@@ -292,6 +292,7 @@ public abstract class TaskCompiler {
     Interner<TableDesc> interner = Interners.newStrongInterner();
     for (Task<? extends Serializable> rootTask : rootTasks) {
       GenMapRedUtils.internTableDesc(rootTask, interner);
+      GenMapRedUtils.deriveFinalExplainAttributes(rootTask, pCtx.getConf());
     }
   }
 

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java?rev=1668148&r1=1668147&r2=1668148&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java Fri Mar 20 21:03:02 2015
@@ -32,11 +32,13 @@ import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorUtils;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol;
 import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol;
 import org.apache.hadoop.hive.ql.parse.SplitSample;
@@ -128,6 +130,9 @@ public class MapWork extends BaseWork {
 
   private boolean doSplitsGrouping = true;
 
+  /** Whether LLAP IO will be used for inputs. */
+  private String llapIoDesc;
+
   public MapWork() {}
 
   public MapWork(String name) {
@@ -187,18 +192,32 @@ public class MapWork extends BaseWork {
    */
   public void deriveExplainAttributes() {
     if (pathToPartitionInfo != null) {
-      for (Map.Entry<String, PartitionDesc> entry : pathToPartitionInfo
-          .entrySet()) {
+      for (Map.Entry<String, PartitionDesc> entry : pathToPartitionInfo.entrySet()) {
         entry.getValue().deriveBaseFileName(entry.getKey());
       }
     }
-
     MapredLocalWork mapLocalWork = getMapRedLocalWork();
     if (mapLocalWork != null) {
       mapLocalWork.deriveExplainAttributes();
     }
   }
 
+  public void deriveLlap(Configuration conf) {
+    boolean hasLlap = false, hasNonLlap = false;
+    boolean isLlapOn = HiveInputFormat.canWrapAnyForLlap(conf, this);
+    boolean hasPathToPartInfo = (pathToPartitionInfo != null && !pathToPartitionInfo.isEmpty());
+    if (hasPathToPartInfo) {
+      for (PartitionDesc part : pathToPartitionInfo.values()) {
+        boolean isUsingLlapIo = isLlapOn
+            && HiveInputFormat.canWrapForLlap(part.getInputFileFormatClass());
+        hasLlap |= isUsingLlapIo;
+        hasNonLlap |= (!isUsingLlapIo);
+      }
+    }
+    llapIoDesc = isLlapOn ? (hasPathToPartInfo ? ((hasLlap == hasNonLlap) ? "some inputs"
+        : (hasLlap ? "all inputs" : "no inputs")) : "unknown") : null;
+  }
+
   public void internTable(Interner<TableDesc> interner) {
     if (aliasToPartnInfo != null) {
       for (PartitionDesc part : aliasToPartnInfo.values()) {
@@ -245,6 +264,11 @@ public class MapWork extends BaseWork {
     return nameToSplitSample;
   }
 
+  @Explain(displayName = "LLAP IO")
+  public String getLlapIoDesc() {
+    return llapIoDesc;
+  }
+
   public void setNameToSplitSample(HashMap<String, SplitSample> nameToSplitSample) {
     this.nameToSplitSample = nameToSplitSample;
   }
@@ -316,13 +340,13 @@ public class MapWork extends BaseWork {
   public String getExecutionMode() {
     if (vectorMode) {
       if (llapMode) {
-	if (uberMode) {
-	  return "vectorized, uber";
-	} else {
-	  return "vectorized, llap";
-	}
+        if (uberMode) {
+          return "vectorized, uber";
+        } else {
+          return "vectorized, llap";
+        }
       } else {
-	return "vectorized";
+        return "vectorized";
       }
     } else if (llapMode) {
       return uberMode? "uber" : "llap";

Modified: hive/branches/llap/ql/src/test/results/clientpositive/orc_llap.q.out
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/test/results/clientpositive/orc_llap.q.out?rev=1668148&r1=1668147&r2=1668148&view=diff
==============================================================================
--- hive/branches/llap/ql/src/test/results/clientpositive/orc_llap.q.out (original)
+++ hive/branches/llap/ql/src/test/results/clientpositive/orc_llap.q.out Fri Mar 20 21:03:02 2015
@@ -119,6 +119,7 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      LLAP IO: all inputs
 
   Stage: Stage-0
     Fetch Operator
@@ -188,6 +189,7 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      LLAP IO: all inputs
 
   Stage: Stage-0
     Fetch Operator
@@ -257,6 +259,7 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      LLAP IO: all inputs
 
   Stage: Stage-0
     Fetch Operator
@@ -327,6 +330,7 @@ STAGE PLANS:
                   Statistics: Num rows: 122880 Data size: 29079940 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col2 (type: bigint)
       Execution mode: vectorized
+      LLAP IO: all inputs
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
@@ -439,6 +443,7 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      LLAP IO: all inputs
       Local Work:
         Map Reduce Local Work
 
@@ -547,6 +552,7 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      LLAP IO: all inputs
 
   Stage: Stage-0
     Fetch Operator
@@ -616,6 +622,7 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      LLAP IO: all inputs
 
   Stage: Stage-0
     Fetch Operator
@@ -685,6 +692,7 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      LLAP IO: all inputs
 
   Stage: Stage-0
     Fetch Operator
@@ -755,6 +763,7 @@ STAGE PLANS:
                   Statistics: Num rows: 7989 Data size: 1597800 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col2 (type: bigint)
       Execution mode: vectorized
+      LLAP IO: all inputs
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
@@ -867,6 +876,7 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
+      LLAP IO: all inputs
       Local Work:
         Map Reduce Local Work
 



Mime
View raw message