hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject [54/61] [abbrv] hive git commit: HIVE-11028: Tez: table self join and join with another table fails with IndexOutOfBoundsException (Jason Dere, reviewed by John Pullokkaran)
Date Sun, 21 Jun 2015 05:26:19 GMT
HIVE-11028: Tez: table self join and join with another table fails with IndexOutOfBoundsException
(Jason Dere, reviewed by John Pullokkaran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e2dd54ab
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e2dd54ab
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e2dd54ab

Branch: refs/heads/spark
Commit: e2dd54ab180b577b08cf6b0e69310ac81fc99fd3
Parents: 56cffb2
Author: Jason Dere <jdere@hortonworks.com>
Authored: Fri Jun 19 15:19:09 2015 -0700
Committer: Jason Dere <jdere@hortonworks.com>
Committed: Fri Jun 19 15:19:09 2015 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../hive/ql/optimizer/ConstantPropagate.java    |  12 +-
 .../ql/optimizer/ConstantPropagateProcCtx.java  |  21 ++
 .../optimizer/ConstantPropagateProcFactory.java |  69 ++++++-
 .../hadoop/hive/ql/parse/TezCompiler.java       |   5 +-
 .../test/queries/clientpositive/tez_self_join.q |  32 +++
 .../clientpositive/tez/explainuser_2.q.out      |   4 +-
 .../clientpositive/tez/tez_self_join.q.out      | 205 +++++++++++++++++++
 8 files changed, 343 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e2dd54ab/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index b9f39fb..7b7559a 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -334,6 +334,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
   tez_join_tests.q,\
   tez_joins_explain.q,\
   tez_schema_evolution.q,\
+  tez_self_join.q,\
   tez_union.q,\
   tez_union2.q,\
   tez_union_dynamic_partition.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/e2dd54ab/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
index 0027960..b5ee4ef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 
@@ -62,8 +63,15 @@ public class ConstantPropagate implements Transform {
 
   private static final Log LOG = LogFactory.getLog(ConstantPropagate.class);
   protected ParseContext pGraphContext;
+  private ConstantPropagateOption constantPropagateOption;
 
-  public ConstantPropagate() {}
+  public ConstantPropagate() {
+    this(ConstantPropagateOption.FULL);
+  }
+
+  public ConstantPropagate(ConstantPropagateOption option) {
+    this.constantPropagateOption = option;
+  }
 
   /**
    * Transform the query tree.
@@ -76,7 +84,7 @@ public class ConstantPropagate implements Transform {
     pGraphContext = pactx;
 
     // generate pruned column list for all relevant operators
-    ConstantPropagateProcCtx cppCtx = new ConstantPropagateProcCtx();
+    ConstantPropagateProcCtx cppCtx = new ConstantPropagateProcCtx(constantPropagateOption);
 
     // create a walker which walks the tree in a DFS manner while maintaining
     // the operator stack. The dispatcher

http://git-wip-us.apache.org/repos/asf/hive/blob/e2dd54ab/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java
index 6bb2a09..f30e330 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java
@@ -43,16 +43,28 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
  */
 public class ConstantPropagateProcCtx implements NodeProcessorCtx {
 
+  public enum ConstantPropagateOption {
+    FULL,      // Do full constant propagation
+    SHORTCUT,  // Only perform expression short-cutting - remove unnecessary AND/OR operators
+               // if one of the child conditions is true/false.
+  };
+
   private static final org.apache.commons.logging.Log LOG = LogFactory
       .getLog(ConstantPropagateProcCtx.class);
 
   private final Map<Operator<? extends Serializable>, Map<ColumnInfo, ExprNodeDesc>> opToConstantExprs;
   private final List<Operator<? extends Serializable>> opToDelete;
+  private ConstantPropagateOption constantPropagateOption = ConstantPropagateOption.FULL;
 
   public ConstantPropagateProcCtx() {
+    this(ConstantPropagateOption.FULL);
+  }
+
+  public ConstantPropagateProcCtx(ConstantPropagateOption option) {
     opToConstantExprs =
         new HashMap<Operator<? extends Serializable>, Map<ColumnInfo, ExprNodeDesc>>();
     opToDelete = new ArrayList<Operator<? extends Serializable>>();
+    this.constantPropagateOption = option;
   }
 
   public Map<Operator<? extends Serializable>, Map<ColumnInfo, ExprNodeDesc>> getOpToConstantExprs() {
@@ -184,4 +196,13 @@ public class ConstantPropagateProcCtx implements NodeProcessorCtx {
   public List<Operator<? extends Serializable>> getOpToDelete() {
     return opToDelete;
   }
+
+  public ConstantPropagateOption getConstantPropagateOption() {
+    return constantPropagateOption;
+  }
+
+  public void setConstantPropagateOption(
+      ConstantPropagateOption constantPropagateOption) {
+    this.constantPropagateOption = constantPropagateOption;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/e2dd54ab/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
index 4a4814d..f9df8e5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
@@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -193,6 +194,72 @@ public final class ConstantPropagateProcFactory {
     }
     return evaluateFunction(funcDesc.getGenericUDF(),funcDesc.getChildren(), funcDesc.getChildren());
   }
+
+  /**
+   * Fold input expression desc.
+   *
+   * @param desc folding expression
+   * @param constants current propagated constant map
+   * @param cppCtx
+   * @param op processing operator
+   * @param propagate if true, assignment expressions will be added to constants.
+   * @return fold expression
+   * @throws UDFArgumentException
+   */
+  private static ExprNodeDesc foldExpr(ExprNodeDesc desc, Map<ColumnInfo, ExprNodeDesc> constants,
+      ConstantPropagateProcCtx cppCtx, Operator<? extends Serializable> op, int tag,
+      boolean propagate) throws UDFArgumentException {
+    if (cppCtx.getConstantPropagateOption() == ConstantPropagateOption.SHORTCUT) {
+      return foldExprShortcut(desc, constants, cppCtx, op, tag, propagate);
+    }
+    return foldExprFull(desc, constants, cppCtx, op, tag, propagate);
+  }
+
+  /**
+   * Fold input expression desc, only performing short-cutting.
+   *
+   * Unnecessary AND/OR operations involving a constant true/false value will be eliminated.
+   *
+   * @param desc folding expression
+   * @param constants current propagated constant map
+   * @param cppCtx
+   * @param op processing operator
+   * @param propagate if true, assignment expressions will be added to constants.
+   * @return fold expression
+   * @throws UDFArgumentException
+   */
+  private static ExprNodeDesc foldExprShortcut(ExprNodeDesc desc, Map<ColumnInfo, ExprNodeDesc> constants,
+      ConstantPropagateProcCtx cppCtx, Operator<? extends Serializable> op, int tag,
+      boolean propagate) throws UDFArgumentException {
+    if (desc instanceof ExprNodeGenericFuncDesc) {
+      ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) desc;
+
+      GenericUDF udf = funcDesc.getGenericUDF();
+
+      boolean propagateNext = propagate && propagatableUdfs.contains(udf.getClass());
+      List<ExprNodeDesc> newExprs = new ArrayList<ExprNodeDesc>();
+      for (ExprNodeDesc childExpr : desc.getChildren()) {
+        newExprs.add(foldExpr(childExpr, constants, cppCtx, op, tag, propagateNext));
+      }
+
+      // Don't evalulate nondeterministic function since the value can only calculate during runtime.
+      if (!isDeterministicUdf(udf)) {
+        LOG.debug("Function " + udf.getClass() + " is undeterministic. Don't evalulating immediately.");
+        ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs);
+        return desc;
+      }
+
+      // Check if the function can be short cut.
+      ExprNodeDesc shortcut = shortcutFunction(udf, newExprs, op);
+      if (shortcut != null) {
+        LOG.debug("Folding expression:" + desc + " -> " + shortcut);
+        return shortcut;
+      }
+      ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs);
+    }
+    return desc;
+  }
+
   /**
    * Fold input expression desc.
    *
@@ -211,7 +278,7 @@ public final class ConstantPropagateProcFactory {
    * @return fold expression
    * @throws UDFArgumentException
    */
-  private static ExprNodeDesc foldExpr(ExprNodeDesc desc, Map<ColumnInfo, ExprNodeDesc> constants,
+  private static ExprNodeDesc foldExprFull(ExprNodeDesc desc, Map<ColumnInfo, ExprNodeDesc> constants,
       ConstantPropagateProcCtx cppCtx, Operator<? extends Serializable> op, int tag,
       boolean propagate) throws UDFArgumentException {
     if (desc instanceof ExprNodeGenericFuncDesc) {

http://git-wip-us.apache.org/repos/asf/hive/blob/e2dd54ab/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index 56707af..03d41f2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate;
+import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption;
 import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin;
 import org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization;
 import org.apache.hadoop.hive.ql.optimizer.MergeJoinProc;
@@ -304,8 +305,10 @@ public class TezCompiler extends TaskCompiler {
 
     // need a new run of the constant folding because we might have created lots
     // of "and true and true" conditions.
+    // Rather than run the full constant folding just need to shortcut AND/OR expressions
+    // involving constant true/false values.
     if(procCtx.conf.getBoolVar(ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
-      new ConstantPropagate().transform(procCtx.parseContext);
+      new ConstantPropagate(ConstantPropagateOption.SHORTCUT).transform(procCtx.parseContext);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/e2dd54ab/ql/src/test/queries/clientpositive/tez_self_join.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez_self_join.q b/ql/src/test/queries/clientpositive/tez_self_join.q
new file mode 100644
index 0000000..a1ef585
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/tez_self_join.q
@@ -0,0 +1,32 @@
+drop table if exists tez_self_join1;
+drop table if exists tez_self_join2;
+
+create table tez_self_join1(id1 int, id2 string, id3 string);
+insert into table tez_self_join1 values(1, 'aa','bb'), (2, 'ab','ab'), (3,'ba','ba');
+
+create table tez_self_join2(id1 int);
+insert into table tez_self_join2 values(1),(2),(3);
+
+explain
+select s.id2, s.id3
+from  
+(
+ select self1.id1, self1.id2, self1.id3
+ from tez_self_join1 self1 join tez_self_join1 self2
+ on self1.id2=self2.id3 ) s
+join tez_self_join2
+on s.id1=tez_self_join2.id1
+where s.id2='ab';
+
+select s.id2, s.id3
+from  
+(
+ select self1.id1, self1.id2, self1.id3
+ from tez_self_join1 self1 join tez_self_join1 self2
+ on self1.id2=self2.id3 ) s
+join tez_self_join2
+on s.id1=tez_self_join2.id1
+where s.id2='ab';
+
+drop table tez_self_join1;
+drop table tez_self_join2;

http://git-wip-us.apache.org/repos/asf/hive/blob/e2dd54ab/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
index 222e89e..8768198 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
@@ -239,7 +239,7 @@ Stage-0
                      |        outputColumnNames:["_col0"]
                      |        Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                      |        Filter Operator [FIL_26]
-                     |           predicate:value is not null (type: boolean)
+                     |           predicate:((11.0 = 11.0) and value is not null) (type: boolean)
                      |           Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                      |           TableScan [TS_2]
                      |              alias:z
@@ -1442,7 +1442,7 @@ Stage-0
                         outputColumnNames:["_col0"]
                         Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                         Filter Operator [FIL_26]
-                           predicate:value is not null (type: boolean)
+                           predicate:((11.0 = 11.0) and value is not null) (type: boolean)
                            Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                            TableScan [TS_2]
                               alias:z

http://git-wip-us.apache.org/repos/asf/hive/blob/e2dd54ab/ql/src/test/results/clientpositive/tez/tez_self_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/tez_self_join.q.out b/ql/src/test/results/clientpositive/tez/tez_self_join.q.out
new file mode 100644
index 0000000..be538ac
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/tez_self_join.q.out
@@ -0,0 +1,205 @@
+PREHOOK: query: drop table if exists tez_self_join1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tez_self_join1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists tez_self_join2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tez_self_join2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table tez_self_join1(id1 int, id2 string, id3 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tez_self_join1
+POSTHOOK: query: create table tez_self_join1(id1 int, id2 string, id3 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tez_self_join1
+PREHOOK: query: insert into table tez_self_join1 values(1, 'aa','bb'), (2, 'ab','ab'), (3,'ba','ba')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@tez_self_join1
+POSTHOOK: query: insert into table tez_self_join1 values(1, 'aa','bb'), (2, 'ab','ab'), (3,'ba','ba')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@tez_self_join1
+POSTHOOK: Lineage: tez_self_join1.id1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tez_self_join1.id2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: tez_self_join1.id3 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: create table tez_self_join2(id1 int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tez_self_join2
+POSTHOOK: query: create table tez_self_join2(id1 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tez_self_join2
+PREHOOK: query: insert into table tez_self_join2 values(1),(2),(3)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@tez_self_join2
+POSTHOOK: query: insert into table tez_self_join2 values(1),(2),(3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@tez_self_join2
+POSTHOOK: Lineage: tez_self_join2.id1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: explain
+select s.id2, s.id3
+from  
+(
+ select self1.id1, self1.id2, self1.id3
+ from tez_self_join1 self1 join tez_self_join1 self2
+ on self1.id2=self2.id3 ) s
+join tez_self_join2
+on s.id1=tez_self_join2.id1
+where s.id2='ab'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select s.id2, s.id3
+from  
+(
+ select self1.id1, self1.id2, self1.id3
+ from tez_self_join1 self1 join tez_self_join1 self2
+ on self1.id2=self2.id3 ) s
+join tez_self_join2
+on s.id1=tez_self_join2.id1
+where s.id2='ab'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: self1
+                  Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((id2 is not null and id1 is not null) and (id2 = 'ab')) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: 'ab' (type: string)
+                      sort order: +
+                      Map-reduce partition columns: 'ab' (type: string)
+                      Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: id1 (type: int), id3 (type: string)
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: self2
+                  Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (id3 = 'ab') (type: boolean)
+                    Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: 'ab' (type: string)
+                      sort order: +
+                      Map-reduce partition columns: 'ab' (type: string)
+                      Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: tez_self_join2
+                  Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: id1 is not null (type: boolean)
+                    Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: id1 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: id1 (type: int)
+                      Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 'ab' (type: string)
+                  1 'ab' (type: string)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: string)
+        Reducer 3 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 id1 (type: int)
+                outputColumnNames: _col2
+                Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: 'ab' (type: string), _col2 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select s.id2, s.id3
+from  
+(
+ select self1.id1, self1.id2, self1.id3
+ from tez_self_join1 self1 join tez_self_join1 self2
+ on self1.id2=self2.id3 ) s
+join tez_self_join2
+on s.id1=tez_self_join2.id1
+where s.id2='ab'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tez_self_join1
+PREHOOK: Input: default@tez_self_join2
+#### A masked pattern was here ####
+POSTHOOK: query: select s.id2, s.id3
+from  
+(
+ select self1.id1, self1.id2, self1.id3
+ from tez_self_join1 self1 join tez_self_join1 self2
+ on self1.id2=self2.id3 ) s
+join tez_self_join2
+on s.id1=tez_self_join2.id1
+where s.id2='ab'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tez_self_join1
+POSTHOOK: Input: default@tez_self_join2
+#### A masked pattern was here ####
+ab	ab
+PREHOOK: query: drop table tez_self_join1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tez_self_join1
+PREHOOK: Output: default@tez_self_join1
+POSTHOOK: query: drop table tez_self_join1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tez_self_join1
+POSTHOOK: Output: default@tez_self_join1
+PREHOOK: query: drop table tez_self_join2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tez_self_join2
+PREHOOK: Output: default@tez_self_join2
+POSTHOOK: query: drop table tez_self_join2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tez_self_join2
+POSTHOOK: Output: default@tez_self_join2


Mime
View raw message