hive-commits mailing list archives

From: hashut...@apache.org
Subject: svn commit: r1509542 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/optimizer/correlation/ test/queries/clientpositive/ test/results/clientpositive/
Date: Fri, 02 Aug 2013 04:52:40 GMT
Author: hashutosh
Date: Fri Aug  2 04:52:39 2013
New Revision: 1509542

URL: http://svn.apache.org/r1509542
Log:
HIVE-4952 : When hive.join.emit.interval is small, queries optimized by Correlation Optimizer may generate wrong results (Yin Huai via Ashutosh Chauhan)
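
As background, hive.join.emit.interval controls (roughly) how many rows a reduce-side
JoinOperator buffers before it starts emitting join results. A rough, self-contained
sketch of that buffering behavior (hypothetical class and variable names; only the
configuration property itself comes from Hive):

    import java.util.ArrayList;
    import java.util.List;

    public class EmitIntervalSketch {
      public static void main(String[] args) {
        // The new test below sets hive.join.emit.interval=1, the smallest
        // useful value, so the buffer drains after almost every row.
        int emitInterval = 1;
        List<String> buffer = new ArrayList<String>();
        for (String row : new String[] {"r1", "r2", "r3"}) {
          buffer.add(row);
          if (buffer.size() >= emitInterval) {
            // Rows are produced well before the key group ends; any child
            // operator that never receives a group boundary can mix groups.
            System.out.println("emit " + buffer);
            buffer.clear();
          }
        }
        System.out.println("emit at group end: " + buffer);
      }
    }

With the Correlation Optimizer on, such a join runs under a DemuxOperator inside a
single reduce stage, so the demux must deliver group boundaries to each child at the
right time; the DemuxOperator change below adds exactly that.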

Added:
    hive/trunk/ql/src/test/queries/clientpositive/correlationoptimizer15.q
    hive/trunk/ql/src/test/results/clientpositive/correlationoptimizer15.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/QueryPlanTreeTransformation.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java?rev=1509542&r1=1509541&r2=1509542&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java Fri Aug  2 04:52:39 2013
@@ -88,6 +88,9 @@ public class DemuxOperator extends Opera
 
   private int childrenDone;
 
+  // The index of the child to which the last row was forwarded within the current key group.
+  private int lastChildIndex;
+
   // Since DemuxOperator may appear multiple times in MuxOperator's parents list.
   // We use newChildIndexTag instead of childOperatorsTag.
   // Example:
@@ -227,18 +230,26 @@ public class DemuxOperator extends Opera
 
   @Override
   public void processOp(Object row, int tag) throws HiveException {
-    int childIndex = newTagToChildIndex.get(tag);
+    int currentChildIndex = newTagToChildIndex.get(tag);
+
+    // Check whether we have started to forward rows to a new child.
+    // If so, in the current key group, no more rows will be forwarded
+    // to children with an index less than currentChildIndex.
+    // We can flush the buffers of the children from lastChildIndex (inclusive)
+    // to currentChildIndex (exclusive) and propagate processGroup to those children.
+    endGroupIfNecessary(currentChildIndex);
+
     int oldTag = newTagToOldTag.get(tag);
     if (isLogInfoEnabled) {
       cntrs[tag]++;
       if (cntrs[tag] == nextCntrs[tag]) {
-        LOG.info(id + " (newTag, childIndex, oldTag)=(" + tag + ", " + childIndex + ", "
+        LOG.info(id + " (newTag, childIndex, oldTag)=(" + tag + ", " + currentChildIndex + ", "
             + oldTag + "), forwarding " + cntrs[tag] + " rows");
         nextCntrs[tag] = getNextCntr(cntrs[tag]);
       }
     }
 
-    Operator<? extends OperatorDesc> child = childOperatorsArray[childIndex];
+    Operator<? extends OperatorDesc> child = childOperatorsArray[currentChildIndex];
     if (child.getDone()) {
       childrenDone++;
     } else {
@@ -270,6 +281,36 @@ public class DemuxOperator extends Opera
     }
   }
 
+  /**
+   * We assume that the input rows associated with the same key are ordered by
+   * tag. Because a tag maps to a child index, once we see a new childIndex,
+   * we will not see the previous childIndex (lastChildIndex) again before we
+   * start a new key group. So, we can flush the buffers of the children
+   * from lastChildIndex (inclusive) to currentChildIndex (exclusive) and
+   * propagate processGroup to those children.
+   * @param currentChildIndex the child index associated with the current row.
+   * @throws HiveException
+   */
+  private void endGroupIfNecessary(int currentChildIndex) throws HiveException {
+    if (lastChildIndex != currentChildIndex) {
+      for (int i = lastChildIndex; i < currentChildIndex; i++) {
+        Operator<? extends OperatorDesc> child = childOperatorsArray[i];
+        child.flush();
+        child.endGroup();
+        for (Integer childTag: newChildOperatorsTag.get(i)) {
+          child.processGroup(childTag);
+        }
+      }
+      lastChildIndex = currentChildIndex;
+    }
+  }
+
+  @Override
+  public void startGroup() throws HiveException {
+    lastChildIndex = 0;
+    super.startGroup();
+  }
+
   @Override
   public void endGroup() throws HiveException {
     if (childOperators == null) {
@@ -280,7 +321,10 @@ public class DemuxOperator extends Opera
       return;
     }
 
-    for (int i = 0; i < childOperatorsArray.length; i++) {
+    // We are about to start a new key group. We can flush the buffers
+    // of the children from lastChildIndex (inclusive) through the last child and
+    // propagate processGroup to those children.
+    for (int i = lastChildIndex; i < childOperatorsArray.length; i++) {
       Operator<? extends OperatorDesc> child = childOperatorsArray[i];
       child.flush();
       child.endGroup();
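
The invariant behind endGroupIfNecessary: within one key group, rows arrive ordered
by tag, and each tag maps to a child index, so once the child index advances, the
earlier children are finished for that group. A minimal, self-contained sketch of
the pattern (hypothetical class names; simplified — the real operator also calls
processGroup per child tag and skips children that are already done):

    import java.util.ArrayList;
    import java.util.List;

    public class DemuxSketch {
      // Stand-in for a buffering child operator.
      static class Child {
        final int index;
        final List<String> buffer = new ArrayList<String>();
        Child(int index) { this.index = index; }
        void process(String row) { buffer.add(row); }
        void flushAndEndGroup() {
          System.out.println("child " + index + " endGroup, emitting " + buffer);
          buffer.clear();
        }
      }

      private final Child[] children;
      private int lastChildIndex;

      DemuxSketch(int numChildren) {
        children = new Child[numChildren];
        for (int i = 0; i < numChildren; i++) {
          children[i] = new Child(i);
        }
      }

      void startGroup() { lastChildIndex = 0; }

      // Mirrors the patch: when the child index advances within a key group,
      // children in [lastChildIndex, currentChildIndex) will see no more rows
      // for this group, so their groups can be ended eagerly.
      void process(String row, int currentChildIndex) {
        if (lastChildIndex != currentChildIndex) {
          for (int i = lastChildIndex; i < currentChildIndex; i++) {
            children[i].flushAndEndGroup();
          }
          lastChildIndex = currentChildIndex;
        }
        children[currentChildIndex].process(row);
      }

      void endGroup() {
        // Finish the children that received rows last, plus any that got none.
        for (int i = lastChildIndex; i < children.length; i++) {
          children[i].flushAndEndGroup();
        }
      }

      public static void main(String[] args) {
        DemuxSketch demux = new DemuxSketch(2);
        demux.startGroup();      // key group begins
        demux.process("r1", 0);  // lower tags arrive first
        demux.process("r2", 0);
        demux.process("r3", 1);  // index advances: child 0 is done for this group
        demux.endGroup();        // finish the remaining children
      }
    }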

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/QueryPlanTreeTransformation.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/QueryPlanTreeTransformation.java?rev=1509542&r1=1509541&r2=1509542&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/QueryPlanTreeTransformation.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/QueryPlanTreeTransformation.java Fri Aug  2 04:52:39 2013
@@ -204,7 +204,6 @@ public class QueryPlanTreeTransformation
           childOP.setParentOperators(Utilities.makeList(mux));
           parentOp.setChildOperators(Utilities.makeList(mux));
         } else {
-          // childOp is a JoinOperator
           List<Operator<? extends OperatorDesc>> parentsOfMux =
               new ArrayList<Operator<? extends OperatorDesc>>();
           List<Operator<? extends OperatorDesc>> siblingOPs =

Added: hive/trunk/ql/src/test/queries/clientpositive/correlationoptimizer15.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/correlationoptimizer15.q?rev=1509542&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/correlationoptimizer15.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/correlationoptimizer15.q Fri Aug  2 04:52:39 2013
@@ -0,0 +1,32 @@
+set hive.auto.convert.join=false;
+set hive.optimize.correlation=false;
+-- When Correlation Optimizer is turned off, 4 MR jobs are needed.
+-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
+-- The first job will evaluate subquery xx and xx join yy.
+EXPLAIN
+SELECT xx.key, xx.cnt, yy.key
+FROM
+(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
+JOIN src yy
+ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key;
+
+SELECT xx.key, xx.cnt, yy.key
+FROM
+(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
+JOIN src yy
+ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key;
+
+set hive.optimize.correlation=true;
+set hive.join.emit.interval=1;
+EXPLAIN
+SELECT xx.key, xx.cnt, yy.key
+FROM
+(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
+JOIN src yy
+ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key;
+
+SELECT xx.key, xx.cnt, yy.key
+FROM
+(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
+JOIN src yy
+ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key;
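
The emit interval of 1 is the stress condition from HIVE-4952: with
hive.optimize.correlation=true, the subquery join, the group-by, and the outer join
are all evaluated in one reduce stage behind a DemuxOperator (see the second EXPLAIN
in the .q.out below), and the tiny interval makes the reducer-side join flush after
essentially every buffered row, so the plan returns correct rows only if the demux
delivers group boundaries to each child at the right time. Both variants above must
therefore produce identical results.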

Added: hive/trunk/ql/src/test/results/clientpositive/correlationoptimizer15.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/correlationoptimizer15.q.out?rev=1509542&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/correlationoptimizer15.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/correlationoptimizer15.q.out Fri Aug  2 04:52:39 2013
@@ -0,0 +1,518 @@
+PREHOOK: query: -- When Correlation Optimizer is turned off, 4 MR jobs are needed.
+-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
+-- The first job will evaluate subquery xx and xx join yy.
+EXPLAIN
+SELECT xx.key, xx.cnt, yy.key
+FROM
+(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
+JOIN src yy
+ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- When Correlation Optimizer is turned off, 4 MR jobs are needed.
+-- When Correlation Optimizer is turned on, 2 MR jobs are needed.
+-- The first job will evaluate subquery xx and xx join yy.
+EXPLAIN
+SELECT xx.key, xx.cnt, yy.key
+FROM
+(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
+JOIN src yy
+ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_TABREF (TOK_TABNAME src) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)))))
+
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-4 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-4
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        xx:x 
+          TableScan
+            alias: x
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: key
+                    type: string
+        xx:y 
+          TableScan
+            alias: y
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 1
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0}
+            1 
+          handleSkewJoin: false
+          outputColumnNames: _col0
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+            outputColumnNames: _col0
+            Group By Operator
+              aggregations:
+                    expr: count(1)
+              bucketGroup: false
+              keys:
+                    expr: _col0
+                    type: string
+              mode: hash
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        $INTNAME 
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+        yy 
+          TableScan
+            alias: yy
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 1
+              value expressions:
+                    expr: key
+                    type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+                  expr: _col2
+                  type: string
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+                    expr: _col2
+                    type: string
+              sort order: +++
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+                    expr: _col2
+                    type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT xx.key, xx.cnt, yy.key
+FROM
+(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
+JOIN src yy
+ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key
+FROM
+(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
+JOIN src yy
+ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+128	1	128
+128	1	128
+128	1	128
+146	1	146
+146	1	146
+150	1	150
+213	1	213
+213	1	213
+224	1	224
+224	1	224
+238	1	238
+238	1	238
+255	1	255
+255	1	255
+273	1	273
+273	1	273
+273	1	273
+278	1	278
+278	1	278
+311	1	311
+311	1	311
+311	1	311
+369	1	369
+369	1	369
+369	1	369
+401	1	401
+401	1	401
+401	1	401
+401	1	401
+401	1	401
+406	1	406
+406	1	406
+406	1	406
+406	1	406
+66	1	66
+98	1	98
+98	1	98
+PREHOOK: query: EXPLAIN
+SELECT xx.key, xx.cnt, yy.key
+FROM
+(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
+JOIN src yy
+ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT xx.key, xx.cnt, yy.key
+FROM
+(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
+JOIN src yy
+ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) xx) (TOK_TABREF (TOK_TABNAME src) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        xx:x 
+          TableScan
+            alias: x
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: key
+                    type: string
+        xx:y 
+          TableScan
+            alias: y
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 1
+        yy 
+          TableScan
+            alias: yy
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 2
+              value expressions:
+                    expr: key
+                    type: string
+      Reduce Operator Tree:
+        Demux Operator
+          Join Operator
+            condition map:
+                 Inner Join 0 to 1
+            condition expressions:
+              0 {VALUE._col0}
+              1 
+            handleSkewJoin: false
+            outputColumnNames: _col0
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+              outputColumnNames: _col0
+              Mux Operator
+                Group By Operator
+                  aggregations:
+                        expr: count(1)
+                  bucketGroup: false
+                  keys:
+                        expr: _col0
+                        type: string
+                  mode: complete
+                  outputColumnNames: _col0, _col1
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: bigint
+                    outputColumnNames: _col0, _col1
+                    Mux Operator
+                      Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        condition expressions:
+                          0 {VALUE._col0} {VALUE._col1}
+                          1 {VALUE._col0}
+                        handleSkewJoin: false
+                        outputColumnNames: _col0, _col1, _col2
+                        Select Operator
+                          expressions:
+                                expr: _col0
+                                type: string
+                                expr: _col1
+                                type: bigint
+                                expr: _col2
+                                type: string
+                          outputColumnNames: _col0, _col1, _col2
+                          File Output Operator
+                            compressed: false
+                            GlobalTableId: 0
+                            table:
+                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+          Mux Operator
+            Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {VALUE._col0} {VALUE._col1}
+                1 {VALUE._col0}
+              handleSkewJoin: false
+              outputColumnNames: _col0, _col1, _col2
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: bigint
+                      expr: _col2
+                      type: string
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+                    expr: _col2
+                    type: string
+              sort order: +++
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+                    expr: _col2
+                    type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT xx.key, xx.cnt, yy.key
+FROM
+(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
+JOIN src yy
+ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT xx.key, xx.cnt, yy.key
+FROM
+(SELECT x.key as key, count(1) as cnt FROM src1 x JOIN src1 y ON (x.key = y.key) group by x.key) xx
+JOIN src yy
+ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+128	1	128
+128	1	128
+128	1	128
+146	1	146
+146	1	146
+150	1	150
+213	1	213
+213	1	213
+224	1	224
+224	1	224
+238	1	238
+238	1	238
+255	1	255
+255	1	255
+273	1	273
+273	1	273
+273	1	273
+278	1	278
+278	1	278
+311	1	311
+311	1	311
+311	1	311
+369	1	369
+369	1	369
+369	1	369
+401	1	401
+401	1	401
+401	1	401
+401	1	401
+401	1	401
+406	1	406
+406	1	406
+406	1	406
+406	1	406
+66	1	66
+98	1	98
+98	1	98


