hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1536474 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java test/queries/clientpositive/vectorized_shufflejoin.q test/results/clientpositive/vectorized_shufflejoin.q.out
Date Mon, 28 Oct 2013 18:34:59 GMT
Author: hashutosh
Date: Mon Oct 28 18:34:59 2013
New Revision: 1536474

URL: http://svn.apache.org/r1536474
Log:
HIVE-5653 : Vectorized Shuffle Join produces incorrect results (Remus Rusanu via Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q
    hive/trunk/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java?rev=1536474&r1=1536473&r2=1536474&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java Mon Oct 28 18:34:59 2013
@@ -178,6 +178,10 @@ public class VectorReduceSinkOperator ex
         numDistributionKeys;
       cachedKeys = new Object[numKeys][keyLen];
       cachedValues = new Object[valueEval.length];
+      
+      int tag = conf.getTag();
+      tagByte[0] = (byte) tag;
+      LOG.info("Using tag = " + tag);
 
     } catch(Exception e) {
       throw new HiveException(e);

Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q?rev=1536474&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q Mon Oct 28 18:34:59 2013
@@ -0,0 +1,10 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.auto.convert.join=false;
+
+EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+  FROM alltypesorc t1
+  JOIN alltypesorc t2 ON t1.cint = t2.cint;
+
+SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+  FROM alltypesorc t1
+  JOIN alltypesorc t2 ON t1.cint = t2.cint;  
\ No newline at end of file

Added: hive/trunk/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out?rev=1536474&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out Mon Oct 28 18:34:59 2013
@@ -0,0 +1,149 @@
+PREHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+  FROM alltypesorc t1
+  JOIN alltypesorc t2 ON t1.cint = t2.cint
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+  FROM alltypesorc t1
+  JOIN alltypesorc t2 ON t1.cint = t2.cint
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME alltypesorc) t1) (TOK_TABREF (TOK_TABNAME alltypesorc) t2) (= (. (TOK_TABLE_OR_COL t1) cint) (. (TOK_TABLE_OR_COL t2) cint)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL t1) cint))) (TOK_SELEXPR (TOK_FUNCTION MAX (. (TOK_TABLE_OR_COL t2) cint))) (TOK_SELEXPR (TOK_FUNCTION MIN (. (TOK_TABLE_OR_COL t1) cint))) (TOK_SELEXPR (TOK_FUNCTION AVG (+ (. (TOK_TABLE_OR_COL t1) cint) (. (TOK_TABLE_OR_COL t2) cint)))))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        t1 
+          TableScan
+            alias: t1
+            Reduce Output Operator
+              key expressions:
+                    expr: cint
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: cint
+                    type: int
+              tag: 0
+              value expressions:
+                    expr: cint
+                    type: int
+              Vectorized execution: true
+        t2 
+          TableScan
+            alias: t2
+            Reduce Output Operator
+              key expressions:
+                    expr: cint
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: cint
+                    type: int
+              tag: 1
+              value expressions:
+                    expr: cint
+                    type: int
+              Vectorized execution: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col2}
+            1 {VALUE._col2}
+          handleSkewJoin: false
+          outputColumnNames: _col2, _col16
+          Select Operator
+            expressions:
+                  expr: _col2
+                  type: int
+                  expr: _col16
+                  type: int
+            outputColumnNames: _col2, _col16
+            Group By Operator
+              aggregations:
+                    expr: count(_col2)
+                    expr: max(_col16)
+                    expr: min(_col2)
+                    expr: avg((_col2 + _col16))
+              bucketGroup: false
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: bigint
+                    expr: _col1
+                    type: int
+                    expr: _col2
+                    type: int
+                    expr: _col3
+                    type: struct<count:bigint,sum:double>
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+                expr: max(VALUE._col1)
+                expr: min(VALUE._col2)
+                expr: avg(VALUE._col3)
+          bucketGroup: false
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: bigint
+                  expr: _col1
+                  type: int
+                  expr: _col2
+                  type: int
+                  expr: _col3
+                  type: double
+            outputColumnNames: _col0, _col1, _col2, _col3
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+  FROM alltypesorc t1
+  JOIN alltypesorc t2 ON t1.cint = t2.cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+  FROM alltypesorc t1
+  JOIN alltypesorc t2 ON t1.cint = t2.cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+3152013	1073680599	-1073279343	9.375396162525452E8



Mime
View raw message