hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jd...@apache.org
Subject hive git commit: HIVE-9908: vectorization error binary type not supported, group by with binary columns (Matt McCline via Jason Dere)
Date Tue, 05 May 2015 04:55:51 GMT
Repository: hive
Updated Branches:
  refs/heads/master ce736af2a -> 3bf41faa0


HIVE-9908: vectorization error binary type not supported, group by with binary columns (Matt McCline via Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3bf41faa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3bf41faa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3bf41faa

Branch: refs/heads/master
Commit: 3bf41faa04284b1f50adcd7da50dbb74664a8396
Parents: ce736af
Author: Jason Dere <jdere@hortonworks.com>
Authored: Mon May 4 21:55:19 2015 -0700
Committer: Jason Dere <jdere@hortonworks.com>
Committed: Mon May 4 21:55:19 2015 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../ql/exec/vector/VectorColumnSetInfo.java     |   3 +-
 .../clientpositive/vector_binary_join_groupby.q |  55 ++++
 .../tez/vector_binary_join_groupby.q.out        | 303 +++++++++++++++++++
 .../vector_binary_join_groupby.q.out            | 293 ++++++++++++++++++
 5 files changed, 654 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3bf41faa/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index b2a6e58..8e9984a 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -184,6 +184,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   update_two_cols.q,\
   vector_aggregate_9.q,\
   vector_between_in.q,\
+  vector_binary_join_groupby.q,\
   vector_bucket.q,\
   vector_cast_constant.q,\
   vector_char_2.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/3bf41faa/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
index d9c16dc..8c4b6ea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
@@ -126,7 +126,8 @@ public class VectorColumnSetInfo {
       doubleIndices[doubleIndicesIndex] = addIndex;
       indexLookup[addIndex].setDouble(doubleIndicesIndex);
       ++doubleIndicesIndex;
-    } else if (VectorizationContext.isStringFamily(outputType)) {
+    } else if (VectorizationContext.isStringFamily(outputType) ||
+        outputType.equalsIgnoreCase("binary")) {
       stringIndices[stringIndicesIndex]= addIndex;
       indexLookup[addIndex].setString(stringIndicesIndex);
       ++stringIndicesIndex;

http://git-wip-us.apache.org/repos/asf/hive/blob/3bf41faa/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q b/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
new file mode 100644
index 0000000..3bdfd8c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
@@ -0,0 +1,55 @@
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+SET hive.vectorized.execution.enabled=true;
+
+DROP TABLE over1k;
+DROP TABLE hundredorc;
+
+-- data setup
+CREATE TABLE over1k(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k;
+
+CREATE TABLE hundredorc(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC;
+
+INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100;
+
+EXPLAIN 
+SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin;
+
+SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin;
+
+EXPLAIN 
+SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin;
+
+SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin;

http://git-wip-us.apache.org/repos/asf/hive/blob/3bf41faa/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
new file mode 100644
index 0000000..8dcd40d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
@@ -0,0 +1,303 @@
+PREHOOK: query: DROP TABLE over1k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE over1k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE hundredorc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE hundredorc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: -- data setup
+CREATE TABLE over1k(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over1k
+POSTHOOK: query: -- data setup
+CREATE TABLE over1k(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over1k
+PREHOOK: query: CREATE TABLE hundredorc(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hundredorc
+POSTHOOK: query: CREATE TABLE hundredorc(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hundredorc
+PREHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1k
+PREHOOK: Output: default@hundredorc
+POSTHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1k
+POSTHOOK: Output: default@hundredorc
+POSTHOOK: Lineage: hundredorc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null),
]
+POSTHOOK: Lineage: hundredorc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary,
comment:null), ]
+POSTHOOK: Lineage: hundredorc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean,
comment:null), ]
+POSTHOOK: Lineage: hundredorc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null),
]
+POSTHOOK: Lineage: hundredorc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2),
comment:null), ]
+POSTHOOK: Lineage: hundredorc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null),
]
+POSTHOOK: Lineage: hundredorc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null),
]
+POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null),
]
+POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint,
comment:null), ]
+POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint,
comment:null), ]
+POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp,
comment:null), ]
+PREHOOK: query: EXPLAIN 
+SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN 
+SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column
stats: NONE
+                  Filter Operator
+                    predicate: bin is not null (type: boolean)
+                    Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column
stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 bin (type: binary)
+                        1 bin (type: binary)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6,
_col7, _col8, _col9, _col10, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21,
_col22, _col23, _col24
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 55 Data size: 16300 Basic stats: COMPLETE Column
stats: NONE
+                      HybridGraceHashJoin: true
+                      Group By Operator
+                        aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24))
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column
stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column
stats: NONE
+                          value expressions: _col0 (type: bigint)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column
stats: NONE
+                  Filter Operator
+                    predicate: bin is not null (type: boolean)
+                    Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column
stats: NONE
+                    Reduce Output Operator
+                      key expressions: bin (type: binary)
+                      sort order: +
+                      Map-reduce partition columns: bin (type: binary)
+                      Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column
stats: NONE
+                      value expressions: t (type: tinyint), si (type: smallint), i (type:
int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string),
ts (type: timestamp), dec (type: decimal(4,2))
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Map Join MAPJOIN[13][bigTable=t1] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hundredorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hundredorc
+#### A masked pattern was here ####
+-107801098240
+PREHOOK: query: EXPLAIN 
+SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN 
+SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: hundredorc
+                  Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column
stats: NONE
+                  Select Operator
+                    expressions: bin (type: binary)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column
stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: _col0 (type: binary)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column
stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: binary)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: binary)
+                        Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE
Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: binary)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats:
NONE
+                Select Operator
+                  expressions: _col1 (type: bigint), _col0 (type: binary)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column
stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column
stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hundredorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hundredorc
+#### A masked pattern was here ####
+5	american history
+5	biology
+2	chemistry
+2	debate
+4	education
+5	forestry
+4	geology
+5	history
+6	industrial engineering
+3	joggying
+5	kindergarten
+1	linguistics
+9	mathematics
+8	nap time
+1	opthamology
+2	philosophy
+5	quiet hour
+4	religion
+3	study skills
+7	topology
+1	undecided
+2	values clariffication
+3	wind surfing
+3	xylophone band
+2	yard duty
+3	zync studies

http://git-wip-us.apache.org/repos/asf/hive/blob/3bf41faa/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
new file mode 100644
index 0000000..c3e4d52
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
@@ -0,0 +1,293 @@
+PREHOOK: query: DROP TABLE over1k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE over1k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE hundredorc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE hundredorc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: -- data setup
+CREATE TABLE over1k(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over1k
+POSTHOOK: query: -- data setup
+CREATE TABLE over1k(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over1k
+PREHOOK: query: CREATE TABLE hundredorc(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hundredorc
+POSTHOOK: query: CREATE TABLE hundredorc(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hundredorc
+PREHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1k
+PREHOOK: Output: default@hundredorc
+POSTHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1k
+POSTHOOK: Output: default@hundredorc
+POSTHOOK: Lineage: hundredorc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null),
]
+POSTHOOK: Lineage: hundredorc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary,
comment:null), ]
+POSTHOOK: Lineage: hundredorc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean,
comment:null), ]
+POSTHOOK: Lineage: hundredorc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null),
]
+POSTHOOK: Lineage: hundredorc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2),
comment:null), ]
+POSTHOOK: Lineage: hundredorc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null),
]
+POSTHOOK: Lineage: hundredorc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null),
]
+POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null),
]
+POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint,
comment:null), ]
+POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint,
comment:null), ]
+POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp,
comment:null), ]
+PREHOOK: query: EXPLAIN 
+SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN 
+SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        t1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        t1 
+          TableScan
+            alias: t1
+            Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats:
NONE
+            Filter Operator
+              predicate: bin is not null (type: boolean)
+              Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats:
NONE
+              HashTable Sink Operator
+                keys:
+                  0 bin (type: binary)
+                  1 bin (type: binary)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t2
+            Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats:
NONE
+            Filter Operator
+              predicate: bin is not null (type: boolean)
+              Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats:
NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 bin (type: binary)
+                  1 bin (type: binary)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9, _col10, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22,
_col23, _col24
+                Statistics: Num rows: 55 Data size: 16300 Basic stats: COMPLETE Column stats:
NONE
+                Group By Operator
+                  aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24))
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
NONE
+                    value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hundredorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(*))
+FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hundredorc
+#### A masked pattern was here ####
+-107801098240
+PREHOOK: query: EXPLAIN 
+SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN 
+SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hundredorc
+            Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats:
NONE
+            Select Operator
+              expressions: bin (type: binary)
+              outputColumnNames: _col0
+              Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats:
NONE
+              Group By Operator
+                aggregations: count()
+                keys: _col0 (type: binary)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats:
NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: binary)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: binary)
+                  Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column
stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: binary)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: bigint), _col0 (type: binary)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats:
NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats:
NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hundredorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(*), bin
+FROM hundredorc
+GROUP BY bin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hundredorc
+#### A masked pattern was here ####
+5	american history
+5	biology
+2	chemistry
+2	debate
+4	education
+5	forestry
+4	geology
+5	history
+6	industrial engineering
+3	joggying
+5	kindergarten
+1	linguistics
+9	mathematics
+8	nap time
+1	opthamology
+2	philosophy
+5	quiet hour
+4	religion
+3	study skills
+7	topology
+1	undecided
+2	values clariffication
+3	wind surfing
+3	xylophone band
+2	yard duty
+3	zync studies


Mime
View raw message