hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gop...@apache.org
Subject hive git commit: HIVE-11054: Handle varchar/char partition columns in vectorization (Gopal V, reviewed by Gunther Hagleitner)
Date Mon, 06 Jul 2015 23:27:26 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-1 693ccf5d8 -> ab5cf37dd


HIVE-11054: Handle varchar/char partition columns in vectorization (Gopal V, reviewed by Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ab5cf37d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ab5cf37d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ab5cf37d

Branch: refs/heads/branch-1
Commit: ab5cf37ddbb8b3c8d4619ad6760fca01b259b40b
Parents: 693ccf5
Author: Gopal V <gopalv@apache.org>
Authored: Mon Jul 6 16:25:13 2015 -0700
Committer: Gopal V <gopalv@apache.org>
Committed: Mon Jul 6 16:25:13 2015 -0700

----------------------------------------------------------------------
 .../ql/exec/vector/VectorizedRowBatchCtx.java   |  2 +-
 .../clientpositive/vectorization_part_varchar.q |  7 ++
 .../vectorization_part_varchar.q.out            | 72 ++++++++++++++++++++
 3 files changed, 80 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ab5cf37d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index db382cd..82d4a8f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -591,7 +591,7 @@ public class VectorizedRowBatchCtx {
         case CHAR:
         case VARCHAR: {
           BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex];
-          String sVal = (String) value;
+          String sVal = value.toString();
           if (sVal == null) {
             bcv.noNulls = false;
             bcv.isNull[0] = true;

http://git-wip-us.apache.org/repos/asf/hive/blob/ab5cf37d/ql/src/test/queries/clientpositive/vectorization_part_varchar.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorization_part_varchar.q b/ql/src/test/queries/clientpositive/vectorization_part_varchar.q
new file mode 100644
index 0000000..e78f9f5
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vectorization_part_varchar.q
@@ -0,0 +1,7 @@
+SET hive.vectorized.execution.enabled=true;
+CREATE TABLE alltypesorc_part_varchar(ctinyint tinyint, csmallint smallint, cint int, cbigint bigint, cfloat float, cdouble double, cstring1 string, cstring2 string, ctimestamp1 timestamp, ctimestamp2 timestamp, cboolean1 boolean, cboolean2 boolean) partitioned by (ds varchar(4)) STORED AS ORC;
+insert overwrite table alltypesorc_part_varchar partition (ds='2011') select * from alltypesorc limit 100;
+insert overwrite table alltypesorc_part_varchar partition (ds='2012') select * from alltypesorc limit 100;
+
+select count(cdouble), cint from alltypesorc_part_varchar where ds='2011' group by cint limit 10;
+select count(*) from alltypesorc_part_varchar A join alltypesorc_part_varchar B on A.ds=B.ds;

http://git-wip-us.apache.org/repos/asf/hive/blob/ab5cf37d/ql/src/test/results/clientpositive/vectorization_part_varchar.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorization_part_varchar.q.out b/ql/src/test/results/clientpositive/vectorization_part_varchar.q.out
new file mode 100644
index 0000000..c351de7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vectorization_part_varchar.q.out
@@ -0,0 +1,72 @@
+PREHOOK: query: CREATE TABLE alltypesorc_part_varchar(ctinyint tinyint, csmallint smallint,
cint int, cbigint bigint, cfloat float, cdouble double, cstring1 string, cstring2 string,
ctimestamp1 timestamp, ctimestamp2 timestamp, cboolean1 boolean, cboolean2 boolean) partitioned
by (ds varchar(4)) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alltypesorc_part_varchar
+POSTHOOK: query: CREATE TABLE alltypesorc_part_varchar(ctinyint tinyint, csmallint smallint,
cint int, cbigint bigint, cfloat float, cdouble double, cstring1 string, cstring2 string,
ctimestamp1 timestamp, ctimestamp2 timestamp, cboolean1 boolean, cboolean2 boolean) partitioned
by (ds varchar(4)) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypesorc_part_varchar
+PREHOOK: query: insert overwrite table alltypesorc_part_varchar partition (ds='2011') select
* from alltypesorc limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypesorc_part_varchar@ds=2011
+POSTHOOK: query: insert overwrite table alltypesorc_part_varchar partition (ds='2011') select
* from alltypesorc limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypesorc_part_varchar@ds=2011
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint,
type:bigint, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1,
type:boolean, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2,
type:boolean, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble,
type:double, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat,
type:float, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint,
type:int, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint,
type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1,
type:string, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2,
type:string, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1,
type:timestamp, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2,
type:timestamp, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint,
type:tinyint, comment:null), ]
+PREHOOK: query: insert overwrite table alltypesorc_part_varchar partition (ds='2012') select
* from alltypesorc limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypesorc_part_varchar@ds=2012
+POSTHOOK: query: insert overwrite table alltypesorc_part_varchar partition (ds='2012') select
* from alltypesorc limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypesorc_part_varchar@ds=2012
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint,
type:bigint, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1,
type:boolean, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2,
type:boolean, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble,
type:double, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat,
type:float, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint,
type:int, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint,
type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1,
type:string, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2,
type:string, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1,
type:timestamp, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2,
type:timestamp, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint,
type:tinyint, comment:null), ]
+PREHOOK: query: select count(cdouble), cint from alltypesorc_part_varchar where ds='2011'
group by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_part_varchar
+PREHOOK: Input: default@alltypesorc_part_varchar@ds=2011
+#### A masked pattern was here ####
+POSTHOOK: query: select count(cdouble), cint from alltypesorc_part_varchar where ds='2011'
group by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_part_varchar
+POSTHOOK: Input: default@alltypesorc_part_varchar@ds=2011
+#### A masked pattern was here ####
+100	528534767
+PREHOOK: query: select count(*) from alltypesorc_part_varchar A join alltypesorc_part_varchar
B on A.ds=B.ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_part_varchar
+PREHOOK: Input: default@alltypesorc_part_varchar@ds=2011
+PREHOOK: Input: default@alltypesorc_part_varchar@ds=2012
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from alltypesorc_part_varchar A join alltypesorc_part_varchar
B on A.ds=B.ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_part_varchar
+POSTHOOK: Input: default@alltypesorc_part_varchar@ds=2011
+POSTHOOK: Input: default@alltypesorc_part_varchar@ds=2012
+#### A masked pattern was here ####
+20000


Mime
View raw message