From: mmccline@apache.org
To: commits@hive.apache.org
Reply-To: hive-dev@hive.apache.org
Date: Sun, 10 Apr 2016 06:02:25 -0000
Subject: [02/16] hive git commit: HIVE-9862 Vectorized execution corrupts timestamp values (Matt McCline, reviewed by Jason Dere)

http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
index f5b06db..4ca20c5 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
@@ -19,10 +19,12 @@ package org.apache.hadoop.hive.ql.io.orc;
 
 import com.google.common.collect.Lists;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -30,6 +32,7 @@ import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
 import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -518,13 +521,13 @@ public class TestVectorOrcFile {
     tslist.add(Timestamp.valueOf("2008-10-02 00:00:00"));
 
     VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
-    LongColumnVector vec = new LongColumnVector(1024);
+    TimestampColumnVector vec = new TimestampColumnVector(1024);
     batch.cols[0] = vec;
     batch.reset();
     batch.size = tslist.size();
     for (int i=0; i < tslist.size(); ++i) {
       Timestamp ts = tslist.get(i);
-      vec.vector[i] = TimestampUtils.getTimeNanoSec(ts);
+      vec.set(i, new PisaTimestamp(ts));
     }
     writer.addRowBatch(batch);
     writer.close();
@@ -1340,9 +1343,10 @@ public class TestVectorOrcFile {
     batch.size = 1000;
     for (int year = minYear; year < maxYear; ++year) {
       for (int ms = 1000; ms < 2000; ++ms) {
-        ((LongColumnVector) batch.cols[0]).vector[ms - 1000] =
-            TimestampUtils.getTimeNanoSec(Timestamp.valueOf(year +
-                "-05-05 12:34:56." + ms));
+        TimestampColumnVector timestampColVector = (TimestampColumnVector) batch.cols[0];
+        timestampColVector.set(ms - 1000,
+            new PisaTimestamp(Timestamp.valueOf(year +
+                "-05-05 12:34:56." + ms)));
         ((LongColumnVector) batch.cols[1]).vector[ms - 1000] =
             new DateWritable(new Date(year - 1900, 11, 25)).getDays();
       }
@@ -1380,8 +1384,8 @@ public class TestVectorOrcFile {
                              HiveDecimalWritable dec) {
     UnionColumnVector union = (UnionColumnVector) batch.cols[1];
     if (ts != null) {
-      ((LongColumnVector) batch.cols[0]).vector[rowId] =
-          TimestampUtils.getTimeNanoSec(ts);
+      TimestampColumnVector timestampColVector = (TimestampColumnVector) batch.cols[0];
+      timestampColVector.set(rowId, new PisaTimestamp(ts));
     } else {
       batch.cols[0].isNull[rowId] = true;
       batch.cols[0].noNulls = false;
@@ -2173,9 +2177,9 @@ public class TestVectorOrcFile {
     ((DoubleColumnVector) batch.cols[5]).vector[0] = 0.0009765625;
     ((LongColumnVector) batch.cols[6]).vector[0] =
         new DateWritable(new Date(111, 6, 1)).getDays();
-    ((LongColumnVector) batch.cols[7]).vector[0] =
-        TimestampUtils.getTimeNanoSec(new Timestamp(115, 9, 23, 10, 11, 59,
-            999999999));
+    ((TimestampColumnVector) batch.cols[7]).set(0,
+        new PisaTimestamp(new Timestamp(115, 9, 23, 10, 11, 59,
+            999999999)));
     ((DecimalColumnVector) batch.cols[8]).vector[0] =
         new HiveDecimalWritable("1.234567");
     ((BytesColumnVector) batch.cols[9]).setVal(0, "Echelon".getBytes());
@@ -2228,9 +2232,11 @@ public class TestVectorOrcFile {
       ((DoubleColumnVector) batch.cols[5]).vector[r] = 0.0009765625 * r;
       ((LongColumnVector) batch.cols[6]).vector[r] =
           new DateWritable(new Date(111, 6, 1)).getDays() + r;
-      ((LongColumnVector) batch.cols[7]).vector[r] =
-          TimestampUtils.getTimeNanoSec(new Timestamp(115, 9, 23, 10, 11, 59,
-              999999999)) + r * 1000000000L;
+
+      Timestamp ts = new Timestamp(115, 9, 23, 10, 11, 59, 999999999);
+      PisaTimestamp pisaTimestamp = new PisaTimestamp(ts);
+      pisaTimestamp.addSeconds(pisaTimestamp, r, pisaTimestamp);
+      ((TimestampColumnVector) batch.cols[7]).set(r, pisaTimestamp);
       ((DecimalColumnVector) batch.cols[8]).vector[r] =
           new HiveDecimalWritable("1.234567");
       ((BytesColumnVector) batch.cols[9]).setVal(r,
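
The hunks above are the heart of the fix on the write side: rows are no longer flattened into a nanosecond long via TimestampUtils.getTimeNanoSec, but stored through TimestampColumnVector.set with a PisaTimestamp. A minimal standalone sketch of that write path, using only class and method names visible in this patch:

import java.sql.Timestamp;
import org.apache.hadoop.hive.common.type.PisaTimestamp;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class TimestampWriteSketch {
  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
    TimestampColumnVector vec = new TimestampColumnVector(1024);
    batch.cols[0] = vec;
    batch.reset();

    // All nine fractional digits survive the round trip; the old
    // long-of-nanoseconds encoding could not represent this value at all.
    Timestamp ts = Timestamp.valueOf("9999-12-31 23:59:59.999999999");
    vec.set(0, new PisaTimestamp(ts));
    batch.size = 1;
  }
}
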
http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
index a777b1c..adb52f0 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -176,13 +177,8 @@ public class TestVectorizedORCReader {
       } else if (a instanceof TimestampWritable) {
         // Timestamps are stored as long, so convert and compare
         TimestampWritable t = ((TimestampWritable) a);
-        // Timestamp.getTime() is overriden and is
-        // long time = super.getTime();
-        // return (time + (nanos / 1000000));
-        Long timeInNanoSec = (t.getTimestamp().getTime() * 1000000)
-            + (t.getTimestamp().getNanos() % 1000000);
-        long b = ((LongColumnVector) cv).vector[rowId];
-        Assert.assertEquals(timeInNanoSec.toString(), Long.toString(b));
+        TimestampColumnVector tcv = ((TimestampColumnVector) cv);
+        Assert.assertEquals(t.getTimestamp(), tcv.asScratchTimestamp(rowId));
 
       } else if (a instanceof DateWritable) {
         // Dates are stored as long, so convert and compare

http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/ql/src/test/queries/clientpositive/vectorized_timestamp.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_timestamp.q b/ql/src/test/queries/clientpositive/vectorized_timestamp.q
new file mode 100644
index 0000000..baf0cfa
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vectorized_timestamp.q
@@ -0,0 +1,27 @@
+set hive.fetch.task.conversion=none;
+
+DROP TABLE IF EXISTS test;
+CREATE TABLE test(ts TIMESTAMP) STORED AS ORC;
+INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000'), ('9999-12-31 23:59:59.999999999');
+
+SET hive.vectorized.execution.enabled = false;
+EXPLAIN
+SELECT ts FROM test;
+
+SELECT ts FROM test;
+
+EXPLAIN
+SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test;
+
+SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test;
+
+SET hive.vectorized.execution.enabled = true;
+EXPLAIN
+SELECT ts FROM test;
+
+SELECT ts FROM test;
+
+EXPLAIN
+SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test;
+
+SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test;
\ No newline at end of file
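
The two literals in this new q file are not arbitrary: they are the extreme legal TIMESTAMP values. A signed 64-bit nanosecond count since the epoch, which is what the vectorized path used before this patch, spans only about 292 years on either side of 1970, so values like these were silently corrupted. A quick back-of-the-envelope check in plain Java:

public class NanosRangeCheck {
  public static void main(String[] args) {
    long maxSeconds = Long.MAX_VALUE / 1_000_000_000L; // ~9.2 billion seconds
    long maxYears = maxSeconds / (365L * 24 * 60 * 60);
    System.out.println(maxYears); // 292 -- roughly years 1677..2262 only
  }
}
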
http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
index dedcec8..d963c5c 100644
--- a/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
@@ -1,4 +1,4 @@
-Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Reducer 3' is a cross product
+Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Reducer 3' is a cross product
 PREHOOK: query: -- HIVE-12738
 -- We are checking if a MapJoin after a GroupBy will work properly.
 explain select *
@@ -26,87 +26,87 @@ Stage-0
    limit:-1
    Stage-1
       Reducer 4 vectorized
-      File Output Operator [FS_34]
+      File Output Operator [FS_35]
          compressed:false
          Statistics:Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
          table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
-         Select Operator [OP_33]
+         Select Operator [OP_34]
          |  outputColumnNames:["_col0","_col1"]
          |  Statistics:Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
         |<-Reducer 3 [SIMPLE_EDGE] vectorized
-           Reduce Output Operator [RS_22]
+           Reduce Output Operator [RS_23]
              key expressions:_col0 (type: string)
             sort order:+
              Statistics:Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
              value expressions:_col1 (type: string)
-            Select Operator [SEL_21]
+            Select Operator [SEL_22]
               outputColumnNames:["_col0","_col1"]
               Statistics:Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-              Filter Operator [FIL_20]
+              Filter Operator [FIL_21]
                 predicate:_col3 is null (type: boolean)
                 Statistics:Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator [MAPJOIN_29]
+                Map Join Operator [MAPJOIN_30]
                 |  condition map:[{"":"Left Outer Join0 to 1"}]
                 |  HybridGraceHashJoin:true
                 |  keys:{"Reducer 3":"_col0 (type: string)","Map 5":"_col0 (type: string)"}
                 |  outputColumnNames:["_col0","_col1","_col3"]
                 |  Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
                 |<-Map 5 [BROADCAST_EDGE]
-                |  Reduce Output Operator [RS_18]
+                |  Reduce Output Operator [RS_19]
                 |     key expressions:_col0 (type: string)
                 |     Map-reduce partition columns:_col0 (type: string)
                 |     sort order:+
                 |     Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                |     Select Operator [SEL_12]
+                |     Select Operator [SEL_13]
                 |        outputColumnNames:["_col0"]
                 |        Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                |        TableScan [TS_11]
+                |        TableScan [TS_12]
                 |           alias:src
                 |           Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                |<-Map Join Operator [MAPJOIN_28]
+                |<-Map Join Operator [MAPJOIN_29]
                    |  condition map:[{"":"Inner Join 0 to 1"}]
                    |  keys:{}
                    |  outputColumnNames:["_col0","_col1"]
                    |  Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                    |<-Map 1 [BROADCAST_EDGE]
-                   |  Reduce Output Operator [RS_14]
+                   |  Reduce Output Operator [RS_15]
                    |     sort order:
                    |     Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    |     value expressions:_col0 (type: string), _col1 (type: string)
-                   |     Select Operator [SEL_1]
+                   |     Select Operator [SEL_2]
                    |        outputColumnNames:["_col0","_col1"]
                    |        Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    |        TableScan [TS_0]
                    |           alias:src
                    |           Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                   |<-Select Operator [SEL_10]
+                   |<-Select Operator [SEL_11]
                         Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        Filter Operator [FIL_9]
+                        Filter Operator [FIL_10]
                            predicate:(_col0 = 0) (type: boolean)
                            Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                           Group By Operator [OP_32]
+                           Group By Operator [OP_33]
                            |  aggregations:["count(VALUE._col0)"]
                            |  outputColumnNames:["_col0"]
                            |  Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                            |<-Map 2 [SIMPLE_EDGE]
-                              Reduce Output Operator [RS_6]
+                              Reduce Output Operator [RS_7]
                                  sort order:
                                  Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                                  value expressions:_col0 (type: bigint)
-                                 Group By Operator [GBY_5]
+                                 Group By Operator [GBY_6]
                                     aggregations:["count()"]
                                     outputColumnNames:["_col0"]
                                     Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                                    Select Operator [SEL_4]
+                                    Select Operator [SEL_5]
                                        Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                                       Filter Operator [FIL_26]
+                                       Filter Operator [FIL_27]
                                           predicate:key is null (type: boolean)
                                           Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                                          TableScan [TS_2]
+                                          TableScan [TS_3]
                                              alias:src
                                              Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 
-Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Reducer 3' is a cross product
+Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Reducer 3' is a cross product
 PREHOOK: query: select *
 from src
 where not key in

http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
index 367eb59..93cac71 100644
--- a/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
@@ -1,4 +1,4 @@
-Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[34][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
 PREHOOK: query: -- HIVE-12738
 -- We are checking if a MapJoin after a GroupBy will work properly.
 explain select *
@@ -148,7 +148,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[34][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
 PREHOOK: query: select *
 from src
 where not key in

http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/ql/src/test/results/clientpositive/vectorized_timestamp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_timestamp.q.out b/ql/src/test/results/clientpositive/vectorized_timestamp.q.out
new file mode 100644
index 0000000..bbc9b10
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vectorized_timestamp.q.out
@@ -0,0 +1,239 @@
+PREHOOK: query: DROP TABLE IF EXISTS test
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS test
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE test(ts TIMESTAMP) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+POSTHOOK: query: CREATE TABLE test(ts TIMESTAMP) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test
+PREHOOK: query: INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000'), ('9999-12-31 23:59:59.999999999')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test
+POSTHOOK: query: INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000'), ('9999-12-31 23:59:59.999999999')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test
+POSTHOOK: Lineage: test.ts EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: EXPLAIN
+SELECT ts FROM test
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT ts FROM test
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test
+            Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ts (type: timestamp)
+              outputColumnNames: _col0
+              Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT ts FROM test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ts FROM test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+0001-01-01 00:00:00
+9999-12-31 23:59:59.999999999
+PREHOOK: query: EXPLAIN
+SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test
+            Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ts (type: timestamp)
+              outputColumnNames: ts
+              Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: min(ts), max(ts)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: timestamp), _col1 (type: timestamp)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: min(VALUE._col0), max(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: timestamp), _col1 (type: timestamp), (_col1 - _col0) (type: interval_day_time)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+0001-01-01 00:00:00	9999-12-31 23:59:59.999999999	3652060 23:59:59.999999999
+PREHOOK: query: EXPLAIN
+SELECT ts FROM test
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT ts FROM test
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test
+            Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ts (type: timestamp)
+              outputColumnNames: _col0
+              Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT ts FROM test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ts FROM test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+0001-01-01 00:00:00
+9999-12-31 23:59:59.999999999
+PREHOOK: query: EXPLAIN
+SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test
+            Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ts (type: timestamp)
+              outputColumnNames: ts
+              Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: min(ts), max(ts)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: timestamp), _col1 (type: timestamp)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: min(VALUE._col0), max(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: timestamp), _col1 (type: timestamp), (_col1 - _col0) (type: interval_day_time)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+0001-01-01 00:00:00	9999-12-31 23:59:59.999999999	3652060 23:59:59.999999999
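
The last output column shows the new timestamp arithmetic holding full precision: the difference of the two boundary values prints as an interval of 3652060 days 23:59:59.999999999. A sketch of how that subtraction looks with the PisaTimestamp API added below in this patch (signatures as they appear there; the day/nano split keeps the math in two longs with no overflow):

import java.sql.Timestamp;
import org.apache.hadoop.hive.common.type.PisaTimestamp;

public class IntervalSketch {
  public static void main(String[] args) {
    PisaTimestamp min = new PisaTimestamp(Timestamp.valueOf("0001-01-01 00:00:00"));
    PisaTimestamp max = new PisaTimestamp(Timestamp.valueOf("9999-12-31 23:59:59.999999999"));
    PisaTimestamp diff = new PisaTimestamp();

    // subtract() negates the second operand and normalizes the result so
    // epochDay and nanoOfDay carry the same sign.
    PisaTimestamp.subtract(max, min, diff);
    System.out.println(diff.getEpochDay() + " days + " + diff.getNanoOfDay() + " ns");
  }
}
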
http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java
index 9ea6e91..7456725 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java
@@ -349,19 +349,6 @@ public final class BinarySortableSerializeWrite implements SerializeWrite {
     BinarySortableSerDe.serializeHiveIntervalDayTime(output, vidt, invert);
   }
 
-  @Override
-  public void writeHiveIntervalDayTime(long totalNanos) throws IOException {
-    final boolean invert = columnSortOrderIsDesc[++index];
-
-    // This field is not a null.
-    BinarySortableSerDe.writeByte(output, (byte) 1, invert);
-
-    long totalSecs = DateUtils.getIntervalDayTimeTotalSecondsFromTotalNanos(totalNanos);
-    int nanos = DateUtils.getIntervalDayTimeNanosFromTotalNanos(totalNanos);
-    BinarySortableSerDe.serializeLong(output, totalSecs, invert);
-    BinarySortableSerDe.serializeInt(output, nanos, invert);
-  }
-
   /*
    * DECIMAL.
    */

http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
index 21daa8b..e562ce3 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java
@@ -145,9 +145,6 @@ public interface SerializeWrite {
    */
   void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException;
 
-  // We provide a faster way to write a hive interval day time without a HiveIntervalDayTime object.
-  void writeHiveIntervalDayTime(long totalNanos) throws IOException;
-
   /*
    * DECIMAL.
    */

http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java
index 533b76f..fdc64e7 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java
@@ -25,8 +25,10 @@ import java.math.BigDecimal;
 import java.sql.Timestamp;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
+import java.util.Date;
 
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
 import org.apache.hadoop.hive.serde2.ByteStream.RandomAccessOutput;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
@@ -150,6 +152,21 @@ public class TimestampWritable implements WritableComparable
     }
   }
 
+  public static void updateTimestamp(Timestamp timestamp, long secondsAsMillis, int nanos) {
+    ((Date) timestamp).setTime(secondsAsMillis);
+    timestamp.setNanos(nanos);
+  }
+
+  public void setInternal(long secondsAsMillis, int nanos) {
+
+    // This is our way of documenting that we are MUTATING the contents of
+    // this writable's internal timestamp.
+    updateTimestamp(timestamp, secondsAsMillis, nanos);
+
+    bytesEmpty = true;
+    timestampEmpty = false;
+  }
+
   private void clearTimestamp() {
     timestampEmpty = true;
   }
@@ -656,7 +673,7 @@ public class TimestampWritable implements WritableComparable
    * Rounds the number of milliseconds relative to the epoch down to the nearest whole number of
    * seconds. 500 would round to 0, -500 would round to -1.
    */
-  static long millisToSeconds(long millis) {
+  public static long millisToSeconds(long millis) {
     if (millis >= 0) {
       return millis / 1000;
     } else {
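
Making millisToSeconds public matters because plain integer division rounds toward zero, while seconds-since-epoch need flooring for pre-1970 values. The javadoc's examples can be checked directly with the same logic:

public class MillisToSecondsCheck {
  // Same floor-division logic as TimestampWritable.millisToSeconds above.
  static long millisToSeconds(long millis) {
    return millis >= 0 ? millis / 1000 : (millis - 999) / 1000;
  }

  public static void main(String[] args) {
    System.out.println(millisToSeconds(500));  // 0
    System.out.println(millisToSeconds(-500)); // -1 (plain division would give 0)
  }
}
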
http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
index b64a803..280c2b0 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
@@ -473,26 +473,6 @@ public final class LazySimpleSerializeWrite implements SerializeWrite {
     index++;
   }
 
-  @Override
-  public void writeHiveIntervalDayTime(long totalNanos) throws IOException {
-
-    if (index > 0) {
-      output.write(separator);
-    }
-
-    if (hiveIntervalDayTime == null) {
-      hiveIntervalDayTime = new HiveIntervalDayTime();
-    }
-    if (hiveIntervalDayTimeWritable == null) {
-      hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable();
-    }
-    DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, totalNanos);
-    hiveIntervalDayTimeWritable.set(hiveIntervalDayTime);
-    LazyHiveIntervalDayTime.writeUTF8(output, hiveIntervalDayTimeWritable);
-
-    index++;
-  }
-
   /*
    * DECIMAL.
    */

http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
index 8f81df6..91ef12d 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
@@ -673,42 +673,6 @@ public class LazyBinarySerializeWrite implements SerializeWrite {
     }
   }
 
-  @Override
-  public void writeHiveIntervalDayTime(long totalNanos) throws IOException {
-
-    // Every 8 fields we write a NULL byte.
-    if ((fieldIndex % 8) == 0) {
-      if (fieldIndex > 0) {
-        // Write back previous 8 field's NULL byte.
-        output.writeByte(nullOffset, nullByte);
-        nullByte = 0;
-        nullOffset = output.getLength();
-      }
-      // Allocate next NULL byte.
-      output.reserve(1);
-    }
-
-    // Set bit in NULL byte when a field is NOT NULL.
-    nullByte |= 1 << (fieldIndex % 8);
-
-    if (hiveIntervalDayTime == null) {
-      hiveIntervalDayTime = new HiveIntervalDayTime();
-    }
-    if (hiveIntervalDayTimeWritable == null) {
-      hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable();
-    }
-    DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, totalNanos);
-    hiveIntervalDayTimeWritable.set(hiveIntervalDayTime);
-    hiveIntervalDayTimeWritable.writeToByteStream(output);
-
-    fieldIndex++;
-
-    if (fieldIndex == fieldCount) {
-      // Write back the final NULL byte before the last fields.
-      output.writeByte(nullOffset, nullByte);
-    }
-  }
-
   /*
    * DECIMAL.
   */
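
All three serializers drop the writeHiveIntervalDayTime(long totalNanos) fast path in the same way; callers now go through the object-based overload that this patch keeps. The removed LazyBinary overload is also a compact illustration of that serializer's null-tracking protocol, which the surviving write methods still follow: one null byte is reserved per eight fields, and a bit is set whenever a field is not null. Just the bit arithmetic, isolated:

public class NullByteSketch {
  public static void main(String[] args) {
    byte nullByte = 0;
    // Mark fields 0, 2 and 7 of an eight-field group as NOT NULL.
    for (int fieldIndex : new int[] {0, 2, 7}) {
      nullByte |= 1 << (fieldIndex % 8);
    }
    System.out.println(Integer.toBinaryString(nullByte & 0xFF)); // 10000101
  }
}
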
http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java
index 36f5b09..14fc38e 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
 import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.RandomTypeUtil;
 import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo;
 
 public class MyTestClass {
@@ -77,7 +78,7 @@ public class MyTestClass {
     myBinary = MyTestPrimitiveClass.getRandBinary(r, r.nextInt(1000));
     myDecimal = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveDecimal(r, extraTypeInfo);
     myDate = (randField == field++) ? null : MyTestPrimitiveClass.getRandDate(r);
-    myTimestamp = (randField == field++) ? null : MyTestPrimitiveClass.getRandTimestamp(r);
+    myTimestamp = (randField == field++) ? null : RandomTypeUtil.getRandTimestamp(r);
     myIntervalYearMonth = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalYearMonth(r);
     myIntervalDayTime = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalDayTime(r);
http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestPrimitiveClass.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestPrimitiveClass.java b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestPrimitiveClass.java
index 299a56c..321b574 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestPrimitiveClass.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestPrimitiveClass.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
 import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.RandomTypeUtil;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -105,7 +106,7 @@ public class MyTestPrimitiveClass {
     myBinary = getRandBinary(r, r.nextInt(1000));
     myDecimal = chooseNull(r, randField, field++) ? null : getRandHiveDecimal(r, extraTypeInfo);
     myDate = chooseNull(r, randField, field++) ? null : getRandDate(r);
-    myTimestamp = chooseNull(r, randField, field++) ? null : getRandTimestamp(r);
+    myTimestamp = chooseNull(r, randField, field++) ? null : RandomTypeUtil.getRandTimestamp(r);
     myIntervalYearMonth = chooseNull(r, randField, field++) ? null : getRandIntervalYearMonth(r);
     myIntervalDayTime = chooseNull(r, randField, field++) ? null : getRandIntervalDayTime(r);
     return field;
   }
@@ -226,24 +227,6 @@ public class MyTestPrimitiveClass {
     return dateVal;
   }
 
-  public static Timestamp getRandTimestamp(Random r) {
-    String optionalNanos = "";
-    if (r.nextInt(2) == 1) {
-      optionalNanos = String.format(".%09d",
-          Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC)));
-    }
-    String timestampStr = String.format("%d-%02d-%02d %02d:%02d:%02d%s",
-        Integer.valueOf(1970 + r.nextInt(200)), // year
-        Integer.valueOf(1 + r.nextInt(12)), // month
-        Integer.valueOf(1 + r.nextInt(28)), // day
-        Integer.valueOf(0 + r.nextInt(24)), // hour
-        Integer.valueOf(0 + r.nextInt(60)), // minute
-        Integer.valueOf(0 + r.nextInt(60)), // second
-        optionalNanos);
-    Timestamp timestampVal = Timestamp.valueOf(timestampStr);
-    return timestampVal;
-  }
-
   public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) {
     String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
     String intervalYearMonthStr = String.format("%s%d-%d",
http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java
index 10549b9..1e06049 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
 import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.RandomTypeUtil;
 import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass;
 import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct;
 import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass;
@@ -93,7 +94,7 @@ public class MyTestClassBigger {
     myBinary = MyTestPrimitiveClass.getRandBinary(r, r.nextInt(1000));
     myDecimal = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveDecimal(r, extraTypeInfo);
     myDate = (randField == field++) ? null : MyTestPrimitiveClass.getRandDate(r);
-    myTimestamp = (randField == field++) ? null : MyTestPrimitiveClass.getRandTimestamp(r);
+    myTimestamp = (randField == field++) ? null : RandomTypeUtil.getRandTimestamp(r);
     myIntervalYearMonth = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalYearMonth(r);
     myIntervalDayTime = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalDayTime(r);

http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java
index dabfb74..5449a5f 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
 import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.RandomTypeUtil;
 import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass;
 import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct;
 import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass;
@@ -73,7 +74,7 @@ public class MyTestClassSmaller {
     myBinary = MyTestPrimitiveClass.getRandBinary(r, r.nextInt(1000));
     myDecimal = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveDecimal(r, extraTypeInfo);
     myDate = (randField == field++) ? null : MyTestPrimitiveClass.getRandDate(r);
-    myTimestamp = (randField == field++) ? null : MyTestPrimitiveClass.getRandTimestamp(r);
+    myTimestamp = (randField == field++) ? null : RandomTypeUtil.getRandTimestamp(r);
     myIntervalYearMonth = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalYearMonth(r);
     myIntervalDayTime = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalDayTime(r);
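
The four test classes above all switch from MyTestPrimitiveClass.getRandTimestamp to the new RandomTypeUtil.getRandTimestamp added below in storage-api. Where the removed version drew years from 1970..2169 (safely inside the old nanosecond-long range), the new one draws from the full 0..9999 span, so the random serde tests now cover exactly the values this bug corrupted. Typical use (signature as added in this patch):

import java.sql.Timestamp;
import java.util.Random;
import org.apache.hadoop.hive.common.type.RandomTypeUtil;

public class RandTimestampUse {
  public static void main(String[] args) {
    Random r = new Random(42);
    // Can now yield dates many centuries outside the old generator's range.
    Timestamp ts = RandomTypeUtil.getRandTimestamp(r);
    System.out.println(ts);
  }
}
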
http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/storage-api/src/java/org/apache/hadoop/hive/common/type/PisaTimestamp.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/PisaTimestamp.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/PisaTimestamp.java
new file mode 100644
index 0000000..ac1e38a
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/PisaTimestamp.java
@@ -0,0 +1,609 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import java.math.BigDecimal;
+import java.sql.Timestamp;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * The Pisa project is named after the famous Leonardo of Pisa, better known as Fibonacci.
+ *
+ * A Pisa timestamp is a timestamp without a time-zone (i.e. local) in the ISO-8601 calendar system,
+ * such as 2007-12-03 10:15:30.012345678, with accuracy to the nanosecond (1 billionth of a
+ * second).
+ *
+ * Pisa timestamps use the same starting point as a java.sql.Timestamp -- the number of nanoseconds
+ * since the epoch (1970-01-01, or the day Unix roared awake) where negative numbers represent
+ * earlier days.
+ *
+ * However, PisaTimestamp has different design requirements than java.sql.Timestamp. It is
+ * designed to be mutable and NOT thread-safe, to avoid high memory allocation / garbage
+ * collection costs. It also provides for ease of use by our vectorization code, avoiding the
+ * high CPU data cache miss cost for small objects, by allowing the epoch day and nano of day
+ * to be stored externally (i.e. in vector arrays).
+ *
+ * And, importantly, PisaTimestamp is a light-weight class similar to the epochDay/NanoOfDay of
+ * the newer Java 8 LocalDateTime class, except the timestamp is *indifferent* to timezone.
+ *
+ * A common usage would be to treat it as UTC.
+ *
+ * You can work with days, seconds, milliseconds, nanoseconds, etc. But to work with months you
+ * will need to convert to an external timestamp object and use calendars, etc.
+ *
+ * The storage for a PisaTimestamp is:
+ *
+ *     long epochDay
+ *         // The number of days since 1970-01-01 (==> similar to Java 8 LocalDate).
+ *     long nanoOfDay
+ *         // The number of nanoseconds within the day, with the range of
+ *         // 0 to 24 * 60 * 60 * 1,000,000,000 - 1 (==> similar to Java 8 LocalTime).
+ *
+ * Both epochDay and nanoOfDay are signed.
+ *
+ * When both epochDay and nanoOfDay are non-zero, we maintain them so they have the
+ * same sign.
+ */
+public class PisaTimestamp {
+
+  private static final long serialVersionUID = 1L;
+
+  private long epochDay;
+  private long nanoOfDay;
+
+  private Timestamp scratchTimestamp;
+
+  public static final long NANOSECONDS_PER_SECOND = TimeUnit.SECONDS.toNanos(1);
+  public static final long NANOSECONDS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toNanos(1);
+  public static final long NANOSECONDS_PER_DAY = TimeUnit.DAYS.toNanos(1);
+
+  public static final long MILLISECONDS_PER_SECOND = TimeUnit.SECONDS.toMillis(1);
+  public static final long MILLISECONDS_PER_DAY = TimeUnit.DAYS.toMillis(1);
+
+  public static final long SECONDS_PER_DAY = TimeUnit.DAYS.toSeconds(1);
+
+  public static final long MIN_NANO_OF_DAY = -NANOSECONDS_PER_DAY;
+  public static final long MAX_NANO_OF_DAY = NANOSECONDS_PER_DAY;
+
+  public static final BigDecimal BIG_NANOSECONDS_PER_SECOND = new BigDecimal(NANOSECONDS_PER_SECOND);
+
+  public long getEpochDay() {
+    return epochDay;
+  }
+
+  public long getNanoOfDay() {
+    return nanoOfDay;
+  }
+
+  public PisaTimestamp() {
+    epochDay = 0;
+    nanoOfDay = 0;
+    scratchTimestamp = new Timestamp(0);
+  }
+
+  public PisaTimestamp(long epochDay, long nanoOfDay) {
+
+    Preconditions.checkState(validateIntegrity(epochDay, nanoOfDay),
+        "epochDay " + epochDay + ", nanoOfDay " + nanoOfDay + " not valid");
+
+    this.epochDay = epochDay;
+    this.nanoOfDay = nanoOfDay;
+    scratchTimestamp = new Timestamp(0);
+  }
+
+  public PisaTimestamp(Timestamp timestamp) {
+    super();
+    updateFromTimestamp(timestamp);
+  }
+
+  public void reset() {
+    epochDay = 0;
+    nanoOfDay = 0;
+  }
+
+  /**
+   * NOTE: This method validates the integrity rules between epoch day and nano of day,
+   * but not overflow/underflow of epoch day. Since epoch day overflow/underflow can result
+   * from client data input, that must be checked manually by the caller, as this class does
+   * not throw data range exceptions as a rule. It leaves that choice to the caller.
+   * @param epochDay
+   * @param nanoOfDay
+   * @return true if epoch day and nano of day have integrity.
+   */
+  public static boolean validateIntegrity(long epochDay, long nanoOfDay) {
+
+    // Range check nano per day as invariant.
+    if (nanoOfDay >= NANOSECONDS_PER_DAY || nanoOfDay <= -NANOSECONDS_PER_DAY) {
+      return false;
+    }
+
+    // Signs of epoch day and nano of day must match.
+    if (!(epochDay >= 0 && nanoOfDay >= 0 ||
+          epochDay <= 0 && nanoOfDay <= 0)) {
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * Set this PisaTimestamp from another PisaTimestamp.
+   * @param source
+   * @return this
+   */
+  public PisaTimestamp update(PisaTimestamp source) {
+    this.epochDay = source.epochDay;
+    this.nanoOfDay = source.nanoOfDay;
+    return this;
+  }
+
+  /**
+   * Set this PisaTimestamp from an epoch day and nano of day.
+   * @param epochDay
+   * @param nanoOfDay
+   * @return this
+   */
+  public PisaTimestamp update(long epochDay, long nanoOfDay) {
+
+    Preconditions.checkState(validateIntegrity(epochDay, nanoOfDay),
+        "epochDay " + epochDay + ", nanoOfDay " + nanoOfDay + " not valid");
+
+    this.epochDay = epochDay;
+    this.nanoOfDay = nanoOfDay;
+    return this;
+  }
+
+  /**
+   * Set this PisaTimestamp from a Timestamp object.
+   * @param timestamp
+   * @return this
+   */
+  public PisaTimestamp updateFromTimestamp(Timestamp timestamp) {
+
+    long timestampTime = timestamp.getTime();
+    int nanos = timestamp.getNanos();
+
+    /**
+     * Since the Timestamp class always stores nanos as a positive quantity (0 .. 999,999,999),
+     * we have to adjust back the time (subtract) by 1,000,000,000 to get the right quantity for
+     * our calculations below. One thing it ensures is that nanoOfDay will be negative.
+     */
+    if (timestampTime < 0 && nanos > 0) {
+      timestampTime -= MILLISECONDS_PER_SECOND;
+    }
+
+    // The Timestamp class does not use the milliseconds part (always 0). It is covered by nanos.
+    long epochSeconds = timestampTime / MILLISECONDS_PER_SECOND;
+
+    nanoOfDay = (epochSeconds % SECONDS_PER_DAY) * NANOSECONDS_PER_SECOND + nanos;
+    epochDay = epochSeconds / SECONDS_PER_DAY + (nanoOfDay / NANOSECONDS_PER_DAY);
+
+    Preconditions.checkState(validateIntegrity(epochDay, nanoOfDay));
+    return this;
+  }
+
+  /**
+   * Set this PisaTimestamp from timestamp milliseconds.
+   * @param timestampMilliseconds
+   * @return this
+   */
+  public PisaTimestamp updateFromTimestampMilliseconds(long timestampMilliseconds) {
+    /**
+     * The Timestamp class setTime sets both the time (seconds stored as milliseconds) and
+     * the nanos.
+     */
+    scratchTimestamp.setTime(timestampMilliseconds);
+    updateFromTimestamp(scratchTimestamp);
+    return this;
+  }
+
+  /**
+   * Set this PisaTimestamp from timestamp seconds.
+   * @param timestampSeconds
+   * @return this
+   */
+  public PisaTimestamp updateFromTimestampSeconds(long timestampSeconds) {
+    return updateFromTimestampMilliseconds(timestampSeconds * MILLISECONDS_PER_SECOND);
+  }
+
+  /**
+   * Set this PisaTimestamp from timestamp seconds with fractional nanoseconds (as a double).
+   * @param timestampSecondsWithFractionalNanoseconds
+   * @return this
+   */
+  public PisaTimestamp updateFromTimestampSecondsWithFractionalNanoseconds(
+      double timestampSecondsWithFractionalNanoseconds) {
+
+    // BigDecimal throws an exception on NaN, so map NaN to 0 here. (Supports vector
+    // operations that sometimes do work on double NaN values.)
+    if (Double.isNaN(timestampSecondsWithFractionalNanoseconds)) {
+      timestampSecondsWithFractionalNanoseconds = 0;
+    }
+    // Algorithm used by TimestampWritable.doubleToTimestamp method.
+    // Allocates a BigDecimal object!
+
+    long seconds = (long) timestampSecondsWithFractionalNanoseconds;
+
+    // We must ensure the exactness of the double's fractional portion.
+    // 0.6 as the fraction part will be converted to 0.59999... and
+    // significantly reduce the savings from binary serialization.
+    BigDecimal bd;
+
+    bd = new BigDecimal(String.valueOf(timestampSecondsWithFractionalNanoseconds));
+    bd = bd.subtract(new BigDecimal(seconds));    // Get the nanos fraction.
+    bd = bd.multiply(BIG_NANOSECONDS_PER_SECOND); // Make nanos an integer.
+
+    int nanos = bd.intValue();
+
+    // Convert to millis
+    long millis = seconds * 1000;
+    if (nanos < 0) {
+      millis -= 1000;
+      nanos += 1000000000;
+    }
+
+    scratchTimestamp.setTime(millis);
+    scratchTimestamp.setNanos(nanos);
+    updateFromTimestamp(scratchTimestamp);
+    return this;
+  }
+
+  /**
+   * Set this PisaTimestamp from epoch seconds and signed nanos (-999999999 to 999999999).
+   * @param epochSeconds
+   * @param signedNanos
+   * @return this
+   */
+  public PisaTimestamp updateFromEpochSecondsAndSignedNanos(long epochSeconds, int signedNanos) {
+
+    long nanoOfDay = (epochSeconds % SECONDS_PER_DAY) * NANOSECONDS_PER_SECOND + signedNanos;
+    long epochDay = epochSeconds / SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_DAY;
+
+    Preconditions.checkState(validateIntegrity(epochDay, nanoOfDay));
+
+    this.epochDay = epochDay;
+    this.nanoOfDay = nanoOfDay;
+    return this;
+  }
+
+  /**
+   * Set a scratch PisaTimestamp with this PisaTimestamp's values and return the scratch object.
+   * @param scratch
+   * @return the scratch object
+   */
+  public PisaTimestamp scratchCopy(PisaTimestamp scratch) {
+
+    scratch.epochDay = epochDay;
+    scratch.nanoOfDay = nanoOfDay;
+    return scratch;
+  }
+
+  /**
+   * Set a Timestamp object from this PisaTimestamp.
+   * @param timestamp
+   */
+  public void timestampUpdate(Timestamp timestamp) {
+
+    /*
+     * java.sql.Timestamp consists of a long variable to store milliseconds and an integer variable for nanoseconds.
+     * The long variable is used to store only the full seconds converted to millis. For example for 1234 milliseconds,
+     * 1000 is stored in the long variable, and 234000000 (234 converted to nanoseconds) is stored as nanoseconds.
+     * Negative timestamps are also supported, but nanoseconds must be positive, therefore the millisecond part is
+     * reduced by one second.
+     */
+
+    long epochSeconds = epochDay * SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_SECOND;
+    long integralSecInMillis;
+    int nanos = (int) (nanoOfDay % NANOSECONDS_PER_SECOND);  // The nanoseconds.
+    if (nanos < 0) {
+      nanos = (int) NANOSECONDS_PER_SECOND + nanos;  // The positive nano-part that will be added to milliseconds.
+      integralSecInMillis = (epochSeconds - 1) * MILLISECONDS_PER_SECOND;  // Reduce by one second.
+    } else {
+      integralSecInMillis = epochSeconds * MILLISECONDS_PER_SECOND;  // Full seconds converted to millis.
+    }
+
+    timestamp.setTime(integralSecInMillis);
+    timestamp.setNanos(nanos);
+  }
+
+  /**
+   * Return the scratch timestamp with values from this Pisa timestamp.
+   * @return
+   */
+  public Timestamp asScratchTimestamp() {
+    timestampUpdate(scratchTimestamp);
+    return scratchTimestamp;
+  }
+
+  /**
+   * Return the scratch timestamp for use by the caller.
+   * @return
+   */
+  public Timestamp useScratchTimestamp() {
+    return scratchTimestamp;
+  }
+
+  public int compareTo(PisaTimestamp another) {
+
+    if (epochDay == another.epochDay) {
+      if (nanoOfDay == another.nanoOfDay){
+        return 0;
+      } else {
+        return (nanoOfDay < another.nanoOfDay ? -1 : 1);
+      }
+    } else {
+      return (epochDay < another.epochDay ? -1: 1);
+    }
+  }
+
+  public static int compareTo(long epochDay1, long nanoOfDay1, PisaTimestamp another) {
+
+    if (epochDay1 == another.epochDay) {
+      if (nanoOfDay1 == another.nanoOfDay){
+        return 0;
+      } else {
+        return (nanoOfDay1 < another.nanoOfDay ? -1 : 1);
+      }
+    } else {
+      return (epochDay1 < another.epochDay ? -1: 1);
+    }
+  }
+
+  public static int compareTo(PisaTimestamp pisaTimestamp1, long epochDay2, long nanoOfDay2) {
+
+    if (pisaTimestamp1.epochDay == epochDay2) {
+      if (pisaTimestamp1.nanoOfDay == nanoOfDay2){
+        return 0;
+      } else {
+        return (pisaTimestamp1.nanoOfDay < nanoOfDay2 ? -1 : 1);
+      }
+    } else {
+      return (pisaTimestamp1.epochDay < epochDay2 ? -1: 1);
+    }
+  }
+
+  public static int compareTo(long epochDay1, long nanoOfDay1, long epochDay2, long nanoOfDay2) {
+
+    if (epochDay1 == epochDay2) {
+      if (nanoOfDay1 == nanoOfDay2){
+        return 0;
+      } else {
+        return (nanoOfDay1 < nanoOfDay2 ? -1 : 1);
+      }
+    } else {
+      return (epochDay1 < epochDay2 ? -1: 1);
+    }
+  }
+
+  /**
+   * Standard equals method override.
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == null || obj.getClass() != getClass()) {
+      return false;
+    }
+    return equals((PisaTimestamp) obj);
+  }
+
+  public boolean equals(PisaTimestamp other) {
+
+    if (epochDay == other.epochDay) {
+      if (nanoOfDay == other.nanoOfDay) {
+        return true;
+      } else {
+        return false;
+      }
+    } else {
+      return false;
+    }
+  }
+
+  public static void add(PisaTimestamp pisaTimestamp1, PisaTimestamp pisaTimestamp2,
+      PisaTimestamp result) {
+    add(pisaTimestamp1.epochDay, pisaTimestamp1.nanoOfDay,
+        pisaTimestamp2.epochDay, pisaTimestamp2.nanoOfDay,
+        result);
+  }
+
+  public static void add(long epochDay1, long nanoOfDay1,
+      long epochDay2, long nanoOfDay2,
+      PisaTimestamp result) {
+
+    // Validate integrity rules between epoch day and nano of day.
+    Preconditions.checkState(PisaTimestamp.validateIntegrity(epochDay1, nanoOfDay1));
+    Preconditions.checkState(PisaTimestamp.validateIntegrity(epochDay2, nanoOfDay2));
+
+    long intermediateEpochDay = epochDay1 + epochDay2;
+    long intermediateNanoOfDay = nanoOfDay1 + nanoOfDay2;
+
+    // Normalize so both are positive or both are negative.
+    long normalizedEpochDay;
+    long normalizedNanoOfDay;
+    if (intermediateEpochDay > 0 && intermediateNanoOfDay < 0) {
+      normalizedEpochDay = intermediateEpochDay - 1;
+      normalizedNanoOfDay = intermediateNanoOfDay + NANOSECONDS_PER_DAY;
+    } else if (intermediateEpochDay < 0 && intermediateNanoOfDay > 0) {
+      normalizedEpochDay = intermediateEpochDay + 1;
+      normalizedNanoOfDay = intermediateNanoOfDay - NANOSECONDS_PER_DAY;
+    } else {
+      normalizedEpochDay = intermediateEpochDay;
+      normalizedNanoOfDay = intermediateNanoOfDay;
+    }
+
+    long resultEpochDay;
+    long resultNanoOfDay;
+    if (normalizedNanoOfDay >= NANOSECONDS_PER_DAY || normalizedNanoOfDay <= -NANOSECONDS_PER_DAY) {
+      // Adjust for carry or overflow...
+
+      resultEpochDay = normalizedEpochDay + normalizedNanoOfDay / NANOSECONDS_PER_DAY;
+      resultNanoOfDay = normalizedNanoOfDay % NANOSECONDS_PER_DAY;
+
+    } else {
+      resultEpochDay = normalizedEpochDay;
+      resultNanoOfDay = normalizedNanoOfDay;
+    }
+
+    // The update method will validate integrity rules between epoch day and nano of day,
+    // but not overflow/underflow of epoch day.
+    result.update(resultEpochDay, resultNanoOfDay);
+  }
+
+  public static void addSeconds(PisaTimestamp timestamp1, long epochSeconds, PisaTimestamp result) {
+    long epochDay = epochSeconds / SECONDS_PER_DAY;
+    long nanoOfDay = (epochSeconds % SECONDS_PER_DAY) * NANOSECONDS_PER_SECOND;
+    add(timestamp1.epochDay, timestamp1.nanoOfDay, epochDay, nanoOfDay, result);
+  }
+
+  public static void subtract(PisaTimestamp timestamp1, PisaTimestamp timestamp2,
+      PisaTimestamp result) {
+
+    add(timestamp1.epochDay, timestamp1.nanoOfDay, -timestamp2.epochDay, -timestamp2.nanoOfDay,
+        result);
+  }
+
+  public static void subtract(long epochDay1, long nanoOfDay1,
+      long epochDay2, long nanoOfDay2,
+      PisaTimestamp result) {
+
+    add(epochDay1, nanoOfDay1, -epochDay2, -nanoOfDay2, result);
+  }
+
+  public static void subtractSeconds(PisaTimestamp timestamp1, long epochSeconds,
+      PisaTimestamp result) {
+    long epochDay = epochSeconds / SECONDS_PER_DAY;
+    long nanoOfDay = (epochSeconds % SECONDS_PER_DAY) * NANOSECONDS_PER_SECOND;
+    add(timestamp1.epochDay, timestamp1.nanoOfDay, -epochDay, -nanoOfDay, result);
+  }
+
+  /**
+   * Rounds the number of milliseconds relative to the epoch down to the nearest whole number of
+   * seconds. 500 would round to 0, -500 would round to -1.
+   */
+  public static long timestampMillisToSeconds(long millis) {
+    if (millis >= 0) {
+      return millis / 1000;
+    } else {
+      return (millis - 999) / 1000;
+    }
+  }
+
+  /**
+   * Return a double with the integer part as the seconds and the fractional part as
+   * the nanoseconds the way the Timestamp class does it.
+   * @return seconds.nanoseconds
+   */
+  public double getTimestampSecondsWithFractionalNanos() {
+    // Algorithm must be the same as TimestampWritable.getDouble method.
+    timestampUpdate(scratchTimestamp);
+    double seconds = timestampMillisToSeconds(scratchTimestamp.getTime());
+    double nanos = scratchTimestamp.getNanos();
+    BigDecimal bigSeconds = new BigDecimal(seconds);
+    BigDecimal bigNanos = new BigDecimal(nanos).divide(BIG_NANOSECONDS_PER_SECOND);
+    return bigSeconds.add(bigNanos).doubleValue();
+  }
+
+  /**
+   * Return the seconds the way the Timestamp class does it.
+   * @return seconds
+   */
+  public long getTimestampSeconds() {
+    // Algorithm must be the same as TimestampWritable.getSeconds method.
+    timestampUpdate(scratchTimestamp);
+    return timestampMillisToSeconds(scratchTimestamp.getTime());
+  }
+
+  /**
+   * Return the milliseconds the way the Timestamp class does it.
+   * @return milliseconds
+   */
+  public long getTimestampMilliseconds() {
+    timestampUpdate(scratchTimestamp);
+    return scratchTimestamp.getTime();
+  }
+
+  /**
+   * Return the epoch seconds.
+   * @return
+   */
+  public long getEpochSeconds() {
+    return epochDay * SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_SECOND;
+  }
+
+  /**
+   * Return the epoch seconds, given the epoch day and nano of day.
+
+  /**
+   * Return the epoch seconds, given the epoch day and nano of day.
+   * @param epochDay
+   * @param nanoOfDay
+   * @return the epoch seconds
+   */
+  public static long getEpochSecondsFromEpochDayAndNanoOfDay(long epochDay, long nanoOfDay) {
+    return epochDay * SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_SECOND;
+  }
+
+  /**
+   * Return the signed nanos (-999999999 to 999999999).
+   * NOTE: Not the same as Timestamp class nanos (which are always positive).
+   */
+  public int getSignedNanos() {
+    return (int) (nanoOfDay % NANOSECONDS_PER_SECOND);
+  }
+
+  /**
+   * Return the signed nanos (-999999999 to 999999999).
+   * NOTE: Not the same as Timestamp class nanos (which are always positive).
+   */
+  public static int getSignedNanos(long nanoOfDay) {
+    return (int) (nanoOfDay % NANOSECONDS_PER_SECOND);
+  }
+
+  /**
+   * Return the epoch milliseconds.
+   * @return the epoch milliseconds
+   */
+  public long getEpochMilliseconds() {
+    return epochDay * MILLISECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_MILLISECOND;
+  }
+
+  /**
+   * Return the epoch milliseconds, given the epoch day and nano of day.
+   * @param epochDay
+   * @param nanoOfDay
+   * @return the epoch milliseconds
+   */
+  public static long getEpochMillisecondsFromEpochDayAndNanoOfDay(long epochDay, long nanoOfDay) {
+    return epochDay * MILLISECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_MILLISECOND;
+  }
+
+  @Override
+  public int hashCode() {
+    // Hash the two longs directly; no need to box them just to get hash codes.
+    return (int) (epochDay ^ (epochDay >>> 32)) ^ (int) (nanoOfDay ^ (nanoOfDay >>> 32));
+  }
+
+  @Override
+  public String toString() {
+    timestampUpdate(scratchTimestamp);
+    return scratchTimestamp.toString();
+  }
+}
\ No newline at end of file
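For reference, the epoch-seconds and epoch-milliseconds conversions above are the same scaled sum once epochDay and nanoOfDay share a sign. A worked example with the constants spelled out (class name and values illustrative, not part of the patch):

    public class EpochConversionSketch {
      static final long SECONDS_PER_DAY = 24L * 60 * 60;                // 86,400
      static final long MILLISECONDS_PER_DAY = SECONDS_PER_DAY * 1000;  // 86,400,000
      static final long NANOSECONDS_PER_SECOND = 1_000_000_000L;
      static final long NANOSECONDS_PER_MILLISECOND = 1_000_000L;

      public static void main(String[] args) {
        long epochDay = 1;                // 1970-01-02
        long nanoOfDay = 1_500_000_000L;  // 00:00:01.5 into the day
        long seconds = epochDay * SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_SECOND;
        long millis = epochDay * MILLISECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_MILLISECOND;
        System.out.println(seconds);  // 86401 (the half second is truncated)
        System.out.println(millis);   // 86401500
      }
    }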
http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java
new file mode 100644
index 0000000..13baff4
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import java.sql.Timestamp;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+public class RandomTypeUtil {
+
+  public static final long NANOSECONDS_PER_SECOND = TimeUnit.SECONDS.toNanos(1);
+
+  public static Timestamp getRandTimestamp(Random r) {
+    String optionalNanos = "";
+    if (r.nextInt(2) == 1) {
+      optionalNanos = String.format(".%09d",
+          r.nextInt((int) NANOSECONDS_PER_SECOND));
+    }
+    String timestampStr = String.format("%04d-%02d-%02d %02d:%02d:%02d%s",
+        r.nextInt(10000),   // year
+        1 + r.nextInt(12),  // month
+        1 + r.nextInt(28),  // day
+        r.nextInt(24),      // hour
+        r.nextInt(60),      // minute
+        r.nextInt(60),      // second
+        optionalNanos);
+    Timestamp timestampVal;
+    try {
+      timestampVal = Timestamp.valueOf(timestampStr);
+    } catch (Exception e) {
+      System.err.println("Timestamp string " + timestampStr + " did not parse");
+      throw e;
+    }
+    return timestampVal;
+  }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
index fcb1ae9..4ae9c47 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -42,6 +42,7 @@ public abstract class ColumnVector {
     DOUBLE,
     BYTES,
     DECIMAL,
+    TIMESTAMP,
     STRUCT,
     LIST,
     MAP,
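A sketch of how a test might drive the getRandTimestamp helper introduced above; the fixed seed is illustrative, chosen only so the sequence is repeatable:

    import java.sql.Timestamp;
    import java.util.Random;

    import org.apache.hadoop.hive.common.type.RandomTypeUtil;

    public class RandTimestampSketch {
      public static void main(String[] args) {
        Random r = new Random(12345);  // illustrative fixed seed for repeatable tests
        for (int i = 0; i < 3; i++) {
          Timestamp ts = RandomTypeUtil.getRandTimestamp(r);
          // By construction, years fall in 0000..9999 and nanos in 0..999999999.
          System.out.println(ts + " nanos=" + ts.getNanos());
        }
      }
    }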
http://git-wip-us.apache.org/repos/asf/hive/blob/4a479d0c/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
new file mode 100644
index 0000000..b73a0d2
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
@@ -0,0 +1,498 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.sql.Timestamp;
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.io.Writable;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class represents a nullable timestamp column vector capable of handling a wide range of
+ * timestamp values.
+ *
+ * We use PisaTimestamp, which is designed to be mutable and to avoid heavy memory allocation
+ * and CPU data cache miss costs.
+ */
+public class TimestampColumnVector extends ColumnVector {
+
+  /*
+   * The storage arrays for this column vector correspond to the storage of a PisaTimestamp:
+   */
+  private long[] epochDay;
+      // An array of the number of days since 1970-01-01 (similar to Java 8 LocalDate).
+
+  private long[] nanoOfDay;
+      // An array of the number of nanoseconds within the day, with the range of
+      // 0 to 24 * 60 * 60 * 1,000,000,000 - 1 (similar to Java 8 LocalTime).
+
+  /*
+   * Scratch objects.
+   */
+  private PisaTimestamp scratchPisaTimestamp;
+      // Convenience scratch Pisa timestamp object.
+
+  private Writable scratchWritable;
+      // Supports keeping a TimestampWritable object without having to import that definition...
+
+  /**
+   * Use this constructor by default. All column vectors
+   * should normally be the default size.
+   */
+  public TimestampColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Don't use this except for testing purposes.
+   *
+   * @param len the number of rows
+   */
+  public TimestampColumnVector(int len) {
+    super(len);
+
+    epochDay = new long[len];
+    nanoOfDay = new long[len];
+
+    scratchPisaTimestamp = new PisaTimestamp();
+
+    scratchWritable = null;     // Allocated by caller.
+  }
+
+  /**
+   * Return the number of rows.
+   * @return the number of rows
+   */
+  public int getLength() {
+    return epochDay.length;
+  }
+
+  /**
+   * Return a row's epoch day.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return the row's epoch day
+   */
+  public long getEpochDay(int elementNum) {
+    return epochDay[elementNum];
+  }
+
+  /**
+   * Return a row's nano of day.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return the row's nano of day
+   */
+  public long getNanoOfDay(int elementNum) {
+    return nanoOfDay[elementNum];
+  }
+
+  /**
+   * Get a scratch PisaTimestamp object from a row of the column.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return the scratch PisaTimestamp, updated to the row's value
+   */
+  public PisaTimestamp asScratchPisaTimestamp(int elementNum) {
+    scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]);
+    return scratchPisaTimestamp;
+  }
+
+  /**
+   * Set a PisaTimestamp object from a row of the column.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param pisaTimestamp
+   * @param elementNum
+   */
+  public void pisaTimestampUpdate(PisaTimestamp pisaTimestamp, int elementNum) {
+    pisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]);
+  }
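Because rows live in the two parallel long arrays, a value can be written and read back without allocating a per-row object. A small sketch of that, assuming the patched classes above are on the classpath:

    import java.sql.Timestamp;

    import org.apache.hadoop.hive.common.type.PisaTimestamp;
    import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

    public class TimestampVectorSketch {
      public static void main(String[] args) {
        TimestampColumnVector col = new TimestampColumnVector(4);
        // set() copies only the two longs out of the PisaTimestamp.
        col.set(0, new PisaTimestamp(Timestamp.valueOf("2008-10-02 00:00:00")));
        System.out.println(col.getEpochDay(0) + " / " + col.getNanoOfDay(0));
      }
    }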
+
+  /**
+   * Set a Timestamp object from a row of the column.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param timestamp
+   * @param elementNum
+   */
+  public void timestampUpdate(Timestamp timestamp, int elementNum) {
+    scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]);
+    scratchPisaTimestamp.timestampUpdate(timestamp);
+  }
+
+  /**
+   * Compare row to PisaTimestamp.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @param pisaTimestamp
+   * @return -1, 0, 1 standard compareTo values.
+   */
+  public int compareTo(int elementNum, PisaTimestamp pisaTimestamp) {
+    return PisaTimestamp.compareTo(epochDay[elementNum], nanoOfDay[elementNum], pisaTimestamp);
+  }
+
+  /**
+   * Compare PisaTimestamp to row.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param pisaTimestamp
+   * @param elementNum
+   * @return -1, 0, 1 standard compareTo values.
+   */
+  public int compareTo(PisaTimestamp pisaTimestamp, int elementNum) {
+    return PisaTimestamp.compareTo(pisaTimestamp, epochDay[elementNum], nanoOfDay[elementNum]);
+  }
+
+  /**
+   * Compare a row to another TimestampColumnVector's row.
+   * @param elementNum1
+   * @param timestampColVector2
+   * @param elementNum2
+   * @return -1, 0, 1 standard compareTo values.
+   */
+  public int compareTo(int elementNum1, TimestampColumnVector timestampColVector2,
+      int elementNum2) {
+    return PisaTimestamp.compareTo(
+        epochDay[elementNum1], nanoOfDay[elementNum1],
+        timestampColVector2.epochDay[elementNum2], timestampColVector2.nanoOfDay[elementNum2]);
+  }
+
+  /**
+   * Compare another TimestampColumnVector's row to a row.
+   * @param timestampColVector1
+   * @param elementNum1
+   * @param elementNum2
+   * @return -1, 0, 1 standard compareTo values.
+   */
+  public int compareTo(TimestampColumnVector timestampColVector1, int elementNum1,
+      int elementNum2) {
+    return PisaTimestamp.compareTo(
+        timestampColVector1.epochDay[elementNum1], timestampColVector1.nanoOfDay[elementNum1],
+        epochDay[elementNum2], nanoOfDay[elementNum2]);
+  }
+
+  public void add(PisaTimestamp timestamp1, PisaTimestamp timestamp2, int resultElementNum) {
+    PisaTimestamp.add(timestamp1, timestamp2, scratchPisaTimestamp);
+    epochDay[resultElementNum] = scratchPisaTimestamp.getEpochDay();
+    nanoOfDay[resultElementNum] = scratchPisaTimestamp.getNanoOfDay();
+  }
+
+  public void subtract(PisaTimestamp timestamp1, PisaTimestamp timestamp2, int resultElementNum) {
+    PisaTimestamp.subtract(timestamp1, timestamp2, scratchPisaTimestamp);
+    epochDay[resultElementNum] = scratchPisaTimestamp.getEpochDay();
+    nanoOfDay[resultElementNum] = scratchPisaTimestamp.getNanoOfDay();
+  }
+
+  /**
+   * Return row as a double with the integer part as the seconds and the fractional part as
+   * the nanoseconds the way the Timestamp class does it.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return seconds.nanoseconds
+   */
+  public double getTimestampSecondsWithFractionalNanos(int elementNum) {
+    scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]);
+    return scratchPisaTimestamp.getTimestampSecondsWithFractionalNanos();
+  }
+
+  /**
+   * Return a row's seconds the way the Timestamp class does it.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return seconds
+   */
+  public long getTimestampSeconds(int elementNum) {
+    scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]);
+    return scratchPisaTimestamp.getTimestampSeconds();
+  }
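The cross-vector compareTo overloads let operators order rows from two batches without materializing Timestamp objects. A sketch, assuming the patched classes are on the classpath (class name illustrative):

    import java.sql.Timestamp;

    import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

    public class VectorCompareSketch {
      public static void main(String[] args) {
        TimestampColumnVector a = new TimestampColumnVector(2);
        TimestampColumnVector b = new TimestampColumnVector(2);
        a.set(0, Timestamp.valueOf("2001-01-01 00:00:00"));
        b.set(0, Timestamp.valueOf("2002-01-01 00:00:00"));
        // Negative: row 0 of a sorts before row 0 of b.
        System.out.println(a.compareTo(0, b, 0));
      }
    }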
+
+  /**
+   * Return row as milliseconds the way the Timestamp class does it.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return milliseconds
+   */
+  public long getTimestampMilliseconds(int elementNum) {
+    scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]);
+    return scratchPisaTimestamp.getTimestampMilliseconds();
+  }
+
+  /**
+   * Return row as epoch seconds.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return the epoch seconds
+   */
+  public long getEpochSeconds(int elementNum) {
+    return PisaTimestamp.getEpochSecondsFromEpochDayAndNanoOfDay(epochDay[elementNum], nanoOfDay[elementNum]);
+  }
+
+  /**
+   * Return row as epoch milliseconds.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return the epoch milliseconds
+   */
+  public long getEpochMilliseconds(int elementNum) {
+    return PisaTimestamp.getEpochMillisecondsFromEpochDayAndNanoOfDay(epochDay[elementNum], nanoOfDay[elementNum]);
+  }
+
+  /**
+   * Return row as signed nanos (-999999999 to 999999999).
+   * NOTE: This is not the same as the Timestamp class nanos (which is always positive).
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return the signed nanos
+   */
+  public int getSignedNanos(int elementNum) {
+    return PisaTimestamp.getSignedNanos(nanoOfDay[elementNum]);
+  }
+
+  /**
+   * Get scratch timestamp with value of a row.
+   * @param elementNum
+   * @return the scratch Timestamp, updated to the row's value
+   */
+  public Timestamp asScratchTimestamp(int elementNum) {
+    scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]);
+    return scratchPisaTimestamp.asScratchTimestamp();
+  }
+
+  /**
+   * Get scratch Pisa timestamp for use by the caller.
+   * @return the scratch PisaTimestamp
+   */
+  public PisaTimestamp useScratchPisaTimestamp() {
+    return scratchPisaTimestamp;
+  }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+
+    TimestampColumnVector timestampColVector = (TimestampColumnVector) inputVector;
+
+    epochDay[outElementNum] = timestampColVector.epochDay[inputElementNum];
+    nanoOfDay[outElementNum] = timestampColVector.nanoOfDay[inputElementNum];
+  }
+
+  // Simplify vector by brute-force flattening noNulls and isRepeating.
+  // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+  // with many arguments.
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      long repeatEpochDay = epochDay[0];
+      long repeatNanoOfDay = nanoOfDay[0];
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          epochDay[i] = repeatEpochDay;
+          nanoOfDay[i] = repeatNanoOfDay;
+        }
+      } else {
+        Arrays.fill(epochDay, 0, size, repeatEpochDay);
+        Arrays.fill(nanoOfDay, 0, size, repeatNanoOfDay);
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  /**
+   * Set a row from a PisaTimestamp.
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   * @param pisaTimestamp
+   */
+  public void set(int elementNum, PisaTimestamp pisaTimestamp) {
+    this.epochDay[elementNum] = pisaTimestamp.getEpochDay();
+    this.nanoOfDay[elementNum] = pisaTimestamp.getNanoOfDay();
+  }
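One caution the asScratch* accessors imply but do not state: they return the single shared scratch object, so its contents are overwritten by the next access. A sketch of the aliasing and of the safe copy-out pattern via timestampUpdate (class name illustrative):

    import java.sql.Timestamp;

    import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

    public class ScratchCopySketch {
      public static void main(String[] args) {
        TimestampColumnVector col = new TimestampColumnVector(2);
        col.set(0, Timestamp.valueOf("2001-01-01 00:00:00"));
        col.set(1, Timestamp.valueOf("2002-02-02 00:00:00"));

        // Wrong: both variables alias the same scratch object, so the second
        // call overwrites what shared0 appeared to hold.
        Timestamp shared0 = col.asScratchTimestamp(0);
        Timestamp shared1 = col.asScratchTimestamp(1);

        // Safe: copy row 0 into a Timestamp the caller owns.
        Timestamp copy0 = new Timestamp(0);
        col.timestampUpdate(copy0, 0);

        // Prints the 2001 copy, then the 2002 value twice (aliasing).
        System.out.println(copy0 + " / " + shared0 + " / " + shared1);
      }
    }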
+
+  /**
+   * Set a row from a timestamp.
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   * @param timestamp
+   */
+  public void set(int elementNum, Timestamp timestamp) {
+    scratchPisaTimestamp.updateFromTimestamp(timestamp);
+    this.epochDay[elementNum] = scratchPisaTimestamp.getEpochDay();
+    this.nanoOfDay[elementNum] = scratchPisaTimestamp.getNanoOfDay();
+  }
+
+  /**
+   * Set a row from epoch seconds and signed nanos (-999999999 to 999999999).
+   * @param elementNum
+   * @param epochSeconds
+   * @param signedNanos
+   */
+  public void setEpochSecondsAndSignedNanos(int elementNum, long epochSeconds, int signedNanos) {
+    scratchPisaTimestamp.updateFromEpochSecondsAndSignedNanos(epochSeconds, signedNanos);
+    set(elementNum, scratchPisaTimestamp);
+  }
+
+  /**
+   * Set a row from timestamp milliseconds.
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   * @param timestampMilliseconds
+   */
+  public void setTimestampMilliseconds(int elementNum, long timestampMilliseconds) {
+    scratchPisaTimestamp.updateFromTimestampMilliseconds(timestampMilliseconds);
+    set(elementNum, scratchPisaTimestamp);
+  }
+
+  /**
+   * Set a row from timestamp seconds.
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   * @param timestampSeconds
+   */
+  public void setTimestampSeconds(int elementNum, long timestampSeconds) {
+    scratchPisaTimestamp.updateFromTimestampSeconds(timestampSeconds);
+    set(elementNum, scratchPisaTimestamp);
+  }
+
+  /**
+   * Set a row from a double timestamp seconds with fractional nanoseconds.
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   * @param secondsWithFractionalNanoseconds
+   */
+  public void setTimestampSecondsWithFractionalNanoseconds(int elementNum,
+      double secondsWithFractionalNanoseconds) {
+    scratchPisaTimestamp.updateFromTimestampSecondsWithFractionalNanoseconds(secondsWithFractionalNanoseconds);
+    set(elementNum, scratchPisaTimestamp);
+  }
+
+  /**
+   * Set row to standard null value(s).
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   */
+  public void setNullValue(int elementNum) {
+    epochDay[elementNum] = 0;
+    nanoOfDay[elementNum] = 1;
+  }
+
+  // Copy the current object contents into the output. Only copy selected entries,
+  // as indicated by selectedInUse and the sel array.
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, TimestampColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case
+    if (isRepeating) {
+      output.epochDay[0] = epochDay[0];
+      output.nanoOfDay[0] = nanoOfDay[0];
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.epochDay[i] = epochDay[i];
+        output.nanoOfDay[i] = nanoOfDay[i];
+      }
+    }
+    else {
+      System.arraycopy(epochDay, 0, output.epochDay, 0, size);
+      System.arraycopy(nanoOfDay, 0, output.nanoOfDay, 0, size);
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          output.isNull[i] = isNull[i];
+        }
+      }
+      else {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
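copySelected honors a selection vector, which is how filtered batches propagate between vectorized operators. A sketch with an assumed selection of rows 0 and 2 (class name and values illustrative):

    import java.sql.Timestamp;

    import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

    public class CopySelectedSketch {
      public static void main(String[] args) {
        TimestampColumnVector in = new TimestampColumnVector(4);
        TimestampColumnVector out = new TimestampColumnVector(4);
        for (int i = 0; i < 4; i++) {
          in.set(i, Timestamp.valueOf("2016-04-0" + (i + 1) + " 00:00:00"));
        }
        int[] sel = {0, 2};  // only these row indexes are live
        in.copySelected(true, sel, sel.length, out);
        System.out.println(out.asScratchTimestamp(0));  // 2016-04-01
        System.out.println(out.asScratchTimestamp(2));  // 2016-04-03
      }
    }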
+
+  /**
+   * Fill all the vector entries with a PisaTimestamp.
+   * @param pisaTimestamp
+   */
+  public void fill(PisaTimestamp pisaTimestamp) {
+    noNulls = true;
+    isRepeating = true;
+    epochDay[0] = pisaTimestamp.getEpochDay();
+    nanoOfDay[0] = pisaTimestamp.getNanoOfDay();
+  }
+
+  /**
+   * Fill all the vector entries with a timestamp.
+   * @param timestamp
+   */
+  public void fill(Timestamp timestamp) {
+    noNulls = true;
+    isRepeating = true;
+    scratchPisaTimestamp.updateFromTimestamp(timestamp);
+    epochDay[0] = scratchPisaTimestamp.getEpochDay();
+    nanoOfDay[0] = scratchPisaTimestamp.getNanoOfDay();
+  }
+
+  /**
+   * Return a convenience writable object stored by this column vector.
+   * Supports keeping a TimestampWritable object without having to import that definition...
+   * @return the scratch Writable, or null if the caller has not set one
+   */
+  public Writable getScratchWritable() {
+    return scratchWritable;
+  }
+
+  /**
+   * Set the convenience writable object stored by this column vector.
+   * @param scratchWritable
+   */
+  public void setScratchWritable(Writable scratchWritable) {
+    this.scratchWritable = scratchWritable;
+  }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      scratchPisaTimestamp.update(epochDay[row], nanoOfDay[row]);
+      buffer.append(scratchPisaTimestamp.toString());
+    } else {
+      buffer.append("null");
+    }
+  }
+}
\ No newline at end of file