Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 0C14918E5E for ; Tue, 9 Feb 2016 19:16:04 +0000 (UTC) Received: (qmail 80467 invoked by uid 500); 9 Feb 2016 19:16:03 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 80354 invoked by uid 500); 9 Feb 2016 19:16:03 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 80008 invoked by uid 99); 9 Feb 2016 19:16:03 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 09 Feb 2016 19:16:03 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id D9936E6995; Tue, 9 Feb 2016 19:16:02 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: mmccline@apache.org To: commits@hive.apache.org Date: Tue, 09 Feb 2016 19:16:08 -0000 Message-Id: <8fba91ff66ca4885a95eb760d66cbd59@git.apache.org> In-Reply-To: <1b93c65553a84757a493fb876b8fd2bb@git.apache.org> References: <1b93c65553a84757a493fb876b8fd2bb@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [07/15] hive git commit: HIVE-9862 Vectorized execution corrupts timestamp values (Matt McCline, reviewed by Jason Dere) http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 0ec91b8..7e79e1e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -370,18 +370,18 @@ public class VectorizedRowBatchCtx { } else { lcv.fill(DateWritable.dateToDays((Date) value)); lcv.isNull[0] = false; - } + } } break; - + case TIMESTAMP: { - LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex]; + TimestampColumnVector lcv = (TimestampColumnVector) batch.cols[colIndex]; if (value == null) { lcv.noNulls = false; lcv.isNull[0] = true; lcv.isRepeating = true; - } else { - lcv.fill(TimestampUtils.getTimeNanoSec((Timestamp) value)); + } else { + lcv.fill((Timestamp) value); lcv.isNull[0] = false; } } @@ -400,14 +400,14 @@ public class VectorizedRowBatchCtx { } case INTERVAL_DAY_TIME: { - LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex]; + TimestampColumnVector tcv = (TimestampColumnVector) batch.cols[colIndex]; if (value == null) { - lcv.noNulls = false; - lcv.isNull[0] = true; - lcv.isRepeating = true; + tcv.noNulls = false; + tcv.isNull[0] = true; + tcv.isRepeating = true; } else { - lcv.fill(DateUtils.getIntervalDayTimeTotalNanos((HiveIntervalDayTime) value)); - lcv.isNull[0] = false; + tcv.fill(((HiveIntervalDayTime) value).pisaTimestampUpdate(tcv.useScratchPisaTimestamp())); + tcv.isNull[0] = false; } } http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java index a52cf19..2b0068d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java @@ -18,20 +18,23 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.sql.Timestamp; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; /** * Type cast decimal to timestamp. The decimal value is interpreted * as NNNN.DDDDDDDDD where NNNN is a number of seconds and DDDDDDDDD * is a number of nano-seconds. */ -public class CastDecimalToTimestamp extends FuncDecimalToLong { +public class CastDecimalToTimestamp extends FuncDecimalToTimestamp { private static final long serialVersionUID = 1L; - private static transient HiveDecimal tenE9 = HiveDecimal.create(1000000000); - public CastDecimalToTimestamp(int inputColumn, int outputColumn) { super(inputColumn, outputColumn); } @@ -40,13 +43,8 @@ public class CastDecimalToTimestamp extends FuncDecimalToLong { } @Override - protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) { - HiveDecimal result = inV.vector[i].getHiveDecimal().multiply(tenE9); - if (result == null) { - outV.noNulls = false; - outV.isNull[i] = true; - } else { - outV.vector[i] = result.longValue(); - } + protected void func(TimestampColumnVector outV, DecimalColumnVector inV, int i) { + Timestamp timestamp = TimestampWritable.decimalToTimestamp(inV.vector[i].getHiveDecimal()); + outV.set(i, timestamp); } } http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java new file mode 100644 index 0000000..39823fe --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java @@ -0,0 +1,123 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; + +public class CastDoubleToTimestamp extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastDoubleToTimestamp(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastDoubleToTimestamp() { + super(); + } + + private void setSecondsWithFractionalNanoseconds(TimestampColumnVector timestampColVector, + double[] vector, int elementNum) { + timestampColVector.setTimestampSecondsWithFractionalNanoseconds(elementNum, vector[elementNum]); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + double[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + setSecondsWithFractionalNanoseconds(outputColVector, vector, 0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + setSecondsWithFractionalNanoseconds(outputColVector, vector, i); + } + } else { + for(int i = 0; i != n; i++) { + setSecondsWithFractionalNanoseconds(outputColVector, vector, i); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + setSecondsWithFractionalNanoseconds(outputColVector, vector, i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + setSecondsWithFractionalNanoseconds(outputColVector, vector, i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("double")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java index 32cefea..ceefd61 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java @@ -63,62 +63,6 @@ public class CastLongToDate extends VectorExpression { } switch (inputTypes[0]) { - case TIMESTAMP: - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - date.setTime(inV.vector[0] / 1000000); - outV.vector[0] = DateWritable.dateToDays(date); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - date.setTime(inV.vector[i] / 1000000); - outV.vector[i] = DateWritable.dateToDays(date); - } - outV.isRepeating = false; - } else { - for(int i = 0; i != n; i++) { - date.setTime(inV.vector[i] / 1000000); - outV.vector[i] = DateWritable.dateToDays(date); - } - outV.isRepeating = false; - } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - date.setTime(inV.vector[0] / 1000000); - outV.vector[0] = DateWritable.dateToDays(date); - } - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - date.setTime(inV.vector[i] / 1000000); - outV.vector[i] = DateWritable.dateToDays(date); - } - } - outV.isRepeating = false; - } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); - for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - date.setTime(inV.vector[i] / 1000000); - outV.vector[i] = DateWritable.dateToDays(date); - } - } - outV.isRepeating = false; - } - } - break; - case DATE: inV.copySelected(batch.selectedInUse, batch.selected, batch.size, outV); break; @@ -155,7 +99,7 @@ public class CastLongToDate extends VectorExpression { b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(1) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.DATETIME_FAMILY) + VectorExpressionDescriptor.ArgumentType.DATE) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java new file mode 100644 index 0000000..d344d4d --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastLongToTimestamp extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastLongToTimestamp(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastLongToTimestamp() { + super(); + } + + private void setSeconds(TimestampColumnVector timestampColVector, long[] vector, int elementNum) { + timestampColVector.setTimestampSeconds(elementNum, vector[elementNum]); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + long[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + setSeconds(outputColVector, vector, 0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + setSeconds(outputColVector, vector, i); + } + } else { + for(int i = 0; i != n; i++) { + setSeconds(outputColVector, vector, i); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + setSeconds(outputColVector, vector, i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + setSeconds(outputColVector, vector, i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java new file mode 100644 index 0000000..a0c947f --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastMillisecondsLongToTimestamp extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastMillisecondsLongToTimestamp(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastMillisecondsLongToTimestamp() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + long[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputColVector.setTimestampMilliseconds(0, vector[0]); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.setTimestampMilliseconds(i, vector[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.setTimestampMilliseconds(i, vector[i]); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.setTimestampMilliseconds(i, vector[i]); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.setTimestampMilliseconds(i, vector[i]); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java index 518d5d5..a3ddf9f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde.serdeConstants; @@ -55,7 +56,7 @@ public class CastStringToIntervalDayTime extends VectorExpression { BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + TimestampColumnVector outV = (TimestampColumnVector) batch.cols[outputColumn]; if (n == 0) { @@ -112,13 +113,13 @@ public class CastStringToIntervalDayTime extends VectorExpression { } } - private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) { + private void evaluate(TimestampColumnVector outV, BytesColumnVector inV, int i) { try { HiveIntervalDayTime interval = HiveIntervalDayTime.valueOf( new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8")); - outV.vector[i] = DateUtils.getIntervalDayTimeTotalNanos(interval); + outV.setEpochSecondsAndSignedNanos(i, interval.getTotalSeconds(), interval.getNanos()); } catch (Exception e) { - outV.vector[i] = 1; + outV.setNullValue(i); outV.isNull[i] = true; outV.noNulls = false; } http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java new file mode 100644 index 0000000..55b84b1 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastTimestampToBoolean extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastTimestampToBoolean(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastTimestampToBoolean() { + super(); + } + + private int toBool(TimestampColumnVector timestampColVector, int index) { + return (timestampColVector.getEpochDay(index) != 0 || + timestampColVector.getNanoOfDay(index) != 0) ? 1 : 0; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = toBool(inputColVector, 0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = toBool(inputColVector, i); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = toBool(inputColVector, i); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = toBool(inputColVector, i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = toBool(inputColVector, i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDate.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDate.java new file mode 100644 index 0000000..00790b9 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDate.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * To be used to cast timestamp to decimal. + */ +public class CastTimestampToDate extends FuncTimestampToLong { + + private static final long serialVersionUID = 1L; + + public CastTimestampToDate() { + super(); + this.outputType = "date"; + } + + public CastTimestampToDate(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + this.outputType = "date"; + } + + @Override + protected void func(LongColumnVector outV, TimestampColumnVector inV, int i) { + + outV.vector[i] = DateWritable.millisToDays(inV.getTimestampMilliseconds(i)); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java index 0aedddc..aec104e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java @@ -20,12 +20,12 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; /** * To be used to cast timestamp to decimal. */ -public class CastTimestampToDecimal extends FuncLongToDecimal { +public class CastTimestampToDecimal extends FuncTimestampToDecimal { private static final long serialVersionUID = 1L; @@ -38,12 +38,12 @@ public class CastTimestampToDecimal extends FuncLongToDecimal { } @Override - protected void func(DecimalColumnVector outV, LongColumnVector inV, int i) { + protected void func(DecimalColumnVector outV, TimestampColumnVector inV, int i) { - // The resulting decimal value is 10e-9 * the input long value (i.e. seconds). - // - HiveDecimal result = HiveDecimal.create(inV.vector[i]); - result = result.scaleByPowerOfTen(-9); + // The BigDecimal class recommends not converting directly from double to BigDecimal, + // so we convert like the non-vectorized case and got through a string... + Double timestampDouble = inV.getTimestampSecondsWithFractionalNanos(i); + HiveDecimal result = HiveDecimal.create(timestampDouble.toString()); outV.set(i, result); } } http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java new file mode 100644 index 0000000..f8737f9 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastTimestampToDouble extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastTimestampToDouble(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastTimestampToDouble() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + double[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = inputColVector.getTimestampSecondsWithFractionalNanos(0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.getTimestampSecondsWithFractionalNanos(i); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.getTimestampSecondsWithFractionalNanos(i); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.getTimestampSecondsWithFractionalNanos(i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.getTimestampSecondsWithFractionalNanos(i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "double"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java new file mode 100644 index 0000000..4f53f5c --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastTimestampToLong extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastTimestampToLong(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastTimestampToLong() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = inputColVector.getEpochSeconds(0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.getEpochSeconds(i); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.getEpochSeconds(i); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.getEpochSeconds(i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.getEpochSeconds(i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java index 8d75cf3..24ee9bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java @@ -18,10 +18,19 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.sql.Timestamp; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hive.common.util.DateUtils; /** * Constant is represented as a vector with repeating values. @@ -30,21 +39,15 @@ public class ConstantVectorExpression extends VectorExpression { private static final long serialVersionUID = 1L; - private static enum Type { - LONG, - DOUBLE, - BYTES, - DECIMAL - } - private int outputColumn; protected long longValue = 0; private double doubleValue = 0; private byte[] bytesValue = null; private HiveDecimal decimalValue = null; + private PisaTimestamp timestampValue = null; private boolean isNullValue = false; - private Type type; + private ColumnVector.Type type; private int bytesValueLength = 0; public ConstantVectorExpression() { @@ -82,11 +85,22 @@ public class ConstantVectorExpression extends VectorExpression { setBytesValue(value.getValue().getBytes()); } - public ConstantVectorExpression(int outputColumn, HiveDecimal value) { - this(outputColumn, "decimal"); + // Include type name for precision/scale. + public ConstantVectorExpression(int outputColumn, HiveDecimal value, String typeName) { + this(outputColumn, typeName); setDecimalValue(value); } + public ConstantVectorExpression(int outputColumn, Timestamp value) { + this(outputColumn, "timestamp"); + setTimestampValue(value); + } + + public ConstantVectorExpression(int outputColumn, HiveIntervalDayTime value) { + this(outputColumn, "timestamp"); + setIntervalDayTimeValue(value); + } + /* * Support for null constant object */ @@ -140,6 +154,17 @@ public class ConstantVectorExpression extends VectorExpression { } } + private void evaluateTimestamp(VectorizedRowBatch vrg) { + TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumn]; + dcv.isRepeating = true; + dcv.noNulls = !isNullValue; + if (!isNullValue) { + dcv.set(0, timestampValue); + } else { + dcv.isNull[0] = true; + } + } + @Override public void evaluate(VectorizedRowBatch vrg) { switch (type) { @@ -155,6 +180,9 @@ public class ConstantVectorExpression extends VectorExpression { case DECIMAL: evaluateDecimal(vrg); break; + case TIMESTAMP: + evaluateTimestamp(vrg); + break; } } @@ -192,39 +220,37 @@ public class ConstantVectorExpression extends VectorExpression { this.decimalValue = decimalValue; } - public String getTypeString() { - return getOutputType(); + public HiveDecimal getDecimalValue() { + return decimalValue; } - public void setTypeString(String typeString) { - this.outputType = typeString; - if (VectorizationContext.isStringFamily(typeString)) { - this.type = Type.BYTES; - } else if (VectorizationContext.isFloatFamily(typeString)) { - this.type = Type.DOUBLE; - } else if (VectorizationContext.isDecimalFamily(typeString)){ - this.type = Type.DECIMAL; - } else { - // everything else that does not belong to string, double, decimal is treated as long. - this.type = Type.LONG; - } + public void setTimestampValue(Timestamp timestampValue) { + this.timestampValue = new PisaTimestamp(timestampValue); } - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; + public void setIntervalDayTimeValue(HiveIntervalDayTime intervalDayTimeValue) { + this.timestampValue = intervalDayTimeValue.pisaTimestampUpdate(new PisaTimestamp()); + } + + + public PisaTimestamp getTimestampValue() { + return timestampValue; } - public Type getType() { - return type; + public String getTypeString() { + return getOutputType(); } - public void setType(Type type) { - this.type = type; + private void setTypeString(String typeString) { + this.outputType = typeString; + + String typeName = VectorizationContext.mapTypeNameSynonyms(outputType); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + this.type = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); } - @Override - public void setOutputType(String type) { - setTypeString(type); + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java new file mode 100644 index 0000000..8d2a186 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java @@ -0,0 +1,187 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +// A type date (LongColumnVector storing epoch days) minus a type date produces a +// type interval_day_time (TimestampColumnVector storing nanosecond interval in 2 longs). +public class DateColSubtractDateColumn extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp1; + private PisaTimestamp scratchPisaTimestamp2; + private DateTimeMath dtm = new DateTimeMath(); + + public DateColSubtractDateColumn(int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + scratchPisaTimestamp1 = new PisaTimestamp(); + scratchPisaTimestamp2 = new PisaTimestamp(); + } + + public DateColSubtractDateColumn() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type date (epochDays). + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1]; + + // Input #2 is type date (epochDays). + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2]; + + // Output is type interval_day_time. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + long[] vector1 = inputColVector1.vector; + long[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.subtract( + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])), + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[0])), + 0); + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])); + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + scratchPisaTimestamp1, + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])); + for(int i = 0; i != n; i++) { + outputColVector.subtract( + scratchPisaTimestamp1, + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[0])); + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + scratchPisaTimestamp2, + i); + } + } else { + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[0])); + for(int i = 0; i != n; i++) { + outputColVector.subtract( + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + scratchPisaTimestamp2, + i); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.subtract( + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. + */ + NullUtil.setNullDataEntriesTimestamp(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("date")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java new file mode 100644 index 0000000..3ea9331 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +// A type date (LongColumnVector storing epoch days) minus a type date produces a +// type interval_day_time (TimestampColumnVector storing nanosecond interval in 2 longs). +public class DateColSubtractDateScalar extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public DateColSubtractDateScalar(int colNum, long value, int outputColumn) { + this.colNum = colNum; + this.value = new PisaTimestamp().updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) value)); + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public DateColSubtractDateScalar() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type date (epochDays). + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector1.noNulls; + outputColVector.isRepeating = inputColVector1.isRepeating; + int n = batch.size; + long[] vector1 = inputColVector1.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector1.isRepeating) { + outputColVector.subtract( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])), + value, + 0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector1.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])), + value, + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.subtract( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])), + value, + i); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])), + value, + i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.subtract( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])), + value, + i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("date")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java new file mode 100644 index 0000000..a8cabb8 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java @@ -0,0 +1,154 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +// A type date (LongColumnVector storing epoch days) minus a type date produces a +// type interval_day_time (TimestampColumnVector storing nanosecond interval in 2 longs). +public class DateScalarSubtractDateColumn extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public DateScalarSubtractDateColumn(long value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = new PisaTimestamp().updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) value)); + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public DateScalarSubtractDateColumn() { + } + + @Override + /** + * Method to evaluate scalar-column operation in vectorized fashion. + * + * @batch a package of rows with each column stored in a vector + */ + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type date (epochDays). + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector2.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector2.noNulls; + outputColVector.isRepeating = inputColVector2.isRepeating; + int n = batch.size; + + long[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector2.isRepeating) { + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[0])), + 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector2.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("date")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java new file mode 100644 index 0000000..42e4984 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java @@ -0,0 +1,181 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; +import java.util.HashSet; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Evaluate IN filter on a batch for a vector of timestamps. + */ +public class FilterTimestampColumnInList extends VectorExpression implements ITimestampInExpr { + private static final long serialVersionUID = 1L; + private int inputCol; + private Timestamp[] inListValues; + + // The set object containing the IN list. + private transient HashSet inSet; + + public FilterTimestampColumnInList() { + super(); + inSet = null; + } + + /** + * After construction you must call setInListValues() to add the values to the IN set. + */ + public FilterTimestampColumnInList(int colNum) { + this.inputCol = colNum; + inSet = null; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + if (inSet == null) { + inSet = new HashSet(inListValues.length); + for (Timestamp val : inListValues) { + inSet.add(new PisaTimestamp(val)); + } + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + PisaTimestamp scratchTimestamp = new PisaTimestamp(); + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero + // Repeating property will not change. + + inputColVector.pisaTimestampUpdate(scratchTimestamp, 0); + if (!(inSet.contains(scratchTimestamp))) { + //Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + if (inSet.contains(scratchTimestamp)) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + if (inSet.contains(scratchTimestamp)) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, 0); + if (!inSet.contains(scratchTimestamp)) { + + //Entire batch is filtered out. + batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + if (inSet.contains(scratchTimestamp)) { + sel[newSize++] = i; + } + } + } + + // Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + if (inSet.contains(scratchTimestamp)) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor. + return null; + } + + public void setInListValues(Timestamp[] a) { + this.inListValues = a; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java new file mode 100644 index 0000000..561c152 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This is a superclass for unary decimal functions and expressions returning timestamps that + * operate directly on the input and set the output. + */ +public abstract class FuncDecimalToTimestamp extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public FuncDecimalToTimestamp(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + } + + public FuncDecimalToTimestamp() { + super(); + } + + abstract protected void func(TimestampColumnVector outV, DecimalColumnVector inV, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + TimestampColumnVector outV = (TimestampColumnVector) batch.cols[outputColumn]; + + if (n == 0) { + + // Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. + outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DECIMAL) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/a11a6ca6/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java new file mode 100644 index 0000000..774551c --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This is a superclass for unary timestamp functions and expressions returning decimals that + * operate directly on the input and set the output. + */ +public abstract class FuncTimestampToDecimal extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public FuncTimestampToDecimal(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + this.outputType = "decimal"; + } + + public FuncTimestampToDecimal() { + super(); + this.outputType = "decimal"; + } + + abstract protected void func(DecimalColumnVector outV, TimestampColumnVector inV, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inV = (TimestampColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumn]; + + if (n == 0) { + + // Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. + outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public int getInputColumn() { + return inputColumn; + } + + public void setInputColumn(int inputColumn) { + this.inputColumn = inputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.TIMESTAMP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file