From commits-return-31755-archive-asf-public=cust-asf.ponee.io@hive.apache.org Fri Feb 16 16:52:24 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 616F31807AA for ; Fri, 16 Feb 2018 16:52:19 +0100 (CET) Received: (qmail 32583 invoked by uid 500); 16 Feb 2018 15:52:16 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 31760 invoked by uid 99); 16 Feb 2018 15:52:16 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 16 Feb 2018 15:52:16 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 9209DE2F42; Fri, 16 Feb 2018 15:52:15 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: mmccline@apache.org To: commits@hive.apache.org Date: Fri, 16 Feb 2018 15:52:41 -0000 Message-Id: In-Reply-To: <6e59fe8488f04b3984d2c68fc19487b2@git.apache.org> References: <6e59fe8488f04b3984d2c68fc19487b2@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [27/32] hive git commit: HIVE-18622: Vectorization: IF Statements, Comparisons, and more do not handle NULLs correctly (Matt McCline, reviewed by Sergey Shelukhin, Deepak Jaiswal, Vihang Karajgaonkar) http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java index a2e4a52..8326002 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -52,9 +54,9 @@ public class CastDecimalToDecimal extends VectorExpression { * Convert input decimal value to a decimal with a possibly different precision and scale, * at position i in the respective vectors. */ - protected void convert(DecimalColumnVector outV, DecimalColumnVector inV, int i) { + protected void convert(DecimalColumnVector outputColVector, DecimalColumnVector inputColVector, int i) { // The set routine enforces precision and scale. 
- outV.vector[i].set(inV.vector[i]); + outputColVector.vector[i].set(inputColVector.vector[i]); } /** @@ -70,10 +72,12 @@ public class CastDecimalToDecimal extends VectorExpression { super.evaluateChildren(batch); } - DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn]; + DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; + + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -81,51 +85,82 @@ public class CastDecimalToDecimal extends VectorExpression { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - convert(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - convert(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + outputColVector.isRepeating = true; + if (inputColVector.noNulls || !inputColVector.isNull[0]) { + // Set isNull before call in case it changes it mind. + outputColVector.isNull[0] = false; + convert(outputColVector, inputColVector, 0); + } else { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } + return; + } + + if (inputColVector.noNulls) { + + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + convert(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + convert(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - convert(outV, inV, i); + convert(outputColVector, inputColVector, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - convert(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - convert(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. + outputColVector.isNull[i] = false; + convert(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - convert(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputColVector.isNull[i] = false; + convert(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } } } http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java index aa529ed..7ad0493 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java @@ -64,6 +64,7 @@ public class CastDecimalToLong extends FuncDecimalToLong { outV.noNulls = false; return; } + outV.isNull[i] = false; switch (integerPrimitiveCategory) { case BYTE: outV.vector[i] = decWritable.byteValue(); http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java index 08abf27..5494579 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -56,7 +58,6 @@ 
public class CastDoubleToTimestamp extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; double[] vector = inputColVector.vector; @@ -65,39 +66,82 @@ public class CastDoubleToTimestamp extends VectorExpression { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setDouble(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + setDouble(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - setDouble(outputColVector, vector, i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setDouble(outputColVector, vector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + setDouble(outputColVector, vector, i); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. 
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { setDouble(outputColVector, vector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setDouble(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setDouble(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - setDouble(outputColVector, vector, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setDouble(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java index df25eac..a3c4212 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import 
org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -56,7 +58,6 @@ public class CastLongToTimestamp extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -65,39 +66,79 @@ public class CastLongToTimestamp extends VectorExpression { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setSeconds(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + setSeconds(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - setSeconds(outputColVector, vector, i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setSeconds(outputColVector, vector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + setSeconds(outputColVector, vector, i); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. 
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { setSeconds(outputColVector, vector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setSeconds(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setSeconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { + // Set isNull before call in case it changes it mind. + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - setSeconds(outputColVector, vector, i); + if (!inputIsNull[i]) { + setSeconds(outputColVector, vector, i); + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java index 42c34c8..6a29c62 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import 
org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -54,7 +56,6 @@ public class CastMillisecondsLongToTimestamp extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -63,39 +64,84 @@ public class CastMillisecondsLongToTimestamp extends VectorExpression { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setMilliseconds(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + setMilliseconds(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - setMilliseconds(outputColVector, vector, i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + setMilliseconds(outputColVector, vector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + setMilliseconds(outputColVector, vector, i); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { setMilliseconds(outputColVector, vector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setMilliseconds(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setMilliseconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { + // Set isNull before calls in case they change their mind. + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - setMilliseconds(outputColVector, vector, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + setMilliseconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java index 34269da..b55712a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hive.common.util.DateParser; import java.nio.charset.StandardCharsets; +import java.util.Arrays; /** * Casts a string vector to a date vector. @@ -62,7 +63,10 @@ public class CastStringToDate extends VectorExpression { BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -70,65 +74,94 @@ public class CastStringToDate extends VectorExpression { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; + evaluate(outputColVector, inV, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - evaluate(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - evaluate(outV, inV, i); + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + evaluate(outputColVector, inV, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + evaluate(outputColVector, inV, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - evaluate(outV, inV, i); + evaluate(outputColVector, inV, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - evaluate(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - evaluate(outV, inV, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputColVector.isNull[i] = false; + evaluate(outputColVector, inV, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - evaluate(outV, inV, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputColVector.isNull[i] = false; + evaluate(outputColVector, inV, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } } } - private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) { + private void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) { String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8); if (dateParser.parseDate(dateString, sqlDate)) { - outV.vector[i] = DateWritable.dateToDays(sqlDate); + outputColVector.vector[i] = DateWritable.dateToDays(sqlDate); return; } - outV.vector[i] = 1; - outV.isNull[i] = true; - outV.noNulls = false; + outputColVector.vector[i] = 1; + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java index 41443c5..cbefa80 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import 
org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; @@ -51,7 +53,7 @@ public class CastStringToDecimal extends VectorExpression { /** * Convert input string to a decimal, at position i in the respective vectors. */ - protected void func(DecimalColumnVector outV, BytesColumnVector inV, int i) { + protected void func(DecimalColumnVector outputColVector, BytesColumnVector inputColVector, int i) { String s; try { @@ -59,13 +61,13 @@ public class CastStringToDecimal extends VectorExpression { * e.g. by converting to decimal from the input bytes directly without * making a new string. */ - s = new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8"); - outV.vector[i].set(HiveDecimal.create(s)); + s = new String(inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], "UTF-8"); + outputColVector.vector[i].set(HiveDecimal.create(s)); } catch (Exception e) { // for any exception in conversion to decimal, produce NULL - outV.noNulls = false; - outV.isNull[i] = true; + outputColVector.noNulls = false; + outputColVector.isNull[i] = true; } } @@ -76,10 +78,13 @@ public class CastStringToDecimal extends VectorExpression { super.evaluateChildren(batch); } - BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; + + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -87,51 +92,82 @@ public class CastStringToDecimal extends VectorExpression { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - func(outV, 
inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - func(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + func(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + func(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + func(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - func(outV, inV, i); + func(outputColVector, inputColVector, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - func(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. + outputColVector.isNull[i] = false; + func(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - func(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. + outputColVector.isNull[i] = false; + func(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } } } http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java index 3ea1e8c..9ad442a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -51,25 +53,25 @@ public class CastStringToDouble extends VectorExpression { /** * Convert input string to a double, at 
position i in the respective vectors. */ - protected void func(DoubleColumnVector outV, BytesColumnVector inV, int batchIndex) { + protected void func(DoubleColumnVector outputColVector, BytesColumnVector inputColVector, int batchIndex) { - byte[] bytes = inV.vector[batchIndex]; - final int start = inV.start[batchIndex]; - final int length = inV.length[batchIndex]; + byte[] bytes = inputColVector.vector[batchIndex]; + final int start = inputColVector.start[batchIndex]; + final int length = inputColVector.length[batchIndex]; try { if (!LazyUtils.isNumberMaybe(bytes, start, length)) { - outV.noNulls = false; - outV.isNull[batchIndex] = true; - outV.vector[batchIndex] = DoubleColumnVector.NULL_VALUE; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; + outputColVector.vector[batchIndex] = DoubleColumnVector.NULL_VALUE; return; } - outV.vector[batchIndex] = StringToDouble.strtod(bytes, start, length); + outputColVector.vector[batchIndex] = StringToDouble.strtod(bytes, start, length); } catch (Exception e) { // for any exception in conversion to integer, produce NULL - outV.noNulls = false; - outV.isNull[batchIndex] = true; - outV.vector[batchIndex] = DoubleColumnVector.NULL_VALUE; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; + outputColVector.vector[batchIndex] = DoubleColumnVector.NULL_VALUE; } } @@ -80,10 +82,13 @@ public class CastStringToDouble extends VectorExpression { super.evaluateChildren(batch); } - BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - DoubleColumnVector outV = (DoubleColumnVector) batch.cols[outputColumnNum]; + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; + + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -91,51 +96,82 @@ 
public class CastStringToDouble extends VectorExpression { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - func(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - func(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + func(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + func(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + func(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - func(outV, inV, i); + func(outputColVector, inputColVector, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. 
- outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - func(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. + outputColVector.isNull[i] = false; + func(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - func(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputColVector.isNull[i] = false; + func(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } } } http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java index feb0ab6..8a64dcf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -55,10 +57,13 @@ public class CastStringToIntervalDayTime extends VectorExpression { super.evaluateChildren(batch); } - BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - IntervalDayTimeColumnVector outV = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; + IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; + + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -66,64 +71,88 @@ public class CastStringToIntervalDayTime extends VectorExpression { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - 
evaluate(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - evaluate(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + evaluate(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + evaluate(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + evaluate(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - evaluate(outV, inV, i); + evaluate(outputColVector, inputColVector, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - evaluate(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... 
+ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - evaluate(outV, inV, i); + // Set isNull before call in case it changes it mind. + outputColVector.isNull[i] = inputColVector.isNull[i]; + if (!inputColVector.isNull[i]) { + evaluate(outputColVector, inputColVector, i); } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + // Set isNull before calls in case they change their mind. + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - evaluate(outV, inV, i); + if (!inputColVector.isNull[i]) { + evaluate(outputColVector, inputColVector, i); } } - outV.isRepeating = false; } } } - private void evaluate(IntervalDayTimeColumnVector outV, BytesColumnVector inV, int i) { + private void evaluate(IntervalDayTimeColumnVector outputColVector, BytesColumnVector inputColVector, int i) { try { HiveIntervalDayTime interval = HiveIntervalDayTime.valueOf( - new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8")); - outV.set(i, interval); + new String(inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], "UTF-8")); + outputColVector.set(i, interval); } catch (Exception e) { - outV.setNullValue(i); - outV.isNull[i] = true; - outV.noNulls = false; + outputColVector.setNullValue(i); + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java index 09dd4d9..598113f 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -53,10 +55,13 @@ public class CastStringToIntervalYearMonth extends VectorExpression { super.evaluateChildren(batch); } - BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -64,64 +69,96 @@ public class CastStringToIntervalYearMonth extends VectorExpression { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - evaluate(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - evaluate(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; + evaluate(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + evaluate(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + evaluate(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - evaluate(outV, inV, i); + evaluate(outputColVector, inputColVector, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - evaluate(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - evaluate(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + evaluate(outputColVector, inputColVector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + // Set isNull before calls in case they change their mind. + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - evaluate(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + evaluate(outputColVector, inputColVector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } } } - private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) { + private void evaluate(LongColumnVector outputColVector, BytesColumnVector inputColVector, int i) { try { HiveIntervalYearMonth interval = HiveIntervalYearMonth.valueOf( - new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8")); - outV.vector[i] = interval.getTotalMonths(); + new String(inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], "UTF-8")); + outputColVector.vector[i] = interval.getTotalMonths(); } catch (Exception e) { - outV.vector[i] = 1; - outV.isNull[i] = true; - outV.noNulls = false; + outputColVector.vector[i] = 1; + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java index a6cfee8..e3da77e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -70,11 +72,11 @@ public class CastStringToLong extends VectorExpression { /** * Convert input string to a long, at position i in the respective vectors. */ - protected void func(LongColumnVector outV, BytesColumnVector inV, int batchIndex) { + protected void func(LongColumnVector outputColVector, BytesColumnVector inputColVector, int batchIndex) { - byte[] bytes = inV.vector[batchIndex]; - final int start = inV.start[batchIndex]; - final int length = inV.length[batchIndex]; + byte[] bytes = inputColVector.vector[batchIndex]; + final int start = inputColVector.start[batchIndex]; + final int length = inputColVector.length[batchIndex]; try { switch (integerPrimitiveCategory) { @@ -90,8 +92,8 @@ public class CastStringToLong extends VectorExpression { booleanValue = true; } else { // No boolean value match for 4 char field. - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } } else if (length == 5) { @@ -103,8 +105,8 @@ public class CastStringToLong extends VectorExpression { booleanValue = false; } else { // No boolean value match for 5 char field. - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } } else if (length == 1) { @@ -115,50 +117,50 @@ public class CastStringToLong extends VectorExpression { booleanValue = false; } else { // No boolean value match for extended 1 char field. 
- outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } } else { // No boolean value match for other lengths. - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } - outV.vector[batchIndex] = (booleanValue ? 1 : 0); + outputColVector.vector[batchIndex] = (booleanValue ? 1 : 0); } break; case BYTE: if (!LazyUtils.isNumberMaybe(bytes, start, length)) { - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } - outV.vector[batchIndex] = LazyByte.parseByte(bytes, start, length, 10); + outputColVector.vector[batchIndex] = LazyByte.parseByte(bytes, start, length, 10); break; case SHORT: if (!LazyUtils.isNumberMaybe(bytes, start, length)) { - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } - outV.vector[batchIndex] = LazyShort.parseShort(bytes, start, length, 10); + outputColVector.vector[batchIndex] = LazyShort.parseShort(bytes, start, length, 10); break; case INT: if (!LazyUtils.isNumberMaybe(bytes, start, length)) { - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } - outV.vector[batchIndex] = LazyInteger.parseInt(bytes, start, length, 10); + outputColVector.vector[batchIndex] = LazyInteger.parseInt(bytes, start, length, 10); break; case LONG: if (!LazyUtils.isNumberMaybe(bytes, start, length)) { - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; return; } - outV.vector[batchIndex] = LazyLong.parseLong(bytes, start, length, 10); + outputColVector.vector[batchIndex] = LazyLong.parseLong(bytes, start, length, 10); break; 
default: throw new Error("Unexpected primitive category " + integerPrimitiveCategory); @@ -166,8 +168,8 @@ public class CastStringToLong extends VectorExpression { } catch (Exception e) { // for any exception in conversion to integer, produce NULL - outV.noNulls = false; - outV.isNull[batchIndex] = true; + outputColVector.noNulls = false; + outputColVector.isNull[batchIndex] = true; } } @@ -178,10 +180,13 @@ public class CastStringToLong extends VectorExpression { super.evaluateChildren(batch); } - BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; if (n == 0) { @@ -189,51 +194,81 @@ public class CastStringToLong extends VectorExpression { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - func(outV, inV, 0); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - func(outV, inV, i); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + func(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. 
+ + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + func(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + func(outputColVector, inputColVector, i); + } } - outV.isRepeating = false; } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { - func(outV, inV, i); + func(outputColVector, inputColVector, i); } - outV.isRepeating = false; } - } else { + } else /* there are NULLs in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - func(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. + outputColVector.isNull[i] = false; + func(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - func(outV, inV, i); + if (!inputColVector.isNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputColVector.isNull[i] = false; + func(outputColVector, inputColVector, i); + } else { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; } } - outV.isRepeating = false; } } } http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java index 1231cda..1836131 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -56,7 +58,6 @@ public class CastTimestampToBoolean extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] outputVector = outputColVector.vector; @@ -65,39 +66,51 @@ public class CastTimestampToBoolean extends VectorExpression { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = toBool(inputColVector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = toBool(inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = toBool(inputColVector, i); + outputIsNull[i] = false; + outputVector[i] = toBool(inputColVector, i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = toBool(inputColVector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = toBool(inputColVector, i); outputIsNull[i] = inputIsNull[i]; + outputVector[i] = toBool(inputColVector, i); } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = toBool(inputColVector, i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java index e696455..c11797b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java @@ -18,6 +18,8 @@ package 
org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -51,7 +53,6 @@ public class CastTimestampToDouble extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; double[] outputVector = outputColVector.vector; @@ -60,39 +61,79 @@ public class CastTimestampToDouble extends VectorExpression { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector.getDouble(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector.getDouble(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector.getDouble(i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + outputVector[i] = inputColVector.getDouble(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = inputColVector.getDouble(i); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = inputColVector.getDouble(i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = inputColVector.getDouble(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getDouble(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.getDouble(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getDouble(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java index 36b9f13..a0f0927 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -48,7 +50,6 @@ public class CastTimestampToLong extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] outputVector = outputColVector.vector; @@ -57,39 +58,79 @@ public class CastTimestampToLong extends VectorExpression { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector.getTimestampAsLong(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector.getTimestampAsLong(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector.getTimestampAsLong(i); + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } } } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } for(int i = 0; i != n; i++) { outputVector[i] = inputColVector.getTimestampAsLong(i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are NULLs in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = inputColVector.getTimestampAsLong(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.getTimestampAsLong(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java index 127e431..6fb29a8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java +++
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -66,104 +68,121 @@ public class ColAndCol extends VectorExpression { return; } + boolean[] outputIsNull = outV.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + long vector1Value = vector1[0]; long vector2Value = vector2[0]; if (inputColVector1.noNulls && inputColVector2.noNulls) { + if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; + outputIsNull[0] = false; outputVector[0] = vector1[0] & vector2[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1Value & vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] & vector2Value; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] & vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, 
false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; } } - outV.isRepeating = false; } - outV.noNulls = true; - } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { + return; + } + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outV.noNulls = false; + + if (inputColVector1.noNulls && !inputColVector2.noNulls) { // only input 2 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0]; + outputIsNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } else { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } - 
outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } - outV.noNulls = false; } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { // only input 1 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -171,49 +190,46 @@ public class ColAndCol extends VectorExpression { // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1); + outputIsNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); } } else { 
for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); } } - outV.isRepeating = false; } outV.noNulls = false; } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{ @@ -223,7 +239,7 @@ public class ColAndCol extends VectorExpression { // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0]) + outputIsNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[0] && (vector2[0] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[0]); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { @@ -231,32 +247,31 @@ public class ColAndCol extends VectorExpression { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 1) && 
inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } @@ -267,21 +282,19 @@ public class ColAndCol extends VectorExpression { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } - outV.noNulls = false; } }