hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mmccl...@apache.org
Subject [27/32] hive git commit: HIVE-18622: Vectorization: IF Statements, Comparisons, and more do not handle NULLs correctly (Matt McCline, reviewed by Sergey Shelukhin, Deepak Jaiswal, Vihang Karajgaonkar)
Date Fri, 16 Feb 2018 15:52:41 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java
index a2e4a52..8326002 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -52,9 +54,9 @@ public class CastDecimalToDecimal extends VectorExpression {
    * Convert input decimal value to a decimal with a possibly different precision and scale,
    * at position i in the respective vectors.
    */
-  protected void convert(DecimalColumnVector outV, DecimalColumnVector inV, int i) {
+  protected void convert(DecimalColumnVector outputColVector, DecimalColumnVector inputColVector, int i) {
     // The set routine enforces precision and scale.
-    outV.vector[i].set(inV.vector[i]);
+    outputColVector.vector[i].set(inputColVector.vector[i]);
   }
 
   /**
@@ -70,10 +72,12 @@ public class CastDecimalToDecimal extends VectorExpression {
       super.evaluateChildren(batch);
     }
 
-    DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn];
+    DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum];
+    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
 
@@ -81,51 +85,82 @@ public class CastDecimalToDecimal extends VectorExpression {
       return;
     }
 
-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        convert(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          convert(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      outputColVector.isRepeating = true;
+      if (inputColVector.noNulls || !inputColVector.isNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputColVector.isNull[0] = false;
+        convert(outputColVector, inputColVector, 0);
+      } else {
+        outputColVector.isNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           convert(outputColVector, inputColVector, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            convert(outputColVector, inputColVector, i);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          convert(outV, inV, i);
+          convert(outputColVector, inputColVector, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          convert(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            convert(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputColVector.isNull[i] = false;
+            convert(outputColVector, inputColVector, i);
+          } else {
+            outputColVector.isNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
-        outV.isRepeating = false;
       } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            convert(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputColVector.isNull[i] = false;
+            convert(outputColVector, inputColVector, i);
+          } else {
+            outputColVector.isNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
-        outV.isRepeating = false;
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java
index aa529ed..7ad0493 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java
@@ -64,6 +64,7 @@ public class CastDecimalToLong extends FuncDecimalToLong {
       outV.noNulls = false;
       return;
     }
+    outV.isNull[i] = false;
     switch (integerPrimitiveCategory) {
     case BYTE:
       outV.vector[i] = decWritable.byteValue();

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java
index 08abf27..5494579 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -56,7 +58,6 @@ public class CastDoubleToTimestamp extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
     int n = batch.size;
     double[] vector = inputColVector.vector;
 
@@ -65,39 +66,82 @@ public class CastDoubleToTimestamp extends VectorExpression {
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
-      setDouble(outputColVector, vector, 0);
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        setDouble(outputColVector, vector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
       outputColVector.isRepeating = true;
-    } else if (inputColVector.noNulls) {
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          setDouble(outputColVector, vector, i);
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           setDouble(outputColVector, vector, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            setDouble(outputColVector, vector, i);
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           setDouble(outputColVector, vector, i);
         }
       }
-      outputColVector.isRepeating = false;
-    } else /* there are nulls */ {
+    } else /* there are NULLs in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          setDouble(outputColVector, vector, i);
-          outputIsNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            setDouble(outputColVector, vector, i);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
         }
       } else {
         for(int i = 0; i != n; i++) {
-          setDouble(outputColVector, vector, i);
+          if (!inputIsNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            setDouble(outputColVector, vector, i);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
         }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java
index df25eac..a3c4212 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -56,7 +58,6 @@ public class CastLongToTimestamp extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
     int n = batch.size;
     long[] vector = inputColVector.vector;
 
@@ -65,39 +66,79 @@ public class CastLongToTimestamp extends VectorExpression {
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
-      setSeconds(outputColVector, vector, 0);
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        setSeconds(outputColVector, vector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
       outputColVector.isRepeating = true;
-    } else if (inputColVector.noNulls) {
+      return;
+    }
+
+    if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          setSeconds(outputColVector, vector, i);
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           setSeconds(outputColVector, vector, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            setSeconds(outputColVector, vector, i);
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           setSeconds(outputColVector, vector, i);
         }
       }
-      outputColVector.isRepeating = false;
-    } else /* there are nulls */ {
+    } else /* there are NULLs in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          setSeconds(outputColVector, vector, i);
-          outputIsNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            setSeconds(outputColVector, vector, i);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
         }
       } else {
+        // Set isNull before calls in case they change their mind.
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
-          setSeconds(outputColVector, vector, i);
+          if (!inputIsNull[i]) {
+            setSeconds(outputColVector, vector, i);
+          }
         }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java
index 42c34c8..6a29c62 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -54,7 +56,6 @@ public class CastMillisecondsLongToTimestamp extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
     int n = batch.size;
     long[] vector = inputColVector.vector;
 
@@ -63,39 +64,84 @@ public class CastMillisecondsLongToTimestamp extends VectorExpression {
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
-      setMilliseconds(outputColVector, vector, 0);
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        setMilliseconds(outputColVector, vector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
       outputColVector.isRepeating = true;
-    } else if (inputColVector.noNulls) {
+      return;
+    }
+
+    if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          setMilliseconds(outputColVector, vector, i);
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           setMilliseconds(outputColVector, vector, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            setMilliseconds(outputColVector, vector, i);
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           setMilliseconds(outputColVector, vector, i);
         }
       }
-      outputColVector.isRepeating = false;
-    } else /* there are nulls */ {
+    } else /* there are NULLs in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          setMilliseconds(outputColVector, vector, i);
-          outputIsNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            setMilliseconds(outputColVector, vector, i);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
         }
       } else {
+         // Set isNull before calls in case they change their mind.
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
-          setMilliseconds(outputColVector, vector, i);
+          if (!inputIsNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            setMilliseconds(outputColVector, vector, i);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
         }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
index 34269da..b55712a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hive.common.util.DateParser;
 
 import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
 
 /**
  * Casts a string vector to a date vector.
@@ -62,7 +63,10 @@ public class CastStringToDate extends VectorExpression {
     BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+
+    boolean[] inputIsNull = inV.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
 
@@ -70,65 +74,94 @@ public class CastStringToDate extends VectorExpression {
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inV.isRepeating) {
+      if (inV.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        evaluate(outputColVector, inV, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
     if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        evaluate(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          evaluate(outV, inV, i);
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           evaluate(outputColVector, inV, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            evaluate(outputColVector, inV, i);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          evaluate(outV, inV, i);
+          evaluate(outputColVector, inV, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          evaluate(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            evaluate(outV, inV, i);
+          if (!inputIsNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputColVector.isNull[i] = false;
+            evaluate(outputColVector, inV, i);
+          } else {
+            outputColVector.isNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
-        outV.isRepeating = false;
       } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            evaluate(outV, inV, i);
+          if (!inputIsNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputColVector.isNull[i] = false;
+            evaluate(outputColVector, inV, i);
+          } else {
+            outputColVector.isNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
-        outV.isRepeating = false;
       }
     }
   }
 
-  private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) {
+  private void evaluate(LongColumnVector outputColVector, BytesColumnVector inV, int i) {
     String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8);
     if (dateParser.parseDate(dateString, sqlDate)) {
-      outV.vector[i] = DateWritable.dateToDays(sqlDate);
+      outputColVector.vector[i] = DateWritable.dateToDays(sqlDate);
       return;
     }
 
-    outV.vector[i] = 1;
-    outV.isNull[i] = true;
-    outV.noNulls = false;
+    outputColVector.vector[i] = 1;
+    outputColVector.isNull[i] = true;
+    outputColVector.noNulls = false;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java
index 41443c5..cbefa80 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
@@ -51,7 +53,7 @@ public class CastStringToDecimal extends VectorExpression {
   /**
    * Convert input string to a decimal, at position i in the respective vectors.
    */
-  protected void func(DecimalColumnVector outV, BytesColumnVector inV, int i) {
+  protected void func(DecimalColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
     String s;
     try {
 
@@ -59,13 +61,13 @@ public class CastStringToDecimal extends VectorExpression {
        * e.g. by converting to decimal from the input bytes directly without
        * making a new string.
        */
-      s = new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8");
-      outV.vector[i].set(HiveDecimal.create(s));
+      s = new String(inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], "UTF-8");
+      outputColVector.vector[i].set(HiveDecimal.create(s));
     } catch (Exception e) {
 
       // for any exception in conversion to decimal, produce NULL
-      outV.noNulls = false;
-      outV.isNull[i] = true;
+      outputColVector.noNulls = false;
+      outputColVector.isNull[i] = true;
     }
   }
 
@@ -76,10 +78,13 @@ public class CastStringToDecimal extends VectorExpression {
       super.evaluateChildren(batch);
     }
 
-    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum];
+    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
 
@@ -87,51 +92,82 @@ public class CastStringToDecimal extends VectorExpression {
       return;
     }
 
-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           func(outputColVector, inputColVector, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          func(outV, inV, i);
+          func(outputColVector, inputColVector, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          func(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputColVector.isNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          } else {
+            outputColVector.isNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
-        outV.isRepeating = false;
       } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputColVector.isNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          } else {
+            outputColVector.isNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
-        outV.isRepeating = false;
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java
index 3ea1e8c..9ad442a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -51,25 +53,25 @@ public class CastStringToDouble extends VectorExpression {
   /**
    * Convert input string to a double, at position i in the respective vectors.
    */
-  protected void func(DoubleColumnVector outV, BytesColumnVector inV, int batchIndex) {
+  protected void func(DoubleColumnVector outputColVector, BytesColumnVector inputColVector, int batchIndex) {
 
-    byte[] bytes = inV.vector[batchIndex];
-    final int start = inV.start[batchIndex];
-    final int length = inV.length[batchIndex];
+    byte[] bytes = inputColVector.vector[batchIndex];
+    final int start = inputColVector.start[batchIndex];
+    final int length = inputColVector.length[batchIndex];
     try {
       if (!LazyUtils.isNumberMaybe(bytes, start, length)) {
-        outV.noNulls = false;
-        outV.isNull[batchIndex] = true;
-        outV.vector[batchIndex] = DoubleColumnVector.NULL_VALUE;
+        outputColVector.noNulls = false;
+        outputColVector.isNull[batchIndex] = true;
+        outputColVector.vector[batchIndex] = DoubleColumnVector.NULL_VALUE;
         return;
       }
-      outV.vector[batchIndex] = StringToDouble.strtod(bytes, start, length);
+      outputColVector.vector[batchIndex] = StringToDouble.strtod(bytes, start, length);
     } catch (Exception e) {
 
       // for any exception in conversion to integer, produce NULL
-      outV.noNulls = false;
-      outV.isNull[batchIndex] = true;
-      outV.vector[batchIndex] = DoubleColumnVector.NULL_VALUE;
+      outputColVector.noNulls = false;
+      outputColVector.isNull[batchIndex] = true;
+      outputColVector.vector[batchIndex] = DoubleColumnVector.NULL_VALUE;
     }
   }
 
@@ -80,10 +82,13 @@ public class CastStringToDouble extends VectorExpression {
       super.evaluateChildren(batch);
     }
 
-    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    DoubleColumnVector outV = (DoubleColumnVector) batch.cols[outputColumnNum];
+    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
 
@@ -91,51 +96,82 @@ public class CastStringToDouble extends VectorExpression {
       return;
     }
 
-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           func(outputColVector, inputColVector, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          func(outV, inV, i);
+          func(outputColVector, inputColVector, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          func(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+             // Set isNull before call in case it changes its mind.
+            outputColVector.isNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          } else {
+            outputColVector.isNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
-        outV.isRepeating = false;
       } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputColVector.isNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          } else {
+            outputColVector.isNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
-        outV.isRepeating = false;
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java
index feb0ab6..8a64dcf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -55,10 +57,13 @@ public class CastStringToIntervalDayTime extends VectorExpression {
       super.evaluateChildren(batch);
     }
 
-    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    IntervalDayTimeColumnVector outV = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum];
+    IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum];
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
 
@@ -66,64 +71,88 @@ public class CastStringToIntervalDayTime extends VectorExpression {
       return;
     }
 
-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        evaluate(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          evaluate(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        evaluate(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           evaluate(outputColVector, inputColVector, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            evaluate(outputColVector, inputColVector, i);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          evaluate(outV, inV, i);
+          evaluate(outputColVector, inputColVector, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          evaluate(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            evaluate(outV, inV, i);
+          // Set isNull before call in case it changes its mind.
+          outputColVector.isNull[i] = inputColVector.isNull[i];
+          if (!inputColVector.isNull[i]) {
+            evaluate(outputColVector, inputColVector, i);
           }
         }
-        outV.isRepeating = false;
       } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        // Set isNull before calls in case they change their mind.
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            evaluate(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            evaluate(outputColVector, inputColVector, i);
           }
         }
-        outV.isRepeating = false;
       }
     }
   }
 
-  private void evaluate(IntervalDayTimeColumnVector outV, BytesColumnVector inV, int i) {
+  private void evaluate(IntervalDayTimeColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
     try {
       HiveIntervalDayTime interval = HiveIntervalDayTime.valueOf(
-          new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8"));
-      outV.set(i, interval);
+          new String(inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], "UTF-8"));
+      outputColVector.set(i, interval);
     } catch (Exception e) {
-      outV.setNullValue(i);
-      outV.isNull[i] = true;
-      outV.noNulls = false;
+      outputColVector.setNullValue(i);
+      outputColVector.isNull[i] = true;
+      outputColVector.noNulls = false;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java
index 09dd4d9..598113f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -53,10 +55,13 @@ public class CastStringToIntervalYearMonth extends VectorExpression {
       super.evaluateChildren(batch);
     }
 
-    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
 
@@ -64,64 +69,96 @@ public class CastStringToIntervalYearMonth extends VectorExpression {
       return;
     }
 
-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        evaluate(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          evaluate(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        evaluate(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           evaluate(outputColVector, inputColVector, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            evaluate(outputColVector, inputColVector, i);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          evaluate(outV, inV, i);
+          evaluate(outputColVector, inputColVector, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
-
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          evaluate(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+    } else /* there are NULLs in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            evaluate(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            evaluate(outputColVector, inputColVector, i);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
-        outV.isRepeating = false;
       } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        // Set isNull before calls in case they change their mind.
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            evaluate(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            evaluate(outputColVector, inputColVector, i);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
-        outV.isRepeating = false;
       }
     }
   }
 
-  private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) {
+  private void evaluate(LongColumnVector outputColVector, BytesColumnVector inputColVector, int i) {
     try {
       HiveIntervalYearMonth interval = HiveIntervalYearMonth.valueOf(
-          new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8"));
-      outV.vector[i] = interval.getTotalMonths();
+          new String(inputColVector.vector[i], inputColVector.start[i], inputColVector.length[i], "UTF-8"));
+      outputColVector.vector[i] = interval.getTotalMonths();
     } catch (Exception e) {
-      outV.vector[i] = 1;
-      outV.isNull[i] = true;
-      outV.noNulls = false;
+      outputColVector.vector[i] = 1;
+      outputColVector.isNull[i] = true;
+      outputColVector.noNulls = false;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java
index a6cfee8..e3da77e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -70,11 +72,11 @@ public class CastStringToLong extends VectorExpression {
   /**
    * Convert input string to a long, at position i in the respective vectors.
    */
-  protected void func(LongColumnVector outV, BytesColumnVector inV, int batchIndex) {
+  protected void func(LongColumnVector outputColVector, BytesColumnVector inputColVector, int batchIndex) {
 
-    byte[] bytes = inV.vector[batchIndex];
-    final int start = inV.start[batchIndex];
-    final int length = inV.length[batchIndex];
+    byte[] bytes = inputColVector.vector[batchIndex];
+    final int start = inputColVector.start[batchIndex];
+    final int length = inputColVector.length[batchIndex];
     try {
 
       switch (integerPrimitiveCategory) {
@@ -90,8 +92,8 @@ public class CastStringToLong extends VectorExpression {
               booleanValue = true;
             } else {
               // No boolean value match for 4 char field.
-              outV.noNulls = false;
-              outV.isNull[batchIndex] = true;
+              outputColVector.noNulls = false;
+              outputColVector.isNull[batchIndex] = true;
               return;
             }
           } else if (length == 5) {
@@ -103,8 +105,8 @@ public class CastStringToLong extends VectorExpression {
               booleanValue = false;
             } else {
               // No boolean value match for 5 char field.
-              outV.noNulls = false;
-              outV.isNull[batchIndex] = true;
+              outputColVector.noNulls = false;
+              outputColVector.isNull[batchIndex] = true;
               return;
             }
           } else if (length == 1) {
@@ -115,50 +117,50 @@ public class CastStringToLong extends VectorExpression {
               booleanValue = false;
             } else {
               // No boolean value match for extended 1 char field.
-              outV.noNulls = false;
-              outV.isNull[batchIndex] = true;
+              outputColVector.noNulls = false;
+              outputColVector.isNull[batchIndex] = true;
               return;
             }
           } else {
             // No boolean value match for other lengths.
-            outV.noNulls = false;
-            outV.isNull[batchIndex] = true;
+            outputColVector.noNulls = false;
+            outputColVector.isNull[batchIndex] = true;
             return;
           }
-          outV.vector[batchIndex] = (booleanValue ? 1 : 0);
+          outputColVector.vector[batchIndex] = (booleanValue ? 1 : 0);
         }
         break;
       case BYTE:
         if (!LazyUtils.isNumberMaybe(bytes, start, length)) {
-          outV.noNulls = false;
-          outV.isNull[batchIndex] = true;
+          outputColVector.noNulls = false;
+          outputColVector.isNull[batchIndex] = true;
           return;
         }
-        outV.vector[batchIndex] = LazyByte.parseByte(bytes, start, length, 10);
+        outputColVector.vector[batchIndex] = LazyByte.parseByte(bytes, start, length, 10);
         break;
       case SHORT:
         if (!LazyUtils.isNumberMaybe(bytes, start, length)) {
-          outV.noNulls = false;
-          outV.isNull[batchIndex] = true;
+          outputColVector.noNulls = false;
+          outputColVector.isNull[batchIndex] = true;
           return;
         }
-        outV.vector[batchIndex] = LazyShort.parseShort(bytes, start, length, 10);
+        outputColVector.vector[batchIndex] = LazyShort.parseShort(bytes, start, length, 10);
         break;
       case INT:
         if (!LazyUtils.isNumberMaybe(bytes, start, length)) {
-          outV.noNulls = false;
-          outV.isNull[batchIndex] = true;
+          outputColVector.noNulls = false;
+          outputColVector.isNull[batchIndex] = true;
           return;
         }
-        outV.vector[batchIndex] = LazyInteger.parseInt(bytes, start, length, 10);
+        outputColVector.vector[batchIndex] = LazyInteger.parseInt(bytes, start, length, 10);
         break;
       case LONG:
         if (!LazyUtils.isNumberMaybe(bytes, start, length)) {
-          outV.noNulls = false;
-          outV.isNull[batchIndex] = true;
+          outputColVector.noNulls = false;
+          outputColVector.isNull[batchIndex] = true;
           return;
         }
-        outV.vector[batchIndex] = LazyLong.parseLong(bytes, start, length, 10);
+        outputColVector.vector[batchIndex] = LazyLong.parseLong(bytes, start, length, 10);
         break;
       default:
         throw new Error("Unexpected primitive category " + integerPrimitiveCategory);
@@ -166,8 +168,8 @@ public class CastStringToLong extends VectorExpression {
     } catch (Exception e) {
 
       // for any exception in conversion to integer, produce NULL
-      outV.noNulls = false;
-      outV.isNull[batchIndex] = true;
+      outputColVector.noNulls = false;
+      outputColVector.isNull[batchIndex] = true;
     }
   }
 
@@ -178,10 +180,13 @@ public class CastStringToLong extends VectorExpression {
       super.evaluateChildren(batch);
     }
 
-    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
 
     if (n == 0) {
 
@@ -189,51 +194,81 @@ public class CastStringToLong extends VectorExpression {
       return;
     }
 
-    if (inV.noNulls) {
-      outV.noNulls = true;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        func(outV, inV, 0);
-      } else if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          func(outV, inV, i);
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           func(outputColVector, inputColVector, i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
         }
-        outV.isRepeating = false;
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
-          func(outV, inV, i);
+          func(outputColVector, inputColVector, i);
         }
-        outV.isRepeating = false;
       }
-    } else {
+    } else  /* there are NULLs in the inputColVector */ {
 
-      // Handle case with nulls. Don't do function if the value is null,
-      // because the data may be undefined for a null value.
-      outV.noNulls = false;
-      if (inV.isRepeating) {
-        outV.isRepeating = true;
-        outV.isNull[0] = inV.isNull[0];
-        if (!inV.isNull[0]) {
-          func(outV, inV, 0);
-        }
-      } else if (batch.selectedInUse) {
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
+      if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outV.isNull[i] = inV.isNull[i];
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputColVector.isNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          } else {
+            outputColVector.isNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
-        outV.isRepeating = false;
       } else {
-        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
         for(int i = 0; i != n; i++) {
-          if (!inV.isNull[i]) {
-            func(outV, inV, i);
+          if (!inputColVector.isNull[i]) {
+            // Set isNull before call in case it changes its mind.
+            outputColVector.isNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          } else {
+            outputColVector.isNull[i] = true;
+            outputColVector.noNulls = false;
           }
         }
-        outV.isRepeating = false;
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java
index 1231cda..1836131 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr;
 import org.apache.hadoop.hive.ql.exec.vector.*;
@@ -56,7 +58,6 @@ public class CastTimestampToBoolean extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
     int n = batch.size;
     long[] outputVector = outputColVector.vector;
 
@@ -65,39 +66,51 @@ public class CastTimestampToBoolean extends VectorExpression {
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
-      outputVector[0] =  toBool(inputColVector, 0);
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        outputVector[0] = toBool(inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
       outputColVector.isRepeating = true;
-    } else if (inputColVector.noNulls) {
+      return;
+    }
+
+    if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] =  toBool(inputColVector, i);
+          outputIsNull[i] = false;
+          outputVector[i] = toBool(inputColVector, i);
         }
       } else {
+        Arrays.fill(outputIsNull, 0, n, false);
         for(int i = 0; i != n; i++) {
           outputVector[i] =  toBool(inputColVector, i);
         }
       }
-      outputColVector.isRepeating = false;
-    } else /* there are nulls */ {
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] =  toBool(inputColVector, i);
           outputIsNull[i] = inputIsNull[i];
+          outputVector[i] =  toBool(inputColVector, i);
         }
       } else {
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
         for(int i = 0; i != n; i++) {
           outputVector[i] =  toBool(inputColVector, i);
         }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java
index e696455..c11797b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -51,7 +53,6 @@ public class CastTimestampToDouble extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
     int n = batch.size;
     double[] outputVector = outputColVector.vector;
 
@@ -60,39 +61,79 @@ public class CastTimestampToDouble extends VectorExpression {
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
-      outputVector[0] = inputColVector.getDouble(0);
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        outputVector[0] = inputColVector.getDouble(0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
       outputColVector.isRepeating = true;
-    } else if (inputColVector.noNulls) {
+      return;
+    }
+
+    if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] =  inputColVector.getDouble(i);
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           outputVector[i] =  inputColVector.getDouble(i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputVector[i] =  inputColVector.getDouble(i);
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           outputVector[i] =  inputColVector.getDouble(i);
         }
       }
-      outputColVector.isRepeating = false;
-    } else /* there are nulls */ {
+    } else /* there are NULLs in the inputColVector */ {
+
+      /*
+       * Do careful maintenance of the outputColVector.noNulls flag.
+       */
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = inputColVector.getDouble(i);
-          outputIsNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
+            outputVector[i] = inputColVector.getDouble(i);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
         }
       } else {
         for(int i = 0; i != n; i++) {
-          outputVector[i] = inputColVector.getDouble(i);
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
+            outputVector[i] = inputColVector.getDouble(i);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
         }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java
index 36b9f13..a0f0927 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr;
 import org.apache.hadoop.hive.ql.exec.vector.*;
@@ -48,7 +50,6 @@ public class CastTimestampToLong extends VectorExpression {
     int[] sel = batch.selected;
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
     int n = batch.size;
     long[] outputVector = outputColVector.vector;
 
@@ -57,39 +58,79 @@ public class CastTimestampToLong extends VectorExpression {
       return;
     }
 
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
-      outputVector[0] =  inputColVector.getTimestampAsLong(0);
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        outputVector[0] = inputColVector.getTimestampAsLong(0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
       outputColVector.isRepeating = true;
-    } else if (inputColVector.noNulls) {
+      return;
+    }
+
+    if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] =  inputColVector.getTimestampAsLong(i);
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+           final int i = sel[j];
+           // Set isNull before call in case it changes its mind.
+           outputIsNull[i] = false;
+           outputVector[i] =  inputColVector.getTimestampAsLong(i);
+         }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputVector[i] =  inputColVector.getTimestampAsLong(i);
+          }
         }
       } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
         for(int i = 0; i != n; i++) {
           outputVector[i] =  inputColVector.getTimestampAsLong(i);
         }
       }
-      outputColVector.isRepeating = false;
-    } else /* there are nulls */ {
+    } else /* there are NULLs in the inputColVector */ {
+
+      /*
+       * Carefully maintain outputColVector.isNull and noNulls; inputColVector is only read.
+       */
+
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] =  inputColVector.getTimestampAsLong(i);
-          outputIsNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
+            outputVector[i] =  inputColVector.getTimestampAsLong(i);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
         }
       } else {
         for(int i = 0; i != n; i++) {
-          outputVector[i] =  inputColVector.getTimestampAsLong(i);
+          if (!inputIsNull[i]) {
+            outputIsNull[i] = false;
+            outputVector[i] =  inputColVector.getTimestampAsLong(i);
+          } else {
+            outputIsNull[i] = true;
+            outputColVector.noNulls = false;
+          }
         }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
index 127e431..6fb29a8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -66,104 +68,121 @@ public class ColAndCol extends VectorExpression {
       return;
     }
 
+    boolean[] outputIsNull = outV.isNull;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outV.isRepeating = false;
+
     long vector1Value = vector1[0];
     long vector2Value = vector2[0];
     if (inputColVector1.noNulls && inputColVector2.noNulls) {
+
       if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
         // All must be selected otherwise size would be zero
         // Repeating property will not change.
         outV.isRepeating = true;
+        outputIsNull[0] = false;
         outputVector[0] = vector1[0] & vector2[0];
       } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
         if (batch.selectedInUse) {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
+            outputIsNull[i] = false;
             outputVector[i] = vector1Value & vector2[i];
           }
         } else {
+          Arrays.fill(outputIsNull, 0, n, false);
           for (int i = 0; i != n; i++) {
             outputVector[i] = vector1Value & vector2[i];
           }
         }
-        outV.isRepeating = false;
       } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
         if (batch.selectedInUse) {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
+            outputIsNull[i] = false;
             outputVector[i] = vector1[i] & vector2Value;
           }
         } else {
+          Arrays.fill(outputIsNull, 0, n, false);
           for (int i = 0; i != n; i++) {
             outputVector[i] = vector1[i] & vector2Value;
           }
         }
-        outV.isRepeating = false;
       } else /* neither side is repeating */{
         if (batch.selectedInUse) {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
+            outputIsNull[i] = false;
             outputVector[i] = vector1[i] & vector2[i];
           }
         } else {
+          Arrays.fill(outputIsNull, 0, n, false);
           for (int i = 0; i != n; i++) {
             outputVector[i] = vector1[i] & vector2[i];
           }
         }
-        outV.isRepeating = false;
       }
-      outV.noNulls = true;
-    } else if (inputColVector1.noNulls && !inputColVector2.noNulls) {
+      return;
+    }
+
+    // Carefully handle NULLs...
+
+    /*
+     * For better performance on LONG/DOUBLE we don't want the conditional
+     * statements inside the for loop.
+     */
+    outV.noNulls = false;
+
+    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
       // only input 2 side has nulls
       if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
         // All must be selected otherwise size would be zero
         // Repeating property will not change.
         outV.isRepeating = true;
         outputVector[0] = vector1[0] & vector2[0];
-        outV.isNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0];
+        outputIsNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0];
       } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
         if (batch.selectedInUse) {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
             outputVector[i] = vector1Value & vector2[i];
-            outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i];
+            outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i];
           }
         } else {
           for (int i = 0; i != n; i++) {
             outputVector[i] = vector1Value & vector2[i];
-            outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i];
+            outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i];
           }
         }
-        outV.isRepeating = false;
       } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
         if (batch.selectedInUse) {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
             outputVector[i] = vector1[i] & vector2Value;
-            outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0];
+            outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0];
           }
         } else {
+
           for (int i = 0; i != n; i++) {
             outputVector[i] = vector1[i] & vector2Value;
-            outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0];
+            outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0];
           }
         }
-        outV.isRepeating = false;
       } else /* neither side is repeating */{
         if (batch.selectedInUse) {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
             outputVector[i] = vector1[i] & vector2[i];
-            outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i];
+            outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i];
           }
         } else {
           for (int i = 0; i != n; i++) {
             outputVector[i] = vector1[i] & vector2[i];
-            outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i];
+            outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i];
           }
         }
-        outV.isRepeating = false;
       }
-      outV.noNulls = false;
     } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
       // only input 1 side has nulls
       if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
@@ -171,49 +190,46 @@ public class ColAndCol extends VectorExpression {
         // Repeating property will not change.
         outV.isRepeating = true;
         outputVector[0] = vector1[0] & vector2[0];
-        outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1);
+        outputIsNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1);
       } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
         if (batch.selectedInUse) {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
             outputVector[i] = vector1Value & vector2[i];
-            outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1);
+            outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1);
           }
         } else {
           for (int i = 0; i != n; i++) {
             outputVector[i] = vector1Value & vector2[i];
-            outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1);
+            outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1);
           }
         }
-        outV.isRepeating = false;
       } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
         if (batch.selectedInUse) {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
             outputVector[i] = vector1[i] & vector2Value;
-            outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1);
+            outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1);
           }
         } else {
           for (int i = 0; i != n; i++) {
             outputVector[i] = vector1[i] & vector2Value;
-            outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1);
+            outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1);
           }
         }
-        outV.isRepeating = false;
       } else /* neither side is repeating */{
         if (batch.selectedInUse) {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
             outputVector[i] = vector1[i] & vector2[i];
-            outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1);
+            outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1);
           }
         } else {
           for (int i = 0; i != n; i++) {
             outputVector[i] = vector1[i] & vector2[i];
-            outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1);
+            outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1);
           }
         }
-        outV.isRepeating = false;
       }
       outV.noNulls = false;
     } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{
@@ -223,7 +239,7 @@ public class ColAndCol extends VectorExpression {
         // Repeating property will not change.
         outV.isRepeating = true;
         outputVector[0] = vector1[0] & vector2[0];
-        outV.isNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0])
+        outputIsNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0])
             || (inputColVector1.isNull[0] && (vector2[0] == 1))
             || (inputColVector1.isNull[0] && inputColVector2.isNull[0]);
       } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
@@ -231,32 +247,31 @@ public class ColAndCol extends VectorExpression {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
             outputVector[i] = vector1Value & vector2[i];
-            outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i])
+            outputIsNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i])
                 || (inputColVector1.isNull[0] && (vector2[i] == 1))
                 || (inputColVector1.isNull[0] && inputColVector2.isNull[i]);
           }
         } else {
           for (int i = 0; i != n; i++) {
             outputVector[i] = vector1Value & vector2[i];
-            outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i])
+            outputIsNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i])
                 || (inputColVector1.isNull[0] && (vector2[i] == 1))
                 || (inputColVector1.isNull[0] && inputColVector2.isNull[i]);
           }
         }
-        outV.isRepeating = false;
       } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
         if (batch.selectedInUse) {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
             outputVector[i] = vector1[i] & vector2Value;
-            outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0])
+            outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0])
                 || (inputColVector1.isNull[i] && (vector2[0] == 1))
                 || (inputColVector1.isNull[i] && inputColVector2.isNull[0]);
           }
         } else {
           for (int i = 0; i != n; i++) {
             outputVector[i] = vector1[i] & vector2Value;
-            outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0])
+            outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0])
                 || (inputColVector1.isNull[i] && (vector2[0] == 1))
                 || (inputColVector1.isNull[i] && inputColVector2.isNull[0]);
           }
@@ -267,21 +282,19 @@ public class ColAndCol extends VectorExpression {
           for (int j = 0; j != n; j++) {
             int i = sel[j];
             outputVector[i] = vector1[i] & vector2[i];
-            outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i])
+            outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i])
                 || (inputColVector1.isNull[i] && (vector2[i] == 1))
                 || (inputColVector1.isNull[i] && inputColVector2.isNull[i]);
           }
         } else {
           for (int i = 0; i != n; i++) {
             outputVector[i] = vector1[i] & vector2[i];
-            outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i])
+            outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i])
                 || (inputColVector1.isNull[i] && (vector2[i] == 1))
                 || (inputColVector1.isNull[i] && inputColVector2.isNull[i]);
           }
         }
-        outV.isRepeating = false;
       }
-      outV.noNulls = false;
     }
   }
 


Mime
View raw message