hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1489091 [2/4] - in /hive/branches/vectorization/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/ java/org/apache/hadoop/hive/ql/exec/vector/expressions/ java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/ java/org/apache/hadoop/...
Date Mon, 03 Jun 2013 18:20:24 GMT
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleColumn.java Mon Jun  3 18:20:22 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -47,7 +48,6 @@ public class DoubleColMultiplyDoubleColu
     int n = batch.size;
     double[] vector1 = inputColVector1.vector;
     double[] vector2 = inputColVector2.vector;
-
     double[] outputVector = outputColVector.vector;
     
     // return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColMultiplyDoubleColu
       return;
     }
     
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
     if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector1[0] * vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColMultiplyDoubleColu
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] * vector2[i];
         }
@@ -164,6 +102,14 @@ public class DoubleColMultiplyDoubleColu
         }
       }
     }
+    
+    /* For the case when the output can have null values, follow 
+     * the convention that the data values must be 1 for long and 
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleScalar.java Mon Jun  3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 public class DoubleColMultiplyDoubleScalar extends VectorExpression {
   private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColMultiplyDoubleScal
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColMultiplyDoubleScal
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector[0] * value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] * value;
         }
@@ -74,10 +74,9 @@ public class DoubleColMultiplyDoubleScal
           outputVector[i] = vector[i] * value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] * value;
           outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColMultiplyDoubleScal
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongColumn.java Mon Jun  3 18:20:22 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -47,7 +48,6 @@ public class DoubleColMultiplyLongColumn
     int n = batch.size;
     double[] vector1 = inputColVector1.vector;
     long[] vector2 = inputColVector2.vector;
-
     double[] outputVector = outputColVector.vector;
     
     // return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColMultiplyLongColumn
       return;
     }
     
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
     if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector1[0] * vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColMultiplyLongColumn
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] * vector2[i];
         }
@@ -164,6 +102,14 @@ public class DoubleColMultiplyLongColumn
         }
       }
     }
+    
+    /* For the case when the output can have null values, follow 
+     * the convention that the data values must be 1 for long and 
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongScalar.java Mon Jun  3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 public class DoubleColMultiplyLongScalar extends VectorExpression {
   private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColMultiplyLongScalar
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColMultiplyLongScalar
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector[0] * value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] * value;
         }
@@ -74,10 +74,9 @@ public class DoubleColMultiplyLongScalar
           outputVector[i] = vector[i] * value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] * value;
           outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColMultiplyLongScalar
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleColumn.java Mon Jun  3 18:20:22 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -47,7 +48,6 @@ public class DoubleColSubtractDoubleColu
     int n = batch.size;
     double[] vector1 = inputColVector1.vector;
     double[] vector2 = inputColVector2.vector;
-
     double[] outputVector = outputColVector.vector;
     
     // return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColSubtractDoubleColu
       return;
     }
     
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
     if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector1[0] - vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColSubtractDoubleColu
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] - vector2[i];
         }
@@ -164,6 +102,14 @@ public class DoubleColSubtractDoubleColu
         }
       }
     }
+    
+    /* For the case when the output can have null values, follow 
+     * the convention that the data values must be 1 for long and 
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleScalar.java Mon Jun  3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 public class DoubleColSubtractDoubleScalar extends VectorExpression {
   private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColSubtractDoubleScal
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColSubtractDoubleScal
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector[0] - value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] - value;
         }
@@ -74,10 +74,9 @@ public class DoubleColSubtractDoubleScal
           outputVector[i] = vector[i] - value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] - value;
           outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColSubtractDoubleScal
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongColumn.java Mon Jun  3 18:20:22 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -47,7 +48,6 @@ public class DoubleColSubtractLongColumn
     int n = batch.size;
     double[] vector1 = inputColVector1.vector;
     long[] vector2 = inputColVector2.vector;
-
     double[] outputVector = outputColVector.vector;
     
     // return immediately if batch is empty
@@ -55,81 +55,19 @@ public class DoubleColSubtractLongColumn
       return;
     }
     
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
     if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector1[0] - vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class DoubleColSubtractLongColumn
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] - vector2[i];
         }
@@ -164,6 +102,14 @@ public class DoubleColSubtractLongColumn
         }
       }
     }
+    
+    /* For the case when the output can have null values, follow 
+     * the convention that the data values must be 1 for long and 
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongScalar.java Mon Jun  3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 public class DoubleColSubtractLongScalar extends VectorExpression {
   private int colNum;
@@ -47,6 +48,7 @@ public class DoubleColSubtractLongScalar
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class DoubleColSubtractLongScalar
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector[0] - value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] - value;
         }
@@ -74,10 +74,9 @@ public class DoubleColSubtractLongScalar
           outputVector[i] = vector[i] - value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] - value;
           outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class DoubleColSubtractLongScalar
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddDoubleColumn.java Mon Jun  3 18:20:22 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 /**
  * Implements a vectorized arithmetic operator with a scalar on the left and a
@@ -62,6 +63,7 @@ public class DoubleScalarAddDoubleColumn
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -72,16 +74,10 @@ public class DoubleScalarAddDoubleColumn
     }
 
     if (inputColVector.isRepeating) {
-    
-      /*
-       * All must be selected otherwise size would be zero
-       * Repeating property will not change.
-       */
       outputVector[0] = value + vector[0];
       
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -93,7 +89,6 @@ public class DoubleScalarAddDoubleColumn
           outputVector[i] = value + vector[i];
         }
       }
-      outputColVector.isRepeating = false;
     } else {                         /* there are nulls */ 
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -107,8 +102,9 @@ public class DoubleScalarAddDoubleColumn
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddLongColumn.java Mon Jun  3 18:20:22 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 /**
  * Implements a vectorized arithmetic operator with a scalar on the left and a
@@ -62,6 +63,7 @@ public class DoubleScalarAddLongColumn e
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -72,16 +74,10 @@ public class DoubleScalarAddLongColumn e
     }
 
     if (inputColVector.isRepeating) {
-    
-      /*
-       * All must be selected otherwise size would be zero
-       * Repeating property will not change.
-       */
       outputVector[0] = value + vector[0];
       
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -93,7 +89,6 @@ public class DoubleScalarAddLongColumn e
           outputVector[i] = value + vector[i];
         }
       }
-      outputColVector.isRepeating = false;
     } else {                         /* there are nulls */ 
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -107,8 +102,9 @@ public class DoubleScalarAddLongColumn e
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideDoubleColumn.java Mon Jun  3 18:20:22 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 /**
  * Implements a vectorized arithmetic operator with a scalar on the left and a
@@ -62,6 +63,7 @@ public class DoubleScalarDivideDoubleCol
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -72,16 +74,10 @@ public class DoubleScalarDivideDoubleCol
     }
 
     if (inputColVector.isRepeating) {
-    
-      /*
-       * All must be selected otherwise size would be zero
-       * Repeating property will not change.
-       */
       outputVector[0] = value / vector[0];
       
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -93,7 +89,6 @@ public class DoubleScalarDivideDoubleCol
           outputVector[i] = value / vector[i];
         }
       }
-      outputColVector.isRepeating = false;
     } else {                         /* there are nulls */ 
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -107,8 +102,9 @@ public class DoubleScalarDivideDoubleCol
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideLongColumn.java Mon Jun  3 18:20:22 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 /**
  * Implements a vectorized arithmetic operator with a scalar on the left and a
@@ -62,6 +63,7 @@ public class DoubleScalarDivideLongColum
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -72,16 +74,10 @@ public class DoubleScalarDivideLongColum
     }
 
     if (inputColVector.isRepeating) {
-    
-      /*
-       * All must be selected otherwise size would be zero
-       * Repeating property will not change.
-       */
       outputVector[0] = value / vector[0];
       
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -93,7 +89,6 @@ public class DoubleScalarDivideLongColum
           outputVector[i] = value / vector[i];
         }
       }
-      outputColVector.isRepeating = false;
     } else {                         /* there are nulls */ 
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -107,8 +102,9 @@ public class DoubleScalarDivideLongColum
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloDoubleColumn.java Mon Jun  3 18:20:22 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 /**
  * Implements a vectorized arithmetic operator with a scalar on the left and a
@@ -62,6 +63,7 @@ public class DoubleScalarModuloDoubleCol
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -72,16 +74,10 @@ public class DoubleScalarModuloDoubleCol
     }
 
     if (inputColVector.isRepeating) {
-    
-      /*
-       * All must be selected otherwise size would be zero
-       * Repeating property will not change.
-       */
       outputVector[0] = value % vector[0];
       
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -93,7 +89,6 @@ public class DoubleScalarModuloDoubleCol
           outputVector[i] = value % vector[i];
         }
       }
-      outputColVector.isRepeating = false;
     } else {                         /* there are nulls */ 
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -107,8 +102,9 @@ public class DoubleScalarModuloDoubleCol
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloLongColumn.java Mon Jun  3 18:20:22 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 /**
  * Implements a vectorized arithmetic operator with a scalar on the left and a
@@ -62,6 +63,7 @@ public class DoubleScalarModuloLongColum
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -72,16 +74,10 @@ public class DoubleScalarModuloLongColum
     }
 
     if (inputColVector.isRepeating) {
-    
-      /*
-       * All must be selected otherwise size would be zero
-       * Repeating property will not change.
-       */
       outputVector[0] = value % vector[0];
       
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -93,7 +89,6 @@ public class DoubleScalarModuloLongColum
           outputVector[i] = value % vector[i];
         }
       }
-      outputColVector.isRepeating = false;
     } else {                         /* there are nulls */ 
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -107,8 +102,9 @@ public class DoubleScalarModuloLongColum
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyDoubleColumn.java Mon Jun  3 18:20:22 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 /**
  * Implements a vectorized arithmetic operator with a scalar on the left and a
@@ -62,6 +63,7 @@ public class DoubleScalarMultiplyDoubleC
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -72,16 +74,10 @@ public class DoubleScalarMultiplyDoubleC
     }
 
     if (inputColVector.isRepeating) {
-    
-      /*
-       * All must be selected otherwise size would be zero
-       * Repeating property will not change.
-       */
       outputVector[0] = value * vector[0];
       
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -93,7 +89,6 @@ public class DoubleScalarMultiplyDoubleC
           outputVector[i] = value * vector[i];
         }
       }
-      outputColVector.isRepeating = false;
     } else {                         /* there are nulls */ 
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -107,8 +102,9 @@ public class DoubleScalarMultiplyDoubleC
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyLongColumn.java Mon Jun  3 18:20:22 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 /**
  * Implements a vectorized arithmetic operator with a scalar on the left and a
@@ -62,6 +63,7 @@ public class DoubleScalarMultiplyLongCol
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -72,16 +74,10 @@ public class DoubleScalarMultiplyLongCol
     }
 
     if (inputColVector.isRepeating) {
-    
-      /*
-       * All must be selected otherwise size would be zero
-       * Repeating property will not change.
-       */
       outputVector[0] = value * vector[0];
       
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -93,7 +89,6 @@ public class DoubleScalarMultiplyLongCol
           outputVector[i] = value * vector[i];
         }
       }
-      outputColVector.isRepeating = false;
     } else {                         /* there are nulls */ 
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -107,8 +102,9 @@ public class DoubleScalarMultiplyLongCol
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractDoubleColumn.java Mon Jun  3 18:20:22 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 /**
  * Implements a vectorized arithmetic operator with a scalar on the left and a
@@ -62,6 +63,7 @@ public class DoubleScalarSubtractDoubleC
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     double[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -72,16 +74,10 @@ public class DoubleScalarSubtractDoubleC
     }
 
     if (inputColVector.isRepeating) {
-    
-      /*
-       * All must be selected otherwise size would be zero
-       * Repeating property will not change.
-       */
       outputVector[0] = value - vector[0];
       
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -93,7 +89,6 @@ public class DoubleScalarSubtractDoubleC
           outputVector[i] = value - vector[i];
         }
       }
-      outputColVector.isRepeating = false;
     } else {                         /* there are nulls */ 
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -107,8 +102,9 @@ public class DoubleScalarSubtractDoubleC
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractLongColumn.java Mon Jun  3 18:20:22 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 /**
  * Implements a vectorized arithmetic operator with a scalar on the left and a
@@ -62,6 +63,7 @@ public class DoubleScalarSubtractLongCol
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -72,16 +74,10 @@ public class DoubleScalarSubtractLongCol
     }
 
     if (inputColVector.isRepeating) {
-    
-      /*
-       * All must be selected otherwise size would be zero
-       * Repeating property will not change.
-       */
       outputVector[0] = value - vector[0];
       
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -93,7 +89,6 @@ public class DoubleScalarSubtractLongCol
           outputVector[i] = value - vector[i];
         }
       }
-      outputColVector.isRepeating = false;
     } else {                         /* there are nulls */ 
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -107,8 +102,9 @@ public class DoubleScalarSubtractLongCol
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleColumn.java Mon Jun  3 18:20:22 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -47,7 +48,6 @@ public class LongColAddDoubleColumn exte
     int n = batch.size;
     long[] vector1 = inputColVector1.vector;
     double[] vector2 = inputColVector2.vector;
-
     double[] outputVector = outputColVector.vector;
     
     // return immediately if batch is empty
@@ -55,81 +55,19 @@ public class LongColAddDoubleColumn exte
       return;
     }
     
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
     if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector1[0] + vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class LongColAddDoubleColumn exte
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] + vector2[i];
         }
@@ -164,6 +102,14 @@ public class LongColAddDoubleColumn exte
         }
       }
     }
+    
+    /* For the case when the output can have null values, follow 
+     * the convention that the data values must be 1 for long and 
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleScalar.java Mon Jun  3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 public class LongColAddDoubleScalar extends VectorExpression {
   private int colNum;
@@ -47,6 +48,7 @@ public class LongColAddDoubleScalar exte
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class LongColAddDoubleScalar exte
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector[0] + value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] + value;
         }
@@ -74,10 +74,9 @@ public class LongColAddDoubleScalar exte
           outputVector[i] = vector[i] + value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] + value;
           outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class LongColAddDoubleScalar exte
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongColumn.java Mon Jun  3 18:20:22 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -47,7 +48,6 @@ public class LongColAddLongColumn extend
     int n = batch.size;
     long[] vector1 = inputColVector1.vector;
     long[] vector2 = inputColVector2.vector;
-
     long[] outputVector = outputColVector.vector;
     
     // return immediately if batch is empty
@@ -55,81 +55,19 @@ public class LongColAddLongColumn extend
       return;
     }
     
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
     if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector1[0] + vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class LongColAddLongColumn extend
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] + vector2[i];
         }
@@ -164,6 +102,14 @@ public class LongColAddLongColumn extend
         }
       }
     }
+    
+    /* For the case when the output can have null values, follow 
+     * the convention that the data values must be 1 for long and 
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongScalar.java Mon Jun  3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 public class LongColAddLongScalar extends VectorExpression {
   private int colNum;
@@ -47,6 +48,7 @@ public class LongColAddLongScalar extend
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     long[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class LongColAddLongScalar extend
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector[0] + value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] + value;
         }
@@ -74,10 +74,9 @@ public class LongColAddLongScalar extend
           outputVector[i] = vector[i] + value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] + value;
           outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class LongColAddLongScalar extend
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleColumn.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleColumn.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleColumn.java Mon Jun  3 18:20:22 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -47,7 +48,6 @@ public class LongColDivideDoubleColumn e
     int n = batch.size;
     long[] vector1 = inputColVector1.vector;
     double[] vector2 = inputColVector2.vector;
-
     double[] outputVector = outputColVector.vector;
     
     // return immediately if batch is empty
@@ -55,81 +55,19 @@ public class LongColDivideDoubleColumn e
       return;
     }
     
-    /* Set repeating property to false (the default).
-     * It will be set to true later if needed later.
-     */
-    outputColVector.isRepeating = false;
-
-    //Handle nulls first
-    if (inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isNull[0] = true;
-        outputColVector.isRepeating = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector2.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i];
-          }
-        }
-      }
-    } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
-      outputColVector.noNulls = false;
-      if (inputColVector1.isRepeating || inputColVector2.isRepeating) {
-        //Output will also be repeating and null
-        outputColVector.isRepeating = true;
-        outputColVector.isNull[0] = true;
-        //return as no further processing is needed
-        return;
-      } else {
-        if (batch.selectedInUse) {
-          for(int j = 0; j != n; j++) {
-            int i = sel[j];
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        } else {
-          for(int i = 0; i != n; i++) {
-            outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i];
-          }
-        }
-      }
-    }
-
-
-    //Disregard nulls for processing
+    outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating;
+    
+    // Handle nulls first  
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+          
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or 
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */ 
     if (inputColVector1.isRepeating && inputColVector2.isRepeating) { 
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector1[0] / vector2[0];
-      outputColVector.isRepeating = true;
     } else if (inputColVector1.isRepeating) {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
@@ -154,7 +92,7 @@ public class LongColDivideDoubleColumn e
       }
     } else {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector1[i] / vector2[i];
         }
@@ -164,6 +102,14 @@ public class LongColDivideDoubleColumn e
         }
       }
     }
+    
+    /* For the case when the output can have null values, follow 
+     * the convention that the data values must be 1 for long and 
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleScalar.java?rev=1489091&r1=1489090&r2=1489091&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleScalar.java Mon Jun  3 18:20:22 2013
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
 
 public class LongColDivideDoubleScalar extends VectorExpression {
   private int colNum;
@@ -47,6 +48,7 @@ public class LongColDivideDoubleScalar e
     boolean[] inputIsNull = inputColVector.isNull;
     boolean[] outputIsNull = outputColVector.isNull;
     outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
     int n = batch.size;
     long[] vector = inputColVector.vector;
     double[] outputVector = outputColVector.vector;
@@ -57,15 +59,13 @@ public class LongColDivideDoubleScalar e
     }
 
     if (inputColVector.isRepeating) {
-      //All must be selected otherwise size would be zero
-      //Repeating property will not change.
       outputVector[0] = vector[0] / value;
+      
       // Even if there are no nulls, we always copy over entry 0. Simplifies code.
       outputIsNull[0] = inputIsNull[0]; 
-      outputColVector.isRepeating = true;
     } else if (inputColVector.noNulls) {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] / value;
         }
@@ -74,10 +74,9 @@ public class LongColDivideDoubleScalar e
           outputVector[i] = vector[i] / value;
         }
       }
-      outputColVector.isRepeating = false;
     } else /* there are nulls */ {
       if (batch.selectedInUse) {
-        for(int j=0; j != n; j++) {
+        for(int j = 0; j != n; j++) {
           int i = sel[j];
           outputVector[i] = vector[i] / value;
           outputIsNull[i] = inputIsNull[i];
@@ -88,8 +87,9 @@ public class LongColDivideDoubleScalar e
         }
         System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
       }
-      outputColVector.isRepeating = false;
     }
+    
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
   }
 
   @Override



Mime
View raw message