hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From eh...@apache.org
Subject svn commit: r1547666 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/ java/org/apache/hadoop/hive/ql/exec/vector/expressions/ test/org/apache/hadoop/hive/ql/exec/vector/ test/org/apache/hadoop/hive/ql/exec/vector/expressions/
Date Wed, 04 Dec 2013 01:15:55 GMT
Author: ehans
Date: Wed Dec  4 01:15:55 2013
New Revision: 1547666

URL: http://svn.apache.org/r1547666
Log:
HIVE-5877: Implement vectorized support for IN as boolean-valued expression (Eric Hanson)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IDoubleInExpr.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ILongInExpr.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStringInExpr.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java	(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java	Wed Dec  4 01:15:55 2013
@@ -550,7 +550,7 @@ public class VectorizationContext {
     if (udf instanceof GenericUDFBetween) {
       return getBetweenFilterExpression(childExpr, mode);
     } else if (udf instanceof GenericUDFIn) {
-      return getInFilterExpression(childExpr);
+      return getInExpression(childExpr, mode);
     } else if (udf instanceof GenericUDFOPPositive) {
       return getIdentityExpression(childExpr);
     } else if (udf instanceof GenericUDFBridge) {
@@ -575,11 +575,9 @@ public class VectorizationContext {
   }
 
   /**
-   * Create a filter expression for column IN ( <list-of-constants> )
-   * @param childExpr
-   * @return
+   * Create a filter or boolean-valued expression for column IN ( <list-of-constants> )
    */
-  private VectorExpression getInFilterExpression(List<ExprNodeDesc> childExpr)
+  private VectorExpression getInExpression(List<ExprNodeDesc> childExpr, Mode mode)
       throws HiveException {
     ExprNodeDesc colExpr = childExpr.get(0);
     String colType = colExpr.getTypeString();
@@ -601,48 +599,41 @@ public class VectorizationContext {
     // determine class
     Class<?> cl = null;
     if (isIntFamily(colType)) {
-      cl = FilterLongColumnInList.class;
+      cl = (mode == Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
       long[] inVals = new long[childrenForInList.size()];
       for (int i = 0; i != inVals.length; i++) {
         inVals[i] = getIntFamilyScalarAsLong((ExprNodeConstantDesc) childrenForInList.get(i));
       }
-      FilterLongColumnInList f = (FilterLongColumnInList)
-          createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
-      f.setInListValues(inVals);
-      expr = f;
+      expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
+      ((ILongInExpr) expr).setInListValues(inVals);
     } else if (colType.equals("timestamp")) {
-      cl = FilterLongColumnInList.class;
+      cl = (mode == Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
       long[] inVals = new long[childrenForInList.size()];
       for (int i = 0; i != inVals.length; i++) {
         inVals[i] = getTimestampScalar(childrenForInList.get(i));
       }
-      FilterLongColumnInList f = (FilterLongColumnInList)
-          createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
-      f.setInListValues(inVals);
-      expr = f;
+      expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
+      ((ILongInExpr) expr).setInListValues(inVals);
     } else if (colType.equals("string")) {
-      cl = FilterStringColumnInList.class;
+      cl = (mode == Mode.FILTER ? FilterStringColumnInList.class : StringColumnInList.class);
       byte[][] inVals = new byte[childrenForInList.size()][];
       for (int i = 0; i != inVals.length; i++) {
         inVals[i] = getStringScalarAsByteArray((ExprNodeConstantDesc) childrenForInList.get(i));
       }
-      FilterStringColumnInList f =(FilterStringColumnInList)
-          createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
-      f.setInListValues(inVals);
-      expr = f;
+      expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
+      ((IStringInExpr) expr).setInListValues(inVals);
     } else if (isFloatFamily(colType)) {
-      cl = FilterDoubleColumnInList.class;
+      cl = (mode == Mode.FILTER ? FilterDoubleColumnInList.class : DoubleColumnInList.class);
       double[] inValsD = new double[childrenForInList.size()];
       for (int i = 0; i != inValsD.length; i++) {
         inValsD[i] = getNumericScalarAsDouble(childrenForInList.get(i));
       }
-      FilterDoubleColumnInList f = (FilterDoubleColumnInList)
-          createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
-      f.setInListValues(inValsD);
-      expr = f;
-    } else {
-      throw new HiveException("Type " + colType + " not supported for IN in vectorized mode");
+      expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
+      ((IDoubleInExpr) expr).setInListValues(inValsD);
     }
+
+    // Return the desired VectorExpression if found. Otherwise, return null to cause
+    // execution to fall back to row mode.
     return expr;
   }
 

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java?rev=1547666&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java	(added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java	Wed Dec  4 01:15:55 2013
@@ -0,0 +1,164 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Output a boolean value indicating if a column is IN a list of constants.
+ */
+public class DoubleColumnInList extends VectorExpression implements IDoubleInExpr {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private int outputColumn;
+  private double[] inListValues;
+
+  // The set object containing the IN list. This is optimized for lookup
+  // of the data type of the column.
+  private transient CuckooSetDouble inSet;
+
+  public DoubleColumnInList(int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+  }
+
+  public DoubleColumnInList() {
+    super();
+    inSet = null;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    if (inSet == null) {
+      inSet = new CuckooSetDouble(inListValues.length);
+      inSet.load(inListValues);
+    }
+
+    DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    boolean[] nullPos = inputColVector.isNull;
+    boolean[] outNulls = outputColVector.isNull;
+    int n = batch.size;
+    double[] vector = inputColVector.vector;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    outputColVector.isRepeating = false;
+    outputColVector.noNulls = inputColVector.noNulls;
+    if (inputColVector.noNulls) {
+      if (inputColVector.isRepeating) {
+
+        // All must be selected otherwise size would be zero
+        // Repeating property will not change.
+        outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
+        outputColVector.isRepeating = true;
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+        }
+      }
+    } else {
+      if (inputColVector.isRepeating) {
+
+        // All must be selected otherwise size would be zero
+        // Repeating property will not change.
+        if (!nullPos[0]) {
+          outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
+          outNulls[0] = false;
+        } else {
+          outNulls[0] = true;
+        }
+        outputColVector.isRepeating = true;
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outNulls[i] = nullPos[i];
+          if (!nullPos[i]) {
+            outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+          }
+        }
+      } else {
+        System.arraycopy(nullPos, 0, outNulls, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!nullPos[i]) {
+            outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+          }
+        }
+      }
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+
+  public int getColNum() {
+    return colNum;
+  }
+
+  public void setColNum(int colNum) {
+    this.colNum = colNum;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public double[] getInListValues() {
+    return this.inListValues;
+  }
+
+  public void setInListValues(double[] a) {
+    this.inListValues = a;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+    // return null since this will be handled as a special case in VectorizationContext
+    return null;
+  }
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java	(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java	Wed Dec  4 01:15:55 2013
@@ -34,7 +34,7 @@ import java.util.regex.Pattern;
 /**
  * Evaluate IN filter on a batch for a vector of doubles.
  */
-public class FilterDoubleColumnInList extends VectorExpression {
+public class FilterDoubleColumnInList extends VectorExpression implements IDoubleInExpr {
   private static final long serialVersionUID = 1L;
   private int inputCol;
   private double[] inListValues;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java	(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java	Wed Dec  4 01:15:55 2013
@@ -33,7 +33,8 @@ import java.util.regex.Pattern;
 /**
  * Evaluate IN filter on a batch for a vector of longs.
  */
-public class FilterLongColumnInList extends VectorExpression {
+public class FilterLongColumnInList extends VectorExpression implements ILongInExpr {
+
   private static final long serialVersionUID = 1L;
   private int inputCol;
   private long[] inListValues;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java	(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java	Wed Dec  4 01:15:55 2013
@@ -37,7 +37,7 @@ import java.util.regex.Pattern;
  * the inner loop, and there is a hash table implemented
  * with Cuckoo hashing that has fast lookup to do the IN test.
  */
-public class FilterStringColumnInList extends VectorExpression {
+public class FilterStringColumnInList extends VectorExpression implements IStringInExpr {
   private static final long serialVersionUID = 1L;
   private int inputCol;
   private byte[][] inListValues;

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IDoubleInExpr.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IDoubleInExpr.java?rev=1547666&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IDoubleInExpr.java	(added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IDoubleInExpr.java	Wed Dec  4 01:15:55 2013
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Interface used for both filter and non-filter versions of IN to simplify
+ * VectorizationContext code.
+ */
+public interface IDoubleInExpr {
+  void setInListValues(double[] inVals);
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ILongInExpr.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ILongInExpr.java?rev=1547666&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ILongInExpr.java	(added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ILongInExpr.java	Wed Dec  4 01:15:55 2013
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Interface used for both filter and non-filter versions of IN to simplify
+ * VectorizationContext code.
+ */
+public interface ILongInExpr {
+  void setInListValues(long[] inVals);
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStringInExpr.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStringInExpr.java?rev=1547666&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStringInExpr.java	(added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStringInExpr.java	Wed Dec  4 01:15:55 2013
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Interface used for both filter and non-filter versions of IN to simplify
+ * VectorizationContext code.
+ */
+public interface IStringInExpr {
+  void setInListValues(byte[][] inVals);
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java?rev=1547666&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java	(added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java	Wed Dec  4 01:15:55 2013
@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Output a boolean value indicating if a column is IN a list of constants.
+ */
+public class LongColumnInList extends VectorExpression implements ILongInExpr {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private int outputColumn;
+  private long[] inListValues;
+
+  // The set object containing the IN list. This is optimized for lookup
+  // of the data type of the column.
+  private transient CuckooSetLong inSet;
+
+  public LongColumnInList(int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+  }
+
+  public LongColumnInList() {
+    super();
+    inSet = null;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    if (inSet == null) {
+      inSet = new CuckooSetLong(inListValues.length);
+      inSet.load(inListValues);
+    }
+
+    LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    boolean[] nullPos = inputColVector.isNull;
+    boolean[] outNulls = outputColVector.isNull;
+    int n = batch.size;
+    long[] vector = inputColVector.vector;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    outputColVector.isRepeating = false;
+    outputColVector.noNulls = inputColVector.noNulls;
+    if (inputColVector.noNulls) {
+      if (inputColVector.isRepeating) {
+
+        // All must be selected otherwise size would be zero
+        // Repeating property will not change.
+        outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
+        outputColVector.isRepeating = true;
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+        }
+      }
+    } else {
+      if (inputColVector.isRepeating) {
+
+        // All must be selected otherwise size would be zero
+        // Repeating property will not change.
+        if (!nullPos[0]) {
+          outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
+          outNulls[0] = false;
+        } else {
+          outNulls[0] = true;
+        }
+        outputColVector.isRepeating = true;
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outNulls[i] = nullPos[i];
+          if (!nullPos[i]) {
+            outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+          }
+        }
+      } else {
+        System.arraycopy(nullPos, 0, outNulls, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!nullPos[i]) {
+            outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+          }
+        }
+      }
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+
+  public int getColNum() {
+    return colNum;
+  }
+
+  public void setColNum(int colNum) {
+    this.colNum = colNum;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public long[] getInListValues() {
+    return this.inListValues;
+  }
+
+  public void setInListValues(long [] a) {
+    this.inListValues = a;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+    // return null since this will be handled as a special case in VectorizationContext
+    return null;
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java?rev=1547666&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java	(added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java	Wed Dec  4 01:15:55 2013
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFLike;
+import org.apache.hadoop.io.Text;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Evaluate an IN boolean expression (not a filter) on a batch for a vector of strings.
+ * This is optimized so that no objects have to be created in
+ * the inner loop, and there is a hash table implemented
+ * with Cuckoo hashing that has fast lookup to do the IN test.
+ */
+public class StringColumnInList extends VectorExpression implements IStringInExpr {
+  private static final long serialVersionUID = 1L;
+  private int inputCol;
+  private int outputColumn;
+  private byte[][] inListValues;
+
+  // The set object containing the IN list. This is optimized for lookup
+  // of the data type of the column.
+  private transient CuckooSetBytes inSet;
+
+  public StringColumnInList() {
+    super();
+    inSet = null;
+  }
+
+  /**
+   * After construction you must call setInListValues() to add the values to the IN set.
+   */
+  public StringColumnInList(int colNum, int outputColumn) {
+    this.inputCol = colNum;
+    this.outputColumn = outputColumn;
+    inSet = null;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    if (inSet == null) {
+      inSet = new CuckooSetBytes(inListValues.length);
+      inSet.load(inListValues);
+    }
+
+    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    boolean[] nullPos = inputColVector.isNull;
+    int n = batch.size;
+    byte[][] vector = inputColVector.vector;
+    int[] start = inputColVector.start;
+    int[] len = inputColVector.length;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    outputColVector.isRepeating = inputColVector.isRepeating;
+    outputColVector.noNulls = inputColVector.noNulls;
+    if (inputColVector.noNulls) {
+      if (inputColVector.isRepeating) {
+
+        // All must be selected otherwise size would be zero
+        // Repeating property will not change.
+        outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0;
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+        }
+      }
+    } else {
+      if (inputColVector.isRepeating) {
+
+        // All must be selected otherwise size would be zero
+        // Repeating property will not change.
+        if (!nullPos[0]) {
+          outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0;
+        }
+        outputColVector.isNull[0] = nullPos[0];
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          if (!nullPos[i]) {
+            outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+          }
+          outputColVector.isNull[i] = nullPos[i];
+        }
+      } else {
+        System.arraycopy(nullPos, 0, outputColVector.isNull, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!nullPos[i]) {
+            outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+          }
+        }
+      }
+    }
+  }
+
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return this.outputColumn;
+  }
+
+  public void setOutputColumn(int value) {
+    this.outputColumn = value;
+  }
+
+  public int getInputCol() {
+    return inputCol;
+  }
+
+  public void setInputCol(int colNum) {
+    this.inputCol = colNum;
+  }
+
+  @Override
+  public Descriptor getDescriptor() {
+
+    // This VectorExpression (IN) is a special case, so don't return a descriptor.
+    return null;
+  }
+
+  public byte[][] getInListValues() {
+    return this.inListValues;
+  }
+
+  public void setInListValues(byte [][] a) {
+    this.inListValues = a;
+  }
+}

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java	(original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java	Wed Dec  4 01:15:55 2013
@@ -31,6 +31,7 @@ import junit.framework.Assert;
 
 import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.ColOrCol;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.DoubleColumnInList;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleToDouble;
@@ -38,12 +39,14 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNotNull;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColumnInList;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.NotCol;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.RoundWithNumDigitsDoubleToDouble;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsFalse;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringColumnInList;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLTrim;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLower;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUpper;
@@ -957,8 +960,9 @@ public class TestVectorizationContext {
     assertTrue(ve instanceof FilterLongColumnNotBetween);
   }
 
+  // Test translation of both IN filters and boolean-valued IN expressions (non-filters).
   @Test
-  public void testInFilters() throws HiveException {
+  public void testInFiltersAndExprs() throws HiveException {
     ExprNodeColumnDesc col1Expr = new  ExprNodeColumnDesc(String.class, "col1", "table", false);
     ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc("Alpha");
     ExprNodeConstantDesc constDesc2 = new ExprNodeConstantDesc("Bravo");
@@ -979,6 +983,8 @@ public class TestVectorizationContext {
     VectorizationContext vc = new VectorizationContext(columnMap, 2);
     VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
     assertTrue(ve instanceof FilterStringColumnInList);
+    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
+    assertTrue(ve instanceof StringColumnInList);
 
     // long IN
     children1.set(0, new ExprNodeColumnDesc(Long.class, "col1", "table", false));
@@ -986,6 +992,8 @@ public class TestVectorizationContext {
     children1.set(2, new ExprNodeConstantDesc(20));
     ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
     assertTrue(ve instanceof FilterLongColumnInList);
+    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
+    assertTrue(ve instanceof LongColumnInList);
 
     // double IN
     children1.set(0, new ExprNodeColumnDesc(Double.class, "col1", "table", false));
@@ -993,5 +1001,7 @@ public class TestVectorizationContext {
     children1.set(2, new ExprNodeConstantDesc(20d));
     ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
     assertTrue(ve instanceof FilterDoubleColumnInList);
+    ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
+    assertTrue(ve instanceof DoubleColumnInList);
   }
 }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java Wed Dec  4 01:15:55 2013
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.exec.v
 import static org.junit.Assert.assertEquals;
 
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.junit.Assert;
@@ -460,4 +461,67 @@ public class TestVectorLogicalExpression
 
     assertEquals(2, batch1.selected[0]);
   }
+
+  /** Checks LongColumnInList (boolean-valued IN) on plain, null and repeating long input. */
+  @Test
+  public void testLongInExpr() {
+
+    // Basic operation: the assertions expect row 0 to be in {0, -2} and row 1 not to be.
+    VectorizedRowBatch batch = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut();
+    LongColumnVector result = (LongColumnVector) batch.cols[1];
+    LongColumnInList inExpr = new LongColumnInList(0, 1);
+    inExpr.setInListValues(new long[] {0, -2});
+    inExpr.evaluate(batch);
+    assertEquals(1, result.vector[0]);
+    assertEquals(0, result.vector[1]);
+
+    // Null handling: input row 0 is marked null on the same batch, so output row 0 must
+    // be flagged null while row 1 must still evaluate to 0.
+    batch.cols[0].noNulls = false;
+    batch.cols[0].isNull[0] = true;
+    inExpr.evaluate(batch);
+    Assert.assertTrue(!result.noNulls && result.isNull[0]);
+    assertEquals(0, result.vector[1]);
+
+    // isRepeating handling: on a fresh batch whose input is flagged repeating, the output
+    // must be flagged repeating too, with the result stored in entry 0.
+    batch = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut();
+    result = (LongColumnVector) batch.cols[1];
+    batch.cols[0].isRepeating = true;
+    inExpr.evaluate(batch);
+    Assert.assertTrue(result.isRepeating);
+    assertEquals(1, result.vector[0]);
+  }
+
+  /** Checks DoubleColumnInList (boolean-valued IN) on plain, null and repeating input. */
+  @Test
+  public void testDoubleInExpr() {
+    // Basic operation on the first two rows: the assertions expect row 0 to be in
+    // {-1.5, 30} and row 1 not to be.
+    VectorizedRowBatch batch = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut();
+    batch.size = 2;
+    LongColumnVector result = (LongColumnVector) batch.cols[1];
+    DoubleColumnInList inExpr = new DoubleColumnInList(0, 1);
+    inExpr.setInListValues(new double[] {-1.5d, 30d});
+    inExpr.evaluate(batch);
+    assertEquals(1, result.vector[0]);
+    assertEquals(0, result.vector[1]);
+
+    // Null handling: input row 0 is marked null on the same batch, so output row 0 must
+    // be flagged null while row 1 must still evaluate to 0.
+    batch.cols[0].noNulls = false;
+    batch.cols[0].isNull[0] = true;
+    inExpr.evaluate(batch);
+    Assert.assertTrue(!result.noNulls && result.isNull[0]);
+    assertEquals(0, result.vector[1]);
+
+    // isRepeating handling: on a fresh batch whose input is flagged repeating, the output
+    // must be flagged repeating too, with the result stored in entry 0.
+    batch = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut();
+    result = (LongColumnVector) batch.cols[1];
+    batch.cols[0].isRepeating = true;
+    inExpr.evaluate(batch);
+    Assert.assertTrue(result.isRepeating);
+    assertEquals(1, result.vector[0]);
+  }
 }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java Wed Dec  4 01:15:55 2013
@@ -216,4 +216,29 @@ public class TestVectorScalarColArithmet
     assertFalse(out.noNulls);
     assertFalse(out.isRepeating);
   }
+
+  /** Runs LongColumnInList over getBatch() and checks the 0/1 flag written for each row. */
+  @Test
+  public void testBooleanValuedLongIn() {
+    VectorizedRowBatch batch = getBatch();
+    batch.size = 2;
+    // Held as the concrete type so the IN list can be set without a downcast.
+    LongColumnInList inExpr = new LongColumnInList(0, 1);
+    inExpr.setInListValues(new long[] {20, 1000});
+    inExpr.evaluate(batch);
+
+    LongColumnVector flags = (LongColumnVector) batch.cols[1];
+    Assert.assertEquals(0, flags.vector[0]);
+    Assert.assertEquals(1, flags.vector[1]);
+  }
+
+  private VectorizedRowBatch getBatch() {
+    LongColumnVector input = new LongColumnVector();
+    input.vector[0] = 10;
+    input.vector[1] = 20;
+    VectorizedRowBatch batch = new VectorizedRowBatch(2);
+    batch.cols[0] = input;                   // long input column, first entries 10 and 20
+    batch.cols[1] = new LongColumnVector();  // fresh long column for an expression's output
+    return batch;
+  }
 }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java Wed Dec  4 01:15:55 2013
@@ -1830,4 +1830,43 @@ public class TestVectorStringExpressions
     b.size = 5;
     return b;
   }
+
+  // Boolean-valued (non-filter) string IN: basic match, null input row, repeating input.
+  @Test
+  public void testStringInExpr() {
+    StringColumnInList inExpr = new StringColumnInList(0, 2);
+    inExpr.setInListValues(new byte[][] {red, blue});
+
+    // Basic operation on the first two rows: the assertions expect row 0 to be in the
+    // list and row 1 not to be.
+    VectorizedRowBatch batch = makeStringBatch();
+    batch.size = 2;
+    batch.cols[0].noNulls = true;
+    inExpr.evaluate(batch);
+    LongColumnVector result = (LongColumnVector) batch.cols[2];
+    Assert.assertEquals(1, result.vector[0]);
+    Assert.assertEquals(0, result.vector[1]);
+
+    // Null input: row 0 is explicitly marked null, so output row 0 must be flagged null;
+    // the assertion also requires output row 1 to be non-null and to evaluate to 0.
+    batch = makeStringBatch();
+    batch.size = 2;
+    batch.cols[0].noNulls = false;
+    batch.cols[0].isNull[0] = true;
+    inExpr.evaluate(batch);
+    result = (LongColumnVector) batch.cols[2];
+    Assert.assertTrue(!result.noNulls && result.isNull[0] && !result.isNull[1]);
+    Assert.assertEquals(0, result.vector[1]);
+
+    // Repeating input: with the input flagged repeating, the output must be flagged
+    // repeating too, with the result stored in entry 0.
+    batch = makeStringBatch();
+    batch.size = 2;
+    batch.cols[0].noNulls = true;
+    batch.cols[0].isRepeating = true;
+    inExpr.evaluate(batch);
+    result = (LongColumnVector) batch.cols[2];
+    Assert.assertEquals(1, result.vector[0]);
+    Assert.assertTrue(result.isRepeating);
+  }
  }



Mime
View raw message