hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kgyrtk...@apache.org
Subject [1/5] hive git commit: HIVE-15939: Make cast expressions comply more to sql2011 (Zoltan Haindrich, reviewed by Ashutosh Chauhan)
Date Wed, 06 Dec 2017 08:48:45 GMT
Repository: hive
Updated Branches:
  refs/heads/master a3060b307 -> 95dadac9f


HIVE-15939: Make cast expressions comply more to sql2011 (Zoltan Haindrich, reviewed by Ashutosh Chauhan)

Signed-off-by: Zoltan Haindrich <kirk@rxd.hu>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/95dadac9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/95dadac9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/95dadac9

Branch: refs/heads/master
Commit: 95dadac9fa81702ce3af0d8759bee5082f2f2013
Parents: c2fc0fb
Author: Zoltan Haindrich <kirk@rxd.hu>
Authored: Wed Dec 6 09:36:54 2017 +0100
Committer: Zoltan Haindrich <kirk@rxd.hu>
Committed: Wed Dec 6 09:47:55 2017 +0100

----------------------------------------------------------------------
 .../ql/exec/vector/VectorizationContext.java    |  56 +------
 .../vector/expressions/CastStringToBoolean.java |  49 ++++++
 .../vector/expressions/FuncStringToLong.java    | 145 +++++++++++++++++
 .../vector/expressions/VectorExpression.java    |   7 -
 .../apache/hadoop/hive/ql/udf/UDFToBoolean.java |  11 +-
 .../expressions/TestVectorMathFunctions.java    |  24 +++
 .../vector/expressions/TestVectorTypeCasts.java |  17 ++
 .../queries/clientpositive/udf_to_boolean.q     |   6 +
 .../vector_udf_string_to_boolean.q              |  23 +++
 .../clientpositive/llap/vectorized_casts.q.out  |   4 +-
 .../results/clientpositive/udf_to_boolean.q.out |  54 +++++++
 .../clientpositive/vector_empty_where.q.out     |   2 +-
 .../vector_udf_string_to_boolean.q.out          | 156 +++++++++++++++++++
 .../clientpositive/vectorized_casts.q.out       |   4 +-
 .../PrimitiveObjectInspectorUtils.java          |  59 ++++++-
 .../TestPrimitiveObjectInspectorUtils.java      |   6 +
 16 files changed, 549 insertions(+), 74 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 5c7d7ee..0ad6816 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -52,47 +52,11 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.*;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFBloomFilter;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFBloomFilterMerge;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountMerge;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumDecimal64ToDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumTimestamp;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimalComplete;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimalFinal;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimalPartial2;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDouble;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDoubleComplete;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgFinal;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLong;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLongComplete;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgPartial2;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgTimestamp;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgTimestampComplete;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDouble;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxLong;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxTimestamp;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDouble;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinLong;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinTimestamp;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumDouble;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumLong;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPartial2;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*;
 import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
 import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
@@ -100,7 +64,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.udf.*;
 import org.apache.hadoop.hive.ql.udf.generic.*;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -2734,24 +2697,11 @@ public class VectorizationContext {
     }
     // Long and double are handled using descriptors, string needs to be specially handled.
     if (isStringFamily(inputType)) {
-      // string casts to false if it is 0 characters long, otherwise true
-      VectorExpression lenExpr = createVectorExpression(StringLength.class, childExpr,
-          VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.longTypeInfo);
 
-      int outputColumnNum = ocm.allocateOutputColumn(TypeInfoFactory.booleanTypeInfo);
-      VectorExpression lenToBoolExpr =
-          new CastLongToBooleanViaLongToLong(lenExpr.getOutputColumnNum(), outputColumnNum);
+      VectorExpression lenExpr = createVectorExpression(CastStringToBoolean.class, childExpr,
+          VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo);
 
-      lenToBoolExpr.setChildExpressions(new VectorExpression[] {lenExpr});
-
-      lenToBoolExpr.setInputTypeInfos(lenExpr.getOutputTypeInfo());
-      lenToBoolExpr.setInputDataTypePhysicalVariations(lenExpr.getOutputDataTypePhysicalVariation());
-
-      lenToBoolExpr.setOutputTypeInfo(TypeInfoFactory.booleanTypeInfo);
-      lenToBoolExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
-
-      ocm.freeOutputColumn(lenExpr.getOutputColumnNum());
-      return lenToBoolExpr;
+      return lenExpr;
     }
     return null;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java
new file mode 100644
index 0000000..7a44035
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+/**
+ * Type cast string to boolean.
+ */
+public class CastStringToBoolean extends FuncStringToLong {
+  private static final long serialVersionUID = 1L;
+
+  public CastStringToBoolean() {
+    super();
+  }
+
+  public CastStringToBoolean(int inputColumn, int outputColumn) {
+    super(inputColumn, outputColumn);
+  }
+
+  @Override
+  protected void func(LongColumnVector outV, BytesColumnVector inV, int offset) {
+
+    int start = inV.start[offset];
+    int length = inV.length[offset];
+    byte[] s = inV.vector[offset];
+    boolean b = PrimitiveObjectInspectorUtils.parseBoolean(s, start, length);
+    outV.vector[offset] = b ? 1 : 0;
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java
new file mode 100644
index 0000000..5c0a7fa
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Superclass to support vectorized functions that take a long
+ * and return a string, optionally with additional configuration arguments.
+ * Used for cast(string), length(string), etc
+ */
+public abstract class FuncStringToLong extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int inputCol;
+  private int outputCol;
+
+  public FuncStringToLong(int inputCol, int outputCol) {
+    super(outputCol);
+    this.inputCol = inputCol;
+    this.outputCol = outputCol;
+  }
+
+  public FuncStringToLong() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputCol];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputCol];
+
+    if (n == 0) {
+      //Nothing to do
+      return;
+    }
+
+    if (inV.noNulls) {
+      outV.noNulls = true;
+      if (inV.isRepeating) {
+        outV.isRepeating = true;
+        func(outV, inV, 0);
+      } else if (batch.selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          func(outV, inV, i);
+        }
+        outV.isRepeating = false;
+      } else {
+        for (int i = 0; i != n; i++) {
+          func(outV, inV, i);
+        }
+        outV.isRepeating = false;
+      }
+    } else {
+      // Handle case with nulls. Don't do function if the value is null, to save time,
+      // because calling the function can be expensive.
+      outV.noNulls = false;
+      if (inV.isRepeating) {
+        outV.isRepeating = true;
+        outV.isNull[0] = inV.isNull[0];
+        if (!inV.isNull[0]) {
+          func(outV, inV, 0);
+        }
+      } else if (batch.selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          outV.isNull[i] = inV.isNull[i];
+          if (!inV.isNull[i]) {
+            func(outV, inV, i);
+          }
+        }
+        outV.isRepeating = false;
+      } else {
+        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        for (int i = 0; i != n; i++) {
+          if (!inV.isNull[i]) {
+            func(outV, inV, i);
+          }
+        }
+        outV.isRepeating = false;
+      }
+    }
+  }
+
+  /* Evaluate result for position i (using bytes[] to avoid storage allocation costs)
+   * and set position i of the output vector to the result.
+   */
+  protected abstract void func(LongColumnVector outV, BytesColumnVector inV, int i);
+
+  public int getOutputCol() {
+    return outputCol;
+  }
+
+  public void setOutputCol(int outputCol) {
+    this.outputCol = outputCol;
+  }
+
+  public int getInputCol() {
+    return inputCol;
+  }
+
+  public void setInputCol(int inputCol) {
+    this.inputCol = inputCol;
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return "col " + inputCol;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION).setNumArguments(1)
+        .setArgumentTypes(VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
+        .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return b.build();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
index b5399d6..8c2894b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
@@ -21,13 +21,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions;
 import java.io.Serializable;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
-import java.util.Map;
-
-import com.google.common.collect.ImmutableMap;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
@@ -35,8 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 
 /**

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java
index d291e36..57949d9 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToBoolean;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToBoolean;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToLong;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDoubleToBooleanViaDoubleToLong;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong;
@@ -33,8 +34,8 @@ import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
-import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
@@ -47,7 +48,7 @@ import org.apache.hadoop.io.Text;
  *
  */
 @VectorizedExpressions({CastLongToBooleanViaLongToLong.class,
-  CastDateToBooleanViaLongToLong.class, CastTimestampToBoolean.class,
+    CastDateToBooleanViaLongToLong.class, CastTimestampToBoolean.class, CastStringToBoolean.class,
   CastDoubleToBooleanViaDoubleToLong.class, CastDecimalToBoolean.class, CastStringToLong.class})
 public class UDFToBoolean extends UDF {
   private final BooleanWritable booleanWritable = new BooleanWritable();
@@ -172,10 +173,10 @@ public class UDFToBoolean extends UDF {
   public BooleanWritable evaluate(Text i) {
     if (i == null) {
       return null;
-    } else {
-      booleanWritable.set(i.getLength() != 0);
-      return booleanWritable;
     }
+    boolean b = PrimitiveObjectInspectorUtils.parseBoolean(i.getBytes(), 0, i.getLength());
+    booleanWritable.set(b);
+    return booleanWritable;
   }
 
   public BooleanWritable evaluate(DateWritable d) {

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
index fb79116..e89f2e5 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
 import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
 import java.sql.Timestamp;
 import java.util.Arrays;
 import java.util.Random;
@@ -253,6 +254,29 @@ public class TestVectorMathFunctions {
     return batch;
   }
 
+  public static VectorizedRowBatch getVectorizedRowBatchStringInLongOut() {
+    VectorizedRowBatch batch = new VectorizedRowBatch(2);
+    BytesColumnVector inV;
+    LongColumnVector outV;
+    inV = new BytesColumnVector();
+    outV = new LongColumnVector();
+    inV.initBuffer();
+    inV.setVal(0, StandardCharsets.UTF_8.encode("true").array());
+    inV.setVal(1, StandardCharsets.UTF_8.encode("TRUE").array());
+    inV.setVal(2, StandardCharsets.UTF_8.encode("TrUe").array());
+    inV.setVal(3, StandardCharsets.UTF_8.encode("false").array());
+    inV.setVal(4, StandardCharsets.UTF_8.encode("FALSE").array());
+    inV.setVal(5, StandardCharsets.UTF_8.encode("FaLsE").array());
+    inV.setVal(6, StandardCharsets.UTF_8.encode("").array());
+    inV.setVal(7, StandardCharsets.UTF_8.encode("Other").array());
+
+    batch.cols[0] = inV;
+    batch.cols[1] = outV;
+
+    batch.size = 8;
+    return batch;
+  }
+
   public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[] longValues) {
     Random r = new Random(345);
     VectorizedRowBatch batch = new VectorizedRowBatch(2);

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
index fb8035b..6952b45 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
@@ -128,6 +128,23 @@ public class TestVectorTypeCasts {
   }
 
   @Test
+  public void testCastStringToBoolean() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchStringInLongOut();
+    LongColumnVector resultV = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastStringToBoolean(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(1, resultV.vector[0]); // true
+    Assert.assertEquals(1, resultV.vector[1]); // true
+    Assert.assertEquals(1, resultV.vector[2]); // true
+    Assert.assertEquals(0, resultV.vector[3]); // false
+    Assert.assertEquals(0, resultV.vector[4]); // false
+    Assert.assertEquals(0, resultV.vector[5]); // false
+    Assert.assertEquals(0, resultV.vector[6]); // false
+    Assert.assertEquals(1, resultV.vector[7]); // true
+  }
+
+  @Test
   public void testCastLongToTimestamp() {
     long[] longValues = new long[500];
     VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInTimestampOut(longValues);

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/queries/clientpositive/udf_to_boolean.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_to_boolean.q b/ql/src/test/queries/clientpositive/udf_to_boolean.q
index 8bea7ab..1a50d05 100644
--- a/ql/src/test/queries/clientpositive/udf_to_boolean.q
+++ b/ql/src/test/queries/clientpositive/udf_to_boolean.q
@@ -12,6 +12,9 @@ SELECT CAST(CAST(-8.0 AS DOUBLE) AS BOOLEAN) FROM src tablesample (1 rows);
 SELECT CAST(CAST(-99.0 AS DECIMAL) AS BOOLEAN) FROM src tablesample (1 rows);
 
 SELECT CAST(CAST('Foo' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows);
+SELECT CAST(CAST('TRUE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows);
+SELECT CAST(CAST('true' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows);
+SELECT CAST(CAST('TrUe' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows);
 
 SELECT CAST(CAST('2011-05-06 07:08:09' as timestamp) AS BOOLEAN) FROM src tablesample (1 rows);
 
@@ -27,6 +30,9 @@ SELECT CAST(CAST(0.0 AS DOUBLE) AS BOOLEAN) FROM src tablesample (1 rows);
 SELECT CAST(CAST(0.0 AS DECIMAL) AS BOOLEAN) FROM src tablesample (1 rows);
 
 SELECT CAST(CAST('' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows);
+SELECT CAST(CAST('FALSE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows);
+SELECT CAST(CAST('false' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows);
+SELECT CAST(CAST('FaLsE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows);
 
 SELECT CAST(CAST(0 as timestamp) AS BOOLEAN) FROM src tablesample (1 rows);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q b/ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q
new file mode 100644
index 0000000..eeb5ab8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q
@@ -0,0 +1,23 @@
+set hive.mapred.mode=nonstrict;
+SET hive.vectorized.execution.enabled = true;
+SET hive.int.timestamp.conversion.in.seconds=false;
+set hive.fetch.task.conversion=none;
+
+create table t (s string) stored as orc;
+
+insert into t values ('false');
+insert into t values ('FALSE');
+insert into t values ('FaLsE');
+insert into t values ('true');
+insert into t values ('TRUE');
+insert into t values ('TrUe');
+insert into t values ('');
+insert into t values ('Other');
+insert into t values ('Off');
+insert into t values ('No');
+insert into t values ('0');
+insert into t values ('1');
+
+explain select s,cast(s as boolean) from t order by s;
+
+select s,cast(s as boolean) from t order by s;

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
index ad6d97b..84b4d94 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
@@ -182,8 +182,8 @@ STAGE PLANS:
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 22, 0, 1, 2, 3, 21, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73]
-                          selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastLongToBooleanViaLongToLong(col 21:bigint)(children: StringLength(col 6:string) -> 21:bigint) -> 22:boolean, CastDoubleToLong(col 4:float) -> 21:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) 
 -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) 
 -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 56:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:st
 ring, VectorUDFAdaptor(UDFToString(ctimestamp1)) -> 65:string, CastStringGroupToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: col 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double
+                          projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 21, 0, 1, 2, 3, 22, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73]
+                          selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastStringToBoolean(col 6) -> 21:boolean, CastDoubleToLong(col 4:float) -> 22:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLo
 ng(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51
 :timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 56:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, VectorUDFAdaptor(UDFToString(ctimestamp1)) -> 65:string, CastStr
 ingGroupToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: col 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double
                       Statistics: Num rows: 6144 Data size: 16362860 Basic stats: COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/results/clientpositive/udf_to_boolean.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_to_boolean.q.out b/ql/src/test/results/clientpositive/udf_to_boolean.q.out
index ebce364..bee030a 100644
--- a/ql/src/test/results/clientpositive/udf_to_boolean.q.out
+++ b/ql/src/test/results/clientpositive/udf_to_boolean.q.out
@@ -70,6 +70,33 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
 true
+PREHOOK: query: SELECT CAST(CAST('TRUE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(CAST('TRUE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+true
+PREHOOK: query: SELECT CAST(CAST('true' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(CAST('true' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+true
+PREHOOK: query: SELECT CAST(CAST('TrUe' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(CAST('TrUe' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+true
 PREHOOK: query: SELECT CAST(CAST('2011-05-06 07:08:09' as timestamp) AS BOOLEAN) FROM src tablesample (1 rows)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
@@ -151,6 +178,33 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
 false
+PREHOOK: query: SELECT CAST(CAST('FALSE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(CAST('FALSE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+false
+PREHOOK: query: SELECT CAST(CAST('false' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(CAST('false' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+false
+PREHOOK: query: SELECT CAST(CAST('FaLsE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(CAST('FaLsE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+false
 PREHOOK: query: SELECT CAST(CAST(0 as timestamp) AS BOOLEAN) FROM src tablesample (1 rows)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/results/clientpositive/vector_empty_where.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_empty_where.q.out b/ql/src/test/results/clientpositive/vector_empty_where.q.out
index 609a36c..6b2c7fe 100644
--- a/ql/src/test/results/clientpositive/vector_empty_where.q.out
+++ b/ql/src/test/results/clientpositive/vector_empty_where.q.out
@@ -26,7 +26,7 @@ STAGE PLANS:
               Filter Vectorization:
                   className: VectorFilterOperator
                   native: true
-                  predicateExpression: SelectColumnIsTrue(col 14:boolean)(children: CastLongToBooleanViaLongToLong(col 13:bigint)(children: StringLength(col 6:string) -> 13:bigint) -> 14:boolean)
+                  predicateExpression: SelectColumnIsTrue(col 13:boolean)(children: CastStringToBoolean(col 6) -> 13:boolean)
               predicate: cstring1 (type: string)
               Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
               Select Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out b/ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out
new file mode 100644
index 0000000..1b06698
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out
@@ -0,0 +1,156 @@
+PREHOOK: query: create table t (s string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (s string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into t values ('false')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('false')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t values ('FALSE')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('FALSE')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t values ('FaLsE')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('FaLsE')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t values ('true')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('true')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t values ('TRUE')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('TRUE')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t values ('TrUe')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('TrUe')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t values ('')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t values ('Other')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('Other')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t values ('Off')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('Off')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t values ('No')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('No')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t values ('0')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('0')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t values ('1')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('1')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: explain select s,cast(s as boolean) from t order by s
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s,cast(s as boolean) from t order by s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t
+            Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: s (type: string), UDFToBoolean(s) (type: boolean)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: boolean)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 12 Data size: 1047 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select s,cast(s as boolean) from t order by s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select s,cast(s as boolean) from t order by s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+	false
+0	false
+1	true
+FALSE	false
+FaLsE	false
+No	false
+Off	false
+Other	true
+TRUE	true
+TrUe	true
+false	false
+true	true

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/ql/src/test/results/clientpositive/vectorized_casts.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_casts.q.out b/ql/src/test/results/clientpositive/vectorized_casts.q.out
index 96a1770..f6f2105 100644
--- a/ql/src/test/results/clientpositive/vectorized_casts.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_casts.q.out
@@ -179,8 +179,8 @@ STAGE PLANS:
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
-                    projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 22, 0, 1, 2, 3, 21, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73]
-                    selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastLongToBooleanViaLongToLong(col 21:bigint)(children: StringLength(col 6:string) -> 21:bigint) -> 22:boolean, CastDoubleToLong(col 4:float) -> 21:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:
 int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:
 timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 56:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, 
 VectorUDFAdaptor(UDFToString(ctimestamp1)) -> 65:string, CastStringGroupToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: col 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double
+                    projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 21, 0, 1, 2, 3, 22, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73]
+                    selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastStringToBoolean(col 6) -> 21:boolean, CastDoubleToLong(col 4:float) -> 22:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col
  4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:times
 tamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 56:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, VectorUDFAdaptor(UDFToString(ctimestamp1)) -> 65:string, CastStringGro
 upToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: col 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double
                 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
index 6a4733f..88e7fb0 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
@@ -22,6 +22,7 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.nio.charset.CharacterCodingException;
+import java.nio.charset.StandardCharsets;
 import java.sql.Date;
 import java.sql.Timestamp;
 import java.time.DateTimeException;
@@ -555,19 +556,69 @@ public final class PrimitiveObjectInspectorUtils {
   }
 
 
-  private static final String falseBooleans[] = { "false", "no", "off", "0", "" };
+  enum FalseValues {
+    FALSE("false"), OFF("off"), NO("no"), ZERO("0"), EMPTY("");
+
+    private final byte[] bytes;
+    private String str;
+
+    FalseValues(String s) {
+      str = s;
+      bytes = s.getBytes(StandardCharsets.UTF_8);
+    }
+
+    public boolean accept(byte[] arr, int st) {
+      for (int i = 0; i < bytes.length; i++) {
+        byte b = arr[i + st];
+        if (!(b == bytes[i] || b + 'a' - 'A' == bytes[i])) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+    public boolean accept(String s) {
+      return str.equalsIgnoreCase(s);
+    }
+  }
+  /**
+   * Parses a boolean from string
+   *
+   * Accepts "false","off","no","0" and "" as FALSE
+   * All other values are interpreted as true.
+   */
+  public static boolean parseBoolean(byte[] arr, int st, int len) {
+    switch (len) {
+    case 5:
+      return !FalseValues.FALSE.accept(arr, st);
+    case 3:
+      return !FalseValues.OFF.accept(arr, st);
+    case 2:
+      return !FalseValues.NO.accept(arr, st);
+    case 1:
+      return !FalseValues.ZERO.accept(arr, st);
+    case 0:
+      return false;
+    default:
+      return true;
+    }
+  }
+
+  private static final FalseValues[] FALSE_BOOLEANS = FalseValues.values();
 
   private static boolean parseBoolean(String s) {
-    for(int i=0;i<falseBooleans.length;i++){
-      if(falseBooleans[i].equalsIgnoreCase(s))
+    for (int i = 0; i < FALSE_BOOLEANS.length; i++) {
+      if (FALSE_BOOLEANS[i].accept(s)) {
         return false;
+      }
     }
     return true;
   }
 
   private static boolean parseBoolean(Text t) {
-    if(t.getLength()>5)
+    if(t.getLength()>5) {
       return true;
+    }
     String strVal=t.toString();
     return parseBoolean(strVal);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/95dadac9/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java
index 9d86a54..cbf19d0 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java
@@ -231,10 +231,16 @@ public class TestPrimitiveObjectInspectorUtils extends TestCase {
     for (String falseStr : mustEvaluateToFalse) {
       assertFalse(falseStr, PrimitiveObjectInspectorUtils.getBoolean(falseStr,
           PrimitiveObjectInspectorFactory.javaStringObjectInspector));
+
+      byte[] b1 = ("asd"+falseStr).getBytes();
+      assertFalse(falseStr, PrimitiveObjectInspectorUtils.parseBoolean(b1, 3, falseStr.length()));
+
     }
     for (String trueStr : mustEvaluateToTrue) {
       assertTrue(trueStr, PrimitiveObjectInspectorUtils.getBoolean(trueStr,
           PrimitiveObjectInspectorFactory.javaStringObjectInspector));
+      byte[] b1 = ("asd"+trueStr).getBytes();
+      assertTrue(trueStr, PrimitiveObjectInspectorUtils.parseBoolean(b1, 3, trueStr.length()));
     }
   }
 }


Mime
View raw message