hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ser...@apache.org
Subject [02/18] hive git commit: HIVE-10855: with vectorization enabled join operation involving interval_day_time fails (Matt McCline reviewed by Gunther Hagleitner)
Date Thu, 04 Jun 2015 20:46:52 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index fe6ee17..47a1107 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -226,6 +226,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vector_inner_join.q,\
   vector_interval_1.q,\
   vector_interval_2.q,\
+  vector_interval_mapjoin.q,\
   vector_join30.q,\
   vector_join_filters.q,\
   vector_join_nulls.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
index 6b95360..6654166 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -35,6 +35,16 @@ import org.apache.hadoop.io.Writable;
 public abstract class ColumnVector {
 
   /*
+   * The current kinds of column vectors.
+   */
+  public static enum Type {
+    LONG,
+    DOUBLE,
+    BYTES,
+    DECIMAL
+  }
+
+  /*
    * If hasNulls is true, then this array contains true if the value
    * is null, otherwise false. The array is always allocated, so a batch can be re-used
    * later and nulls added.

http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
index 8c4b6ea..6673509 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
@@ -20,7 +20,12 @@ package org.apache.hadoop.hive.ql.exec.vector;
 
 import java.util.Arrays;
 
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 /**
  * Class to keep information on a set of typed vector columns.  Used by
@@ -117,28 +122,41 @@ public class VectorColumnSetInfo {
 
   protected void addKey(String outputType) throws HiveException {
     indexLookup[addIndex] = new KeyLookupHelper();
-    if (VectorizationContext.isIntFamily(outputType) ||
-        VectorizationContext.isDatetimeFamily(outputType)) {
+
+    String typeName = VectorizationContext.mapTypeNameSynonyms(outputType);
+
+    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+    Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
+
+    switch (columnVectorType) {
+    case LONG:
       longIndices[longIndicesIndex] = addIndex;
       indexLookup[addIndex].setLong(longIndicesIndex);
       ++longIndicesIndex;
-    } else if (VectorizationContext.isFloatFamily(outputType)) {
+      break;
+
+    case DOUBLE:
       doubleIndices[doubleIndicesIndex] = addIndex;
       indexLookup[addIndex].setDouble(doubleIndicesIndex);
       ++doubleIndicesIndex;
-    } else if (VectorizationContext.isStringFamily(outputType) ||
-        outputType.equalsIgnoreCase("binary")) {
+      break;
+
+    case BYTES:
       stringIndices[stringIndicesIndex]= addIndex;
       indexLookup[addIndex].setString(stringIndicesIndex);
       ++stringIndicesIndex;
-    } else if (VectorizationContext.isDecimalFamily(outputType)) {
-        decimalIndices[decimalIndicesIndex]= addIndex;
-        indexLookup[addIndex].setDecimal(decimalIndicesIndex);
-        ++decimalIndicesIndex;
-    }
-    else {
-      throw new HiveException("Unsuported vector output type: " + outputType);
+      break;
+
+    case DECIMAL:
+      decimalIndices[decimalIndicesIndex]= addIndex;
+      indexLookup[addIndex].setDecimal(decimalIndicesIndex);
+      ++decimalIndicesIndex;
+      break;
+
+    default:
+      throw new HiveException("Unexpected column vector type " + columnVectorType);
     }
+
     addIndex++;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
index 0058141..f12bfde 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
@@ -20,6 +20,12 @@ package org.apache.hadoop.hive.ql.exec.vector;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 /**
  * This class copies specified columns of a row from one VectorizedRowBatch to another.
@@ -186,7 +192,7 @@ public class VectorCopyRow {
   private CopyRow[] subRowToBatchCopiersByValue;
   private CopyRow[] subRowToBatchCopiersByReference;
 
-  public void init(VectorColumnMapping columnMapping) {
+  public void init(VectorColumnMapping columnMapping) throws HiveException {
     int count = columnMapping.getCount();
     subRowToBatchCopiersByValue = new CopyRow[count];
     subRowToBatchCopiersByReference = new CopyRow[count];
@@ -194,24 +200,35 @@ public class VectorCopyRow {
     for (int i = 0; i < count; i++) {
       int inputColumn = columnMapping.getInputColumns()[i];
       int outputColumn = columnMapping.getOutputColumns()[i];
-      String typeName = columnMapping.getTypeNames()[i];
+      String typeName = columnMapping.getTypeNames()[i].toLowerCase();
+      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+      Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
 
       CopyRow copyRowByValue = null;
       CopyRow copyRowByReference = null;
 
-      if (VectorizationContext.isIntFamily(typeName) ||
-          VectorizationContext.isDatetimeFamily(typeName)) {
+      switch (columnVectorType) {
+      case LONG:
         copyRowByValue = new LongCopyRow(inputColumn, outputColumn);
-      } else if (VectorizationContext.isFloatFamily(typeName)) {
+        break;
+
+      case DOUBLE:
         copyRowByValue = new DoubleCopyRow(inputColumn, outputColumn);
-      } else if (VectorizationContext.isStringFamily(typeName)) {
+        break;
+
+      case BYTES:
         copyRowByValue = new BytesCopyRowByValue(inputColumn, outputColumn);
         copyRowByReference = new BytesCopyRowByReference(inputColumn, outputColumn);
-      } else if (VectorizationContext.decimalTypePattern.matcher(typeName).matches()){
+        break;
+
+      case DECIMAL:
         copyRowByValue = new DecimalCopyRow(inputColumn, outputColumn);
-      } else {
-        throw new RuntimeException("Cannot allocate vector copy row for " + typeName);
+        break;
+
+      default:
+        throw new HiveException("Unexpected column vector type " + columnVectorType);
       }
+
       subRowToBatchCopiersByValue[i] = copyRowByValue;
       if (copyRowByReference == null) {
         subRowToBatchCopiersByReference[i] = copyRowByValue;

http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 61d2972..392e56d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -107,6 +107,7 @@ import org.apache.hadoop.hive.ql.udf.generic.*;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
@@ -2044,6 +2045,51 @@ public class VectorizationContext {
     }
   }
 
+  public static String mapTypeNameSynonyms(String typeName) {
+    typeName = typeName.toLowerCase();
+    if (typeName.equals("long")) {
+      return "bigint";
+    } else if (typeName.equals("string_family")) {
+      return "string";
+    } else {
+      return typeName;
+    }
+  }
+
+  public static ColumnVector.Type getColumnVectorTypeFromTypeInfo(TypeInfo typeInfo) throws HiveException {
+    PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
+    PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+
+    switch (primitiveCategory) {
+    case BOOLEAN:
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+    case DATE:
+    case TIMESTAMP:
+    case INTERVAL_YEAR_MONTH:
+    case INTERVAL_DAY_TIME:
+      return ColumnVector.Type.LONG;
+
+    case FLOAT:
+    case DOUBLE:
+      return ColumnVector.Type.DOUBLE;
+
+    case STRING:
+    case CHAR:
+    case VARCHAR:
+    case BINARY:
+      return ColumnVector.Type.BYTES;
+
+    case DECIMAL:
+      return ColumnVector.Type.DECIMAL;
+
+    default:
+      throw new HiveException("Unexpected primitive type category " + primitiveCategory);
+    }
+  }
+
   // TODO: When we support vectorized STRUCTs and can handle more in the reduce-side (MERGEPARTIAL):
   // TODO:   Write reduce-side versions of AVG. Currently, only map-side (HASH) versions are in table.
   // TODO:   And, investigate if different reduce-side versions are needed for var* and std*, or if map-side aggregate can be used..  Right now they are conservatively

http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index af78776..4c8c4b1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -52,6 +52,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedCreateHashTable;
@@ -68,8 +69,12 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 /**
  * This class is common operator class for native vectorized map join.
@@ -658,7 +663,7 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
    * Setup our 2nd batch with the same "column schema" as the big table batch that can be used to
    * build join output results in.
    */
-  protected VectorizedRowBatch setupOverflowBatch() {
+  protected VectorizedRowBatch setupOverflowBatch() throws HiveException {
     VectorizedRowBatch overflowBatch;
 
     Map<Integer, String> scratchColumnTypeMap = vOutContext.getScratchColumnTypeMap();
@@ -701,23 +706,38 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
    * Allocate overflow batch columns by hand.
    */
   private void allocateOverflowBatchColumnVector(VectorizedRowBatch overflowBatch, int outputColumn,
-              String typeName) {
+              String typeName) throws HiveException {
 
     if (overflowBatch.cols[outputColumn] == null) {
-      String vectorTypeName;
-      if (VectorizationContext.isIntFamily(typeName) ||
-          VectorizationContext.isDatetimeFamily(typeName)) {
-        vectorTypeName = "long";
-      } else if (VectorizationContext.isFloatFamily(typeName)) {
-        vectorTypeName = "double";
-      } else if (VectorizationContext.isStringFamily(typeName)) {
-         vectorTypeName = "string";
-      } else if (VectorizationContext.decimalTypePattern.matcher(typeName).matches()){
-        vectorTypeName = typeName;  // Keep precision and scale.
-      } else {
-        throw new RuntimeException("Cannot determine vector type for " + typeName);
+      typeName = VectorizationContext.mapTypeNameSynonyms(typeName);
+
+      String columnVectorTypeName;
+
+      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+      Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
+
+      switch (columnVectorType) {
+      case LONG:
+        columnVectorTypeName = "long";
+        break;
+
+      case DOUBLE:
+        columnVectorTypeName = "double";
+        break;
+
+      case BYTES:
+        columnVectorTypeName = "string";
+        break;
+
+      case DECIMAL:
+        columnVectorTypeName = typeName;  // Keep precision and scale.
+        break;
+
+      default:
+        throw new HiveException("Unexpected column vector type " + columnVectorType);
       }
-      overflowBatch.cols[outputColumn] = VectorizedRowBatchCtx.allocateColumnVector(vectorTypeName, VectorizedRowBatch.DEFAULT_SIZE);
+
+      overflowBatch.cols[outputColumn] = VectorizedRowBatchCtx.allocateColumnVector(columnVectorTypeName, VectorizedRowBatch.DEFAULT_SIZE);
 
       if (LOG.isDebugEnabled()) {
         LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator initializeOp overflowBatch outputColumn " + outputColumn + " class " + overflowBatch.cols[outputColumn].getClass().getSimpleName());

http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/test/queries/clientpositive/vector_interval_mapjoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_interval_mapjoin.q b/ql/src/test/queries/clientpositive/vector_interval_mapjoin.q
new file mode 100644
index 0000000..9a58658
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_interval_mapjoin.q
@@ -0,0 +1,87 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.auto.convert.join=true;
+
+create table vectortab_a_1k(
+            t tinyint,
+            si smallint,
+            i int,
+            b bigint,
+            f float,
+            d double,
+            dc decimal(38,18),
+            bo boolean,
+            s string,
+            s2 string,
+            ts timestamp,
+            ts2 timestamp,
+            dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k;
+
+CREATE TABLE vectortab_a_1korc STORED AS ORC AS SELECT * FROM vectortab_a_1k;
+
+create table vectortab_b_1k(
+            t tinyint,
+            si smallint,
+            i int,
+            b bigint,
+            f float,
+            d double,
+            dc decimal(38,18),
+            bo boolean,
+            s string,
+            s2 string,
+            ts timestamp,
+            ts2 timestamp,
+            dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/vectortab_b_1k' OVERWRITE INTO TABLE vectortab_b_1k;
+
+CREATE TABLE vectortab_b_1korc STORED AS ORC AS SELECT * FROM vectortab_b_1k;
+
+explain
+select
+   v1.s,
+   v2.s,
+   v1.intrvl1 
+from
+   ( select
+      s,
+      (cast(dt as date) - cast(ts as date)) as intrvl1 
+   from
+      vectortab_a_1korc ) v1 
+join
+   (
+      select
+         s ,
+         (cast(dt as date) - cast(ts as date)) as intrvl2 
+      from
+         vectortab_b_1korc 
+   ) v2 
+      on v1.intrvl1 = v2.intrvl2 
+      and v1.s = v2.s;
+
+select
+   v1.s,
+   v2.s,
+   v1.intrvl1 
+from
+   ( select
+      s,
+      (cast(dt as date) - cast(ts as date)) as intrvl1 
+   from
+      vectortab_a_1korc ) v1 
+join
+   (
+      select
+         s ,
+         (cast(dt as date) - cast(ts as date)) as intrvl2 
+      from
+         vectortab_b_1korc 
+   ) v2 
+      on v1.intrvl1 = v2.intrvl2 
+      and v1.s = v2.s;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/test/results/clientpositive/tez/vector_interval_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/vector_interval_mapjoin.q.out
new file mode 100644
index 0000000..b4d3477
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_interval_mapjoin.q.out
@@ -0,0 +1,281 @@
+PREHOOK: query: create table vectortab_a_1k(
+            t tinyint,
+            si smallint,
+            i int,
+            b bigint,
+            f float,
+            d double,
+            dc decimal(38,18),
+            bo boolean,
+            s string,
+            s2 string,
+            ts timestamp,
+            ts2 timestamp,
+            dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_a_1k
+POSTHOOK: query: create table vectortab_a_1k(
+            t tinyint,
+            si smallint,
+            i int,
+            b bigint,
+            f float,
+            d double,
+            dc decimal(38,18),
+            bo boolean,
+            s string,
+            s2 string,
+            ts timestamp,
+            ts2 timestamp,
+            dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_a_1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@vectortab_a_1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE vectortab_a_1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@vectortab_a_1k
+PREHOOK: query: CREATE TABLE vectortab_a_1korc STORED AS ORC AS SELECT * FROM vectortab_a_1k
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@vectortab_a_1k
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_a_1korc
+POSTHOOK: query: CREATE TABLE vectortab_a_1korc STORED AS ORC AS SELECT * FROM vectortab_a_1k
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@vectortab_a_1k
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_a_1korc
+PREHOOK: query: create table vectortab_b_1k(
+            t tinyint,
+            si smallint,
+            i int,
+            b bigint,
+            f float,
+            d double,
+            dc decimal(38,18),
+            bo boolean,
+            s string,
+            s2 string,
+            ts timestamp,
+            ts2 timestamp,
+            dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_b_1k
+POSTHOOK: query: create table vectortab_b_1k(
+            t tinyint,
+            si smallint,
+            i int,
+            b bigint,
+            f float,
+            d double,
+            dc decimal(38,18),
+            bo boolean,
+            s string,
+            s2 string,
+            ts timestamp,
+            ts2 timestamp,
+            dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_b_1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_b_1k' OVERWRITE INTO TABLE vectortab_b_1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@vectortab_b_1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_b_1k' OVERWRITE INTO TABLE vectortab_b_1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@vectortab_b_1k
+PREHOOK: query: CREATE TABLE vectortab_b_1korc STORED AS ORC AS SELECT * FROM vectortab_b_1k
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@vectortab_b_1k
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_b_1korc
+POSTHOOK: query: CREATE TABLE vectortab_b_1korc STORED AS ORC AS SELECT * FROM vectortab_b_1k
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@vectortab_b_1k
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_b_1korc
+PREHOOK: query: explain
+select
+   v1.s,
+   v2.s,
+   v1.intrvl1 
+from
+   ( select
+      s,
+      (cast(dt as date) - cast(ts as date)) as intrvl1 
+   from
+      vectortab_a_1korc ) v1 
+join
+   (
+      select
+         s ,
+         (cast(dt as date) - cast(ts as date)) as intrvl2 
+      from
+         vectortab_b_1korc 
+   ) v2 
+      on v1.intrvl1 = v2.intrvl2 
+      and v1.s = v2.s
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+   v1.s,
+   v2.s,
+   v1.intrvl1 
+from
+   ( select
+      s,
+      (cast(dt as date) - cast(ts as date)) as intrvl1 
+   from
+      vectortab_a_1korc ) v1 
+join
+   (
+      select
+         s ,
+         (cast(dt as date) - cast(ts as date)) as intrvl2 
+      from
+         vectortab_b_1korc 
+   ) v2 
+      on v1.intrvl1 = v2.intrvl2 
+      and v1.s = v2.s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: vectortab_a_1korc
+                  Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: s is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 230132 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 230132 Basic stats: COMPLETE Column stats: NONE
+                      Filter Operator
+                        predicate: _col1 is not null (type: boolean)
+                        Statistics: Num rows: 250 Data size: 115066 Basic stats: COMPLETE Column stats: NONE
+                        Map Join Operator
+                          condition map:
+                               Inner Join 0 to 1
+                          keys:
+                            0 _col1 (type: interval_day_time), _col0 (type: string)
+                            1 _col1 (type: interval_day_time), _col0 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          input vertices:
+                            1 Map 2
+                          Statistics: Num rows: 275 Data size: 126572 Basic stats: COMPLETE Column stats: NONE
+                          HybridGraceHashJoin: true
+                          Select Operator
+                            expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time)
+                            outputColumnNames: _col0, _col1, _col2
+                            Statistics: Num rows: 275 Data size: 126572 Basic stats: COMPLETE Column stats: NONE
+                            File Output Operator
+                              compressed: false
+                              Statistics: Num rows: 275 Data size: 126572 Basic stats: COMPLETE Column stats: NONE
+                              table:
+                                  input format: org.apache.hadoop.mapred.TextInputFormat
+                                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: vectortab_b_1korc
+                  Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: s is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 229224 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 229224 Basic stats: COMPLETE Column stats: NONE
+                      Filter Operator
+                        predicate: _col1 is not null (type: boolean)
+                        Statistics: Num rows: 250 Data size: 114612 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col1 (type: interval_day_time), _col0 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col1 (type: interval_day_time), _col0 (type: string)
+                          Statistics: Num rows: 250 Data size: 114612 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select
+   v1.s,
+   v2.s,
+   v1.intrvl1 
+from
+   ( select
+      s,
+      (cast(dt as date) - cast(ts as date)) as intrvl1 
+   from
+      vectortab_a_1korc ) v1 
+join
+   (
+      select
+         s ,
+         (cast(dt as date) - cast(ts as date)) as intrvl2 
+      from
+         vectortab_b_1korc 
+   ) v2 
+      on v1.intrvl1 = v2.intrvl2 
+      and v1.s = v2.s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectortab_a_1korc
+PREHOOK: Input: default@vectortab_b_1korc
+#### A masked pattern was here ####
+POSTHOOK: query: select
+   v1.s,
+   v2.s,
+   v1.intrvl1 
+from
+   ( select
+      s,
+      (cast(dt as date) - cast(ts as date)) as intrvl1 
+   from
+      vectortab_a_1korc ) v1 
+join
+   (
+      select
+         s ,
+         (cast(dt as date) - cast(ts as date)) as intrvl2 
+      from
+         vectortab_b_1korc 
+   ) v2 
+      on v1.intrvl1 = v2.intrvl2 
+      and v1.s = v2.s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectortab_a_1korc
+POSTHOOK: Input: default@vectortab_b_1korc
+#### A masked pattern was here ####

http://git-wip-us.apache.org/repos/asf/hive/blob/09100831/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
new file mode 100644
index 0000000..976091b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out
@@ -0,0 +1,281 @@
+PREHOOK: query: create table vectortab_a_1k(
+            t tinyint,
+            si smallint,
+            i int,
+            b bigint,
+            f float,
+            d double,
+            dc decimal(38,18),
+            bo boolean,
+            s string,
+            s2 string,
+            ts timestamp,
+            ts2 timestamp,
+            dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_a_1k
+POSTHOOK: query: create table vectortab_a_1k(
+            t tinyint,
+            si smallint,
+            i int,
+            b bigint,
+            f float,
+            d double,
+            dc decimal(38,18),
+            bo boolean,
+            s string,
+            s2 string,
+            ts timestamp,
+            ts2 timestamp,
+            dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_a_1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO TABLE
vectortab_a_1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@vectortab_a_1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_a_1k' OVERWRITE INTO
TABLE vectortab_a_1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@vectortab_a_1k
+PREHOOK: query: CREATE TABLE vectortab_a_1korc STORED AS ORC AS SELECT * FROM vectortab_a_1k
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@vectortab_a_1k
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_a_1korc
+POSTHOOK: query: CREATE TABLE vectortab_a_1korc STORED AS ORC AS SELECT * FROM vectortab_a_1k
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@vectortab_a_1k
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_a_1korc
+PREHOOK: query: create table vectortab_b_1k(
+            t tinyint,
+            si smallint,
+            i int,
+            b bigint,
+            f float,
+            d double,
+            dc decimal(38,18),
+            bo boolean,
+            s string,
+            s2 string,
+            ts timestamp,
+            ts2 timestamp,
+            dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_b_1k
+POSTHOOK: query: create table vectortab_b_1k(
+            t tinyint,
+            si smallint,
+            i int,
+            b bigint,
+            f float,
+            d double,
+            dc decimal(38,18),
+            bo boolean,
+            s string,
+            s2 string,
+            ts timestamp,
+            ts2 timestamp,
+            dt date)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_b_1k
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_b_1k' OVERWRITE INTO TABLE
vectortab_b_1k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@vectortab_b_1k
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab_b_1k' OVERWRITE INTO
TABLE vectortab_b_1k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@vectortab_b_1k
+PREHOOK: query: CREATE TABLE vectortab_b_1korc STORED AS ORC AS SELECT * FROM vectortab_b_1k
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@vectortab_b_1k
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vectortab_b_1korc
+POSTHOOK: query: CREATE TABLE vectortab_b_1korc STORED AS ORC AS SELECT * FROM vectortab_b_1k
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@vectortab_b_1k
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vectortab_b_1korc
+PREHOOK: query: explain
+select
+   v1.s,
+   v2.s,
+   v1.intrvl1 
+from
+   ( select
+      s,
+      (cast(dt as date) - cast(ts as date)) as intrvl1 
+   from
+      vectortab_a_1korc ) v1 
+join
+   (
+      select
+         s ,
+         (cast(dt as date) - cast(ts as date)) as intrvl2 
+      from
+         vectortab_b_1korc 
+   ) v2 
+      on v1.intrvl1 = v2.intrvl2 
+      and v1.s = v2.s
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+   v1.s,
+   v2.s,
+   v1.intrvl1 
+from
+   ( select
+      s,
+      (cast(dt as date) - cast(ts as date)) as intrvl1 
+   from
+      vectortab_a_1korc ) v1 
+join
+   (
+      select
+         s ,
+         (cast(dt as date) - cast(ts as date)) as intrvl2 
+      from
+         vectortab_b_1korc 
+   ) v2 
+      on v1.intrvl1 = v2.intrvl2 
+      and v1.s = v2.s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        v2:vectortab_b_1korc 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        v2:vectortab_b_1korc 
+          TableScan
+            alias: vectortab_b_1korc
+            Statistics: Num rows: 1000 Data size: 458448 Basic stats: COMPLETE Column stats:
NONE
+            Filter Operator
+              predicate: s is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 229224 Basic stats: COMPLETE Column stats:
NONE
+              Select Operator
+                expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 229224 Basic stats: COMPLETE Column
stats: NONE
+                Filter Operator
+                  predicate: _col1 is not null (type: boolean)
+                  Statistics: Num rows: 250 Data size: 114612 Basic stats: COMPLETE Column
stats: NONE
+                  HashTable Sink Operator
+                    keys:
+                      0 _col1 (type: interval_day_time), _col0 (type: string)
+                      1 _col1 (type: interval_day_time), _col0 (type: string)
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: vectortab_a_1korc
+            Statistics: Num rows: 1000 Data size: 460264 Basic stats: COMPLETE Column stats:
NONE
+            Filter Operator
+              predicate: s is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 230132 Basic stats: COMPLETE Column stats:
NONE
+              Select Operator
+                expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 230132 Basic stats: COMPLETE Column
stats: NONE
+                Filter Operator
+                  predicate: _col1 is not null (type: boolean)
+                  Statistics: Num rows: 250 Data size: 115066 Basic stats: COMPLETE Column
stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    keys:
+                      0 _col1 (type: interval_day_time), _col0 (type: string)
+                      1 _col1 (type: interval_day_time), _col0 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 275 Data size: 126572 Basic stats: COMPLETE Column
stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col2 (type: string), _col1 (type:
interval_day_time)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 275 Data size: 126572 Basic stats: COMPLETE Column
stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 275 Data size: 126572 Basic stats: COMPLETE
Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select
+   v1.s,
+   v2.s,
+   v1.intrvl1 
+from
+   ( select
+      s,
+      (cast(dt as date) - cast(ts as date)) as intrvl1 
+   from
+      vectortab_a_1korc ) v1 
+join
+   (
+      select
+         s ,
+         (cast(dt as date) - cast(ts as date)) as intrvl2 
+      from
+         vectortab_b_1korc 
+   ) v2 
+      on v1.intrvl1 = v2.intrvl2 
+      and v1.s = v2.s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectortab_a_1korc
+PREHOOK: Input: default@vectortab_b_1korc
+#### A masked pattern was here ####
+POSTHOOK: query: select
+   v1.s,
+   v2.s,
+   v1.intrvl1 
+from
+   ( select
+      s,
+      (cast(dt as date) - cast(ts as date)) as intrvl1 
+   from
+      vectortab_a_1korc ) v1 
+join
+   (
+      select
+         s ,
+         (cast(dt as date) - cast(ts as date)) as intrvl2 
+      from
+         vectortab_b_1korc 
+   ) v2 
+      on v1.intrvl1 = v2.intrvl2 
+      and v1.s = v2.s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectortab_a_1korc
+POSTHOOK: Input: default@vectortab_b_1korc
+#### A masked pattern was here ####


Mime
View raw message