hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From prasan...@apache.org
Subject [30/39] hive git commit: HIVE-10591: Support limited integer type promotion in ORC (Prasanth Jayachandran reviewed by Gunther Hagleitner)
Date Tue, 12 May 2015 01:24:16 GMT
HIVE-10591: Support limited integer type promotion in ORC (Prasanth Jayachandran reviewed by Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/08b0708b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/08b0708b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/08b0708b

Branch: refs/heads/llap
Commit: 08b0708b42da93a3d576210c4a2e8f6b286b12fa
Parents: 3e713bc
Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Authored: Sat May 9 16:57:36 2015 -0700
Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Committed: Sat May 9 16:57:36 2015 -0700

----------------------------------------------------------------------
 .../ql/io/orc/ConversionTreeReaderFactory.java  |  38 ++
 .../hadoop/hive/ql/io/orc/OrcRecordUpdater.java |  24 +-
 .../hive/ql/io/orc/RecordReaderFactory.java     | 269 +++++++++++++
 .../hadoop/hive/ql/io/orc/RecordReaderImpl.java |   2 +-
 .../clientpositive/orc_int_type_promotion.q     |  79 ++++
 .../clientpositive/orc_int_type_promotion.q.out | 377 +++++++++++++++++++
 6 files changed, 779 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/08b0708b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConversionTreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConversionTreeReaderFactory.java
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConversionTreeReaderFactory.java
new file mode 100644
index 0000000..aaf4eb4
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConversionTreeReaderFactory.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Factory for creating ORC tree readers. These tree readers can handle type promotions and type
+ * conversions.
+ */
+public class ConversionTreeReaderFactory extends TreeReaderFactory {
+
+  // TODO: This is currently only a place holder for type conversions.
+
+  public static TreeReader createTreeReader(int columnId,
+      List<OrcProto.Type> types,
+      boolean[] included,
+      boolean skipCorrupt
+  ) throws IOException {
+    return TreeReaderFactory.createTreeReader(columnId, types, included, skipCorrupt);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/08b0708b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
index b62aa17..b576496 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
@@ -17,7 +17,14 @@
  */
 package org.apache.hadoop.hive.ql.io.orc;
 
-import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -37,14 +44,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
-import java.nio.charset.Charset;
-import java.nio.charset.CharsetDecoder;
-
-import java.util.ArrayList;
-import java.util.List;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
 
 /**
  * A RecordUpdater where the files are stored as ORC.
@@ -205,6 +206,11 @@ public class OrcRecordUpdater implements RecordUpdater {
     return new OrcStruct.OrcStructInspector(fields);
   }
 
+  public static List<String> getAcidEventFields() {
+    return Lists.newArrayList("operation", "originalTransaction", "bucket", "rowId",
+        "currentTransaction", "row");
+  }
+
   OrcRecordUpdater(Path path,
                    AcidOutputFormat.Options options) throws IOException {
     this.options = options;

http://git-wip-us.apache.org/repos/asf/hive/blob/08b0708b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderFactory.java
new file mode 100644
index 0000000..8740ee6
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderFactory.java
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Factory to create ORC tree readers. It also compares file schema with schema specified on read
+ * to see if type promotions are possible.
+ */
+public class RecordReaderFactory {
+  static final Log LOG = LogFactory.getLog(RecordReaderFactory.class);
+  private static final boolean isLogInfoEnabled = LOG.isInfoEnabled();
+
+  public static TreeReaderFactory.TreeReader createTreeReader(int colId,
+      Configuration conf,
+      List<OrcProto.Type> fileSchema,
+      boolean[] included,
+      boolean skipCorrupt) throws IOException {
+    final boolean isAcid = checkAcidSchema(fileSchema);
+    final List<OrcProto.Type> originalFileSchema;
+    if (isAcid) {
+      originalFileSchema = fileSchema.subList(fileSchema.get(0).getSubtypesCount(),
+          fileSchema.size());
+    } else {
+      originalFileSchema = fileSchema;
+    }
+    final int numCols = originalFileSchema.get(0).getSubtypesCount();
+    List<OrcProto.Type> schemaOnRead = getSchemaOnRead(numCols, conf);
+    List<OrcProto.Type> schemaUsed = getMatchingSchema(fileSchema, schemaOnRead);
+    if (schemaUsed == null) {
+      return TreeReaderFactory.createTreeReader(colId, fileSchema, included, skipCorrupt);
+    } else {
+      return ConversionTreeReaderFactory.createTreeReader(colId, schemaUsed, included, skipCorrupt);
+    }
+  }
+
+  private static boolean checkAcidSchema(List<OrcProto.Type> fileSchema) {
+    if (fileSchema.get(0).getKind().equals(OrcProto.Type.Kind.STRUCT)) {
+      List<String> acidFields = OrcRecordUpdater.getAcidEventFields();
+      List<String> rootFields = fileSchema.get(0).getFieldNamesList();
+      if (acidFields.equals(rootFields)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  private static List<OrcProto.Type> getMatchingSchema(List<OrcProto.Type> fileSchema,
+      List<OrcProto.Type> schemaOnRead) {
+    if (schemaOnRead == null) {
+      if (isLogInfoEnabled) {
+        LOG.info("Schema is not specified on read. Using file schema.");
+      }
+      return null;
+    }
+
+    if (fileSchema.size() != schemaOnRead.size()) {
+      if (isLogInfoEnabled) {
+        LOG.info("Schema on read column count does not match file schema's column count." +
+            " Falling back to using file schema.");
+      }
+      return null;
+    } else {
+      List<OrcProto.Type> result = Lists.newArrayList(fileSchema);
+      // check type promotion. ORC can only support type promotions for integer types
+      // short -> int -> bigint as same integer readers are used for the above types.
+      boolean canPromoteType = false;
+      for (int i = 0; i < fileSchema.size(); i++) {
+        OrcProto.Type fColType = fileSchema.get(i);
+        OrcProto.Type rColType = schemaOnRead.get(i);
+        if (!fColType.getKind().equals(rColType.getKind())) {
+
+          if (fColType.getKind().equals(OrcProto.Type.Kind.SHORT)) {
+
+            if (rColType.getKind().equals(OrcProto.Type.Kind.INT) ||
+                rColType.getKind().equals(OrcProto.Type.Kind.LONG)) {
+              // type promotion possible, converting SHORT to INT/LONG requested type
+              result.set(i, result.get(i).toBuilder().setKind(rColType.getKind()).build());
+              canPromoteType = true;
+            } else {
+              canPromoteType = false;
+            }
+
+          } else if (fColType.getKind().equals(OrcProto.Type.Kind.INT)) {
+
+            if (rColType.getKind().equals(OrcProto.Type.Kind.LONG)) {
+              // type promotion possible, converting INT to LONG requested type
+              result.set(i, result.get(i).toBuilder().setKind(rColType.getKind()).build());
+              canPromoteType = true;
+            } else {
+              canPromoteType = false;
+            }
+
+          } else {
+            canPromoteType = false;
+          }
+        }
+      }
+
+      if (canPromoteType) {
+        if (isLogInfoEnabled) {
+          LOG.info("Integer type promotion happened in ORC record reader. Using promoted schema.");
+        }
+        return result;
+      }
+    }
+
+    return null;
+  }
+
+  private static List<OrcProto.Type> getSchemaOnRead(int numCols, Configuration conf) {
+    String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);
+    final String columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS);
+    if (columnTypeProperty == null || columnNameProperty == null) {
+      return null;
+    }
+
+    ArrayList<String> columnNames = Lists.newArrayList(columnNameProperty.split(","));
+    ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+    StructTypeInfo structTypeInfo = new StructTypeInfo();
+    // Column types from conf includes virtual and partition columns at the end. We consider only
+    // the actual columns in the file.
+    structTypeInfo.setAllStructFieldNames(Lists.newArrayList(columnNames.subList(0, numCols)));
+    structTypeInfo.setAllStructFieldTypeInfos(Lists.newArrayList(fieldTypes.subList(0, numCols)));
+    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(structTypeInfo);
+    return getOrcTypes(oi);
+  }
+
+  private static List<OrcProto.Type> getOrcTypes(ObjectInspector inspector) {
+    List<OrcProto.Type> result = Lists.newArrayList();
+    getOrcTypesImpl(result, inspector);
+    return result;
+  }
+
+  private static void getOrcTypesImpl(List<OrcProto.Type> result, ObjectInspector inspector) {
+    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
+    switch (inspector.getCategory()) {
+      case PRIMITIVE:
+        switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
+          case BOOLEAN:
+            type.setKind(OrcProto.Type.Kind.BOOLEAN);
+            break;
+          case BYTE:
+            type.setKind(OrcProto.Type.Kind.BYTE);
+            break;
+          case SHORT:
+            type.setKind(OrcProto.Type.Kind.SHORT);
+            break;
+          case INT:
+            type.setKind(OrcProto.Type.Kind.INT);
+            break;
+          case LONG:
+            type.setKind(OrcProto.Type.Kind.LONG);
+            break;
+          case FLOAT:
+            type.setKind(OrcProto.Type.Kind.FLOAT);
+            break;
+          case DOUBLE:
+            type.setKind(OrcProto.Type.Kind.DOUBLE);
+            break;
+          case STRING:
+            type.setKind(OrcProto.Type.Kind.STRING);
+            break;
+          case CHAR:
+            // The char length needs to be written to file and should be available
+            // from the object inspector
+            CharTypeInfo charTypeInfo = (CharTypeInfo) ((PrimitiveObjectInspector) inspector)
+                .getTypeInfo();
+            type.setKind(OrcProto.Type.Kind.CHAR);
+            type.setMaximumLength(charTypeInfo.getLength());
+            break;
+          case VARCHAR:
+            // The varchar length needs to be written to file and should be available
+            // from the object inspector
+            VarcharTypeInfo typeInfo = (VarcharTypeInfo) ((PrimitiveObjectInspector) inspector)
+                .getTypeInfo();
+            type.setKind(OrcProto.Type.Kind.VARCHAR);
+            type.setMaximumLength(typeInfo.getLength());
+            break;
+          case BINARY:
+            type.setKind(OrcProto.Type.Kind.BINARY);
+            break;
+          case TIMESTAMP:
+            type.setKind(OrcProto.Type.Kind.TIMESTAMP);
+            break;
+          case DATE:
+            type.setKind(OrcProto.Type.Kind.DATE);
+            break;
+          case DECIMAL:
+            DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) ((PrimitiveObjectInspector) inspector)
+                .getTypeInfo();
+            type.setKind(OrcProto.Type.Kind.DECIMAL);
+            type.setPrecision(decTypeInfo.precision());
+            type.setScale(decTypeInfo.scale());
+            break;
+          default:
+            throw new IllegalArgumentException("Unknown primitive category: " +
+                ((PrimitiveObjectInspector) inspector).getPrimitiveCategory());
+        }
+        result.add(type.build());
+        break;
+      case LIST:
+        type.setKind(OrcProto.Type.Kind.LIST);
+        result.add(type.build());
+        getOrcTypesImpl(result, ((ListObjectInspector) inspector).getListElementObjectInspector());
+        break;
+      case MAP:
+        type.setKind(OrcProto.Type.Kind.MAP);
+        result.add(type.build());
+        getOrcTypesImpl(result, ((MapObjectInspector) inspector).getMapKeyObjectInspector());
+        getOrcTypesImpl(result, ((MapObjectInspector) inspector).getMapValueObjectInspector());
+        break;
+      case STRUCT:
+        type.setKind(OrcProto.Type.Kind.STRUCT);
+        result.add(type.build());
+        for (StructField field : ((StructObjectInspector) inspector).getAllStructFieldRefs()) {
+          getOrcTypesImpl(result, field.getFieldObjectInspector());
+        }
+        break;
+      case UNION:
+        type.setKind(OrcProto.Type.Kind.UNION);
+        result.add(type.build());
+        for (ObjectInspector oi : ((UnionObjectInspector) inspector).getObjectInspectors()) {
+          getOrcTypesImpl(result, oi);
+        }
+        break;
+      default:
+        throw new IllegalArgumentException("Unknown category: " + inspector.getCategory());
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/08b0708b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index a5a5943..58e19cb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -199,7 +199,7 @@ class RecordReaderImpl implements RecordReader {
     firstRow = skippedRows;
     totalRowCount = rows;
     boolean skipCorrupt = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_ORC_SKIP_CORRUPT_DATA);
-    reader = TreeReaderFactory.createTreeReader(0, types, included, skipCorrupt);
+    reader = RecordReaderFactory.createTreeReader(0, conf, types, included, skipCorrupt);
     indexes = new OrcProto.RowIndex[types.size()];
     bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()];
     advanceToNextRow(reader, 0L, true);

http://git-wip-us.apache.org/repos/asf/hive/blob/08b0708b/ql/src/test/queries/clientpositive/orc_int_type_promotion.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/orc_int_type_promotion.q b/ql/src/test/queries/clientpositive/orc_int_type_promotion.q
new file mode 100644
index 0000000..4a805a0
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/orc_int_type_promotion.q
@@ -0,0 +1,79 @@
+create table if not exists alltypes (
+ bo boolean,
+ ti tinyint,
+ si smallint,
+ i int,
+ bi bigint,
+ f float,
+ d double,
+ de decimal(10,3),
+ ts timestamp,
+ da date,
+ s string,
+ c char(5),
+ vc varchar(5),
+ m map<string, string>,
+ l array<int>,
+ st struct<c1:int, c2:string>
+) row format delimited fields terminated by '|'
+collection items terminated by ','
+map keys terminated by ':' stored as textfile;
+
+create table if not exists alltypes_orc (
+ bo boolean,
+ ti tinyint,
+ si smallint,
+ i int,
+ bi bigint,
+ f float,
+ d double,
+ de decimal(10,3),
+ ts timestamp,
+ da date,
+ s string,
+ c char(5),
+ vc varchar(5),
+ m map<string, string>,
+ l array<int>,
+ st struct<c1:int, c2:string>
+) stored as orc;
+
+load data local inpath '../../data/files/alltypes2.txt' overwrite into table alltypes;
+
+insert overwrite table alltypes_orc select * from alltypes;
+
+select * from alltypes_orc;
+
+alter table alltypes_orc change si si int;
+select * from alltypes_orc;
+
+alter table alltypes_orc change si si bigint;
+alter table alltypes_orc change i i bigint;
+select * from alltypes_orc;
+
+alter table alltypes_orc change l l array<bigint>;
+select * from alltypes_orc;
+
+set hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+alter table alltypes_orc change si si smallint;
+alter table alltypes_orc change i i int;
+
+explain select ti, si, i, bi from alltypes_orc;
+select ti, si, i, bi from alltypes_orc;
+
+alter table alltypes_orc change si si int;
+select ti, si, i, bi from alltypes_orc;
+
+alter table alltypes_orc change si si bigint;
+alter table alltypes_orc change i i bigint;
+select ti, si, i, bi from alltypes_orc;
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+create table src_part_orc (key int, value string) partitioned by (ds string) stored as orc;
+insert overwrite table src_part_orc partition(ds) select key, value, ds from srcpart where ds is not null;
+
+select * from src_part_orc limit 10;
+
+alter table src_part_orc change key key bigint;
+select * from src_part_orc limit 10;

http://git-wip-us.apache.org/repos/asf/hive/blob/08b0708b/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out b/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out
new file mode 100644
index 0000000..d26dff2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out
@@ -0,0 +1,377 @@
+PREHOOK: query: create table if not exists alltypes (
+ bo boolean,
+ ti tinyint,
+ si smallint,
+ i int,
+ bi bigint,
+ f float,
+ d double,
+ de decimal(10,3),
+ ts timestamp,
+ da date,
+ s string,
+ c char(5),
+ vc varchar(5),
+ m map<string, string>,
+ l array<int>,
+ st struct<c1:int, c2:string>
+) row format delimited fields terminated by '|'
+collection items terminated by ','
+map keys terminated by ':' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alltypes
+POSTHOOK: query: create table if not exists alltypes (
+ bo boolean,
+ ti tinyint,
+ si smallint,
+ i int,
+ bi bigint,
+ f float,
+ d double,
+ de decimal(10,3),
+ ts timestamp,
+ da date,
+ s string,
+ c char(5),
+ vc varchar(5),
+ m map<string, string>,
+ l array<int>,
+ st struct<c1:int, c2:string>
+) row format delimited fields terminated by '|'
+collection items terminated by ','
+map keys terminated by ':' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypes
+PREHOOK: query: create table if not exists alltypes_orc (
+ bo boolean,
+ ti tinyint,
+ si smallint,
+ i int,
+ bi bigint,
+ f float,
+ d double,
+ de decimal(10,3),
+ ts timestamp,
+ da date,
+ s string,
+ c char(5),
+ vc varchar(5),
+ m map<string, string>,
+ l array<int>,
+ st struct<c1:int, c2:string>
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: create table if not exists alltypes_orc (
+ bo boolean,
+ ti tinyint,
+ si smallint,
+ i int,
+ bi bigint,
+ f float,
+ d double,
+ de decimal(10,3),
+ ts timestamp,
+ da date,
+ s string,
+ c char(5),
+ vc varchar(5),
+ m map<string, string>,
+ l array<int>,
+ st struct<c1:int, c2:string>
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypes_orc
+PREHOOK: query: load data local inpath '../../data/files/alltypes2.txt' overwrite into table
alltypes
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@alltypes
+POSTHOOK: query: load data local inpath '../../data/files/alltypes2.txt' overwrite into table
alltypes
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@alltypes
+PREHOOK: query: insert overwrite table alltypes_orc select * from alltypes
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: insert overwrite table alltypes_orc select * from alltypes
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes
+POSTHOOK: Output: default@alltypes_orc
+POSTHOOK: Lineage: alltypes_orc.bi SIMPLE [(alltypes)alltypes.FieldSchema(name:bi, type:bigint,
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.bo SIMPLE [(alltypes)alltypes.FieldSchema(name:bo, type:boolean,
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.c SIMPLE [(alltypes)alltypes.FieldSchema(name:c, type:char(5),
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.d SIMPLE [(alltypes)alltypes.FieldSchema(name:d, type:double,
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.da SIMPLE [(alltypes)alltypes.FieldSchema(name:da, type:date,
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.de SIMPLE [(alltypes)alltypes.FieldSchema(name:de, type:decimal(10,3),
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.f SIMPLE [(alltypes)alltypes.FieldSchema(name:f, type:float,
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.i SIMPLE [(alltypes)alltypes.FieldSchema(name:i, type:int,
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.l SIMPLE [(alltypes)alltypes.FieldSchema(name:l, type:array<int>,
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.m SIMPLE [(alltypes)alltypes.FieldSchema(name:m, type:map<string,string>,
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.s SIMPLE [(alltypes)alltypes.FieldSchema(name:s, type:string,
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.si SIMPLE [(alltypes)alltypes.FieldSchema(name:si, type:smallint,
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.st SIMPLE [(alltypes)alltypes.FieldSchema(name:st, type:struct<c1:int,c2:string>,
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.ti SIMPLE [(alltypes)alltypes.FieldSchema(name:ti, type:tinyint,
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.ts SIMPLE [(alltypes)alltypes.FieldSchema(name:ts, type:timestamp,
comment:null), ]
+POSTHOOK: Lineage: alltypes_orc.vc SIMPLE [(alltypes)alltypes.FieldSchema(name:vc, type:varchar(5),
comment:null), ]
+PREHOOK: query: select * from alltypes_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select * from alltypes_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+true	10	100	1000	10000	4.0	20.0	4.222	1969-12-31 15:59:58.174	1970-01-01	string	hello	hello
{"k1":"v1","k2":"v2"}	[100,200]	{"c1":null,"c2":" \"foo\"}"}
+false	20	200	2000	20000	8.0	40.0	2.222	1970-12-31 15:59:58.174	1971-01-01	abcd	world	world
{"k3":"v3","k4":"v4"}	[200,300]	{"c1":null,"c2":" \"bar\"}"}
+PREHOOK: query: alter table alltypes_orc change si si int
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@alltypes_orc
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: alter table alltypes_orc change si si int
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@alltypes_orc
+POSTHOOK: Output: default@alltypes_orc
+PREHOOK: query: select * from alltypes_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select * from alltypes_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+true	10	100	1000	10000	4.0	20.0	4.222	1969-12-31 15:59:58.174	1970-01-01	string	hello	hello
{"k1":"v1","k2":"v2"}	[100,200]	{"c1":null,"c2":" \"foo\"}"}
+false	20	200	2000	20000	8.0	40.0	2.222	1970-12-31 15:59:58.174	1971-01-01	abcd	world	world
{"k3":"v3","k4":"v4"}	[200,300]	{"c1":null,"c2":" \"bar\"}"}
+PREHOOK: query: alter table alltypes_orc change si si bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@alltypes_orc
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: alter table alltypes_orc change si si bigint
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@alltypes_orc
+POSTHOOK: Output: default@alltypes_orc
+PREHOOK: query: alter table alltypes_orc change i i bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@alltypes_orc
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: alter table alltypes_orc change i i bigint
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@alltypes_orc
+POSTHOOK: Output: default@alltypes_orc
+PREHOOK: query: select * from alltypes_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select * from alltypes_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+true	10	100	1000	10000	4.0	20.0	4.222	1969-12-31 15:59:58.174	1970-01-01	string	hello	hello
{"k1":"v1","k2":"v2"}	[100,200]	{"c1":null,"c2":" \"foo\"}"}
+false	20	200	2000	20000	8.0	40.0	2.222	1970-12-31 15:59:58.174	1971-01-01	abcd	world	world
{"k3":"v3","k4":"v4"}	[200,300]	{"c1":null,"c2":" \"bar\"}"}
+PREHOOK: query: alter table alltypes_orc change l l array<bigint>
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@alltypes_orc
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: alter table alltypes_orc change l l array<bigint>
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@alltypes_orc
+POSTHOOK: Output: default@alltypes_orc
+PREHOOK: query: select * from alltypes_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select * from alltypes_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+true	10	100	1000	10000	4.0	20.0	4.222	1969-12-31 15:59:58.174	1970-01-01	string	hello	hello
{"k1":"v1","k2":"v2"}	[100,200]	{"c1":null,"c2":" \"foo\"}"}
+false	20	200	2000	20000	8.0	40.0	2.222	1970-12-31 15:59:58.174	1971-01-01	abcd	world	world
{"k3":"v3","k4":"v4"}	[200,300]	{"c1":null,"c2":" \"bar\"}"}
+PREHOOK: query: alter table alltypes_orc change si si smallint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@alltypes_orc
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: alter table alltypes_orc change si si smallint
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@alltypes_orc
+POSTHOOK: Output: default@alltypes_orc
+PREHOOK: query: alter table alltypes_orc change i i int
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@alltypes_orc
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: alter table alltypes_orc change i i int
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@alltypes_orc
+POSTHOOK: Output: default@alltypes_orc
+PREHOOK: query: explain select ti, si, i, bi from alltypes_orc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select ti, si, i, bi from alltypes_orc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypes_orc
+            Statistics: Num rows: 88 Data size: 1772 Basic stats: COMPLETE Column stats:
NONE
+            Select Operator
+              expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type:
bigint)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 88 Data size: 1772 Basic stats: COMPLETE Column stats:
NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 88 Data size: 1772 Basic stats: COMPLETE Column stats:
NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ti, si, i, bi from alltypes_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select ti, si, i, bi from alltypes_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+10	100	1000	10000
+20	200	2000	20000
+PREHOOK: query: alter table alltypes_orc change si si int
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@alltypes_orc
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: alter table alltypes_orc change si si int
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@alltypes_orc
+POSTHOOK: Output: default@alltypes_orc
+PREHOOK: query: select ti, si, i, bi from alltypes_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select ti, si, i, bi from alltypes_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+10	100	1000	10000
+20	200	2000	20000
+PREHOOK: query: alter table alltypes_orc change si si bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@alltypes_orc
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: alter table alltypes_orc change si si bigint
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@alltypes_orc
+POSTHOOK: Output: default@alltypes_orc
+PREHOOK: query: alter table alltypes_orc change i i bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@alltypes_orc
+PREHOOK: Output: default@alltypes_orc
+POSTHOOK: query: alter table alltypes_orc change i i bigint
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@alltypes_orc
+POSTHOOK: Output: default@alltypes_orc
+PREHOOK: query: select ti, si, i, bi from alltypes_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select ti, si, i, bi from alltypes_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_orc
+#### A masked pattern was here ####
+10	100	1000	10000
+20	200	2000	20000
+PREHOOK: query: create table src_part_orc (key int, value string) partitioned by (ds string)
stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_part_orc
+POSTHOOK: query: create table src_part_orc (key int, value string) partitioned by (ds string)
stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_part_orc
+PREHOOK: query: insert overwrite table src_part_orc partition(ds) select key, value, ds from
srcpart where ds is not null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@src_part_orc
+POSTHOOK: query: insert overwrite table src_part_orc partition(ds) select key, value, ds
from srcpart where ds is not null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@src_part_orc@ds=2008-04-08
+POSTHOOK: Output: default@src_part_orc@ds=2008-04-09
+POSTHOOK: Lineage: src_part_orc PARTITION(ds=2008-04-08).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: src_part_orc PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+POSTHOOK: Lineage: src_part_orc PARTITION(ds=2008-04-09).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: src_part_orc PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: select * from src_part_orc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_part_orc
+PREHOOK: Input: default@src_part_orc@ds=2008-04-08
+PREHOOK: Input: default@src_part_orc@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_part_orc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_part_orc
+POSTHOOK: Input: default@src_part_orc@ds=2008-04-08
+POSTHOOK: Input: default@src_part_orc@ds=2008-04-09
+#### A masked pattern was here ####
+238	val_238	2008-04-08
+86	val_86	2008-04-08
+311	val_311	2008-04-08
+27	val_27	2008-04-08
+165	val_165	2008-04-08
+409	val_409	2008-04-08
+255	val_255	2008-04-08
+278	val_278	2008-04-08
+98	val_98	2008-04-08
+484	val_484	2008-04-08
+PREHOOK: query: alter table src_part_orc change key key bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@src_part_orc
+PREHOOK: Output: default@src_part_orc
+POSTHOOK: query: alter table src_part_orc change key key bigint
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@src_part_orc
+POSTHOOK: Output: default@src_part_orc
+PREHOOK: query: select * from src_part_orc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_part_orc
+PREHOOK: Input: default@src_part_orc@ds=2008-04-08
+PREHOOK: Input: default@src_part_orc@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_part_orc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_part_orc
+POSTHOOK: Input: default@src_part_orc@ds=2008-04-08
+POSTHOOK: Input: default@src_part_orc@ds=2008-04-09
+#### A masked pattern was here ####
+238	val_238	2008-04-08
+86	val_86	2008-04-08
+311	val_311	2008-04-08
+27	val_27	2008-04-08
+165	val_165	2008-04-08
+409	val_409	2008-04-08
+255	val_255	2008-04-08
+278	val_278	2008-04-08
+98	val_98	2008-04-08
+484	val_484	2008-04-08


Mime
View raw message