hive-commits mailing list archives

From: rem...@apache.org
Subject: svn commit: r1585290 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/ java/org/apache/hadoop/hive/ql/io/parquet/ test/queries/clientpositive/ test/results/clientpositive/
Date: Sun, 06 Apr 2014 11:58:32 GMT
Author: remusr
Date: Sun Apr  6 11:58:31 2014
New Revision: 1585290

URL: http://svn.apache.org/r1585290
Log:
HIVE-5998: Add vectorized reader for Parquet files (reviewed by Brock)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
    hive/trunk/ql/src/test/queries/clientpositive/vectorized_parquet.q
    hive/trunk/ql/src/test/results/clientpositive/vectorized_parquet.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java?rev=1585290&r1=1585289&r2=1585290&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java Sun Apr  6 11:58:31 2014
@@ -26,6 +26,7 @@ import java.util.Map;
 
 import org.apache.hadoop.hive.common.type.Decimal128;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -35,6 +36,7 @@ import org.apache.hadoop.hive.serde2.io.
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.BooleanWritable;
@@ -42,6 +44,7 @@ import org.apache.hadoop.io.FloatWritabl
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 
 /**
  * This class is used as a static factory for VectorColumnAssign.
@@ -215,10 +218,31 @@ public class VectorColumnAssignFactory {
   public static VectorColumnAssign buildObjectAssign(VectorizedRowBatch outputBatch,
       int outColIndex, ObjectInspector objInspector) throws HiveException {
     PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
+    return buildObjectAssign(outputBatch, outColIndex, poi.getPrimitiveCategory());
+  }
+
+  public static VectorColumnAssign buildObjectAssign(VectorizedRowBatch outputBatch,
+      int outColIndex, PrimitiveCategory category) throws HiveException {
     VectorColumnAssign outVCA = null;
     ColumnVector destCol = outputBatch.cols[outColIndex];
-    if (destCol instanceof LongColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+    if (destCol == null) {
+      switch(category) {
+      case VOID:
+        outVCA = new VectorLongColumnAssign() {
+          // This is a dummy assigner
+          @Override
+          public void assignObjectValue(Object val, int destIndex) throws HiveException {
+            // This is no-op, there is no column to assign to and val is expected to be null
+            assert (val == null);
+          }
+        };
+        break;
+      default:
+        throw new HiveException("Incompatible (null) vector column and primitive category " +
+            category);
+      }
+    } else if (destCol instanceof LongColumnVector) {
+      switch(category) {
       case BOOLEAN:
         outVCA = new VectorLongColumnAssign() {
           @Override
@@ -320,11 +344,11 @@ public class VectorColumnAssignFactory {
         break;
       default:
         throw new HiveException("Incompatible Long vector column and primitive category "
+
-            poi.getPrimitiveCategory());
+            category);
       }
     }
     else if (destCol instanceof DoubleColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+      switch(category) {
       case DOUBLE:
         outVCA = new VectorDoubleColumnAssign() {
           @Override
@@ -355,11 +379,26 @@ public class VectorColumnAssignFactory {
         break;
       default:
         throw new HiveException("Incompatible Double vector column and primitive category
" +
-            poi.getPrimitiveCategory());
+            category);
       }
     }
     else if (destCol instanceof BytesColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+      switch(category) {
+      case BINARY:
+        outVCA = new VectorBytesColumnAssign() {
+          @Override
+          public void assignObjectValue(Object val, int destIndex) throws HiveException {
+            if (val == null) {
+              assignNull(destIndex);
+            }
+            else {
+              BinaryWritable bw = (BinaryWritable) val;
+              byte[] bytes = bw.getBytes();
+              assignBytes(bytes, 0, bytes.length, destIndex);
+            }
+          }
+        }.init(outputBatch, (BytesColumnVector) destCol);
+        break;
       case STRING:
         outVCA = new VectorBytesColumnAssign() {
           @Override
@@ -377,11 +416,11 @@ public class VectorColumnAssignFactory {
         break;
       default:
         throw new HiveException("Incompatible Bytes vector column and primitive category
" +
-            poi.getPrimitiveCategory());
+            category);
       }
     }
     else if (destCol instanceof DecimalColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+      switch(category) {
       case DECIMAL:
         outVCA = new VectorDecimalColumnAssign() {
           @Override
@@ -398,7 +437,7 @@ public class VectorColumnAssignFactory {
           break;
         default:
           throw new HiveException("Incompatible Decimal vector column and primitive category
" +
-              poi.getPrimitiveCategory());
+              category);
         }
     }
     else {
@@ -431,4 +470,39 @@ public class VectorColumnAssignFactory {
     }
     return vcas;
   }
-}
\ No newline at end of file
+
+  public static VectorColumnAssign[] buildAssigners(VectorizedRowBatch outputBatch,
+      Writable[] writables) throws HiveException {
+    VectorColumnAssign[] vcas = new VectorColumnAssign[outputBatch.numCols];
+    for (int i = 0; i < outputBatch.numCols; ++i) {
+      if (writables[i] == null) {
+        assert(outputBatch.cols[i] == null);
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.VOID);
+      } else if (writables[i] instanceof ByteWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BYTE);
+      } else if (writables[i] instanceof ShortWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.SHORT);
+      } else if (writables[i] instanceof IntWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.INT);
+      } else if (writables[i] instanceof LongWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.LONG);
+      } else if (writables[i] instanceof FloatWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.FLOAT);
+      } else if (writables[i] instanceof DoubleWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.DOUBLE);
+      } else if (writables[i] instanceof Text) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.STRING);
+      } else if (writables[i] instanceof BinaryWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BINARY);
+      } else if (writables[i] instanceof TimestampWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.TIMESTAMP);
+      } else if (writables[i] instanceof BooleanWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BOOLEAN);
+      } else {
+        throw new HiveException("Unimplemented vector assigner for writable type " +
+           writables[i].getClass());
+      }
+    }
+    return vcas;
+  }
+}
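
The notable addition above is the PrimitiveCategory-driven overload of buildObjectAssign, plus buildAssigners, which infers each column's category from the concrete Writable class of a decoded row. A minimal sketch of the intended call pattern (illustrative only; firstRow is an assumed ArrayWritable, not part of this commit):

    // Sketch only: build per-column assigners from the first decoded row,
    // then copy its values into the output batch (mirrors the reader's next()).
    Writable[] row = firstRow.get();   // firstRow: an assumed ArrayWritable
    VectorColumnAssign[] assigners =
        VectorColumnAssignFactory.buildAssigners(outputBatch, row);
    for (int i = 0; i < outputBatch.numCols; ++i) {
      assigners[i].assignObjectValue(row[i], outputBatch.size);
    }
    ++outputBatch.size;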

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java?rev=1585290&r1=1585289&r2=1585290&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java Sun Apr  6 11:58:31 2014
@@ -97,6 +97,13 @@ public class VectorizedRowBatch implemen
   }
 
   /**
+   * Returns the maximum size of the batch (number of rows it can hold)
+   */
+  public int getMaxSize() {
+    return selected.length;
+  }
+
+  /**
    * Return count of qualifying rows.
    *
    * @return number of rows that have not been filtered out
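
getMaxSize() exposes the batch capacity (the length of the selected array), so a reader can fill a batch up to capacity before handing it downstream. A minimal sketch of that fill loop, with readRow standing in as a hypothetical per-row decode step:

    // Sketch only: fill the batch to capacity, as the Parquet reader below does.
    batch.reset();
    int maxSize = batch.getMaxSize();   // capacity, i.e. selected.length
    while (batch.size < maxSize && readRow(batch, batch.size)) {   // readRow: hypothetical
      ++batch.size;
    }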

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1585290&r1=1585289&r2=1585290&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Sun Apr  6 11:58:31 2014
@@ -582,4 +582,17 @@ public class VectorizedRowBatchCtx {
     }
   }
 
+  public VectorColumnAssign[] buildObjectAssigners(VectorizedRowBatch outputBatch)
+        throws HiveException {
+    List<? extends StructField> fieldRefs = rowOI.getAllStructFieldRefs();
+    assert outputBatch.numCols == fieldRefs.size();
+    VectorColumnAssign[] assigners = new VectorColumnAssign[fieldRefs.size()];
+    for (int i = 0; i < assigners.length; ++i) {
+      StructField fieldRef = fieldRefs.get(i);
+      ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
+      assigners[i] = VectorColumnAssignFactory.buildObjectAssign(outputBatch, i, fieldOI);
+    }
+    return assigners;
+  }
 }
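
buildObjectAssigners pairs each struct field's ObjectInspector with the corresponding column of an output batch; it is the inspector-driven counterpart to buildAssigners above. A minimal usage sketch (exception handling elided; conf and fileSplit are assumed, as in the reader constructor later in this commit):

    // Sketch only: exception handling elided.
    VectorizedRowBatchCtx rbCtx = new VectorizedRowBatchCtx();
    rbCtx.init(conf, fileSplit);   // conf/fileSplit: assumed JobConf and FileSplit
    VectorizedRowBatch batch = rbCtx.createVectorizedRowBatch();
    VectorColumnAssign[] assigners = rbCtx.buildObjectAssigners(batch);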

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java?rev=1585290&r1=1585289&r2=1585290&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java Sun Apr  6 11:58:31 2014
@@ -14,7 +14,10 @@
 package org.apache.hadoop.hive.ql.io.parquet;
 
 import java.io.IOException;
-
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
 import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
 import org.apache.hadoop.io.ArrayWritable;
@@ -29,18 +32,25 @@ import parquet.hadoop.ParquetInputFormat
  * A Parquet InputFormat for Hive (with the deprecated package mapred)
  *
  */
-public class MapredParquetInputFormat extends FileInputFormat<Void, ArrayWritable> {
+public class MapredParquetInputFormat extends FileInputFormat<Void, ArrayWritable>
+    implements VectorizedInputFormatInterface {
+
+  private static final Log LOG = LogFactory.getLog(MapredParquetInputFormat.class);
 
   private final ParquetInputFormat<ArrayWritable> realInput;
 
+  private final transient VectorizedParquetInputFormat vectorizedSelf;
+
   public MapredParquetInputFormat() {
     this(new ParquetInputFormat<ArrayWritable>(DataWritableReadSupport.class));
   }
 
  protected MapredParquetInputFormat(final ParquetInputFormat<ArrayWritable> inputFormat) {
     this.realInput = inputFormat;
+    vectorizedSelf = new VectorizedParquetInputFormat(inputFormat);
   }
 
+  @SuppressWarnings({ "unchecked", "rawtypes" })
   @Override
   public org.apache.hadoop.mapred.RecordReader<Void, ArrayWritable> getRecordReader(
       final org.apache.hadoop.mapred.InputSplit split,
@@ -48,7 +58,19 @@ public class MapredParquetInputFormat ex
       final org.apache.hadoop.mapred.Reporter reporter
       ) throws IOException {
     try {
-      return (RecordReader<Void, ArrayWritable>) new ParquetRecordReaderWrapper(realInput, split, job, reporter);
+      if (Utilities.isVectorMode(job)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Using vectorized record reader");
+        }
+        return (RecordReader) vectorizedSelf.getRecordReader(split, job, reporter);
+      }
+      else {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Using row-mode record reader");
+        }
+        return (RecordReader<Void, ArrayWritable>)
+          new ParquetRecordReaderWrapper(realInput, split, job, reporter);
+      }
     } catch (final InterruptedException e) {
       throw new RuntimeException("Cannot create a RecordReaderWrapper", e);
     }
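
The dispatch is transparent to callers: the same getRecordReader call returns either the row-mode wrapper or the vectorized reader, depending on Utilities.isVectorMode(job). A minimal sketch of obtaining a reader (illustrative; exception handling and input-path setup elided):

    // Sketch only: whether the vectorized reader is returned depends on
    // Utilities.isVectorMode(job), which the planner drives via
    // hive.vectorized.execution.enabled.
    JobConf job = new JobConf();
    MapredParquetInputFormat format = new MapredParquetInputFormat();
    InputSplit[] splits = format.getSplits(job, 1);   // assumes input paths are set
    RecordReader<Void, ArrayWritable> reader =
        format.getRecordReader(splits[0], job, Reporter.NULL);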

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java?rev=1585290&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java Sun Apr  6 11:58:31 2014
@@ -0,0 +1,160 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import java.io.IOException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssign;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssignFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import parquet.hadoop.ParquetInputFormat;
+
+/**
+ * Vectorized input format for Parquet files
+ */
+public class VectorizedParquetInputFormat extends FileInputFormat<NullWritable, VectorizedRowBatch>
+  implements VectorizedInputFormatInterface {
+
+  private static final Log LOG = LogFactory.getLog(VectorizedParquetInputFormat.class);
+
+  /**
+   * Vectorized record reader for vectorized Parquet input format
+   */
+  private static class VectorizedParquetRecordReader implements
+      RecordReader<NullWritable, VectorizedRowBatch> {
+    private static final Log LOG = LogFactory.getLog(VectorizedParquetRecordReader.class);
+
+    private final ParquetRecordReaderWrapper internalReader;
+    private VectorizedRowBatchCtx rbCtx;
+    private ArrayWritable internalValues;
+    private Void internalKey;
+    private VectorColumnAssign[] assigners;
+
+    public VectorizedParquetRecordReader(
+        ParquetInputFormat<ArrayWritable> realInput,
+        FileSplit split,
+        JobConf conf, Reporter reporter) throws IOException, InterruptedException {
+      internalReader = new ParquetRecordReaderWrapper(
+        realInput,
+        split,
+        conf,
+        reporter);
+      try {
+        rbCtx = new VectorizedRowBatchCtx();
+        rbCtx.init(conf, split);
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    @Override
+    public NullWritable createKey() {
+      internalKey = internalReader.createKey();
+      return NullWritable.get();
+    }
+
+    @Override
+    public VectorizedRowBatch createValue() {
+      VectorizedRowBatch outputBatch = null;
+      try {
+        outputBatch = rbCtx.createVectorizedRowBatch();
+        internalValues = internalReader.createValue();
+      } catch (HiveException e) {
+        throw new RuntimeException("Error creating a batch", e);
+      }
+      return outputBatch;
+    }
+
+    @Override
+    public long getPos() throws IOException {
+      return internalReader.getPos();
+    }
+
+    @Override
+    public void close() throws IOException {
+      internalReader.close();
+    }
+
+    @Override
+    public float getProgress() throws IOException {
+      return internalReader.getProgress();
+    }
+
+    @Override
+    public boolean next(NullWritable key, VectorizedRowBatch outputBatch)
+        throws IOException {
+      assert(outputBatch.numCols == assigners.length);
+      outputBatch.reset();
+      int maxSize = outputBatch.getMaxSize();
+      try {
+        while (outputBatch.size < maxSize) {
+          if (false == internalReader.next(internalKey, internalValues)) {
+            outputBatch.endOfFile = true;
+            break;
+          }
+          Writable[] writables = internalValues.get();
+
+          if (null == assigners) {
+            // Normally we'd build the assigners from the rbCtx.rowOI, but with Parquet
+            // we have a discrepancy between the metadata type (Eg. tinyint -> BYTE) and
+            // the writable value (IntWritable). see Parquet's ETypeConverter class.
+            assigners = VectorColumnAssignFactory.buildAssigners(outputBatch, writables);
+          }
+
+          for(int i = 0; i < outputBatch.numCols; ++i) {
+            assigners[i].assignObjectValue(writables[i], outputBatch.size);
+          }
+          ++outputBatch.size;
+        }
+      } catch (HiveException e) {
+        throw new RuntimeException(e);
+      }
+      return outputBatch.size > 0;
+    }
+  }
+
+  private final ParquetInputFormat<ArrayWritable> realInput;
+
+  public VectorizedParquetInputFormat(ParquetInputFormat<ArrayWritable> realInput) {
+    this.realInput = realInput;
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(
+      InputSplit split, JobConf conf, Reporter reporter) throws IOException {
+    try {
+      return (RecordReader<NullWritable, VectorizedRowBatch>)
+        new VectorizedParquetRecordReader(realInput, (FileSplit) split, conf, reporter);
+    } catch (final InterruptedException e) {
+      throw new RuntimeException("Cannot create a VectorizedParquetRecordReader", e);
+    }
+  }
+
+}
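
Consumption of the new reader follows the usual mapred RecordReader contract: the key is NullWritable, the value is a reused VectorizedRowBatch, and each next() call fills up to getMaxSize() rows. A minimal sketch (process is a hypothetical consumer):

    // Sketch only: iterate batches from the vectorized reader.
    RecordReader<NullWritable, VectorizedRowBatch> reader =
        vectorizedFormat.getRecordReader(split, job, Reporter.NULL);
    NullWritable key = reader.createKey();
    VectorizedRowBatch batch = reader.createValue();
    while (reader.next(key, batch)) {
      process(batch);   // process: hypothetical; batch.size rows are populated,
                        // batch.endOfFile marks the final batch
    }
    reader.close();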

Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_parquet.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_parquet.q?rev=1585290&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_parquet.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_parquet.q Sun Apr  6 11:58:31 2014
@@ -0,0 +1,44 @@
+create table if not exists alltypes_parquet (
+  cint int, 
+  ctinyint tinyint, 
+  csmallint smallint, 
+  cfloat float, 
+  cdouble double, 
+  cstring1 string) stored as parquet;
+  
+insert overwrite table alltypes_parquet 
+  select cint, 
+    ctinyint, 
+    csmallint, 
+    cfloat, 
+    cdouble, 
+    cstring1 
+  from alltypesorc;
+  
+SET hive.vectorized.execution.enabled=true;
+  
+explain select * 
+  from alltypes_parquet
+  where cint = 528534767 
+  limit 10;
+select * 
+  from alltypes_parquet
+  where cint = 528534767 
+  limit 10;
+
+explain select ctinyint, 
+  max(cint), 
+  min(csmallint), 
+  count(cstring1), 
+  avg(cfloat), 
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint;
+select ctinyint, 
+  max(cint), 
+  min(csmallint), 
+  count(cstring1), 
+  avg(cfloat), 
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint;

Added: hive/trunk/ql/src/test/results/clientpositive/vectorized_parquet.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorized_parquet.q.out?rev=1585290&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorized_parquet.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorized_parquet.q.out Sun Apr  6 11:58:31 2014
@@ -0,0 +1,358 @@
+PREHOOK: query: create table if not exists alltypes_parquet (
+  cint int, 
+  ctinyint tinyint, 
+  csmallint smallint, 
+  cfloat float, 
+  cdouble double, 
+  cstring1 string) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table if not exists alltypes_parquet (
+  cint int, 
+  ctinyint tinyint, 
+  csmallint smallint, 
+  cfloat float, 
+  cdouble double, 
+  cstring1 string) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypes_parquet
+PREHOOK: query: insert overwrite table alltypes_parquet 
+  select cint, 
+    ctinyint, 
+    csmallint, 
+    cfloat, 
+    cdouble, 
+    cstring1 
+  from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypes_parquet
+POSTHOOK: query: insert overwrite table alltypes_parquet 
+  select cint, 
+    ctinyint, 
+    csmallint, 
+    cfloat, 
+    cdouble, 
+    cstring1 
+  from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypes_parquet
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: explain select * 
+  from alltypes_parquet
+  where cint = 528534767 
+  limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * 
+  from alltypes_parquet
+  where cint = 528534767 
+  limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypes_parquet
+            Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (cint = 528534767) (type: boolean)
+              Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+PREHOOK: query: select * 
+  from alltypes_parquet
+  where cint = 528534767 
+  limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: query: select * 
+  from alltypes_parquet
+  where cint = 528534767 
+  limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+528534767	-50	-13326	-50.0	-13326.0	cvLH6Eat2yFsyy7p
+528534767	NULL	-4213	NULL	-4213.0	cvLH6Eat2yFsyy7p
+528534767	-28	-15813	-28.0	-15813.0	cvLH6Eat2yFsyy7p
+528534767	31	-9566	31.0	-9566.0	cvLH6Eat2yFsyy7p
+528534767	-34	15007	-34.0	15007.0	cvLH6Eat2yFsyy7p
+528534767	29	7021	29.0	7021.0	cvLH6Eat2yFsyy7p
+528534767	31	4963	31.0	4963.0	cvLH6Eat2yFsyy7p
+528534767	27	-7824	27.0	-7824.0	cvLH6Eat2yFsyy7p
+528534767	-11	-15431	-11.0	-15431.0	cvLH6Eat2yFsyy7p
+528534767	61	-15549	61.0	-15549.0	cvLH6Eat2yFsyy7p
+PREHOOK: query: explain select ctinyint, 
+  max(cint), 
+  min(csmallint), 
+  count(cstring1), 
+  avg(cfloat), 
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select ctinyint, 
+  max(cint), 
+  min(csmallint), 
+  count(cstring1), 
+  avg(cfloat), 
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypes_parquet
+            Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double)
+              outputColumnNames: ctinyint, cint, csmallint, cstring1, cfloat, cdouble
+              Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
+                keys: ctinyint (type: tinyint)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: tinyint)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: tinyint)
+                  Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:float>), _col5 (type: struct<count:bigint,sum:double,variance:double>)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
+          keys: KEY._col0 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+            Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+PREHOOK: query: select ctinyint, 
+  max(cint), 
+  min(csmallint), 
+  count(cstring1), 
+  avg(cfloat), 
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint, 
+  max(cint), 
+  min(csmallint), 
+  count(cstring1), 
+  avg(cfloat), 
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+NULL	1073418988	-16379	3115	NULL	305051.4870777435
+-64	626923679	-15920	21	-64.0	9254.456539277186
+-63	626923679	-12516	16	-63.0	9263.605837223322
+-62	626923679	-15992	24	-62.0	9004.593091474135
+-61	626923679	-15142	22	-61.0	9357.236187870849
+-60	626923679	-15792	24	-60.0	9892.656196775464
+-59	626923679	-15789	28	-59.0	9829.790704244733
+-58	626923679	-15169	20	-58.0	9549.096672008198
+-57	626923679	-14893	32	-57.0	8572.083461570477
+-56	626923679	-11999	33	-56.0	9490.842152672341
+-55	626923679	-13381	26	-55.0	9157.562103946742
+-54	626923679	-14815	23	-54.0	9614.154026896626
+-53	626923679	-15445	19	-53.0	9387.739325499799
+-52	626923679	-16369	30	-52.0	8625.06871423408
+-51	1073680599	-15734	1028	-51.0	9531.569305177045
+-50	626923679	-14320	27	-50.0	8548.827748002343
+-49	626923679	-14831	23	-49.0	9894.429191738676
+-48	626923679	-15462	26	-48.0	9913.883371354861
+-47	626923679	-16096	19	-47.0	9011.009178780589
+-46	626923679	-12427	21	-46.0	9182.943188188632
+-45	626923679	-15027	21	-45.0	8567.489593562543
+-44	626923679	-15667	21	-44.0	10334.01810499552
+-43	626923679	-15607	27	-43.0	8715.255026265124
+-42	626923679	-16025	14	-42.0	9692.646755759979
+-41	626923679	-12606	21	-41.0	9034.40949481481
+-40	626923679	-14678	23	-40.0	9883.334986561835
+-39	626923679	-15612	19	-39.0	9765.551806305297
+-38	626923679	-14914	28	-38.0	8767.375358291503
+-37	626923679	-14780	17	-37.0	10368.905538788269
+-36	626923679	-16208	23	-36.0	8773.547684436919
+-35	626923679	-16059	23	-35.0	10136.580492864763
+-34	626923679	-15450	29	-34.0	8708.243526705026
+-33	626923679	-12779	21	-33.0	8854.331159704514
+-32	626923679	-15866	25	-32.0	9535.546396775915
+-31	626923679	-15915	22	-31.0	9187.596784112568
+-30	626923679	-14863	23	-30.0	9193.941914019653
+-29	626923679	-14747	26	-29.0	9052.945656011721
+-28	626923679	-15813	20	-28.0	9616.869413270924
+-27	626923679	-14984	20	-27.0	8465.29660255097
+-26	626923679	-15686	15	-26.0	10874.523900405318
+-25	626923679	-15862	24	-25.0	9778.256724727018
+-24	626923679	-16311	26	-24.0	9386.736402961187
+-23	626923679	-16355	36	-23.345263230173213	9401.831290253447
+-22	626923679	-14701	22	-22.0	8809.230165774987
+-21	626923679	-16017	27	-21.0	9480.349236669877
+-20	626923679	-16126	24	-20.0	9868.92268080106
+-19	626923679	-15935	25	-19.0	9967.22240685782
+-18	626923679	-14863	24	-18.0	9638.430684071413
+-17	626923679	-15922	19	-17.0	9944.104273894172
+-16	626923679	-15154	21	-16.0	8884.207393686478
+-15	626923679	-16036	24	-15.0	9450.506254395024
+-14	626923679	-13884	22	-14.0	10125.818731386042
+-13	626923679	-15446	30	-13.0	8907.942987576693
+-12	626923679	-16373	22	-12.0	10173.15707541171
+-11	626923679	-15659	32	-11.0	10453.738567408038
+-10	626923679	-15384	28	-10.0	8850.451610567823
+-9	626923679	-15329	31	-9.0	8999.391457373968
+-8	626923679	-14678	18	-8.0	9976.831992670684
+-7	626923679	-14584	23	-7.0	9946.605446407746
+-6	626923679	-15980	30	-6.0	10262.829252317424
+-5	626923679	-15780	24	-5.0	10599.227726422314
+-4	626923679	-16207	21	-4.0	9682.726604102581
+-3	626923679	-13632	16	-3.0	8836.215573422822
+-2	626923679	-16277	20	-2.0	10800.090249507177
+-1	626923679	-15441	36	-1.0486250072717667	8786.246963933321
+0	626923679	-14254	24	0.0	10057.5018088718
+1	626923679	-14610	30	1.0	10016.486277900643
+2	626923679	-16227	25	2.0	10083.276127543355
+3	626923679	-16339	30	3.0	10483.526375885149
+4	626923679	-15999	29	4.0	9516.189702058042
+5	626923679	-16169	31	5.0	11114.001902469323
+6	626923679	-15948	30	6.0	9644.247255286113
+7	626923679	-15839	25	7.0	10077.151640330823
+8	1070764888	-15778	1034	8.0	9562.355155774725
+9	626923679	-13629	25	9.0	10157.217948808622
+10	626923679	-15887	26	10.0	9104.820520135108
+11	1072654057	-14696	1035	11.0	9531.018991371746
+12	626923679	-14642	18	12.0	9696.038286378725
+13	626923679	-14771	26	13.0	8128.265919972384
+14	626923679	-13367	28	14.0	9074.674998750581
+15	626923679	-16339	28	15.0	9770.473400901916
+16	626923679	-14001	26	16.0	10130.883606275334
+17	626923679	-16109	22	16.73235294865627	1353416.3383574807
+18	626923679	-15779	21	18.0	10820.004053788869
+19	626923679	-16049	21	19.0	9423.560227007669
+20	626923679	-15149	21	20.0	11161.893298093504
+21	626923679	-15931	23	21.0	9683.044864861204
+22	626923679	-16280	26	22.0	9693.155720861765
+23	626923679	-15514	24	23.0	8542.419116415425
+24	626923679	-15086	24	24.0	9661.203790645088
+25	626923679	-11349	23	25.0	8888.959012093468
+26	626923679	-14516	29	26.0	9123.125508880432
+27	626923679	-14965	24	27.0	9802.871860196345
+28	626923679	-14455	20	28.0	9283.289383115296
+29	626923679	-15892	16	29.0	9874.046501817154
+30	626923679	-14111	27	30.0	10066.520234676527
+31	626923679	-15960	24	31.0	10427.970184550613
+32	626923679	-14044	24	32.0	8376.464579403413
+33	626923679	-14642	29	40.61776386607777	1304429.5939037625
+34	626923679	-15059	28	34.0	8756.731536033676
+35	626923679	-16153	27	35.0	10351.008404963042
+36	626923679	-15912	20	36.0	9475.257975138164
+37	626923679	-12081	24	37.0	9017.860034890362
+38	626923679	-15248	29	38.0	9900.256257785535
+39	626923679	-14887	28	39.0	10513.343644635232
+40	626923679	-15861	22	40.0	9283.318678549174
+41	626923679	-13480	21	41.0	9016.291129937847
+42	626923679	-15834	28	42.0	10318.01399719996
+43	626923679	-15703	28	43.0	8757.796089055722
+44	626923679	-11185	16	44.0	9425.076634933797
+45	626923679	-15228	18	45.0	9459.968668643689
+46	626923679	-15187	22	46.0	9685.908173160062
+47	626923679	-16324	22	47.0	9822.220821743611
+48	626923679	-16372	29	48.0	10079.286173063345
+49	626923679	-15923	27	49.0	9850.111848934683
+50	626923679	-16236	21	50.0	9398.176197406601
+51	626923679	-15790	17	51.0	9220.075799194028
+52	626923679	-15450	20	52.0	9261.723648435052
+53	626923679	-16217	30	53.0	9895.247408969733
+54	626923679	-15245	16	54.0	9789.50878424882
+55	626923679	-15887	21	55.0	9826.38569192808
+56	626923679	-12631	21	56.0	8860.917133763547
+57	626923679	-15620	25	57.0	9413.99393840875
+58	626923679	-13627	20	58.0	9083.529665947459
+59	626923679	-16076	17	59.0	10117.44967077967
+60	626923679	-13606	23	60.0	8346.267436552042
+61	626923679	-15894	29	61.0	8785.714950987198
+62	626923679	-14307	17	62.0	9491.752726667326


