hive-commits mailing list archives

From ser...@apache.org
Subject [2/2] hive git commit: HIVE-12631 : LLAP IO: support ORC ACID tables (Teddy Choi, reviewed by Sergey Shelukhin)
Date Wed, 18 Oct 2017 22:25:35 GMT
HIVE-12631 : LLAP IO: support ORC ACID tables (Teddy Choi, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7decd421
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7decd421
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7decd421

Branch: refs/heads/master
Commit: 7decd4218135154180fd21f4b73be5e954814d40
Parents: c6c374e
Author: sergey <sershe@apache.org>
Authored: Wed Oct 18 15:19:18 2017 -0700
Committer: sergey <sershe@apache.org>
Committed: Wed Oct 18 15:19:30 2017 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   2 +
 .../hive/llap/io/api/impl/LlapInputFormat.java  |  18 +-
 .../hive/llap/io/api/impl/LlapRecordReader.java |  83 ++++-
 .../llap/io/decode/OrcEncodedDataConsumer.java  |   3 +-
 .../llap/io/encoded/OrcEncodedDataReader.java   |  26 +-
 .../exec/vector/VectorExpressionDescriptor.java |   3 +
 .../hive/ql/exec/vector/VectorExtractRow.java   |   2 +-
 .../hive/ql/exec/vector/VectorMapOperator.java  |  11 +-
 .../ql/exec/vector/VectorizationContext.java    |   5 +
 .../ql/exec/vector/VectorizedRowBatchCtx.java   |  48 ++-
 .../VectorExpressionWriterFactory.java          |  52 ++-
 .../hadoop/hive/ql/io/LlapAwareSplit.java       |   4 +-
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   |  19 +-
 .../apache/hadoop/hive/ql/io/orc/OrcSplit.java  |  35 +-
 .../io/orc/VectorizedOrcAcidRowBatchReader.java | 156 +++++---
 .../ql/io/orc/VectorizedOrcAcidRowReader.java   |  36 +-
 .../orc/encoded/EncodedTreeReaderFactory.java   |  10 +-
 .../ql/parse/UpdateDeleteSemanticAnalyzer.java  |   4 +
 .../queries/clientpositive/llap_acid_fast.q     |  49 +++
 .../clientpositive/llap/acid_no_buckets.q.out   |   8 +-
 .../results/clientpositive/llap/llap_acid.q.out | 321 +++++++++++++++++
 .../clientpositive/llap/llap_acid_fast.q.out    | 361 +++++++++++++++++++
 .../results/clientpositive/llap_acid_fast.q.out | 315 ++++++++++++++++
 23 files changed, 1437 insertions(+), 134 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 8d92da3..06ebc98 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -190,6 +190,8 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   kill_query.q,\
   leftsemijoin.q,\
   limit_pushdown.q,\
+  llap_acid.q,\
+  llap_acid_fast.q,\
   load_dyn_part1.q,\
   load_dyn_part2.q,\
   load_dyn_part3.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index 79ec4ed..1cf5f49 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -138,7 +138,7 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB
       InputSplit split, JobConf job, Reporter reporter) throws IOException {
     boolean useLlapIo = true;
     if (split instanceof LlapAwareSplit) {
-      useLlapIo = ((LlapAwareSplit) split).canUseLlapIo();
+      useLlapIo = ((LlapAwareSplit) split).canUseLlapIo(job);
     }
     if (useLlapIo) return null;
 
@@ -170,9 +170,14 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB
     RowSchema rowSchema = findTsOp(mapWork).getSchema();
     final List<String> colNames = new ArrayList<String>(rowSchema.getSignature().size());
     final List<TypeInfo> colTypes = new ArrayList<TypeInfo>(rowSchema.getSignature().size());
+    boolean hasRowId = false;
     for (ColumnInfo c : rowSchema.getSignature()) {
       String columnName = c.getInternalName();
-      if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(columnName)) continue;
+      if (VirtualColumn.ROWID.getName().equals(columnName)) {
+        hasRowId = true;
+      } else {
+        if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(columnName)) continue;
+      }
       colNames.add(columnName);
       colTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(c.getTypeName()));
     }
@@ -190,10 +195,15 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB
         }
       }
     }
-    // UNDONE: Virtual column support?
+    final VirtualColumn[] virtualColumns;
+    if (hasRowId) {
+      virtualColumns = new VirtualColumn[] {VirtualColumn.ROWID};
+    } else {
+      virtualColumns = new VirtualColumn[0];
+    }
     return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]),
         colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount,
-        new VirtualColumn[0], new String[0]);
+        virtualColumns, new String[0]);
   }
 
   static TableScanOperator findTsOp(MapWork mapWork) throws HiveException {

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index 70bd05c..d66fac2 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hive.ql.exec.tez.DagUtils;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader;
 import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer;
 import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
@@ -140,8 +141,20 @@ class LlapRecordReader
     isAcidScan = HiveConf.getBoolVar(jobConf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
     TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(
         job, isAcidScan, Integer.MAX_VALUE);
-    this.columnIds = includedCols;
-    this.columnCount = columnIds.size();
+    if (isAcidScan) {
+      this.columnIds = new ArrayList<>();
+      final int ACID_FIELDS = OrcInputFormat.getRootColumn(false);
+      for (int i = 0; i < ACID_FIELDS; i++) {
+        columnIds.add(i);
+      }
+      for (int i = 0; i < includedCols.size(); i++) {
+        columnIds.add(i + ACID_FIELDS);
+      }
+      this.columnCount = columnIds.size();
+    } else {
+      this.columnIds = includedCols;
+      this.columnCount = columnIds.size();
+    }
 
     VectorizedRowBatchCtx ctx = mapWork.getVectorizedRowBatchCtx();
     rbCtx = ctx != null ? ctx : LlapInputFormat.createFakeVrbCtx(mapWork);
@@ -254,17 +267,63 @@ class LlapRecordReader
       counters.incrTimeCounter(LlapIOCounters.CONSUMER_TIME_NS, firstReturnTime);
       return false;
     }
-    if (columnCount != cvb.cols.length) {
-      throw new RuntimeException("Unexpected number of columns, VRB has " + columnCount
-          + " included, but the reader returned " + cvb.cols.length);
-    }
-    // VRB was created from VrbCtx, so we already have pre-allocated column vectors
-    for (int i = 0; i < cvb.cols.length; ++i) {
-      // Return old CVs (if any) to caller. We assume these things all have the same schema.
-      cvb.swapColumnVector(i, value.cols, columnIds.get(i));
+    final boolean isVectorized = HiveConf.getBoolVar(jobConf,
+        HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
+
+    if (isAcidScan) {
+      value.selectedInUse = true;
+      if (isVectorized) {
+        final VectorizedRowBatch acidVrb = new VectorizedRowBatch(cvb.cols.length);
+        acidVrb.cols = cvb.cols;
+        acidVrb.size = cvb.size;
+        final VectorizedOrcAcidRowBatchReader acidReader =
+            new VectorizedOrcAcidRowBatchReader(split, jobConf, Reporter.NULL,
+                new RecordReader<NullWritable, VectorizedRowBatch>() {
+                  @Override
+                  public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException {
+                    return true;
+                  }
+
+                  @Override
+                  public NullWritable createKey() {
+                    return NullWritable.get();
+                  }
+
+                  @Override
+                  public VectorizedRowBatch createValue() {
+                    return acidVrb;
+                  }
+
+                  @Override
+                  public long getPos() throws IOException {
+                    return 0;
+                  }
+
+                  @Override
+                  public void close() throws IOException {
+                  }
+
+                  @Override
+                  public float getProgress() throws IOException {
+                    return 0;
+                  }
+                }, rbCtx);
+        acidReader.next(NullWritable.get(), value);
+      }
+    } else {
+      if (columnCount != cvb.cols.length) {
+        throw new RuntimeException("Unexpected number of columns, VRB has " + columnCount
+            + " included, but the reader returned " + cvb.cols.length);
+      }
+      // VRB was created from VrbCtx, so we already have pre-allocated column vectors
+      for (int i = 0; i < cvb.cols.length; ++i) {
+        // Return old CVs (if any) to caller. We assume these things all have the same schema.
+        cvb.swapColumnVector(i, value.cols, columnIds.get(i));
+      }
+      value.selectedInUse = false;
+      value.size = cvb.size;
     }
-    value.selectedInUse = false;
-    value.size = cvb.size;
+
     if (wasFirst) {
       firstReturnTime = counters.startTimeCounter();
     }

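For ACID scans, the record reader above prefixes the included column IDs with the ACID wrapper columns before handing the decoded batch to VectorizedOrcAcidRowBatchReader through a pass-through inner RecordReader. A minimal sketch of the column-id shift, assuming OrcInputFormat.getRootColumn(false) returns 6 for the six wrapper fields (operation, originalTransaction, bucket, rowId, currentTransaction, row) defined by OrcRecordUpdater:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    // Sketch only: mirrors the ACID column-id shift performed in LlapRecordReader.
    public class AcidColumnIdShiftSketch {
      static List<Integer> shiftForAcid(List<Integer> includedCols, int acidFields) {
        final List<Integer> columnIds = new ArrayList<>();
        for (int i = 0; i < acidFields; i++) {
          columnIds.add(i);                    // always include the ACID metadata columns
        }
        for (int i = 0; i < includedCols.size(); i++) {
          columnIds.add(i + acidFields);       // user columns follow the wrapper
        }
        return columnIds;
      }

      public static void main(String[] args) {
        // Two projected user columns, six assumed wrapper fields -> [0, 1, 2, 3, 4, 5, 6, 7]
        System.out.println(shiftForAcid(Arrays.asList(0, 1), 6));
      }
    }
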
http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
index a77ccc1..2930497 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
@@ -154,7 +154,8 @@ public class OrcEncodedDataConsumer
           if (cvb.cols[idx] == null) {
             // Orc store rows inside a root struct (hive writes it this way).
             // When we populate column vectors we skip over the root struct.
-            cvb.cols[idx] = createColumn(schema.getChildren().get(columnMapping[idx]), batchSize);
+            cvb.cols[idx] = createColumn(schema.getChildren().get(columnMapping[idx]),
+                VectorizedRowBatch.DEFAULT_SIZE);
           }
           trace.logTreeReaderNextVector(idx);
           ColumnVector cv = cvb.cols[idx];

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index 2e47a56..c32f79f 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -181,7 +181,7 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
   @SuppressWarnings("unused")
   private volatile boolean isPaused = false;
 
-  boolean[] globalIncludes = null, sargColumns = null;
+  boolean[] readerIncludes = null, sargColumns = null, fileIncludes = null;
   private final IoTrace trace;
   private Pool<IoTrace> tracePool;
 
@@ -222,10 +222,16 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
         HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID),
         HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID));
     fileMetadata = getFileFooterFromCacheOrDisk();
+    final TypeDescription fileSchema = fileMetadata.getSchema();
     if (readerSchema == null) {
       readerSchema = fileMetadata.getSchema();
     }
-    globalIncludes = OrcInputFormat.genIncludedColumns(readerSchema, includedColumnIds);
+    readerIncludes = OrcInputFormat.genIncludedColumns(readerSchema, includedColumnIds);
+    if (HiveConf.getBoolVar(jobConf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN)) {
+      fileIncludes = OrcInputFormat.shiftReaderIncludedForAcid(readerIncludes);
+    } else {
+      fileIncludes = OrcInputFormat.genIncludedColumns(fileSchema, includedColumnIds);
+    }
     // Do not allow users to override zero-copy setting. The rest can be taken from user config.
     boolean useZeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(daemonConf);
     if (useZeroCopy != OrcConf.USE_ZEROCOPY.getBoolean(jobConf)) {
@@ -233,10 +239,10 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
       jobConf.setBoolean(OrcConf.USE_ZEROCOPY.getAttribute(), useZeroCopy);
     }
     this.jobConf = jobConf;
-    Reader.Options options = new Reader.Options(jobConf).include(globalIncludes);
+    Reader.Options options = new Reader.Options(jobConf).include(readerIncludes);
     evolution = new SchemaEvolution(fileMetadata.getSchema(), readerSchema, options);
     consumer.setFileMetadata(fileMetadata);
-    consumer.setIncludedColumns(globalIncludes);
+    consumer.setIncludedColumns(readerIncludes);
     consumer.setSchemaEvolution(evolution);
   }
 
@@ -309,7 +315,7 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
         int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(
           sarg.getLeaves(), evolution);
         // included will not be null, row options will fill the array with trues if null
-        sargColumns = new boolean[globalIncludes.length];
+        sargColumns = new boolean[evolution.getFileSchema().getMaximumId() + 1];
         for (int i : filterColumns) {
           // filter columns may have -1 as index which could be partition column in SARG.
           if (i > 0) {
@@ -318,11 +324,11 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
         }
 
         // If SARG is present, get relevant stripe metadata from cache or readers.
-        stripeMetadatas = readStripesMetadata(globalIncludes, sargColumns);
+        stripeMetadatas = readStripesMetadata(fileIncludes, sargColumns);
       }
 
       // Now, apply SARG if any; w/o sarg, this will just initialize stripeRgs.
-      boolean hasData = determineRgsToRead(globalIncludes, stride, stripeMetadatas);
+      boolean hasData = determineRgsToRead(fileIncludes, stride, stripeMetadatas);
       if (!hasData) {
         consumer.setDone();
         recordReaderTime(startTime);
@@ -382,10 +388,10 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
           stripeKey.stripeIx = stripeIx;
           OrcProto.StripeFooter footer = getStripeFooterFromCacheOrDisk(si, stripeKey);
           stripeMetadata = createOrcStripeMetadataObject(
-              stripeIx, si, footer, globalIncludes, sargColumns);
+              stripeIx, si, footer, fileIncludes, sargColumns);
           ensureDataReader();
           stripeReader.readIndexStreams(stripeMetadata.getIndex(),
-              si, footer.getStreamsList(), globalIncludes, sargColumns);
+              si, footer.getStreamsList(), fileIncludes, sargColumns);
           consumer.setStripeMetadata(stripeMetadata);
         }
       } catch (Throwable t) {
@@ -405,7 +411,7 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
         // Also, currently readEncodedColumns is not stoppable. The consumer will discard the
         // data it receives for one stripe. We could probably interrupt it, if it checked that.
         stripeReader.readEncodedColumns(stripeIx, si, stripeMetadata.getRowIndexes(),
-            stripeMetadata.getEncodings(), stripeMetadata.getStreams(), globalIncludes,
+            stripeMetadata.getEncodings(), stripeMetadata.getStreams(), fileIncludes,
             rgs, consumer);
       } catch (Throwable t) {
         handleReaderError(startTime, t);

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
index f4499d7..a5bdbef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
@@ -76,6 +76,7 @@ public class VectorExpressionDescriptor {
     INTERVAL_YEAR_MONTH     (0x100),
     INTERVAL_DAY_TIME       (0x200),
     BINARY                  (0x400),
+    STRUCT                  (0x800),
     DATETIME_FAMILY         (DATE.value | TIMESTAMP.value),
     INTERVAL_FAMILY         (INTERVAL_YEAR_MONTH.value | INTERVAL_DAY_TIME.value),
     INT_INTERVAL_YEAR_MONTH     (INT_FAMILY.value | INTERVAL_YEAR_MONTH.value),
@@ -122,6 +123,8 @@ public class VectorExpressionDescriptor {
         return INTERVAL_YEAR_MONTH;
       } else if (lower.equals(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME)) {
         return INTERVAL_DAY_TIME;
+      } else if (VectorizationContext.structTypePattern.matcher(lower).matches()) {
+        return STRUCT;
       } else if (lower.equals("void")) {
         // The old code let void through...
         return INT_FAMILY;

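The new STRUCT argument type lets vectorized expression descriptors accept struct-typed columns such as the ROW__ID virtual column; type names are matched by the structTypePattern added in the VectorizationContext hunk below. A small illustration of that check, assuming the pattern is the case-insensitive "struct.*" regex shown there (the field names in the example are illustrative):

    import java.util.regex.Pattern;

    // Sketch only: the struct type-name check assumed to back ArgumentType.STRUCT.
    public class StructTypeNameSketch {
      static final Pattern STRUCT_TYPE = Pattern.compile("struct.*", Pattern.CASE_INSENSITIVE);

      public static void main(String[] args) {
        // A struct type name such as the one behind ROW__ID matches...
        System.out.println(STRUCT_TYPE.matcher("struct<transactionid:bigint,bucketid:int,rowid:bigint>").matches()); // true
        // ...while primitive type names do not.
        System.out.println(STRUCT_TYPE.matcher("varchar(10)").matches()); // false
      }
    }
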
http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
index 23fdaa5..fba17a8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
@@ -188,7 +188,7 @@ public class VectorExtractRow {
         colVector, typeInfos[logicalColumnIndex], objectInspectors[logicalColumnIndex], batchIndex);
   }
 
-  Object extractRowColumn(
+  public Object extractRowColumn(
       ColumnVector colVector, TypeInfo typeInfo, ObjectInspector objectInspector, int batchIndex) {
 
     if (colVector == null) {

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
index e8c73a9..26ab360 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
@@ -818,12 +818,11 @@ public class VectorMapOperator extends AbstractMapOperator {
             VectorizedRowBatch batch = (VectorizedRowBatch) value;
             numRows += batch.size;
             if (hasRowIdentifier) {
-
-              // UNDONE: Pass ROW__ID STRUCT column through IO Context to get filled in by ACID reader
-              // UNDONE: Or, perhaps tell it to do it before calling us, ...
-              // UNDONE: For now, set column to NULL.
-
-              setRowIdentiferToNull(batch);
+              if (batchContext.getRecordIdColumnVector() == null) {
+                setRowIdentiferToNull(batch);
+              } else {
+                batch.cols[rowIdentifierColumnNum] = batchContext.getRecordIdColumnVector();
+              }
             }
           }
           oneRootOperator.process(value, 0);

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 13d78e2..3fd2141 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -347,6 +347,9 @@ public class VectorizationContext {
   public static final Pattern charVarcharTypePattern = Pattern.compile("char.*|varchar.*",
       Pattern.CASE_INSENSITIVE);
 
+  public static final Pattern structTypePattern = Pattern.compile("struct.*",
+      Pattern.CASE_INSENSITIVE);
+
   //Map column number to type
   private OutputColumnManager ocm;
 
@@ -2714,6 +2717,8 @@ public class VectorizationContext {
     case INTERVAL_YEAR_MONTH:
     case INTERVAL_DAY_TIME:
       return hiveTypeName;
+    case STRUCT:
+      return "Struct";
     default:
       throw new HiveException("Unexpected hive type name " + hiveTypeName);
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index b5733ec..9c35488 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -81,6 +81,13 @@ public class VectorizedRowBatchCtx {
   private int partitionColumnCount;
   private int virtualColumnCount;
   private VirtualColumn[] neededVirtualColumns;
+  /**
+   * A record ID column is a virtual column, so it should be separated from normal data column
+   * processes. A recordIdColumnVector contains RecordIdentifier information in a
+   * StructColumnVector. It has three LongColumnVectors as its fields; original transaction IDs,
+   * bucket IDs, and row IDs.
+   */
+  private StructColumnVector recordIdColumnVector;
 
   private String[] scratchColumnTypeNames;
 
@@ -136,6 +143,14 @@ public class VectorizedRowBatchCtx {
     return scratchColumnTypeNames;
   }
 
+  public StructColumnVector getRecordIdColumnVector() {
+    return this.recordIdColumnVector;
+  }
+
+  public void setRecordIdColumnVector(StructColumnVector recordIdColumnVector) {
+    this.recordIdColumnVector = recordIdColumnVector;
+  }
+
   /**
    * Initializes the VectorizedRowBatch context based on an scratch column type names and
    * object inspector.
@@ -274,6 +289,11 @@ public class VectorizedRowBatchCtx {
    */
   public void addPartitionColsToBatch(VectorizedRowBatch batch, Object[] partitionValues)
   {
+    addPartitionColsToBatch(batch.cols, partitionValues);
+  }
+
+  public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValues)
+  {
     if (partitionValues != null) {
       for (int i = 0; i < partitionColumnCount; i++) {
         Object value = partitionValues[i];
@@ -283,7 +303,7 @@ public class VectorizedRowBatchCtx {
         PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) rowColumnTypeInfos[colIndex];
         switch (primitiveTypeInfo.getPrimitiveCategory()) {
         case BOOLEAN: {
-          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          LongColumnVector lcv = (LongColumnVector) cols[colIndex];
           if (value == null) {
             lcv.noNulls = false;
             lcv.isNull[0] = true;
@@ -296,7 +316,7 @@ public class VectorizedRowBatchCtx {
         break;
 
         case BYTE: {
-          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          LongColumnVector lcv = (LongColumnVector) cols[colIndex];
           if (value == null) {
             lcv.noNulls = false;
             lcv.isNull[0] = true;
@@ -309,7 +329,7 @@ public class VectorizedRowBatchCtx {
         break;
 
         case SHORT: {
-          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          LongColumnVector lcv = (LongColumnVector) cols[colIndex];
           if (value == null) {
             lcv.noNulls = false;
             lcv.isNull[0] = true;
@@ -322,7 +342,7 @@ public class VectorizedRowBatchCtx {
         break;
 
         case INT: {
-          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          LongColumnVector lcv = (LongColumnVector) cols[colIndex];
           if (value == null) {
             lcv.noNulls = false;
             lcv.isNull[0] = true;
@@ -335,7 +355,7 @@ public class VectorizedRowBatchCtx {
         break;
 
         case LONG: {
-          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          LongColumnVector lcv = (LongColumnVector) cols[colIndex];
           if (value == null) {
             lcv.noNulls = false;
             lcv.isNull[0] = true;
@@ -348,7 +368,7 @@ public class VectorizedRowBatchCtx {
         break;
 
         case DATE: {
-          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          LongColumnVector lcv = (LongColumnVector) cols[colIndex];
           if (value == null) {
             lcv.noNulls = false;
             lcv.isNull[0] = true;
@@ -361,7 +381,7 @@ public class VectorizedRowBatchCtx {
         break;
 
         case TIMESTAMP: {
-          TimestampColumnVector lcv = (TimestampColumnVector) batch.cols[colIndex];
+          TimestampColumnVector lcv = (TimestampColumnVector) cols[colIndex];
           if (value == null) {
             lcv.noNulls = false;
             lcv.isNull[0] = true;
@@ -374,7 +394,7 @@ public class VectorizedRowBatchCtx {
         break;
 
         case INTERVAL_YEAR_MONTH: {
-          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          LongColumnVector lcv = (LongColumnVector) cols[colIndex];
           if (value == null) {
             lcv.noNulls = false;
             lcv.isNull[0] = true;
@@ -386,7 +406,7 @@ public class VectorizedRowBatchCtx {
         }
 
         case INTERVAL_DAY_TIME: {
-          IntervalDayTimeColumnVector icv = (IntervalDayTimeColumnVector) batch.cols[colIndex];
+          IntervalDayTimeColumnVector icv = (IntervalDayTimeColumnVector) cols[colIndex];
           if (value == null) {
             icv.noNulls = false;
             icv.isNull[0] = true;
@@ -398,7 +418,7 @@ public class VectorizedRowBatchCtx {
         }
 
         case FLOAT: {
-          DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[colIndex];
+          DoubleColumnVector dcv = (DoubleColumnVector) cols[colIndex];
           if (value == null) {
             dcv.noNulls = false;
             dcv.isNull[0] = true;
@@ -411,7 +431,7 @@ public class VectorizedRowBatchCtx {
         break;
 
         case DOUBLE: {
-          DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[colIndex];
+          DoubleColumnVector dcv = (DoubleColumnVector) cols[colIndex];
           if (value == null) {
             dcv.noNulls = false;
             dcv.isNull[0] = true;
@@ -424,7 +444,7 @@ public class VectorizedRowBatchCtx {
         break;
 
         case DECIMAL: {
-          DecimalColumnVector dv = (DecimalColumnVector) batch.cols[colIndex];
+          DecimalColumnVector dv = (DecimalColumnVector) cols[colIndex];
           if (value == null) {
             dv.noNulls = false;
             dv.isNull[0] = true;
@@ -439,7 +459,7 @@ public class VectorizedRowBatchCtx {
         break;
 
         case BINARY: {
-            BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex];
+            BytesColumnVector bcv = (BytesColumnVector) cols[colIndex];
             byte[] bytes = (byte[]) value;
             if (bytes == null) {
               bcv.noNulls = false;
@@ -455,7 +475,7 @@ public class VectorizedRowBatchCtx {
         case STRING:
         case CHAR:
         case VARCHAR: {
-          BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex];
+          BytesColumnVector bcv = (BytesColumnVector) cols[colIndex];
           String sVal = value.toString();
           if (sVal == null) {
             bcv.noNulls = false;

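The recordIdColumnVector added above is the hand-off point for ROW__ID data: an ACID reader fills a StructColumnVector of three LongColumnVectors and publishes it with setRecordIdColumnVector, and VectorMapOperator (earlier hunk) plugs it into the batch's row-identifier column. A minimal construction sketch, assuming the child vectors carry original transaction IDs, bucket IDs, and row IDs as the javadoc describes:

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;

    // Sketch only: building and publishing the ROW__ID struct vector the way the
    // ACID readers do; rbCtx stands for the VectorizedRowBatchCtx in use.
    public class RecordIdColumnVectorSketch {
      static StructColumnVector publishRecordId(VectorizedRowBatchCtx rbCtx) {
        StructColumnVector recordId = new StructColumnVector(
            VectorizedRowBatch.DEFAULT_SIZE,
            new LongColumnVector(),   // original transaction IDs
            new LongColumnVector(),   // bucket IDs
            new LongColumnVector());  // row IDs
        rbCtx.setRecordIdColumnVector(recordId);
        return recordId;
      }
    }
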
http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
index 1fb70f8..d8df5cc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
@@ -61,6 +61,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestamp
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hive.common.util.DateUtils;
@@ -1462,12 +1464,18 @@ public final class VectorExpressionWriterFactory {
   private static VectorExpressionWriter genVectorExpressionWritableStruct(
       SettableStructObjectInspector fieldObjInspector) throws HiveException {
 
-    return new VectorExpressionWriterMap() {
+    return new VectorExpressionWriterStruct() {
       private Object obj;
+      private VectorExtractRow vectorExtractRow;
+      private StructTypeInfo structTypeInfo;
 
-      public VectorExpressionWriter init(SettableStructObjectInspector objInspector) throws HiveException {
+      public VectorExpressionWriter init(SettableStructObjectInspector objInspector)
+          throws HiveException {
         super.init(objInspector);
         obj = initValue(null);
+        vectorExtractRow = new VectorExtractRow();
+        structTypeInfo = (StructTypeInfo)
+            TypeInfoUtils.getTypeInfoFromTypeString(objInspector.getTypeName());
         return this;
       }
 
@@ -1477,15 +1485,43 @@ public final class VectorExpressionWriterFactory {
       }
 
       @Override
-      public Object writeValue(ColumnVector column, int row)
-          throws HiveException {
-        throw new HiveException("Not implemented yet");
+      public Object writeValue(ColumnVector column, int row) throws HiveException {
+        final StructColumnVector structColVector = (StructColumnVector) column;
+        final SettableStructObjectInspector structOI =
+            (SettableStructObjectInspector) this.objectInspector;
+        final List<? extends StructField> fields = structOI.getAllStructFieldRefs();
+        final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+
+        final int fieldSize = fields.size();
+        for (int i = 0; i < fieldSize; i++) {
+          final StructField structField = fields.get(i);
+          final Object value = vectorExtractRow.extractRowColumn(structColVector.fields[i],
+              fieldTypeInfos.get(i), structField.getFieldObjectInspector(), row);
+          structOI.setStructFieldData(obj, structField, value);
+        }
+        return this.obj;
       }
 
       @Override
-      public Object setValue(Object row, ColumnVector column, int columnRow)
-          throws HiveException {
-        throw new HiveException("Not implemented yet");
+      public Object setValue(Object field, ColumnVector column, int row) throws HiveException {
+        if (null == field) {
+          field = initValue(null);
+        }
+
+        final StructColumnVector structColVector = (StructColumnVector) column;
+        final SettableStructObjectInspector structOI =
+            (SettableStructObjectInspector) this.objectInspector;
+        final List<? extends StructField> fields = structOI.getAllStructFieldRefs();
+        final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+
+        final int fieldSize = fields.size();
+        for (int i = 0; i < fieldSize; i++) {
+          final StructField structField = fields.get(i);
+          final Object value = vectorExtractRow.extractRowColumn(structColVector.fields[i],
+              fieldTypeInfos.get(i), structField.getFieldObjectInspector(), row);
+          structOI.setStructFieldData(obj, structField, value);
+        }
+        return field;
       }
     }.init(fieldObjInspector);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/java/org/apache/hadoop/hive/ql/io/LlapAwareSplit.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/LlapAwareSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/LlapAwareSplit.java
index ead4678..20e1345 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/LlapAwareSplit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/LlapAwareSplit.java
@@ -17,11 +17,13 @@
  */
 package org.apache.hadoop.hive.ql.io;
 
+import org.apache.hadoop.conf.Configuration;
+
 /**
  * Split that is aware that it could be executed in LLAP. Allows LlapInputFormat to do
  * a last-minute check to see of LLAP IO pipeline should be used for this particular split.
  * By default, there is no such check - whatever is sent in is attempted with LLAP IO.
  */
 public interface LlapAwareSplit {
-  boolean canUseLlapIo();
+  boolean canUseLlapIo(Configuration conf);
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index bc42135..c364343 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -1607,20 +1607,19 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       }
       return ReaderImpl.getRawDataSizeFromColIndices(internalColIds, fileTypes, stats);
     }
+  }
 
-    private boolean[] shiftReaderIncludedForAcid(boolean[] included) {
-      // We always need the base row
-      included[0] = true;
-      boolean[] newIncluded = new boolean[included.length + OrcRecordUpdater.FIELDS];
-      Arrays.fill(newIncluded, 0, OrcRecordUpdater.FIELDS, true);
-      for(int i= 0; i < included.length; ++i) {
-        newIncluded[i + OrcRecordUpdater.FIELDS] = included[i];
-      }
-      return newIncluded;
+  public static boolean[] shiftReaderIncludedForAcid(boolean[] included) {
+    // We always need the base row
+    included[0] = true;
+    boolean[] newIncluded = new boolean[included.length + OrcRecordUpdater.FIELDS];
+    Arrays.fill(newIncluded, 0, OrcRecordUpdater.FIELDS, true);
+    for (int i = 0; i < included.length; ++i) {
+      newIncluded[i + OrcRecordUpdater.FIELDS] = included[i];
     }
+    return newIncluded;
   }
 
-
   /** Class intended to update two values from methods... Java-related cruft. */
   @VisibleForTesting
   static final class CombinedCtx {

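shiftReaderIncludedForAcid is now public and static so the LLAP reader (OrcEncodedDataReader hunk above) can reuse it to turn a reader-schema include mask into a file-schema mask for ACID files. A worked example, assuming OrcRecordUpdater.FIELDS is 6:

    import java.util.Arrays;
    import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;

    // Sketch only: include mask for {root, c1, c2, c3} with only c1 selected.
    public class ShiftIncludesSketch {
      public static void main(String[] args) {
        boolean[] readerIncludes = {false, true, false, false};   // root, c1, c2, c3
        boolean[] fileIncludes = OrcInputFormat.shiftReaderIncludedForAcid(readerIncludes);
        // The root is forced on and six leading slots (the assumed OrcRecordUpdater.FIELDS)
        // are set to true for the ACID metadata columns, then the original mask follows:
        // [true, true, true, true, true, true, true, true, false, false]
        System.out.println(Arrays.toString(fileIncludes));
      }
    }
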
http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
index 37aaeb6..260a5ac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
@@ -26,8 +26,12 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.io.AcidInputFormat;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.ColumnarSplit;
 import org.apache.hadoop.hive.ql.io.LlapAwareSplit;
 import org.apache.hadoop.hive.ql.io.SyntheticFileId;
@@ -225,8 +229,35 @@ public class OrcSplit extends FileSplit implements ColumnarSplit, LlapAwareSplit
   }
 
   @Override
-  public boolean canUseLlapIo() {
-    return isOriginal && (deltas == null || deltas.isEmpty());
+  public boolean canUseLlapIo(Configuration conf) {
+    final boolean hasDelta = deltas != null && !deltas.isEmpty();
+    final boolean isAcidRead = HiveConf.getBoolVar(conf,
+        HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
+    final boolean isVectorized = HiveConf.getBoolVar(conf,
+        HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
+    final AcidUtils.AcidOperationalProperties acidOperationalProperties
+        = AcidUtils.getAcidOperationalProperties(conf);
+    final boolean isSplitUpdate = acidOperationalProperties.isSplitUpdate();
+
+    if (isOriginal) {
+      if (!isAcidRead && !hasDelta) {
+        // Original scan only
+        return true;
+      }
+    } else {
+      if (isAcidRead && hasBase && isVectorized) {
+        if (hasDelta) {
+          if (isSplitUpdate) {
+            // Base with delete deltas
+            return true;
+          }
+        } else {
+          // Base scan only
+          return true;
+        }
+      }
+    }
+    return false;
   }
 
   @Override

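canUseLlapIo now consults the job configuration instead of rejecting anything non-original. The nested checks above boil down to the following condensed sketch (equivalent logic, not the committed code):

    // Sketch only: condensed equivalent of the canUseLlapIo decision above.
    public class LlapIoEligibilitySketch {
      static boolean canUseLlapIo(boolean isOriginal, boolean isAcidRead, boolean isVectorized,
          boolean hasBase, boolean hasDelta, boolean isSplitUpdate) {
        if (isOriginal) {
          // Original files qualify only for plain (non-ACID) reads with no deltas.
          return !isAcidRead && !hasDelta;
        }
        // ACID base files qualify when the scan is vectorized and any deltas are
        // split-update (delete) deltas.
        return isAcidRead && hasBase && isVectorized && (!hasDelta || isSplitUpdate);
      }
    }
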
http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
index 138e56e..1e16f09 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -62,20 +63,72 @@ public class VectorizedOrcAcidRowBatchReader
 
   private static final Logger LOG = LoggerFactory.getLogger(VectorizedOrcAcidRowBatchReader.class);
 
-  private org.apache.hadoop.hive.ql.io.orc.RecordReader baseReader;
-  private VectorizedRowBatchCtx rbCtx;
-  private VectorizedRowBatch vectorizedRowBatchBase;
+  public org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> baseReader;
+  protected VectorizedRowBatchCtx rbCtx;
+  protected VectorizedRowBatch vectorizedRowBatchBase;
   private long offset;
   private long length;
-  private float progress = 0.0f;
-  private Object[] partitionValues;
-  private boolean addPartitionCols = true;
+  protected float progress = 0.0f;
+  protected Object[] partitionValues;
+  protected boolean addPartitionCols = true;
   private ValidTxnList validTxnList;
-  private DeleteEventRegistry deleteEventRegistry;
+  protected DeleteEventRegistry deleteEventRegistry;
+  protected StructColumnVector recordIdColumnVector;
+  private org.apache.orc.Reader.Options readerOptions;
 
   public VectorizedOrcAcidRowBatchReader(InputSplit inputSplit, JobConf conf,
         Reporter reporter) throws IOException {
+    this.init(inputSplit, conf, reporter, Utilities.getVectorizedRowBatchCtx(conf));
 
+    final Reader reader = OrcInputFormat.createOrcReaderForSplit(conf, (OrcSplit) inputSplit);
+    // Careful with the range here now, we do not want to read the whole base file like deltas.
+    final RecordReader innerReader = reader.rowsOptions(readerOptions.range(offset, length));
+    baseReader = new org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>() {
+
+      @Override
+      public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException {
+        return innerReader.nextBatch(value);
+      }
+
+      @Override
+      public NullWritable createKey() {
+        return NullWritable.get();
+      }
+
+      @Override
+      public VectorizedRowBatch createValue() {
+        return rbCtx.createVectorizedRowBatch();
+      }
+
+      @Override
+      public long getPos() throws IOException {
+        return 0;
+      }
+
+      @Override
+      public void close() throws IOException {
+        innerReader.close();
+      }
+
+      @Override
+      public float getProgress() throws IOException {
+        return innerReader.getProgress();
+      }
+    };
+    this.vectorizedRowBatchBase = ((RecordReaderImpl) innerReader).createRowBatch();
+  }
+
+  public VectorizedOrcAcidRowBatchReader(InputSplit inputSplit, JobConf conf, Reporter reporter,
+      org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> baseReader,
+      VectorizedRowBatchCtx rbCtx) throws IOException {
+    this.init(inputSplit, conf, reporter, rbCtx);
+    this.baseReader = baseReader;
+    this.vectorizedRowBatchBase = baseReader.createValue();
+  }
+
+  private void init(InputSplit inputSplit, JobConf conf, Reporter reporter,
+      VectorizedRowBatchCtx rowBatchCtx) throws IOException {
+    this.rbCtx = rowBatchCtx;
     final boolean isAcidRead = HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
     final AcidUtils.AcidOperationalProperties acidOperationalProperties
             = AcidUtils.getAcidOperationalProperties(conf);
@@ -89,28 +142,13 @@ public class VectorizedOrcAcidRowBatchReader
     }
     final OrcSplit orcSplit = (OrcSplit) inputSplit;
 
-    rbCtx = Utilities.getVectorizedRowBatchCtx(conf);
-
     reporter.setStatus(orcSplit.toString());
-    Reader reader = OrcInputFormat.createOrcReaderForSplit(conf, orcSplit);
-    Reader.Options readerOptions = OrcInputFormat.createOptionsForReader(conf);
+    readerOptions = OrcInputFormat.createOptionsForReader(conf);
     readerOptions = OrcRawRecordMerger.createEventOptions(readerOptions);
 
     this.offset = orcSplit.getStart();
     this.length = orcSplit.getLength();
 
-    // Careful with the range here now, we do not want to read the whole base file like deltas.
-    this.baseReader = reader.rowsOptions(readerOptions.range(offset, length));
-
-    // VectorizedRowBatchBase schema is picked up from the baseReader because the SchemaEvolution
-    // stuff happens at the ORC layer that understands how to map user schema to acid schema.
-    if (this.baseReader instanceof RecordReaderImpl) {
-      this.vectorizedRowBatchBase = ((RecordReaderImpl) this.baseReader).createRowBatch();
-    } else {
-      throw new IOException("Failed to create vectorized row batch for the reader of type "
-          + this.baseReader.getClass().getName());
-    }
-
     int partitionColumnCount = (rbCtx != null) ? rbCtx.getPartitionColumnCount() : 0;
     if (partitionColumnCount > 0) {
       partitionValues = new Object[partitionColumnCount];
@@ -137,6 +175,8 @@ public class VectorizedOrcAcidRowBatchReader
       // delete event on-demand. Caps the memory consumption to (some_const * no. of readers).
       this.deleteEventRegistry = new SortMergedDeleteEventRegistry(conf, orcSplit, deleteEventReaderOptions);
     }
+
+    recordIdColumnVector = new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, null, null, null);
   }
 
   /**
@@ -190,7 +230,7 @@ public class VectorizedOrcAcidRowBatchReader
         }
         addPartitionCols = false;
       }
-      if (!baseReader.nextBatch(vectorizedRowBatchBase)) {
+      if (!baseReader.next(null, vectorizedRowBatchBase)) {
         return false;
       }
     } catch (Exception e) {
@@ -222,7 +262,8 @@ public class VectorizedOrcAcidRowBatchReader
     findRecordsWithInvalidTransactionIds(vectorizedRowBatchBase, selectedBitSet);
 
     // Case 2- find rows which have been deleted.
-    this.deleteEventRegistry.findDeletedRecords(vectorizedRowBatchBase, selectedBitSet);
+    this.deleteEventRegistry.findDeletedRecords(vectorizedRowBatchBase.cols,
+        vectorizedRowBatchBase.size, selectedBitSet);
 
     if (selectedBitSet.cardinality() == vectorizedRowBatchBase.size) {
       // None of the cases above matched and everything is selected. Hence, we will use the
@@ -251,23 +292,33 @@ public class VectorizedOrcAcidRowBatchReader
     StructColumnVector payloadStruct = (StructColumnVector) vectorizedRowBatchBase.cols[OrcRecordUpdater.ROW];
     // Transfer columnVector objects from base batch to outgoing batch.
     System.arraycopy(payloadStruct.fields, 0, value.cols, 0, value.getDataColumnCount());
+    if (rbCtx != null) {
+      recordIdColumnVector.fields[0] = vectorizedRowBatchBase.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION];
+      recordIdColumnVector.fields[1] = vectorizedRowBatchBase.cols[OrcRecordUpdater.BUCKET];
+      recordIdColumnVector.fields[2] = vectorizedRowBatchBase.cols[OrcRecordUpdater.ROW_ID];
+      rbCtx.setRecordIdColumnVector(recordIdColumnVector);
+    }
     progress = baseReader.getProgress();
     return true;
   }
 
-  private void findRecordsWithInvalidTransactionIds(VectorizedRowBatch batch, BitSet selectedBitSet) {
-    if (batch.cols[OrcRecordUpdater.CURRENT_TRANSACTION].isRepeating) {
+  protected void findRecordsWithInvalidTransactionIds(VectorizedRowBatch batch, BitSet selectedBitSet) {
+    findRecordsWithInvalidTransactionIds(batch.cols, batch.size, selectedBitSet);
+  }
+
+  protected void findRecordsWithInvalidTransactionIds(ColumnVector[] cols, int size, BitSet selectedBitSet) {
+    if (cols[OrcRecordUpdater.CURRENT_TRANSACTION].isRepeating) {
       // When we have repeating values, we can unset the whole bitset at once
       // if the repeating value is not a valid transaction.
       long currentTransactionIdForBatch = ((LongColumnVector)
-          batch.cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector[0];
+          cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector[0];
       if (!validTxnList.isTxnValid(currentTransactionIdForBatch)) {
-        selectedBitSet.clear(0, batch.size);
+        selectedBitSet.clear(0, size);
       }
       return;
     }
     long[] currentTransactionVector =
-        ((LongColumnVector) batch.cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector;
+        ((LongColumnVector) cols[OrcRecordUpdater.CURRENT_TRANSACTION]).vector;
     // Loop through the bits that are set to true and mark those rows as false, if their
     // current transactions are not valid.
     for (int setBitIndex = selectedBitSet.nextSetBit(0);
@@ -319,15 +370,16 @@ public class VectorizedOrcAcidRowBatchReader
    * will read the delete delta files and will create their own internal
    * data structures to maintain record ids of the records that got deleted.
    */
-  static interface DeleteEventRegistry {
+  protected static interface DeleteEventRegistry {
     /**
      * Modifies the passed bitset to indicate which of the rows in the batch
      * have been deleted. Assumes that the batch.size is equal to bitset size.
-     * @param batch
+     * @param cols
+     * @param size
      * @param selectedBitSet
      * @throws IOException
      */
-    public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet) throws IOException;
+    public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBitSet) throws IOException;
 
     /**
      * The close() method can be called externally to signal the implementing classes
@@ -376,29 +428,29 @@ public class VectorizedOrcAcidRowBatchReader
     }
 
     @Override
-    public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet)
+    public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBitSet)
         throws IOException {
       if (!isDeleteRecordAvailable) {
         return;
       }
 
       long[] originalTransaction =
-          batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null
-              : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector;
+          cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null
+              : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector;
       long[] bucket =
-          batch.cols[OrcRecordUpdater.BUCKET].isRepeating ? null
-              : ((LongColumnVector) batch.cols[OrcRecordUpdater.BUCKET]).vector;
+          cols[OrcRecordUpdater.BUCKET].isRepeating ? null
+              : ((LongColumnVector) cols[OrcRecordUpdater.BUCKET]).vector;
       long[] rowId =
-          batch.cols[OrcRecordUpdater.ROW_ID].isRepeating ? null
-              : ((LongColumnVector) batch.cols[OrcRecordUpdater.ROW_ID]).vector;
+          cols[OrcRecordUpdater.ROW_ID].isRepeating ? null
+              : ((LongColumnVector) cols[OrcRecordUpdater.ROW_ID]).vector;
 
       // The following repeatedX values will be set, if any of the columns are repeating.
       long repeatedOriginalTransaction = (originalTransaction != null) ? -1
-          : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0];
+          : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0];
       long repeatedBucket = (bucket != null) ? -1
-          : ((LongColumnVector) batch.cols[OrcRecordUpdater.BUCKET]).vector[0];
+          : ((LongColumnVector) cols[OrcRecordUpdater.BUCKET]).vector[0];
       long repeatedRowId = (rowId != null) ? -1
-          : ((LongColumnVector) batch.cols[OrcRecordUpdater.ROW_ID]).vector[0];
+          : ((LongColumnVector) cols[OrcRecordUpdater.ROW_ID]).vector[0];
 
 
       // Get the first valid row in the batch still available.
@@ -413,7 +465,7 @@ public class VectorizedOrcAcidRowBatchReader
               rowId != null ? (int)  rowId[firstValidIndex] : repeatedRowId);
 
       // Get the last valid row in the batch still available.
-      int lastValidIndex = selectedBitSet.previousSetBit(batch.size - 1);
+      int lastValidIndex = selectedBitSet.previousSetBit(size - 1);
       RecordIdentifier lastRecordIdInBatch =
           new RecordIdentifier(
               originalTransaction != null ? originalTransaction[lastValidIndex] : repeatedOriginalTransaction,
@@ -860,7 +912,7 @@ public class VectorizedOrcAcidRowBatchReader
     }
 
     @Override
-    public void findDeletedRecords(VectorizedRowBatch batch, BitSet selectedBitSet)
+    public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBitSet)
         throws IOException {
       if (rowIds == null || compressedOtids == null) {
         return;
@@ -869,19 +921,19 @@ public class VectorizedOrcAcidRowBatchReader
       // check if it is deleted or not.
 
       long[] originalTransactionVector =
-          batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null
-              : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector;
+          cols[OrcRecordUpdater.ORIGINAL_TRANSACTION].isRepeating ? null
+              : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector;
       long repeatedOriginalTransaction = (originalTransactionVector != null) ? -1
-          : ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0];
+          : ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_TRANSACTION]).vector[0];
 
       long[] bucketProperties =
-        batch.cols[OrcRecordUpdater.BUCKET].isRepeating ? null
-          : ((LongColumnVector)batch.cols[OrcRecordUpdater.BUCKET]).vector;
+        cols[OrcRecordUpdater.BUCKET].isRepeating ? null
+          : ((LongColumnVector) cols[OrcRecordUpdater.BUCKET]).vector;
       int repeatedBucketProperty = (bucketProperties != null) ? -1
-        : (int)((LongColumnVector) batch.cols[OrcRecordUpdater.BUCKET]).vector[0];
+        : (int)((LongColumnVector) cols[OrcRecordUpdater.BUCKET]).vector[0];
 
       long[] rowIdVector =
-          ((LongColumnVector) batch.cols[OrcRecordUpdater.ROW_ID]).vector;
+          ((LongColumnVector) cols[OrcRecordUpdater.ROW_ID]).vector;
 
       for (int setBitIndex = selectedBitSet.nextSetBit(0);
           setBitIndex >= 0;

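The DeleteEventRegistry methods now take raw ColumnVector arrays plus a row count instead of a whole VectorizedRowBatch, so LLAP can pass in the ACID metadata vectors it decoded itself; the registries only flip bits in a BitSet of surviving rows. Roughly, the caller then maps that BitSet onto the outgoing batch's selected array, along the lines of this sketch (not the committed code; names are illustrative):

    import java.util.BitSet;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    // Sketch only: applying a "still selected" BitSet to an outgoing batch.
    public class SelectedBitSetSketch {
      static void applySelection(VectorizedRowBatch batch, BitSet selectedBitSet, int size) {
        if (selectedBitSet.cardinality() == size) {
          // Nothing was filtered out; the whole batch can be used as-is.
          batch.size = size;
          batch.selectedInUse = false;
          return;
        }
        // Copy the indexes of surviving rows into the selected array.
        batch.selectedInUse = true;
        batch.size = 0;
        for (int i = selectedBitSet.nextSetBit(0); i >= 0; i = selectedBitSet.nextSetBit(i + 1)) {
          batch.selected[batch.size++] = i;
        }
      }
    }
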
http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java
index a2725b2..885ef83 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java
@@ -19,6 +19,8 @@
 package org.apache.hadoop.hive.ql.io.orc;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
@@ -39,7 +41,7 @@ import java.io.IOException;
  * the non-vectorized ACID reader and moving the data into a vectorized row
  * batch.
  */
-class VectorizedOrcAcidRowReader
+public class VectorizedOrcAcidRowReader
     implements org.apache.hadoop.mapred.RecordReader<NullWritable,
                                                      VectorizedRowBatch> {
   private final AcidInputFormat.RowReader<OrcStruct> innerReader;
@@ -49,11 +51,14 @@ class VectorizedOrcAcidRowReader
   private Object[] partitionValues;
   private final ObjectInspector objectInspector;
   private final DataOutputBuffer buffer = new DataOutputBuffer();
+  private final StructColumnVector recordIdColumnVector;
+  private final LongColumnVector transactionColumnVector;
+  private final LongColumnVector bucketColumnVector;
+  private final LongColumnVector rowIdColumnVector;
 
-  VectorizedOrcAcidRowReader(AcidInputFormat.RowReader<OrcStruct> inner,
-                             Configuration conf,
-                             VectorizedRowBatchCtx vectorizedRowBatchCtx,
-                             FileSplit split) throws IOException {
+  public VectorizedOrcAcidRowReader(AcidInputFormat.RowReader<OrcStruct> inner,
+      Configuration conf, VectorizedRowBatchCtx vectorizedRowBatchCtx, FileSplit split)
+      throws IOException {
     this.innerReader = inner;
     this.key = inner.createKey();
     rbCtx = vectorizedRowBatchCtx;
@@ -64,6 +69,12 @@ class VectorizedOrcAcidRowReader
     }
     this.value = inner.createValue();
     this.objectInspector = inner.getObjectInspector();
+    this.transactionColumnVector = new LongColumnVector();
+    this.bucketColumnVector = new LongColumnVector();
+    this.rowIdColumnVector = new LongColumnVector();
+    this.recordIdColumnVector =
+        new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
+            transactionColumnVector, bucketColumnVector, rowIdColumnVector);
   }
 
   @Override
@@ -81,19 +92,30 @@ class VectorizedOrcAcidRowReader
     try {
       VectorizedBatchUtil.acidAddRowToBatch(value,
           (StructObjectInspector) objectInspector,
-          vectorizedRowBatch.size++, vectorizedRowBatch, rbCtx, buffer);
+          vectorizedRowBatch.size, vectorizedRowBatch, rbCtx, buffer);
+      addRecordId(vectorizedRowBatch.size, key);
+      vectorizedRowBatch.size++;
       while (vectorizedRowBatch.size < vectorizedRowBatch.selected.length &&
           innerReader.next(key, value)) {
         VectorizedBatchUtil.acidAddRowToBatch(value,
             (StructObjectInspector) objectInspector,
-            vectorizedRowBatch.size++, vectorizedRowBatch, rbCtx, buffer);
+            vectorizedRowBatch.size, vectorizedRowBatch, rbCtx, buffer);
+        addRecordId(vectorizedRowBatch.size, key);
+        vectorizedRowBatch.size++;
       }
+      rbCtx.setRecordIdColumnVector(recordIdColumnVector);
     } catch (Exception e) {
       throw new IOException("error iterating", e);
     }
     return true;
   }
 
+  private void addRecordId(int index, RecordIdentifier key) {
+    transactionColumnVector.vector[index] = key.getTransactionId();
+    bucketColumnVector.vector[index] = key.getBucketProperty();
+    rowIdColumnVector.vector[index] = key.getRowId();
+  }
+
   @Override
   public NullWritable createKey() {
     return NullWritable.get();
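
A minimal sketch of how the record-identifier (ROW__ID) struct introduced in this file is assembled from three long vectors; the sketch class and its fillRow helper are illustrative, while the field order (transaction, bucket, rowId) and the RecordIdentifier getters match the patch above.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;

final class RecordIdVectorSketch {
  private final LongColumnVector txn = new LongColumnVector();
  private final LongColumnVector bucket = new LongColumnVector();
  private final LongColumnVector rowId = new LongColumnVector();
  // The struct wraps the three children in RecordIdentifier field order.
  private final StructColumnVector recordId =
      new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, txn, bucket, rowId);

  // Copy one RecordIdentifier into row 'index' of each child vector.
  void fillRow(int index, RecordIdentifier key) {
    txn.vector[index] = key.getTransactionId();
    bucket.vector[index] = key.getBucketProperty();
    rowId.vector[index] = key.getRowId();
  }
}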

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
index c21327f..c8a0415 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedTreeReaderFactory.java
@@ -2149,7 +2149,11 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
     } else if (batch.hasVectors(columnIndex)) {
       vectors = batch.getColumnVectors(columnIndex);
     } else {
-      throw new AssertionError("Batch has no data for " + columnIndex + ": " + batch);
+      // A struct column can have a null child column
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Batch has no data for " + columnIndex + ": " + batch);
+      }
+      return null;
     }
 
     // EncodedColumnBatch is already decompressed, we don't really need to pass codec.
@@ -2712,7 +2716,9 @@ public class EncodedTreeReaderFactory extends TreeReaderFactory {
       }
       if (fields != null) {
         for (TreeReader child : fields) {
-          child.seek(index);
+          if (child != null) {
+            child.seek(index);
+          }
         }
       }
     }
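
A minimal sketch of the null-tolerant child traversal this hunk adds; the Child interface and seekAll helper are illustrative stand-ins for the TreeReader children the factory actually handles, where a null child means the struct field produced no data for this batch.

import java.io.IOException;

final class NullSafeSeekSketch {
  interface Child {
    void seek(int index) throws IOException;
  }

  // Seek every non-null child; null children are simply skipped.
  static void seekAll(Child[] fields, int index) throws IOException {
    if (fields == null) {
      return;
    }
    for (Child child : fields) {
      if (child != null) {
        child.seek(index);
      }
    }
  }
}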

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index b3193d1..5765c17 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.parse;
 
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -69,6 +71,8 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     if (useSuper) {
       super.analyzeInternal(tree);
     } else {
+      // TODO: remove when this is enabled everywhere
+      HiveConf.setBoolVar(conf, ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED, true);
 
       if (!getTxnMgr().supportsAcid()) {
         throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TXNMGR.getMsg());

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/test/queries/clientpositive/llap_acid_fast.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/llap_acid_fast.q b/ql/src/test/queries/clientpositive/llap_acid_fast.q
new file mode 100644
index 0000000..376b19c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/llap_acid_fast.q
@@ -0,0 +1,49 @@
+set hive.mapred.mode=nonstrict;
+SET hive.vectorized.execution.enabled=true;
+
+SET hive.llap.io.enabled=true;
+
+SET hive.exec.orc.default.buffer.size=32768;
+SET hive.exec.orc.default.row.index.stride=1000;
+SET hive.optimize.index.filter=true;
+set hive.fetch.task.conversion=none;
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+DROP TABLE orc_llap_acid_fast;
+
+CREATE TABLE orc_llap_acid_fast (
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE)
+partitioned by (csmallint smallint)
+clustered by (cint) into 2 buckets stored as orc
+TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default');
+
+insert into table orc_llap_acid_fast partition (csmallint = 1)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10;
+insert into table orc_llap_acid_fast partition (csmallint = 2)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10;
+insert into table orc_llap_acid_fast partition (csmallint = 3)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10;
+
+explain vectorization only detail
+select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint;
+select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint;
+
+insert into table orc_llap_acid_fast partition (csmallint = 1) values (1, 1, 1, 1);
+
+update orc_llap_acid_fast set cbigint = 2 where cint = 1;
+
+explain vectorization only detail
+select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint;
+select cint, csmallint, cbigint from orc_llap_acid_fast where cint is not null order
+by csmallint, cint;
+
+DROP TABLE orc_llap_acid_fast;

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
index 7905194..85f005e 100644
--- a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
@@ -1124,7 +1124,7 @@ STAGE PLANS:
                         sort order: +
                         Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL
                         value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
         Reducer 2 
             Execution mode: vectorized, llap
@@ -1303,7 +1303,7 @@ STAGE PLANS:
                         sort order: +
                         Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL
                         value expressions: _col1 (type: string), _col2 (type: string)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
         Reducer 2 
             Execution mode: vectorized, llap
@@ -1620,7 +1620,7 @@ STAGE PLANS:
                         Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                         Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL
                         value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
         Reducer 2 
             Execution mode: vectorized, llap
@@ -1800,7 +1800,7 @@ STAGE PLANS:
                         Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                         Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL
                         value expressions: _col1 (type: string), _col2 (type: string)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
         Reducer 2 
             Execution mode: vectorized, llap

http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/test/results/clientpositive/llap/llap_acid.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/llap_acid.q.out b/ql/src/test/results/clientpositive/llap/llap_acid.q.out
new file mode 100644
index 0000000..ff89d1d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/llap_acid.q.out
@@ -0,0 +1,321 @@
+PREHOOK: query: DROP TABLE orc_llap
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orc_llap
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE orc_llap (
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE)
+partitioned by (csmallint smallint)
+clustered by (cint) into 2 buckets stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_llap
+POSTHOOK: query: CREATE TABLE orc_llap (
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE)
+partitioned by (csmallint smallint)
+clustered by (cint) into 2 buckets stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_llap
+PREHOOK: query: insert into table orc_llap partition (csmallint = 1)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@orc_llap@csmallint=1
+POSTHOOK: query: insert into table orc_llap partition (csmallint = 1)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@orc_llap@csmallint=1
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: insert into table orc_llap partition (csmallint = 2)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@orc_llap@csmallint=2
+POSTHOOK: query: insert into table orc_llap partition (csmallint = 2)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@orc_llap@csmallint=2
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=2).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: ALTERTABLE_PROPERTIES
+PREHOOK: Input: default@orc_llap
+PREHOOK: Output: default@orc_llap
+POSTHOOK: query: alter table orc_llap SET TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: ALTERTABLE_PROPERTIES
+POSTHOOK: Input: default@orc_llap
+POSTHOOK: Output: default@orc_llap
+PREHOOK: query: insert into table orc_llap partition (csmallint = 3)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@orc_llap@csmallint=3
+POSTHOOK: query: insert into table orc_llap partition (csmallint = 3)
+select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@orc_llap@csmallint=3
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=3).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=3).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=3).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=3).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: explain
+select cint, csmallint, cbigint from orc_llap where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select cint, csmallint, cbigint from orc_llap where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orc_llap
+                  filterExpr: cint is not null (type: boolean)
+                  Statistics: Num rows: 20 Data size: 616 Basic stats: COMPLETE Column stats: PARTIAL
+                  Filter Operator
+                    predicate: cint is not null (type: boolean)
+                    Statistics: Num rows: 19 Data size: 304 Basic stats: COMPLETE Column stats: PARTIAL
+                    Select Operator
+                      expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 19 Data size: 304 Basic stats: COMPLETE Column stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col1 (type: smallint), _col0 (type: int)
+                        sort order: ++
+                        Statistics: Num rows: 19 Data size: 304 Basic stats: COMPLETE Column stats: PARTIAL
+                        value expressions: _col2 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 19 Data size: 304 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 19 Data size: 304 Basic stats: COMPLETE Column stats: PARTIAL
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select cint, csmallint, cbigint from orc_llap where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap
+PREHOOK: Input: default@orc_llap@csmallint=1
+PREHOOK: Input: default@orc_llap@csmallint=2
+PREHOOK: Input: default@orc_llap@csmallint=3
+#### A masked pattern was here ####
+POSTHOOK: query: select cint, csmallint, cbigint from orc_llap where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap
+POSTHOOK: Input: default@orc_llap@csmallint=1
+POSTHOOK: Input: default@orc_llap@csmallint=2
+POSTHOOK: Input: default@orc_llap@csmallint=3
+#### A masked pattern was here ####
+-285355633	1	-1241163445
+-109813638	1	-58941842
+164554497	1	1161977292
+199879534	1	123351087
+246423894	1	-1645852809
+354670578	1	562841852
+455419170	1	1108177470
+665801232	1	480783141
+708885482	1	-1645852809
+-285355633	2	-1241163445
+-109813638	2	-58941842
+164554497	2	1161977292
+199879534	2	123351087
+246423894	2	-1645852809
+354670578	2	562841852
+455419170	2	1108177470
+665801232	2	480783141
+708885482	2	-1645852809
+-923308739	3	-1887561756
+-3728	3	-1887561756
+762	3	-1645852809
+6981	3	-1887561756
+253665376	3	NULL
+497728223	3	-1887561756
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+PREHOOK: query: insert into table orc_llap partition (csmallint = 1) values (1, 1, 1, 1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_llap@csmallint=1
+POSTHOOK: query: insert into table orc_llap partition (csmallint = 1) values (1, 1, 1, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_llap@csmallint=1
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cbigint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cdouble EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cfloat EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: orc_llap PARTITION(csmallint=1).cint EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: update orc_llap set cbigint = 2 where cint = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap
+PREHOOK: Input: default@orc_llap@csmallint=1
+PREHOOK: Input: default@orc_llap@csmallint=2
+PREHOOK: Input: default@orc_llap@csmallint=3
+PREHOOK: Output: default@orc_llap@csmallint=1
+PREHOOK: Output: default@orc_llap@csmallint=2
+PREHOOK: Output: default@orc_llap@csmallint=3
+POSTHOOK: query: update orc_llap set cbigint = 2 where cint = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap
+POSTHOOK: Input: default@orc_llap@csmallint=1
+POSTHOOK: Input: default@orc_llap@csmallint=2
+POSTHOOK: Input: default@orc_llap@csmallint=3
+POSTHOOK: Output: default@orc_llap@csmallint=1
+POSTHOOK: Output: default@orc_llap@csmallint=2
+POSTHOOK: Output: default@orc_llap@csmallint=3
+PREHOOK: query: explain
+select cint, csmallint, cbigint from orc_llap where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select cint, csmallint, cbigint from orc_llap where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orc_llap
+                  filterExpr: cint is not null (type: boolean)
+                  Statistics: Num rows: 20 Data size: 616 Basic stats: COMPLETE Column stats: PARTIAL
+                  Filter Operator
+                    predicate: cint is not null (type: boolean)
+                    Statistics: Num rows: 19 Data size: 304 Basic stats: COMPLETE Column stats: PARTIAL
+                    Select Operator
+                      expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 19 Data size: 304 Basic stats: COMPLETE Column stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col1 (type: smallint), _col0 (type: int)
+                        sort order: ++
+                        Statistics: Num rows: 19 Data size: 304 Basic stats: COMPLETE Column stats: PARTIAL
+                        value expressions: _col2 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 19 Data size: 304 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 19 Data size: 304 Basic stats: COMPLETE Column stats: PARTIAL
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select cint, csmallint, cbigint from orc_llap where cint is not null order
+by csmallint, cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_llap
+PREHOOK: Input: default@orc_llap@csmallint=1
+PREHOOK: Input: default@orc_llap@csmallint=2
+PREHOOK: Input: default@orc_llap@csmallint=3
+#### A masked pattern was here ####
+POSTHOOK: query: select cint, csmallint, cbigint from orc_llap where cint is not null order
+by csmallint, cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_llap
+POSTHOOK: Input: default@orc_llap@csmallint=1
+POSTHOOK: Input: default@orc_llap@csmallint=2
+POSTHOOK: Input: default@orc_llap@csmallint=3
+#### A masked pattern was here ####
+-285355633	1	-1241163445
+-109813638	1	-58941842
+1	1	2
+164554497	1	1161977292
+199879534	1	123351087
+246423894	1	-1645852809
+354670578	1	562841852
+455419170	1	1108177470
+665801232	1	480783141
+708885482	1	-1645852809
+-285355633	2	-1241163445
+-109813638	2	-58941842
+164554497	2	1161977292
+199879534	2	123351087
+246423894	2	-1645852809
+354670578	2	562841852
+455419170	2	1108177470
+665801232	2	480783141
+708885482	2	-1645852809
+-923308739	3	-1887561756
+-3728	3	-1887561756
+762	3	-1645852809
+6981	3	-1887561756
+253665376	3	NULL
+497728223	3	-1887561756
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+528534767	3	NULL
+PREHOOK: query: DROP TABLE orc_llap
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orc_llap
+PREHOOK: Output: default@orc_llap
+POSTHOOK: query: DROP TABLE orc_llap
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orc_llap
+POSTHOOK: Output: default@orc_llap

