hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ga...@apache.org
Subject svn commit: r1633657 - in /hive/branches/branch-0.14/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/ java/org/apache/hadoop/hive/ql/io/orc/ test/queries/clientpositive/ test/results/clientpositive/
Date Wed, 22 Oct 2014 17:20:48 GMT
Author: gates
Date: Wed Oct 22 17:20:48 2014
New Revision: 1633657

URL: http://svn.apache.org/r1633657
Log:
HIVE-8474 Vectorized reads of transactional tables fail when not all columns are selected
(Alan Gates, reviewed by Ashutosh Chauhan)

Added:
    hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization_partition.q
    hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization_project.q
    hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization_partition.q.out
    hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization_project.q.out
Modified:
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java?rev=1633657&r1=1633656&r2=1633657&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java Wed Oct 22 17:20:48 2014
@@ -23,6 +23,8 @@ import java.sql.Timestamp;
 import java.util.LinkedList;
 import java.util.List;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
@@ -50,6 +52,7 @@ import org.apache.hadoop.io.LongWritable
 import org.apache.hadoop.io.Text;
 
 public class VectorizedBatchUtil {
+  private static final Log LOG = LogFactory.getLog(VectorizedBatchUtil.class);
 
   /**
    * Sets the IsNull value for ColumnVector at specified index
@@ -232,169 +235,237 @@ public class VectorizedBatchUtil {
     final int off = colOffset;
     // Iterate thru the cols and load the batch
     for (int i = 0; i < fieldRefs.size(); i++) {
-      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
-      ObjectInspector foi = fieldRefs.get(i).getFieldObjectInspector();
-
-      // Vectorization only supports PRIMITIVE data types. Assert the same
-      assert (foi.getCategory() == Category.PRIMITIVE);
+      setVector(row, oi, fieldRefs, batch, buffer, rowIndex, i, off);
+    }
+  }
 
-      // Get writable object
-      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
-      Object writableCol = poi.getPrimitiveWritableObject(fieldData);
-
-      // NOTE: The default value for null fields in vectorization is 1 for int types, NaN for
-      // float/double. String types have no default value for null.
-      switch (poi.getPrimitiveCategory()) {
-      case BOOLEAN: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
+  /**
+   * Iterates thru all the columns in a given row and populates the batch
+   * from a given offset
+   *
+   * @param row Deserialized row object
+   * @param oi Object insepector for that row
+   * @param rowIndex index to which the row should be added to batch
+   * @param batch Vectorized batch to which the row is added at rowIndex
+   * @param context context object for this vectorized batch
+   * @param buffer
+   * @throws HiveException
+   */
+  public static void acidAddRowToBatch(Object row,
+                                       StructObjectInspector oi,
+                                       int rowIndex,
+                                       VectorizedRowBatch batch,
+                                       VectorizedRowBatchCtx context,
+                                       DataOutputBuffer buffer) throws HiveException {
+    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
+    // Iterate thru the cols and load the batch
+    for (int i = 0; i < fieldRefs.size(); i++) {
+      if (batch.cols[i] == null) {
+        // This means the column was not included in the projection from the underlying read
+        continue;
+      }
+      if (context.isPartitionCol(i)) {
+        // The value will have already been set before we're called, so don't overwrite it
+        continue;
       }
-        break;
-      case BYTE: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
+      setVector(row, oi, fieldRefs, batch, buffer, rowIndex, i, 0);
+    }
+  }
+
+  private static void setVector(Object row,
+                                StructObjectInspector oi,
+                                List<? extends StructField> fieldRefs,
+                                VectorizedRowBatch batch,
+                                DataOutputBuffer buffer,
+                                int rowIndex,
+                                int colIndex,
+                                int offset) throws HiveException {
+
+    Object fieldData = oi.getStructFieldData(row, fieldRefs.get(colIndex));
+    ObjectInspector foi = fieldRefs.get(colIndex).getFieldObjectInspector();
+
+    // Vectorization only supports PRIMITIVE data types. Assert the same
+    assert (foi.getCategory() == Category.PRIMITIVE);
+
+    // Get writable object
+    PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
+    Object writableCol = poi.getPrimitiveWritableObject(fieldData);
+
+    // NOTE: The default value for null fields in vectorization is 1 for int types, NaN for
+    // float/double. String types have no default value for null.
+    switch (poi.getPrimitiveCategory()) {
+    case BOOLEAN: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
       }
-        break;
-      case SHORT: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
+    }
+      break;
+    case BYTE: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
       }
-        break;
-      case INT: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
+    }
+      break;
+    case SHORT: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
       }
-        break;
-      case LONG: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
+    }
+      break;
+    case INT: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
       }
-        break;
-      case DATE: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays();
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
+    }
+      break;
+    case LONG: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
       }
-        break;
-      case FLOAT: {
-        DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
-          dcv.isNull[rowIndex] = false;
-        } else {
-          dcv.vector[rowIndex] = Double.NaN;
-          setNullColIsNullValue(dcv, rowIndex);
-        }
+    }
+      break;
+    case DATE: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays();
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
       }
-        break;
-      case DOUBLE: {
-        DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
-          dcv.isNull[rowIndex] = false;
-        } else {
-          dcv.vector[rowIndex] = Double.NaN;
-          setNullColIsNullValue(dcv, rowIndex);
-        }
+    }
+      break;
+    case FLOAT: {
+      DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
+        dcv.isNull[rowIndex] = false;
+      } else {
+        dcv.vector[rowIndex] = Double.NaN;
+        setNullColIsNullValue(dcv, rowIndex);
       }
-        break;
-      case TIMESTAMP: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
-          lcv.vector[rowIndex] = TimestampUtils.getTimeNanoSec(t);
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
+    }
+      break;
+    case DOUBLE: {
+      DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
+        dcv.isNull[rowIndex] = false;
+      } else {
+        dcv.vector[rowIndex] = Double.NaN;
+        setNullColIsNullValue(dcv, rowIndex);
       }
-        break;
-      case BINARY: {
-        BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-            bcv.isNull[rowIndex] = false;
-            BytesWritable bw = (BytesWritable) writableCol;
-            byte[] bytes = bw.getBytes();
-            int start = buffer.getLength();
-            int length = bytes.length;
-            try {
-              buffer.write(bytes, 0, length);
-            } catch (IOException ioe) {
-              throw new IllegalStateException("bad write", ioe);
-            }
-            bcv.setRef(rowIndex, buffer.getData(), start, length);
-        } else {
-          setNullColIsNullValue(bcv, rowIndex);
-        }
+    }
+      break;
+    case TIMESTAMP: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
+        lcv.vector[rowIndex] = TimestampUtils.getTimeNanoSec(t);
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
       }
-        break;
-      case STRING: {
-        BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
+    }
+      break;
+    case BINARY: {
+      BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
           bcv.isNull[rowIndex] = false;
-          Text colText = (Text) writableCol;
+          BytesWritable bw = (BytesWritable) writableCol;
+          byte[] bytes = bw.getBytes();
           int start = buffer.getLength();
-          int length = colText.getLength();
+          int length = bytes.length;
           try {
-            buffer.write(colText.getBytes(), 0, length);
+            buffer.write(bytes, 0, length);
           } catch (IOException ioe) {
             throw new IllegalStateException("bad write", ioe);
           }
           bcv.setRef(rowIndex, buffer.getData(), start, length);
-        } else {
-          setNullColIsNullValue(bcv, rowIndex);
-        }
+      } else {
+        setNullColIsNullValue(bcv, rowIndex);
       }
-        break;
-      case CHAR: {
-        BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
+    }
+      break;
+    case STRING: {
+      BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        bcv.isNull[rowIndex] = false;
+        Text colText = (Text) writableCol;
+        int start = buffer.getLength();
+        int length = colText.getLength();
+        try {
+          buffer.write(colText.getBytes(), 0, length);
+        } catch (IOException ioe) {
+          throw new IllegalStateException("bad write", ioe);
+        }
+        bcv.setRef(rowIndex, buffer.getData(), start, length);
+      } else {
+        setNullColIsNullValue(bcv, rowIndex);
+      }
+    }
+      break;
+    case CHAR: {
+      BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        bcv.isNull[rowIndex] = false;
+        HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar();
+        byte[] bytes = colHiveChar.getStrippedValue().getBytes();
+
+        // We assume the CHAR maximum length was enforced when the object was created.
+        int length = bytes.length;
+
+        int start = buffer.getLength();
+        try {
+          // In vector mode, we store CHAR as unpadded.
+          buffer.write(bytes, 0, length);
+        } catch (IOException ioe) {
+          throw new IllegalStateException("bad write", ioe);
+        }
+        bcv.setRef(rowIndex, buffer.getData(), start, length);
+      } else {
+        setNullColIsNullValue(bcv, rowIndex);
+      }
+    }
+      break;
+    case VARCHAR: {
+        BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
         if (writableCol != null) {
           bcv.isNull[rowIndex] = false;
-          HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar();
-          byte[] bytes = colHiveChar.getStrippedValue().getBytes();
-          
-          // We assume the CHAR maximum length was enforced when the object was created.
+          HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar();
+          byte[] bytes = colHiveVarchar.getValue().getBytes();
+
+          // We assume the VARCHAR maximum length was enforced when the object was created.
           int length = bytes.length;
 
           int start = buffer.getLength();
           try {
-            // In vector mode, we store CHAR as unpadded.
             buffer.write(bytes, 0, length);
           } catch (IOException ioe) {
             throw new IllegalStateException("bad write", ioe);
@@ -405,45 +476,21 @@ public class VectorizedBatchUtil {
         }
       }
         break;
-      case VARCHAR: {
-          BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
-          if (writableCol != null) {
-            bcv.isNull[rowIndex] = false;
-            HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar();
-            byte[] bytes = colHiveVarchar.getValue().getBytes();
-
-            // We assume the VARCHAR maximum length was enforced when the object was created.
-            int length = bytes.length;
-
-            int start = buffer.getLength();
-            try {
-              buffer.write(bytes, 0, length);
-            } catch (IOException ioe) {
-              throw new IllegalStateException("bad write", ioe);
-            }
-            bcv.setRef(rowIndex, buffer.getData(), start, length);
-          } else {
-            setNullColIsNullValue(bcv, rowIndex);
-          }
-        }
-          break;
-      case DECIMAL:
-        DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          dcv.isNull[rowIndex] = false;
-          HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
-          dcv.vector[rowIndex].update(wobj.getHiveDecimal().unscaledValue(),
-              (short) wobj.getScale());
-        } else {
-          setNullColIsNullValue(dcv, rowIndex);
-        }
-        break;
-      default:
-        throw new HiveException("Vectorizaton is not supported for datatype:"
-            + poi.getPrimitiveCategory());
-      }
+    case DECIMAL:
+      DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        dcv.isNull[rowIndex] = false;
+        HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
+        dcv.vector[rowIndex].update(wobj.getHiveDecimal().unscaledValue(),
+            (short) wobj.getScale());
+      } else {
+        setNullColIsNullValue(dcv, rowIndex);
+      }
+      break;
+    default:
+      throw new HiveException("Vectorizaton is not supported for datatype:" +
+          poi.getPrimitiveCategory());
     }
   }
-
 }
 

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1633657&r1=1633656&r2=1633657&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Wed Oct 22 17:20:48 2014
@@ -22,10 +22,12 @@ import java.sql.Date;
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
+import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -84,7 +86,11 @@ public class VectorizedRowBatchCtx {
   private Map<String, Object> partitionValues;
   
   //partition types
-  private Map<String, PrimitiveCategory> partitionTypes;  
+  private Map<String, PrimitiveCategory> partitionTypes;
+
+  // partition column positions, for use by classes that need to know whether a given column is a
+  // partition column
+  private Set<Integer> partitionCols;
   
   // Column projection list - List of column indexes to include. This
   // list does not contain partition columns
@@ -203,12 +209,13 @@ public class VectorizedRowBatchCtx {
     // Check to see if this split is part of a partition of a table
     String pcols = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
 
+    String[] partKeys = null;
     if (pcols != null && pcols.length() > 0) {
 
       // Partitions exist for this table. Get the partition object inspector and
       // raw row object inspector (row with out partition col)
       LinkedHashMap<String, String> partSpec = part.getPartSpec();
-      String[] partKeys = pcols.trim().split("/");
+      partKeys = pcols.trim().split("/");
       String pcolTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
     
       String[] partKeyTypes = pcolTypes.trim().split(":");      
       
@@ -262,6 +269,15 @@ public class VectorizedRowBatchCtx {
               .asList(new StructObjectInspector[] {partRawRowObjectInspector, partObjectInspector}));
       rowOI = rowObjectInspector;
       rawRowOI = partRawRowObjectInspector;
+
+      // We have to do this after we've set rowOI, as getColIndexBasedOnColName uses it
+      partitionCols = new HashSet<Integer>();
+      if (pcols != null && pcols.length() > 0) {
+        for (int i = 0; i < partKeys.length; i++) {
+          partitionCols.add(getColIndexBasedOnColName(partKeys[i]));
+        }
+      }
+
     } else {
 
       // No partitions for this table, hence row OI equals raw row OI
@@ -586,6 +602,16 @@ public class VectorizedRowBatchCtx {
     }
   }
 
+  /**
+   * Determine whether a given column is a partition column
+   * @param colnum column number in
+   * {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}s created by this context.
+   * @return true if it is a partition column, false otherwise
+   */
+  public final boolean isPartitionCol(int colnum) {
+    return (partitionCols == null) ? false : partitionCols.contains(colnum);
+  }
+
   private void addScratchColumnsToBatch(VectorizedRowBatch vrb) throws HiveException {
     if (columnTypeMap != null && !columnTypeMap.isEmpty()) {
       int origNumCols = vrb.numCols;

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java?rev=1633657&r1=1633656&r2=1633657&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java Wed Oct 22 17:20:48 2014
@@ -48,7 +48,6 @@ class VectorizedOrcAcidRowReader
   private final OrcStruct value;
   private final VectorizedRowBatchCtx rowBatchCtx;
   private final ObjectInspector objectInspector;
-  private boolean needToSetPartition = true;
   private final DataOutputBuffer buffer = new DataOutputBuffer();
 
   VectorizedOrcAcidRowReader(AcidInputFormat.RowReader<OrcStruct> inner,
@@ -83,23 +82,20 @@ class VectorizedOrcAcidRowReader
     if (!innerReader.next(key, value)) {
       return false;
     }
-    if (needToSetPartition) {
-      try {
-        rowBatchCtx.addPartitionColsToBatch(vectorizedRowBatch);
-      } catch (HiveException e) {
-        throw new IOException("Problem adding partition column", e);
-      }
-      needToSetPartition = false;
+    try {
+      rowBatchCtx.addPartitionColsToBatch(vectorizedRowBatch);
+    } catch (HiveException e) {
+      throw new IOException("Problem adding partition column", e);
     }
     try {
-      VectorizedBatchUtil.addRowToBatch(value,
+      VectorizedBatchUtil.acidAddRowToBatch(value,
           (StructObjectInspector) objectInspector,
-          vectorizedRowBatch.size++, vectorizedRowBatch, buffer);
+          vectorizedRowBatch.size++, vectorizedRowBatch, rowBatchCtx, buffer);
       while (vectorizedRowBatch.size < vectorizedRowBatch.selected.length &&
           innerReader.next(key, value)) {
-        VectorizedBatchUtil.addRowToBatch(value,
+        VectorizedBatchUtil.acidAddRowToBatch(value,
             (StructObjectInspector) objectInspector,
-            vectorizedRowBatch.size++, vectorizedRowBatch, buffer);
+            vectorizedRowBatch.size++, vectorizedRowBatch, rowBatchCtx, buffer);
       }
     } catch (HiveException he) {
       throw new IOException("error iterating", he);

Added: hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization_partition.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization_partition.q?rev=1633657&view=auto
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization_partition.q (added)
+++ hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization_partition.q Wed Oct 22 17:20:48 2014
@@ -0,0 +1,10 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.enforce.bucketing=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+CREATE TABLE acid_vectorized_part(a INT, b STRING) partitioned by (ds string) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true');
+insert into table acid_vectorized_part partition (ds = 'today') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10;
+insert into table acid_vectorized_part partition (ds = 'tomorrow') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10;
+set hive.vectorized.execution.enabled=true;
+select * from acid_vectorized_part order by a, b;

Added: hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization_project.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization_project.q?rev=1633657&view=auto
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization_project.q (added)
+++ hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization_project.q Wed Oct 22 17:20:48 2014
@@ -0,0 +1,11 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.enforce.bucketing=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+CREATE TABLE acid_vectorized(a INT, b STRING, c float) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true');
+insert into table acid_vectorized select cint, cstring1, cfloat from alltypesorc where cint is not null order by cint limit 10;
+set hive.vectorized.execution.enabled=true;
+select a,b from acid_vectorized order by a;
+select a,c from acid_vectorized order by a;
+select b,c from acid_vectorized order by b;

Added: hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization_partition.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization_partition.q.out?rev=1633657&view=auto
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization_partition.q.out (added)
+++ hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization_partition.q.out Wed Oct 22 17:20:48 2014
@@ -0,0 +1,60 @@
+PREHOOK: query: CREATE TABLE acid_vectorized_part(a INT, b STRING) partitioned by (ds string) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_vectorized_part
+POSTHOOK: query: CREATE TABLE acid_vectorized_part(a INT, b STRING) partitioned by (ds string) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_vectorized_part
+PREHOOK: query: insert into table acid_vectorized_part partition (ds = 'today') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@acid_vectorized_part@ds=today
+POSTHOOK: query: insert into table acid_vectorized_part partition (ds = 'today') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@acid_vectorized_part@ds=today
+POSTHOOK: Lineage: acid_vectorized_part PARTITION(ds=today).a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized_part PARTITION(ds=today).b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: insert into table acid_vectorized_part partition (ds = 'tomorrow') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@acid_vectorized_part@ds=tomorrow
+POSTHOOK: query: insert into table acid_vectorized_part partition (ds = 'tomorrow') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@acid_vectorized_part@ds=tomorrow
+POSTHOOK: Lineage: acid_vectorized_part PARTITION(ds=tomorrow).a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized_part PARTITION(ds=tomorrow).b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: select * from acid_vectorized_part order by a, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized_part
+PREHOOK: Input: default@acid_vectorized_part@ds=today
+PREHOOK: Input: default@acid_vectorized_part@ds=tomorrow
+#### A masked pattern was here ####
+POSTHOOK: query: select * from acid_vectorized_part order by a, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized_part
+POSTHOOK: Input: default@acid_vectorized_part@ds=today
+POSTHOOK: Input: default@acid_vectorized_part@ds=tomorrow
+#### A masked pattern was here ####
+-1073279343	oj1YrV5Wa	today
+-1073279343	oj1YrV5Wa	tomorrow
+-1073051226	A34p7oRr2WvUJNf	tomorrow
+-1073051226	A34p7oRr2WvUJNf	today
+-1072910839	0iqrc5	tomorrow
+-1072910839	0iqrc5	today
+-1072081801	dPkN74F7	today
+-1072081801	dPkN74F7	tomorrow
+-1072076362	2uLyD28144vklju213J1mr	today
+-1072076362	2uLyD28144vklju213J1mr	tomorrow
+-1071480828	aw724t8c5558x2xneC624	tomorrow
+-1071480828	aw724t8c5558x2xneC624	today
+-1071363017	Anj0oF	today
+-1071363017	Anj0oF	tomorrow
+-1070883071	0ruyd6Y50JpdGRf6HqD	tomorrow
+-1070883071	0ruyd6Y50JpdGRf6HqD	today
+-1070551679	iUR3Q	today
+-1070551679	iUR3Q	tomorrow
+-1069736047	k17Am8uPHWk02cEf1jet	tomorrow
+-1069736047	k17Am8uPHWk02cEf1jet	today

Added: hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization_project.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization_project.q.out?rev=1633657&view=auto
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization_project.q.out (added)
+++ hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization_project.q.out Wed Oct 22 17:20:48 2014
@@ -0,0 +1,73 @@
+PREHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING, c float) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING, c float) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_vectorized
+PREHOOK: query: insert into table acid_vectorized select cint, cstring1, cfloat from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: insert into table acid_vectorized select cint, cstring1, cfloat from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@acid_vectorized
+POSTHOOK: Lineage: acid_vectorized.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized.c SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+PREHOOK: query: select a,b from acid_vectorized order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b from acid_vectorized order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+-1073279343	oj1YrV5Wa
+-1073051226	A34p7oRr2WvUJNf
+-1072910839	0iqrc5
+-1072081801	dPkN74F7
+-1072076362	2uLyD28144vklju213J1mr
+-1071480828	aw724t8c5558x2xneC624
+-1071363017	Anj0oF
+-1070883071	0ruyd6Y50JpdGRf6HqD
+-1070551679	iUR3Q
+-1069736047	k17Am8uPHWk02cEf1jet
+PREHOOK: query: select a,c from acid_vectorized order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+POSTHOOK: query: select a,c from acid_vectorized order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+-1073279343	11.0
+-1073051226	NULL
+-1072910839	11.0
+-1072081801	NULL
+-1072076362	NULL
+-1071480828	-51.0
+-1071363017	8.0
+-1070883071	NULL
+-1070551679	NULL
+-1069736047	11.0
+PREHOOK: query: select b,c from acid_vectorized order by b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+POSTHOOK: query: select b,c from acid_vectorized order by b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+0iqrc5	11.0
+0ruyd6Y50JpdGRf6HqD	NULL
+2uLyD28144vklju213J1mr	NULL
+A34p7oRr2WvUJNf	NULL
+Anj0oF	8.0
+aw724t8c5558x2xneC624	-51.0
+dPkN74F7	NULL
+iUR3Q	NULL
+k17Am8uPHWk02cEf1jet	11.0
+oj1YrV5Wa	11.0



Mime
View raw message