hive-commits mailing list archives

From: ser...@apache.org
Subject: svn commit: r1669718 [11/29] - in /hive/branches/llap: ./ ant/src/org/apache/hadoop/hive/ant/ common/src/java/org/apache/hadoop/hive/common/ common/src/java/org/apache/hadoop/hive/common/type/ common/src/java/org/apache/hadoop/hive/conf/ common/src/jav...
Date: Sat, 28 Mar 2015 00:22:27 GMT
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java Sat Mar 28 00:22:15 2015
@@ -26,13 +26,20 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.split.TezGroupedSplit;
 import org.apache.hadoop.mapred.split.TezMapredSplitsGrouper;
 import org.apache.tez.dag.api.TaskLocationHint;
@@ -49,8 +56,15 @@ public class SplitGrouper {
 
   private static final Log LOG = LogFactory.getLog(SplitGrouper.class);
 
+  // TODO This needs to be looked at. Map of Map to Map... Made concurrent for now since split generation
+  // can happen in parallel.
+  private static final Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> cache =
+      new ConcurrentHashMap<>();
+
   private final TezMapredSplitsGrouper tezGrouper = new TezMapredSplitsGrouper();
 
+
+
   /**
    * group splits for each bucket separately - while evenly filling all the
    * available slots with tasks
@@ -87,12 +101,83 @@ public class SplitGrouper {
     return bucketGroupedSplitMultimap;
   }
 
+
+  /**
+   * Create task location hints from a set of input splits
+   * @param splits the actual splits
+   * @return taskLocationHints - 1 per input split specified
+   * @throws IOException
+   */
+  public List<TaskLocationHint> createTaskLocationHints(InputSplit[] splits) throws IOException {
+
+    List<TaskLocationHint> locationHints = Lists.newArrayListWithCapacity(splits.length);
+
+    for (InputSplit split : splits) {
+      String rack = (split instanceof TezGroupedSplit) ? ((TezGroupedSplit) split).getRack() : null;
+      if (rack == null) {
+        if (split.getLocations() != null) {
+          locationHints.add(TaskLocationHint.createTaskLocationHint(new HashSet<String>(Arrays.asList(split
+              .getLocations())), null));
+        } else {
+          locationHints.add(TaskLocationHint.createTaskLocationHint(null, null));
+        }
+      } else {
+        locationHints.add(TaskLocationHint.createTaskLocationHint(null, Collections.singleton(rack)));
+      }
+    }
+
+    return locationHints;
+  }
+
+  /** Generate groups of splits, separated by schema evolution boundaries */
+  public Multimap<Integer, InputSplit> generateGroupedSplits(JobConf jobConf,
+                                                                    Configuration conf,
+                                                                    InputSplit[] splits,
+                                                                    float waves, int availableSlots)
+      throws Exception {
+    return generateGroupedSplits(jobConf, conf, splits, waves, availableSlots, null, true);
+  }
+
+  /** Generate groups of splits, separated by schema evolution boundaries */
+  public Multimap<Integer, InputSplit> generateGroupedSplits(JobConf jobConf,
+                                                                    Configuration conf,
+                                                                    InputSplit[] splits,
+                                                                    float waves, int availableSlots,
+                                                                    String inputName,
+                                                                    boolean groupAcrossFiles) throws
+      Exception {
+
+    MapWork work = populateMapWork(jobConf, inputName);
+    Multimap<Integer, InputSplit> bucketSplitMultiMap =
+        ArrayListMultimap.<Integer, InputSplit> create();
+
+    int i = 0;
+    InputSplit prevSplit = null;
+    for (InputSplit s : splits) {
+      // this is the bit where we make sure we don't group across partition
+      // schema boundaries
+      if (schemaEvolved(s, prevSplit, groupAcrossFiles, work)) {
+        ++i;
+        prevSplit = s;
+      }
+      bucketSplitMultiMap.put(i, s);
+    }
+    LOG.info("# Src groups for split generation: " + (i + 1));
+
+    // group them into the chunks we want
+    Multimap<Integer, InputSplit> groupedSplits =
+        this.group(jobConf, bucketSplitMultiMap, availableSlots, waves);
+
+    return groupedSplits;
+  }
+
+
   /**
    * get the size estimates for each bucket in tasks. This is used to make sure
    * we allocate the head room evenly
    */
   private Map<Integer, Integer> estimateBucketSizes(int availableSlots, float waves,
-      Map<Integer, Collection<InputSplit>> bucketSplitMap) {
+                                                    Map<Integer, Collection<InputSplit>> bucketSplitMap) {
 
     // mapping of bucket id to size of all splits in bucket in bytes
     Map<Integer, Long> bucketSizeMap = new HashMap<Integer, Long>();
@@ -147,24 +232,54 @@ public class SplitGrouper {
     return bucketTaskMap;
   }
 
-  public List<TaskLocationHint> createTaskLocationHints(InputSplit[] splits) throws IOException {
+  private static MapWork populateMapWork(JobConf jobConf, String inputName) {
+    MapWork work = null;
+    if (inputName != null) {
+      work = (MapWork) Utilities.getMergeWork(jobConf, inputName);
+      // work can still be null if there is no merge work for this input
+    }
+    if (work == null) {
+      work = Utilities.getMapWork(jobConf);
+    }
 
-    List<TaskLocationHint> locationHints = Lists.newArrayListWithCapacity(splits.length);
+    return work;
+  }
 
-    for (InputSplit split : splits) {
-      String rack = (split instanceof TezGroupedSplit) ? ((TezGroupedSplit) split).getRack() : null;
-      if (rack == null) {
-        if (split.getLocations() != null) {
-          locationHints.add(TaskLocationHint.createTaskLocationHint(new HashSet<String>(Arrays.asList(split
-              .getLocations())), null));
-        } else {
-          locationHints.add(TaskLocationHint.createTaskLocationHint(null, null));
-        }
-      } else {
-        locationHints.add(TaskLocationHint.createTaskLocationHint(null, Collections.singleton(rack)));
+  private boolean schemaEvolved(InputSplit s, InputSplit prevSplit, boolean groupAcrossFiles,
+                                       MapWork work) throws IOException {
+    boolean retval = false;
+    Path path = ((FileSplit) s).getPath();
+    PartitionDesc pd =
+        HiveFileFormatUtils.getPartitionDescFromPathRecursively(work.getPathToPartitionInfo(),
+            path, cache);
+    String currentDeserializerClass = pd.getDeserializerClassName();
+    Class<?> currentInputFormatClass = pd.getInputFileFormatClass();
+
+    Class<?> previousInputFormatClass = null;
+    String previousDeserializerClass = null;
+    if (prevSplit != null) {
+      Path prevPath = ((FileSplit) prevSplit).getPath();
+      if (!groupAcrossFiles) {
+        return !path.equals(prevPath);
       }
+      PartitionDesc prevPD =
+          HiveFileFormatUtils.getPartitionDescFromPathRecursively(work.getPathToPartitionInfo(),
+              prevPath, cache);
+      previousDeserializerClass = prevPD.getDeserializerClassName();
+      previousInputFormatClass = prevPD.getInputFileFormatClass();
     }
 
-    return locationHints;
+    if ((currentInputFormatClass != previousInputFormatClass)
+        || (!currentDeserializerClass.equals(previousDeserializerClass))) {
+      retval = true;
+    }
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Adding split " + path + " to src new group? " + retval);
+    }
+    return retval;
   }
+
+
+
 }
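
For context, the new grouping entry point would typically be driven from Tez split generation; the following is only an illustrative sketch of a call site (jobConf, conf, rawSplits, waves, availableSlots and the input name are placeholders, not part of this commit):

    // Hypothetical caller: bucket the raw splits at schema-evolution boundaries,
    // then let SplitGrouper size the groups for the available slots and wave count.
    SplitGrouper grouper = new SplitGrouper();
    Multimap<Integer, InputSplit> grouped =
        grouper.generateGroupedSplits(jobConf, conf, rawSplits, waves, availableSlots,
            "example_input", true /* groupAcrossFiles */);
    List<TaskLocationHint> hints =
        grouper.createTaskLocationHints(grouped.values().toArray(new InputSplit[0]));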

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java Sat Mar 28 00:22:15 2015
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.exec.v
 
 import java.sql.Timestamp;
 
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+
 public final class TimestampUtils {
 
   /**
@@ -58,4 +60,8 @@ public final class TimestampUtils {
   public static long doubleToNanoseconds(double d) {
     return (long) (d * 1000000000);
   }
+
+  public static long daysToNanoseconds(long daysSinceEpoch) {
+    return DateWritable.daysToMillis((int) daysSinceEpoch) * 1000000;
+  }
 }
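
The new daysToNanoseconds helper scales DateWritable.daysToMillis by 10^6 to match the nanosecond encoding used by long column vectors. A rough worked example (ignoring any local-timezone adjustment DateWritable may apply when converting days to milliseconds):

    // Assuming daysToMillis(1) == 86_400_000L (one day after the epoch):
    long nanos = TimestampUtils.daysToNanoseconds(1);  // 86,400,000,000,000 ns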

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java Sat Mar 28 00:22:15 2015
@@ -32,6 +32,8 @@ import org.apache.hadoop.hive.serde2.io.
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
 import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -47,6 +49,7 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
+import org.apache.hive.common.util.DateUtils;
 
 /**
  * This class is used as a static factory for VectorColumnAssign.
@@ -338,6 +341,35 @@ public class VectorColumnAssignFactory {
           }
         }.init(outputBatch, (LongColumnVector) destCol);
         break;
+      case INTERVAL_YEAR_MONTH:
+        outVCA = new VectorLongColumnAssign() {
+          @Override
+          public void assignObjectValue(Object val, int destIndex) throws HiveException {
+            if (val == null) {
+              assignNull(destIndex);
+            }
+            else {
+              HiveIntervalYearMonthWritable bw = (HiveIntervalYearMonthWritable) val;
+              assignLong(bw.getHiveIntervalYearMonth().getTotalMonths(), destIndex);
+            }
+          }
+        }.init(outputBatch, (LongColumnVector) destCol);
+        break;
+      case INTERVAL_DAY_TIME:outVCA = new VectorLongColumnAssign() {
+        @Override
+        public void assignObjectValue(Object val, int destIndex) throws HiveException {
+          if (val == null) {
+            assignNull(destIndex);
+          }
+          else {
+            HiveIntervalDayTimeWritable bw = (HiveIntervalDayTimeWritable) val;
+            assignLong(
+                DateUtils.getIntervalDayTimeTotalNanos(bw.getHiveIntervalDayTime()),
+                destIndex);
+          }
+        }
+      }.init(outputBatch, (LongColumnVector) destCol);
+      break;
       default:
         throw new HiveException("Incompatible Long vector column and primitive category " +
             category);
@@ -535,6 +567,10 @@ public class VectorColumnAssignFactory {
         vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BINARY);
       } else if (writables[i] instanceof TimestampWritable) {
         vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.TIMESTAMP);
+      } else if (writables[i] instanceof HiveIntervalYearMonthWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.INTERVAL_YEAR_MONTH);
+      } else if (writables[i] instanceof HiveIntervalDayTimeWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.INTERVAL_DAY_TIME);
       } else if (writables[i] instanceof BooleanWritable) {
         vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BOOLEAN);
       } else {

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java Sat Mar 28 00:22:15 2015
@@ -22,6 +22,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hive.common.util.AnnotationUtils;
 
 /**
@@ -66,9 +67,13 @@ public class VectorExpressionDescriptor
     STRING_FAMILY           (STRING.value | CHAR.value | VARCHAR.value),
     DATE                    (0x040),
     TIMESTAMP               (0x080),
+    INTERVAL_YEAR_MONTH     (0x100),
+    INTERVAL_DAY_TIME       (0x200),
     DATETIME_FAMILY         (DATE.value | TIMESTAMP.value),
+    INTERVAL_FAMILY         (INTERVAL_YEAR_MONTH.value | INTERVAL_DAY_TIME.value),
     INT_TIMESTAMP_FAMILY    (INT_FAMILY.value | TIMESTAMP.value),
-    INT_DATETIME_FAMILY     (INT_FAMILY.value | DATETIME_FAMILY.value),
+    INT_INTERVAL_FAMILY     (INT_FAMILY.value | INTERVAL_FAMILY.value),
+    INT_DATETIME_INTERVAL_FAMILY  (INT_FAMILY.value | DATETIME_FAMILY.value | INTERVAL_FAMILY.value),
     STRING_DATETIME_FAMILY  (STRING_FAMILY.value | DATETIME_FAMILY.value),
     ALL_FAMILY              (0xFFF);
 
@@ -105,6 +110,10 @@ public class VectorExpressionDescriptor
         return TIMESTAMP;
       } else if (lower.equals("date")) {
         return DATE;
+      } else if (lower.equals(serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME)) {
+        return INTERVAL_YEAR_MONTH;
+      } else if (lower.equals(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME)) {
+        return INTERVAL_DAY_TIME;
       } else if (lower.equals("void")) {
         // The old code let void through...
         return INT_FAMILY;
@@ -137,7 +146,9 @@ public class VectorExpressionDescriptor
     public static String getVectorColumnSimpleName(ArgumentType argType) {
       if (argType == INT_FAMILY ||
           argType == DATE ||
-          argType == TIMESTAMP) {
+          argType == TIMESTAMP ||
+          argType == INTERVAL_YEAR_MONTH ||
+          argType == INTERVAL_DAY_TIME) {
         return "Long";
       } else if (argType == FLOAT_FAMILY) {
         return "Double";

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java Sat Mar 28 00:22:15 2015
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec.vector;
 
+import java.util.Arrays;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -29,8 +30,12 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor;
+import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
@@ -40,6 +45,7 @@ import org.apache.hadoop.hive.ql.plan.Ma
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.DataOutputBuffer;
 
 /**
  * The vectorized version of the MapJoinOperator.
@@ -66,6 +72,7 @@ public class VectorMapJoinOperator exten
   //---------------------------------------------------------------------------
 
   private transient VectorizedRowBatch outputBatch;
+  private transient VectorizedRowBatch scratchBatch;  // holds restored (from disk) big table rows
   private transient VectorExpressionWriter[] valueWriters;
   private transient Map<ObjectInspector, VectorColumnAssign[]> outputVectorAssigners;
 
@@ -79,6 +86,10 @@ public class VectorMapJoinOperator exten
 
   private transient VectorizedRowBatchCtx vrbCtx = null;
 
+  private transient int tag;  // big table alias
+  private VectorExpressionWriter[] rowWriters;  // Writer for producing row from input batch
+  protected transient Object[] singleRow;
+
   public VectorMapJoinOperator() {
     super();
   }
@@ -115,6 +126,19 @@ public class VectorMapJoinOperator exten
 
   @Override
   public Collection<Future<?>> initializeOp(Configuration hconf) throws HiveException {
+    // Code borrowed from VectorReduceSinkOperator.initializeOp
+    VectorExpressionWriterFactory.processVectorInspector(
+        (StructObjectInspector) inputObjInspectors[0],
+        new VectorExpressionWriterFactory.SingleOIDClosure() {
+          @Override
+          public void assign(VectorExpressionWriter[] writers,
+                             ObjectInspector objectInspector) {
+            rowWriters = writers;
+            inputObjInspectors[0] = objectInspector;
+          }
+        });
+    singleRow = new Object[rowWriters.length];
+
     Collection<Future<?>> result = super.initializeOp(hconf);
 
     List<ExprNodeDesc> keyDesc = conf.getKeys().get(posBigTable);
@@ -211,15 +235,21 @@ public class VectorMapJoinOperator exten
 
   @Override
   public void closeOp(boolean aborted) throws HiveException {
+    super.closeOp(aborted);
+    for (MapJoinTableContainer tableContainer : mapJoinTables) {
+      if (tableContainer != null) {
+        tableContainer.dumpMetrics();
+      }
+    }
     if (!aborted && 0 < outputBatch.size) {
       flushOutput();
     }
   }
 
   @Override
-  protected void setMapJoinKey(ReusableGetAdaptor dest, Object row, byte alias)
+  protected JoinUtil.JoinResult setMapJoinKey(ReusableGetAdaptor dest, Object row, byte alias)
       throws HiveException {
-    dest.setFromVector(keyValues[batchIndex], keyOutputWriters, keyWrapperBatch);
+    return dest.setFromVector(keyValues[batchIndex], keyOutputWriters, keyWrapperBatch);
   }
 
   @Override
@@ -227,6 +257,12 @@ public class VectorMapJoinOperator exten
     byte alias = (byte) tag;
     VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
 
+    // Preparation for hybrid grace hash join
+    this.tag = tag;
+    if (scratchBatch == null) {
+      scratchBatch = makeLike(inBatch);
+    }
+
     if (null != bigTableFilterExpressions) {
       for(VectorExpression ve:bigTableFilterExpressions) {
         ve.evaluate(inBatch);
@@ -262,4 +298,94 @@ public class VectorMapJoinOperator exten
   public VectorizationContext getOuputVectorizationContext() {
     return vOutContext;
   }
+
+  @Override
+  protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row)
+      throws HiveException {
+    // Extract the actual row from row batch
+    VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
+    Object[] actualRow = getRowObject(inBatch, batchIndex);
+    super.spillBigTableRow(hybridHtContainer, actualRow);
+  }
+
+  @Override
+  protected void reProcessBigTable(HybridHashTableContainer.HashPartition partition)
+      throws HiveException {
+    ObjectContainer bigTable = partition.getMatchfileObjContainer();
+
+    DataOutputBuffer dataOutputBuffer = new DataOutputBuffer();
+    while (bigTable.hasNext()) {
+      Object row = bigTable.next();
+      VectorizedBatchUtil.addProjectedRowToBatchFrom(row,
+          (StructObjectInspector) inputObjInspectors[posBigTable],
+          scratchBatch.size, scratchBatch, dataOutputBuffer);
+      scratchBatch.size++;
+
+      if (scratchBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
+        process(scratchBatch, tag); // call process once we have a full batch
+        scratchBatch.reset();
+        dataOutputBuffer.reset();
+      }
+    }
+    // Process the row batch that has less than DEFAULT_SIZE rows
+    if (scratchBatch.size > 0) {
+      process(scratchBatch, tag);
+      scratchBatch.reset();
+      dataOutputBuffer.reset();
+    }
+    bigTable.clear();
+  }
+
+  // Code borrowed from VectorReduceSinkOperator
+  private Object[] getRowObject(VectorizedRowBatch vrb, int rowIndex) throws HiveException {
+    int batchIndex = rowIndex;
+    if (vrb.selectedInUse) {
+      batchIndex = vrb.selected[rowIndex];
+    }
+    for (int i = 0; i < vrb.projectionSize; i++) {
+      ColumnVector vectorColumn = vrb.cols[vrb.projectedColumns[i]];
+      if (vectorColumn != null) {
+        singleRow[i] = rowWriters[i].writeValue(vectorColumn, batchIndex);
+      } else {
+        // Some columns from tables are not used.
+        singleRow[i] = null;
+      }
+    }
+    return singleRow;
+  }
+
+  /**
+   * Make a new (scratch) batch, which is exactly "like" the batch provided, except that it's empty
+   * @param batch the batch to imitate
+   * @return the new batch
+   * @throws HiveException
+   */
+  VectorizedRowBatch makeLike(VectorizedRowBatch batch) throws HiveException {
+    VectorizedRowBatch newBatch = new VectorizedRowBatch(batch.numCols);
+    for (int i = 0; i < batch.numCols; i++) {
+      ColumnVector colVector = batch.cols[i];
+      if (colVector != null) {
+        ColumnVector newColVector;
+        if (colVector instanceof LongColumnVector) {
+          newColVector = new LongColumnVector();
+        } else if (colVector instanceof DoubleColumnVector) {
+          newColVector = new DoubleColumnVector();
+        } else if (colVector instanceof BytesColumnVector) {
+          newColVector = new BytesColumnVector();
+        } else if (colVector instanceof DecimalColumnVector) {
+          DecimalColumnVector decColVector = (DecimalColumnVector) colVector;
+          newColVector = new DecimalColumnVector(decColVector.precision, decColVector.scale);
+        } else {
+          throw new HiveException("Column vector class " + colVector.getClass().getName() +
+          " is not supported!");
+        }
+        newBatch.cols[i] = newColVector;
+        newBatch.cols[i].init();
+      }
+    }
+    newBatch.projectedColumns = Arrays.copyOf(batch.projectedColumns, batch.projectedColumns.length);
+    newBatch.projectionSize = batch.projectionSize;
+    newBatch.reset();
+    return newBatch;
+  }
 }
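
The changed setMapJoinKey contract lets the hybrid grace hash join report when a key hashes to a partition that has been spilled to disk. A caller would branch on the result roughly as in this hedged sketch (the JoinUtil.JoinResult constants and the smallTablePos index are assumed here, not shown in this diff):

    JoinUtil.JoinResult joinResult = setMapJoinKey(adaptor, row, alias);
    if (joinResult == JoinUtil.JoinResult.SPILL) {
      // Partition is on disk: stash the big-table row now and replay it later,
      // which is what reProcessBigTable() does batch-by-batch via scratchBatch.
      spillBigTableRow(mapJoinTables[smallTablePos], row);
    }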

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Sat Mar 28 00:22:15 2015
@@ -38,6 +38,8 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
@@ -113,6 +115,7 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hive.common.util.DateUtils;
 
 /**
  * Context class for vectorization execution.
@@ -253,6 +256,8 @@ public class VectorizationContext {
     castExpressionUdfs.add(GenericUDFToChar.class);
     castExpressionUdfs.add(GenericUDFToVarchar.class);
     castExpressionUdfs.add(GenericUDFTimestamp.class);
+    castExpressionUdfs.add(GenericUDFToIntervalYearMonth.class);
+    castExpressionUdfs.add(GenericUDFToIntervalDayTime.class);
     castExpressionUdfs.add(UDFToByte.class);
     castExpressionUdfs.add(UDFToBoolean.class);
     castExpressionUdfs.add(UDFToDouble.class);
@@ -658,6 +663,12 @@ public class VectorizationContext {
       case TIMESTAMP:
         genericUdf = new GenericUDFToUnixTimeStamp();
         break;
+      case INTERVAL_YEAR_MONTH:
+        genericUdf = new GenericUDFToIntervalYearMonth();
+        break;
+      case INTERVAL_DAY_TIME:
+        genericUdf = new GenericUDFToIntervalDayTime();
+        break;
       case BINARY:
         genericUdf = new GenericUDFToBinary();
         break;
@@ -871,8 +882,16 @@ public class VectorizationContext {
     switch (vectorArgType) {
     case INT_FAMILY:
       return new ConstantVectorExpression(outCol, ((Number) constantValue).longValue());
+    case DATE:
+      return new ConstantVectorExpression(outCol, DateWritable.dateToDays((Date) constantValue));
     case TIMESTAMP:
       return new ConstantVectorExpression(outCol, TimestampUtils.getTimeNanoSec((Timestamp) constantValue));
+    case INTERVAL_YEAR_MONTH:
+      return new ConstantVectorExpression(outCol,
+          ((HiveIntervalYearMonth) constantValue).getTotalMonths());
+    case INTERVAL_DAY_TIME:
+      return new ConstantVectorExpression(outCol,
+          DateUtils.getIntervalDayTimeTotalNanos((HiveIntervalDayTime) constantValue));
     case FLOAT_FAMILY:
       return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue());
     case DECIMAL:
@@ -1773,6 +1792,14 @@ public class VectorizationContext {
     return resultType.equalsIgnoreCase("date");
   }
 
+  public static boolean isIntervalYearMonthFamily(String resultType) {
+    return resultType.equalsIgnoreCase("interval_year_month");
+  }
+
+  public static boolean isIntervalDayTimeFamily(String resultType) {
+    return resultType.equalsIgnoreCase("interval_day_time");
+  }
+
   // return true if this is any kind of float
   public static boolean isFloatFamily(String resultType) {
     return resultType.equalsIgnoreCase("double")
@@ -1843,12 +1870,19 @@ public class VectorizationContext {
 
   private Object getVectorTypeScalarValue(ExprNodeConstantDesc constDesc) throws HiveException {
     String t = constDesc.getTypeInfo().getTypeName();
-    if (isTimestampFamily(t)) {
-      return TimestampUtils.getTimeNanoSec((Timestamp) getScalarValue(constDesc));
-    } else if (isDateFamily(t)) {
-      return DateWritable.dateToDays((Date) getScalarValue(constDesc));
-    } else {
-      return getScalarValue(constDesc);
+    VectorExpression.Type type = VectorExpression.Type.getValue(t);
+    Object scalarValue = getScalarValue(constDesc);
+    switch (type) {
+      case TIMESTAMP:
+        return TimestampUtils.getTimeNanoSec((Timestamp) scalarValue);
+      case DATE:
+        return DateWritable.dateToDays((Date) scalarValue);
+      case INTERVAL_YEAR_MONTH:
+        return ((HiveIntervalYearMonth) scalarValue).getTotalMonths();
+      case INTERVAL_DAY_TIME:
+        return DateUtils.getIntervalDayTimeTotalNanos((HiveIntervalDayTime) scalarValue);
+      default:
+        return scalarValue;
     }
   }
 
@@ -1935,6 +1969,9 @@ public class VectorizationContext {
       return "Date";
     case TIMESTAMP:
       return "Timestamp";
+    case INTERVAL_YEAR_MONTH:
+    case INTERVAL_DAY_TIME:
+      return hiveTypeName;
     default:
       return "None";
     }
@@ -1959,6 +1996,9 @@ public class VectorizationContext {
       return "Date";
     case TIMESTAMP:
       return "Timestamp";
+    case INTERVAL_YEAR_MONTH:
+    case INTERVAL_DAY_TIME:
+      return hiveTypeName;
     default:
       return "None";
     }
@@ -1969,16 +2009,16 @@ public class VectorizationContext {
   // TODO:   And, investigate if different reduce-side versions are needed for var* and std*, or if map-side aggregate can be used..  Right now they are conservatively
   //         marked map-side (HASH).
   static ArrayList<AggregateDefinition> aggregatesDefinition = new ArrayList<AggregateDefinition>() {{
-    add(new AggregateDefinition("min",         VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY,    null,                          VectorUDAFMinLong.class));
+    add(new AggregateDefinition("min",         VectorExpressionDescriptor.ArgumentType.INT_DATETIME_INTERVAL_FAMILY,    null,                          VectorUDAFMinLong.class));
     add(new AggregateDefinition("min",         VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY,           null,                          VectorUDAFMinDouble.class));
     add(new AggregateDefinition("min",         VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,          null,                          VectorUDAFMinString.class));
     add(new AggregateDefinition("min",         VectorExpressionDescriptor.ArgumentType.DECIMAL,                null,                          VectorUDAFMinDecimal.class));
-    add(new AggregateDefinition("max",         VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY,    null,                          VectorUDAFMaxLong.class));
+    add(new AggregateDefinition("max",         VectorExpressionDescriptor.ArgumentType.INT_DATETIME_INTERVAL_FAMILY,    null,                          VectorUDAFMaxLong.class));
     add(new AggregateDefinition("max",         VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY,           null,                          VectorUDAFMaxDouble.class));
     add(new AggregateDefinition("max",         VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,          null,                          VectorUDAFMaxString.class));
     add(new AggregateDefinition("max",         VectorExpressionDescriptor.ArgumentType.DECIMAL,                null,                          VectorUDAFMaxDecimal.class));
     add(new AggregateDefinition("count",       VectorExpressionDescriptor.ArgumentType.NONE,                   GroupByDesc.Mode.HASH,         VectorUDAFCountStar.class));
-    add(new AggregateDefinition("count",       VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY,    GroupByDesc.Mode.HASH,         VectorUDAFCount.class));
+    add(new AggregateDefinition("count",       VectorExpressionDescriptor.ArgumentType.INT_DATETIME_INTERVAL_FAMILY,    GroupByDesc.Mode.HASH,         VectorUDAFCount.class));
     add(new AggregateDefinition("count",       VectorExpressionDescriptor.ArgumentType.INT_FAMILY,             GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFCountMerge.class));
     add(new AggregateDefinition("count",       VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY,           GroupByDesc.Mode.HASH,         VectorUDAFCount.class));
     add(new AggregateDefinition("count",       VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,          GroupByDesc.Mode.HASH,         VectorUDAFCount.class));
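
For the interval constants handled above, the long written into the ConstantVectorExpression is the total-months (year-month) or total-nanoseconds (day-time) encoding. A small worked example for the year-month case:

    // INTERVAL '1-2' YEAR TO MONTH encodes as 1*12 + 2 = 14 months.
    HiveIntervalYearMonth iym = new HiveIntervalYearMonth(1, 2);
    long encoded = iym.getTotalMonths();  // 14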

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java Sat Mar 28 00:22:15 2015
@@ -26,6 +26,8 @@ import java.util.List;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -34,6 +36,8 @@ import org.apache.hadoop.hive.serde2.io.
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
+import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
 import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -50,6 +54,7 @@ import org.apache.hadoop.io.FloatWritabl
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hive.common.util.DateUtils;
 
 public class VectorizedBatchUtil {
   private static final Log LOG = LogFactory.getLog(VectorizedBatchUtil.class);
@@ -126,6 +131,8 @@ public class VectorizedBatchUtil {
         case LONG:
         case TIMESTAMP:
         case DATE:
+        case INTERVAL_YEAR_MONTH:
+        case INTERVAL_DAY_TIME:
           cvList.add(new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE));
           break;
         case FLOAT:
@@ -235,11 +242,31 @@ public class VectorizedBatchUtil {
     final int off = colOffset;
     // Iterate thru the cols and load the batch
     for (int i = 0; i < fieldRefs.size(); i++) {
-      setVector(row, oi, fieldRefs, batch, buffer, rowIndex, i, off);
+      setVector(row, oi, fieldRefs.get(i), batch, buffer, rowIndex, i, off);
     }
   }
 
   /**
+   * Add only the projected column of a regular row to the specified vectorized row batch
+   * @param row the regular row
+   * @param oi object inspector for the row
+   * @param rowIndex the offset to add in the batch
+   * @param batch vectorized row batch
+   * @param buffer data output buffer
+   * @throws HiveException
+   */
+  public static void addProjectedRowToBatchFrom(Object row, StructObjectInspector oi,
+      int rowIndex, VectorizedRowBatch batch, DataOutputBuffer buffer) throws HiveException {
+    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
+    for (int i = 0; i < fieldRefs.size(); i++) {
+      int projectedOutputCol = batch.projectedColumns[i];
+      if (batch.cols[projectedOutputCol] == null) {
+        continue;
+      }
+      setVector(row, oi, fieldRefs.get(i), batch, buffer, rowIndex, projectedOutputCol, 0);
+    }
+  }
+  /**
    * Iterates thru all the columns in a given row and populates the batch
    * from a given offset
    *
@@ -268,21 +295,21 @@ public class VectorizedBatchUtil {
         // The value will have already been set before we're called, so don't overwrite it
         continue;
       }
-      setVector(row, oi, fieldRefs, batch, buffer, rowIndex, i, 0);
+      setVector(row, oi, fieldRefs.get(i), batch, buffer, rowIndex, i, 0);
     }
   }
 
   private static void setVector(Object row,
                                 StructObjectInspector oi,
-                                List<? extends StructField> fieldRefs,
+                                StructField field,
                                 VectorizedRowBatch batch,
                                 DataOutputBuffer buffer,
                                 int rowIndex,
                                 int colIndex,
                                 int offset) throws HiveException {
 
-    Object fieldData = oi.getStructFieldData(row, fieldRefs.get(colIndex));
-    ObjectInspector foi = fieldRefs.get(colIndex).getFieldObjectInspector();
+    Object fieldData = oi.getStructFieldData(row, field);
+    ObjectInspector foi = field.getFieldObjectInspector();
 
     // Vectorization only supports PRIMITIVE data types. Assert the same
     assert (foi.getCategory() == Category.PRIMITIVE);
@@ -390,6 +417,30 @@ public class VectorizedBatchUtil {
         lcv.isNull[rowIndex] = false;
       } else {
         lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
+      }
+    }
+      break;
+    case INTERVAL_YEAR_MONTH: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        HiveIntervalYearMonth i = ((HiveIntervalYearMonthWritable) writableCol).getHiveIntervalYearMonth();
+        lcv.vector[rowIndex] = i.getTotalMonths();
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
+      }
+    }
+      break;
+    case INTERVAL_DAY_TIME: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        HiveIntervalDayTime i = ((HiveIntervalDayTimeWritable) writableCol).getHiveIntervalDayTime();
+        lcv.vector[rowIndex] = DateUtils.getIntervalDayTimeTotalNanos(i);
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
         setNullColIsNullValue(lcv, rowIndex);
       }
     }

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Sat Mar 28 00:22:15 2015
@@ -35,6 +35,8 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
@@ -42,6 +44,7 @@ import org.apache.hadoop.hive.ql.io.IOPr
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDeException;
@@ -61,6 +64,7 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hive.common.util.DateUtils;
 
 /**
  * Context for Vectorized row batch. this calss does eager deserialization of row data using serde
@@ -300,6 +304,8 @@ public class VectorizedRowBatchCtx {
           case LONG:
           case TIMESTAMP:
           case DATE:
+          case INTERVAL_YEAR_MONTH:
+          case INTERVAL_DAY_TIME:
             result.cols[j] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
             break;
           case FLOAT:
@@ -501,7 +507,31 @@ public class VectorizedRowBatchCtx {
           }
         }
         break;
-        
+
+        case INTERVAL_YEAR_MONTH: {
+          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          if (value == null) {
+            lcv.noNulls = false;
+            lcv.isNull[0] = true;
+            lcv.isRepeating = true;
+          } else {
+            lcv.fill(((HiveIntervalYearMonth) value).getTotalMonths());
+            lcv.isNull[0] = false;
+          }
+        }
+
+        case INTERVAL_DAY_TIME: {
+          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          if (value == null) {
+            lcv.noNulls = false;
+            lcv.isNull[0] = true;
+            lcv.isRepeating = true;
+          } else {
+            lcv.fill(DateUtils.getIntervalDayTimeTotalNanos((HiveIntervalDayTime) value));
+            lcv.isNull[0] = false;
+          }
+        }
+
         case FLOAT: {
           DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[colIndex];
           if (value == null) {
@@ -635,7 +665,9 @@ public class VectorizedRowBatchCtx {
       return new DecimalColumnVector(defaultSize, precisionScale[0], precisionScale[1]);
     } else if (type.equalsIgnoreCase("long") ||
                type.equalsIgnoreCase("date") ||
-               type.equalsIgnoreCase("timestamp")) {
+               type.equalsIgnoreCase("timestamp") ||
+               type.equalsIgnoreCase(serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME) ||
+               type.equalsIgnoreCase(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME)) {
       return new LongColumnVector(defaultSize);
     } else {
       throw new Error("Cannot allocate vector column for " + type);

Added: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java?rev=1669718&view=auto
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java (added)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java Sat Mar 28 00:22:15 2015
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hive.common.util.DateUtils;
+
+
+/**
+ * Casts a string vector to a interval day-time vector.
+ */
+public class CastStringToIntervalDayTime extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int inputColumn;
+  private int outputColumn;
+
+  public CastStringToIntervalDayTime() {
+
+  }
+
+  public CastStringToIntervalDayTime(int inputColumn, int outputColumn) {
+    this.inputColumn = inputColumn;
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+
+    if (n == 0) {
+
+      // Nothing to do
+      return;
+    }
+
+    if (inV.noNulls) {
+      outV.noNulls = true;
+      if (inV.isRepeating) {
+        outV.isRepeating = true;
+        evaluate(outV, inV, 0);
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          evaluate(outV, inV, i);
+        }
+        outV.isRepeating = false;
+      } else {
+        for(int i = 0; i != n; i++) {
+          evaluate(outV, inV, i);
+        }
+        outV.isRepeating = false;
+      }
+    } else {
+
+      // Handle case with nulls. Don't do function if the value is null,
+      // because the data may be undefined for a null value.
+      outV.noNulls = false;
+      if (inV.isRepeating) {
+        outV.isRepeating = true;
+        outV.isNull[0] = inV.isNull[0];
+        if (!inV.isNull[0]) {
+          evaluate(outV, inV, 0);
+        }
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outV.isNull[i] = inV.isNull[i];
+          if (!inV.isNull[i]) {
+            evaluate(outV, inV, i);
+          }
+        }
+        outV.isRepeating = false;
+      } else {
+        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!inV.isNull[i]) {
+            evaluate(outV, inV, i);
+          }
+        }
+        outV.isRepeating = false;
+      }
+    }
+  }
+
+  private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) {
+    try {
+      HiveIntervalDayTime interval = HiveIntervalDayTime.valueOf(
+          new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8"));
+      outV.vector[i] = DateUtils.getIntervalDayTimeTotalNanos(interval);
+    } catch (Exception e) {
+      outV.vector[i] = 1;
+      outV.isNull[i] = true;
+      outV.noNulls = false;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public int getInputColumn() {
+    return inputColumn;
+  }
+
+  public void setInputColumn(int inputColumn) {
+    this.inputColumn = inputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(1)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return b.build();
+  }
+}
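
A successful parse stores DateUtils.getIntervalDayTimeTotalNanos, i.e. the interval's total seconds scaled to nanoseconds plus the fractional nanos. A rough worked example, assuming the "days hh:mm:ss.nnnnnnnnn" literal form accepted by HiveIntervalDayTime.valueOf:

    // "1 02:03:04.000000005" -> (1*86400 + 2*3600 + 3*60 + 4) s = 93,784 s
    //                            93,784 * 1,000,000,000 + 5 = 93,784,000,000,005 ns
    HiveIntervalDayTime idt = HiveIntervalDayTime.valueOf("1 02:03:04.000000005");
    long nanos = DateUtils.getIntervalDayTimeTotalNanos(idt);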

Added: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java?rev=1669718&view=auto
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java (added)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java Sat Mar 28 00:22:15 2015
@@ -0,0 +1,159 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde.serdeConstants;
+
+
+/**
+ * Casts a string vector to a interval year-month vector.
+ */
+public class CastStringToIntervalYearMonth extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int inputColumn;
+  private int outputColumn;
+
+  public CastStringToIntervalYearMonth() {
+
+  }
+
+  public CastStringToIntervalYearMonth(int inputColumn, int outputColumn) {
+    this.inputColumn = inputColumn;
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+
+    if (n == 0) {
+
+      // Nothing to do
+      return;
+    }
+
+    if (inV.noNulls) {
+      outV.noNulls = true;
+      if (inV.isRepeating) {
+        outV.isRepeating = true;
+        evaluate(outV, inV, 0);
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          evaluate(outV, inV, i);
+        }
+        outV.isRepeating = false;
+      } else {
+        for(int i = 0; i != n; i++) {
+          evaluate(outV, inV, i);
+        }
+        outV.isRepeating = false;
+      }
+    } else {
+
+      // Handle case with nulls. Don't do function if the value is null,
+      // because the data may be undefined for a null value.
+      outV.noNulls = false;
+      if (inV.isRepeating) {
+        outV.isRepeating = true;
+        outV.isNull[0] = inV.isNull[0];
+        if (!inV.isNull[0]) {
+          evaluate(outV, inV, 0);
+        }
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outV.isNull[i] = inV.isNull[i];
+          if (!inV.isNull[i]) {
+            evaluate(outV, inV, i);
+          }
+        }
+        outV.isRepeating = false;
+      } else {
+        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!inV.isNull[i]) {
+            evaluate(outV, inV, i);
+          }
+        }
+        outV.isRepeating = false;
+      }
+    }
+  }
+
+  private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) {
+    try {
+      HiveIntervalYearMonth interval = HiveIntervalYearMonth.valueOf(
+          new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8"));
+      outV.vector[i] = interval.getTotalMonths();
+    } catch (Exception e) {
+      outV.vector[i] = 1;
+      outV.isNull[i] = true;
+      outV.noNulls = false;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public int getInputColumn() {
+    return inputColumn;
+  }
+
+  public void setInputColumn(int inputColumn) {
+    this.inputColumn = inputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(1)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return b.build();
+  }
+}

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java Sat Mar 28 00:22:15 2015
@@ -206,7 +206,7 @@ public class CuckooSetBytes {
     // Save original values
     if (prev1 == null) {
       prev1 = t1;
-      prev1 = t2;
+      prev2 = t2;
     }
     t1 = new byte[n][];
     t2 = new byte[n][];

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetDouble.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetDouble.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetDouble.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetDouble.java Sat Mar 28 00:22:15 2015
@@ -18,8 +18,6 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
-import java.util.Arrays;
-import java.util.Random;
 
 /**
  * A high-performance set implementation used to support fast set membership testing,

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetLong.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetLong.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetLong.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetLong.java Sat Mar 28 00:22:15 2015
@@ -244,7 +244,7 @@ public class CuckooSetLong {
     // Save original values
     if (prev1 == null) {
       prev1 = t1;
-      prev1 = t2;
+      prev2 = t2;
     }
     t1 = new long[n];
     t2 = new long[n];

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java Sat Mar 28 00:22:15 2015
@@ -75,11 +75,11 @@ public class MathExpr {
     return v == 0.0D ? 0L : 1L;
   }
 
-  /* Convert an integer value in miliseconds since the epoch to a timestamp value
+  /* Convert an integer value in seconds since the epoch to a timestamp value
    * for use in a long column vector, which is represented in nanoseconds since the epoch.
    */
   public static long longToTimestamp(long v) {
-    return v * 1000000;
+    return v * 1000000000;
   }
 
   // Convert seconds since the epoch (with fraction) to nanoseconds, as a long integer.
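
The corrected comment and multiplier now agree: the input is whole seconds since the epoch and the long column vector stores nanoseconds, so the factor must be 10^9 rather than 10^6. A quick standalone check of the fixed conversion:

    import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr;

    public class LongToTimestampCheck {
      public static void main(String[] args) {
        // One second since the epoch should become 1000000000 nanoseconds.
        System.out.println(MathExpr.longToTimestamp(1L));   // expected: 1000000000
      }
    }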

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java Sat Mar 28 00:22:15 2015
@@ -30,7 +30,8 @@ import org.apache.hadoop.hive.ql.exec.ve
  */
 public abstract class VectorExpression implements Serializable {
   public enum Type {
-    STRING, CHAR, VARCHAR, TIMESTAMP, DATE, LONG, DOUBLE, DECIMAL, OTHER;
+    STRING, CHAR, VARCHAR, TIMESTAMP, DATE, LONG, DOUBLE, DECIMAL,
+    INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME, OTHER;
     private static Map<String, Type> types = ImmutableMap.<String, Type>builder()
         .put("string", STRING)
         .put("char", CHAR)
@@ -40,6 +41,8 @@ public abstract class VectorExpression i
         .put("long", LONG)
         .put("double", DOUBLE)
         .put("decimal", DECIMAL)
+        .put("interval_year_month", INTERVAL_YEAR_MONTH)
+        .put("interval_day_time", INTERVAL_DAY_TIME)
         .build();
 
     public static Type getValue(String name) {
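
The two new constants are also registered in the name-to-type map, so interval type names can resolve to them. A small sketch of the lookup, assuming getValue resolves names against the lowercase keys registered above:

    import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;

    public class TypeLookupSketch {
      public static void main(String[] args) {
        System.out.println(VectorExpression.Type.getValue("interval_year_month")); // INTERVAL_YEAR_MONTH
        System.out.println(VectorExpression.Type.getValue("interval_day_time"));   // INTERVAL_DAY_TIME
      }
    }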

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java Sat Mar 28 00:22:15 2015
@@ -28,6 +28,8 @@ import org.apache.commons.lang.ArrayUtil
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -47,6 +49,8 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableFloatObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveIntervalDayTimeObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveIntervalYearMonthObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableIntObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableLongObjectInspector;
@@ -56,6 +60,7 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.Text;
+import org.apache.hive.common.util.DateUtils;
 
 /**
  * VectorExpressionWritableFactory helper class for generating VectorExpressionWritable objects.
@@ -430,6 +435,12 @@ public final class VectorExpressionWrite
           case DATE:
             return genVectorExpressionWritableDate(
                 (SettableDateObjectInspector) fieldObjInspector);
+          case INTERVAL_YEAR_MONTH:
+            return genVectorExpressionWritableIntervalYearMonth(
+                (SettableHiveIntervalYearMonthObjectInspector) fieldObjInspector);
+          case INTERVAL_DAY_TIME:
+            return genVectorExpressionWritableIntervalDayTime(
+                (SettableHiveIntervalDayTimeObjectInspector) fieldObjInspector);
           case DECIMAL:
             return genVectorExpressionWritableDecimal(
                 (SettableHiveDecimalObjectInspector) fieldObjInspector);
@@ -586,6 +597,84 @@ public final class VectorExpressionWrite
       }
    }.init(fieldObjInspector);
   }
+
+  private static VectorExpressionWriter genVectorExpressionWritableIntervalYearMonth(
+      SettableHiveIntervalYearMonthObjectInspector fieldObjInspector) throws HiveException {
+    return new VectorExpressionWriterLong() {
+      private Object obj;
+      private HiveIntervalYearMonth interval;
+
+      public VectorExpressionWriter init(SettableHiveIntervalYearMonthObjectInspector objInspector)
+          throws HiveException {
+        super.init(objInspector);
+        interval = new HiveIntervalYearMonth();
+        obj = initValue(null);
+        return this;
+      }
+
+      @Override
+      public Object writeValue(long value) {
+        interval.set((int) value);
+        ((SettableHiveIntervalYearMonthObjectInspector) this.objectInspector).set(obj, interval);
+        return obj;
+      }
+
+      @Override
+      public Object setValue(Object field, long value) {
+        if (null == field) {
+          field = initValue(null);
+        }
+        interval.set((int) value);
+        ((SettableHiveIntervalYearMonthObjectInspector) this.objectInspector).set(field, interval);
+        return field;
+      }
+
+      @Override
+      public Object initValue(Object ignored) {
+        return ((SettableHiveIntervalYearMonthObjectInspector) this.objectInspector)
+            .create(new HiveIntervalYearMonth());
+      }
+   }.init(fieldObjInspector);
+  }
+
+  private static VectorExpressionWriter genVectorExpressionWritableIntervalDayTime(
+      SettableHiveIntervalDayTimeObjectInspector fieldObjInspector) throws HiveException {
+    return new VectorExpressionWriterLong() {
+      private Object obj;
+      private HiveIntervalDayTime interval;
+
+      public VectorExpressionWriter init(SettableHiveIntervalDayTimeObjectInspector objInspector)
+          throws HiveException {
+        super.init(objInspector);
+        interval = new HiveIntervalDayTime();
+        obj = initValue(null);
+        return this;
+      }
+
+      @Override
+      public Object writeValue(long value) {
+        DateUtils.setIntervalDayTimeTotalNanos(interval, value);
+        ((SettableHiveIntervalDayTimeObjectInspector) this.objectInspector).set(obj, interval);
+        return obj;
+      }
+
+      @Override
+      public Object setValue(Object field, long value) {
+        if (null == field) {
+          field = initValue(null);
+        }
+        DateUtils.setIntervalDayTimeTotalNanos(interval, value);
+        ((SettableHiveIntervalDayTimeObjectInspector) this.objectInspector).set(field, interval);
+        return field;
+      }
+
+      @Override
+      public Object initValue(Object ignored) {
+        return ((SettableHiveIntervalDayTimeObjectInspector) this.objectInspector)
+            .create(new HiveIntervalDayTime());
+      }
+   }.init(fieldObjInspector);
+  }
 
   private static VectorExpressionWriter genVectorExpressionWritableChar(
         SettableHiveCharObjectInspector fieldObjInspector) throws HiveException {
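
Both new writers carry the vectorized long value back into the corresponding interval object: total months for year-month intervals, total nanoseconds (via DateUtils.setIntervalDayTimeTotalNanos) for day-time intervals. A minimal sketch of the day-time case; the one-day value is illustrative, not from the patch:

    import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
    import org.apache.hive.common.util.DateUtils;

    public class IntervalDayTimeNanosSketch {
      public static void main(String[] args) {
        // One day expressed in the unit the long column vector carries: nanoseconds.
        long oneDayNanos = 86400L * 1000000000L;
        HiveIntervalDayTime interval = new HiveIntervalDayTime();
        DateUtils.setIntervalDayTimeTotalNanos(interval, oneDayNanos);
        System.out.println(interval);
      }
    }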

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookUtils.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookUtils.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookUtils.java Sat Mar 28 00:22:15 2015
@@ -65,4 +65,19 @@ public class HookUtils {
     return hooks;
   }
 
+  public static String redactLogString(HiveConf conf, String logString)
+      throws InstantiationException, IllegalAccessException, ClassNotFoundException {
+
+    String redactedString = logString;
+
+    if (conf != null && logString != null) {
+      List<Redactor> queryRedactors = getHooks(conf, ConfVars.QUERYREDACTORHOOKS, Redactor.class);
+      for (Redactor redactor : queryRedactors) {
+        redactor.setConf(conf);
+        redactedString = redactor.redactQuery(redactedString);
+      }
+    }
+
+    return redactedString;
+  }
 }
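
redactLogString simply chains every configured redactor over the query text before it is logged. A hypothetical redactor for illustration only, assuming Redactor can be subclassed by overriding redactQuery as the loop above suggests; the class name and pattern are made up, and such a class would be registered through the hook list behind ConfVars.QUERYREDACTORHOOKS:

    import org.apache.hadoop.hive.ql.hooks.Redactor;

    // Illustrative only: masks anything shaped like a credit-card number in the query text.
    public class CreditCardRedactor extends Redactor {
      @Override
      public String redactQuery(String query) {
        return query.replaceAll("\\d{4}-\\d{4}-\\d{4}-\\d{4}", "****-****-****-****");
      }
    }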

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileRecordReader.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileRecordReader.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileRecordReader.java Sat Mar 28 00:22:15 2015
@@ -146,11 +146,11 @@ public class VectorizedRCFileRecordReade
 
   @Override
   public VectorizedRowBatch createValue() {
-    VectorizedRowBatch result = null;
+    VectorizedRowBatch result;
     try {
       result = rbCtx.createVectorizedRowBatch();
     } catch (HiveException e) {
-      new RuntimeException("Error creating a batch", e);
+      throw new RuntimeException("Error creating a batch", e);
     }
     return result;
   }
@@ -193,7 +193,7 @@ public class VectorizedRCFileRecordReade
         }
       }
     } catch (Exception e) {
-      new RuntimeException("Error while getting next row", e);
+      throw new RuntimeException("Error while getting next row", e);
     }
     value.size = i;
     return more;
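
The two changes above fix a classic slip: constructing a RuntimeException without throw creates the object and then discards it, so the error was silently swallowed (and, in createValue, a null batch was returned). A tiny standalone illustration of the difference:

    public class ThrowVsConstruct {
      public static void main(String[] args) {
        try {
          new RuntimeException("constructed but never thrown");   // no effect on control flow
          System.out.println("still running");
          throw new RuntimeException("actually thrown");          // the corrected pattern
        } catch (RuntimeException e) {
          System.out.println("caught: " + e.getMessage());
        }
      }
    }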

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java Sat Mar 28 00:22:15 2015
@@ -50,7 +50,7 @@ import org.codehaus.jettison.json.JSONWr
  * A tool for printing out the file structure of ORC files.
  */
 public final class FileDump {
-  private static final String ROWINDEX_PREFIX = "--rowindex=";
+  private static final String UNKNOWN = "UNKNOWN";
 
   // not used
   private FileDump() {}
@@ -77,9 +77,13 @@ public final class FileDump {
       }
     }
 
+    boolean printTimeZone = false;
+    if (cli.hasOption('t')) {
+      printTimeZone = true;
+    }
     String[] files = cli.getArgs();
     if (dumpData) printData(Arrays.asList(files), conf);
-    else printMetaData(Arrays.asList(files), conf, rowIndexCols);
+    else printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone);
   }
 
   private static void printData(List<String> files, Configuration conf) throws IOException,
@@ -90,7 +94,7 @@ public final class FileDump {
   }
 
   private static void printMetaData(List<String> files, Configuration conf,
-                                    List<Integer> rowIndexCols) throws IOException {
+      List<Integer> rowIndexCols, boolean printTimeZone) throws IOException {
     for (String filename : files) {
       System.out.println("Structure for " + filename);
       Path path = new Path(filename);
@@ -125,11 +129,19 @@ public final class FileDump {
       for (StripeInformation stripe : reader.getStripes()) {
         ++stripeIx;
         long stripeStart = stripe.getOffset();
-        System.out.println("  Stripe: " + stripe.toString());
         OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
+        if (printTimeZone) {
+          String tz = footer.getWriterTimezone();
+          if (tz == null || tz.isEmpty()) {
+            tz = UNKNOWN;
+          }
+          System.out.println("  Stripe: " + stripe.toString() + " timezone: " + tz);
+        } else {
+          System.out.println("  Stripe: " + stripe.toString());
+        }
         long sectionStart = stripeStart;
         for(OrcProto.Stream section: footer.getStreamsList()) {
-          String kind = section.hasKind() ? section.getKind().name() : "UNKNOWN";
+          String kind = section.hasKind() ? section.getKind().name() : UNKNOWN;
           System.out.println("    Stream: column " + section.getColumn() +
               " section " + kind + " start: " + sectionStart +
               " length " + section.getLength());
@@ -278,6 +290,13 @@ public final class FileDump {
         .withDescription("Should the data be printed")
         .create('d'));
 
+    // to avoid breaking unit tests (when run in different time zones) for file dump, printing
+    // of timezone is made optional
+    result.addOption(OptionBuilder
+        .withLongOpt("timezone")
+        .withDescription("Print writer's time zone")
+        .create('t'));
+
     result.addOption(OptionBuilder
         .withLongOpt("help")
         .withDescription("print help message")

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java Sat Mar 28 00:22:15 2015
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.ListIterator;
 
@@ -59,9 +60,13 @@ public abstract class InStream extends I
     return name;
   }
 
+  public long getStreamLength() {
+    return length;
+  }
 
   public static class UncompressedStream extends InStream {
-    protected List<DiskRange> bytes;
+    private List<DiskRange> bytes;
+    private long length;
     private long currentOffset;
     private ByteBuffer range;
     private int currentRange;
@@ -440,8 +445,8 @@ public abstract class InStream extends I
           if (desired != range.getOffset()) {
             throw new IOException("Cannot seek into the middle of uncompressed cached data");
           }
-          currentOffset = desired;
         }
+        currentOffset = desired;
         return;
       }
       throw new IOException("Seek outside of data in " + this + " to " + desired);

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1669718&r1=1669717&r2=1669718&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Sat Mar 28 00:22:15 2015
@@ -267,6 +267,7 @@ public class OrcInputFormat  implements
     }
     return result;
   }
+
   /**
    * Take the configuration and figure out which columns we need to include.
    * @param options the options to update


