hive-commits mailing list archives

From: br...@apache.org
Subject: svn commit: r1529308 [6/10] - in /hive/branches/maven: ./ ant/src/org/apache/hadoop/hive/ant/ beeline/src/java/org/apache/hive/beeline/ beeline/src/test/org/apache/hive/beeline/src/test/ bin/ common/ common/src/java/org/apache/hadoop/hive/conf/ common/...
Date: Fri, 04 Oct 2013 21:30:46 GMT
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Fri Oct  4 21:30:38 2013
@@ -27,15 +27,17 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.io.orc.RunLengthIntegerWriterV2.EncodingType;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -234,6 +236,38 @@ class RecordReaderImpl implements Record
       }
       return previous;
     }
+    /**
+     * Populates the isNull vector array in the previousVector object based on
+     * the present stream values. This function is called from all the child
+     * readers, and they all set the values based on the isNull field value.
+     * @param previousVector The columnVector object whose isNull value is populated
+     * @param batchSize Size of the column vector
+     * @return The updated previousVector object
+     * @throws IOException
+     */
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+
+      ColumnVector result = (ColumnVector) previousVector;
+      if (present != null) {
+        // Set noNulls and isNull vector of the ColumnVector based on
+        // present stream
+        result.noNulls = true;
+        for (int i = 0; i < batchSize; i++) {
+          result.isNull[i] = (present.next() != 1);
+          if (result.noNulls && result.isNull[i]) {
+            result.noNulls = false;
+          }
+        }
+      } else {
+        // There is no present stream, which means that all the values are
+        // present.
+        result.noNulls = true;
+        for (int i = 0; i < batchSize; i++) {
+          result.isNull[i] = false;
+        }
+      }
+      return previousVector;
+    }
   }
 
   private static class BooleanTreeReader extends TreeReader{
@@ -277,6 +311,23 @@ class RecordReaderImpl implements Record
       }
       return result;
     }
+
+    @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      LongColumnVector result = null;
+      if (previousVector == null) {
+        result = new LongColumnVector();
+      } else {
+        result = (LongColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+
+      // Read value entries based on isNull entries
+      reader.nextVector(result, batchSize);
+      return result;
+    }
   }
 
   private static class ByteTreeReader extends TreeReader{
@@ -317,6 +368,23 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      LongColumnVector result = null;
+      if (previousVector == null) {
+        result = new LongColumnVector();
+      } else {
+        result = (LongColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+
+      // Read value entries based on isNull entries
+      reader.nextVector(result, batchSize);
+      return result;
+    }
+
+    @Override
     void skipRows(long items) throws IOException {
       reader.skip(countNonNulls(items));
     }
@@ -370,6 +438,23 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      LongColumnVector result = null;
+      if (previousVector == null) {
+        result = new LongColumnVector();
+      } else {
+        result = (LongColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+
+      // Read value entries based on isNull entries
+      reader.nextVector(result, batchSize);
+      return result;
+    }
+
+    @Override
     void skipRows(long items) throws IOException {
       reader.skip(countNonNulls(items));
     }
@@ -423,6 +508,23 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      LongColumnVector result = null;
+      if (previousVector == null) {
+        result = new LongColumnVector();
+      } else {
+        result = (LongColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+
+      // Read value entries based on isNull entries
+      reader.nextVector(result, batchSize);
+      return result;
+    }
+
+    @Override
     void skipRows(long items) throws IOException {
       reader.skip(countNonNulls(items));
     }
@@ -476,6 +578,23 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      LongColumnVector result = null;
+      if (previousVector == null) {
+        result = new LongColumnVector();
+      } else {
+        result = (LongColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+
+      // Read value entries based on isNull entries
+      reader.nextVector(result, batchSize);
+      return result;
+    }
+
+    @Override
     void skipRows(long items) throws IOException {
       reader.skip(countNonNulls(items));
     }
@@ -520,6 +639,39 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      DoubleColumnVector result = null;
+      if (previousVector == null) {
+        result = new DoubleColumnVector();
+      } else {
+        result = (DoubleColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+
+      // Read value entries based on isNull entries
+      for (int i = 0; i < batchSize; i++) {
+        if (!result.isNull[i]) {
+          result.vector[i] = SerializationUtils.readFloat(stream);
+        } else {
+
+          // If the value is not present then set NaN
+          result.vector[i] = Double.NaN;
+        }
+      }
+
+      // Set isRepeating flag
+      result.isRepeating = true;
+      for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) {
+        if (result.vector[i] != result.vector[i + 1]) {
+          result.isRepeating = false;
+        }
+      }
+      return result;
+    }
+
+    @Override
     void skipRows(long items) throws IOException {
       items = countNonNulls(items);
       for(int i=0; i < items; ++i) {
@@ -568,6 +720,38 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      DoubleColumnVector result = null;
+      if (previousVector == null) {
+        result = new DoubleColumnVector();
+      } else {
+        result = (DoubleColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+
+      // Read value entries based on isNull entries
+      for (int i = 0; i < batchSize; i++) {
+        if (!result.isNull[i]) {
+          result.vector[i] = SerializationUtils.readDouble(stream);
+        } else {
+          // If the value is not present then set NaN
+          result.vector[i] = Double.NaN;
+        }
+      }
+
+      // Set isRepeating flag
+      result.isRepeating = true;
+      for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) {
+        if (result.vector[i] != result.vector[i + 1]) {
+          result.isRepeating = false;
+        }
+      }
+      return result;
+    }
+
+    @Override
     void skipRows(long items) throws IOException {
       items = countNonNulls(items);
       stream.skip(items * 8);
@@ -636,6 +820,12 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      throw new UnsupportedOperationException(
+          "NextBatch is not supported operation for Binary type");
+    }
+
+    @Override
     void skipRows(long items) throws IOException {
       items = countNonNulls(items);
       long lengthToSkip = 0;
@@ -649,6 +839,7 @@ class RecordReaderImpl implements Record
   private static class TimestampTreeReader extends TreeReader{
     private IntegerReader data = null;
     private IntegerReader nanos = null;
+    private final LongColumnVector nanoVector = new LongColumnVector();
 
     TimestampTreeReader(Path path, int columnId) {
       super(path, columnId);
@@ -708,6 +899,53 @@ class RecordReaderImpl implements Record
       return result;
     }
 
+    @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      LongColumnVector result = null;
+      if (previousVector == null) {
+        result = new LongColumnVector();
+      } else {
+        result = (LongColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+
+      data.nextVector(result, batchSize);
+      nanoVector.isNull = result.isNull;
+      nanos.nextVector(nanoVector, batchSize);
+
+      if(result.isRepeating && nanoVector.isRepeating) {
+        batchSize = 1;
+      }
+
+      // Non-repeating values present in the vector. Iterate through the vector and populate the time
+      for (int i = 0; i < batchSize; i++) {
+        if (!result.isNull[i]) {
+          long ms = (result.vector[result.isRepeating ? 0 : i] + WriterImpl.BASE_TIMESTAMP)
+              * WriterImpl.MILLIS_PER_SECOND;
+          long ns = parseNanos(nanoVector.vector[nanoVector.isRepeating ? 0 : i]);
+          // the adjustment below is needed because Java integer division truncates toward zero
+          // -42001/1000 = -42; and -42001 % 1000 = -1 (+ 1000)
+          // to get the correct value we need
+          // (-42 - 1)*1000 + 999 = -42001
+          // (42)*1000 + 1 = 42001
+          if(ms < 0 && ns != 0) {
+            ms -= 1000;
+          }
+          // Convert millis into nanos and add the nano vector value to it
+          result.vector[i] = (ms * 1000000) + ns;
+        }
+      }
+
+      if(!(result.isRepeating && nanoVector.isRepeating)) {
+        // both have to repeat for the result to be repeating
+        result.isRepeating = false;
+      }
+
+      return result;
+    }
+
     private static int parseNanos(long serialized) {
       int zeros = 7 & (int) serialized;
       int result = (int) serialized >>> 3;
@@ -826,6 +1064,12 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      throw new UnsupportedOperationException(
+          "NextVector is not supported operation for Decimal type");
+    }
+
+    @Override
     void skipRows(long items) throws IOException {
       items = countNonNulls(items);
       for(int i=0; i < items; i++) {
@@ -885,6 +1129,11 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      return reader.nextVector(previousVector, batchSize);
+    }
+
+    @Override
     void skipRows(long items) throws IOException {
       reader.skipRows(items);
     }
@@ -898,8 +1147,11 @@ class RecordReaderImpl implements Record
     private InStream stream;
     private IntegerReader lengths;
 
+    private final LongColumnVector scratchlcv;
+
     StringDirectTreeReader(Path path, int columnId) {
       super(path, columnId);
+      scratchlcv = new LongColumnVector();
     }
 
     @Override
@@ -958,6 +1210,72 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      BytesColumnVector result = null;
+      if (previousVector == null) {
+        result = new BytesColumnVector();
+      } else {
+        result = (BytesColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+
+      // Read lengths
+      scratchlcv.isNull = result.isNull;
+      lengths.nextVector(scratchlcv, batchSize);
+      int totalLength = 0;
+      if (!scratchlcv.isRepeating) {
+        for (int i = 0; i < batchSize; i++) {
+          if (!scratchlcv.isNull[i]) {
+            totalLength += (int) scratchlcv.vector[i];
+          }
+        }
+      } else {
+        if (!scratchlcv.isNull[0]) {
+          totalLength = (int) (batchSize * scratchlcv.vector[0]);
+        }
+      }
+
+      // Read all the strings for this batch
+      byte[] allBytes = new byte[totalLength];
+      int offset = 0;
+      int len = totalLength;
+      while (len > 0) {
+        int bytesRead = stream.read(allBytes, offset, len);
+        if (bytesRead < 0) {
+          throw new EOFException("Can't finish byte read from " + stream);
+        }
+        len -= bytesRead;
+        offset += bytesRead;
+      }
+
+      // Too expensive to figure out 'repeating' by comparisons.
+      result.isRepeating = false;
+      offset = 0;
+      if (!scratchlcv.isRepeating) {
+        for (int i = 0; i < batchSize; i++) {
+          if (!scratchlcv.isNull[i]) {
+            result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]);
+            offset += scratchlcv.vector[i];
+          } else {
+            result.setRef(i, allBytes, 0, 0);
+          }
+        }
+      } else {
+        for (int i = 0; i < batchSize; i++) {
+          if (!scratchlcv.isNull[i]) {
+            result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]);
+            offset += scratchlcv.vector[0];
+          } else {
+            result.setRef(i, allBytes, 0, 0);
+          }
+        }
+      }
+      return result;
+    }
+
+    @Override
     void skipRows(long items) throws IOException {
       items = countNonNulls(items);
       long lengthToSkip = 0;
@@ -977,8 +1295,12 @@ class RecordReaderImpl implements Record
     private int[] dictionaryOffsets;
     private IntegerReader reader;
 
+    private byte[] dictionaryBufferInBytesCache = null;
+    private final LongColumnVector scratchlcv;
+
     StringDictionaryTreeReader(Path path, int columnId) {
       super(path, columnId);
+      scratchlcv = new LongColumnVector();
     }
 
     @Override
@@ -1004,6 +1326,8 @@ class RecordReaderImpl implements Record
       if (in.available() > 0) {
         dictionaryBuffer = new DynamicByteArray(64, in.available());
         dictionaryBuffer.readAll(in);
+        // Since it's the start of a stripe, invalidate the cache.
+        dictionaryBufferInBytesCache = null;
       } else {
         dictionaryBuffer = null;
       }
@@ -1050,14 +1374,7 @@ class RecordReaderImpl implements Record
           result = (Text) previous;
         }
         int offset = dictionaryOffsets[entry];
-        int length;
-        // if it isn't the last entry, subtract the offsets otherwise use
-        // the buffer length.
-        if (entry < dictionaryOffsets.length - 1) {
-          length = dictionaryOffsets[entry + 1] - offset;
-        } else {
-          length = dictionaryBuffer.size() - offset;
-        }
+        int length = getDictionaryEntryLength(entry, offset);
         // If the column is just empty strings, the size will be zero,
         // so the buffer will be null, in that case just return result
         // as it will default to empty
@@ -1071,6 +1388,74 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      BytesColumnVector result = null;
+      int offset = 0, length = 0;
+      if (previousVector == null) {
+        result = new BytesColumnVector();
+      } else {
+        result = (BytesColumnVector) previousVector;
+      }
+
+      // Read present/isNull stream
+      super.nextVector(result, batchSize);
+
+      if (dictionaryBuffer != null) {
+
+        // Load dictionaryBuffer into cache.
+        if (dictionaryBufferInBytesCache == null) {
+          dictionaryBufferInBytesCache = dictionaryBuffer.get();
+        }
+
+        // Read string offsets
+        scratchlcv.isNull = result.isNull;
+        reader.nextVector(scratchlcv, batchSize);
+        if (!scratchlcv.isRepeating) {
+
+          // The vector has non-repeating strings. Iterate through the batch
+          // and set the strings one by one
+          for (int i = 0; i < batchSize; i++) {
+            if (!scratchlcv.isNull[i]) {
+              offset = dictionaryOffsets[(int) scratchlcv.vector[i]];
+              length = getDictionaryEntryLength((int) scratchlcv.vector[i], offset);
+              result.setRef(i, dictionaryBufferInBytesCache, offset, length);
+            } else {
+              // If the value is null then set offset and length to zero (null string)
+              result.setRef(i, dictionaryBufferInBytesCache, 0, 0);
+            }
+          }
+        } else {
+          // If the value is repeating then just set the first value in the
+          // vector and set the isRepeating flag to true. No need to iterate through and
+          // set all the elements to the same value
+          offset = dictionaryOffsets[(int) scratchlcv.vector[0]];
+          length = getDictionaryEntryLength((int) scratchlcv.vector[0], offset);
+          result.setRef(0, dictionaryBufferInBytesCache, offset, length);
+        }
+        result.isRepeating = scratchlcv.isRepeating;
+      } else {
+        // Entire stripe contains null strings.
+        result.isRepeating = true;
+        result.noNulls = false;
+        result.isNull[0] = true;
+        result.setRef(0, "".getBytes(), 0, 0);
+      }
+      return result;
+    }
+
+    int getDictionaryEntryLength(int entry, int offset) {
+      int length = 0;
+      // if it isn't the last entry, subtract the offsets otherwise use
+      // the buffer length.
+      if (entry < dictionaryOffsets.length - 1) {
+        length = dictionaryOffsets[entry + 1] - offset;
+      } else {
+        length = dictionaryBuffer.size() - offset;
+      }
+      return length;
+    }
+
+    @Override
     void skipRows(long items) throws IOException {
       reader.skip(countNonNulls(items));
     }
@@ -1162,6 +1547,28 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      ColumnVector[] result = null;
+      if (previousVector == null) {
+        result = new ColumnVector[fields.length];
+      } else {
+        result = (ColumnVector[]) previousVector;
+      }
+
+      // Read all the members of struct as column vectors
+      for (int i = 0; i < fields.length; i++) {
+        if (fields[i] != null) {
+          if (result[i] == null) {
+            result[i] = (ColumnVector) fields[i].nextVector(null, batchSize);
+          } else {
+            fields[i].nextVector(result[i], batchSize);
+          }
+        }
+      }
+      return result;
+    }
+
+    @Override
     void startStripe(Map<StreamName, InStream> streams,
                      List<OrcProto.ColumnEncoding> encodings
                     ) throws IOException {
@@ -1231,6 +1638,12 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previousVector, long batchSize) throws IOException {
+      throw new UnsupportedOperationException(
+          "NextVector is not supported operation for Union type");
+    }
+
+    @Override
     void startStripe(Map<StreamName, InStream> streams,
                      List<OrcProto.ColumnEncoding> encodings
                      ) throws IOException {
@@ -1308,6 +1721,11 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previous, long batchSize) throws IOException {
+      throw new UnsupportedOperationException(
+          "NextVector is not supported operation for List type");
+    }
+
     void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
       if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
           (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
@@ -1396,6 +1814,11 @@ class RecordReaderImpl implements Record
     }
 
     @Override
+    Object nextVector(Object previous, long batchSize) throws IOException {
+      throw new UnsupportedOperationException(
+          "NextVector is not supported operation for Map type");
+    }
+
     void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
       if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
           (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
@@ -1706,13 +2129,18 @@ class RecordReaderImpl implements Record
     TruthValue[] leafValues = new TruthValue[sargLeaves.size()];
     for(int rowGroup=0; rowGroup < result.length; ++rowGroup) {
       for(int pred=0; pred < leafValues.length; ++pred) {
-        OrcProto.ColumnStatistics stats =
-            indexes[filterColumns[pred]].getEntry(rowGroup).getStatistics();
-        leafValues[pred] = evaluatePredicate(stats, sargLeaves.get(pred));
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Stats = " + stats);
-          LOG.debug("Setting " + sargLeaves.get(pred) + " to " +
-              leafValues[pred]);
+        if (filterColumns[pred] != -1) {
+          OrcProto.ColumnStatistics stats =
+              indexes[filterColumns[pred]].getEntry(rowGroup).getStatistics();
+          leafValues[pred] = evaluatePredicate(stats, sargLeaves.get(pred));
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Stats = " + stats);
+            LOG.debug("Setting " + sargLeaves.get(pred) + " to " +
+                leafValues[pred]);
+          }
+        } else {
+          // the column is a virtual column
+          leafValues[pred] = TruthValue.YES_NO_NULL;
         }
       }
       result[rowGroup] = sarg.evaluate(leafValues).isNotNeeded();
@@ -2196,6 +2624,31 @@ class RecordReaderImpl implements Record
   }
 
   @Override
+  public VectorizedRowBatch nextBatch(VectorizedRowBatch previous) throws IOException {
+    VectorizedRowBatch result = null;
+    if (rowInStripe >= rowCountInStripe) {
+      currentStripe += 1;
+      readStripe();
+    }
+
+    long batchSize = Math.min(VectorizedRowBatch.DEFAULT_SIZE, (rowCountInStripe - rowInStripe));
+    rowInStripe += batchSize;
+    if (previous == null) {
+      ColumnVector[] cols = (ColumnVector[]) reader.nextVector(null, (int) batchSize);
+      result = new VectorizedRowBatch(cols.length);
+      result.cols = cols;
+    } else {
+      result = (VectorizedRowBatch) previous;
+      result.selectedInUse = false;
+      reader.nextVector(result.cols, (int) batchSize);
+    }
+
+    result.size = (int) batchSize;
+    advanceToNextRow(rowInStripe + rowBaseInStripe);
+    return result;
+  }
+
+  @Override
   public void close() throws IOException {
     file.close();
   }
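For context, a minimal sketch of how a caller might drive the vectorized read path exposed by nextBatch() above. This is a hypothetical helper, not part of the commit; it assumes a RecordReader obtained from the usual Reader.rows(...) entry point and reuses the batch so the column vectors are allocated only once:

    import java.io.IOException;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.ql.io.orc.RecordReader;

    static long countRowsVectorized(RecordReader rows) throws IOException {
      VectorizedRowBatch batch = null;
      long count = 0;
      while (rows.hasNext()) {
        // The first call allocates the batch and its column vectors; later calls reuse them.
        batch = rows.nextBatch(batch);
        count += batch.size;
      }
      rows.close();
      return count;
    }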

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java Fri Oct  4 21:30:38 2013
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.io.orc
 import java.io.EOFException;
 import java.io.IOException;
 
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+
 /**
  * A reader that reads a sequence of bytes. A control byte is read before
  * each run with positive values 0 to 127 meaning 3 to 130 repetitions. If the
@@ -82,6 +84,29 @@ class RunLengthByteReader {
     return result;
   }
 
+  void nextVector(LongColumnVector previous, long previousLen)
+      throws IOException {
+    previous.isRepeating = true;
+    for (int i = 0; i < previousLen; i++) {
+      if (!previous.isNull[i]) {
+        previous.vector[i] = next();
+      } else {
+        // The default value used for nulls of int types in vectorized
+        // processing is 1, so set that if the value is null
+        previous.vector[i] = 1;
+      }
+
+      // The default value for nulls in Vectorization for int types is 1
+      // and given that a non-null value can also be 1, we need to check isNull as well
+      // when determining the isRepeating flag.
+      if (previous.isRepeating
+          && i > 0
+          && ((previous.vector[i - 1] != previous.vector[i]) || (previous.isNull[i - 1] != previous.isNull[i]))) {
+        previous.isRepeating = false;
+      }
+    }
+  }
+
   void seek(PositionProvider index) throws IOException {
     input.seek(index);
     int consumed = (int) index.getNext();

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java Fri Oct  4 21:30:38 2013
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.io.orc
 import java.io.EOFException;
 import java.io.IOException;
 
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+
 /**
  * A reader that reads a sequence of integers.
  * */
@@ -91,6 +93,30 @@ class RunLengthIntegerReader implements 
   }
 
   @Override
+  public void nextVector(LongColumnVector previous, long previousLen)
+      throws IOException {
+    previous.isRepeating = true;
+    for (int i = 0; i < previousLen; i++) {
+      if (!previous.isNull[i]) {
+        previous.vector[i] = next();
+      } else {
+        // The default value used for nulls of int types in vectorized
+        // processing is 1, so set that if the value is null
+        previous.vector[i] = 1;
+      }
+
+      // The default value for nulls in Vectorization for int types is 1
+      // and given that a non-null value can also be 1, we need to check isNull as well
+      // when determining the isRepeating flag.
+      if (previous.isRepeating
+          && i > 0
+          && (previous.vector[i - 1] != previous.vector[i] || previous.isNull[i - 1] != previous.isNull[i])) {
+        previous.isRepeating = false;
+      }
+    }
+  }
+
+  @Override
   public void seek(PositionProvider index) throws IOException {
     input.seek(index);
     int consumed = (int) index.getNext();

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReaderV2.java Fri Oct  4 21:30:38 2013
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.io.orc
 import java.io.EOFException;
 import java.io.IOException;
 
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.io.orc.RunLengthIntegerWriterV2.EncodingType;
 
 /**
@@ -322,4 +323,28 @@ class RunLengthIntegerReaderV2 implement
       numValues -= consume;
     }
   }
+
+  @Override
+  public void nextVector(LongColumnVector previous, long previousLen) throws IOException {
+    previous.isRepeating = true;
+    for (int i = 0; i < previousLen; i++) {
+      if (!previous.isNull[i]) {
+        previous.vector[i] = next();
+      } else {
+        // The default value used for nulls of int types in vectorized
+        // processing is 1, so set that if the value is null
+        previous.vector[i] = 1;
+      }
+
+      // The default value for nulls in Vectorization for int types is 1
+      // and given that a non-null value can also be 1, we need to check isNull as well
+      // when determining the isRepeating flag.
+      if (previous.isRepeating
+          && i > 0
+          && (previous.vector[i - 1] != previous.vector[i] ||
+          previous.isNull[i - 1] != previous.isNull[i])) {
+        previous.isRepeating = false;
+      }
+    }
+  }
 }
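As a side note, the three nextVector() implementations above share the same run-detection rule: a batch is flagged as repeating only when both the values and the null flags agree across all entries, because nulls are materialized as 1 and could otherwise collide with a genuine value of 1. A standalone sketch of that rule (hypothetical helper, not part of the commit):

    // Mirrors the isRepeating logic in the RunLength*Reader.nextVector() methods above.
    static boolean isRepeating(long[] vector, boolean[] isNull, int batchSize) {
      for (int i = 1; i < batchSize; i++) {
        if (vector[i - 1] != vector[i] || isNull[i - 1] != isNull[i]) {
          return false;
        }
      }
      return true;
    }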

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java Fri Oct  4 21:30:38 2013
@@ -32,4 +32,10 @@ public interface StringColumnStatistics 
    * @return the maximum
    */
   String getMaximum();
+
+  /**
+   * Get the total length of all strings
+   * @return the sum (total length)
+   */
+  long getSum();
 }

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java Fri Oct  4 21:30:38 2013
@@ -47,4 +47,22 @@ public interface Writer {
    * @throws IOException
    */
   void close() throws IOException;
+
+  /**
+   * Return the deserialized data size. The raw data size will be computed when
+   * writing the file footer. Hence the raw data size value will be available
+   * only after closing the writer.
+   *
+   * @return raw data size
+   */
+  long getRawDataSize();
+
+  /**
+   * Return the number of rows in the file. The row count gets updated when
+   * flushing the stripes. To get an accurate row count, this method should be
+   * called after closing the writer.
+   *
+   * @return row count
+   */
+  long getNumberOfRows();
 }
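A minimal usage sketch for the two accessors added to the Writer interface above (hypothetical helper, not part of the commit; per the javadoc, both values are only meaningful once the writer has been closed and the footer written):

    import java.io.IOException;
    import org.apache.hadoop.hive.ql.io.orc.Writer;

    static void logOrcWriterStats(Writer writer) throws IOException {
      writer.close();                              // footer is written; raw data size is computed here
      long rawDataSize = writer.getRawDataSize();  // deserialized size estimate, valid after close()
      long numRows = writer.getNumberOfRows();     // final row count, valid after close()
      System.out.println("rawDataSize=" + rawDataSize + ", rows=" + numRows);
    }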

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Fri Oct  4 21:30:38 2013
@@ -27,19 +27,17 @@ import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
-
-import com.google.protobuf.ByteString;
-import com.google.protobuf.CodedOutputStream;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
@@ -66,6 +64,9 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
 
+import com.google.protobuf.ByteString;
+import com.google.protobuf.CodedOutputStream;
+
 /**
  * An ORC file writer. The file is divided into stripes, which is the natural
  * unit of work when reading. Each stripe is buffered in memory until the
@@ -111,6 +112,7 @@ class WriterImpl implements Writer, Memo
   private int columnCount;
   private long rowCount = 0;
   private long rowsInStripe = 0;
+  private long rawDataSize = 0;
   private int rowsInIndex = 0;
   private final List<OrcProto.StripeInformation> stripes =
     new ArrayList<OrcProto.StripeInformation>();
@@ -1085,6 +1087,7 @@ class WriterImpl implements Writer, Memo
             ((BinaryObjectInspector) inspector).getPrimitiveWritableObject(obj);
         stream.write(val.getBytes(), 0, val.getLength());
         length.write(val.getLength());
+        indexStatistics.updateBinary(val);
       }
     }
 
@@ -1760,6 +1763,74 @@ class WriterImpl implements Writer, Memo
     }
   }
 
+  private long computeRawDataSize() {
+    long result = 0;
+    for (TreeWriter child : treeWriter.getChildrenWriters()) {
+      result += getRawDataSizeFromInspectors(child, child.inspector);
+    }
+    return result;
+  }
+
+  private long getRawDataSizeFromInspectors(TreeWriter child, ObjectInspector oi) {
+    long total = 0;
+    switch (oi.getCategory()) {
+    case PRIMITIVE:
+      total += getRawDataSizeFromPrimitives(child, oi);
+      break;
+    case LIST:
+    case MAP:
+    case UNION:
+    case STRUCT:
+      for (TreeWriter tw : child.childrenWriters) {
+        total += getRawDataSizeFromInspectors(tw, tw.inspector);
+      }
+      break;
+    default:
+      LOG.debug("Unknown object inspector category.");
+      break;
+    }
+    return total;
+  }
+
+  private long getRawDataSizeFromPrimitives(TreeWriter child, ObjectInspector oi) {
+    long result = 0;
+    long numVals = child.fileStatistics.getNumberOfValues();
+    switch (((PrimitiveObjectInspector) oi).getPrimitiveCategory()) {
+    case BOOLEAN:
+    case BYTE:
+    case SHORT:
+    case INT:
+    case FLOAT:
+      return numVals * JavaDataModel.get().primitive1();
+    case LONG:
+    case DOUBLE:
+      return numVals * JavaDataModel.get().primitive2();
+    case STRING:
+      // ORC strings are converted to Java Strings, so use JavaDataModel to
+      // compute the overall size of strings
+      child = (StringTreeWriter) child;
+      StringColumnStatistics scs = (StringColumnStatistics) child.fileStatistics;
+      numVals = numVals == 0 ? 1 : numVals;
+      int avgStringLen = (int) (scs.getSum() / numVals);
+      return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
+    case DECIMAL:
+      return numVals * JavaDataModel.get().lengthOfDecimal();
+    case DATE:
+      return numVals * JavaDataModel.get().lengthOfDate();
+    case BINARY:
+      // get total length of binary blob
+      BinaryColumnStatistics bcs = (BinaryColumnStatistics) child.fileStatistics;
+      return bcs.getSum();
+    case TIMESTAMP:
+      return numVals * JavaDataModel.get().lengthOfTimestamp();
+    default:
+      LOG.debug("Unknown primitive category.");
+      break;
+    }
+
+    return result;
+  }
+
   private OrcProto.CompressionKind writeCompressionKind(CompressionKind kind) {
     switch (kind) {
       case NONE: return OrcProto.CompressionKind.NONE;
@@ -1786,6 +1857,8 @@ class WriterImpl implements Writer, Memo
     builder.setHeaderLength(headerLength);
     builder.setNumberOfRows(rowCount);
     builder.setRowIndexStride(rowIndexStride);
+    // populate raw data size
+    rawDataSize = computeRawDataSize();
     // serialize the types
     writeTypes(builder, treeWriter);
     // add the stripe information
@@ -1871,4 +1944,22 @@ class WriterImpl implements Writer, Memo
       rawWriter.close();
     }
   }
+
+  /**
+   * Raw data size will be computed when writing the file footer. Hence the raw
+   * data size value will be available only after closing the writer.
+   */
+  @Override
+  public long getRawDataSize() {
+    return rawDataSize;
+  }
+
+  /**
+   * Row count gets updated when flushing the stripes. To get an accurate row
+   * count, call this method after the writer is closed.
+   */
+  @Override
+  public long getNumberOfRows() {
+    return rowCount;
+  }
 }
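To make the string branch of getRawDataSizeFromPrimitives() above concrete, a small sketch of the estimate it produces (hypothetical helper, not part of the commit; JavaDataModel.get() selects the 32- or 64-bit memory model at runtime):

    import org.apache.hadoop.hive.ql.util.JavaDataModel;

    // numVals is the column's value count; totalLen corresponds to StringColumnStatistics.getSum().
    static long estimateStringColumnRawSize(long numVals, long totalLen) {
      long n = numVals == 0 ? 1 : numVals;
      int avgStringLen = (int) (totalLen / n);
      return n * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
    }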

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Fri Oct  4 21:30:38 2013
@@ -67,14 +67,14 @@ import org.apache.hadoop.hive.ql.plan.Jo
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PTFDesc;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFExpressionDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowFunctionDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowTableFunctionDef;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java Fri Oct  4 21:30:38 2013
@@ -378,7 +378,8 @@ public final class CorrelationUtilities 
       // copies desc of cGBYm to cGBYr and remove cGBYm and cRS
       GroupByOperator cGBYm = (GroupByOperator) parent;
 
-      cGBYr.getConf().setKeys(cGBYm.getConf().getKeys());
+      cGBYr.getConf().setKeys(ExprNodeDescUtils.backtrack(ExprNodeDescUtils.backtrack(cGBYr
+              .getConf().getKeys(), cGBYr, cRS), cRS, cGBYm));
       cGBYr.getConf().setAggregators(cGBYm.getConf().getAggregators());
       for (AggregationDesc aggr : cGBYm.getConf().getAggregators()) {
         aggr.setMode(GenericUDAFEvaluator.Mode.COMPLETE);

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java Fri Oct  4 21:30:38 2013
@@ -77,6 +77,13 @@ public class PhysicalOptimizer {
     if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT)) {
       resolvers.add(new BucketingSortingInferenceOptimizer());
     }
+
+    // Vectorization should be the last optimization, because it doesn't modify the plan
+    // or any operators. It makes a very low-level transformation of the expressions so
+    // they run in vectorized mode.
+    if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
+      resolvers.add(new Vectorizer());
+    }
   }
 
   /**

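Note that the Vectorizer resolver added above only runs when vectorized execution is enabled in the configuration. A minimal sketch of flipping that switch programmatically (assumed usage of the standard HiveConf API, not part of the commit):

    import org.apache.hadoop.hive.conf.HiveConf;

    static HiveConf vectorizedConf() {
      HiveConf conf = new HiveConf();
      // Enables the Vectorizer physical resolver registered above.
      conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
      return conf;
    }
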
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Fri Oct  4 21:30:38 2013
@@ -29,6 +29,7 @@ import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 
 import org.antlr.runtime.tree.CommonTree;
 import org.antlr.runtime.tree.Tree;
@@ -40,6 +41,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.QueryProperties;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -59,10 +61,15 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.hadoop.mapred.TextInputFormat;
@@ -304,6 +311,13 @@ public abstract class BaseSemanticAnalyz
     rootTasks = new ArrayList<Task<? extends Serializable>>();
   }
 
+  public static String stripIdentifierQuotes(String val) {
+    if ((val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`')) {
+      val = val.substring(1, val.length() - 1);
+    }
+    return val;
+  }
+
   public static String stripQuotes(String val) {
     return PlanUtils.stripQuotes(val);
   }
@@ -580,7 +594,7 @@ public abstract class BaseSemanticAnalyz
         // child 2 is the optional comment of the column
         if (child.getChildCount() == 3) {
           col.setComment(unescapeSQLString(child.getChild(2).getText()));
-        }        
+        }
       }
       colList.add(col);
     }
@@ -748,7 +762,7 @@ public abstract class BaseSemanticAnalyz
         }
 
         // check if the columns specified in the partition() clause are actually partition columns
-        Utilities.validatePartSpec(tableHandle, partSpec);
+        validatePartSpec(tableHandle, partSpec, ast, conf);
 
         // check if the partition spec is valid
         if (numDynParts > 0) {
@@ -1115,4 +1129,79 @@ public abstract class BaseSemanticAnalyz
     return storedAsDirs;
   }
 
+  private static void getPartExprNodeDesc(ASTNode astNode,
+      Map<ASTNode, ExprNodeDesc> astExprNodeMap)
+          throws SemanticException, HiveException {
+
+    if ((astNode == null) || (astNode.getChildren() == null) ||
+        (astNode.getChildren().size() <= 1)) {
+      return;
+    }
+
+    TypeCheckCtx typeCheckCtx = new TypeCheckCtx(null);
+    for (Node childNode : astNode.getChildren()) {
+      ASTNode childASTNode = (ASTNode)childNode;
+
+      if (childASTNode.getType() != HiveParser.TOK_PARTVAL) {
+        getPartExprNodeDesc(childASTNode, astExprNodeMap);
+      } else {
+        if (childASTNode.getChildren().size() <= 1) {
+          throw new HiveException("This is dynamic partitioning");
+        }
+
+        ASTNode partValASTChild = (ASTNode)childASTNode.getChildren().get(1);
+        astExprNodeMap.put((ASTNode)childASTNode.getChildren().get(0),
+            TypeCheckProcFactory.genExprNode(partValASTChild, typeCheckCtx).get(partValASTChild));
+      }
+    }
+  }
+
+  public static void validatePartSpec(Table tbl,
+      Map<String, String> partSpec, ASTNode astNode, HiveConf conf) throws SemanticException {
+
+    Map<ASTNode, ExprNodeDesc> astExprNodeMap = new HashMap<ASTNode, ExprNodeDesc>();
+
+    Utilities.validatePartSpec(tbl, partSpec);
+
+    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) {
+      try {
+        getPartExprNodeDesc(astNode, astExprNodeMap);
+      } catch (HiveException e) {
+        return;
+      }
+      List<FieldSchema> parts = tbl.getPartitionKeys();
+      Map<String, String> partCols = new HashMap<String, String>(parts.size());
+      for (FieldSchema col : parts) {
+        partCols.put(col.getName(), col.getType().toLowerCase());
+      }
+      for (Entry<ASTNode, ExprNodeDesc> astExprNodePair : astExprNodeMap.entrySet()) {
+
+        String astKeyName = astExprNodePair.getKey().toString().toLowerCase();
+        if (astExprNodePair.getKey().getType() == HiveParser.Identifier) {
+          astKeyName = stripIdentifierQuotes(astKeyName);
+        }
+        String colType = partCols.get(astKeyName);
+        ObjectInspector inputOI = astExprNodePair.getValue().getWritableObjectInspector();
+
+        TypeInfo expectedType =
+            TypeInfoUtils.getTypeInfoFromTypeString(colType);
+        ObjectInspector outputOI =
+            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
+        Object value = null;
+        try {
+          value =
+              ExprNodeEvaluatorFactory.get(astExprNodePair.getValue()).
+              evaluate(partSpec.get(astKeyName));
+        } catch (HiveException e) {
+          throw new SemanticException(e);
+        }
+        Object convertedValue =
+          ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);
+        if (convertedValue == null) {
+          throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH.format(astKeyName,
+              inputOI.getTypeName(), outputOI.getTypeName()));
+        }
+      }
+    }
+  }
 }

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java Fri Oct  4 21:30:38 2013
@@ -522,6 +522,7 @@ public class ColumnStatsSemanticAnalyzer
       boolean isPartitionStats = isPartitionLevelStats(tree);
       PartitionList partList = null;
       checkForPartitionColumns(colNames, getPartitionKeys(tableName));
+      validateSpecifiedColumnNames(tableName, colNames);
 
       if (isPartitionStats) {
         isTableLevel = false;
@@ -545,6 +546,25 @@ public class ColumnStatsSemanticAnalyzer
     }
   }
 
+  // fail early if the columns specified for column statistics are not valid
+  private void validateSpecifiedColumnNames(String tableName, List<String> specifiedCols)
+      throws SemanticException {
+    List<FieldSchema> fields = null;
+    try {
+      fields = db.getTable(tableName).getAllCols();
+    } catch (HiveException e) {
+      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
+    }
+    List<String> tableCols = Utilities.getColumnNamesFromFieldSchema(fields);
+
+    for(String sc : specifiedCols) {
+      if (!tableCols.contains(sc.toLowerCase())) {
+        String msg = "'" + sc + "' (possible columns are " + tableCols.toString() + ")";
+        throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(msg));
+      }
+    }
+  }
+
   private List<String> getPartitionKeys(String tableName) throws SemanticException {
     List<FieldSchema> fields;
     try {

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Fri Oct  4 21:30:38 2013
@@ -2613,6 +2613,7 @@ public class DDLSemanticAnalyzer extends
           currentLocation = null;
         }
         currentPart = getPartSpec(child);
+        validatePartSpec(tab, currentPart, (ASTNode)child, conf);
         break;
       case HiveParser.TOK_PARTITIONLOCATION:
         // if location specified, set in partition

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveSemanticAnalyzerHookContext.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveSemanticAnalyzerHookContext.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveSemanticAnalyzerHookContext.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveSemanticAnalyzerHookContext.java Fri Oct  4 21:30:38 2013
@@ -53,4 +53,8 @@ public interface HiveSemanticAnalyzerHoo
   public Set<ReadEntity> getInputs();
 
   public Set<WriteEntity> getOutputs();
+
+  public String getUserName();
+
+  public void setUserName(String userName);
 }

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveSemanticAnalyzerHookContextImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveSemanticAnalyzerHookContextImpl.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveSemanticAnalyzerHookContextImpl.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveSemanticAnalyzerHookContextImpl.java Fri Oct  4 21:30:38 2013
@@ -32,6 +32,7 @@ public class HiveSemanticAnalyzerHookCon
   Configuration conf;
   Set<ReadEntity> inputs = null;
   Set<WriteEntity> outputs = null;
+  private String userName;
 
   @Override
   public Hive getHive() throws HiveException {
@@ -64,4 +65,12 @@ public class HiveSemanticAnalyzerHookCon
   public Set<WriteEntity> getOutputs() {
     return outputs;
   }
+
+  public String getUserName() {
+    return userName;
+  }
+
+  public void setUserName(String userName) {
+    this.userName = userName;
+  }
 }

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFInvocationSpec.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFInvocationSpec.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFInvocationSpec.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFInvocationSpec.java Fri Oct  4 21:30:38 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.parse;
 
 import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.hadoop.hive.ql.exec.PTFUtils;
 
@@ -124,7 +125,7 @@ public class PTFInvocationSpec {
   public static class PartitionedTableFunctionSpec  extends PTFInputSpec {
     String name;
     String alias;
-    ArrayList<ASTNode> args;
+    List<ASTNode> args;
     PartitioningSpec partitioning;
     PTFInputSpec input;
     public String getName() {
@@ -139,10 +140,10 @@ public class PTFInvocationSpec {
     public void setAlias(String alias) {
       this.alias = alias;
     }
-    public ArrayList<ASTNode> getArgs() {
+    public List<ASTNode> getArgs() {
       return args;
     }
-    public void setArgs(ArrayList<ASTNode> args) {
+    public void setArgs(List<ASTNode> args) {
       this.args = args;
     }
     public PartitioningSpec getPartitioning() {

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java Fri Oct  4 21:30:38 2013
@@ -18,7 +18,9 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
+import java.util.ArrayDeque;
 import java.util.ArrayList;
+import java.util.Deque;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
@@ -60,22 +62,22 @@ import org.apache.hadoop.hive.ql.parse.W
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.PTFDesc;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.BoundaryDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.CurrentRowDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.OrderDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.OrderExpressionDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFExpressionDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFInputDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFQueryInputDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.PartitionDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.PartitionedTableFunctionDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.RangeBoundaryDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.ShapeDetails;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.ValueBoundaryDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowFrameDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowFunctionDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.WindowTableFunctionDef;
 import org.apache.hadoop.hive.ql.plan.PTFDeserializer;
+import org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef;
+import org.apache.hadoop.hive.ql.plan.ptf.CurrentRowDef;
+import org.apache.hadoop.hive.ql.plan.ptf.OrderDef;
+import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef;
+import org.apache.hadoop.hive.ql.plan.ptf.PTFQueryInputDef;
+import org.apache.hadoop.hive.ql.plan.ptf.PartitionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.RangeBoundaryDef;
+import org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails;
+import org.apache.hadoop.hive.ql.plan.ptf.ValueBoundaryDef;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLeadLag;
 import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator;
@@ -224,7 +226,7 @@ public class PTFTranslator {
 
   private void translatePTFChain() throws SemanticException {
 
-    Stack<PTFInputSpec> ptfChain = new Stack<PTFInvocationSpec.PTFInputSpec>();
+    Deque<PTFInputSpec> ptfChain = new ArrayDeque<PTFInvocationSpec.PTFInputSpec>();
     PTFInputSpec currentSpec = ptfInvocation.getFunction();
     while (currentSpec != null) {
       ptfChain.push(currentSpec);
@@ -280,7 +282,7 @@ public class PTFTranslator {
     /*
      * translate args
      */
-    ArrayList<ASTNode> args = spec.getArgs();
+    List<ASTNode> args = spec.getArgs();
     if (args != null)
     {
       for (ASTNode expr : args)
@@ -303,7 +305,7 @@ public class PTFTranslator {
 
     if (tFn.transformsRawInput()) {
       StructObjectInspector rawInOutOI = tEval.getRawInputOI();
-      ArrayList<String> rawInOutColNames = tFn.getRawInputColumnNames();
+      List<String> rawInOutColNames = tFn.getRawInputColumnNames();
       RowResolver rawInRR = buildRowResolverForPTF(def.getName(),
           spec.getAlias(),
           rawInOutOI,
@@ -324,7 +326,7 @@ public class PTFTranslator {
     tFn.setupOutputOI();
 
     StructObjectInspector outputOI = tEval.getOutputOI();
-    ArrayList<String> outColNames = tFn.getOutputColumnNames();
+    List<String> outColNames = tFn.getOutputColumnNames();
     RowResolver outRR = buildRowResolverForPTF(def.getName(),
         spec.getAlias(),
         outputOI,
@@ -566,8 +568,8 @@ public class PTFTranslator {
   }
 
   static void setupWdwFnEvaluator(WindowFunctionDef def) throws HiveException {
-    ArrayList<PTFExpressionDef> args = def.getArgs();
-    ArrayList<ObjectInspector> argOIs = new ArrayList<ObjectInspector>();
+    List<PTFExpressionDef> args = def.getArgs();
+    List<ObjectInspector> argOIs = new ArrayList<ObjectInspector>();
     ObjectInspector[] funcArgOIs = null;
 
     if (args != null) {
@@ -619,7 +621,7 @@ public class PTFTranslator {
   }
 
   private ShapeDetails setupTableFnShape(String fnName, ShapeDetails inpShape,
-      StructObjectInspector OI, ArrayList<String> columnNames, RowResolver rr)
+      StructObjectInspector OI, List<String> columnNames, RowResolver rr)
       throws SemanticException {
     if (fnName.equals(FunctionRegistry.NOOP_TABLE_FUNCTION)
         || fnName.equals(
@@ -630,7 +632,7 @@ public class PTFTranslator {
   }
 
   private ShapeDetails setupShape(StructObjectInspector OI,
-      ArrayList<String> columnNames,
+      List<String> columnNames,
       RowResolver rr) throws SemanticException {
     Map<String, String> serdePropsMap = new LinkedHashMap<String, String>();
     SerDe serde = null;
@@ -672,7 +674,7 @@ public class PTFTranslator {
 
   private ShapeDetails setupShapeForNoop(ShapeDetails inpShape,
       StructObjectInspector OI,
-      ArrayList<String> columnNames,
+      List<String> columnNames,
       RowResolver rr) throws SemanticException {
     ShapeDetails shp = new ShapeDetails();
 
@@ -738,7 +740,7 @@ public class PTFTranslator {
       throw new SemanticException("Ranking Functions can take no arguments");
     }
     OrderDef oDef = wdwTFnDef.getOrder();
-    ArrayList<OrderExpressionDef> oExprs = oDef.getExpressions();
+    List<OrderExpressionDef> oExprs = oDef.getExpressions();
     for (OrderExpressionDef oExpr : oExprs) {
       wFnDef.addArg(oExpr);
     }
@@ -871,7 +873,7 @@ public class PTFTranslator {
 
   protected static RowResolver buildRowResolverForPTF(String tbFnName, String tabAlias,
       StructObjectInspector rowObjectInspector,
-      ArrayList<String> outputColNames, RowResolver inputRR) throws SemanticException {
+      List<String> outputColNames, RowResolver inputRR) throws SemanticException {
 
     if (tbFnName.equals(FunctionRegistry.NOOP_TABLE_FUNCTION) ||
         tbFnName.equals(FunctionRegistry.NOOP_MAP_TABLE_FUNCTION)) {
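
The translatePTFChain hunk above swaps java.util.Stack for Deque/ArrayDeque. A minimal standalone sketch, independent of the Hive PTF classes, showing that push/pop ordering is unchanged by the substitution:

import java.util.ArrayDeque;
import java.util.Deque;

public class DequeAsStack {
  public static void main(String[] args) {
    // ArrayDeque used through the Deque interface behaves as a LIFO stack,
    // like java.util.Stack but without Stack's synchronized methods.
    Deque<String> chain = new ArrayDeque<String>();
    chain.push("query input");
    chain.push("outer PTF");
    System.out.println(chain.pop()); // prints "outer PTF"
    System.out.println(chain.pop()); // prints "query input"
  }
}

The Deque Javadoc itself recommends ArrayDeque over Stack when a LIFO stack is needed, which is presumably the motivation for this change.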

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Oct  4 21:30:38 2013
@@ -139,9 +139,6 @@ import org.apache.hadoop.hive.ql.plan.Lo
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PTFDesc;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.OrderExpressionDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFExpressionDef;
-import org.apache.hadoop.hive.ql.plan.PTFDesc.PartitionedTableFunctionDef;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.ScriptDesc;
@@ -150,6 +147,9 @@ import org.apache.hadoop.hive.ql.plan.Ta
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.UDTFDesc;
 import org.apache.hadoop.hive.ql.plan.UnionDesc;
+import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
@@ -9935,7 +9935,7 @@ public class SemanticAnalyzer extends Ba
       RowResolver rsOpRR,
       RowResolver extractRR) throws SemanticException {
 
-    ArrayList<PTFExpressionDef> partColList = tabDef.getPartition().getExpressions();
+    List<PTFExpressionDef> partColList = tabDef.getPartition().getExpressions();
 
     for (PTFExpressionDef colDef : partColList) {
       partCols.add(colDef.getExprNode());
@@ -9950,7 +9950,7 @@ public class SemanticAnalyzer extends Ba
      * we need to set includeKeyCols = false while creating the
      * ReduceSinkDesc
      */
-    ArrayList<OrderExpressionDef> orderColList = tabDef.getOrder().getExpressions();
+    List<OrderExpressionDef> orderColList = tabDef.getOrder().getExpressions();
     for (int i = 0; i < orderColList.size(); i++) {
       OrderExpressionDef colDef = orderColList.get(i);
       org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order order = colDef.getOrder();

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Fri Oct  4 21:30:38 2013
@@ -110,9 +110,14 @@ public final class TypeCheckProcFactory 
     // build the exprNodeFuncDesc with recursively built children.
     ASTNode expr = (ASTNode) nd;
     TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
+
     RowResolver input = ctx.getInputRR();
     ExprNodeDesc desc = null;
 
+    if ((ctx == null) || (input == null)) {
+      return null;
+    }
+
     // If the current subExpression is pre-calculated, as in Group-By etc.
     ColumnInfo colInfo = input.getExpression(expr);
     if (colInfo != null) {
@@ -898,7 +903,7 @@ public final class TypeCheckProcFactory 
           }
         }
 
-        desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, children);
+        desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText, children);
       }
       // UDFOPPositive is a no-op.
       // However, we still create it, and then remove it here, to make sure we

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java Fri Oct  4 21:30:38 2013
@@ -19,8 +19,20 @@
 package org.apache.hadoop.hive.ql.plan;
 
 public class AbstractOperatorDesc implements OperatorDesc {
+
+  private boolean vectorMode = false;
+
   @Override
   public Object clone() throws CloneNotSupportedException {
     throw new CloneNotSupportedException("clone not supported");
   }
+
+  @Explain(displayName = "Vectorized execution", displayOnlyOnTrue = true)
+  public boolean getVectorModeOn() {
+    return vectorMode;
+  }
+
+  public void setVectorMode(boolean vm) {
+    this.vectorMode = vm;
+  }
 }

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java Fri Oct  4 21:30:38 2013
@@ -62,6 +62,7 @@ public class ExprNodeGenericFuncDesc ext
    */
   private GenericUDF genericUDF;
   private List<ExprNodeDesc> childExprs;
+  private transient String funcText;
   /**
    * This class uses a writableObjectInspector rather than a TypeInfo to store
    * the canonical type information for this NodeDesc.
@@ -73,13 +74,19 @@ public class ExprNodeGenericFuncDesc ext
   public ExprNodeGenericFuncDesc() {
   }
 
+  /* If the function has an explicit name like func(args), then call a
+   * constructor that explicitly provides the function name in the
+   * funcText argument.
+   */
   public ExprNodeGenericFuncDesc(TypeInfo typeInfo, GenericUDF genericUDF,
+      String funcText,
       List<ExprNodeDesc> children) {
     this(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo),
-         genericUDF, children);
+         genericUDF, funcText, children);
   }
 
   public ExprNodeGenericFuncDesc(ObjectInspector oi, GenericUDF genericUDF,
+      String funcText,
       List<ExprNodeDesc> children) {
     super(TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
     this.writableObjectInspector =
@@ -87,6 +94,18 @@ public class ExprNodeGenericFuncDesc ext
     assert (genericUDF != null);
     this.genericUDF = genericUDF;
     this.childExprs = children;
+    this.funcText = funcText;
+  }
+
+  // Backward-compatibility interfaces for functions without a user-visible name.
+  public ExprNodeGenericFuncDesc(TypeInfo typeInfo, GenericUDF genericUDF,
+      List<ExprNodeDesc> children) {
+    this(typeInfo, genericUDF, null, children);
+  }
+
+  public ExprNodeGenericFuncDesc(ObjectInspector oi, GenericUDF genericUDF,
+      List<ExprNodeDesc> children) {
+    this(oi, genericUDF, null, children);
   }
 
   @Override
@@ -165,17 +184,20 @@ public class ExprNodeGenericFuncDesc ext
       cloneCh.add(ch.clone());
     }
     ExprNodeGenericFuncDesc clone = new ExprNodeGenericFuncDesc(typeInfo,
-        FunctionRegistry.cloneGenericUDF(genericUDF), cloneCh);
+        FunctionRegistry.cloneGenericUDF(genericUDF), funcText, cloneCh);
     return clone;
   }
 
   /**
-   * Create a exprNodeGenericFuncDesc based on the genericUDFClass and the
-   * children parameters.
+   * Create an ExprNodeGenericFuncDesc based on the genericUDFClass and the
+   * children parameters. If the function has an explicit name, the
+   * newInstance method should be passed the function name in the funcText
+   * argument.
    *
    * @throws UDFArgumentException
    */
   public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF,
+      String funcText,
       List<ExprNodeDesc> children) throws UDFArgumentException {
     ObjectInspector[] childrenOIs = new ObjectInspector[children.size()];
     for (int i = 0; i < childrenOIs.length; i++) {
@@ -232,7 +254,15 @@ public class ExprNodeGenericFuncDesc ext
       }
     }
 
-    return new ExprNodeGenericFuncDesc(oi, genericUDF, children);
+    return new ExprNodeGenericFuncDesc(oi, genericUDF, funcText, children);
+  }
+
+  /* Backward-compatibility interface for the case where there is no explicit
+   * name for the function.
+   */
+  public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF,
+    List<ExprNodeDesc> children) throws UDFArgumentException {
+    return newInstance(genericUDF, null, children);
   }
 
   @Override
@@ -285,4 +315,8 @@ public class ExprNodeGenericFuncDesc ext
   public void setSortedExpr(boolean isSortedExpr) {
     this.isSortedExpr = isSortedExpr;
   }
+
+  public String getFuncText() {
+    return this.funcText;
+  }
 }
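
The funcText parameter introduced above records the function name exactly as the user wrote it, and it can be read back through the new getFuncText() accessor. A minimal sketch of how a caller might use the two newInstance overloads; the FunctionRegistry lookup and the constant child expression are illustrative assumptions, not part of this commit:

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;

public class FuncTextExample {
  public static void main(String[] args) throws Exception {
    // Resolve the UDF the parser would have found for the text "upper".
    GenericUDF udf = FunctionRegistry.getFunctionInfo("upper").getGenericUDF();
    List<ExprNodeDesc> children =
        Arrays.<ExprNodeDesc>asList(new ExprNodeConstantDesc("hive"));

    // New overload: keep the user-visible name alongside the resolved UDF.
    ExprNodeGenericFuncDesc named =
        ExprNodeGenericFuncDesc.newInstance(udf, "upper", children);
    System.out.println(named.getFuncText());   // prints "upper"

    // Backward-compatible overload: funcText is recorded as null.
    ExprNodeGenericFuncDesc unnamed =
        ExprNodeGenericFuncDesc.newInstance(udf, children);
    System.out.println(unnamed.getFuncText()); // prints null
  }
}

Note that funcText is declared transient above, so the recorded name is advisory metadata and is not carried through Java serialization of the expression.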

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java Fri Oct  4 21:30:38 2013
@@ -112,6 +112,9 @@ public class MapWork extends BaseWork {
 
   private boolean useBucketizedHiveInputFormat;
 
+  private Map<String, Map<Integer, String>> scratchColumnVectorTypes = null;
+  private boolean vectorMode = false;
+
   public MapWork() {
   }
 
@@ -479,4 +482,21 @@ public class MapWork extends BaseWork {
       PlanUtils.configureJobConf(fs.getConf().getTableInfo(), job);
     }
   }
+
+  public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
+    return scratchColumnVectorTypes;
+  }
+
+  public void setScratchColumnVectorTypes(
+      Map<String, Map<Integer, String>> scratchColumnVectorTypes) {
+    this.scratchColumnVectorTypes = scratchColumnVectorTypes;
+  }
+
+  public boolean getVectorMode() {
+    return vectorMode;
+  }
+
+  public void setVectorMode(boolean vectorMode) {
+    this.vectorMode = vectorMode;
+  }
 }
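
MapWork now carries the scratch-column type map and a vectorMode flag for the vectorized execution path. A minimal sketch of populating the new fields; the "alias_1" key and the type-name strings are illustrative assumptions only:

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.plan.MapWork;

public class VectorScratchTypesExample {
  public static void main(String[] args) {
    // Inner map: scratch (extra) column index -> vector column type name.
    Map<Integer, String> scratchCols = new HashMap<Integer, String>();
    scratchCols.put(3, "long");
    scratchCols.put(4, "double");

    // Outer map: one entry per key used by the vectorizer; the key value
    // here is a placeholder, not something this commit defines.
    Map<String, Map<Integer, String>> types =
        new HashMap<String, Map<Integer, String>>();
    types.put("alias_1", scratchCols);

    MapWork work = new MapWork();
    work.setScratchColumnVectorTypes(types);
    work.setVectorMode(true);
  }
}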

Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java?rev=1529308&r1=1529307&r2=1529308&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java Fri Oct  4 21:30:38 2013
@@ -18,29 +18,19 @@
 
 package org.apache.hadoop.hive.ql.plan;
 
-import java.util.ArrayList;
-import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.PTFUtils;
 import org.apache.hadoop.hive.ql.parse.LeadLagInfo;
 import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order;
 import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType;
-import org.apache.hadoop.hive.ql.parse.RowResolver;
-import org.apache.hadoop.hive.ql.parse.TypeCheckCtx;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
 
 @Explain(displayName = "PTF Operator")
-public class PTFDesc extends AbstractOperatorDesc
-{
+public class PTFDesc extends AbstractOperatorDesc {
   private static final long serialVersionUID = 1L;
   @SuppressWarnings("unused")
   private static final Log LOG = LogFactory.getLog(PTFDesc.class.getName());
@@ -99,538 +89,4 @@ public class PTFDesc extends AbstractOpe
     this.cfg = cfg;
   }
 
-  public abstract static class PTFInputDef {
-    String expressionTreeString;
-    ShapeDetails outputShape;
-    String alias;
-
-    public String getExpressionTreeString() {
-      return expressionTreeString;
-    }
-
-    public void setExpressionTreeString(String expressionTreeString) {
-      this.expressionTreeString = expressionTreeString;
-    }
-
-    public ShapeDetails getOutputShape() {
-      return outputShape;
-    }
-
-    public void setOutputShape(ShapeDetails outputShape) {
-      this.outputShape = outputShape;
-    }
-    public String getAlias() {
-      return alias;
-    }
-    public void setAlias(String alias) {
-      this.alias = alias;
-    }
-
-    public abstract PTFInputDef getInput();
-  }
-
-  public static class PTFQueryInputDef extends PTFInputDef {
-    String destination;
-    PTFQueryInputType type;
-    public String getDestination() {
-      return destination;
-    }
-    public void setDestination(String destination) {
-      this.destination = destination;
-    }
-    public PTFQueryInputType getType() {
-      return type;
-    }
-    public void setType(PTFQueryInputType type) {
-      this.type = type;
-    }
-
-    @Override
-    public PTFInputDef getInput() {
-      return null;
-    }
-  }
-
-  public static class PartitionedTableFunctionDef extends  PTFInputDef {
-    String name;
-    String resolverClassName;
-    ShapeDetails rawInputShape;
-    boolean carryForwardNames;
-    PTFInputDef input;
-    ArrayList<PTFExpressionDef> args;
-    PartitionDef partition;
-    OrderDef order;
-    TableFunctionEvaluator tFunction;
-    boolean transformsRawInput;
-    public String getName() {
-      return name;
-    }
-    public void setName(String name) {
-      this.name = name;
-    }
-    public ShapeDetails getRawInputShape() {
-      return rawInputShape;
-    }
-    public void setRawInputShape(ShapeDetails rawInputShape) {
-      this.rawInputShape = rawInputShape;
-    }
-    public boolean isCarryForwardNames() {
-      return carryForwardNames;
-    }
-    public void setCarryForwardNames(boolean carryForwardNames) {
-      this.carryForwardNames = carryForwardNames;
-    }
-    @Override
-    public PTFInputDef getInput() {
-      return input;
-    }
-    public void setInput(PTFInputDef input) {
-      this.input = input;
-    }
-    public PartitionDef getPartition() {
-      return partition;
-    }
-    public void setPartition(PartitionDef partition) {
-      this.partition = partition;
-    }
-    public OrderDef getOrder() {
-      return order;
-    }
-    public void setOrder(OrderDef order) {
-      this.order = order;
-    }
-    public TableFunctionEvaluator getTFunction() {
-      return tFunction;
-    }
-    public void setTFunction(TableFunctionEvaluator tFunction) {
-      this.tFunction = tFunction;
-    }
-    public ArrayList<PTFExpressionDef> getArgs() {
-      return args;
-    }
-
-    public void setArgs(ArrayList<PTFExpressionDef> args) {
-      this.args = args;
-    }
-
-    public void addArg(PTFExpressionDef arg) {
-      args = args == null ? new ArrayList<PTFExpressionDef>() : args;
-      args.add(arg);
-    }
-
-    public PartitionedTableFunctionDef getStartOfChain() {
-      if (input instanceof PartitionedTableFunctionDef ) {
-        return ((PartitionedTableFunctionDef)input).getStartOfChain();
-      }
-      return this;
-    }
-    public boolean isTransformsRawInput() {
-      return transformsRawInput;
-    }
-    public void setTransformsRawInput(boolean transformsRawInput) {
-      this.transformsRawInput = transformsRawInput;
-    }
-    public String getResolverClassName() {
-      return resolverClassName;
-    }
-    public void setResolverClassName(String resolverClassName) {
-      this.resolverClassName = resolverClassName;
-    }
-  }
-
-  public static class WindowTableFunctionDef extends PartitionedTableFunctionDef {
-    ArrayList<WindowFunctionDef> windowFunctions;
-
-    public ArrayList<WindowFunctionDef> getWindowFunctions() {
-      return windowFunctions;
-    }
-    public void setWindowFunctions(ArrayList<WindowFunctionDef> windowFunctions) {
-      this.windowFunctions = windowFunctions;
-    }
-  }
-
-  public static class ShapeDetails {
-    String serdeClassName;
-    Map<String, String> serdeProps;
-    ArrayList<String> columnNames;
-    transient StructObjectInspector OI;
-    transient SerDe serde;
-    transient RowResolver rr;
-    transient TypeCheckCtx typeCheckCtx;
-
-    static{
-      PTFUtils.makeTransient(ShapeDetails.class, "OI", "serde", "rr", "typeCheckCtx");
-    }
-
-    public String getSerdeClassName() {
-      return serdeClassName;
-    }
-
-    public void setSerdeClassName(String serdeClassName) {
-      this.serdeClassName = serdeClassName;
-    }
-
-    public Map<String, String> getSerdeProps() {
-      return serdeProps;
-    }
-
-    public void setSerdeProps(Map<String, String> serdeProps) {
-      this.serdeProps = serdeProps;
-    }
-
-    public ArrayList<String> getColumnNames() {
-      return columnNames;
-    }
-
-    public void setColumnNames(ArrayList<String> columnNames) {
-      this.columnNames = columnNames;
-    }
-
-    public StructObjectInspector getOI() {
-      return OI;
-    }
-
-    public void setOI(StructObjectInspector oI) {
-      OI = oI;
-    }
-
-    public SerDe getSerde() {
-      return serde;
-    }
-
-    public void setSerde(SerDe serde) {
-      this.serde = serde;
-    }
-
-    public RowResolver getRr() {
-      return rr;
-    }
-
-    public void setRr(RowResolver rr) {
-      this.rr = rr;
-    }
-
-    public TypeCheckCtx getTypeCheckCtx() {
-      return typeCheckCtx;
-    }
-
-    public void setTypeCheckCtx(TypeCheckCtx typeCheckCtx) {
-      this.typeCheckCtx = typeCheckCtx;
-    }
-  }
-
-  public static class PartitionDef {
-    ArrayList<PTFExpressionDef> expressions;
-
-    public ArrayList<PTFExpressionDef> getExpressions() {
-      return expressions;
-    }
-
-    public void setExpressions(ArrayList<PTFExpressionDef> expressions) {
-      this.expressions = expressions;
-    }
-    public void addExpression(PTFExpressionDef e) {
-      expressions = expressions == null ? new ArrayList<PTFExpressionDef>() : expressions;
-      expressions.add(e);
-    }
-  }
-
-  public static class OrderDef {
-    ArrayList<OrderExpressionDef> expressions;
-
-    public OrderDef() {}
-
-    public OrderDef(PartitionDef pDef) {
-      for(PTFExpressionDef eDef : pDef.getExpressions())
-      {
-        addExpression(new OrderExpressionDef(eDef));
-      }
-    }
-
-    public ArrayList<OrderExpressionDef> getExpressions() {
-      return expressions;
-    }
-
-    public void setExpressions(ArrayList<OrderExpressionDef> expressions) {
-      this.expressions = expressions;
-    }
-    public void addExpression(OrderExpressionDef e) {
-      expressions = expressions == null ? new ArrayList<OrderExpressionDef>() : expressions;
-      expressions.add(e);
-    }
-  }
-
-  public static class OrderExpressionDef extends PTFExpressionDef {
-    Order order;
-
-    public OrderExpressionDef() {}
-    public OrderExpressionDef(PTFExpressionDef e) {
-      super(e);
-      order = Order.ASC;
-    }
-
-    public Order getOrder() {
-      return order;
-    }
-
-    public void setOrder(Order order) {
-      this.order = order;
-    }
-  }
-
-  public static class WindowExpressionDef  extends PTFExpressionDef {
-    String alias;
-
-    public WindowExpressionDef() {}
-    public WindowExpressionDef(PTFExpressionDef eDef) {
-      super(eDef);
-    }
-    public String getAlias() {
-      return alias;
-    }
-
-    public void setAlias(String alias) {
-      this.alias = alias;
-    }
-  }
-
-  public static class WindowFunctionDef extends WindowExpressionDef
-  {
-    String name;
-    boolean isStar;
-    boolean isDistinct;
-    ArrayList<PTFExpressionDef> args;
-    WindowFrameDef windowFrame;
-    GenericUDAFEvaluator wFnEval;
-    boolean pivotResult;
-
-    public String getName() {
-      return name;
-    }
-
-    public void setName(String name) {
-      this.name = name;
-    }
-
-    public boolean isStar() {
-      return isStar;
-    }
-
-    public void setStar(boolean isStar) {
-      this.isStar = isStar;
-    }
-
-    public boolean isDistinct() {
-      return isDistinct;
-    }
-
-    public void setDistinct(boolean isDistinct) {
-      this.isDistinct = isDistinct;
-    }
-
-    public ArrayList<PTFExpressionDef> getArgs() {
-      return args;
-    }
-
-    public void setArgs(ArrayList<PTFExpressionDef> args) {
-      this.args = args;
-    }
-
-    public void addArg(PTFExpressionDef arg) {
-      args = args == null ? new ArrayList<PTFExpressionDef>() : args;
-      args.add(arg);
-    }
-
-    public WindowFrameDef getWindowFrame() {
-      return windowFrame;
-    }
-
-    public void setWindowFrame(WindowFrameDef windowFrame) {
-      this.windowFrame = windowFrame;
-    }
-
-    public GenericUDAFEvaluator getWFnEval() {
-      return wFnEval;
-    }
-
-    public void setWFnEval(GenericUDAFEvaluator wFnEval) {
-      this.wFnEval = wFnEval;
-    }
-
-    public boolean isPivotResult() {
-      return pivotResult;
-    }
-
-    public void setPivotResult(boolean pivotResult) {
-      this.pivotResult = pivotResult;
-    }
-
-  }
-
-  public static class WindowFrameDef
-  {
-    BoundaryDef start;
-    BoundaryDef end;
-    public BoundaryDef getStart() {
-      return start;
-    }
-    public void setStart(BoundaryDef start) {
-      this.start = start;
-    }
-    public BoundaryDef getEnd() {
-      return end;
-    }
-    public void setEnd(BoundaryDef end) {
-      this.end = end;
-    }
-  }
-
-  public static abstract class BoundaryDef {
-    Direction direction;
-
-    public Direction getDirection() {
-      return direction;
-    }
-
-    public void setDirection(Direction direction) {
-      this.direction = direction;
-    }
-
-    public abstract int getAmt();
-  }
-
-  public static class RangeBoundaryDef extends BoundaryDef {
-    int amt;
-
-    public int compareTo(BoundaryDef other)
-    {
-      int c = getDirection().compareTo(other.getDirection());
-      if ( c != 0) {
-        return c;
-      }
-      RangeBoundaryDef rb = (RangeBoundaryDef) other;
-      return getAmt() - rb.getAmt();
-    }
-
-    @Override
-    public int getAmt() {
-      return amt;
-    }
-
-    public void setAmt(int amt) {
-      this.amt = amt;
-    }
-  }
-
-  public static class CurrentRowDef extends BoundaryDef
-  {
-    public int compareTo(BoundaryDef other)
-    {
-      return getDirection().compareTo(other.getDirection());
-    }
-    @Override
-    public Direction getDirection() {
-      return Direction.CURRENT;
-    }
-
-    @Override
-    public int getAmt() { return 0; }
-  }
-
-  public static class ValueBoundaryDef extends BoundaryDef
-  {
-    PTFExpressionDef expressionDef;
-    int amt;
-
-    public int compareTo(BoundaryDef other) {
-      int c = getDirection().compareTo(other.getDirection());
-      if ( c != 0) {
-        return c;
-      }
-      ValueBoundaryDef vb = (ValueBoundaryDef) other;
-      return getAmt() - vb.getAmt();
-    }
-
-    public PTFExpressionDef getExpressionDef() {
-      return expressionDef;
-    }
-
-    public void setExpressionDef(PTFExpressionDef expressionDef) {
-      this.expressionDef = expressionDef;
-    }
-
-    public ExprNodeDesc getExprNode() {
-      return expressionDef == null ? null : expressionDef.getExprNode();
-    }
-
-    public ExprNodeEvaluator getExprEvaluator() {
-      return expressionDef == null ? null : expressionDef.getExprEvaluator();
-    }
-
-    public ObjectInspector getOI() {
-      return expressionDef == null ? null : expressionDef.getOI();
-    }
-
-    @Override
-    public int getAmt() {
-      return amt;
-    }
-
-    public void setAmt(int amt) {
-      this.amt = amt;
-    }
-  }
-
-  public static class PTFExpressionDef
-  {
-    String expressionTreeString;
-    ExprNodeDesc exprNode;
-    transient ExprNodeEvaluator exprEvaluator;
-    transient ObjectInspector OI;
-
-    static{
-      PTFUtils.makeTransient(PTFExpressionDef.class, "exprEvaluator", "OI");
-    }
-
-    public PTFExpressionDef() {}
-    public PTFExpressionDef(PTFExpressionDef e) {
-      expressionTreeString = e.getExpressionTreeString();
-      exprNode = e.getExprNode();
-      exprEvaluator = e.getExprEvaluator();
-      OI = e.getOI();
-    }
-
-    public String getExpressionTreeString() {
-      return expressionTreeString;
-    }
-
-    public void setExpressionTreeString(String expressionTreeString) {
-      this.expressionTreeString = expressionTreeString;
-    }
-
-    public ExprNodeDesc getExprNode() {
-      return exprNode;
-    }
-
-    public void setExprNode(ExprNodeDesc exprNode) {
-      this.exprNode = exprNode;
-    }
-
-    public ExprNodeEvaluator getExprEvaluator() {
-      return exprEvaluator;
-    }
-
-    public void setExprEvaluator(ExprNodeEvaluator exprEvaluator) {
-      this.exprEvaluator = exprEvaluator;
-    }
-
-    public ObjectInspector getOI() {
-      return OI;
-    }
-
-    public void setOI(ObjectInspector oI) {
-      OI = oI;
-    }
-  }
-
 }


