hive-commits mailing list archives

From hashut...@apache.org
Subject svn commit: r1506785 [3/8] - in /hive/branches/vectorization: ./ beeline/src/java/org/apache/hive/beeline/ cli/src/java/org/apache/hadoop/hive/cli/ common/src/java/org/apache/hadoop/hive/conf/ conf/ contrib/src/test/results/clientnegative/ data/files/ ...
Date Thu, 25 Jul 2013 00:15:07 GMT
Modified: hive/branches/vectorization/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original)
+++ hive/branches/vectorization/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Thu Jul 25 00:14:58 2013
@@ -9791,6 +9791,15 @@ public final class OrcProto {
     // optional uint64 compressionBlockSize = 3;
     boolean hasCompressionBlockSize();
     long getCompressionBlockSize();
+    
+    // repeated uint32 version = 4 [packed = true];
+    java.util.List<java.lang.Integer> getVersionList();
+    int getVersionCount();
+    int getVersion(int index);
+    
+    // optional string magic = 8000;
+    boolean hasMagic();
+    String getMagic();
   }
   public static final class PostScript extends
       com.google.protobuf.GeneratedMessage
@@ -9851,10 +9860,59 @@ public final class OrcProto {
       return compressionBlockSize_;
     }
     
+    // repeated uint32 version = 4 [packed = true];
+    public static final int VERSION_FIELD_NUMBER = 4;
+    private java.util.List<java.lang.Integer> version_;
+    public java.util.List<java.lang.Integer>
+        getVersionList() {
+      return version_;
+    }
+    public int getVersionCount() {
+      return version_.size();
+    }
+    public int getVersion(int index) {
+      return version_.get(index);
+    }
+    private int versionMemoizedSerializedSize = -1;
+    
+    // optional string magic = 8000;
+    public static final int MAGIC_FIELD_NUMBER = 8000;
+    private java.lang.Object magic_;
+    public boolean hasMagic() {
+      return ((bitField0_ & 0x00000008) == 0x00000008);
+    }
+    public String getMagic() {
+      java.lang.Object ref = magic_;
+      if (ref instanceof String) {
+        return (String) ref;
+      } else {
+        com.google.protobuf.ByteString bs = 
+            (com.google.protobuf.ByteString) ref;
+        String s = bs.toStringUtf8();
+        if (com.google.protobuf.Internal.isValidUtf8(bs)) {
+          magic_ = s;
+        }
+        return s;
+      }
+    }
+    private com.google.protobuf.ByteString getMagicBytes() {
+      java.lang.Object ref = magic_;
+      if (ref instanceof String) {
+        com.google.protobuf.ByteString b = 
+            com.google.protobuf.ByteString.copyFromUtf8((String) ref);
+        magic_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+    
     private void initFields() {
       footerLength_ = 0L;
       compression_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.CompressionKind.NONE;
       compressionBlockSize_ = 0L;
+      version_ = java.util.Collections.emptyList();;
+      magic_ = "";
     }
     private byte memoizedIsInitialized = -1;
     public final boolean isInitialized() {
@@ -9877,6 +9935,16 @@ public final class OrcProto {
       if (((bitField0_ & 0x00000004) == 0x00000004)) {
         output.writeUInt64(3, compressionBlockSize_);
       }
+      if (getVersionList().size() > 0) {
+        output.writeRawVarint32(34);
+        output.writeRawVarint32(versionMemoizedSerializedSize);
+      }
+      for (int i = 0; i < version_.size(); i++) {
+        output.writeUInt32NoTag(version_.get(i));
+      }
+      if (((bitField0_ & 0x00000008) == 0x00000008)) {
+        output.writeBytes(8000, getMagicBytes());
+      }
       getUnknownFields().writeTo(output);
     }
     
@@ -9898,6 +9966,24 @@ public final class OrcProto {
         size += com.google.protobuf.CodedOutputStream
           .computeUInt64Size(3, compressionBlockSize_);
       }
+      {
+        int dataSize = 0;
+        for (int i = 0; i < version_.size(); i++) {
+          dataSize += com.google.protobuf.CodedOutputStream
+            .computeUInt32SizeNoTag(version_.get(i));
+        }
+        size += dataSize;
+        if (!getVersionList().isEmpty()) {
+          size += 1;
+          size += com.google.protobuf.CodedOutputStream
+              .computeInt32SizeNoTag(dataSize);
+        }
+        versionMemoizedSerializedSize = dataSize;
+      }
+      if (((bitField0_ & 0x00000008) == 0x00000008)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeBytesSize(8000, getMagicBytes());
+      }
       size += getUnknownFields().getSerializedSize();
       memoizedSerializedSize = size;
       return size;
@@ -10028,6 +10114,10 @@ public final class OrcProto {
         bitField0_ = (bitField0_ & ~0x00000002);
         compressionBlockSize_ = 0L;
         bitField0_ = (bitField0_ & ~0x00000004);
+        version_ = java.util.Collections.emptyList();;
+        bitField0_ = (bitField0_ & ~0x00000008);
+        magic_ = "";
+        bitField0_ = (bitField0_ & ~0x00000010);
         return this;
       }
       
@@ -10078,6 +10168,15 @@ public final class OrcProto {
           to_bitField0_ |= 0x00000004;
         }
         result.compressionBlockSize_ = compressionBlockSize_;
+        if (((bitField0_ & 0x00000008) == 0x00000008)) {
+          version_ = java.util.Collections.unmodifiableList(version_);
+          bitField0_ = (bitField0_ & ~0x00000008);
+        }
+        result.version_ = version_;
+        if (((from_bitField0_ & 0x00000010) == 0x00000010)) {
+          to_bitField0_ |= 0x00000008;
+        }
+        result.magic_ = magic_;
         result.bitField0_ = to_bitField0_;
         onBuilt();
         return result;
@@ -10103,6 +10202,19 @@ public final class OrcProto {
         if (other.hasCompressionBlockSize()) {
           setCompressionBlockSize(other.getCompressionBlockSize());
         }
+        if (!other.version_.isEmpty()) {
+          if (version_.isEmpty()) {
+            version_ = other.version_;
+            bitField0_ = (bitField0_ & ~0x00000008);
+          } else {
+            ensureVersionIsMutable();
+            version_.addAll(other.version_);
+          }
+          onChanged();
+        }
+        if (other.hasMagic()) {
+          setMagic(other.getMagic());
+        }
         this.mergeUnknownFields(other.getUnknownFields());
         return this;
       }
@@ -10155,6 +10267,25 @@ public final class OrcProto {
               compressionBlockSize_ = input.readUInt64();
               break;
             }
+            case 32: {
+              ensureVersionIsMutable();
+              version_.add(input.readUInt32());
+              break;
+            }
+            case 34: {
+              int length = input.readRawVarint32();
+              int limit = input.pushLimit(length);
+              while (input.getBytesUntilLimit() > 0) {
+                addVersion(input.readUInt32());
+              }
+              input.popLimit(limit);
+              break;
+            }
+            case 64002: {
+              bitField0_ |= 0x00000010;
+              magic_ = input.readBytes();
+              break;
+            }
           }
         }
       }
@@ -10227,6 +10358,87 @@ public final class OrcProto {
         return this;
       }
       
+      // repeated uint32 version = 4 [packed = true];
+      private java.util.List<java.lang.Integer> version_ = java.util.Collections.emptyList();;
+      private void ensureVersionIsMutable() {
+        if (!((bitField0_ & 0x00000008) == 0x00000008)) {
+          version_ = new java.util.ArrayList<java.lang.Integer>(version_);
+          bitField0_ |= 0x00000008;
+         }
+      }
+      public java.util.List<java.lang.Integer>
+          getVersionList() {
+        return java.util.Collections.unmodifiableList(version_);
+      }
+      public int getVersionCount() {
+        return version_.size();
+      }
+      public int getVersion(int index) {
+        return version_.get(index);
+      }
+      public Builder setVersion(
+          int index, int value) {
+        ensureVersionIsMutable();
+        version_.set(index, value);
+        onChanged();
+        return this;
+      }
+      public Builder addVersion(int value) {
+        ensureVersionIsMutable();
+        version_.add(value);
+        onChanged();
+        return this;
+      }
+      public Builder addAllVersion(
+          java.lang.Iterable<? extends java.lang.Integer> values) {
+        ensureVersionIsMutable();
+        super.addAll(values, version_);
+        onChanged();
+        return this;
+      }
+      public Builder clearVersion() {
+        version_ = java.util.Collections.emptyList();;
+        bitField0_ = (bitField0_ & ~0x00000008);
+        onChanged();
+        return this;
+      }
+      
+      // optional string magic = 8000;
+      private java.lang.Object magic_ = "";
+      public boolean hasMagic() {
+        return ((bitField0_ & 0x00000010) == 0x00000010);
+      }
+      public String getMagic() {
+        java.lang.Object ref = magic_;
+        if (!(ref instanceof String)) {
+          String s = ((com.google.protobuf.ByteString) ref).toStringUtf8();
+          magic_ = s;
+          return s;
+        } else {
+          return (String) ref;
+        }
+      }
+      public Builder setMagic(String value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  bitField0_ |= 0x00000010;
+        magic_ = value;
+        onChanged();
+        return this;
+      }
+      public Builder clearMagic() {
+        bitField0_ = (bitField0_ & ~0x00000010);
+        magic_ = getDefaultInstance().getMagic();
+        onChanged();
+        return this;
+      }
+      void setMagic(com.google.protobuf.ByteString value) {
+        bitField0_ |= 0x00000010;
+        magic_ = value;
+        onChanged();
+      }
+      
       // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.PostScript)
     }
     
@@ -10384,12 +10596,13 @@ public final class OrcProto {
       ".hive.ql.io.orc.UserMetadataItem\022\024\n\014numb" +
       "erOfRows\030\006 \001(\004\022F\n\nstatistics\030\007 \003(\01322.org" +
       ".apache.hadoop.hive.ql.io.orc.ColumnStat" +
-      "istics\022\026\n\016rowIndexStride\030\010 \001(\r\"\210\001\n\nPostS" +
+      "istics\022\026\n\016rowIndexStride\030\010 \001(\r\"\255\001\n\nPostS" +
       "cript\022\024\n\014footerLength\030\001 \001(\004\022F\n\013compressi" +
       "on\030\002 \001(\01621.org.apache.hadoop.hive.ql.io.",
       "orc.CompressionKind\022\034\n\024compressionBlockS" +
-      "ize\030\003 \001(\004*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010" +
-      "\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
+      "ize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\016\n\005magic\030" +
+      "\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010\n\004Z" +
+      "LIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -10521,7 +10734,7 @@ public final class OrcProto {
           internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_fieldAccessorTable = new
             com.google.protobuf.GeneratedMessage.FieldAccessorTable(
               internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_descriptor,
-              new java.lang.String[] { "FooterLength", "Compression", "CompressionBlockSize", },
+              new java.lang.String[] { "FooterLength", "Compression", "CompressionBlockSize", "Version", "Magic", },
               org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.class,
               org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder.class);
           return null;

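The generated code above corresponds to two new fields on the ORC PostScript message: a packed repeated uint32 "version" (tag 4) and an optional string "magic" (tag 8000). As a rough illustration only (not part of this commit), the regenerated builder could be exercised as below; the footer length, block size, version pair, and magic value are made-up sample values.

import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class PostScriptDemo {
  public static void main(String[] args) {
    // Build a PostScript using the accessors added by the regenerated code above.
    OrcProto.PostScript ps = OrcProto.PostScript.newBuilder()
        .setFooterLength(1024)
        .setCompression(OrcProto.CompressionKind.ZLIB)
        .setCompressionBlockSize(262144)
        .addVersion(0)      // repeated uint32 version = 4 [packed = true]
        .addVersion(11)     // illustrative values, not taken from the diff
        .setMagic("ORC")    // optional string magic = 8000
        .build();

    System.out.println(ps.getVersionList());                  // [0, 11]
    System.out.println(ps.hasMagic() + " " + ps.getMagic());  // true ORC
  }
}
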
Modified: hive/branches/vectorization/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java (original)
+++ hive/branches/vectorization/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java Thu Jul 25 00:14:58 2013
@@ -30,7 +30,9 @@ public enum OperatorType implements org.
   LATERALVIEWFORWARD(15),
   HASHTABLESINK(16),
   HASHTABLEDUMMY(17),
-  PTF(18);
+  PTF(18),
+  MUX(19),
+  DEMUX(20);
 
   private final int value;
 
@@ -49,7 +51,7 @@ public enum OperatorType implements org.
    * Find a the enum type by its integer value, as defined in the Thrift IDL.
    * @return null if the value is not found.
    */
-  public static OperatorType findByValue(int value) { 
+  public static OperatorType findByValue(int value) {
     switch (value) {
       case 0:
         return JOIN;
@@ -89,6 +91,10 @@ public enum OperatorType implements org.
         return HASHTABLEDUMMY;
       case 18:
         return PTF;
+      case 19:
+        return MUX;
+      case 20:
+        return DEMUX;
       default:
         return null;
     }

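As a quick illustration (not part of this commit), the two new constants round-trip through findByValue as follows; unknown values still return null.

import org.apache.hadoop.hive.ql.plan.api.OperatorType;

public class OperatorTypeDemo {
  public static void main(String[] args) {
    // The constants added above map to the Thrift IDL values 19 and 20.
    System.out.println(OperatorType.findByValue(19));  // MUX
    System.out.println(OperatorType.findByValue(20));  // DEMUX
    System.out.println(OperatorType.findByValue(99));  // null: not defined in the IDL
  }
}
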
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Thu Jul 25 00:14:58 2013
@@ -46,7 +46,6 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Schema;
 import org.apache.hadoop.hive.ql.exec.ConditionalTask;
-import org.apache.hadoop.hive.ql.exec.ExecDriver;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -198,7 +197,7 @@ public class Driver implements CommandPr
   public ClusterStatus getClusterStatus() throws Exception {
     ClusterStatus cs;
     try {
-      JobConf job = new JobConf(conf, ExecDriver.class);
+      JobConf job = new JobConf(conf);
       JobClient jc = new JobClient(job);
       cs = jc.getClusterStatus();
     } catch (Exception e) {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java Thu Jul 25 00:14:58 2013
@@ -38,10 +38,10 @@ import java.util.concurrent.ConcurrentHa
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.ConditionalTask;
-import org.apache.hadoop.hive.ql.exec.ExecDriver;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java Thu Jul 25 00:14:58 2013
@@ -74,7 +74,7 @@ public class ColumnStatsTask extends Tas
     super.initialize(conf, queryPlan, ctx);
     work.initializeForFetch();
     try {
-      JobConf job = new JobConf(conf, ExecDriver.class);
+      JobConf job = new JobConf(conf);
       ftOp = new FetchOperator(work.getfWork(), job);
     } catch (Exception e) {
       LOG.error(StringUtils.stringifyException(e));

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java Thu Jul 25 00:14:58 2013
@@ -331,6 +331,7 @@ public abstract class CommonJoinOperator
     for (AbstractRowContainer<ArrayList<Object>> alw : storage) {
       alw.clear();
     }
+    super.startGroup();
   }
 
   protected int getNextSize(int sz) {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Thu Jul 25 00:14:58 2013
@@ -508,6 +508,7 @@ public class DDLTask extends Task<DDLWor
             null, null, null, null);
         if (users != null && users.size() > 0) {
           boolean first = true;
+          sortPrivileges(users);
           for (HiveObjectPrivilege usr : users) {
             if (!first) {
               outStream.write(terminator);
@@ -563,6 +564,7 @@ public class DDLTask extends Task<DDLWor
               principalDesc.getType(), dbName, null, null, null);
           if (dbs != null && dbs.size() > 0) {
             boolean first = true;
+            sortPrivileges(dbs);
             for (HiveObjectPrivilege db : dbs) {
               if (!first) {
                 outStream.write(terminator);
@@ -586,6 +588,7 @@ public class DDLTask extends Task<DDLWor
                   columnName);
               if (columnss != null && columnss.size() > 0) {
                 boolean first = true;
+                sortPrivileges(columnss);
                 for (HiveObjectPrivilege col : columnss) {
                   if (!first) {
                     outStream.write(terminator);
@@ -606,6 +609,7 @@ public class DDLTask extends Task<DDLWor
                     .getType(), dbName, tableName, partValues, null);
             if (parts != null && parts.size() > 0) {
               boolean first = true;
+              sortPrivileges(parts);
               for (HiveObjectPrivilege part : parts) {
                 if (!first) {
                   outStream.write(terminator);
@@ -625,6 +629,7 @@ public class DDLTask extends Task<DDLWor
                 dbName, tableName, null, null);
             if (tbls != null && tbls.size() > 0) {
               boolean first = true;
+              sortPrivileges(tbls);
               for (HiveObjectPrivilege tbl : tbls) {
                 if (!first) {
                   outStream.write(terminator);
@@ -657,6 +662,18 @@ public class DDLTask extends Task<DDLWor
     return 0;
   }
 
+  private static void sortPrivileges(List<HiveObjectPrivilege> privileges) {
+    Collections.sort(privileges, new Comparator<HiveObjectPrivilege>() {
+
+      @Override
+      public int compare(HiveObjectPrivilege one, HiveObjectPrivilege other) {
+        return one.getGrantInfo().getPrivilege().compareTo(other.getGrantInfo().getPrivilege());
+      }
+
+    });
+
+  }
+
   private int grantOrRevokePrivileges(List<PrincipalDesc> principals,
       List<PrivilegeDesc> privileges, PrivilegeObjectDesc privSubjectDesc,
       String grantor, PrincipalType grantorType, boolean grantOption, boolean isGrant) {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java Thu Jul 25 00:14:58 2013
@@ -35,6 +35,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
 import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveRecordReader;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java Thu Jul 25 00:14:58 2013
@@ -65,7 +65,7 @@ public class FetchTask extends Task<Fetc
 
     try {
       // Create a file system handle
-      JobConf job = new JobConf(conf, ExecDriver.class);
+      JobConf job = new JobConf(conf);
 
       Operator<?> source = work.getSource();
       if (source instanceof TableScanOperator) {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java Thu Jul 25 00:14:58 2013
@@ -338,7 +338,7 @@ public class FileSinkOperator extends Te
         jc = (JobConf) hconf;
       } else {
         // test code path
-        jc = new JobConf(hconf, ExecDriver.class);
+        jc = new JobConf(hconf);
       }
 
       if (multiFileSpray) {
@@ -808,7 +808,7 @@ public class FileSinkOperator extends Te
   private String lsDir() {
     String specPath = conf.getDirName();
     // need to get a JobConf here because it's not passed through at client side
-    JobConf jobConf = new JobConf(ExecDriver.class);
+    JobConf jobConf = new JobConf();
     Path tmpPath = Utilities.toTempPath(specPath);
     StringBuilder sb = new StringBuilder("\n");
     try {

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Thu Jul 25 00:14:58 2013
@@ -707,6 +707,7 @@ public class GroupByOperator extends Ope
   @Override
   public void startGroup() throws HiveException {
     firstRowInGroup = true;
+    super.startGroup();
   }
 
   @Override
@@ -750,7 +751,7 @@ public class GroupByOperator extends Ope
               + " #total = " + numRowsInput + " reduction = " + 1.0
               * (numRowsHashTbl / numRowsInput) + " minReduction = "
               + minReductionHashAggr);
-          flush(true);
+          flushHashTable(true);
           hashAggr = false;
         } else {
           LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl
@@ -835,7 +836,7 @@ public class GroupByOperator extends Ope
     // happen at boundaries
     if ((!groupKeyIsNotReduceKey || firstRowInGroup)
         && shouldBeFlushed(newKeys)) {
-      flush(false);
+      flushHashTable(false);
     }
   }
 
@@ -983,7 +984,12 @@ public class GroupByOperator extends Ope
     return length;
   }
 
-  private void flush(boolean complete) throws HiveException {
+  /**
+   * Flush hash table. This method is used by hash-based aggregations
+   * @param complete
+   * @throws HiveException
+   */
+  private void flushHashTable(boolean complete) throws HiveException {
 
     countAfterReport = 0;
 
@@ -1048,6 +1054,42 @@ public class GroupByOperator extends Ope
   }
 
   /**
+   * Forward all aggregations to children. It is only used by DemuxOperator.
+   * @throws HiveException
+   */
+  @Override
+  public void flush() throws HiveException{
+    try {
+      if (hashAggregations != null) {
+        LOG.info("Begin Hash Table flush: size = "
+            + hashAggregations.size());
+        Iterator iter = hashAggregations.entrySet().iterator();
+        while (iter.hasNext()) {
+          Map.Entry<KeyWrapper, AggregationBuffer[]> m = (Map.Entry) iter
+              .next();
+
+          forward(m.getKey().getKeyArray(), m.getValue());
+          iter.remove();
+        }
+        hashAggregations.clear();
+      } else if (aggregations != null) {
+        // sort-based aggregations
+        if (currentKeys != null) {
+          forward(currentKeys.getKeyArray(), aggregations);
+        }
+        currentKeys = null;
+      } else {
+        // The GroupByOperator is not initialized, which means there is no
+        // data
+        // (since we initialize the operators when we see the first record).
+        // Just do nothing here.
+      }
+    } catch (Exception e) {
+      throw new HiveException(e);
+    }
+  }
+
+  /**
    * We need to forward all the aggregations to children.
    *
    */
@@ -1088,33 +1130,9 @@ public class GroupByOperator extends Ope
           // create dummy keys - size 0
           forward(new Object[0], aggregations);
         } else {
-          if (hashAggregations != null) {
-            LOG.info("Begin Hash Table flush at close: size = "
-                + hashAggregations.size());
-            Iterator iter = hashAggregations.entrySet().iterator();
-            while (iter.hasNext()) {
-              Map.Entry<KeyWrapper, AggregationBuffer[]> m = (Map.Entry) iter
-                  .next();
-
-              forward(m.getKey().getKeyArray(), m.getValue());
-              iter.remove();
-            }
-            hashAggregations.clear();
-          } else if (aggregations != null) {
-            // sort-based aggregations
-            if (currentKeys != null) {
-              forward(currentKeys.getKeyArray(), aggregations);
-            }
-            currentKeys = null;
-          } else {
-            // The GroupByOperator is not initialized, which means there is no
-            // data
-            // (since we initialize the operators when we see the first record).
-            // Just do nothing here.
-          }
+          flush();
         }
       } catch (Exception e) {
-        e.printStackTrace();
         throw new HiveException(e);
       }
     }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java Thu Jul 25 00:14:58 2013
@@ -87,7 +87,7 @@ public class JoinOperator extends Common
       }
 
       // number of rows for the key in the given table
-      int sz = storage[alias].size();
+      long sz = storage[alias].size();
       StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag];
       StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY
           .toString());

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java Thu Jul 25 00:14:58 2013
@@ -33,6 +33,7 @@ import java.util.Set;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.plan.MapredWork;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java Thu Jul 25 00:14:58 2013
@@ -40,17 +40,17 @@ public class MapredContext {
   private static final Log logger = LogFactory.getLog("MapredContext");
   private static final ThreadLocal<MapredContext> contexts = new ThreadLocal<MapredContext>();
 
-  static MapredContext get() {
+  public static MapredContext get() {
     return contexts.get();
   }
 
-  static MapredContext init(boolean isMap, JobConf jobConf) {
+  public static MapredContext init(boolean isMap, JobConf jobConf) {
     MapredContext context = new MapredContext(isMap, jobConf);
     contexts.set(context);
     return context;
   }
 
-  static void close() {
+  public static void close() {
     MapredContext context = contexts.get();
     if (context != null) {
       context.closeAll();
@@ -91,7 +91,7 @@ public class MapredContext {
     return jobConf;
   }
 
-  void setReporter(Reporter reporter) {
+  public void setReporter(Reporter reporter) {
     this.reporter = reporter;
   }
 
@@ -139,8 +139,8 @@ public class MapredContext {
     try {
       Method initMethod = func.getClass().getMethod("configure", MapredContext.class);
       return initMethod.getDeclaringClass() != GenericUDF.class &&
-          initMethod.getDeclaringClass() != GenericUDAFEvaluator.class &&
-          initMethod.getDeclaringClass() != GenericUDTF.class;
+        initMethod.getDeclaringClass() != GenericUDAFEvaluator.class &&
+        initMethod.getDeclaringClass() != GenericUDTF.class;
     } catch (Exception e) {
       return false;
     }
@@ -150,7 +150,7 @@ public class MapredContext {
     try {
       Method closeMethod = func.getClass().getMethod("close");
       return closeMethod.getDeclaringClass() != GenericUDF.class &&
-          closeMethod.getDeclaringClass() != GenericUDAFEvaluator.class;
+        closeMethod.getDeclaringClass() != GenericUDAFEvaluator.class;
     } catch (Exception e) {
       return false;
     }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java Thu Jul 25 00:14:58 2013
@@ -40,6 +40,8 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Thu Jul 25 00:14:58 2013
@@ -27,10 +27,12 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -76,7 +78,7 @@ public abstract class Operator<T extends
 
   private transient ExecMapperContext execContext;
 
-  private static int seqId;
+  private static AtomicInteger seqId;
 
   // It can be optimized later so that an operator operator (init/close) is performed
   // only after that operation has been performed on all the parents. This will require
@@ -103,17 +105,17 @@ public abstract class Operator<T extends
   // all operators
 
   static {
-    seqId = 0;
+    seqId = new AtomicInteger(0);
   }
 
   private boolean useBucketizedHiveInputFormat;
 
   public Operator() {
-    id = String.valueOf(seqId++);
+    id = String.valueOf(seqId.getAndIncrement());
   }
 
   public static void resetId() {
-    seqId = 0;
+    seqId.set(0);
   }
 
   /**
@@ -123,8 +125,8 @@ public abstract class Operator<T extends
    *          Used to report progress of certain operators.
    */
   public Operator(Reporter reporter) {
+    this();
     this.reporter = reporter;
-    id = String.valueOf(seqId++);
   }
 
   public void setChildOperators(
@@ -435,7 +437,7 @@ public abstract class Operator<T extends
    *          parent operator id
    * @throws HiveException
    */
-  private void initialize(Configuration hconf, ObjectInspector inputOI,
+  protected void initialize(Configuration hconf, ObjectInspector inputOI,
       int parentId) throws HiveException {
     LOG.info("Initializing child " + id + " " + getName());
     // Double the size of the array if needed
@@ -523,7 +525,7 @@ public abstract class Operator<T extends
     LOG.debug("Start group Done");
   }
 
-  // If a operator wants to do some work at the end of a group
+  // If an operator wants to do some work at the end of a group
   public void endGroup() throws HiveException {
     LOG.debug("Ending group");
 
@@ -543,6 +545,20 @@ public abstract class Operator<T extends
     LOG.debug("End group Done");
   }
 
+  // an blocking operator (e.g. GroupByOperator and JoinOperator) can
+  // override this method to forward its outputs
+  public void flush() throws HiveException {
+  }
+
+  public void processGroup(int tag) throws HiveException {
+    if (childOperators == null) {
+      return;
+    }
+    for (int i = 0; i < childOperatorsArray.length; i++) {
+      childOperatorsArray[i].processGroup(childOperatorsTag[i]);
+    }
+  }
+
   protected boolean allInitializedParentsAreClosed() {
     if (parentOperators != null) {
       for (Operator<? extends OperatorDesc> parent : parentOperators) {
@@ -779,7 +795,7 @@ public abstract class Operator<T extends
     parentOperators.set(parentIndex, newParent);
   }
 
-  private long getNextCntr(long cntr) {
+  protected long getNextCntr(long cntr) {
     // A very simple counter to keep track of number of rows processed by an
     // operator. It dumps
     // every 1 million times, and quickly before that

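The seqId change above swaps a plain static int for an AtomicInteger so that operator ids stay unique when operators are constructed from multiple threads. A minimal standalone sketch of that pattern (not Hive code):

import java.util.concurrent.atomic.AtomicInteger;

public class SeqIdDemo {
  private static final AtomicInteger seqId = new AtomicInteger(0);
  private final String id;

  public SeqIdDemo() {
    // getAndIncrement() is a single atomic step, so two threads can never
    // observe the same counter value and end up with duplicate ids.
    id = String.valueOf(seqId.getAndIncrement());
  }

  public static void main(String[] args) throws InterruptedException {
    Runnable r = new Runnable() {
      public void run() {
        for (int i = 0; i < 1000; i++) {
          new SeqIdDemo();
        }
      }
    };
    Thread t1 = new Thread(r);
    Thread t2 = new Thread(r);
    t1.start(); t2.start();
    t1.join(); t2.join();
    // With the atomic counter this always prints 2000; with a plain static int
    // it could be lower because of lost updates.
    System.out.println("next id = " + seqId.get());
  }
}
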
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Thu Jul 25 00:14:58 2013
@@ -23,6 +23,8 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.hive.ql.plan.CollectDesc;
+import org.apache.hadoop.hive.ql.plan.MuxDesc;
+import org.apache.hadoop.hive.ql.plan.DemuxDesc;
 import org.apache.hadoop.hive.ql.plan.DummyStoreDesc;
 import org.apache.hadoop.hive.ql.plan.ExtractDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
@@ -98,6 +100,10 @@ public final class OperatorFactory {
         HashTableSinkOperator.class));
     opvec.add(new OpTuple<DummyStoreDesc>(DummyStoreDesc.class,
         DummyStoreOperator.class));
+    opvec.add(new OpTuple<DemuxDesc>(DemuxDesc.class,
+        DemuxOperator.class));
+    opvec.add(new OpTuple<MuxDesc>(MuxDesc.class,
+        MuxOperator.class));
   }
 
   public static <T extends OperatorDesc> Operator<T> get(Class<T> opClass) {
@@ -257,7 +263,7 @@ public final class OperatorFactory {
   public static <T extends OperatorDesc> Operator<T> getAndMakeChild(T conf,
       RowSchema rwsch, Map<String, ExprNodeDesc> colExprMap, Operator... oplist) {
     Operator<T> ret = getAndMakeChild(conf, rwsch, oplist);
-    ret.setColumnExprMap(colExprMap);    
+    ret.setColumnExprMap(colExprMap);
     return (ret);
   }
 

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java Thu Jul 25 00:14:58 2013
@@ -200,7 +200,7 @@ public class TableScanOperator extends O
       jc = (JobConf) hconf;
     } else {
       // test code path
-      jc = new JobConf(hconf, ExecDriver.class);
+      jc = new JobConf(hconf);
     }
 
     currentStat = null;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java Thu Jul 25 00:14:58 2013
@@ -31,7 +31,6 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.CommandNeedRetryException;
-import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.lib.Node;
@@ -50,6 +49,8 @@ import org.apache.hadoop.util.StringUtil
 public abstract class Task<T extends Serializable> implements Serializable, Node {
 
   private static final long serialVersionUID = 1L;
+  public transient HashMap<String, Long> taskCounters;
+  public transient TaskHandle taskHandle;
   protected transient boolean started;
   protected transient boolean initialized;
   protected transient boolean isdone;
@@ -58,8 +59,6 @@ public abstract class Task<T extends Ser
   protected transient Hive db;
   protected transient LogHelper console;
   protected transient QueryPlan queryPlan;
-  protected transient TaskHandle taskHandle;
-  protected transient HashMap<String, Long> taskCounters;
   protected transient DriverContext driverContext;
   protected transient boolean clonedConf = false;
   protected transient String jobID;
@@ -87,6 +86,7 @@ public abstract class Task<T extends Ser
 
   protected String id;
   protected T work;
+
   public static enum FeedType {
     DYNAMIC_PARTITIONS, // list of dynamic partitions
   };

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java Thu Jul 25 00:14:58 2013
@@ -23,6 +23,8 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask;
 import org.apache.hadoop.hive.ql.io.rcfile.merge.BlockMergeTask;
 import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork;
 import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanTask;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/UDFArgumentException.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/UDFArgumentException.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/UDFArgumentException.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/UDFArgumentException.java Thu Jul 25 00:14:58 2013
@@ -21,6 +21,8 @@ package org.apache.hadoop.hive.ql.exec;
 import java.lang.reflect.Method;
 import java.lang.reflect.Type;
 import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
 import java.util.List;
 
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -72,6 +74,8 @@ public class UDFArgumentException extend
     StringBuilder sb = new StringBuilder();
     sb.append(message);
     if (methods != null) {
+      // Sort the methods before omitting them.
+      sortMethods(methods);
       sb.append(". Possible choices: ");
       for (Method m: methods) {
         Type[] types = m.getGenericParameterTypes();
@@ -89,6 +93,28 @@ public class UDFArgumentException extend
     return sb.toString();
   }
   
+  private static void sortMethods(List<Method> methods) {
+    Collections.sort( methods, new Comparator<Method>(){
+
+      @Override
+      public int compare(Method m1, Method m2) {
+        int result = m1.getName().compareTo(m2.getName());
+        if (result != 0)
+          return result;
+        Type[] types1 = m1.getGenericParameterTypes();
+        Type[] types2 = m2.getGenericParameterTypes();
+        for (int i = 0; i < types1.length && i < types2.length; i++) {
+          String type1 = ObjectInspectorUtils.getTypeNameFromJavaClass(types1[i]);
+          String type2 = ObjectInspectorUtils.getTypeNameFromJavaClass(types2[i]);
+          if ((result = type1.compareTo(type2)) != 0)
+            return result;
+        }
+        return types1.length - types2.length;
+      }
+
+    });
+  }
+  
   /**
    * The UDF or UDAF class that has the ambiguity.
    */

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Thu Jul 25 00:14:58 2013
@@ -97,6 +97,8 @@ import org.apache.hadoop.hive.ql.Context
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
+import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
+import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
 import org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
@@ -121,9 +123,9 @@ import org.apache.hadoop.hive.ql.plan.Ma
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.PlanUtils.ExpressionTypes;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.api.Adjacency;
 import org.apache.hadoop.hive.ql.plan.api.Graph;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.stats.StatsFactory;
 import org.apache.hadoop.hive.ql.stats.StatsPublisher;
@@ -166,13 +168,22 @@ public final class Utilities {
   public static String HADOOP_LOCAL_FS = "file:///";
 
   /**
-   * ReduceField.
-   *
+   * ReduceField:
+   * KEY: record key
+   * VALUE: record value
    */
   public static enum ReduceField {
-    KEY, VALUE, ALIAS
+    KEY, VALUE
   };
 
+  public static List<String> reduceFieldNameList;
+  static {
+    reduceFieldNameList = new ArrayList<String>();
+    for (ReduceField r : ReduceField.values()) {
+      reduceFieldNameList.add(r.toString());
+    }
+  }
+
   private Utilities() {
     // prevent instantiation
   }
@@ -234,15 +245,18 @@ public final class Utilities {
   public static void setWorkflowAdjacencies(Configuration conf, QueryPlan plan) {
     try {
       Graph stageGraph = plan.getQueryPlan().getStageGraph();
-      if (stageGraph == null)
+      if (stageGraph == null) {
         return;
+      }
       List<Adjacency> adjList = stageGraph.getAdjacencyList();
-      if (adjList == null)
+      if (adjList == null) {
         return;
+      }
       for (Adjacency adj : adjList) {
         List<String> children = adj.getChildren();
-        if (children == null || children.isEmpty())
+        if (children == null || children.isEmpty()) {
           return;
+        }
         conf.setStrings("mapreduce.workflow.adjacency."+adj.getNode(),
             children.toArray(new String[children.size()]));
       }
@@ -1404,7 +1418,7 @@ public final class Utilities {
       jc = new JobConf(hconf);
     } else {
       // test code path
-      jc = new JobConf(hconf, ExecDriver.class);
+      jc = new JobConf(hconf);
     }
     HiveOutputFormat<?, ?> hiveOutputFormat = null;
     Class<? extends Writable> outputClass = null;

Copied: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java (from r1506653, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java)
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java?p2=hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java&p1=hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java&r1=1506653&r2=1506785&rev=1506785&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java Thu Jul 25 00:14:58 2013
@@ -26,6 +26,7 @@ import java.io.Serializable;
 import java.lang.management.ManagementFactory;
 import java.lang.management.MemoryMXBean;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Enumeration;
@@ -64,6 +65,10 @@ import org.apache.hadoop.hive.ql.exec.Ta
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExecMapper;
+import org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveKey;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
@@ -289,7 +294,23 @@ public class ExecDriver extends Task<Map
     ShimLoader.getHadoopShims().prepareJobOutput(job);
     //See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput()
     job.setOutputFormat(HiveOutputFormatImpl.class);
-    job.setMapperClass(ExecMapper.class);
+
+
+    boolean vectorPath = HiveConf.getBoolVar(job,
+        HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
+
+    if (vectorPath) {
+      if (validateVectorPath()) {
+        LOG.info("Going down the vectorization path");
+        job.setMapperClass(VectorExecMapper.class);
+      } else {
+        //fall back to non-vector mode
+        HiveConf.setBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false);
+        job.setMapperClass(ExecMapper.class);
+      }
+    } else {
+      job.setMapperClass(ExecMapper.class);
+    }
 
     job.setMapOutputKeyClass(HiveKey.class);
     job.setMapOutputValueClass(BytesWritable.class);
@@ -539,6 +560,59 @@ public class ExecDriver extends Task<Map
     return (returnVal);
   }
 
+  private boolean validateVectorPath() {
+    LOG.debug("Validating if vectorized execution is applicable");
+    MapredWork thePlan = this.getWork();
+
+    for (String path : thePlan.getPathToPartitionInfo().keySet()) {
+      PartitionDesc pd = thePlan.getPathToPartitionInfo().get(path);
+      List<Class<?>> interfaceList =
+          Arrays.asList(pd.getInputFileFormatClass().getInterfaces());
+      if (!interfaceList.contains(VectorizedInputFormatInterface.class)) {
+        LOG.debug("Input format: " + pd.getInputFileFormatClassName()
+            + ", doesn't provide vectorized input");
+        return false;
+      }
+    }
+    VectorizationContext vc = new VectorizationContext(null, 0);
+    for (String onefile : thePlan.getPathToAliases().keySet()) {
+      List<String> aliases = thePlan.getPathToAliases().get(onefile);
+      for (String onealias : aliases) {
+        Operator<? extends OperatorDesc> op = thePlan.getAliasToWork().get(
+            onealias);
+        Operator<? extends OperatorDesc> vectorOp = null;
+        try {
+          vectorOp = VectorMapOperator.vectorizeOperator(op, vc);
+        } catch (Exception e) {
+          LOG.debug("Cannot vectorize the plan", e);
+          return false;
+        }
+        if (vectorOp == null) {
+          LOG.debug("Cannot vectorize the plan");
+          return false;
+        }
+        //verify the expressions contained in the operators
+        try {
+          validateVectorOperator(vectorOp);
+        } catch (HiveException e) {
+          LOG.debug("Cannot vectorize the plan", e);
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  private void validateVectorOperator(Operator<? extends OperatorDesc> vectorOp)
+      throws HiveException {
+    vectorOp.initialize(job, null);
+    if (vectorOp.getChildOperators() != null) {
+      for (Operator<? extends OperatorDesc> vop : vectorOp.getChildOperators()) {
+        validateVectorOperator(vop);
+      }
+    }
+  }
+
   private void handleSampling(DriverContext context, MapredWork work, JobConf job, HiveConf conf)
       throws Exception {
     assert work.getAliasToWork().keySet().size() == 1;
@@ -1028,3 +1102,4 @@ public class ExecDriver extends Task<Map
     }
   }
 }
+

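One detail of validateVectorPath() above: Class.getInterfaces() lists only the interfaces a class declares directly, not those inherited from its superclasses. A small standalone sketch (not part of this commit) contrasting it with isAssignableFrom():

import java.util.Arrays;

public class InterfaceCheckDemo {
  interface Marker { }
  static class Base implements Marker { }
  static class Derived extends Base { }

  public static void main(String[] args) {
    // getInterfaces() reports only interfaces declared directly on the class...
    System.out.println(Arrays.asList(Derived.class.getInterfaces())
        .contains(Marker.class));                                  // false
    // ...while isAssignableFrom() also covers interfaces inherited from supertypes.
    System.out.println(Marker.class.isAssignableFrom(Derived.class)); // true
  }
}
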
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractRowContainer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractRowContainer.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractRowContainer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractRowContainer.java Thu Jul 25 00:14:58 2013
@@ -38,7 +38,7 @@ public abstract class AbstractRowContain
    * @return number of elements in the RowContainer
    */
 
-  public abstract int size();
+  public abstract long size();
 
   /**
    * Remove all elements in the RowContainer.

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinDoubleKeys.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinDoubleKeys.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinDoubleKeys.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinDoubleKeys.java Thu Jul 25 00:14:58 2013
@@ -50,45 +50,44 @@ public class MapJoinDoubleKeys extends A
     this.obj2 = obj2;
   }
 
+
+
   @Override
-  public boolean equals(Object o) {
-    if (o instanceof MapJoinDoubleKeys) {
-      MapJoinDoubleKeys mObj = (MapJoinDoubleKeys) o;
-      Object key1 = mObj.getObj1();
-      Object key2 = mObj.getObj2();
-
-      if ((obj1 == null) && (key1 == null)) {
-        if ((obj2 == null) && (key2 == null)) {
-          return true;
-        }
-      }
-      if ((obj1 != null) && (key1 != null)) {
-        if (obj1.equals(key1)) {
-          if ((obj2 != null) && (key2 != null)) {
-            if (obj2.equals(key2)) {
-              return true;
-            }
-          }
-        }
-      }
-    }
-    return false;
+  public int hashCode() {
+    final int prime = 31;
+    int result = 1;
+    result = prime * result + ((obj1 == null) ? 0 : obj1.hashCode());
+    result = prime * result + ((obj2 == null) ? 0 : obj2.hashCode());
+    return result;
   }
 
   @Override
-  public int hashCode() {
-    int hashCode = 1;
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null) {
+      return false;
+    }
+    if (getClass() != obj.getClass()) {
+      return false;
+    }
+    MapJoinDoubleKeys other = (MapJoinDoubleKeys) obj;
     if (obj1 == null) {
-      hashCode = metadataTag;
-    } else {
-      hashCode += (31 + obj1.hashCode());
+      if (other.obj1 != null) {
+        return false;
+      }
+    } else if (!obj1.equals(other.obj1)) {
+      return false;
     }
     if (obj2 == null) {
-      hashCode += metadataTag;
-    } else {
-      hashCode += (31 + obj2.hashCode());
+      if (other.obj2 != null) {
+        return false;
+      }
+    } else if (!obj2.equals(other.obj2)) {
+      return false;
     }
-    return hashCode;
+    return true;
   }
 
   @Override
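
The rewritten equals()/hashCode() above is the conventional two-field pattern: reflexive check, null and class checks, then per-field comparison, with a 31-based hash over both fields. For reference only, an equivalent and more compact form using java.util.Objects (Java 7 and later) is sketched below; PairKey is a hypothetical name and is not part of this patch.

    import java.util.Objects;

    final class PairKey {
      private final Object obj1;
      private final Object obj2;

      PairKey(Object obj1, Object obj2) {
        this.obj1 = obj1;
        this.obj2 = obj2;
      }

      @Override
      public int hashCode() {
        // 31-based combination; null fields contribute 0, matching the patch.
        return Objects.hash(obj1, obj2);
      }

      @Override
      public boolean equals(Object o) {
        if (this == o) {
          return true;
        }
        if (o == null || getClass() != o.getClass()) {
          return false;
        }
        PairKey other = (PairKey) o;
        return Objects.equals(obj1, other.obj1)
            && Objects.equals(obj2, other.obj2);
      }
    }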

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java Thu Jul 25 00:14:58 2013
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.io.ObjectInput;
 import java.io.ObjectOutput;
 import java.util.ArrayList;
+import java.util.Arrays;
 
 import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator;
 import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx;
@@ -34,7 +35,7 @@ import org.apache.hadoop.io.Writable;
 /**
  * Map Join Object used for the key.
  */
-public class MapJoinObjectKey  extends AbstractMapJoinKey {
+public class MapJoinObjectKey extends AbstractMapJoinKey {
 
 
   protected transient Object[] obj;
@@ -49,46 +50,29 @@ public class MapJoinObjectKey  extends A
     this.obj = obj;
   }
 
+
+
   @Override
-  public boolean equals(Object o) {
-    if (o instanceof MapJoinObjectKey) {
-      MapJoinObjectKey mObj = (MapJoinObjectKey) o;
-      Object[] mObjArray = mObj.getObj();
-      if ((obj == null) && (mObjArray == null)) {
-        return true;
-      }
-      if ((obj != null) && (mObjArray != null)) {
-        if (obj.length == mObjArray.length) {
-          for (int i = 0; i < obj.length; i++) {
-            if (obj[i] == null) {
-              return mObjArray[i] == null;
-            }
-            if (!obj[i].equals(mObjArray[i])) {
-              return false;
-            }
-          }
-          return true;
-        }
-      }
-    }
-    return false;
+  public int hashCode() {
+    return Arrays.hashCode(obj);
   }
 
   @Override
-  public int hashCode() {
-    int hashCode;
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
     if (obj == null) {
-      hashCode = metadataTag;
-    } else {
-      hashCode = 1;
-
-      for (int i = 0; i < obj.length; i++) {
-        Object o = obj[i];
-        hashCode = 31 * hashCode + (o == null ? 0 : o.hashCode());
-      }
-
+      return false;
+    }
+    if (getClass() != obj.getClass()) {
+      return false;
+    }
+    MapJoinObjectKey other = (MapJoinObjectKey) obj;
+    if (!Arrays.equals(this.obj, other.obj)) {
+      return false;
     }
-    return hashCode;
+    return true;
   }
 
   @Override
@@ -104,9 +88,9 @@ public class MapJoinObjectKey  extends A
       ArrayList<Object> list = (ArrayList<Object>) ObjectInspectorUtils.copyToStandardObject(ctx
           .getSerDe().deserialize(val), ctx.getSerDe().getObjectInspector(),
           ObjectInspectorCopyOption.WRITABLE);
-      if(list == null){
+      if (list == null) {
         obj = new ArrayList(0).toArray();
-      }else{
+      } else {
         obj = list.toArray();
       }
 
@@ -148,8 +132,8 @@ public class MapJoinObjectKey  extends A
   }
 
   @Override
-  public boolean hasAnyNulls(boolean[] nullsafes){
-    if (obj != null && obj.length> 0) {
+  public boolean hasAnyNulls(boolean[] nullsafes) {
+    if (obj != null && obj.length > 0) {
       for (int i = 0; i < obj.length; i++) {
         if (obj[i] == null && (nullsafes == null || !nullsafes[i])) {
           return true;
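
Delegating to Arrays.hashCode and Arrays.equals means two keys wrapping different Object[] instances with equal contents behave as the same hash table key, which is what the map-join lookup requires. A small self-contained demonstration of why the delegation matters, using plain java.util classes rather than the Hive key type:

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.Map;

    public class ArrayKeyDemo {
      public static void main(String[] args) {
        Object[] a = {"k1", 42};
        Object[] b = {"k1", 42};   // different instance, equal contents

        // Arrays.hashCode/Arrays.equals treat a and b as the same logical key.
        System.out.println(Arrays.hashCode(a) == Arrays.hashCode(b));  // true
        System.out.println(Arrays.equals(a, b));                       // true

        // A raw Object[] would NOT work as a HashMap key, because arrays use
        // identity hashCode/equals; that is why the key object must delegate
        // to the Arrays methods.
        Map<Object[], String> byIdentity = new HashMap<Object[], String>();
        byIdentity.put(a, "row");
        System.out.println(byIdentity.get(b));                         // null
      }
    }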

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java Thu Jul 25 00:14:58 2013
@@ -140,7 +140,7 @@ public class MapJoinObjectValue implemen
 
       // Different processing for key and value
       MapJoinRowContainer<Object[]> v = obj;
-      out.writeInt(v.size());
+      out.writeInt((int)v.size());
       if (v.size() > 0) {
         Object[] row = v.first();
         out.writeInt(row.length);
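
Note that size() is widened from int to long elsewhere in this patch, while the serialized form written here still stores an int, so the cast above narrows. A defensive variant that fails loudly instead of silently truncating could look like the hypothetical helper below; the patch itself simply casts, on the assumption that the in-memory side of a map join stays below Integer.MAX_VALUE rows.

    import java.io.IOException;

    final class SizeCastSketch {
      // Hypothetical guard around the long-to-int narrowing; not part of the patch.
      static int toIntSizeExact(long size) throws IOException {
        if (size < 0 || size > Integer.MAX_VALUE) {
          throw new IOException("Row container too large to serialize: " + size);
        }
        return (int) size;
      }
    }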

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinRowContainer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinRowContainer.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinRowContainer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinRowContainer.java Thu Jul 25 00:14:58 2013
@@ -65,7 +65,7 @@ public class MapJoinRowContainer<Row> ex
    * @return number of elements in the RowContainer
    */
   @Override
-  public int size() {
+  public long size() {
     return list.size();
   }
 

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java Thu Jul 25 00:14:58 2013
@@ -86,7 +86,7 @@ public class RowContainer<Row extends Li
   private int blockSize; // number of objects in the block before it is spilled
   // to disk
   private int numFlushedBlocks; // total # of blocks
-  private int size; // total # of elements in the RowContainer
+  private long size;    // total # of elements in the RowContainer
   private File tmpFile; // temporary file holding the spilled blocks
   Path tempOutPath = null;
   private File parentFile;
@@ -283,7 +283,7 @@ public class RowContainer<Row extends Li
     }
   }
 
-  ArrayList<Object> row = new ArrayList<Object>(2);
+  private final ArrayList<Object> row = new ArrayList<Object>(2);
 
   private void spillBlock(Row[] block, int length) throws HiveException {
     try {
@@ -360,7 +360,7 @@ public class RowContainer<Row extends Li
    * @return number of elements in the RowContainer
    */
   @Override
-  public int size() {
+  public long size() {
     return size;
   }
 

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExecMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExecMapper.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExecMapper.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExecMapper.java Thu Jul 25 00:14:58 2013
@@ -28,7 +28,7 @@ import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.ExecMapperContext;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
 import org.apache.hadoop.hive.ql.exec.FetchOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.Utilities;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java Thu Jul 25 00:14:58 2013
@@ -36,7 +36,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.metastore.api.SkewedValueList;
 import org.apache.hadoop.hive.ql.ErrorMsg;
-import org.apache.hadoop.hive.ql.exec.ExecDriver;
+import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java Thu Jul 25 00:14:58 2013
@@ -33,7 +33,7 @@ import java.util.Set;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.ExecMapperContext;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputSplit.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputSplit.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputSplit.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputSplit.java Thu Jul 25 00:14:58 2013
@@ -37,7 +37,6 @@ import org.apache.hadoop.conf.Configurab
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.hive.ql.exec.ExecMapper;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java Thu Jul 25 00:14:58 2013
@@ -21,7 +21,7 @@ package org.apache.hadoop.hive.ql.io;
 import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.exec.ExecMapper;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
 import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.CombineHiveInputSplit;
 import org.apache.hadoop.hive.shims.HadoopShims.InputSplitShim;
 import org.apache.hadoop.io.Writable;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/HiveRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/HiveRecordReader.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/HiveRecordReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/HiveRecordReader.java Thu Jul 25 00:14:58 2013
@@ -20,7 +20,7 @@ package org.apache.hadoop.hive.ql.io;
 
 import java.io.IOException;
 
-import org.apache.hadoop.hive.ql.exec.ExecMapper;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.JobConf;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Thu Jul 25 00:14:58 2013
@@ -31,6 +31,26 @@ import java.io.IOException;
 public final class OrcFile {
 
   public static final String MAGIC = "ORC";
+
+  /**
+   * Create a version number for the ORC file format, so that we can add
+   * non-forward compatible changes in the future. To make it easier for users
+   * to understand the version numbers, we use the Hive release number that
+   * first wrote that version of ORC files.
+   *
+   * Thus, if you add new encodings or other non-forward compatible changes
+   * to ORC files, which prevent the old reader from reading the new format,
+   * you should change these variables to reflect the next Hive release number.
+   * Non-forward compatible changes should never be added in patch releases.
+   *
+   * Do not make any changes that break backwards compatibility, which would
+   * prevent the new reader from reading ORC files generated by any released
+   * version of Hive.
+   */
+  public static final int MAJOR_VERSION = 0;
+  public static final int MINOR_VERSION = 11;
+
+  // the table properties that control ORC files
   public static final String COMPRESSION = "orc.compress";
   static final String DEFAULT_COMPRESSION = "ZLIB";
   public static final String COMPRESSION_BLOCK_SIZE = "orc.compress.size";
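
With the version pinned to the Hive release that first wrote the format (0.11 here), the reader-side compatibility test later in this patch (ReaderImpl.checkOrcVersion) reduces to a lexicographic comparison of (major, minor) pairs. A minimal standalone form of that comparison, with the reader's own version passed in explicitly and hypothetical names:

    final class OrcVersionCompareSketch {
      // True when the file's (major, minor) is newer than the reader's,
      // i.e. the file may use encodings this reader does not understand.
      static boolean writtenByFutureVersion(int fileMajor, int fileMinor,
                                            int readerMajor, int readerMinor) {
        return fileMajor > readerMajor
            || (fileMajor == readerMajor && fileMinor > readerMinor);
      }

      public static void main(String[] args) {
        // A 0.12 file read by a 0.11 reader would trigger the warning.
        System.out.println(writtenByFutureVersion(0, 12, 0, 11));  // true
        System.out.println(writtenByFutureVersion(0, 11, 0, 11));  // false
      }
    }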

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Thu Jul 25 00:14:58 2013
@@ -19,10 +19,13 @@
 package org.apache.hadoop.hive.ql.io.orc;
 
 import com.google.protobuf.CodedInputStream;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Text;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -33,6 +36,8 @@ import java.util.List;
 
 final class ReaderImpl implements Reader {
 
+  private static final Log LOG = LogFactory.getLog(ReaderImpl.class);
+
   private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
 
   private final FileSystem fileSystem;
@@ -176,6 +181,81 @@ final class ReaderImpl implements Reader
     return result;
   }
 
+  /**
+   * Ensure this is an ORC file to prevent users from trying to read text
+   * files or RC files as ORC files.
+   * @param in the file being read
+   * @param path the filename for error messages
+   * @param psLen the postscript length
+   * @param buffer the tail of the file
+   * @throws IOException
+   */
+  static void ensureOrcFooter(FSDataInputStream in,
+                                      Path path,
+                                      int psLen,
+                                      ByteBuffer buffer) throws IOException {
+    int len = OrcFile.MAGIC.length();
+    if (psLen < len + 1) {
+      throw new IOException("Malformed ORC file " + path +
+          ". Invalid postscript length " + psLen);
+    }
+    int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - 1
+        - len;
+    byte[] array = buffer.array();
+    // now look for the magic string at the end of the postscript.
+    if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) {
+      // If it isn't there, this may be the 0.11.0 version of ORC.
+      // Read the first 3 bytes of the file to check for the header
+      in.seek(0);
+      byte[] header = new byte[len];
+      in.readFully(header, 0, len);
+      // if it isn't there, this isn't an ORC file
+      if (!Text.decode(header, 0 , len).equals(OrcFile.MAGIC)) {
+        throw new IOException("Malformed ORC file " + path +
+            ". Invalid postscript.");
+      }
+    }
+  }
+
+  /**
+   * Build a version string out of an array.
+   * @param version the version number as a list
+   * @return the human readable form of the version string
+   */
+  private static String versionString(List<Integer> version) {
+    StringBuilder buffer = new StringBuilder();
+    for(int i=0; i < version.size(); ++i) {
+      if (i != 0) {
+        buffer.append('.');
+      }
+      buffer.append(version.get(i));
+    }
+    return buffer.toString();
+  }
+
+  /**
+   * Check to see if this ORC file is from a future version and if so,
+   * warn the user that we may not be able to read all of the column encodings.
+   * @param log the logger to write any error message to
+   * @param path the filename for error messages
+   * @param version the version of hive that wrote the file.
+   */
+  static void checkOrcVersion(Log log, Path path, List<Integer> version) {
+    if (version.size() >= 1) {
+      int major = version.get(0);
+      int minor = 0;
+      if (version.size() >= 2) {
+        minor = version.get(1);
+      }
+      if (major > OrcFile.MAJOR_VERSION ||
+          (major == OrcFile.MAJOR_VERSION && minor > OrcFile.MINOR_VERSION)) {
+        log.warn("ORC file " + path + " was written by a future Hive version " +
+            versionString(version) + ". This file may not be readable by " +
+            "this version of Hive.");
+      }
+    }
+  }
+
   ReaderImpl(FileSystem fs, Path path) throws IOException {
     this.fileSystem = fs;
     this.path = path;
@@ -187,10 +267,12 @@ final class ReaderImpl implements Reader
     file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(),
       buffer.remaining());
     int psLen = buffer.get(readSize - 1);
+    ensureOrcFooter(file, path, psLen, buffer);
     int psOffset = readSize - 1 - psLen;
     CodedInputStream in = CodedInputStream.newInstance(buffer.array(),
       buffer.arrayOffset() + psOffset, psLen);
     OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
+    checkOrcVersion(LOG, path, ps.getVersionList());
     int footerSize = (int) ps.getFooterLength();
     bufferSize = (int) ps.getCompressionBlockSize();
     switch (ps.getCompression()) {
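
The footer check above locates the magic string relative to the end of the tail buffer: the last byte of the tail is the one-byte postscript length, and the magic occupies the final bytes of the postscript just before it, with a fallback to the first three bytes of the file for 0.11.0-era files. The offset arithmetic is easy to get wrong, so here is a reduced, self-contained illustration of it; the names are hypothetical and the buffer contents are faked.

    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;

    final class TailMagicSketch {
      // Same arithmetic as ensureOrcFooter: skip back over the one-byte
      // postscript length, then read the magic that precedes it.
      static String magicAtTail(ByteBuffer tail, int magicLen) {
        int offset = tail.arrayOffset() + tail.position() + tail.limit()
            - 1 - magicLen;
        return new String(tail.array(), offset, magicLen, StandardCharsets.UTF_8);
      }

      public static void main(String[] args) {
        // Fake file tail: some data, the magic "ORC", then a postscript-length byte.
        byte[] fileTail = {'x', 'x', 'O', 'R', 'C', 3};
        System.out.println(magicAtTail(ByteBuffer.wrap(fileTail), 3));  // prints ORC
      }
    }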

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1506785&r1=1506784&r2=1506785&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Thu Jul 25 00:14:58 2013
@@ -92,7 +92,7 @@ class RecordReaderImpl implements Record
     }
     firstRow = skippedRows;
     totalRowCount = rows;
-    reader = createTreeReader(0, types, included);
+    reader = createTreeReader(path, 0, types, included);
     indexes = new OrcProto.RowIndex[types.size()];
     rowIndexStride = strideRate;
     if (this.stripes.size() > 0) {
@@ -115,17 +115,27 @@ class RecordReaderImpl implements Record
   }
 
   private abstract static class TreeReader {
+    protected final Path path;
     protected final int columnId;
     private BitFieldReader present = null;
     protected boolean valuePresent = false;
 
-    TreeReader(int columnId) {
+    TreeReader(Path path, int columnId) {
+      this.path = path;
       this.columnId = columnId;
     }
 
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId + " of " + path);
+      }
+    }
+
     void startStripe(Map<StreamName, InStream> streams,
                      List<OrcProto.ColumnEncoding> encoding
                     ) throws IOException {
+      checkEncoding(encoding.get(columnId));
       InStream in = streams.get(new StreamName(columnId,
           OrcProto.Stream.Kind.PRESENT));
       if (in == null) {
@@ -206,8 +216,8 @@ class RecordReaderImpl implements Record
   private static class BooleanTreeReader extends TreeReader{
     private BitFieldReader reader = null;
 
-    BooleanTreeReader(int columnId) {
-      super(columnId);
+    BooleanTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -266,8 +276,8 @@ class RecordReaderImpl implements Record
   private static class ByteTreeReader extends TreeReader{
     private RunLengthByteReader reader = null;
 
-    ByteTreeReader(int columnId) {
-      super(columnId);
+    ByteTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -326,8 +336,8 @@ class RecordReaderImpl implements Record
   private static class ShortTreeReader extends TreeReader{
     private RunLengthIntegerReader reader = null;
 
-    ShortTreeReader(int columnId) {
-      super(columnId);
+    ShortTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -387,8 +397,8 @@ class RecordReaderImpl implements Record
   private static class IntTreeReader extends TreeReader{
     private RunLengthIntegerReader reader = null;
 
-    IntTreeReader(int columnId) {
-      super(columnId);
+    IntTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -448,8 +458,8 @@ class RecordReaderImpl implements Record
   private static class LongTreeReader extends TreeReader{
     private RunLengthIntegerReader reader = null;
 
-    LongTreeReader(int columnId) {
-      super(columnId);
+    LongTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -509,8 +519,8 @@ class RecordReaderImpl implements Record
   private static class FloatTreeReader extends TreeReader{
     private InStream stream;
 
-    FloatTreeReader(int columnId) {
-      super(columnId);
+    FloatTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -589,8 +599,8 @@ class RecordReaderImpl implements Record
   private static class DoubleTreeReader extends TreeReader{
     private InStream stream;
 
-    DoubleTreeReader(int columnId) {
-      super(columnId);
+    DoubleTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -668,8 +678,8 @@ class RecordReaderImpl implements Record
     private InStream stream;
     private RunLengthIntegerReader lengths;
 
-    BinaryTreeReader(int columnId) {
-      super(columnId);
+    BinaryTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -739,8 +749,8 @@ class RecordReaderImpl implements Record
     private RunLengthIntegerReader nanos;
     private final LongColumnVector nanoVector = new LongColumnVector();
 
-    TimestampTreeReader(int columnId) {
-      super(columnId);
+    TimestampTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -856,8 +866,8 @@ class RecordReaderImpl implements Record
     private InStream valueStream;
     private RunLengthIntegerReader scaleStream;
 
-    DecimalTreeReader(int columnId) {
-      super(columnId);
+    DecimalTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -913,11 +923,18 @@ class RecordReaderImpl implements Record
     private byte[] dictionaryBufferInBytesCache = null;
     private final LongColumnVector scratchlcv;
 
-    StringTreeReader(int columnId) {
-      super(columnId);
+    StringTreeReader(Path path, int columnId) {
+      super(path, columnId);
       scratchlcv = new LongColumnVector();
     }
 
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId + " of " + path);
+      }
+    }
+
     @Override
     void startStripe(Map<StreamName, InStream> streams,
                      List<OrcProto.ColumnEncoding> encodings
@@ -1069,10 +1086,10 @@ class RecordReaderImpl implements Record
     private final TreeReader[] fields;
     private final String[] fieldNames;
 
-    StructTreeReader(int columnId,
+    StructTreeReader(Path path, int columnId,
                      List<OrcProto.Type> types,
                      boolean[] included) throws IOException {
-      super(columnId);
+      super(path, columnId);
       OrcProto.Type type = types.get(columnId);
       int fieldCount = type.getFieldNamesCount();
       this.fields = new TreeReader[fieldCount];
@@ -1080,7 +1097,7 @@ class RecordReaderImpl implements Record
       for(int i=0; i < fieldCount; ++i) {
         int subtype = type.getSubtypes(i);
         if (included == null || included[subtype]) {
-          this.fields[i] = createTreeReader(subtype, types, included);
+          this.fields[i] = createTreeReader(path, subtype, types, included);
         }
         this.fieldNames[i] = type.getFieldNames(i);
       }
@@ -1167,17 +1184,17 @@ class RecordReaderImpl implements Record
     private final TreeReader[] fields;
     private RunLengthByteReader tags;
 
-    UnionTreeReader(int columnId,
-                     List<OrcProto.Type> types,
-                     boolean[] included) throws IOException {
-      super(columnId);
+    UnionTreeReader(Path path, int columnId,
+                    List<OrcProto.Type> types,
+                    boolean[] included) throws IOException {
+      super(path, columnId);
       OrcProto.Type type = types.get(columnId);
       int fieldCount = type.getSubtypesCount();
       this.fields = new TreeReader[fieldCount];
       for(int i=0; i < fieldCount; ++i) {
         int subtype = type.getSubtypes(i);
         if (included == null || included[subtype]) {
-          this.fields[i] = createTreeReader(subtype, types, included);
+          this.fields[i] = createTreeReader(path, subtype, types, included);
         }
       }
     }
@@ -1246,12 +1263,13 @@ class RecordReaderImpl implements Record
     private final TreeReader elementReader;
     private RunLengthIntegerReader lengths;
 
-    ListTreeReader(int columnId,
-                    List<OrcProto.Type> types,
-                    boolean[] included) throws IOException {
-      super(columnId);
+    ListTreeReader(Path path, int columnId,
+                   List<OrcProto.Type> types,
+                   boolean[] included) throws IOException {
+      super(path, columnId);
       OrcProto.Type type = types.get(columnId);
-      elementReader = createTreeReader(type.getSubtypes(0), types, included);
+      elementReader = createTreeReader(path, type.getSubtypes(0), types,
+          included);
     }
 
     @Override
@@ -1325,20 +1343,21 @@ class RecordReaderImpl implements Record
     private final TreeReader valueReader;
     private RunLengthIntegerReader lengths;
 
-    MapTreeReader(int columnId,
-                   List<OrcProto.Type> types,
-                   boolean[] included) throws IOException {
-      super(columnId);
+    MapTreeReader(Path path,
+                  int columnId,
+                  List<OrcProto.Type> types,
+                  boolean[] included) throws IOException {
+      super(path, columnId);
       OrcProto.Type type = types.get(columnId);
       int keyColumn = type.getSubtypes(0);
       int valueColumn = type.getSubtypes(1);
       if (included == null || included[keyColumn]) {
-        keyReader = createTreeReader(keyColumn, types, included);
+        keyReader = createTreeReader(path, keyColumn, types, included);
       } else {
         keyReader = null;
       }
       if (included == null || included[valueColumn]) {
-        valueReader = createTreeReader(valueColumn, types, included);
+        valueReader = createTreeReader(path, valueColumn, types, included);
       } else {
         valueReader = null;
       }
@@ -1407,42 +1426,43 @@ class RecordReaderImpl implements Record
     }
   }
 
-  private static TreeReader createTreeReader(int columnId,
+  private static TreeReader createTreeReader(Path path,
+                                             int columnId,
                                              List<OrcProto.Type> types,
                                              boolean[] included
                                             ) throws IOException {
     OrcProto.Type type = types.get(columnId);
     switch (type.getKind()) {
       case BOOLEAN:
-        return new BooleanTreeReader(columnId);
+        return new BooleanTreeReader(path, columnId);
       case BYTE:
-        return new ByteTreeReader(columnId);
+        return new ByteTreeReader(path, columnId);
       case DOUBLE:
-        return new DoubleTreeReader(columnId);
+        return new DoubleTreeReader(path, columnId);
       case FLOAT:
-        return new FloatTreeReader(columnId);
+        return new FloatTreeReader(path, columnId);
       case SHORT:
-        return new ShortTreeReader(columnId);
+        return new ShortTreeReader(path, columnId);
       case INT:
-        return new IntTreeReader(columnId);
+        return new IntTreeReader(path, columnId);
       case LONG:
-        return new LongTreeReader(columnId);
+        return new LongTreeReader(path, columnId);
       case STRING:
-        return new StringTreeReader(columnId);
+        return new StringTreeReader(path, columnId);
       case BINARY:
-        return new BinaryTreeReader(columnId);
+        return new BinaryTreeReader(path, columnId);
       case TIMESTAMP:
-        return new TimestampTreeReader(columnId);
+        return new TimestampTreeReader(path, columnId);
       case DECIMAL:
-        return new DecimalTreeReader(columnId);
+        return new DecimalTreeReader(path, columnId);
       case STRUCT:
-        return new StructTreeReader(columnId, types, included);
+        return new StructTreeReader(path, columnId, types, included);
       case LIST:
-        return new ListTreeReader(columnId, types, included);
+        return new ListTreeReader(path, columnId, types, included);
       case MAP:
-        return new MapTreeReader(columnId, types, included);
+        return new MapTreeReader(path, columnId, types, included);
       case UNION:
-        return new UnionTreeReader(columnId, types, included);
+        return new UnionTreeReader(path, columnId, types, included);
       default:
         throw new IllegalArgumentException("Unsupported type " +
           type.getKind());
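
The net effect of the RecordReaderImpl changes is that every TreeReader now knows which file it belongs to and validates its column encoding when a stripe starts: the default check accepts DIRECT, the string reader overrides it to require DICTIONARY, and a file written with an encoding this reader does not know fails immediately with the column id and path instead of a downstream decode error. A reduced sketch of that pattern, with hypothetical names rather than the Hive classes:

    import java.io.IOException;

    final class EncodingCheckSketch {
      enum Encoding { DIRECT, DICTIONARY }

      // Base reader accepts only DIRECT, mirroring TreeReader.checkEncoding.
      static class ColumnReader {
        final String path;
        final int columnId;

        ColumnReader(String path, int columnId) {
          this.path = path;
          this.columnId = columnId;
        }

        void checkEncoding(Encoding encoding) throws IOException {
          if (encoding != Encoding.DIRECT) {
            throw new IOException("Unknown encoding " + encoding + " in column "
                + columnId + " of " + path);
          }
        }
      }

      // String columns are dictionary encoded, so the override flips the check.
      static class StringColumnReader extends ColumnReader {
        StringColumnReader(String path, int columnId) {
          super(path, columnId);
        }

        @Override
        void checkEncoding(Encoding encoding) throws IOException {
          if (encoding != Encoding.DICTIONARY) {
            throw new IOException("Unknown encoding " + encoding + " in column "
                + columnId + " of " + path);
          }
        }
      }
    }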


