hive-commits mailing list archives

From prasan...@apache.org
Subject [2/2] hive git commit: HIVE-13985: ORC improvements for reducing the file system calls in task side (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Date Fri, 17 Jun 2016 20:40:52 GMT
HIVE-13985: ORC improvements for reducing the file system calls in task side (Prasanth Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/89fa0a1d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/89fa0a1d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/89fa0a1d

Branch: refs/heads/branch-1
Commit: 89fa0a1dee376823be788fefc663acc0341c0731
Parents: 0b63145
Author: Prasanth Jayachandran <prasanthj@apache.org>
Authored: Fri Jun 17 13:40:40 2016 -0700
Committer: Prasanth Jayachandran <prasanthj@apache.org>
Committed: Fri Jun 17 13:40:40 2016 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   5 +
 .../apache/hadoop/hive/ql/io/orc/OrcProto.java  | 891 ++++++++++++++++-
 .../apache/hadoop/hive/ql/io/orc/OrcFile.java   |  10 +-
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   | 174 ++--
 .../hadoop/hive/ql/io/orc/OrcNewSplit.java      |  50 +-
 .../apache/hadoop/hive/ql/io/orc/OrcSplit.java  |  72 +-
 .../apache/hadoop/hive/ql/io/orc/OrcTail.java   | 137 +++
 .../apache/hadoop/hive/ql/io/orc/Reader.java    |   4 +
 .../hadoop/hive/ql/io/orc/ReaderImpl.java       | 198 ++--
 .../ql/io/orc/VectorizedOrcInputFormat.java     |   3 +-
 .../hadoop/hive/ql/io/orc/orc_proto.proto       |   8 +
 .../hive/ql/io/orc/TestInputOutputFormat.java   | 971 ++++++++++++++++++-
 12 files changed, 2218 insertions(+), 305 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/89fa0a1d/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index e760ed5..ef5860a 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1054,6 +1054,11 @@ public class HiveConf extends Configuration {
         "data is read remotely (from the client or HS2 machine) and sent to all the tasks."),
     HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE("hive.orc.cache.stripe.details.size", 10000,
         "Max cache size for keeping meta info about orc splits cached in the client."),
+    HIVE_ORC_CACHE_USE_SOFT_REFERENCES("hive.orc.cache.use.soft.references", false,
+        "By default, the cache that ORC input format uses to store orc file footer use hard\n" +
+        "references for the cached object. Setting this to true can help avoid out of memory\n" +
+        "issues under memory pressure (in some cases) at the cost of slight unpredictability in\n" +
+        "overall query performance."),
     HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10,
         "How many threads orc should use to create splits in parallel."),
     HIVE_ORC_SKIP_CORRUPT_DATA("hive.exec.orc.skip.corrupt.data", false,

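A minimal sketch (not from this patch) of how a client might opt in to the new soft-reference behavior using the ConfVars constant added above; the class name and printed check are illustrative assumptions:

import org.apache.hadoop.hive.conf.HiveConf;

public class SoftFooterCacheConfigExample {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // Opt in to soft references for cached ORC file footers (the default remains false).
    conf.setBoolVar(HiveConf.ConfVars.HIVE_ORC_CACHE_USE_SOFT_REFERENCES, true);
    // Split generation reads the flag back the same way the patch does.
    System.out.println(HiveConf.getBoolVar(conf,
        HiveConf.ConfVars.HIVE_ORC_CACHE_USE_SOFT_REFERENCES));
  }
}

Enabling the flag trades deterministic cache retention for resilience under memory pressure, as the description text above notes.
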
http://git-wip-us.apache.org/repos/asf/hive/blob/89fa0a1d/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
----------------------------------------------------------------------
diff --git a/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java b/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
index a0336d2..b95ee1d 100644
--- a/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
+++ b/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
@@ -18940,6 +18940,875 @@ public final class OrcProto {
     // @@protoc_insertion_point(class_scope:orc.proto.PostScript)
   }
 
+  public interface FileTailOrBuilder
+      extends com.google.protobuf.MessageOrBuilder {
+
+    // optional .orc.proto.PostScript postscript = 1;
+    /**
+     * <code>optional .orc.proto.PostScript postscript = 1;</code>
+     */
+    boolean hasPostscript();
+    /**
+     * <code>optional .orc.proto.PostScript postscript = 1;</code>
+     */
+    org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript getPostscript();
+    /**
+     * <code>optional .orc.proto.PostScript postscript = 1;</code>
+     */
+    org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScriptOrBuilder getPostscriptOrBuilder();
+
+    // optional .orc.proto.Footer footer = 2;
+    /**
+     * <code>optional .orc.proto.Footer footer = 2;</code>
+     */
+    boolean hasFooter();
+    /**
+     * <code>optional .orc.proto.Footer footer = 2;</code>
+     */
+    org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer getFooter();
+    /**
+     * <code>optional .orc.proto.Footer footer = 2;</code>
+     */
+    org.apache.hadoop.hive.ql.io.orc.OrcProto.FooterOrBuilder getFooterOrBuilder();
+
+    // optional uint64 fileLength = 3;
+    /**
+     * <code>optional uint64 fileLength = 3;</code>
+     */
+    boolean hasFileLength();
+    /**
+     * <code>optional uint64 fileLength = 3;</code>
+     */
+    long getFileLength();
+
+    // optional uint64 postscriptLength = 4;
+    /**
+     * <code>optional uint64 postscriptLength = 4;</code>
+     */
+    boolean hasPostscriptLength();
+    /**
+     * <code>optional uint64 postscriptLength = 4;</code>
+     */
+    long getPostscriptLength();
+  }
+  /**
+   * Protobuf type {@code orc.proto.FileTail}
+   *
+   * <pre>
+   * This gets serialized as part of OrcSplit, also used by footer cache.
+   * </pre>
+   */
+  public static final class FileTail extends
+      com.google.protobuf.GeneratedMessage
+      implements FileTailOrBuilder {
+    // Use FileTail.newBuilder() to construct.
+    private FileTail(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
+      super(builder);
+      this.unknownFields = builder.getUnknownFields();
+    }
+    private FileTail(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); }
+
+    private static final FileTail defaultInstance;
+    public static FileTail getDefaultInstance() {
+      return defaultInstance;
+    }
+
+    public FileTail getDefaultInstanceForType() {
+      return defaultInstance;
+    }
+
+    private final com.google.protobuf.UnknownFieldSet unknownFields;
+    @java.lang.Override
+    public final com.google.protobuf.UnknownFieldSet
+        getUnknownFields() {
+      return this.unknownFields;
+    }
+    private FileTail(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      initFields();
+      int mutable_bitField0_ = 0;
+      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
+          com.google.protobuf.UnknownFieldSet.newBuilder();
+      try {
+        boolean done = false;
+        while (!done) {
+          int tag = input.readTag();
+          switch (tag) {
+            case 0:
+              done = true;
+              break;
+            default: {
+              if (!parseUnknownField(input, unknownFields,
+                                     extensionRegistry, tag)) {
+                done = true;
+              }
+              break;
+            }
+            case 10: {
+              org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder subBuilder = null;
+              if (((bitField0_ & 0x00000001) == 0x00000001)) {
+                subBuilder = postscript_.toBuilder();
+              }
+              postscript_ = input.readMessage(org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.PARSER, extensionRegistry);
+              if (subBuilder != null) {
+                subBuilder.mergeFrom(postscript_);
+                postscript_ = subBuilder.buildPartial();
+              }
+              bitField0_ |= 0x00000001;
+              break;
+            }
+            case 18: {
+              org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder subBuilder = null;
+              if (((bitField0_ & 0x00000002) == 0x00000002)) {
+                subBuilder = footer_.toBuilder();
+              }
+              footer_ = input.readMessage(org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.PARSER, extensionRegistry);
+              if (subBuilder != null) {
+                subBuilder.mergeFrom(footer_);
+                footer_ = subBuilder.buildPartial();
+              }
+              bitField0_ |= 0x00000002;
+              break;
+            }
+            case 24: {
+              bitField0_ |= 0x00000004;
+              fileLength_ = input.readUInt64();
+              break;
+            }
+            case 32: {
+              bitField0_ |= 0x00000008;
+              postscriptLength_ = input.readUInt64();
+              break;
+            }
+          }
+        }
+      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(this);
+      } catch (java.io.IOException e) {
+        throw new com.google.protobuf.InvalidProtocolBufferException(
+            e.getMessage()).setUnfinishedMessage(this);
+      } finally {
+        this.unknownFields = unknownFields.build();
+        makeExtensionsImmutable();
+      }
+    }
+    public static final com.google.protobuf.Descriptors.Descriptor
+        getDescriptor() {
+      return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_orc_proto_FileTail_descriptor;
+    }
+
+    protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+        internalGetFieldAccessorTable() {
+      return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_orc_proto_FileTail_fieldAccessorTable
+          .ensureFieldAccessorsInitialized(
+              org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.Builder.class);
+    }
+
+    public static com.google.protobuf.Parser<FileTail> PARSER =
+        new com.google.protobuf.AbstractParser<FileTail>() {
+      public FileTail parsePartialFrom(
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws com.google.protobuf.InvalidProtocolBufferException {
+        return new FileTail(input, extensionRegistry);
+      }
+    };
+
+    @java.lang.Override
+    public com.google.protobuf.Parser<FileTail> getParserForType() {
+      return PARSER;
+    }
+
+    private int bitField0_;
+    // optional .orc.proto.PostScript postscript = 1;
+    public static final int POSTSCRIPT_FIELD_NUMBER = 1;
+    private org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript postscript_;
+    /**
+     * <code>optional .orc.proto.PostScript postscript = 1;</code>
+     */
+    public boolean hasPostscript() {
+      return ((bitField0_ & 0x00000001) == 0x00000001);
+    }
+    /**
+     * <code>optional .orc.proto.PostScript postscript = 1;</code>
+     */
+    public org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript getPostscript() {
+      return postscript_;
+    }
+    /**
+     * <code>optional .orc.proto.PostScript postscript = 1;</code>
+     */
+    public org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScriptOrBuilder getPostscriptOrBuilder() {
+      return postscript_;
+    }
+
+    // optional .orc.proto.Footer footer = 2;
+    public static final int FOOTER_FIELD_NUMBER = 2;
+    private org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer footer_;
+    /**
+     * <code>optional .orc.proto.Footer footer = 2;</code>
+     */
+    public boolean hasFooter() {
+      return ((bitField0_ & 0x00000002) == 0x00000002);
+    }
+    /**
+     * <code>optional .orc.proto.Footer footer = 2;</code>
+     */
+    public org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer getFooter() {
+      return footer_;
+    }
+    /**
+     * <code>optional .orc.proto.Footer footer = 2;</code>
+     */
+    public org.apache.hadoop.hive.ql.io.orc.OrcProto.FooterOrBuilder getFooterOrBuilder() {
+      return footer_;
+    }
+
+    // optional uint64 fileLength = 3;
+    public static final int FILELENGTH_FIELD_NUMBER = 3;
+    private long fileLength_;
+    /**
+     * <code>optional uint64 fileLength = 3;</code>
+     */
+    public boolean hasFileLength() {
+      return ((bitField0_ & 0x00000004) == 0x00000004);
+    }
+    /**
+     * <code>optional uint64 fileLength = 3;</code>
+     */
+    public long getFileLength() {
+      return fileLength_;
+    }
+
+    // optional uint64 postscriptLength = 4;
+    public static final int POSTSCRIPTLENGTH_FIELD_NUMBER = 4;
+    private long postscriptLength_;
+    /**
+     * <code>optional uint64 postscriptLength = 4;</code>
+     */
+    public boolean hasPostscriptLength() {
+      return ((bitField0_ & 0x00000008) == 0x00000008);
+    }
+    /**
+     * <code>optional uint64 postscriptLength = 4;</code>
+     */
+    public long getPostscriptLength() {
+      return postscriptLength_;
+    }
+
+    private void initFields() {
+      postscript_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.getDefaultInstance();
+      footer_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.getDefaultInstance();
+      fileLength_ = 0L;
+      postscriptLength_ = 0L;
+    }
+    private byte memoizedIsInitialized = -1;
+    public final boolean isInitialized() {
+      byte isInitialized = memoizedIsInitialized;
+      if (isInitialized != -1) return isInitialized == 1;
+
+      memoizedIsInitialized = 1;
+      return true;
+    }
+
+    public void writeTo(com.google.protobuf.CodedOutputStream output)
+                        throws java.io.IOException {
+      getSerializedSize();
+      if (((bitField0_ & 0x00000001) == 0x00000001)) {
+        output.writeMessage(1, postscript_);
+      }
+      if (((bitField0_ & 0x00000002) == 0x00000002)) {
+        output.writeMessage(2, footer_);
+      }
+      if (((bitField0_ & 0x00000004) == 0x00000004)) {
+        output.writeUInt64(3, fileLength_);
+      }
+      if (((bitField0_ & 0x00000008) == 0x00000008)) {
+        output.writeUInt64(4, postscriptLength_);
+      }
+      getUnknownFields().writeTo(output);
+    }
+
+    private int memoizedSerializedSize = -1;
+    public int getSerializedSize() {
+      int size = memoizedSerializedSize;
+      if (size != -1) return size;
+
+      size = 0;
+      if (((bitField0_ & 0x00000001) == 0x00000001)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeMessageSize(1, postscript_);
+      }
+      if (((bitField0_ & 0x00000002) == 0x00000002)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeMessageSize(2, footer_);
+      }
+      if (((bitField0_ & 0x00000004) == 0x00000004)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeUInt64Size(3, fileLength_);
+      }
+      if (((bitField0_ & 0x00000008) == 0x00000008)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeUInt64Size(4, postscriptLength_);
+      }
+      size += getUnknownFields().getSerializedSize();
+      memoizedSerializedSize = size;
+      return size;
+    }
+
+    private static final long serialVersionUID = 0L;
+    @java.lang.Override
+    protected java.lang.Object writeReplace()
+        throws java.io.ObjectStreamException {
+      return super.writeReplace();
+    }
+
+    public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom(
+        com.google.protobuf.ByteString data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom(
+        com.google.protobuf.ByteString data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom(byte[] data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom(
+        byte[] data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom(java.io.InputStream input)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input);
+    }
+    public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom(
+        java.io.InputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input, extensionRegistry);
+    }
+    public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseDelimitedFrom(java.io.InputStream input)
+        throws java.io.IOException {
+      return PARSER.parseDelimitedFrom(input);
+    }
+    public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseDelimitedFrom(
+        java.io.InputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseDelimitedFrom(input, extensionRegistry);
+    }
+    public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom(
+        com.google.protobuf.CodedInputStream input)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input);
+    }
+    public static org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parseFrom(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input, extensionRegistry);
+    }
+
+    public static Builder newBuilder() { return Builder.create(); }
+    public Builder newBuilderForType() { return newBuilder(); }
+    public static Builder newBuilder(org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail prototype) {
+      return newBuilder().mergeFrom(prototype);
+    }
+    public Builder toBuilder() { return newBuilder(this); }
+
+    @java.lang.Override
+    protected Builder newBuilderForType(
+        com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+      Builder builder = new Builder(parent);
+      return builder;
+    }
+    /**
+     * Protobuf type {@code orc.proto.FileTail}
+     *
+     * <pre>
+     * This gets serialized as part of OrcSplit, also used by footer cache.
+     * </pre>
+     */
+    public static final class Builder extends
+        com.google.protobuf.GeneratedMessage.Builder<Builder>
+       implements org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTailOrBuilder {
+      public static final com.google.protobuf.Descriptors.Descriptor
+          getDescriptor() {
+        return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_orc_proto_FileTail_descriptor;
+      }
+
+      protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+          internalGetFieldAccessorTable() {
+        return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_orc_proto_FileTail_fieldAccessorTable
+            .ensureFieldAccessorsInitialized(
+                org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.class, org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.Builder.class);
+      }
+
+      // Construct using org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.newBuilder()
+      private Builder() {
+        maybeForceBuilderInitialization();
+      }
+
+      private Builder(
+          com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+        super(parent);
+        maybeForceBuilderInitialization();
+      }
+      private void maybeForceBuilderInitialization() {
+        if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
+          getPostscriptFieldBuilder();
+          getFooterFieldBuilder();
+        }
+      }
+      private static Builder create() {
+        return new Builder();
+      }
+
+      public Builder clear() {
+        super.clear();
+        if (postscriptBuilder_ == null) {
+          postscript_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.getDefaultInstance();
+        } else {
+          postscriptBuilder_.clear();
+        }
+        bitField0_ = (bitField0_ & ~0x00000001);
+        if (footerBuilder_ == null) {
+          footer_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.getDefaultInstance();
+        } else {
+          footerBuilder_.clear();
+        }
+        bitField0_ = (bitField0_ & ~0x00000002);
+        fileLength_ = 0L;
+        bitField0_ = (bitField0_ & ~0x00000004);
+        postscriptLength_ = 0L;
+        bitField0_ = (bitField0_ & ~0x00000008);
+        return this;
+      }
+
+      public Builder clone() {
+        return create().mergeFrom(buildPartial());
+      }
+
+      public com.google.protobuf.Descriptors.Descriptor
+          getDescriptorForType() {
+        return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_orc_proto_FileTail_descriptor;
+      }
+
+      public org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail getDefaultInstanceForType() {
+        return org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.getDefaultInstance();
+      }
+
+      public org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail build() {
+        org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail result = buildPartial();
+        if (!result.isInitialized()) {
+          throw newUninitializedMessageException(result);
+        }
+        return result;
+      }
+
+      public org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail buildPartial() {
+        org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail(this);
+        int from_bitField0_ = bitField0_;
+        int to_bitField0_ = 0;
+        if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
+          to_bitField0_ |= 0x00000001;
+        }
+        if (postscriptBuilder_ == null) {
+          result.postscript_ = postscript_;
+        } else {
+          result.postscript_ = postscriptBuilder_.build();
+        }
+        if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
+          to_bitField0_ |= 0x00000002;
+        }
+        if (footerBuilder_ == null) {
+          result.footer_ = footer_;
+        } else {
+          result.footer_ = footerBuilder_.build();
+        }
+        if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
+          to_bitField0_ |= 0x00000004;
+        }
+        result.fileLength_ = fileLength_;
+        if (((from_bitField0_ & 0x00000008) == 0x00000008)) {
+          to_bitField0_ |= 0x00000008;
+        }
+        result.postscriptLength_ = postscriptLength_;
+        result.bitField0_ = to_bitField0_;
+        onBuilt();
+        return result;
+      }
+
+      public Builder mergeFrom(com.google.protobuf.Message other) {
+        if (other instanceof org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail) {
+          return mergeFrom((org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail)other);
+        } else {
+          super.mergeFrom(other);
+          return this;
+        }
+      }
+
+      public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail other) {
+        if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail.getDefaultInstance()) return this;
+        if (other.hasPostscript()) {
+          mergePostscript(other.getPostscript());
+        }
+        if (other.hasFooter()) {
+          mergeFooter(other.getFooter());
+        }
+        if (other.hasFileLength()) {
+          setFileLength(other.getFileLength());
+        }
+        if (other.hasPostscriptLength()) {
+          setPostscriptLength(other.getPostscriptLength());
+        }
+        this.mergeUnknownFields(other.getUnknownFields());
+        return this;
+      }
+
+      public final boolean isInitialized() {
+        return true;
+      }
+
+      public Builder mergeFrom(
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws java.io.IOException {
+        org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail parsedMessage = null;
+        try {
+          parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
+        } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+          parsedMessage = (org.apache.hadoop.hive.ql.io.orc.OrcProto.FileTail) e.getUnfinishedMessage();
+          throw e;
+        } finally {
+          if (parsedMessage != null) {
+            mergeFrom(parsedMessage);
+          }
+        }
+        return this;
+      }
+      private int bitField0_;
+
+      // optional .orc.proto.PostScript postscript = 1;
+      private org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript postscript_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.getDefaultInstance();
+      private com.google.protobuf.SingleFieldBuilder<
+          org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript, org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScriptOrBuilder> postscriptBuilder_;
+      /**
+       * <code>optional .orc.proto.PostScript postscript = 1;</code>
+       */
+      public boolean hasPostscript() {
+        return ((bitField0_ & 0x00000001) == 0x00000001);
+      }
+      /**
+       * <code>optional .orc.proto.PostScript postscript = 1;</code>
+       */
+      public org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript getPostscript() {
+        if (postscriptBuilder_ == null) {
+          return postscript_;
+        } else {
+          return postscriptBuilder_.getMessage();
+        }
+      }
+      /**
+       * <code>optional .orc.proto.PostScript postscript = 1;</code>
+       */
+      public Builder setPostscript(org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript value) {
+        if (postscriptBuilder_ == null) {
+          if (value == null) {
+            throw new NullPointerException();
+          }
+          postscript_ = value;
+          onChanged();
+        } else {
+          postscriptBuilder_.setMessage(value);
+        }
+        bitField0_ |= 0x00000001;
+        return this;
+      }
+      /**
+       * <code>optional .orc.proto.PostScript postscript = 1;</code>
+       */
+      public Builder setPostscript(
+          org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder builderForValue) {
+        if (postscriptBuilder_ == null) {
+          postscript_ = builderForValue.build();
+          onChanged();
+        } else {
+          postscriptBuilder_.setMessage(builderForValue.build());
+        }
+        bitField0_ |= 0x00000001;
+        return this;
+      }
+      /**
+       * <code>optional .orc.proto.PostScript postscript = 1;</code>
+       */
+      public Builder mergePostscript(org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript value) {
+        if (postscriptBuilder_ == null) {
+          if (((bitField0_ & 0x00000001) == 0x00000001) &&
+              postscript_ != org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.getDefaultInstance()) {
+            postscript_ =
+              org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.newBuilder(postscript_).mergeFrom(value).buildPartial();
+          } else {
+            postscript_ = value;
+          }
+          onChanged();
+        } else {
+          postscriptBuilder_.mergeFrom(value);
+        }
+        bitField0_ |= 0x00000001;
+        return this;
+      }
+      /**
+       * <code>optional .orc.proto.PostScript postscript = 1;</code>
+       */
+      public Builder clearPostscript() {
+        if (postscriptBuilder_ == null) {
+          postscript_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.getDefaultInstance();
+          onChanged();
+        } else {
+          postscriptBuilder_.clear();
+        }
+        bitField0_ = (bitField0_ & ~0x00000001);
+        return this;
+      }
+      /**
+       * <code>optional .orc.proto.PostScript postscript = 1;</code>
+       */
+      public org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder getPostscriptBuilder() {
+        bitField0_ |= 0x00000001;
+        onChanged();
+        return getPostscriptFieldBuilder().getBuilder();
+      }
+      /**
+       * <code>optional .orc.proto.PostScript postscript = 1;</code>
+       */
+      public org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScriptOrBuilder getPostscriptOrBuilder() {
+        if (postscriptBuilder_ != null) {
+          return postscriptBuilder_.getMessageOrBuilder();
+        } else {
+          return postscript_;
+        }
+      }
+      /**
+       * <code>optional .orc.proto.PostScript postscript = 1;</code>
+       */
+      private com.google.protobuf.SingleFieldBuilder<
+          org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript, org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScriptOrBuilder> 
+          getPostscriptFieldBuilder() {
+        if (postscriptBuilder_ == null) {
+          postscriptBuilder_ = new com.google.protobuf.SingleFieldBuilder<
+              org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript, org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScriptOrBuilder>(
+                  postscript_,
+                  getParentForChildren(),
+                  isClean());
+          postscript_ = null;
+        }
+        return postscriptBuilder_;
+      }
+
+      // optional .orc.proto.Footer footer = 2;
+      private org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer footer_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.getDefaultInstance();
+      private com.google.protobuf.SingleFieldBuilder<
+          org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer, org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.FooterOrBuilder> footerBuilder_;
+      /**
+       * <code>optional .orc.proto.Footer footer = 2;</code>
+       */
+      public boolean hasFooter() {
+        return ((bitField0_ & 0x00000002) == 0x00000002);
+      }
+      /**
+       * <code>optional .orc.proto.Footer footer = 2;</code>
+       */
+      public org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer getFooter() {
+        if (footerBuilder_ == null) {
+          return footer_;
+        } else {
+          return footerBuilder_.getMessage();
+        }
+      }
+      /**
+       * <code>optional .orc.proto.Footer footer = 2;</code>
+       */
+      public Builder setFooter(org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer value) {
+        if (footerBuilder_ == null) {
+          if (value == null) {
+            throw new NullPointerException();
+          }
+          footer_ = value;
+          onChanged();
+        } else {
+          footerBuilder_.setMessage(value);
+        }
+        bitField0_ |= 0x00000002;
+        return this;
+      }
+      /**
+       * <code>optional .orc.proto.Footer footer = 2;</code>
+       */
+      public Builder setFooter(
+          org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder builderForValue) {
+        if (footerBuilder_ == null) {
+          footer_ = builderForValue.build();
+          onChanged();
+        } else {
+          footerBuilder_.setMessage(builderForValue.build());
+        }
+        bitField0_ |= 0x00000002;
+        return this;
+      }
+      /**
+       * <code>optional .orc.proto.Footer footer = 2;</code>
+       */
+      public Builder mergeFooter(org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer value) {
+        if (footerBuilder_ == null) {
+          if (((bitField0_ & 0x00000002) == 0x00000002) &&
+              footer_ != org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.getDefaultInstance()) {
+            footer_ =
+              org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.newBuilder(footer_).mergeFrom(value).buildPartial();
+          } else {
+            footer_ = value;
+          }
+          onChanged();
+        } else {
+          footerBuilder_.mergeFrom(value);
+        }
+        bitField0_ |= 0x00000002;
+        return this;
+      }
+      /**
+       * <code>optional .orc.proto.Footer footer = 2;</code>
+       */
+      public Builder clearFooter() {
+        if (footerBuilder_ == null) {
+          footer_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.getDefaultInstance();
+          onChanged();
+        } else {
+          footerBuilder_.clear();
+        }
+        bitField0_ = (bitField0_ & ~0x00000002);
+        return this;
+      }
+      /**
+       * <code>optional .orc.proto.Footer footer = 2;</code>
+       */
+      public org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder getFooterBuilder() {
+        bitField0_ |= 0x00000002;
+        onChanged();
+        return getFooterFieldBuilder().getBuilder();
+      }
+      /**
+       * <code>optional .orc.proto.Footer footer = 2;</code>
+       */
+      public org.apache.hadoop.hive.ql.io.orc.OrcProto.FooterOrBuilder getFooterOrBuilder() {
+        if (footerBuilder_ != null) {
+          return footerBuilder_.getMessageOrBuilder();
+        } else {
+          return footer_;
+        }
+      }
+      /**
+       * <code>optional .orc.proto.Footer footer = 2;</code>
+       */
+      private com.google.protobuf.SingleFieldBuilder<
+          org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer, org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.FooterOrBuilder> 
+          getFooterFieldBuilder() {
+        if (footerBuilder_ == null) {
+          footerBuilder_ = new com.google.protobuf.SingleFieldBuilder<
+              org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer, org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer.Builder, org.apache.hadoop.hive.ql.io.orc.OrcProto.FooterOrBuilder>(
+                  footer_,
+                  getParentForChildren(),
+                  isClean());
+          footer_ = null;
+        }
+        return footerBuilder_;
+      }
+
+      // optional uint64 fileLength = 3;
+      private long fileLength_ ;
+      /**
+       * <code>optional uint64 fileLength = 3;</code>
+       */
+      public boolean hasFileLength() {
+        return ((bitField0_ & 0x00000004) == 0x00000004);
+      }
+      /**
+       * <code>optional uint64 fileLength = 3;</code>
+       */
+      public long getFileLength() {
+        return fileLength_;
+      }
+      /**
+       * <code>optional uint64 fileLength = 3;</code>
+       */
+      public Builder setFileLength(long value) {
+        bitField0_ |= 0x00000004;
+        fileLength_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional uint64 fileLength = 3;</code>
+       */
+      public Builder clearFileLength() {
+        bitField0_ = (bitField0_ & ~0x00000004);
+        fileLength_ = 0L;
+        onChanged();
+        return this;
+      }
+
+      // optional uint64 postscriptLength = 4;
+      private long postscriptLength_ ;
+      /**
+       * <code>optional uint64 postscriptLength = 4;</code>
+       */
+      public boolean hasPostscriptLength() {
+        return ((bitField0_ & 0x00000008) == 0x00000008);
+      }
+      /**
+       * <code>optional uint64 postscriptLength = 4;</code>
+       */
+      public long getPostscriptLength() {
+        return postscriptLength_;
+      }
+      /**
+       * <code>optional uint64 postscriptLength = 4;</code>
+       */
+      public Builder setPostscriptLength(long value) {
+        bitField0_ |= 0x00000008;
+        postscriptLength_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional uint64 postscriptLength = 4;</code>
+       */
+      public Builder clearPostscriptLength() {
+        bitField0_ = (bitField0_ & ~0x00000008);
+        postscriptLength_ = 0L;
+        onChanged();
+        return this;
+      }
+
+      // @@protoc_insertion_point(builder_scope:orc.proto.FileTail)
+    }
+
+    static {
+      defaultInstance = new FileTail(true);
+      defaultInstance.initFields();
+    }
+
+    // @@protoc_insertion_point(class_scope:orc.proto.FileTail)
+  }
+
   private static com.google.protobuf.Descriptors.Descriptor
     internal_static_orc_proto_IntegerStatistics_descriptor;
   private static
@@ -19055,6 +19924,11 @@ public final class OrcProto {
   private static
     com.google.protobuf.GeneratedMessage.FieldAccessorTable
       internal_static_orc_proto_PostScript_fieldAccessorTable;
+  private static com.google.protobuf.Descriptors.Descriptor
+    internal_static_orc_proto_FileTail_descriptor;
+  private static
+    com.google.protobuf.GeneratedMessage.FieldAccessorTable
+      internal_static_orc_proto_FileTail_fieldAccessorTable;
 
   public static com.google.protobuf.Descriptors.FileDescriptor
       getDescriptor() {
@@ -19135,10 +20009,13 @@ public final class OrcProto {
       "ression\030\002 \001(\0162\032.orc.proto.CompressionKin" +
       "d\022\034\n\024compressionBlockSize\030\003 \001(\004\022\023\n\007versi",
       "on\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 \001(\004\022\025\n\r" +
-      "writerVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t*:\n\017C" +
-      "ompressionKind\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SN" +
-      "APPY\020\002\022\007\n\003LZO\020\003B\"\n org.apache.hadoop.hiv" +
-      "e.ql.io.orc"
+      "writerVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t\"\206\001\n\010" +
+      "FileTail\022)\n\npostscript\030\001 \001(\0132\025.orc.proto" +
+      ".PostScript\022!\n\006footer\030\002 \001(\0132\021.orc.proto." +
+      "Footer\022\022\n\nfileLength\030\003 \001(\004\022\030\n\020postscript" +
+      "Length\030\004 \001(\004*:\n\017CompressionKind\022\010\n\004NONE\020" +
+      "\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003B\"\n org." +
+      "apache.hadoop.hive.ql.io.orc"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -19283,6 +20160,12 @@ public final class OrcProto {
             com.google.protobuf.GeneratedMessage.FieldAccessorTable(
               internal_static_orc_proto_PostScript_descriptor,
               new java.lang.String[] { "FooterLength", "Compression", "CompressionBlockSize", "Version", "MetadataLength", "WriterVersion", "Magic", });
+          internal_static_orc_proto_FileTail_descriptor =
+            getDescriptor().getMessageTypes().get(23);
+          internal_static_orc_proto_FileTail_fieldAccessorTable = new
+            com.google.protobuf.GeneratedMessage.FieldAccessorTable(
+              internal_static_orc_proto_FileTail_descriptor,
+              new java.lang.String[] { "Postscript", "Footer", "FileLength", "PostscriptLength", });
           return null;
         }
       };

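A minimal sketch (not from this patch) of building and round-tripping the new FileTail message through the generated API above; the numeric field values are placeholders:

import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class FileTailExample {
  public static void main(String[] args) throws Exception {
    OrcProto.FileTail tail = OrcProto.FileTail.newBuilder()
        .setPostscript(OrcProto.PostScript.getDefaultInstance())
        .setFooter(OrcProto.Footer.getDefaultInstance())
        .setFileLength(4096L)          // placeholder file length
        .setPostscriptLength(19L)      // placeholder postscript length
        .build();
    // All four fields are optional, so the message serializes and parses cleanly.
    byte[] bytes = tail.toByteArray();
    OrcProto.FileTail parsed = OrcProto.FileTail.parseFrom(bytes);
    System.out.println(parsed.hasFooter() + " " + parsed.getFileLength());
  }
}

As the message comment says, this is the shape that gets serialized into OrcSplit and kept in the footer cache.
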
http://git-wip-us.apache.org/repos/asf/hive/blob/89fa0a1d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
index 3ca0a6e..906eb6b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
@@ -194,14 +194,14 @@ public final class OrcFile {
   public static class ReaderOptions {
     private final Configuration conf;
     private FileSystem filesystem;
-    private ReaderImpl.FileMetaInfo fileMetaInfo;
     private long maxLength = Long.MAX_VALUE;
+    private OrcTail orcTail;
 
     public ReaderOptions(Configuration conf) {
       this.conf = conf;
     }
-    ReaderOptions fileMetaInfo(ReaderImpl.FileMetaInfo info) {
-      fileMetaInfo = info;
+    ReaderOptions orcTail(OrcTail orcTail) {
+      this.orcTail = orcTail;
       return this;
     }
 
@@ -223,8 +223,8 @@ public final class OrcFile {
       return filesystem;
     }
 
-    ReaderImpl.FileMetaInfo getFileMetaInfo() {
-      return fileMetaInfo;
+    OrcTail getOrcTail() {
+      return orcTail;
     }
 
     long getMaxLength() {

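A minimal sketch (not from this patch) of reusing a previously read tail via the new package-private ReaderOptions.orcTail() setter; it assumes the helper lives in org.apache.hadoop.hive.ql.io.orc alongside the reader, and that cachedTail, fs, and fileStatus are caller-supplied:

  // Open a reader without re-reading the postscript/footer from the filesystem.
  static Reader openWithCachedTail(Configuration conf, FileSystem fs,
      FileStatus fileStatus, OrcTail cachedTail) throws IOException {
    OrcFile.ReaderOptions options = OrcFile.readerOptions(conf)
        .filesystem(fs)
        .maxLength(fileStatus.getLen())
        .orcTail(cachedTail);
    return OrcFile.createReader(fileStatus.getPath(), options);
  }

Because orcTail(...) is package-private, only the ORC input format and reader internals (changed in the files below) call it directly.
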
http://git-wip-us.apache.org/repos/asf/hive/blob/89fa0a1d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 64abe91..35469d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -78,10 +78,12 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.util.StringUtils;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.cache.Cache;
 import com.google.common.cache.CacheBuilder;
 import com.google.common.collect.Lists;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
+
 /**
  * A MapReduce/Hive input format for ORC files.
  * <p>
@@ -127,6 +129,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
   private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
   private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
   private static final int DEFAULT_ETL_FILE_THRESHOLD = 100;
+  private static final int DEFAULT_CACHE_INITIAL_CAPACITY = 1024;
 
   private static final PerfLogger perfLogger = PerfLogger.getPerfLogger();
   private static final String CLASS_NAME = ReaderImpl.class.getName();
@@ -430,7 +433,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
    */
   static class Context {
     private final Configuration conf;
-    private static Cache<Path, FileInfo> footerCache;
+    private static Cache<Path, OrcTail> footerCache;
     private static ExecutorService threadPool = null;
     private final int numBuckets;
     private final long maxSize;
@@ -467,6 +470,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
           ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE);
       int numThreads = HiveConf.getIntVar(conf,
           ConfVars.HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS);
+      boolean useSoftReference = HiveConf.getBoolVar(conf,
+          ConfVars.HIVE_ORC_CACHE_USE_SOFT_REFERENCES);
 
       cacheStripeDetails = (cacheStripeDetailsSize > 0);
 
@@ -480,12 +485,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
         }
 
         if (footerCache == null && cacheStripeDetails) {
-          footerCache = CacheBuilder.newBuilder()
+          CacheBuilder builder = CacheBuilder.newBuilder()
+              .initialCapacity(DEFAULT_CACHE_INITIAL_CAPACITY)
               .concurrencyLevel(numThreads)
-              .initialCapacity(cacheStripeDetailsSize)
-              .maximumSize(cacheStripeDetailsSize)
-              .softValues()
-              .build();
+              .maximumSize(cacheStripeDetailsSize);
+          if (useSoftReference) {
+            builder = builder.softValues();
+          }
+          footerCache = builder.build();
         }
       }
       String value = conf.get(ValidTxnList.VALID_TXNS_KEY,
@@ -502,13 +509,13 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     private final Context context;
     private final FileSystem fs;
     private final FileStatus file;
-    private final FileInfo fileInfo;
+    private final OrcTail orcTail;
     private final boolean isOriginal;
     private final List<DeltaMetaData> deltas;
     private final boolean hasBase;
 
     SplitInfo(Context context, FileSystem fs,
-        FileStatus file, FileInfo fileInfo,
+        FileStatus file, OrcTail orcTail,
         boolean isOriginal,
         List<DeltaMetaData> deltas,
         boolean hasBase, Path dir, boolean[] covered) throws IOException {
@@ -516,7 +523,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       this.context = context;
       this.fs = fs;
       this.file = file;
-      this.fileInfo = fileInfo;
+      this.orcTail = orcTail;
       this.isOriginal = isOriginal;
       this.deltas = deltas;
       this.hasBase = hasBase;
@@ -547,26 +554,26 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       this.covered = covered;
     }
 
-    private FileInfo verifyCachedFileInfo(FileStatus file) {
-      FileInfo fileInfo = Context.footerCache.getIfPresent(file.getPath());
-      if (fileInfo != null) {
+    private OrcTail verifyCachedOrcTail(FileStatus file) {
+      OrcTail orcTail = Context.footerCache.getIfPresent(file.getPath());
+      if (orcTail != null) {
         if (LOG.isDebugEnabled()) {
           LOG.debug("Info cached for path: " + file.getPath());
         }
-        if (fileInfo.modificationTime == file.getModificationTime() &&
-            fileInfo.size == file.getLen()) {
+        if (orcTail.getFileModificationTime() == file.getModificationTime() &&
+            orcTail.getFileLength() == file.getLen()) {
           // Cached copy is valid
           context.cacheHitCounter.incrementAndGet();
-          return fileInfo;
+          return orcTail;
         } else {
           // Invalidate
           Context.footerCache.invalidate(file.getPath());
           if (LOG.isDebugEnabled()) {
             LOG.debug("Meta-Info for : " + file.getPath() +
                 " changed. CachedModificationTime: "
-                + fileInfo.modificationTime + ", CurrentModificationTime: "
+                + orcTail.getFileModificationTime() + ", CurrentModificationTime: "
                 + file.getModificationTime()
-                + ", CachedLength: " + fileInfo.size + ", CurrentLength: " +
+                + ", CachedLength: " + orcTail.getFileLength() + ", CurrentLength: " +
                 file.getLen());
           }
         }
@@ -582,13 +589,13 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     public List<SplitInfo> getSplits() throws IOException {
       List<SplitInfo> result = Lists.newArrayList();
       for (FileStatus file : files) {
-        FileInfo info = null;
+        OrcTail orcTail = null;
         if (context.cacheStripeDetails) {
-          info = verifyCachedFileInfo(file);
+          orcTail = verifyCachedOrcTail(file);
         }
         // ignore files of 0 length
         if (file.getLen() > 0) {
-          result.add(new SplitInfo(context, fs, file, info, isOriginal, deltas, true, dir, covered));
+          result.add(new SplitInfo(context, fs, file, orcTail, isOriginal, deltas, true, dir, covered));
         }
       }
       return result;
@@ -630,7 +637,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
         for (Map.Entry<Long, BlockLocation> entry : blockOffsets.entrySet()) {
           OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), entry.getKey(),
                   entry.getValue().getLength(), entry.getValue().getHosts(), null, isOriginal, true,
-                  deltas, -1);
+                  deltas, -1, fileStatus.getLen());
           splits.add(orcSplit);
         }
       }
@@ -671,7 +678,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       if (!deltas.isEmpty()) {
         for (int b = 0; b < numBuckets; ++b) {
           if (!covered[b]) {
-            splits.add(new OrcSplit(dir, b, 0, new String[0], null, false, false, deltas, -1));
+            splits.add(new OrcSplit(dir, b, 0, new String[0], null, false, false, deltas, -1, -1));
           }
         }
       }
@@ -775,12 +782,11 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     private final Context context;
     private final FileSystem fs;
     private final FileStatus file;
+    private OrcTail orcTail;
     private final long blockSize;
     private final TreeMap<Long, BlockLocation> locations;
-    private final FileInfo fileInfo;
     private List<StripeInformation> stripes;
-    private ReaderImpl.FileMetaInfo fileMetaInfo;
-    private Metadata metadata;
+    private List<StripeStatistics> stripeStats;
     private List<OrcProto.Type> types;
     private boolean[] includedCols;
     private final boolean isOriginal;
@@ -795,7 +801,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       this.fs = splitInfo.fs;
       this.file = splitInfo.file;
       this.blockSize = file.getBlockSize();
-      this.fileInfo = splitInfo.fileInfo;
+      this.orcTail = splitInfo.orcTail;
       locations = SHIMS.getLocationsWithOffset(fs, file);
       this.isOriginal = splitInfo.isOriginal;
       this.deltas = splitInfo.deltas;
@@ -839,11 +845,11 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
      * are written with large block sizes.
      * @param offset the start of the split
      * @param length the length of the split
-     * @param fileMetaInfo file metadata from footer and postscript
+     * @param orcTail orc file tail
      * @throws IOException
      */
     OrcSplit createSplit(long offset, long length,
-                     ReaderImpl.FileMetaInfo fileMetaInfo) throws IOException {
+                     OrcTail orcTail) throws IOException {
       String[] hosts;
       Map.Entry<Long, BlockLocation> startEntry = locations.floorEntry(offset);
       BlockLocation start = startEntry.getValue();
@@ -902,8 +908,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       final double splitRatio = (double) length / (double) fileLen;
       final long scaledProjSize = projColsUncompressedSize > 0 ?
           (long) (splitRatio * projColsUncompressedSize) : fileLen;
-      return new OrcSplit(file.getPath(), offset, length, hosts, fileMetaInfo,
-          isOriginal, hasBase, deltas, scaledProjSize);
+      return new OrcSplit(file.getPath(), offset, length, hosts, orcTail,
+          isOriginal, hasBase, deltas, scaledProjSize, fileLen);
     }
 
     /**
@@ -929,7 +935,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
             writerVersion != OrcFile.WriterVersion.ORIGINAL) {
           SearchArgument sarg = options.getSearchArgument();
           List<PredicateLeaf> sargLeaves = sarg.getLeaves();
-          List<StripeStatistics> stripeStats = metadata.getStripeStatistics();
           int[] filterColumns = RecordReaderImpl.mapSargColumns(sargLeaves,
               options.getColumnNames(), getRootColumn(isOriginal));
 
@@ -965,7 +970,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
         if (!includeStripe[idx]) {
           // create split for the previous unfinished stripe
           if (currentOffset != -1) {
-            splits.add(createSplit(currentOffset, currentLength, fileMetaInfo));
+            splits.add(createSplit(currentOffset, currentLength, orcTail));
             currentOffset = -1;
           }
           continue;
@@ -975,7 +980,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
         // crossed a block boundary, cut the input split here.
         if (currentOffset != -1 && currentLength > context.minSize &&
             (currentOffset / blockSize != stripe.getOffset() / blockSize)) {
-          splits.add(createSplit(currentOffset, currentLength, fileMetaInfo));
+          splits.add(createSplit(currentOffset, currentLength, orcTail));
           currentOffset = -1;
         }
         // if we aren't building a split, start a new one.
@@ -987,12 +992,12 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
               (stripe.getOffset() + stripe.getLength()) - currentOffset;
         }
         if (currentLength >= context.maxSize) {
-          splits.add(createSplit(currentOffset, currentLength, fileMetaInfo));
+          splits.add(createSplit(currentOffset, currentLength, orcTail));
           currentOffset = -1;
         }
       }
       if (currentOffset != -1) {
-        splits.add(createSplit(currentOffset, currentLength, fileMetaInfo));
+        splits.add(createSplit(currentOffset, currentLength, orcTail));
       }
 
       // add uncovered ACID delta splits
@@ -1001,41 +1006,34 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     }
 
     private void populateAndCacheStripeDetails() throws IOException {
-      Reader orcReader = OrcFile.createReader(file.getPath(),
-          OrcFile.readerOptions(context.conf).filesystem(fs).maxLength(file.getLen()));
-      if (fileInfo != null) {
-        stripes = fileInfo.stripeInfos;
-        fileMetaInfo = fileInfo.fileMetaInfo;
-        metadata = fileInfo.metadata;
-        types = fileInfo.types;
-        writerVersion = fileInfo.writerVersion;
-        // For multiple runs, in case sendSplitsInFooter changes
-        if (fileMetaInfo == null && context.footerInSplits) {
-          fileInfo.fileMetaInfo = ((ReaderImpl) orcReader).getFileMetaInfo();
-          fileInfo.metadata = orcReader.getMetadata();
-          fileInfo.types = orcReader.getTypes();
-          fileInfo.writerVersion = orcReader.getWriterVersion();
-        }
-      } else {
-        stripes = orcReader.getStripes();
-        metadata = orcReader.getMetadata();
-        types = orcReader.getTypes();
-        writerVersion = orcReader.getWriterVersion();
-        fileMetaInfo = context.footerInSplits ?
-            ((ReaderImpl) orcReader).getFileMetaInfo() : null;
+      if (orcTail == null) {
+        Reader orcReader = OrcFile.createReader(file.getPath(),
+                OrcFile.readerOptions(context.conf)
+                        .filesystem(fs)
+                        .maxLength(file.getLen()));
+        orcTail = new OrcTail(orcReader.getFileTail(), orcReader.getSerializedFileFooter(), file.getModificationTime());
         if (context.cacheStripeDetails) {
-          // Populate into cache.
-          Context.footerCache.put(file.getPath(),
-              new FileInfo(file.getModificationTime(), file.getLen(), stripes,
-                  metadata, types, fileMetaInfo, writerVersion));
+          context.footerCache.put(file.getPath(), orcTail);
         }
       }
+
+      stripes = orcTail.getStripes();
+      stripeStats = orcTail.getStripeStatistics();
+      types = orcTail.getTypes();
+      writerVersion = orcTail.getWriterVersion();
       includedCols = genIncludedColumns(types, context.conf, isOriginal);
-      projColsUncompressedSize = computeProjectionSize(orcReader, includedCols, isOriginal);
+      List<OrcProto.ColumnStatistics> fileColStats = orcTail.getFooter().getStatisticsList();
+      projColsUncompressedSize = computeProjectionSize(types, fileColStats, includedCols,
+              isOriginal);
+      if (!context.footerInSplits) {
+        orcTail = null;
+      }
     }
 
-    private long computeProjectionSize(final Reader orcReader, final boolean[] includedCols,
-        final boolean isOriginal) {
+    private long computeProjectionSize(List<OrcProto.Type> types,
+                                       List<OrcProto.ColumnStatistics> stats,
+                                       boolean[] includedCols,
+                                       boolean isOriginal) {
       final int rootIdx = getRootColumn(isOriginal);
       List<Integer> internalColIds = Lists.newArrayList();
       if (includedCols != null) {
@@ -1045,7 +1043,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
           }
         }
       }
-      return orcReader.getRawDataSizeFromColIndices(internalColIds);
+      return ReaderImpl.getRawDataSizeFromColIndices(internalColIds, types, stats);
     }
 
     private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics,
@@ -1149,37 +1147,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     return result.toArray(new InputSplit[result.size()]);
   }
 
-  /**
-   * FileInfo.
-   *
-   * Stores information relevant to split generation for an ORC File.
-   *
-   */
-  private static class FileInfo {
-    long modificationTime;
-    long size;
-    List<StripeInformation> stripeInfos;
-    ReaderImpl.FileMetaInfo fileMetaInfo;
-    Metadata metadata;
-    List<OrcProto.Type> types;
-    private OrcFile.WriterVersion writerVersion;
-
-
-    FileInfo(long modificationTime, long size,
-             List<StripeInformation> stripeInfos,
-             Metadata metadata, List<OrcProto.Type> types,
-             ReaderImpl.FileMetaInfo fileMetaInfo,
-             OrcFile.WriterVersion writerVersion) {
-      this.modificationTime = modificationTime;
-      this.size = size;
-      this.stripeInfos = stripeInfos;
-      this.fileMetaInfo = fileMetaInfo;
-      this.metadata = metadata;
-      this.types = types;
-      this.writerVersion = writerVersion;
-    }
-  }
-
   @SuppressWarnings("unchecked")
   private org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct>
     createVectorizedReader(InputSplit split, JobConf conf, Reporter reporter
@@ -1199,13 +1166,17 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       if (vectorMode) {
         return createVectorizedReader(inputSplit, conf, reporter);
       } else {
+        OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf);
+        if (inputSplit instanceof OrcSplit) {
+          OrcSplit split = (OrcSplit) inputSplit;
+          readerOptions.maxLength(split.getFileLength()).orcTail(split.getOrcTail());
+        }
         return new OrcRecordReader(OrcFile.createReader(
             ((FileSplit) inputSplit).getPath(),
-            OrcFile.readerOptions(conf)), conf, (FileSplit) inputSplit);
+            readerOptions), conf, (FileSplit) inputSplit);
       }
     }
 
-    OrcSplit split = (OrcSplit) inputSplit;
     reporter.setStatus(inputSplit.toString());
 
     Options options = new Options(conf).reporter(reporter);
@@ -1309,7 +1280,12 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     if (split.hasBase()) {
       bucket = AcidUtils.parseBaseBucketFilename(split.getPath(), conf)
           .getBucket();
-      reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
+      OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf)
+              .maxLength(split.getFileLength());
+      if (split.hasFooter()) {
+        readerOptions.orcTail(split.getOrcTail());
+      }
+      reader = OrcFile.createReader(path, readerOptions);
     } else {
       bucket = (int) split.getStart();
       reader = null;

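The record-reader changes in this hunk all follow one pattern: when the split carries a footer, hand it to the reader options together with the file length so the reader skips both the tail read and the getFileStatus() call. A minimal sketch of that pattern follows, using only the calls shown above; the helper name openOrcReader is hypothetical.

    // Sketch only: reuse the footer carried in an OrcSplit on the task side.
    private static Reader openOrcReader(OrcSplit orcSplit, Configuration conf) throws IOException {
      OrcFile.ReaderOptions opts = OrcFile.readerOptions(conf)
          .maxLength(orcSplit.getFileLength());   // avoids the getFileStatus() call in ReaderImpl
      if (orcSplit.hasFooter()) {
        opts.orcTail(orcSplit.getOrcTail());      // footer/postscript already parsed at split time
      }
      return OrcFile.createReader(orcSplit.getPath(), opts);
    }
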
http://git-wip-us.apache.org/repos/asf/hive/blob/89fa0a1d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java
index b58c880..77d6eb5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java
@@ -34,7 +34,7 @@ import org.apache.hadoop.mapreduce.lib.input.FileSplit;
  *
  */
 public class OrcNewSplit extends FileSplit {
-  private ReaderImpl.FileMetaInfo fileMetaInfo;
+  private OrcTail orcTail;
   private boolean hasFooter;
   private boolean isOriginal;
   private boolean hasBase;
@@ -51,7 +51,7 @@ public class OrcNewSplit extends FileSplit {
   public OrcNewSplit(OrcSplit inner) throws IOException {
     super(inner.getPath(), inner.getStart(), inner.getLength(),
           inner.getLocations());
-    this.fileMetaInfo = inner.getFileMetaInfo();
+    this.orcTail = inner.getOrcTail();
     this.hasFooter = inner.hasFooter();
     this.isOriginal = inner.isOriginal();
     this.hasBase = inner.hasBase();
@@ -72,20 +72,11 @@ public class OrcNewSplit extends FileSplit {
       delta.write(out);
     }
     if (hasFooter) {
-      // serialize FileMetaInfo fields
-      Text.writeString(out, fileMetaInfo.compressionType);
-      WritableUtils.writeVInt(out, fileMetaInfo.bufferSize);
-      WritableUtils.writeVInt(out, fileMetaInfo.metadataSize);
-
-      // serialize FileMetaInfo field footer
-      ByteBuffer footerBuff = fileMetaInfo.footerBuffer;
-      footerBuff.reset();
-
-      // write length of buffer
-      WritableUtils.writeVInt(out, footerBuff.limit() - footerBuff.position());
-      out.write(footerBuff.array(), footerBuff.position(),
-          footerBuff.limit() - footerBuff.position());
-      WritableUtils.writeVInt(out, fileMetaInfo.writerVersion.getId());
+      OrcProto.FileTail fileTail = orcTail.getMinimalFileTail();
+      byte[] tailBuffer = fileTail.toByteArray();
+      int tailLen = tailBuffer.length;
+      WritableUtils.writeVInt(out, tailLen);
+      out.write(tailBuffer);
     }
   }
 
@@ -107,27 +98,14 @@ public class OrcNewSplit extends FileSplit {
       deltas.add(dmd);
     }
     if (hasFooter) {
-      // deserialize FileMetaInfo fields
-      String compressionType = Text.readString(in);
-      int bufferSize = WritableUtils.readVInt(in);
-      int metadataSize = WritableUtils.readVInt(in);
-
-      // deserialize FileMetaInfo field footer
-      int footerBuffSize = WritableUtils.readVInt(in);
-      ByteBuffer footerBuff = ByteBuffer.allocate(footerBuffSize);
-      in.readFully(footerBuff.array(), 0, footerBuffSize);
-      OrcFile.WriterVersion writerVersion =
-          ReaderImpl.getWriterVersion(WritableUtils.readVInt(in));
-
-      fileMetaInfo = new ReaderImpl.FileMetaInfo(compressionType, bufferSize,
-          metadataSize, footerBuff, writerVersion);
+      int tailLen = WritableUtils.readVInt(in);
+      byte[] tailBuffer = new byte[tailLen];
+      in.readFully(tailBuffer);
+      OrcProto.FileTail fileTail = OrcProto.FileTail.parseFrom(tailBuffer);
+      orcTail = new OrcTail(fileTail, null);
     }
   }
 
-  ReaderImpl.FileMetaInfo getFileMetaInfo(){
-    return fileMetaInfo;
-  }
-
   public boolean hasFooter() {
     return hasFooter;
   }
@@ -143,4 +121,8 @@ public class OrcNewSplit extends FileSplit {
   public List<AcidInputFormat.DeltaMetaData> getDeltas() {
     return deltas;
   }
+
+  public OrcTail getOrcTail() {
+    return orcTail;
+  }
 }

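The split's footer payload is now just a vint length followed by the protobuf-encoded FileTail, with column statistics stripped by getMinimalFileTail() to keep splits small. A rough round-trip sketch of that encoding, assuming out/in are the DataOutput/DataInput used for split serialization:

    // write side
    byte[] tailBuffer = orcTail.getMinimalFileTail().toByteArray();  // footer without column statistics
    WritableUtils.writeVInt(out, tailBuffer.length);
    out.write(tailBuffer);

    // read side
    int tailLen = WritableUtils.readVInt(in);
    byte[] buf = new byte[tailLen];
    in.readFully(buf);
    // serializedTail is null here, so stripe statistics are not available from a deserialized split
    OrcTail restored = new OrcTail(OrcProto.FileTail.parseFrom(buf), null);

Dropping the statistics is safe because the projection-size estimate that needs them (computeProjectionSize above) is computed at split-generation time, before the tail is serialized.
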
http://git-wip-us.apache.org/repos/asf/hive/blob/89fa0a1d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
index 3e74df5..43ebdb4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
@@ -39,13 +39,14 @@ import org.apache.hadoop.mapred.FileSplit;
  *
  */
 public class OrcSplit extends FileSplit {
-  private ReaderImpl.FileMetaInfo fileMetaInfo;
+  private OrcTail orcTail;
   private boolean hasFooter;
   private boolean isOriginal;
   private boolean hasBase;
   private final List<AcidInputFormat.DeltaMetaData> deltas = new ArrayList<>();
   private OrcFile.WriterVersion writerVersion;
   private long projColsUncompressedSize;
+  private long fileLen;
 
   static final int BASE_FLAG = 4;
   static final int ORIGINAL_FLAG = 2;
@@ -59,15 +60,17 @@ public class OrcSplit extends FileSplit {
   }
 
   public OrcSplit(Path path, long offset, long length, String[] hosts,
-      ReaderImpl.FileMetaInfo fileMetaInfo, boolean isOriginal, boolean hasBase,
-      List<AcidInputFormat.DeltaMetaData> deltas, long projectedDataSize) {
+      OrcTail orcTail, boolean isOriginal, boolean hasBase,
+      List<AcidInputFormat.DeltaMetaData> deltas, long projectedDataSize, long fileLen) {
     super(path, offset, length, hosts);
-    this.fileMetaInfo = fileMetaInfo;
-    hasFooter = this.fileMetaInfo != null;
+    this.orcTail = orcTail;
+    hasFooter = this.orcTail != null;
     this.isOriginal = isOriginal;
     this.hasBase = hasBase;
     this.deltas.addAll(deltas);
     this.projColsUncompressedSize = projectedDataSize;
+    // setting file length to Long.MAX_VALUE lets the ORC reader read the file length from the file system
+    this.fileLen = fileLen <= 0 ? Long.MAX_VALUE : fileLen;
   }
 
   @Override
@@ -84,21 +87,13 @@ public class OrcSplit extends FileSplit {
       delta.write(out);
     }
     if (hasFooter) {
-      // serialize FileMetaInfo fields
-      Text.writeString(out, fileMetaInfo.compressionType);
-      WritableUtils.writeVInt(out, fileMetaInfo.bufferSize);
-      WritableUtils.writeVInt(out, fileMetaInfo.metadataSize);
-
-      // serialize FileMetaInfo field footer
-      ByteBuffer footerBuff = fileMetaInfo.footerBuffer;
-      footerBuff.reset();
-
-      // write length of buffer
-      WritableUtils.writeVInt(out, footerBuff.limit() - footerBuff.position());
-      out.write(footerBuff.array(), footerBuff.position(),
-          footerBuff.limit() - footerBuff.position());
-      WritableUtils.writeVInt(out, fileMetaInfo.writerVersion.getId());
+      OrcProto.FileTail fileTail = orcTail.getMinimalFileTail();
+      byte[] tailBuffer = fileTail.toByteArray();
+      int tailLen = tailBuffer.length;
+      WritableUtils.writeVInt(out, tailLen);
+      out.write(tailBuffer);
     }
+    out.writeLong(fileLen);
   }
 
   @Override
@@ -119,25 +114,13 @@ public class OrcSplit extends FileSplit {
       deltas.add(dmd);
     }
     if (hasFooter) {
-      // deserialize FileMetaInfo fields
-      String compressionType = Text.readString(in);
-      int bufferSize = WritableUtils.readVInt(in);
-      int metadataSize = WritableUtils.readVInt(in);
-
-      // deserialize FileMetaInfo field footer
-      int footerBuffSize = WritableUtils.readVInt(in);
-      ByteBuffer footerBuff = ByteBuffer.allocate(footerBuffSize);
-      in.readFully(footerBuff.array(), 0, footerBuffSize);
-      OrcFile.WriterVersion writerVersion =
-          ReaderImpl.getWriterVersion(WritableUtils.readVInt(in));
-
-      fileMetaInfo = new ReaderImpl.FileMetaInfo(compressionType, bufferSize,
-          metadataSize, footerBuff, writerVersion);
+      int tailLen = WritableUtils.readVInt(in);
+      byte[] tailBuffer = new byte[tailLen];
+      in.readFully(tailBuffer);
+      OrcProto.FileTail fileTail = OrcProto.FileTail.parseFrom(tailBuffer);
+      orcTail = new OrcTail(fileTail, null);
     }
-  }
-
-  ReaderImpl.FileMetaInfo getFileMetaInfo(){
-    return fileMetaInfo;
+    fileLen = in.readLong();
   }
 
   public boolean hasFooter() {
@@ -168,4 +151,19 @@ public class OrcSplit extends FileSplit {
   public long getProjectedColumnsUncompressedSize() {
     return projColsUncompressedSize;
   }
+
+  public long getFileLength() {
+    return fileLen;
+  }
+
+  public OrcTail getOrcTail() {
+    return orcTail;
+  }
+
+  @Override
+  public String toString() {
+    return "OrcSplit [" + getPath() + ", start=" + getStart() + ", length=" + getLength()
+            + ", isOriginal=" + isOriginal + ", fileLength=" + fileLen + ", hasFooter=" + hasFooter +
+            ", hasBase=" + hasBase + ", deltas=" + (deltas == null ? 0 : deltas.size()) + "]";
+  }
 }

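During split generation the new constructor is handed both the parsed tail and the known file length, so tasks need neither a footer read nor a getFileStatus() call; a length of zero or less is normalized to Long.MAX_VALUE, meaning "unknown". A hedged construction sketch, where offset, length, hosts, deltas, isOriginal and hasBase are assumed to come from the surrounding split-generation code:

    OrcSplit split = new OrcSplit(file.getPath(), offset, length, hosts,
        context.footerInSplits ? orcTail : null,   // null tail => hasFooter == false
        isOriginal, hasBase, deltas,
        projColsUncompressedSize,                   // estimate from computeProjectionSize()
        file.getLen());                             // <= 0 falls back to Long.MAX_VALUE internally
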
http://git-wip-us.apache.org/repos/asf/hive/blob/89fa0a1d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcTail.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcTail.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcTail.java
new file mode 100644
index 0000000..54ca2f0
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcTail.java
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+public final class OrcTail {
+    // postscript + footer - Serialized in OrcSplit
+    private final OrcProto.FileTail fileTail;
+    // serialized representation of metadata, footer and postscript
+    private final ByteBuffer serializedTail;
+    // used to invalidate cache entries
+    private final long fileModificationTime;
+    private OrcProto.Metadata metadata;
+
+    public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail) {
+        this(fileTail, serializedTail, -1);
+    }
+
+    public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail, long fileModificationTime) {
+        this.fileTail = fileTail;
+        this.serializedTail = serializedTail;
+        this.fileModificationTime = fileModificationTime;
+        this.metadata = null; // lazily deserialized
+    }
+
+    public ByteBuffer getSerializedTail() {
+        return serializedTail;
+    }
+
+    public long getFileModificationTime() {
+        return fileModificationTime;
+    }
+
+    public OrcProto.Footer getFooter() {
+        return fileTail.getFooter();
+    }
+
+    public OrcProto.PostScript getPostScript() {
+        return fileTail.getPostscript();
+    }
+
+    public long getFileLength() {
+        return fileTail.getFileLength();
+    }
+
+    public OrcFile.WriterVersion getWriterVersion() {
+        OrcProto.PostScript ps = fileTail.getPostscript();
+        return ReaderImpl.getWriterVersion(ps.getWriterVersion());
+    }
+
+    public List<StripeInformation> getStripes() {
+        List<StripeInformation> result = new ArrayList<>();
+        for (OrcProto.StripeInformation stripeProto : fileTail.getFooter().getStripesList()) {
+            result.add(new ReaderImpl.StripeInformationImpl(stripeProto));
+        }
+        return result;
+    }
+
+    public CompressionKind getCompressionKind() {
+        return CompressionKind.valueOf(fileTail.getPostscript().getCompression().name());
+    }
+
+    public CompressionCodec getCompressionCodec() {
+        return WriterImpl.createCodec(getCompressionKind());
+    }
+
+    public int getCompressionBufferSize() {
+        return (int) fileTail.getPostscript().getCompressionBlockSize();
+    }
+
+    public List<StripeStatistics> getStripeStatistics() throws IOException {
+        List<StripeStatistics> result = new ArrayList<>();
+        List<OrcProto.StripeStatistics> ssProto = getStripeStatisticsProto();
+        if (ssProto != null) {
+            for (OrcProto.StripeStatistics ss : ssProto) {
+                result.add(new StripeStatistics(ss.getColStatsList()));
+            }
+        }
+        return result;
+    }
+
+    public List<OrcProto.StripeStatistics> getStripeStatisticsProto() throws IOException {
+        if (getMetadata() == null) {
+            return null;
+        }
+        return getMetadata().getStripeStatsList();
+    }
+
+    public int getMetadataSize() {
+        return (int) getPostScript().getMetadataLength();
+    }
+
+    public List<OrcProto.Type> getTypes() {
+        return getFooter().getTypesList();
+    }
+
+    public OrcProto.FileTail getFileTail() {
+        return fileTail;
+    }
+
+    public OrcProto.FileTail getMinimalFileTail() {
+        OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder(fileTail);
+        OrcProto.Footer.Builder footerBuilder = OrcProto.Footer.newBuilder(fileTail.getFooter());
+        footerBuilder.clearStatistics();
+        fileTailBuilder.setFooter(footerBuilder.build());
+        OrcProto.FileTail result = fileTailBuilder.build();
+        return result;
+    }
+
+    public OrcProto.Metadata getMetadata() throws IOException {
+        if (serializedTail == null) return null;
+        if (metadata == null) {
+            metadata = ReaderImpl.extractMetadata(serializedTail, 0,
+                    (int) fileTail.getPostscript().getMetadataLength(),
+                    getCompressionCodec(), getCompressionBufferSize());
+        }
+        return metadata;
+    }
+}
\ No newline at end of file

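OrcTail is the single carrier for everything the planner and tasks need from the end of an ORC file. A short consumption sketch, with a hypothetical inspect() wrapper and a tail assumed to come from a split or the footer cache:

    static void inspect(OrcTail tail) throws IOException {
      List<StripeInformation> stripes = tail.getStripes();   // rebuilt from the footer, no file system call
      List<OrcProto.Type> types = tail.getTypes();
      OrcFile.WriterVersion version = tail.getWriterVersion();
      // Stripe-level statistics need the serialized metadata bytes; a tail rebuilt from
      // getMinimalFileTail() (serializedTail == null) returns an empty list here.
      List<StripeStatistics> stripeStats = tail.getStripeStatistics();
    }
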
http://git-wip-us.apache.org/repos/asf/hive/blob/89fa0a1d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
index 6dbe461..3105738 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
@@ -146,6 +146,10 @@ public interface Reader {
    */
   OrcFile.WriterVersion getWriterVersion();
 
+  OrcProto.FileTail getFileTail();
+
+  ByteBuffer getSerializedFileFooter();
+
   /**
    * Options for creating a RecordReader.
    */

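These two accessors are what split generation uses to capture a freshly opened reader's tail for the footer cache, as in the OrcInputFormat hunk above (file is the FileStatus of the ORC file being split):

    // The modification time lets the cache detect stale entries for overwritten files.
    OrcTail orcTail = new OrcTail(orcReader.getFileTail(),
        orcReader.getSerializedFileFooter(),
        file.getModificationTime());
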
http://git-wip-us.apache.org/repos/asf/hive/blob/89fa0a1d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index d42675c..1b7f6d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.io.orc;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -30,6 +31,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.DiskRange;
@@ -72,6 +74,7 @@ public class ReaderImpl implements Reader {
   // will help avoid cpu cycles spend in deserializing at cost of increased
   // memory footprint.
   private final ByteBuffer footerByteBuffer;
+  private final OrcTail tail;
 
   static class StripeInformationImpl
       implements StripeInformation {
@@ -308,29 +311,23 @@ public class ReaderImpl implements Reader {
     this.path = path;
     this.conf = options.getConfiguration();
 
-    FileMetaInfo footerMetaData;
-    if (options.getFileMetaInfo() != null) {
-      footerMetaData = options.getFileMetaInfo();
+    OrcTail orcTail = options.getOrcTail();
+    if (orcTail == null) {
+      tail = extractFileTail(fs, path, options.getMaxLength());
+      options.orcTail(tail);
     } else {
-      footerMetaData = extractMetaInfoFromFooter(fs, path,
-          options.getMaxLength());
+      tail = orcTail;
     }
-    MetaInfoObjExtractor rInfo =
-        new MetaInfoObjExtractor(footerMetaData.compressionType,
-                                 footerMetaData.bufferSize,
-                                 footerMetaData.metadataSize,
-                                 footerMetaData.footerBuffer
-                                 );
-    this.footerByteBuffer = footerMetaData.footerBuffer;
-    this.compressionKind = rInfo.compressionKind;
-    this.codec = rInfo.codec;
-    this.bufferSize = rInfo.bufferSize;
-    this.metadataSize = rInfo.metadataSize;
-    this.metadata = rInfo.metadata;
-    this.footer = rInfo.footer;
-    this.inspector = rInfo.inspector;
-    this.versionList = footerMetaData.versionList;
-    this.writerVersion = footerMetaData.writerVersion;
+    this.footerByteBuffer = tail.getSerializedTail();
+    this.compressionKind = tail.getCompressionKind();
+    this.codec = tail.getCompressionCodec();
+    this.bufferSize = tail.getCompressionBufferSize();
+    this.metadataSize = tail.getMetadataSize();
+    this.versionList = tail.getPostScript().getVersionList();
+    this.footer = tail.getFooter();
+    this.writerVersion = tail.getWriterVersion();
+    this.metadata = tail.getMetadata();
+    this.inspector = OrcStruct.createObjectInspector(0, tail.getTypes());
   }
 
   /**
@@ -347,7 +344,7 @@ public class ReaderImpl implements Reader {
     return OrcFile.WriterVersion.ORIGINAL;
   }
 
-  private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
+  private static OrcTail extractFileTail(FileSystem fs,
                                                         Path path,
                                                         long maxFileLength
                                                         ) throws IOException {
@@ -355,14 +352,20 @@ public class ReaderImpl implements Reader {
     ByteBuffer buffer = null;
     OrcProto.PostScript ps = null;
     OrcFile.WriterVersion writerVersion = null;
+    long modificationTime;
+    OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder();
     try {
       // figure out the size of the file using the option or filesystem
       long size;
       if (maxFileLength == Long.MAX_VALUE) {
-        size = fs.getFileStatus(path).getLen();
+        FileStatus fileStatus = fs.getFileStatus(path);
+        size = fileStatus.getLen();
+        modificationTime = fileStatus.getModificationTime();
       } else {
         size = maxFileLength;
+        modificationTime = -1;
       }
+      fileTailBuilder.setFileLength(size);
 
       //read last bytes into buffer to get PostScript
       int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
@@ -416,6 +419,8 @@ public class ReaderImpl implements Reader {
         buffer = extraBuf;
         buffer.position(0);
         buffer.limit(footerSize + metadataSize);
+        readSize += extra;
+        psOffset = readSize - 1 - psLen;
       } else {
         //footer is already in the bytes in buffer, just adjust position, length
         buffer.position(psOffset - footerSize - metadataSize);
@@ -424,6 +429,17 @@ public class ReaderImpl implements Reader {
 
       // remember position for later
       buffer.mark();
+      int bufferSize = (int) ps.getCompressionBlockSize();
+      CompressionCodec codec = WriterImpl.createCodec(CompressionKind.valueOf(ps.getCompression().name()));
+      fileTailBuilder.setPostscriptLength(psLen).setPostscript(ps);
+      int footerOffset = psOffset - footerSize;
+      buffer.position(footerOffset);
+      ByteBuffer footerBuffer = buffer.slice();
+      buffer.reset();
+      OrcProto.Footer footer = extractFooter(footerBuffer, 0, footerSize,
+          codec, bufferSize);
+      fileTailBuilder.setFooter(footer);
+
     } finally {
       try {
         file.close();
@@ -431,101 +447,30 @@ public class ReaderImpl implements Reader {
         LOG.error("Failed to close the file after another error", ex);
       }
     }
-
-    return new FileMetaInfo(
-        ps.getCompression().toString(),
-        (int) ps.getCompressionBlockSize(),
-        (int) ps.getMetadataLength(),
-        buffer,
-        ps.getVersionList(),
-        writerVersion
-        );
-  }
-
-
-
-  /**
-   * MetaInfoObjExtractor - has logic to create the values for the fields in ReaderImpl
-   *  from serialized fields.
-   * As the fields are final, the fields need to be initialized in the constructor and
-   *  can't be done in some helper function. So this helper class is used instead.
-   *
-   */
-  private static class MetaInfoObjExtractor{
-    final CompressionKind compressionKind;
-    final CompressionCodec codec;
-    final int bufferSize;
-    final int metadataSize;
-    final OrcProto.Metadata metadata;
-    final OrcProto.Footer footer;
-    final ObjectInspector inspector;
-
-    MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize, 
-        ByteBuffer footerBuffer) throws IOException {
-
-      this.compressionKind = CompressionKind.valueOf(codecStr.toUpperCase());
-      this.bufferSize = bufferSize;
-      this.codec = WriterImpl.createCodec(compressionKind);
-      this.metadataSize = metadataSize;
-
-      int position = footerBuffer.position();
-      int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize;
-      footerBuffer.limit(position + metadataSize);
-
-      CodedInputStream instream = InStream.createCodedInputStream("metadata",
-          Lists.<DiskRange>newArrayList(new BufferChunk(footerBuffer, 0)), metadataSize,
-          codec, bufferSize);
-      this.metadata = OrcProto.Metadata.parseFrom(instream);
-
-      footerBuffer.position(position + metadataSize);
-      footerBuffer.limit(position + metadataSize + footerBufferSize);
-      instream = InStream.createCodedInputStream("footer", Lists.<DiskRange>newArrayList(
-          new BufferChunk(footerBuffer, 0)), footerBufferSize, codec, bufferSize);
-      this.footer = OrcProto.Footer.parseFrom(instream);
-
-      footerBuffer.position(position);
-      this.inspector = OrcStruct.createObjectInspector(0, footer.getTypesList());
-    }
+    return new OrcTail(fileTailBuilder.build(), buffer.slice(), modificationTime);
   }
 
-  /**
-   * FileMetaInfo - represents file metadata stored in footer and postscript sections of the file
-   * that is useful for Reader implementation
-   *
-   */
-  static class FileMetaInfo{
-    final String compressionType;
-    final int bufferSize;
-    final int metadataSize;
-    final ByteBuffer footerBuffer;
-    final List<Integer> versionList;
-    final OrcFile.WriterVersion writerVersion;
-
-    FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
-        ByteBuffer footerBuffer, OrcFile.WriterVersion writerVersion) {
-      this(compressionType, bufferSize, metadataSize, footerBuffer, null,
-          writerVersion);
-    }
-
-    FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
-                 ByteBuffer footerBuffer, List<Integer> versionList,
-                 OrcFile.WriterVersion writerVersion){
-      this.compressionType = compressionType;
-      this.bufferSize = bufferSize;
-      this.metadataSize = metadataSize;
-      this.footerBuffer = footerBuffer;
-      this.versionList = versionList;
-      this.writerVersion = writerVersion;
-    }
+  private static OrcProto.Footer extractFooter(ByteBuffer bb, int footerAbsPos,
+      int footerSize, CompressionCodec codec,
+      int bufferSize) throws IOException {
+    bb.position(footerAbsPos);
+    bb.limit(footerAbsPos + footerSize);
+    InputStream instream = InStream.create("footer",
+        Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), footerSize, codec, bufferSize);
+    CodedInputStream in = CodedInputStream.newInstance(instream);
+    return OrcProto.Footer.parseFrom(in);
   }
 
-  public FileMetaInfo getFileMetaInfo(){
-    return new FileMetaInfo(compressionKind.toString(), bufferSize,
-        metadataSize, footerByteBuffer, versionList, writerVersion);
+  public static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
+      int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
+    bb.position(metadataAbsPos);
+    bb.limit(metadataAbsPos + metadataSize);
+    InputStream inputStream = InStream.create("metadata",
+        Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
+    CodedInputStream in = CodedInputStream.newInstance(inputStream);
+    return OrcProto.Metadata.parseFrom(in);
   }
 
-
-
   @Override
   public RecordReader rows() throws IOException {
     return rowsOptions(new Options());
@@ -577,24 +522,34 @@ public class ReaderImpl implements Reader {
       for (int i = 0; i < stats.size(); ++i) {
         indices.add(i);
       }
-      deserializedSize = getRawDataSizeFromColIndices(indices);
+      deserializedSize = getRawDataSizeFromColIndices(indices, footer.getTypesList(), stats);
     }
     return deserializedSize;
   }
 
   @Override
-  public long getRawDataSizeFromColIndices(List<Integer> colIndices) {
+  public OrcProto.FileTail getFileTail() {
+    return tail.getFileTail();
+  }
+
+  @Override
+  public ByteBuffer getSerializedFileFooter() {
+    return tail.getSerializedTail();
+  }
+
+  public static long getRawDataSizeFromColIndices(List<Integer> colIndices, List<Type> types,
+                                                  List<OrcProto.ColumnStatistics> stats) {
     long result = 0;
     for (int colIdx : colIndices) {
-      result += getRawDataSizeOfColumn(colIdx);
+      result += getRawDataSizeOfColumn(colIdx, types, stats);
     }
     return result;
   }
 
-  private long getRawDataSizeOfColumn(int colIdx) {
-    OrcProto.ColumnStatistics colStat = footer.getStatistics(colIdx);
+  private static long getRawDataSizeOfColumn(int colIdx, List<Type> types, List<OrcProto.ColumnStatistics> stats) {
+    OrcProto.ColumnStatistics colStat = stats.get(colIdx);
     long numVals = colStat.getNumberOfValues();
-    Type type = footer.getTypes(colIdx);
+    Type type = types.get(colIdx);
 
     switch (type.getKind()) {
     case BINARY:
@@ -638,7 +593,12 @@ public class ReaderImpl implements Reader {
   @Override
   public long getRawDataSizeOfColumns(List<String> colNames) {
     List<Integer> colIndices = getColumnIndicesFromNames(colNames);
-    return getRawDataSizeFromColIndices(colIndices);
+    return getRawDataSizeFromColIndices(colIndices, tail.getTypes(), tail.getFooter().getStatisticsList());
+  }
+
+  @Override
+  public long getRawDataSizeFromColIndices(List<Integer> colIds) {
+    return getRawDataSizeFromColIndices(colIds, tail.getTypes(), tail.getFooter().getStatisticsList());
   }
 
   private List<Integer> getColumnIndicesFromNames(List<String> colNames) {

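getRawDataSizeFromColIndices is now a static helper driven purely by the footer's type list and file-level column statistics, so split generation can size a projection from a cached OrcTail without keeping a Reader open. A sketch of the call as OrcInputFormat uses it above, with internalColIds being the projected column ids:

    List<OrcProto.Type> types = orcTail.getTypes();
    List<OrcProto.ColumnStatistics> fileColStats = orcTail.getFooter().getStatisticsList();
    long projectedBytes = ReaderImpl.getRawDataSizeFromColIndices(internalColIds, types, fileColStats);
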
http://git-wip-us.apache.org/repos/asf/hive/blob/89fa0a1d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
index 64b426c..330ac12 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
@@ -168,8 +168,9 @@ public class VectorizedOrcInputFormat extends FileInputFormat<NullWritable, Vect
     if(fSplit instanceof OrcSplit){
       OrcSplit orcSplit = (OrcSplit) fSplit;
       if (orcSplit.hasFooter()) {
-        opts.fileMetaInfo(orcSplit.getFileMetaInfo());
+        opts.orcTail(orcSplit.getOrcTail());
       }
+      opts.maxLength(orcSplit.getFileLength());
     }
     Reader reader = OrcFile.createReader(path, opts);
     return new VectorizedOrcRecordReader(reader, conf, fSplit);

http://git-wip-us.apache.org/repos/asf/hive/blob/89fa0a1d/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
----------------------------------------------------------------------
diff --git a/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto b/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
index 06d0b07..5505efe 100644
--- a/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
+++ b/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
@@ -220,3 +220,11 @@ message PostScript {
   // Leave this last in the record
   optional string magic = 8000;
 }
+
+// This gets serialized as part of OrcSplit, also used by footer cache.
+message FileTail {
+  optional PostScript postscript = 1;
+  optional Footer footer = 2;
+  optional uint64 fileLength = 3;
+  optional uint64 postscriptLength = 4;
+}

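The FileTail message is the unit that both the split payload and the client-side footer cache store. A sketch of assembling and round-tripping one with the generated builder API, as extractFileTail in ReaderImpl does; ps, footer, size and psLen are assumed to have already been read from the end of the file:

    OrcProto.FileTail fileTail = OrcProto.FileTail.newBuilder()
        .setPostscript(ps)               // parsed PostScript
        .setPostscriptLength(psLen)
        .setFooter(footer)               // parsed Footer
        .setFileLength(size)
        .build();
    byte[] onWire = fileTail.toByteArray();                        // what OrcSplit/OrcNewSplit write
    OrcProto.FileTail restored = OrcProto.FileTail.parseFrom(onWire);
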
