hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From prasan...@apache.org
Subject hive git commit: HIVE-11592: ORC metadata section can sometimes exceed protobuf message size limit (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Date Wed, 19 Aug 2015 18:46:53 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-1 0f1ecf2f7 -> 1f6fdc2d8


HIVE-11592: ORC metadata section can sometimes exceed protobuf message size limit (Prasanth
Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1f6fdc2d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1f6fdc2d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1f6fdc2d

Branch: refs/heads/branch-1
Commit: 1f6fdc2d820a7bbfeb16ddca754396c12755e86a
Parents: 0f1ecf2
Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Authored: Wed Aug 19 11:46:27 2015 -0700
Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Committed: Wed Aug 19 11:46:27 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/orc/ReaderImpl.java       | 34 +++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1f6fdc2d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index a6448b6..c990d85 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -46,12 +46,15 @@ import org.apache.hadoop.io.Text;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.InvalidProtocolBufferException;
 
 public class ReaderImpl implements Reader {
 
   private static final Log LOG = LogFactory.getLog(ReaderImpl.class);
 
   private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
+  private static final int DEFAULT_PROTOBUF_MESSAGE_LIMIT = 64 << 20;  // 64MB
+  private static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB
 
   protected final FileSystem fileSystem;
   protected final Path path;
@@ -468,7 +471,36 @@ public class ReaderImpl implements Reader {
 
       InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
           new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize);
-      this.metadata = OrcProto.Metadata.parseFrom(instream);
+      CodedInputStream in = CodedInputStream.newInstance(instream);
+      int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
+      OrcProto.Metadata meta = null;
+      do {
+        try {
+          in.setSizeLimit(msgLimit);
+          meta = OrcProto.Metadata.parseFrom(in);
+        } catch (InvalidProtocolBufferException e) {
+          if (e.getMessage().contains("Protocol message was too large")) {
+            LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
+                " size of the coded input stream." );
+
+            msgLimit = msgLimit << 1;
+            if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
+              LOG.error("Metadata section exceeds max protobuf message size of " +
+                  PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
+              throw e;
+            }
+
+            // we must have failed in the middle of reading instream and instream doesn't support
+            // resetting the stream
+            instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
+                new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize);
+            in = CodedInputStream.newInstance(instream);
+          } else {
+            throw e;
+          }
+        }
+      } while (meta == null);
+      this.metadata = meta;
 
       footerBuffer.position(position + metadataSize);
       footerBuffer.limit(position + metadataSize + footerBufferSize);


Mime
View raw message