From: omalley@apache.org
To: commits@hive.apache.org
Subject: svn commit: r1505184 - in /hive/branches/branch-0.11/ql/src: gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/ java/org/apache/hadoop/hive/ql/io/orc/ protobuf/org/apache/hadoop/hive/ql/io/orc/
Date: Sat, 20 Jul 2013 18:29:05 -0000
Message-Id: <20130720182905.A2AC92388860@eris.apache.org>

Author: omalley
Date: Sat Jul 20 18:29:03 2013
New Revision: 1505184

URL: http://svn.apache.org/r1505184
Log:
HIVE-4724 Better detection of non-ORC files in the ORC reader

Modified:
    hive/branches/branch-0.11/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
    hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
    hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
    hive/branches/branch-0.11/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto

Modified: hive/branches/branch-0.11/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.11/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1505184&r1=1505183&r2=1505184&view=diff
==============================================================================
--- hive/branches/branch-0.11/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original)
+++ hive/branches/branch-0.11/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Sat Jul 20 18:29:03 2013
@@ -9791,6 +9791,15 @@ public final class OrcProto {
     // optional uint64 compressionBlockSize = 3;
     boolean hasCompressionBlockSize();
     long getCompressionBlockSize();
+
+    // repeated uint32 version = 4 [packed = true];
+    java.util.List<java.lang.Integer> getVersionList();
+    int getVersionCount();
+    int getVersion(int index);
+
+    // optional string magic = 8000;
+    boolean hasMagic();
+    String getMagic();
   }
   public static final class PostScript extends
       com.google.protobuf.GeneratedMessage
@@ -9851,10 +9860,59 @@ public final class OrcProto {
       return compressionBlockSize_;
     }
 
+    // repeated uint32 version = 4 [packed = true];
+    public static final int VERSION_FIELD_NUMBER = 4;
+    private java.util.List<java.lang.Integer> version_;
+    public java.util.List<java.lang.Integer>
+        getVersionList() {
+      return version_;
+    }
+    public int getVersionCount() {
+      return version_.size();
+    }
+    public int getVersion(int index) {
+      return version_.get(index);
+    }
+    private int versionMemoizedSerializedSize = -1;
+
+    // optional string magic = 8000;
+    public static final int MAGIC_FIELD_NUMBER = 8000;
+    private java.lang.Object magic_;
+    public boolean hasMagic() {
+      return ((bitField0_ & 0x00000008) == 0x00000008);
+    }
+    public String getMagic() {
+      java.lang.Object ref = magic_;
+      if (ref instanceof String) {
+        return (String) ref;
+      } else {
+        com.google.protobuf.ByteString bs =
+            (com.google.protobuf.ByteString) ref;
+        String s = bs.toStringUtf8();
+        if (com.google.protobuf.Internal.isValidUtf8(bs)) {
+          magic_ = s;
+        }
+        return s;
+      }
+    }
+    private com.google.protobuf.ByteString getMagicBytes() {
+      java.lang.Object ref = magic_;
+      if (ref instanceof String) {
+        com.google.protobuf.ByteString b =
+            com.google.protobuf.ByteString.copyFromUtf8((String) ref);
+        magic_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
     private void initFields() {
       footerLength_ = 0L;
       compression_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.CompressionKind.NONE;
       compressionBlockSize_ = 0L;
+      version_ = java.util.Collections.emptyList();;
+      magic_ = "";
     }
     private byte memoizedIsInitialized = -1;
     public final boolean isInitialized() {
@@ -9877,6 +9935,16 @@ public final class OrcProto {
       if (((bitField0_ & 0x00000004) == 0x00000004)) {
         output.writeUInt64(3, compressionBlockSize_);
       }
+      if (getVersionList().size() > 0) {
+        output.writeRawVarint32(34);
+        output.writeRawVarint32(versionMemoizedSerializedSize);
+      }
+      for (int i = 0; i < version_.size(); i++) {
+        output.writeUInt32NoTag(version_.get(i));
+      }
+      if (((bitField0_ & 0x00000008) == 0x00000008)) {
+        output.writeBytes(8000, getMagicBytes());
+      }
       getUnknownFields().writeTo(output);
     }
 
@@ -9898,6 +9966,24 @@ public final class OrcProto {
         size += com.google.protobuf.CodedOutputStream
           .computeUInt64Size(3, compressionBlockSize_);
       }
+      {
+        int dataSize = 0;
+        for (int i = 0; i < version_.size(); i++) {
+          dataSize += com.google.protobuf.CodedOutputStream
+            .computeUInt32SizeNoTag(version_.get(i));
+        }
+        size += dataSize;
+        if (!getVersionList().isEmpty()) {
+          size += 1;
+          size += com.google.protobuf.CodedOutputStream
+              .computeInt32SizeNoTag(dataSize);
+        }
+        versionMemoizedSerializedSize = dataSize;
+      }
+      if (((bitField0_ & 0x00000008) == 0x00000008)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeBytesSize(8000, getMagicBytes());
+      }
       size += getUnknownFields().getSerializedSize();
       memoizedSerializedSize = size;
       return size;
@@ -10028,6 +10114,10 @@ public final class OrcProto {
         bitField0_ = (bitField0_ & ~0x00000002);
         compressionBlockSize_ = 0L;
         bitField0_ = (bitField0_ & ~0x00000004);
+        version_ = java.util.Collections.emptyList();;
+        bitField0_ = (bitField0_ & ~0x00000008);
+        magic_ = "";
+        bitField0_ = (bitField0_ & ~0x00000010);
         return this;
       }
 
@@ -10078,6 +10168,15 @@ public final class OrcProto {
           to_bitField0_ |= 0x00000004;
         }
         result.compressionBlockSize_ = compressionBlockSize_;
+        if (((bitField0_ & 0x00000008) == 0x00000008)) {
+          version_ = java.util.Collections.unmodifiableList(version_);
+          bitField0_ = (bitField0_ & ~0x00000008);
+        }
+        result.version_ = version_;
+        if (((from_bitField0_ & 0x00000010) == 0x00000010)) {
+          to_bitField0_ |= 0x00000008;
+        }
+        result.magic_ = magic_;
         result.bitField0_ = to_bitField0_;
         onBuilt();
         return result;
@@ -10103,6 +10202,19 @@ public final class OrcProto {
         if (other.hasCompressionBlockSize()) {
           setCompressionBlockSize(other.getCompressionBlockSize());
         }
+        if (!other.version_.isEmpty()) {
+          if (version_.isEmpty()) {
+            version_ = other.version_;
+            bitField0_ = (bitField0_ & ~0x00000008);
+          } else {
+            ensureVersionIsMutable();
+            version_.addAll(other.version_);
+          }
+          onChanged();
+        }
+        if (other.hasMagic()) {
+          setMagic(other.getMagic());
+        }
         this.mergeUnknownFields(other.getUnknownFields());
         return this;
       }
@@ -10155,6 +10267,25 @@ public final class OrcProto {
               compressionBlockSize_ = input.readUInt64();
               break;
             }
+            case 32: {
+              ensureVersionIsMutable();
+              version_.add(input.readUInt32());
+              break;
+            }
+            case 34: {
+              int length = input.readRawVarint32();
+              int limit = input.pushLimit(length);
+              while (input.getBytesUntilLimit() > 0) {
+                addVersion(input.readUInt32());
+              }
+              input.popLimit(limit);
+              break;
+            }
+            case 64002: {
+              bitField0_ |= 0x00000010;
+              magic_ = input.readBytes();
+              break;
+            }
           }
         }
       }
@@ -10227,6 +10358,87 @@ public final class OrcProto {
         return this;
       }
 
+      // repeated uint32 version = 4 [packed = true];
+      private java.util.List<java.lang.Integer> version_ = java.util.Collections.emptyList();;
+      private void ensureVersionIsMutable() {
+        if (!((bitField0_ & 0x00000008) == 0x00000008)) {
+          version_ = new java.util.ArrayList<java.lang.Integer>(version_);
+          bitField0_ |= 0x00000008;
+        }
+      }
+      public java.util.List<java.lang.Integer>
+          getVersionList() {
+        return java.util.Collections.unmodifiableList(version_);
+      }
+      public int getVersionCount() {
+        return version_.size();
+      }
+      public int getVersion(int index) {
+        return version_.get(index);
+      }
+      public Builder setVersion(
+          int index, int value) {
+        ensureVersionIsMutable();
+        version_.set(index, value);
+        onChanged();
+        return this;
+      }
+      public Builder addVersion(int value) {
+        ensureVersionIsMutable();
+        version_.add(value);
+        onChanged();
+        return this;
+      }
+      public Builder addAllVersion(
+          java.lang.Iterable<? extends java.lang.Integer> values) {
+        ensureVersionIsMutable();
+        super.addAll(values, version_);
+        onChanged();
+        return this;
+      }
+      public Builder clearVersion() {
+        version_ = java.util.Collections.emptyList();;
+        bitField0_ = (bitField0_ & ~0x00000008);
+        onChanged();
+        return this;
+      }
+
+      // optional string magic = 8000;
+      private java.lang.Object magic_ = "";
+      public boolean hasMagic() {
+        return ((bitField0_ & 0x00000010) == 0x00000010);
+      }
+      public String getMagic() {
+        java.lang.Object ref = magic_;
+        if (!(ref instanceof String)) {
+          String s = ((com.google.protobuf.ByteString) ref).toStringUtf8();
+          magic_ = s;
+          return s;
+        } else {
+          return (String) ref;
+        }
+      }
+      public Builder setMagic(String value) {
+        if (value == null) {
+          throw new NullPointerException();
+        }
+        bitField0_ |= 0x00000010;
+        magic_ = value;
+        onChanged();
+        return this;
+      }
+      public Builder clearMagic() {
+        bitField0_ = (bitField0_ & ~0x00000010);
+        magic_ = getDefaultInstance().getMagic();
+        onChanged();
+        return this;
+      }
+      void setMagic(com.google.protobuf.ByteString value) {
+        bitField0_ |= 0x00000010;
+        magic_ = value;
+        onChanged();
+      }
+
       // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.PostScript)
     }
@@ -10384,12 +10596,13 @@ public final class OrcProto {
       ".hive.ql.io.orc.UserMetadataItem\022\024\n\014numb" +
       "erOfRows\030\006 \001(\004\022F\n\nstatistics\030\007 \003(\01322.org" +
       ".apache.hadoop.hive.ql.io.orc.ColumnStat" +
-      "istics\022\026\n\016rowIndexStride\030\010 \001(\r\"\210\001\n\nPostS" +
+      "istics\022\026\n\016rowIndexStride\030\010 \001(\r\"\255\001\n\nPostS" +
       "cript\022\024\n\014footerLength\030\001 \001(\004\022F\n\013compressi" +
       "on\030\002 \001(\01621.org.apache.hadoop.hive.ql.io.",
       "orc.CompressionKind\022\034\n\024compressionBlockS" +
-      "ize\030\003 \001(\004*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010" +
-      "\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
+      "ize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\016\n\005magic\030" +
+      "\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010\n\004Z" +
+      "LIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -10521,7 +10734,7 @@ public final class OrcProto {
           internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_fieldAccessorTable = new
             com.google.protobuf.GeneratedMessage.FieldAccessorTable(
               internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_descriptor,
-              new java.lang.String[] { "FooterLength", "Compression", "CompressionBlockSize", },
+              new java.lang.String[] { "FooterLength", "Compression", "CompressionBlockSize", "Version", "Magic", },
              org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.class,
              org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder.class);
          return null;
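
For readers tracing the generated code above: version and magic behave like any other optional/repeated protobuf fields, so a parser built before this change simply skips them as unknown fields. A minimal sketch (not part of the commit) of how they round-trip through the generated API:

    // Build a PostScript carrying the new fields, as WriterImpl does below.
    OrcProto.PostScript ps = OrcProto.PostScript.newBuilder()
        .setFooterLength(100)
        .setCompression(OrcProto.CompressionKind.ZLIB)
        .addVersion(0)      // OrcFile.MAJOR_VERSION
        .addVersion(11)     // OrcFile.MINOR_VERSION
        .setMagic("ORC")
        .build();
    // Serialize and re-parse, as ReaderImpl does from the file tail.
    OrcProto.PostScript parsed =
        OrcProto.PostScript.parseFrom(ps.toByteArray());
    assert parsed.getVersionCount() == 2 && parsed.getVersion(1) == 11;
    assert parsed.hasMagic() && "ORC".equals(parsed.getMagic());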
Modified: hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1505184&r1=1505183&r2=1505184&view=diff
==============================================================================
--- hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (original)
+++ hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Sat Jul 20 18:29:03 2013
@@ -31,6 +31,26 @@ import java.io.IOException;
 public final class OrcFile {
 
   public static final String MAGIC = "ORC";
+
+  /**
+   * Create a version number for the ORC file format, so that we can add
+   * non-forward compatible changes in the future. To make it easier for
+   * users to understand the version numbers, we use the Hive release
+   * number that first wrote that version of ORC files.
+   *
+   * Thus, if you add new encodings or other non-forward compatible changes
+   * to ORC files, which prevent the old reader from reading the new format,
+   * you should change these variables to reflect the next Hive release
+   * number. Non-forward compatible changes should never be added in patch
+   * releases.
+   *
+   * Do not make any changes that break backwards compatibility, which would
+   * prevent the new reader from reading ORC files generated by any released
+   * version of Hive.
+   */
+  public static final int MAJOR_VERSION = 0;
+  public static final int MINOR_VERSION = 11;
+
+  // the table properties that control ORC files
   public static final String COMPRESSION = "orc.compress";
   static final String DEFAULT_COMPRESSION = "ZLIB";
   public static final String COMPRESSION_BLOCK_SIZE = "orc.compress.size";
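
To make that policy concrete: versions are compared major-first, and a reader should only warn when the file is strictly newer than the reader itself. A hypothetical helper (not in the commit; the real check is ReaderImpl.checkOrcVersion below) showing the comparison:

    // True when the file was written by a Hive release newer than this
    // reader, e.g. a 0.12 file opened by this 0.11 reader.
    static boolean writtenByNewerHive(int fileMajor, int fileMinor) {
      return fileMajor > OrcFile.MAJOR_VERSION ||
          (fileMajor == OrcFile.MAJOR_VERSION &&
           fileMinor > OrcFile.MINOR_VERSION);
    }
    // writtenByNewerHive(0, 12) -> true:  warn, encodings may be unknown
    // writtenByNewerHive(0, 11) -> false: same version, fine
    // writtenByNewerHive(0, 10) -> false: older files must stay readable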
Modified: hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1505184&r1=1505183&r2=1505184&view=diff
==============================================================================
--- hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Sat Jul 20 18:29:03 2013
@@ -19,10 +19,13 @@
 package org.apache.hadoop.hive.ql.io.orc;
 
 import com.google.protobuf.CodedInputStream;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Text;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -33,6 +36,8 @@ import java.util.List;
 
 final class ReaderImpl implements Reader {
 
+  private static final Log LOG = LogFactory.getLog(ReaderImpl.class);
+
   private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
 
   private final FileSystem fileSystem;
@@ -176,6 +181,81 @@ final class ReaderImpl implements Reader
     return result;
   }
 
+  /**
+   * Ensure this is an ORC file to prevent users from trying to read text
+   * files or RC files as ORC files.
+   * @param in the file being read
+   * @param path the filename for error messages
+   * @param psLen the postscript length
+   * @param buffer the tail of the file
+   * @throws IOException
+   */
+  static void ensureOrcFooter(FSDataInputStream in,
+                              Path path,
+                              int psLen,
+                              ByteBuffer buffer) throws IOException {
+    int len = OrcFile.MAGIC.length();
+    if (psLen < len + 1) {
+      throw new IOException("Malformed ORC file " + path +
+          ". Invalid postscript length " + psLen);
+    }
+    int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - 1
+        - len;
+    byte[] array = buffer.array();
+    // now look for the magic string at the end of the postscript.
+    if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) {
+      // If it isn't there, this may be the 0.11.0 version of ORC.
+      // Read the first 3 bytes of the file to check for the header
+      in.seek(0);
+      byte[] header = new byte[len];
+      in.readFully(header, 0, len);
+      // if it isn't there, this isn't an ORC file
+      if (!Text.decode(header, 0, len).equals(OrcFile.MAGIC)) {
+        throw new IOException("Malformed ORC file " + path +
+            ". Invalid postscript.");
+      }
+    }
+  }
+
+  /**
+   * Build a version string out of an array.
+   * @param version the version number as a list
+   * @return the human readable form of the version string
+   */
+  private static String versionString(List<Integer> version) {
+    StringBuilder buffer = new StringBuilder();
+    for(int i=0; i < version.size(); ++i) {
+      if (i != 0) {
+        buffer.append('.');
+      }
+      buffer.append(version.get(i));
+    }
+    return buffer.toString();
+  }
+
+  /**
+   * Check to see if this ORC file is from a future version and if so,
+   * warn the user that we may not be able to read all of the column
+   * encodings.
+   * @param log the logger to write any error message to
+   * @param path the filename for error messages
+   * @param version the version of Hive that wrote the file
+   */
+  static void checkOrcVersion(Log log, Path path, List<Integer> version) {
+    if (version.size() >= 1) {
+      int major = version.get(0);
+      int minor = 0;
+      if (version.size() >= 2) {
+        minor = version.get(1);
+      }
+      if (major > OrcFile.MAJOR_VERSION ||
+          (major == OrcFile.MAJOR_VERSION && minor > OrcFile.MINOR_VERSION)) {
+        log.warn("ORC file " + path + " was written by a future Hive version " +
+            versionString(version) + ". This file may not be readable by " +
+            "this version of Hive.");
+      }
+    }
+  }
+
   ReaderImpl(FileSystem fs, Path path) throws IOException {
     this.fileSystem = fs;
     this.path = path;
@@ -187,10 +267,12 @@ final class ReaderImpl implements Reader
     file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(),
       buffer.remaining());
     int psLen = buffer.get(readSize - 1);
+    ensureOrcFooter(file, path, psLen, buffer);
     int psOffset = readSize - 1 - psLen;
     CodedInputStream in = CodedInputStream.newInstance(buffer.array(),
       buffer.arrayOffset() + psOffset, psLen);
     OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
+    checkOrcVersion(LOG, path, ps.getVersionList());
     int footerSize = (int) ps.getFooterLength();
     bufferSize = (int) ps.getCompressionBlockSize();
     switch (ps.getCompression()) {
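
The check depends on the tail layout of an ORC file: the very last byte is the postscript length, and the magic string sits immediately before it; files written by Hive 0.11.0 before this change carry the magic only at offset 0, hence the header fallback. A hypothetical standalone checker (not in the commit) mirroring ensureOrcFooter against a local file:

    static boolean looksLikeOrc(java.io.File f) throws java.io.IOException {
      int magicLen = OrcFile.MAGIC.length();   // "ORC" -> 3 bytes
      try (java.io.RandomAccessFile raf =
               new java.io.RandomAccessFile(f, "r")) {
        if (raf.length() < magicLen + 1) {
          return false;                        // too short to be ORC
        }
        // The postscript ends with the magic; the final byte is its length.
        byte[] tail = new byte[magicLen];
        raf.seek(raf.length() - 1 - magicLen);
        raf.readFully(tail);
        if (OrcFile.MAGIC.equals(new String(tail, "UTF-8"))) {
          return true;
        }
        // Fall back to the file header for pre-change 0.11.0 files.
        byte[] head = new byte[magicLen];
        raf.seek(0);
        raf.readFully(head);
        return OrcFile.MAGIC.equals(new String(head, "UTF-8"));
      }
    }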
Modified: hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1505184&r1=1505183&r2=1505184&view=diff
==============================================================================
--- hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Sat Jul 20 18:29:03 2013
@@ -87,7 +87,7 @@ class RecordReaderImpl implements Record
     }
     firstRow = skippedRows;
     totalRowCount = rows;
-    reader = createTreeReader(0, types, included);
+    reader = createTreeReader(path, 0, types, included);
     indexes = new OrcProto.RowIndex[types.size()];
     rowIndexStride = strideRate;
     if (this.stripes.size() > 0) {
@@ -110,17 +110,27 @@ class RecordReaderImpl implements Record
   }
 
   private abstract static class TreeReader {
+    protected final Path path;
     protected final int columnId;
     private BitFieldReader present = null;
     protected boolean valuePresent = false;
 
-    TreeReader(int columnId) {
+    TreeReader(Path path, int columnId) {
+      this.path = path;
       this.columnId = columnId;
     }
 
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId + " of " + path);
+      }
+    }
+
     void startStripe(Map<StreamName, InStream> streams,
                      List<OrcProto.ColumnEncoding> encoding
                     ) throws IOException {
+      checkEncoding(encoding.get(columnId));
       InStream in = streams.get(new StreamName(columnId,
           OrcProto.Stream.Kind.PRESENT));
       if (in == null) {
@@ -169,8 +179,8 @@ class RecordReaderImpl implements Record
   private static class BooleanTreeReader extends TreeReader{
     private BitFieldReader reader = null;
 
-    BooleanTreeReader(int columnId) {
-      super(columnId);
+    BooleanTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -212,8 +222,8 @@ class RecordReaderImpl implements Record
   private static class ByteTreeReader extends TreeReader{
     private RunLengthByteReader reader = null;
 
-    ByteTreeReader(int columnId) {
-      super(columnId);
+    ByteTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -255,8 +265,8 @@ class RecordReaderImpl implements Record
   private static class ShortTreeReader extends TreeReader{
     private RunLengthIntegerReader reader = null;
 
-    ShortTreeReader(int columnId) {
-      super(columnId);
+    ShortTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -299,8 +309,8 @@ class RecordReaderImpl implements Record
   private static class IntTreeReader extends TreeReader{
     private RunLengthIntegerReader reader = null;
 
-    IntTreeReader(int columnId) {
-      super(columnId);
+    IntTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -343,8 +353,8 @@ class RecordReaderImpl implements Record
   private static class LongTreeReader extends TreeReader{
     private RunLengthIntegerReader reader = null;
 
-    LongTreeReader(int columnId) {
-      super(columnId);
+    LongTreeReader(Path path, int columnId) {
+      super(path, columnId);
    }
 
     @Override
@@ -387,8 +397,8 @@ class RecordReaderImpl implements Record
   private static class FloatTreeReader extends TreeReader{
     private InStream stream;
 
-    FloatTreeReader(int columnId) {
-      super(columnId);
+    FloatTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -434,8 +444,8 @@ class RecordReaderImpl implements Record
   private static class DoubleTreeReader extends TreeReader{
     private InStream stream;
 
-    DoubleTreeReader(int columnId) {
-      super(columnId);
+    DoubleTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -481,8 +491,8 @@ class RecordReaderImpl implements Record
     private InStream stream;
     private RunLengthIntegerReader lengths;
 
-    BinaryTreeReader(int columnId) {
-      super(columnId);
+    BinaryTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -545,8 +555,8 @@ class RecordReaderImpl implements Record
     private RunLengthIntegerReader data;
     private RunLengthIntegerReader nanos;
 
-    TimestampTreeReader(int columnId) {
-      super(columnId);
+    TimestampTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -615,8 +625,8 @@ class RecordReaderImpl implements Record
     private InStream valueStream;
     private RunLengthIntegerReader scaleStream;
 
-    DecimalTreeReader(int columnId) {
-      super(columnId);
+    DecimalTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -663,8 +673,15 @@ class RecordReaderImpl implements Record
     private int[] dictionaryOffsets;
     private RunLengthIntegerReader reader;
 
-    StringTreeReader(int columnId) {
-      super(columnId);
+    StringTreeReader(Path path, int columnId) {
+      super(path, columnId);
+    }
+
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId + " of " + path);
+      }
     }
 
     @Override
@@ -755,10 +772,10 @@ class RecordReaderImpl implements Record
     private final TreeReader[] fields;
     private final String[] fieldNames;
 
-    StructTreeReader(int columnId,
+    StructTreeReader(Path path, int columnId,
                      List<OrcProto.Type> types,
                      boolean[] included) throws IOException {
-      super(columnId);
+      super(path, columnId);
       OrcProto.Type type = types.get(columnId);
       int fieldCount = type.getFieldNamesCount();
       this.fields = new TreeReader[fieldCount];
       this.fieldNames = new String[fieldCount];
       for(int i=0; i < fieldCount; ++i) {
         int subtype = type.getSubtypes(i);
         if (included == null || included[subtype]) {
-          this.fields[i] = createTreeReader(subtype, types, included);
+          this.fields[i] = createTreeReader(path, subtype, types, included);
         }
         this.fieldNames[i] = type.getFieldNames(i);
       }
@@ -831,17 +848,17 @@ class RecordReaderImpl implements Record
     private final TreeReader[] fields;
     private RunLengthByteReader tags;
 
-    UnionTreeReader(int columnId,
-                    List<OrcProto.Type> types,
-                    boolean[] included) throws IOException {
-      super(columnId);
+    UnionTreeReader(Path path, int columnId,
+                    List<OrcProto.Type> types,
+                    boolean[] included) throws IOException {
+      super(path, columnId);
       OrcProto.Type type = types.get(columnId);
       int fieldCount = type.getSubtypesCount();
       this.fields = new TreeReader[fieldCount];
       for(int i=0; i < fieldCount; ++i) {
         int subtype = type.getSubtypes(i);
         if (included == null || included[subtype]) {
-          this.fields[i] = createTreeReader(subtype, types, included);
+          this.fields[i] = createTreeReader(path, subtype, types, included);
         }
       }
     }
@@ -904,12 +921,13 @@ class RecordReaderImpl implements Record
     private final TreeReader elementReader;
     private RunLengthIntegerReader lengths;
 
-    ListTreeReader(int columnId,
-                   List<OrcProto.Type> types,
-                   boolean[] included) throws IOException {
-      super(columnId);
+    ListTreeReader(Path path, int columnId,
+                   List<OrcProto.Type> types,
+                   boolean[] included) throws IOException {
+      super(path, columnId);
       OrcProto.Type type = types.get(columnId);
-      elementReader = createTreeReader(type.getSubtypes(0), types, included);
+      elementReader = createTreeReader(path, type.getSubtypes(0), types,
+          included);
     }
 
     @Override
@@ -977,20 +995,21 @@ class RecordReaderImpl implements Record
     private final TreeReader valueReader;
     private RunLengthIntegerReader lengths;
 
-    MapTreeReader(int columnId,
-                  List<OrcProto.Type> types,
-                  boolean[] included) throws IOException {
-      super(columnId);
+    MapTreeReader(Path path,
+                  int columnId,
+                  List<OrcProto.Type> types,
+                  boolean[] included) throws IOException {
+      super(path, columnId);
       OrcProto.Type type = types.get(columnId);
       int keyColumn = type.getSubtypes(0);
       int valueColumn = type.getSubtypes(1);
       if (included == null || included[keyColumn]) {
-        keyReader = createTreeReader(keyColumn, types, included);
+        keyReader = createTreeReader(path, keyColumn, types, included);
       } else {
         keyReader = null;
       }
       if (included == null || included[valueColumn]) {
-        valueReader = createTreeReader(valueColumn, types, included);
+        valueReader = createTreeReader(path, valueColumn, types, included);
       } else {
         valueReader = null;
       }
@@ -1053,42 +1072,43 @@ class RecordReaderImpl implements Record
     }
   }
 
-  private static TreeReader createTreeReader(int columnId,
+  private static TreeReader createTreeReader(Path path,
+                                             int columnId,
                                              List<OrcProto.Type> types,
                                              boolean[] included
                                             ) throws IOException {
     OrcProto.Type type = types.get(columnId);
     switch (type.getKind()) {
       case BOOLEAN:
-        return new BooleanTreeReader(columnId);
+        return new BooleanTreeReader(path, columnId);
       case BYTE:
-        return new ByteTreeReader(columnId);
+        return new ByteTreeReader(path, columnId);
       case DOUBLE:
-        return new DoubleTreeReader(columnId);
+        return new DoubleTreeReader(path, columnId);
       case FLOAT:
-        return new FloatTreeReader(columnId);
+        return new FloatTreeReader(path, columnId);
       case SHORT:
-        return new ShortTreeReader(columnId);
+        return new ShortTreeReader(path, columnId);
       case INT:
-        return new IntTreeReader(columnId);
+        return new IntTreeReader(path, columnId);
       case LONG:
-        return new LongTreeReader(columnId);
+        return new LongTreeReader(path, columnId);
       case STRING:
-        return new StringTreeReader(columnId);
+        return new StringTreeReader(path, columnId);
      case BINARY:
-        return new BinaryTreeReader(columnId);
+        return new BinaryTreeReader(path, columnId);
      case TIMESTAMP:
-        return new TimestampTreeReader(columnId);
+        return new TimestampTreeReader(path, columnId);
      case DECIMAL:
-        return new DecimalTreeReader(columnId);
+        return new DecimalTreeReader(path, columnId);
      case STRUCT:
-        return new StructTreeReader(columnId, types, included);
+        return new StructTreeReader(path, columnId, types, included);
      case LIST:
-        return new ListTreeReader(columnId, types, included);
+        return new ListTreeReader(path, columnId, types, included);
      case MAP:
-        return new MapTreeReader(columnId, types, included);
+        return new MapTreeReader(path, columnId, types, included);
      case UNION:
-        return new UnionTreeReader(columnId, types, included);
+        return new UnionTreeReader(path, columnId, types, included);
      default:
        throw new IllegalArgumentException("Unsupported type " +
          type.getKind());
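
The path threaded through every TreeReader above is purely diagnostic: checkEncoding() runs at the start of each stripe, and a column whose encoding this reader does not understand (anything but DIRECT, or anything but DICTIONARY for strings) now fails fast instead of being misread. A hedged illustration (not from the commit, and ignoring that these classes are private):

    // Handing an IntTreeReader a DICTIONARY encoding now throws an
    // IOException that names the encoding, the column id, and the file,
    // instead of silently producing garbage rows.
    TreeReader r = new IntTreeReader(new Path("/warehouse/t/000000_0"), 3);
    r.checkEncoding(OrcProto.ColumnEncoding.newBuilder()
        .setKind(OrcProto.ColumnEncoding.Kind.DICTIONARY)
        .build());   // throws IOException("Unknown encoding ...")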
Modified: hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1505184&r1=1505183&r2=1505184&view=diff
==============================================================================
--- hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Sat Jul 20 18:29:03 2013
@@ -1430,7 +1430,10 @@ class WriterImpl implements Writer, Memo
     OrcProto.PostScript.Builder builder =
       OrcProto.PostScript.newBuilder()
         .setCompression(writeCompressionKind(compress))
-        .setFooterLength(footerLength);
+        .setFooterLength(footerLength)
+        .setMagic(OrcFile.MAGIC)
+        .addVersion(OrcFile.MAJOR_VERSION)
+        .addVersion(OrcFile.MINOR_VERSION);
     if (compress != CompressionKind.NONE) {
       builder.setCompressionBlockSize(bufferSize);
     }

Modified: hive/branches/branch-0.11/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.11/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1505184&r1=1505183&r2=1505184&view=diff
==============================================================================
--- hive/branches/branch-0.11/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original)
+++ hive/branches/branch-0.11/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Sat Jul 20 18:29:03 2013
@@ -135,4 +135,7 @@ message PostScript {
   optional uint64 footerLength = 1;
   optional CompressionKind compression = 2;
   optional uint64 compressionBlockSize = 3;
+  repeated uint32 version = 4 [packed = true];
+  // Leave this last in the record
+  optional string magic = 8000;
 }
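
A note on the field numbers, since they drive the byte layout the reader depends on: a protobuf field key is (fieldNumber << 3) | wireType, which is where the constants in the generated parser above come from:

    int versionUnpacked = (4 << 3) | 0;     //    32: one varint uint32
    int versionPacked   = (4 << 3) | 2;     //    34: length-delimited packed run
    int magicKey        = (8000 << 3) | 2;  // 64002: length-delimited string

Because fields serialize in increasing field-number order, giving magic the number 8000 pushes the bytes of "ORC" to the very end of the PostScript, immediately before the trailing postscript-length byte, which is exactly where ensureOrcFooter looks for it. Hence the "Leave this last in the record" comment.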