Subject: svn commit: r1668942 [1/4] - in /hive/trunk/ql/src: gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/ java/org/apache/hadoop/hive/ql/io/orc/ protobuf/org/apache/hadoop/hive/ql/io/orc/ test/org/apache/hadoop/hive/ql/io/orc/ test/resources/ test/re...
Date: Tue, 24 Mar 2015 17:34:13 -0000
To: commits@hive.apache.org
From: prasanthj@apache.org

Author: prasanthj
Date: Tue Mar 24 17:34:12 2015
New Revision: 1668942

URL: http://svn.apache.org/r1668942
Log:
HIVE-8746: ORC timestamp columns are sensitive to daylight savings time (Prasanth Jayachandran reviewed by Gopal V, Gunther Hagleitner)

Added:
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java
Modified:
    hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
    hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter.out
    hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter2.out
    hive/trunk/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
    hive/trunk/ql/src/test/resources/orc-file-dump.out
    hive/trunk/ql/src/test/resources/orc-file-has-null.out
    hive/trunk/ql/src/test/results/clientpositive/alter_merge_orc.q.out
    hive/trunk/ql/src/test/results/clientpositive/alter_merge_stats_orc.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out
    hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
    hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
    hive/trunk/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out
    hive/trunk/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out
hive/trunk/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out hive/trunk/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out hive/trunk/ql/src/test/results/clientpositive/orc_analyze.q.out hive/trunk/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out hive/trunk/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out hive/trunk/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out hive/trunk/ql/src/test/results/clientpositive/tez/alter_merge_orc.q.out hive/trunk/ql/src/test/results/clientpositive/tez/alter_merge_stats_orc.q.out hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out hive/trunk/ql/src/test/results/clientpositive/tez/orc_analyze.q.out hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out hive/trunk/ql/src/test/results/clientpositive/vectorized_ptf.q.out Modified: hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1668942&r1=1668941&r2=1668942&view=diff ============================================================================== --- hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original) +++ hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Tue Mar 24 17:34:12 2015 @@ -10603,6 +10603,21 @@ public final class OrcProto { */ org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnEncodingOrBuilder getColumnsOrBuilder( int index); + + // optional string writerTimezone = 3; + /** + * optional string writerTimezone = 3; + */ + boolean hasWriterTimezone(); + /** + * optional string writerTimezone = 3; + */ + java.lang.String getWriterTimezone(); + /** + * optional string writerTimezone = 3; + */ + com.google.protobuf.ByteString + getWriterTimezoneBytes(); } /** * Protobuf type {@code orc.proto.StripeFooter} @@ -10671,6 +10686,11 @@ public final class OrcProto { columns_.add(input.readMessage(org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnEncoding.PARSER, extensionRegistry)); break; } + case 26: { + bitField0_ |= 0x00000001; + writerTimezone_ = input.readBytes(); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -10716,6 +10736,7 @@ public final class OrcProto { return PARSER; } + private int bitField0_; // repeated .orc.proto.Stream streams = 1; public static final int STREAMS_FIELD_NUMBER = 1; private java.util.List streams_; @@ -10788,9 +10809,53 @@ public final class OrcProto { return columns_.get(index); } + // optional string writerTimezone = 3; + public static final int WRITERTIMEZONE_FIELD_NUMBER = 3; + private java.lang.Object writerTimezone_; + /** + * optional string writerTimezone = 3; + */ + public boolean hasWriterTimezone() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * optional string writerTimezone = 3; + */ + public java.lang.String getWriterTimezone() { + java.lang.Object ref = writerTimezone_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + if (bs.isValidUtf8()) { + writerTimezone_ = s; + } + return s; + } + } + /** + * optional string writerTimezone = 3; + */ + public com.google.protobuf.ByteString + getWriterTimezoneBytes() 
{ + java.lang.Object ref = writerTimezone_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + writerTimezone_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + private void initFields() { streams_ = java.util.Collections.emptyList(); columns_ = java.util.Collections.emptyList(); + writerTimezone_ = ""; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -10810,6 +10875,9 @@ public final class OrcProto { for (int i = 0; i < columns_.size(); i++) { output.writeMessage(2, columns_.get(i)); } + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBytes(3, getWriterTimezoneBytes()); + } getUnknownFields().writeTo(output); } @@ -10827,6 +10895,10 @@ public final class OrcProto { size += com.google.protobuf.CodedOutputStream .computeMessageSize(2, columns_.get(i)); } + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(3, getWriterTimezoneBytes()); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -10957,6 +11029,8 @@ public final class OrcProto { } else { columnsBuilder_.clear(); } + writerTimezone_ = ""; + bitField0_ = (bitField0_ & ~0x00000004); return this; } @@ -10984,6 +11058,7 @@ public final class OrcProto { public org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeFooter buildPartial() { org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeFooter result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeFooter(this); int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; if (streamsBuilder_ == null) { if (((bitField0_ & 0x00000001) == 0x00000001)) { streams_ = java.util.Collections.unmodifiableList(streams_); @@ -11002,6 +11077,11 @@ public final class OrcProto { } else { result.columns_ = columnsBuilder_.build(); } + if (((from_bitField0_ & 0x00000004) == 0x00000004)) { + to_bitField0_ |= 0x00000001; + } + result.writerTimezone_ = writerTimezone_; + result.bitField0_ = to_bitField0_; onBuilt(); return result; } @@ -11069,6 +11149,11 @@ public final class OrcProto { } } } + if (other.hasWriterTimezone()) { + bitField0_ |= 0x00000004; + writerTimezone_ = other.writerTimezone_; + onChanged(); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -11576,6 +11661,80 @@ public final class OrcProto { return columnsBuilder_; } + // optional string writerTimezone = 3; + private java.lang.Object writerTimezone_ = ""; + /** + * optional string writerTimezone = 3; + */ + public boolean hasWriterTimezone() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + /** + * optional string writerTimezone = 3; + */ + public java.lang.String getWriterTimezone() { + java.lang.Object ref = writerTimezone_; + if (!(ref instanceof java.lang.String)) { + java.lang.String s = ((com.google.protobuf.ByteString) ref) + .toStringUtf8(); + writerTimezone_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * optional string writerTimezone = 3; + */ + public com.google.protobuf.ByteString + getWriterTimezoneBytes() { + java.lang.Object ref = writerTimezone_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + writerTimezone_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * optional string writerTimezone = 3; + */ + public Builder 
setWriterTimezone( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000004; + writerTimezone_ = value; + onChanged(); + return this; + } + /** + * optional string writerTimezone = 3; + */ + public Builder clearWriterTimezone() { + bitField0_ = (bitField0_ & ~0x00000004); + writerTimezone_ = getDefaultInstance().getWriterTimezone(); + onChanged(); + return this; + } + /** + * optional string writerTimezone = 3; + */ + public Builder setWriterTimezoneBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000004; + writerTimezone_ = value; + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:orc.proto.StripeFooter) } @@ -18921,40 +19080,41 @@ public final class OrcProto { "ng\022,\n\004kind\030\001 \001(\0162\036.orc.proto.ColumnEncod" + "ing.Kind\022\026\n\016dictionarySize\030\002 \001(\r\"D\n\004Kind" + "\022\n\n\006DIRECT\020\000\022\016\n\nDICTIONARY\020\001\022\r\n\tDIRECT_V", - "2\020\002\022\021\n\rDICTIONARY_V2\020\003\"^\n\014StripeFooter\022\"" + + "2\020\002\022\021\n\rDICTIONARY_V2\020\003\"v\n\014StripeFooter\022\"" + "\n\007streams\030\001 \003(\0132\021.orc.proto.Stream\022*\n\007co" + - "lumns\030\002 \003(\0132\031.orc.proto.ColumnEncoding\"\341" + - "\002\n\004Type\022\"\n\004kind\030\001 \001(\0162\024.orc.proto.Type.K" + - "ind\022\024\n\010subtypes\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030" + - "\003 \003(\t\022\025\n\rmaximumLength\030\004 \001(\r\022\021\n\tprecisio" + - "n\030\005 \001(\r\022\r\n\005scale\030\006 \001(\r\"\321\001\n\004Kind\022\013\n\007BOOLE" + - "AN\020\000\022\010\n\004BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LO" + - "NG\020\004\022\t\n\005FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022" + - "\n\n\006BINARY\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003", - "MAP\020\013\022\n\n\006STRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020" + - "\016\022\010\n\004DATE\020\017\022\013\n\007VARCHAR\020\020\022\010\n\004CHAR\020\021\"x\n\021St" + - "ripeInformation\022\016\n\006offset\030\001 \001(\004\022\023\n\013index" + - "Length\030\002 \001(\004\022\022\n\ndataLength\030\003 \001(\004\022\024\n\014foot" + - "erLength\030\004 \001(\004\022\024\n\014numberOfRows\030\005 \001(\004\"/\n\020" + - "UserMetadataItem\022\014\n\004name\030\001 \001(\t\022\r\n\005value\030" + - "\002 \001(\014\"A\n\020StripeStatistics\022-\n\010colStats\030\001 " + - "\003(\0132\033.orc.proto.ColumnStatistics\"<\n\010Meta" + - "data\0220\n\013stripeStats\030\001 \003(\0132\033.orc.proto.St" + - "ripeStatistics\"\222\002\n\006Footer\022\024\n\014headerLengt", - "h\030\001 \001(\004\022\025\n\rcontentLength\030\002 \001(\004\022-\n\007stripe" + - "s\030\003 \003(\0132\034.orc.proto.StripeInformation\022\036\n" + - "\005types\030\004 \003(\0132\017.orc.proto.Type\022-\n\010metadat" + - "a\030\005 \003(\0132\033.orc.proto.UserMetadataItem\022\024\n\014" + - "numberOfRows\030\006 \001(\004\022/\n\nstatistics\030\007 \003(\0132\033" + - ".orc.proto.ColumnStatistics\022\026\n\016rowIndexS" + - "tride\030\010 \001(\r\"\305\001\n\nPostScript\022\024\n\014footerLeng" + - "th\030\001 \001(\004\022/\n\013compression\030\002 \001(\0162\032.orc.prot" + - "o.CompressionKind\022\034\n\024compressionBlockSiz" + - "e\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\026\n\016metadata", - "Length\030\005 
\001(\004\022\025\n\rwriterVersion\030\006 \001(\r\022\016\n\005m" + - "agic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020\000" + - "\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003B\"\n org.a" + - "pache.hadoop.hive.ql.io.orc" + "lumns\030\002 \003(\0132\031.orc.proto.ColumnEncoding\022\026" + + "\n\016writerTimezone\030\003 \001(\t\"\341\002\n\004Type\022\"\n\004kind\030" + + "\001 \001(\0162\024.orc.proto.Type.Kind\022\024\n\010subtypes\030" + + "\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rmaximum" + + "Length\030\004 \001(\r\022\021\n\tprecision\030\005 \001(\r\022\r\n\005scale" + + "\030\006 \001(\r\"\321\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004BYTE\020\001\022\t" + + "\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005FLOAT\020\005\022" + + "\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINARY\020\010\022\r\n\tT", + "IMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n\006STRUCT\020" + + "\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DATE\020\017\022\013\n\007V" + + "ARCHAR\020\020\022\010\n\004CHAR\020\021\"x\n\021StripeInformation\022" + + "\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\n" + + "dataLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024" + + "\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMetadataItem" + + "\022\014\n\004name\030\001 \001(\t\022\r\n\005value\030\002 \001(\014\"A\n\020StripeS" + + "tatistics\022-\n\010colStats\030\001 \003(\0132\033.orc.proto." + + "ColumnStatistics\"<\n\010Metadata\0220\n\013stripeSt" + + "ats\030\001 \003(\0132\033.orc.proto.StripeStatistics\"\222", + "\002\n\006Footer\022\024\n\014headerLength\030\001 \001(\004\022\025\n\rconte" + + "ntLength\030\002 \001(\004\022-\n\007stripes\030\003 \003(\0132\034.orc.pr" + + "oto.StripeInformation\022\036\n\005types\030\004 \003(\0132\017.o" + + "rc.proto.Type\022-\n\010metadata\030\005 \003(\0132\033.orc.pr" + + "oto.UserMetadataItem\022\024\n\014numberOfRows\030\006 \001" + + "(\004\022/\n\nstatistics\030\007 \003(\0132\033.orc.proto.Colum" + + "nStatistics\022\026\n\016rowIndexStride\030\010 \001(\r\"\305\001\n\n" + + "PostScript\022\024\n\014footerLength\030\001 \001(\004\022/\n\013comp" + + "ression\030\002 \001(\0162\032.orc.proto.CompressionKin" + + "d\022\034\n\024compressionBlockSize\030\003 \001(\004\022\023\n\007versi", + "on\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 \001(\004\022\025\n\r" + + "writerVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t*:\n\017C" + + "ompressionKind\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SN" + + "APPY\020\002\022\007\n\003LZO\020\003B\"\n org.apache.hadoop.hiv" + + "e.ql.io.orc" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -19056,7 +19216,7 @@ public final class OrcProto { internal_static_orc_proto_StripeFooter_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_orc_proto_StripeFooter_descriptor, - new java.lang.String[] { "Streams", "Columns", }); + new java.lang.String[] { "Streams", "Columns", "WriterTimezone", }); internal_static_orc_proto_Type_descriptor = getDescriptor().getMessageTypes().get(16); internal_static_orc_proto_Type_fieldAccessorTable = new Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java?rev=1668942&r1=1668941&r2=1668942&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java Tue Mar 24 17:34:12 2015 @@ -50,7 +50,7 @@ import org.codehaus.jettison.json.JSONWr * A tool for printing out the file structure of ORC files. */ public final class FileDump { - private static final String ROWINDEX_PREFIX = "--rowindex="; + private static final String UNKNOWN = "UNKNOWN"; // not used private FileDump() {} @@ -77,9 +77,13 @@ public final class FileDump { } } + boolean printTimeZone = false; + if (cli.hasOption('t')) { + printTimeZone = true; + } String[] files = cli.getArgs(); if (dumpData) printData(Arrays.asList(files), conf); - else printMetaData(Arrays.asList(files), conf, rowIndexCols); + else printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone); } private static void printData(List files, Configuration conf) throws IOException, @@ -90,7 +94,7 @@ public final class FileDump { } private static void printMetaData(List files, Configuration conf, - List rowIndexCols) throws IOException { + List rowIndexCols, boolean printTimeZone) throws IOException { for (String filename : files) { System.out.println("Structure for " + filename); Path path = new Path(filename); @@ -125,11 +129,19 @@ public final class FileDump { for (StripeInformation stripe : reader.getStripes()) { ++stripeIx; long stripeStart = stripe.getOffset(); - System.out.println(" Stripe: " + stripe.toString()); OrcProto.StripeFooter footer = rows.readStripeFooter(stripe); + if (printTimeZone) { + String tz = footer.getWriterTimezone(); + if (tz == null || tz.isEmpty()) { + tz = UNKNOWN; + } + System.out.println(" Stripe: " + stripe.toString() + " timezone: " + tz); + } else { + System.out.println(" Stripe: " + stripe.toString()); + } long sectionStart = stripeStart; for(OrcProto.Stream section: footer.getStreamsList()) { - String kind = section.hasKind() ? section.getKind().name() : "UNKNOWN"; + String kind = section.hasKind() ? 
section.getKind().name() : UNKNOWN; System.out.println(" Stream: column " + section.getColumn() + " section " + kind + " start: " + sectionStart + " length " + section.getLength()); @@ -278,6 +290,13 @@ public final class FileDump { .withDescription("Should the data be printed") .create('d')); + // to avoid breaking unit tests (when run in different time zones) for file dump, printing + // of timezone is made optional + result.addOption(OptionBuilder + .withLongOpt("timezone") + .withDescription("Print writer's time zone") + .create('t')); + result.addOption(OptionBuilder .withLongOpt("help") .withDescription("print help message") Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1668942&r1=1668941&r2=1668942&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Mar 24 17:34:12 2015 @@ -26,13 +26,15 @@ import java.math.BigInteger; import java.nio.ByteBuffer; import java.sql.Date; import java.sql.Timestamp; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; -import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.TimeZone; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.exception.ExceptionUtils; @@ -55,8 +57,8 @@ import org.apache.hadoop.hive.ql.exec.ve import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.hive.ql.io.orc.RecordReaderUtils.ByteBufferAllocatorPool; import org.apache.hadoop.hive.ql.io.filters.BloomFilter; +import org.apache.hadoop.hive.ql.io.orc.RecordReaderUtils.ByteBufferAllocatorPool; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; @@ -78,8 +80,6 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -import com.google.common.collect.Lists; - class RecordReaderImpl implements RecordReader { static final Log LOG = LogFactory.getLog(RecordReaderImpl.class); @@ -186,7 +186,7 @@ class RecordReaderImpl implements Record int bufferSize, long strideRate, Configuration conf - ) throws IOException { + ) throws IOException { this.path = path; this.file = fileSystem.open(path); this.codec = codec; @@ -291,9 +291,9 @@ class RecordReaderImpl implements Record } void startStripe(Map streams, - List encoding + OrcProto.StripeFooter stripeFooter ) throws IOException { - checkEncoding(encoding.get(columnId)); + checkEncoding(stripeFooter.getColumnsList().get(columnId)); InStream in = streams.get(new StreamName(columnId, OrcProto.Stream.Kind.PRESENT)); if (in == null) { @@ -390,9 +390,9 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); reader = new BitFieldReader(streams.get(new 
StreamName(columnId, OrcProto.Stream.Kind.DATA)), 1); } @@ -460,9 +460,9 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); reader = new RunLengthByteReader(streams.get(new StreamName(columnId, OrcProto.Stream.Kind.DATA))); } @@ -544,13 +544,13 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); - reader = createIntegerReader(encodings.get(columnId).getKind(), streams.get(name), true, - false); + reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(name), true, false); } @Override @@ -630,13 +630,13 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); - reader = createIntegerReader(encodings.get(columnId).getKind(), streams.get(name), true, - false); + reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(name), true, false); } @Override @@ -717,13 +717,13 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); - reader = createIntegerReader(encodings.get(columnId).getKind(), streams.get(name), true, - false); + reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(name), true, false); } @Override @@ -791,9 +791,9 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); stream = streams.get(name); @@ -883,9 +883,9 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); @@ -989,14 +989,14 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); stream = streams.get(name); - lengths = createIntegerReader(encodings.get(columnId).getKind(), streams.get(new - StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false, false); + lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false, false); } @Override @@ -1063,10 +1063,15 @@ 
class RecordReaderImpl implements Record } } - public static class TimestampTreeReader extends TreeReader { - protected IntegerReader data = null; - protected IntegerReader nanos = null; + private static class TimestampTreeReader extends TreeReader{ + private IntegerReader data = null; + private IntegerReader nanos = null; private final boolean skipCorrupt; + private Map baseTimestampMap; + private long base_timestamp; + private final TimeZone readerTimeZone; + private TimeZone writerTimeZone; + private boolean hasSameTZRules; TimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException { this(columnId, null, null, null, null, skipCorrupt); @@ -1077,6 +1082,11 @@ class RecordReaderImpl implements Record throws IOException { super(columnId, presentStream); this.skipCorrupt = skipCorrupt; + this.baseTimestampMap = new HashMap<>(); + this.readerTimeZone = TimeZone.getDefault(); + this.writerTimeZone = readerTimeZone; + this.hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone); + this.base_timestamp = getBaseTimestamp(readerTimeZone.getID()); if (encoding != null) { checkEncoding(encoding); @@ -1101,15 +1111,42 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); - data = createIntegerReader(encodings.get(columnId).getKind(), + super.startStripe(streams, stripeFooter); + data = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), streams.get(new StreamName(columnId, OrcProto.Stream.Kind.DATA)), true, skipCorrupt); - nanos = createIntegerReader(encodings.get(columnId).getKind(), + nanos = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), streams.get(new StreamName(columnId, OrcProto.Stream.Kind.SECONDARY)), false, skipCorrupt); + base_timestamp = getBaseTimestamp(stripeFooter.getWriterTimezone()); + } + + private long getBaseTimestamp(String timeZoneId) throws IOException { + // to make sure new readers read old files in the same way + if (timeZoneId == null || timeZoneId.isEmpty()) { + timeZoneId = readerTimeZone.getID(); + } + + if (!baseTimestampMap.containsKey(timeZoneId)) { + writerTimeZone = TimeZone.getTimeZone(timeZoneId); + hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone); + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + sdf.setTimeZone(writerTimeZone); + try { + long epoch = + sdf.parse(WriterImpl.BASE_TIMESTAMP_STRING).getTime() / WriterImpl.MILLIS_PER_SECOND; + baseTimestampMap.put(timeZoneId, epoch); + return epoch; + } catch (ParseException e) { + throw new IOException("Unable to create base timestamp", e); + } finally { + sdf.setTimeZone(readerTimeZone); + } + } + + return baseTimestampMap.get(timeZoneId); } @Override @@ -1134,9 +1171,7 @@ class RecordReaderImpl implements Record } else { result = (TimestampWritable) previous; } - Timestamp ts = new Timestamp(0); - long millis = (data.next() + WriterImpl.BASE_TIMESTAMP) * - WriterImpl.MILLIS_PER_SECOND; + long millis = (data.next() + base_timestamp) * WriterImpl.MILLIS_PER_SECOND; int newNanos = parseNanos(nanos.next()); // fix the rounding when we divided by 1000. 
if (millis >= 0) { @@ -1144,7 +1179,24 @@ class RecordReaderImpl implements Record } else { millis -= newNanos / 1000000; } - ts.setTime(millis); + long offset = 0; + // If reader and writer time zones have different rules, adjust the timezone difference + // between reader and writer taking day light savings into account. + if (!hasSameTZRules) { + offset = writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(millis); + } + long adjustedMillis = millis + offset; + Timestamp ts = new Timestamp(adjustedMillis); + // Sometimes the reader timezone might have changed after adding the adjustedMillis. + // To account for that change, check for any difference in reader timezone after + // adding adjustedMillis. If so use the new offset (offset at adjustedMillis point of time). + if (!hasSameTZRules && + (readerTimeZone.getOffset(millis) != readerTimeZone.getOffset(adjustedMillis))) { + long newOffset = + writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(adjustedMillis); + adjustedMillis = millis + newOffset; + ts.setTime(adjustedMillis); + } ts.setNanos(newNanos); result.set(ts); } @@ -1223,12 +1275,13 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); - reader = createIntegerReader(encodings.get(columnId).getKind(), streams.get(name), true, false); + reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(name), true, false); } @Override @@ -1317,13 +1370,13 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); valueStream = streams.get(new StreamName(columnId, OrcProto.Stream.Kind.DATA)); - scaleReader = createIntegerReader(encodings.get(columnId).getKind(), streams.get( - new StreamName(columnId, OrcProto.Stream.Kind.SECONDARY)), true, false); + scaleReader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(new StreamName(columnId, OrcProto.Stream.Kind.SECONDARY)), true, false); } @Override @@ -1449,11 +1502,11 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { // For each stripe, checks the encoding and initializes the appropriate // reader - switch (encodings.get(columnId).getKind()) { + switch (stripeFooter.getColumnsList().get(columnId).getKind()) { case DIRECT: case DIRECT_V2: reader = new StringDirectTreeReader(columnId); @@ -1464,9 +1517,9 @@ class RecordReaderImpl implements Record break; default: throw new IllegalArgumentException("Unsupported encoding " + - encodings.get(columnId).getKind()); + stripeFooter.getColumnsList().get(columnId).getKind()); } - reader.startStripe(streams, encodings); + reader.startStripe(streams, stripeFooter); } @Override @@ -1599,13 +1652,13 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); StreamName name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); stream = streams.get(name); - lengths = 
createIntegerReader(encodings.get(columnId).getKind(), + lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false, false); } @@ -1720,9 +1773,9 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); // read the dictionary blob StreamName name = new StreamName(columnId, @@ -1733,11 +1786,11 @@ class RecordReaderImpl implements Record // read the lengths name = new StreamName(columnId, OrcProto.Stream.Kind.LENGTH); in = streams.get(name); - readDictionaryLengthStream(in, encodings.get(columnId)); + readDictionaryLengthStream(in, stripeFooter.getColumnsList().get(columnId)); // set up the row reader name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); - reader = createIntegerReader(encodings.get(columnId).getKind(), + reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), streams.get(name), false, false); } @@ -2108,12 +2161,12 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); for(TreeReader field: fields) { if (field != null) { - field.startStripe(streams, encodings); + field.startStripe(streams, stripeFooter); } } } @@ -2184,14 +2237,14 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); + super.startStripe(streams, stripeFooter); tags = new RunLengthByteReader(streams.get(new StreamName(columnId, OrcProto.Stream.Kind.DATA))); for(TreeReader field: fields) { if (field != null) { - field.startStripe(streams, encodings); + field.startStripe(streams, stripeFooter); } } } @@ -2276,14 +2329,14 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); - lengths = createIntegerReader(encodings.get(columnId).getKind(), + super.startStripe(streams, stripeFooter); + lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false, false); if (elementReader != null) { - elementReader.startStripe(streams, encodings); + elementReader.startStripe(streams, stripeFooter); } } @@ -2370,17 +2423,17 @@ class RecordReaderImpl implements Record @Override void startStripe(Map streams, - List encodings + OrcProto.StripeFooter stripeFooter ) throws IOException { - super.startStripe(streams, encodings); - lengths = createIntegerReader(encodings.get(columnId).getKind(), + super.startStripe(streams, stripeFooter); + lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false, false); if (keyReader != null) { - keyReader.startStripe(streams, encodings); + keyReader.startStripe(streams, stripeFooter); } if (valueReader != null) { - valueReader.startStripe(streams, encodings); + valueReader.startStripe(streams, stripeFooter); } } @@ -2976,7 +3029,7 @@ class RecordReaderImpl implements Record } else { 
readPartialDataStreams(stripe); } - reader.startStripe(streams, stripeFooter.getColumnsList()); + reader.startStripe(streams, stripeFooter); // if we skipped the first row group, move the pointers forward if (rowInStripe != 0) { seekToRowEntry(reader, (int) (rowInStripe / rowIndexStride)); Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1668942&r1=1668941&r2=1668942&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Tue Mar 24 17:34:12 2015 @@ -26,10 +26,10 @@ import java.lang.management.ManagementFa import java.nio.ByteBuffer; import java.sql.Timestamp; import java.util.ArrayList; -import java.util.Arrays; import java.util.EnumSet; import java.util.List; import java.util.Map; +import java.util.TimeZone; import java.util.TreeMap; import org.apache.commons.logging.Log; @@ -796,6 +796,7 @@ public class WriterImpl implements Write foundNulls = false; builder.addColumns(getEncoding()); + builder.setWriterTimezone(TimeZone.getDefault().getID()); if (rowIndexStream != null) { if (rowIndex.getEntryCount() != requiredIndexEntries) { throw new IllegalArgumentException("Column has wrong number of " + @@ -1511,13 +1512,13 @@ public class WriterImpl implements Write } static final int MILLIS_PER_SECOND = 1000; - static final long BASE_TIMESTAMP = - Timestamp.valueOf("2015-01-01 00:00:00").getTime() / MILLIS_PER_SECOND; + static final String BASE_TIMESTAMP_STRING = "2015-01-01 00:00:00"; private static class TimestampTreeWriter extends TreeWriter { private final IntegerWriter seconds; private final IntegerWriter nanos; private final boolean isDirectV2; + private final long base_timestamp; TimestampTreeWriter(int columnId, ObjectInspector inspector, @@ -1530,6 +1531,8 @@ public class WriterImpl implements Write this.nanos = createIntegerWriter(writer.createStream(id, OrcProto.Stream.Kind.SECONDARY), false, isDirectV2, writer); recordPosition(rowIndexPosition); + // for unit tests to set different time zones + this.base_timestamp = Timestamp.valueOf(BASE_TIMESTAMP_STRING).getTime() / MILLIS_PER_SECOND; } @Override @@ -1550,7 +1553,7 @@ public class WriterImpl implements Write ((TimestampObjectInspector) inspector). 
getPrimitiveJavaObject(obj); indexStatistics.updateTimestamp(val); - seconds.write((val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP); + seconds.write((val.getTime() / MILLIS_PER_SECOND) - base_timestamp); nanos.write(formatNanos(val.getNanos())); if (createBloomFilter) { bloomFilter.addLong(val.getTime()); Modified: hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1668942&r1=1668941&r2=1668942&view=diff ============================================================================== --- hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original) +++ hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Tue Mar 24 17:34:12 2015 @@ -129,6 +129,7 @@ message ColumnEncoding { message StripeFooter { repeated Stream streams = 1; repeated ColumnEncoding columns = 2; + optional string writerTimezone = 3; } message Type { Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1668942&r1=1668941&r2=1668942&view=diff ============================================================================== --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original) +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Tue Mar 24 17:34:12 2015 @@ -1638,14 +1638,14 @@ public class TestInputOutputFormat { assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00000", split.getPath().toString()); assertEquals(0, split.getStart()); - assertEquals(607, split.getLength()); + assertEquals(625, split.getLength()); split = (HiveInputFormat.HiveInputSplit) splits[1]; assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", split.inputFormatClassName()); assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00001", split.getPath().toString()); assertEquals(0, split.getStart()); - assertEquals(629, split.getLength()); + assertEquals(647, split.getLength()); CombineHiveInputFormat.CombineHiveInputSplit combineSplit = (CombineHiveInputFormat.CombineHiveInputSplit) splits[2]; assertEquals(BUCKETS, combineSplit.getNumPaths()); @@ -1653,7 +1653,7 @@ public class TestInputOutputFormat { assertEquals("mock:/combinationAcid/p=1/00000" + bucket + "_0", combineSplit.getPath(bucket).toString()); assertEquals(0, combineSplit.getOffset(bucket)); - assertEquals(241, combineSplit.getLength(bucket)); + assertEquals(253, combineSplit.getLength(bucket)); } String[] hosts = combineSplit.getLocations(); assertEquals(2, hosts.length); Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java?rev=1668942&view=auto ============================================================================== --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java (added) +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java Tue Mar 24 17:34:12 2015 @@ -0,0 +1,194 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.orc; + +import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.assertNotNull; + +import java.io.File; +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.TimeZone; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hive.common.util.HiveTestUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import com.google.common.collect.Lists; + +/** + * + */ +@RunWith(Parameterized.class) +public class TestOrcTimezone1 { + Path workDir = new Path(System.getProperty("test.tmp.dir", + "target" + File.separator + "test" + File.separator + "tmp")); + Configuration conf; + FileSystem fs; + Path testFilePath; + String writerTimeZone; + String readerTimeZone; + static TimeZone defaultTimeZone = TimeZone.getDefault(); + + public TestOrcTimezone1(String writerTZ, String readerTZ) { + this.writerTimeZone = writerTZ; + this.readerTimeZone = readerTZ; + } + + @Parameterized.Parameters + public static Collection data() { + List result = Arrays.asList(new Object[][]{ + /* Extreme timezones */ + {"GMT-12:00", "GMT+14:00"}, + /* No difference in DST */ + {"America/Los_Angeles", "America/Los_Angeles"}, /* same timezone both with DST */ + {"Europe/Berlin", "Europe/Berlin"}, /* same as above but europe */ + {"America/Phoenix", "Asia/Kolkata"} /* Writer no DST, Reader no DST */, + {"Europe/Berlin", "America/Los_Angeles"} /* Writer DST, Reader DST */, + {"Europe/Berlin", "America/Chicago"} /* Writer DST, Reader DST */, + /* With DST difference */ + {"Europe/Berlin", "UTC"}, + {"UTC", "Europe/Berlin"} /* Writer no DST, Reader DST */, + {"America/Los_Angeles", "Asia/Kolkata"} /* Writer DST, Reader no DST */, + {"Europe/Berlin", "Asia/Kolkata"} /* Writer DST, Reader no DST */, + /* Timezone offsets for the reader has changed historically */ + {"Asia/Saigon", "Pacific/Enderbury"}, + {"UTC", "Asia/Jerusalem"}, + + // NOTE: + // "1995-01-01 03:00:00.688888888" this is not a valid time in Pacific/Enderbury timezone. + // On 1995-01-01 00:00:00 GMT offset moved from -11:00 hr to +13:00 which makes all values + // on 1995-01-01 invalid. 
Try this with joda time + // new MutableDateTime("1995-01-01", DateTimeZone.forTimeZone(readerTimeZone)); + }); + return result; + } + + @Rule + public TestName testCaseName = new TestName(); + + @Before + public void openFileSystem() throws Exception { + conf = new Configuration(); + fs = FileSystem.getLocal(conf); + testFilePath = new Path(workDir, "TestOrcFile." + + testCaseName.getMethodName() + ".orc"); + fs.delete(testFilePath, false); + } + + @After + public void restoreTimeZone() { + TimeZone.setDefault(defaultTimeZone); + } + + @Test + public void testTimestampWriter() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory.getReflectionObjectInspector(Timestamp.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + + TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone)); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000)); + assertEquals(writerTimeZone, TimeZone.getDefault().getID()); + List ts = Lists.newArrayList(); + ts.add("2003-01-01 01:00:00.000000222"); + ts.add("1996-08-02 09:00:00.723100809"); + ts.add("1999-01-01 02:00:00.999999999"); + ts.add("1995-01-02 03:00:00.688888888"); + ts.add("2002-01-01 04:00:00.1"); + ts.add("2010-03-02 05:00:00.000009001"); + ts.add("2005-01-01 06:00:00.000002229"); + ts.add("2006-01-01 07:00:00.900203003"); + ts.add("2003-01-01 08:00:00.800000007"); + ts.add("1998-11-02 10:00:00.857340643"); + ts.add("2008-10-02 11:00:00.0"); + ts.add("9999-01-01 00:00:00.000999"); + ts.add("2014-03-28 00:00:00.0"); + for (String t : ts) { + writer.addRow(Timestamp.valueOf(t)); + } + writer.close(); + + TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone)); + Reader reader = OrcFile.createReader(testFilePath, + OrcFile.readerOptions(conf).filesystem(fs)); + assertEquals(readerTimeZone, TimeZone.getDefault().getID()); + RecordReader rows = reader.rows(null); + int idx = 0; + while (rows.hasNext()) { + Object row = rows.next(null); + Timestamp got = ((TimestampWritable) row).getTimestamp(); + assertEquals(ts.get(idx++), got.toString()); + } + rows.close(); + } + + //@Test + public void testReadTimestampFormat_0_11() throws Exception { + TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone)); + Path oldFilePath = + new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc")); + Reader reader = OrcFile.createReader(oldFilePath, + OrcFile.readerOptions(conf).filesystem(fs)); + + StructObjectInspector readerInspector = (StructObjectInspector) reader + .getObjectInspector(); + List fields = readerInspector + .getAllStructFieldRefs(); + TimestampObjectInspector tso = (TimestampObjectInspector) readerInspector + .getStructFieldRef("ts").getFieldObjectInspector(); + + RecordReader rows = reader.rows(); + Object row = rows.next(null); + assertNotNull(row); + assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"), + tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, + fields.get(12)))); + + // check the contents of second row + assertEquals(true, rows.hasNext()); + rows.seekToRow(7499); + row = rows.next(null); + assertEquals(Timestamp.valueOf("2000-03-12 15:00:01"), + tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, + fields.get(12)))); + + // handle the close up + assertEquals(false, rows.hasNext()); + rows.close(); + } +} Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java?rev=1668942&view=auto ============================================================================== --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java (added) +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java Tue Mar 24 17:34:12 2015 @@ -0,0 +1,142 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.orc; + +import static junit.framework.Assert.assertEquals; + +import java.io.File; +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Random; +import java.util.TimeZone; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import com.google.common.collect.Lists; + +/** + * + */ +@RunWith(Parameterized.class) +public class TestOrcTimezone2 { + Path workDir = new Path(System.getProperty("test.tmp.dir", + "target" + File.separator + "test" + File.separator + "tmp")); + Configuration conf; + FileSystem fs; + Path testFilePath; + String writerTimeZone; + String readerTimeZone; + static TimeZone defaultTimeZone = TimeZone.getDefault(); + + public TestOrcTimezone2(String writerTZ, String readerTZ) { + this.writerTimeZone = writerTZ; + this.readerTimeZone = readerTZ; + } + + @Parameterized.Parameters + public static Collection data() { + String[] allTimeZones = TimeZone.getAvailableIDs(); + Random rand = new Random(123); + int len = allTimeZones.length; + int n = 500; + Object[][] data = new Object[n][]; + for (int i = 0; i < n; i++) { + int wIdx = rand.nextInt(len); + int rIdx = rand.nextInt(len); + data[i] = new Object[2]; + data[i][0] = allTimeZones[wIdx]; + data[i][1] = allTimeZones[rIdx]; + } + return Arrays.asList(data); + } + + @Rule + public TestName testCaseName = new TestName(); + + @Before + public void openFileSystem() throws Exception { + conf = new Configuration(); + fs = FileSystem.getLocal(conf); + testFilePath = new Path(workDir, "TestOrcFile." 
+ + testCaseName.getMethodName() + ".orc"); + fs.delete(testFilePath, false); + } + + @After + public void restoreTimeZone() { + TimeZone.setDefault(defaultTimeZone); + } + + @Test + public void testTimestampWriter() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory.getReflectionObjectInspector(Timestamp.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + + TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone)); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000)); + assertEquals(writerTimeZone, TimeZone.getDefault().getID()); + List ts = Lists.newArrayList(); + ts.add("2003-01-01 01:00:00.000000222"); + ts.add("1999-01-01 02:00:00.999999999"); + ts.add("1995-01-02 03:00:00.688888888"); + ts.add("2002-01-01 04:00:00.1"); + ts.add("2010-03-02 05:00:00.000009001"); + ts.add("2005-01-01 06:00:00.000002229"); + ts.add("2006-01-01 07:00:00.900203003"); + ts.add("2003-01-01 08:00:00.800000007"); + ts.add("1996-08-02 09:00:00.723100809"); + ts.add("1998-11-02 10:00:00.857340643"); + ts.add("2008-10-02 11:00:00.0"); + ts.add("9999-01-01 00:00:00.000999"); + for (String t : ts) { + writer.addRow(Timestamp.valueOf(t)); + } + writer.close(); + + TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone)); + Reader reader = OrcFile.createReader(testFilePath, + OrcFile.readerOptions(conf).filesystem(fs)); + assertEquals(readerTimeZone, TimeZone.getDefault().getID()); + RecordReader rows = reader.rows(null); + int idx = 0; + while (rows.hasNext()) { + Object row = rows.next(null); + Timestamp got = ((TimestampWritable) row).getTimestamp(); + assertEquals(ts.get(idx++), got.toString()); + } + rows.close(); + } +} Modified: hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter.out?rev=1668942&r1=1668941&r2=1668942&view=diff ============================================================================== --- hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter.out (original) +++ hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter.out Tue Mar 24 17:34:12 2015 @@ -39,7 +39,7 @@ File Statistics: Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 Stripes: - Stripe: offset: 3 data: 63765 rows: 5000 tail: 86 index: 845 + Stripe: offset: 3 data: 63765 rows: 5000 tail: 98 index: 845 Stream: column 0 section ROW_INDEX start: 3 length 17 Stream: column 1 section ROW_INDEX start: 20 length 164 Stream: column 2 section ROW_INDEX start: 184 length 173 @@ -67,17 +67,17 @@ Stripes: Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 - Stripe: offset: 64699 data: 63754 rows: 5000 tail: 86 index: 837 - Stream: column 0 section ROW_INDEX start: 64699 length 17 - Stream: column 1 section ROW_INDEX start: 64716 length 162 - Stream: column 2 section ROW_INDEX start: 64878 length 171 - Stream: column 3 section ROW_INDEX start: 65049 length 83 - Stream: column 3 section BLOOM_FILTER start: 65132 length 404 - Stream: column 1 section DATA start: 65536 length 20029 - Stream: column 2 section DATA start: 85565 length 40035 - Stream: column 3 section DATA start: 
125600 length 3532 - Stream: column 3 section LENGTH start: 129132 length 25 - Stream: column 3 section DICTIONARY_DATA start: 129157 length 133 + Stripe: offset: 64711 data: 63754 rows: 5000 tail: 98 index: 837 + Stream: column 0 section ROW_INDEX start: 64711 length 17 + Stream: column 1 section ROW_INDEX start: 64728 length 162 + Stream: column 2 section ROW_INDEX start: 64890 length 171 + Stream: column 3 section ROW_INDEX start: 65061 length 83 + Stream: column 3 section BLOOM_FILTER start: 65144 length 404 + Stream: column 1 section DATA start: 65548 length 20029 + Stream: column 2 section DATA start: 85577 length 40035 + Stream: column 3 section DATA start: 125612 length 3532 + Stream: column 3 section LENGTH start: 129144 length 25 + Stream: column 3 section DICTIONARY_DATA start: 129169 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -95,17 +95,17 @@ Stripes: Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 - Stripe: offset: 129376 data: 63766 rows: 5000 tail: 86 index: 841 - Stream: column 0 section ROW_INDEX start: 129376 length 17 - Stream: column 1 section ROW_INDEX start: 129393 length 159 - Stream: column 2 section ROW_INDEX start: 129552 length 171 - Stream: column 3 section ROW_INDEX start: 129723 length 90 - Stream: column 3 section BLOOM_FILTER start: 129813 length 404 - Stream: column 1 section DATA start: 130217 length 20029 - Stream: column 2 section DATA start: 150246 length 40035 - Stream: column 3 section DATA start: 190281 length 3544 - Stream: column 3 section LENGTH start: 193825 length 25 - Stream: column 3 section DICTIONARY_DATA start: 193850 length 133 + Stripe: offset: 129400 data: 63766 rows: 5000 tail: 98 index: 841 + Stream: column 0 section ROW_INDEX start: 129400 length 17 + Stream: column 1 section ROW_INDEX start: 129417 length 159 + Stream: column 2 section ROW_INDEX start: 129576 length 171 + Stream: column 3 section ROW_INDEX start: 129747 length 90 + Stream: column 3 section BLOOM_FILTER start: 129837 length 404 + Stream: column 1 section DATA start: 130241 length 20029 + Stream: column 2 section DATA start: 150270 length 40035 + Stream: column 3 section DATA start: 190305 length 3544 + Stream: column 3 section LENGTH start: 193849 length 25 + Stream: column 3 section DICTIONARY_DATA start: 193874 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -123,17 +123,17 @@ Stripes: Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 - Stripe: offset: 194069 data: 63796 rows: 5000 tail: 86 index: 844 - Stream: column 0 section ROW_INDEX start: 194069 length 17 - Stream: column 1 section ROW_INDEX start: 194086 length 162 - Stream: column 2 section ROW_INDEX start: 194248 length 170 - Stream: column 3 section ROW_INDEX start: 194418 length 91 - Stream: column 3 section BLOOM_FILTER start: 194509 length 404 - Stream: column 1 section DATA start: 194913 length 20029 - Stream: column 2 section DATA start: 214942 length 40035 - Stream: column 3 
section DATA start: 254977 length 3574 - Stream: column 3 section LENGTH start: 258551 length 25 - Stream: column 3 section DICTIONARY_DATA start: 258576 length 133 + Stripe: offset: 194105 data: 63796 rows: 5000 tail: 98 index: 844 + Stream: column 0 section ROW_INDEX start: 194105 length 17 + Stream: column 1 section ROW_INDEX start: 194122 length 162 + Stream: column 2 section ROW_INDEX start: 194284 length 170 + Stream: column 3 section ROW_INDEX start: 194454 length 91 + Stream: column 3 section BLOOM_FILTER start: 194545 length 404 + Stream: column 1 section DATA start: 194949 length 20029 + Stream: column 2 section DATA start: 214978 length 40035 + Stream: column 3 section DATA start: 255013 length 3574 + Stream: column 3 section LENGTH start: 258587 length 25 + Stream: column 3 section DICTIONARY_DATA start: 258612 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -151,17 +151,17 @@ Stripes: Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 - Stripe: offset: 258795 data: 12940 rows: 1000 tail: 78 index: 432 - Stream: column 0 section ROW_INDEX start: 258795 length 12 - Stream: column 1 section ROW_INDEX start: 258807 length 38 - Stream: column 2 section ROW_INDEX start: 258845 length 41 - Stream: column 3 section ROW_INDEX start: 258886 length 40 - Stream: column 3 section BLOOM_FILTER start: 258926 length 301 - Stream: column 1 section DATA start: 259227 length 4007 - Stream: column 2 section DATA start: 263234 length 8007 - Stream: column 3 section DATA start: 271241 length 768 - Stream: column 3 section LENGTH start: 272009 length 25 - Stream: column 3 section DICTIONARY_DATA start: 272034 length 133 + Stripe: offset: 258843 data: 12940 rows: 1000 tail: 90 index: 432 + Stream: column 0 section ROW_INDEX start: 258843 length 12 + Stream: column 1 section ROW_INDEX start: 258855 length 38 + Stream: column 2 section ROW_INDEX start: 258893 length 41 + Stream: column 3 section ROW_INDEX start: 258934 length 40 + Stream: column 3 section BLOOM_FILTER start: 258974 length 301 + Stream: column 1 section DATA start: 259275 length 4007 + Stream: column 2 section DATA start: 263282 length 8007 + Stream: column 3 section DATA start: 271289 length 768 + Stream: column 3 section LENGTH start: 272057 length 25 + Stream: column 3 section DICTIONARY_DATA start: 272082 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -172,6 +172,6 @@ Stripes: Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 -File length: 272790 bytes +File length: 272850 bytes Padding length: 0 bytes Padding ratio: 0% Modified: hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter2.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter2.out?rev=1668942&r1=1668941&r2=1668942&view=diff ============================================================================== --- hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter2.out (original) +++ hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter2.out Tue Mar 24 17:34:12 2015 @@ -39,7 +39,7 @@ File Statistics: 
Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 Stripes: - Stripe: offset: 3 data: 63765 rows: 5000 tail: 85 index: 6935 + Stripe: offset: 3 data: 63765 rows: 5000 tail: 97 index: 6935 Stream: column 0 section ROW_INDEX start: 3 length 17 Stream: column 1 section ROW_INDEX start: 20 length 164 Stream: column 2 section ROW_INDEX start: 184 length 173 @@ -67,17 +67,17 @@ Stripes: Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772 Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4949 loadFactor: 0.5155 expectedFpp: 0.009676614 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9347 loadFactor: 0.9736 expectedFpp: 0.829482 - Stripe: offset: 70788 data: 63754 rows: 5000 tail: 85 index: 6917 - Stream: column 0 section ROW_INDEX start: 70788 length 17 - Stream: column 1 section ROW_INDEX start: 70805 length 162 - Stream: column 2 section ROW_INDEX start: 70967 length 171 - Stream: column 2 section BLOOM_FILTER start: 71138 length 6484 - Stream: column 3 section ROW_INDEX start: 77622 length 83 - Stream: column 1 section DATA start: 77705 length 20029 - Stream: column 2 section DATA start: 97734 length 40035 - Stream: column 3 section DATA start: 137769 length 3532 - Stream: column 3 section LENGTH start: 141301 length 25 - Stream: column 3 section DICTIONARY_DATA start: 141326 length 133 + Stripe: offset: 70800 data: 63754 rows: 5000 tail: 97 index: 6917 + Stream: column 0 section ROW_INDEX start: 70800 length 17 + Stream: column 1 section ROW_INDEX start: 70817 length 162 + Stream: column 2 section ROW_INDEX start: 70979 length 171 + Stream: column 2 section BLOOM_FILTER start: 71150 length 6484 + Stream: column 3 section ROW_INDEX start: 77634 length 83 + Stream: column 1 section DATA start: 77717 length 20029 + Stream: column 2 section DATA start: 97746 length 40035 + Stream: column 3 section DATA start: 137781 length 3532 + Stream: column 3 section LENGTH start: 141313 length 25 + Stream: column 3 section DICTIONARY_DATA start: 141338 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -95,17 +95,17 @@ Stripes: Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4962 loadFactor: 0.5169 expectedFpp: 0.009855959 Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4966 loadFactor: 0.5173 expectedFpp: 0.009911705 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9344 loadFactor: 0.9733 expectedFpp: 0.8276205 - Stripe: offset: 141544 data: 63766 rows: 5000 tail: 85 index: 6924 - Stream: column 0 section ROW_INDEX start: 141544 length 17 - Stream: column 1 section ROW_INDEX start: 141561 length 159 - Stream: column 2 section ROW_INDEX start: 141720 length 171 - Stream: column 2 section BLOOM_FILTER start: 141891 length 6487 - Stream: column 3 section ROW_INDEX start: 148378 length 90 - Stream: column 1 section DATA start: 148468 length 20029 - Stream: column 2 section DATA start: 168497 length 40035 - Stream: column 3 section DATA start: 208532 length 3544 - Stream: column 3 section LENGTH start: 212076 length 25 - Stream: column 3 section DICTIONARY_DATA start: 212101 length 133 + Stripe: offset: 141568 data: 63766 rows: 5000 tail: 97 index: 6924 + Stream: column 0 section ROW_INDEX start: 141568 length 17 + Stream: column 1 section ROW_INDEX start: 141585 length 159 + Stream: column 2 section ROW_INDEX start: 141744 length 171 + Stream: column 2 section BLOOM_FILTER start: 141915 length 6487 + Stream: column 3 section ROW_INDEX start: 
148402 length 90 + Stream: column 1 section DATA start: 148492 length 20029 + Stream: column 2 section DATA start: 168521 length 40035 + Stream: column 3 section DATA start: 208556 length 3544 + Stream: column 3 section LENGTH start: 212100 length 25 + Stream: column 3 section DICTIONARY_DATA start: 212125 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -123,17 +123,17 @@ Stripes: Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4943 loadFactor: 0.5149 expectedFpp: 0.009594797 Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4930 loadFactor: 0.5135 expectedFpp: 0.009419539 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9333 loadFactor: 0.9722 expectedFpp: 0.82082444 - Stripe: offset: 212319 data: 63796 rows: 5000 tail: 85 index: 6925 - Stream: column 0 section ROW_INDEX start: 212319 length 17 - Stream: column 1 section ROW_INDEX start: 212336 length 162 - Stream: column 2 section ROW_INDEX start: 212498 length 170 - Stream: column 2 section BLOOM_FILTER start: 212668 length 6485 - Stream: column 3 section ROW_INDEX start: 219153 length 91 - Stream: column 1 section DATA start: 219244 length 20029 - Stream: column 2 section DATA start: 239273 length 40035 - Stream: column 3 section DATA start: 279308 length 3574 - Stream: column 3 section LENGTH start: 282882 length 25 - Stream: column 3 section DICTIONARY_DATA start: 282907 length 133 + Stripe: offset: 212355 data: 63796 rows: 5000 tail: 97 index: 6925 + Stream: column 0 section ROW_INDEX start: 212355 length 17 + Stream: column 1 section ROW_INDEX start: 212372 length 162 + Stream: column 2 section ROW_INDEX start: 212534 length 170 + Stream: column 2 section BLOOM_FILTER start: 212704 length 6485 + Stream: column 3 section ROW_INDEX start: 219189 length 91 + Stream: column 1 section DATA start: 219280 length 20029 + Stream: column 2 section DATA start: 239309 length 40035 + Stream: column 3 section DATA start: 279344 length 3574 + Stream: column 3 section LENGTH start: 282918 length 25 + Stream: column 3 section DICTIONARY_DATA start: 282943 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -151,17 +151,17 @@ Stripes: Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4941 loadFactor: 0.5147 expectedFpp: 0.009567649 Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4993 loadFactor: 0.5201 expectedFpp: 0.010295142 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9353 loadFactor: 0.9743 expectedFpp: 0.8332165 - Stripe: offset: 283125 data: 12940 rows: 1000 tail: 78 index: 1468 - Stream: column 0 section ROW_INDEX start: 283125 length 12 - Stream: column 1 section ROW_INDEX start: 283137 length 38 - Stream: column 2 section ROW_INDEX start: 283175 length 41 - Stream: column 2 section BLOOM_FILTER start: 283216 length 1337 - Stream: column 3 section ROW_INDEX start: 284553 length 40 - Stream: column 1 section DATA start: 284593 length 4007 - Stream: column 2 section DATA start: 288600 length 8007 - Stream: column 3 section DATA start: 296607 length 768 - Stream: column 3 section LENGTH start: 297375 length 25 - Stream: column 3 section DICTIONARY_DATA start: 297400 length 133 + Stripe: offset: 283173 data: 12940 rows: 1000 tail: 90 index: 1468 + Stream: column 0 section ROW_INDEX start: 283173 length 12 + Stream: column 1 section ROW_INDEX start: 283185 length 38 + Stream: column 2 section ROW_INDEX start: 283223 length 41 + Stream: column 2 section BLOOM_FILTER start: 283264 length 1337 + 
Stream: column 3 section ROW_INDEX start: 284601 length 40 + Stream: column 1 section DATA start: 284641 length 4007 + Stream: column 2 section DATA start: 288648 length 8007 + Stream: column 3 section DATA start: 296655 length 768 + Stream: column 3 section LENGTH start: 297423 length 25 + Stream: column 3 section DICTIONARY_DATA start: 297448 length 133 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 Encoding column 2: DIRECT_V2 @@ -172,6 +172,6 @@ Stripes: Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294 -File length: 298155 bytes +File length: 298215 bytes Padding length: 0 bytes Padding ratio: 0%
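Note on the mechanism exercised by TestOrcTimezone2 above: the new optional StripeFooter field writerTimezone (field 3) records the JVM default time zone that was in effect when a stripe was written, and the test re-reads each file under a different, randomly chosen default zone while expecting the original wall-clock timestamps back. The sketch below is not the RecordReaderImpl change from this commit; it is only a rough illustration of the kind of offset correction a reader can apply once the writer's zone is known, and the zone IDs and simplified DST handling in it are assumptions for illustration.

    import java.text.SimpleDateFormat;
    import java.util.Calendar;
    import java.util.Date;
    import java.util.TimeZone;

    /**
     * Illustrative sketch only (not the Hive/ORC implementation): shift an epoch-millis
     * value so that the wall-clock reading it had in the writer's zone is preserved when
     * it is rendered in the reader's zone.
     */
    public class WriterTimezoneSketch {

      static long preserveWallClock(long writerMillis, TimeZone writerTZ, TimeZone readerTZ) {
        // getOffset() gives each zone's UTC offset at that instant; DST edge cases right
        // at a transition are ignored in this sketch.
        int writerOffset = writerTZ.getOffset(writerMillis);
        int readerOffset = readerTZ.getOffset(writerMillis);
        return writerMillis + (writerOffset - readerOffset);
      }

      public static void main(String[] args) {
        TimeZone writer = TimeZone.getTimeZone("US/Pacific");   // hypothetical writer zone
        TimeZone reader = TimeZone.getTimeZone("Asia/Kolkata"); // hypothetical reader zone

        // An instant whose wall-clock reading in the writer zone is 2003-01-01 01:00:00.
        Calendar cal = Calendar.getInstance(writer);
        cal.clear();
        cal.set(2003, Calendar.JANUARY, 1, 1, 0, 0);
        long written = cal.getTimeInMillis();

        long adjusted = preserveWallClock(written, writer, reader);

        // Formatting the adjusted instant in the reader zone prints 2003-01-01 01:00:00 again.
        SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        fmt.setTimeZone(reader);
        System.out.println(fmt.format(new Date(adjusted)));
      }
    }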
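The changes to the *.out fixtures are pure bookkeeping that follows from the new field: every stripe tail grows by 12 bytes (86 to 98, 85 to 97, 78 to 90), all later offsets shift by the accumulated amount, and each file's total length grows by 60 bytes, i.e. 12 bytes for each of the 5 stripes. That matches the protobuf cost of one optional string in every StripeFooter: a 1-byte key for field 3, a 1-byte length, and the zone ID itself. The 10-character zone ID used below (for example "US/Pacific") is an assumption for illustration; the diff does not show which ID the test environment actually wrote.

    /**
     * Back-of-the-envelope check of the size deltas visible in the *.out fixtures above.
     * The 10-character zone ID is an assumption; the diff does not show the actual ID.
     */
    public class StripeFooterGrowth {
      public static void main(String[] args) {
        int keyByte = 1;                          // protobuf key for field 3, wire type 2 (0x1A)
        int lengthByte = 1;                       // string lengths < 128 need one varint byte
        int zoneIdBytes = "US/Pacific".length();  // 10, assumed

        int perStripe = keyByte + lengthByte + zoneIdBytes;
        System.out.println(perStripe);            // 12, matching tail 86 -> 98

        int stripes = 5;                          // stripes in orc-file-dump-bloomfilter.out
        System.out.println(perStripe * stripes);  // 60, matching file length 272790 -> 272850
      }
    }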