hive-commits mailing list archives

From prasan...@apache.org
Subject svn commit: r1668942 [1/4] - in /hive/trunk/ql/src: gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/ java/org/apache/hadoop/hive/ql/io/orc/ protobuf/org/apache/hadoop/hive/ql/io/orc/ test/org/apache/hadoop/hive/ql/io/orc/ test/resources/ test/re...
Date Tue, 24 Mar 2015 17:34:13 GMT
Author: prasanthj
Date: Tue Mar 24 17:34:12 2015
New Revision: 1668942

URL: http://svn.apache.org/r1668942
Log:
HIVE-8746: ORC timestamp columns are sensitive to daylight savings time (Prasanth Jayachandran reviewed by Gopal V, Gunther Hagleitner)

Added:
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java
Modified:
    hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
    hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter.out
    hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter2.out
    hive/trunk/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
    hive/trunk/ql/src/test/resources/orc-file-dump.out
    hive/trunk/ql/src/test/resources/orc-file-has-null.out
    hive/trunk/ql/src/test/results/clientpositive/alter_merge_orc.q.out
    hive/trunk/ql/src/test/results/clientpositive/alter_merge_stats_orc.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out
    hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
    hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
    hive/trunk/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out
    hive/trunk/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out
    hive/trunk/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
    hive/trunk/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
    hive/trunk/ql/src/test/results/clientpositive/orc_analyze.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/alter_merge_orc.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/alter_merge_stats_orc.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorized_ptf.q.out
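
For orientation (this is a summary, not part of the committed patch): the change makes WriterImpl record the writer JVM's default time zone in each stripe footer, keeps storing timestamp seconds relative to the fixed base "2015-01-01 00:00:00" interpreted in the writer's zone, and has RecordReaderImpl recompute that base in the writer's zone and shift by the writer/reader offset difference when the two zones have different rules. A minimal standalone sketch of the reader-side arithmetic follows; class and method names here are illustrative, and the patch's extra re-check for the case where the adjustment itself crosses a reader DST boundary, as well as nanosecond handling, are omitted:

    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.TimeZone;

    public class TimezoneAdjustSketch {
      static final String BASE_TIMESTAMP_STRING = "2015-01-01 00:00:00";
      static final int MILLIS_PER_SECOND = 1000;

      // Epoch seconds of the base timestamp as seen in the given zone
      // (mirrors getBaseTimestamp() added to RecordReaderImpl in this commit).
      static long baseSeconds(TimeZone zone) throws ParseException {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        sdf.setTimeZone(zone);
        return sdf.parse(BASE_TIMESTAMP_STRING).getTime() / MILLIS_PER_SECOND;
      }

      // Rebuild millis from the seconds stored in the file, then shift by the
      // writer/reader offset difference when the zone rules differ (DST included).
      static long readMillis(long storedSeconds, TimeZone writerTz, TimeZone readerTz)
          throws ParseException {
        long millis = (storedSeconds + baseSeconds(writerTz)) * MILLIS_PER_SECOND;
        if (!writerTz.hasSameRules(readerTz)) {
          millis += writerTz.getOffset(millis) - readerTz.getOffset(millis);
        }
        return millis;
      }
    }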

Modified: hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1668942&r1=1668941&r2=1668942&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original)
+++ hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Tue Mar 24 17:34:12 2015
@@ -10603,6 +10603,21 @@ public final class OrcProto {
      */
     org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnEncodingOrBuilder getColumnsOrBuilder(
         int index);
+
+    // optional string writerTimezone = 3;
+    /**
+     * <code>optional string writerTimezone = 3;</code>
+     */
+    boolean hasWriterTimezone();
+    /**
+     * <code>optional string writerTimezone = 3;</code>
+     */
+    java.lang.String getWriterTimezone();
+    /**
+     * <code>optional string writerTimezone = 3;</code>
+     */
+    com.google.protobuf.ByteString
+        getWriterTimezoneBytes();
   }
   /**
    * Protobuf type {@code orc.proto.StripeFooter}
@@ -10671,6 +10686,11 @@ public final class OrcProto {
               columns_.add(input.readMessage(org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnEncoding.PARSER, extensionRegistry));
               break;
             }
+            case 26: {
+              bitField0_ |= 0x00000001;
+              writerTimezone_ = input.readBytes();
+              break;
+            }
           }
         }
       } catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -10716,6 +10736,7 @@ public final class OrcProto {
       return PARSER;
     }
 
+    private int bitField0_;
     // repeated .orc.proto.Stream streams = 1;
     public static final int STREAMS_FIELD_NUMBER = 1;
     private java.util.List<org.apache.hadoop.hive.ql.io.orc.OrcProto.Stream> streams_;
@@ -10788,9 +10809,53 @@ public final class OrcProto {
       return columns_.get(index);
     }
 
+    // optional string writerTimezone = 3;
+    public static final int WRITERTIMEZONE_FIELD_NUMBER = 3;
+    private java.lang.Object writerTimezone_;
+    /**
+     * <code>optional string writerTimezone = 3;</code>
+     */
+    public boolean hasWriterTimezone() {
+      return ((bitField0_ & 0x00000001) == 0x00000001);
+    }
+    /**
+     * <code>optional string writerTimezone = 3;</code>
+     */
+    public java.lang.String getWriterTimezone() {
+      java.lang.Object ref = writerTimezone_;
+      if (ref instanceof java.lang.String) {
+        return (java.lang.String) ref;
+      } else {
+        com.google.protobuf.ByteString bs = 
+            (com.google.protobuf.ByteString) ref;
+        java.lang.String s = bs.toStringUtf8();
+        if (bs.isValidUtf8()) {
+          writerTimezone_ = s;
+        }
+        return s;
+      }
+    }
+    /**
+     * <code>optional string writerTimezone = 3;</code>
+     */
+    public com.google.protobuf.ByteString
+        getWriterTimezoneBytes() {
+      java.lang.Object ref = writerTimezone_;
+      if (ref instanceof java.lang.String) {
+        com.google.protobuf.ByteString b = 
+            com.google.protobuf.ByteString.copyFromUtf8(
+                (java.lang.String) ref);
+        writerTimezone_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
     private void initFields() {
       streams_ = java.util.Collections.emptyList();
       columns_ = java.util.Collections.emptyList();
+      writerTimezone_ = "";
     }
     private byte memoizedIsInitialized = -1;
     public final boolean isInitialized() {
@@ -10810,6 +10875,9 @@ public final class OrcProto {
       for (int i = 0; i < columns_.size(); i++) {
         output.writeMessage(2, columns_.get(i));
       }
+      if (((bitField0_ & 0x00000001) == 0x00000001)) {
+        output.writeBytes(3, getWriterTimezoneBytes());
+      }
       getUnknownFields().writeTo(output);
     }
 
@@ -10827,6 +10895,10 @@ public final class OrcProto {
         size += com.google.protobuf.CodedOutputStream
           .computeMessageSize(2, columns_.get(i));
       }
+      if (((bitField0_ & 0x00000001) == 0x00000001)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeBytesSize(3, getWriterTimezoneBytes());
+      }
       size += getUnknownFields().getSerializedSize();
       memoizedSerializedSize = size;
       return size;
@@ -10957,6 +11029,8 @@ public final class OrcProto {
         } else {
           columnsBuilder_.clear();
         }
+        writerTimezone_ = "";
+        bitField0_ = (bitField0_ & ~0x00000004);
         return this;
       }
 
@@ -10984,6 +11058,7 @@ public final class OrcProto {
       public org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeFooter buildPartial() {
         org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeFooter result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeFooter(this);
         int from_bitField0_ = bitField0_;
+        int to_bitField0_ = 0;
         if (streamsBuilder_ == null) {
           if (((bitField0_ & 0x00000001) == 0x00000001)) {
             streams_ = java.util.Collections.unmodifiableList(streams_);
@@ -11002,6 +11077,11 @@ public final class OrcProto {
         } else {
           result.columns_ = columnsBuilder_.build();
         }
+        if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
+          to_bitField0_ |= 0x00000001;
+        }
+        result.writerTimezone_ = writerTimezone_;
+        result.bitField0_ = to_bitField0_;
         onBuilt();
         return result;
       }
@@ -11069,6 +11149,11 @@ public final class OrcProto {
             }
           }
         }
+        if (other.hasWriterTimezone()) {
+          bitField0_ |= 0x00000004;
+          writerTimezone_ = other.writerTimezone_;
+          onChanged();
+        }
         this.mergeUnknownFields(other.getUnknownFields());
         return this;
       }
@@ -11576,6 +11661,80 @@ public final class OrcProto {
         return columnsBuilder_;
       }
 
+      // optional string writerTimezone = 3;
+      private java.lang.Object writerTimezone_ = "";
+      /**
+       * <code>optional string writerTimezone = 3;</code>
+       */
+      public boolean hasWriterTimezone() {
+        return ((bitField0_ & 0x00000004) == 0x00000004);
+      }
+      /**
+       * <code>optional string writerTimezone = 3;</code>
+       */
+      public java.lang.String getWriterTimezone() {
+        java.lang.Object ref = writerTimezone_;
+        if (!(ref instanceof java.lang.String)) {
+          java.lang.String s = ((com.google.protobuf.ByteString) ref)
+              .toStringUtf8();
+          writerTimezone_ = s;
+          return s;
+        } else {
+          return (java.lang.String) ref;
+        }
+      }
+      /**
+       * <code>optional string writerTimezone = 3;</code>
+       */
+      public com.google.protobuf.ByteString
+          getWriterTimezoneBytes() {
+        java.lang.Object ref = writerTimezone_;
+        if (ref instanceof String) {
+          com.google.protobuf.ByteString b = 
+              com.google.protobuf.ByteString.copyFromUtf8(
+                  (java.lang.String) ref);
+          writerTimezone_ = b;
+          return b;
+        } else {
+          return (com.google.protobuf.ByteString) ref;
+        }
+      }
+      /**
+       * <code>optional string writerTimezone = 3;</code>
+       */
+      public Builder setWriterTimezone(
+          java.lang.String value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  bitField0_ |= 0x00000004;
+        writerTimezone_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional string writerTimezone = 3;</code>
+       */
+      public Builder clearWriterTimezone() {
+        bitField0_ = (bitField0_ & ~0x00000004);
+        writerTimezone_ = getDefaultInstance().getWriterTimezone();
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional string writerTimezone = 3;</code>
+       */
+      public Builder setWriterTimezoneBytes(
+          com.google.protobuf.ByteString value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  bitField0_ |= 0x00000004;
+        writerTimezone_ = value;
+        onChanged();
+        return this;
+      }
+
       // @@protoc_insertion_point(builder_scope:orc.proto.StripeFooter)
     }
 
@@ -18921,40 +19080,41 @@ public final class OrcProto {
       "ng\022,\n\004kind\030\001 \001(\0162\036.orc.proto.ColumnEncod" +
       "ing.Kind\022\026\n\016dictionarySize\030\002 \001(\r\"D\n\004Kind" +
       "\022\n\n\006DIRECT\020\000\022\016\n\nDICTIONARY\020\001\022\r\n\tDIRECT_V",
-      "2\020\002\022\021\n\rDICTIONARY_V2\020\003\"^\n\014StripeFooter\022\"" +
+      "2\020\002\022\021\n\rDICTIONARY_V2\020\003\"v\n\014StripeFooter\022\"" +
       "\n\007streams\030\001 \003(\0132\021.orc.proto.Stream\022*\n\007co" +
-      "lumns\030\002 \003(\0132\031.orc.proto.ColumnEncoding\"\341" +
-      "\002\n\004Type\022\"\n\004kind\030\001 \001(\0162\024.orc.proto.Type.K" +
-      "ind\022\024\n\010subtypes\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030" +
-      "\003 \003(\t\022\025\n\rmaximumLength\030\004 \001(\r\022\021\n\tprecisio" +
-      "n\030\005 \001(\r\022\r\n\005scale\030\006 \001(\r\"\321\001\n\004Kind\022\013\n\007BOOLE" +
-      "AN\020\000\022\010\n\004BYTE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LO" +
-      "NG\020\004\022\t\n\005FLOAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022" +
-      "\n\n\006BINARY\020\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003",
-      "MAP\020\013\022\n\n\006STRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020" +
-      "\016\022\010\n\004DATE\020\017\022\013\n\007VARCHAR\020\020\022\010\n\004CHAR\020\021\"x\n\021St" +
-      "ripeInformation\022\016\n\006offset\030\001 \001(\004\022\023\n\013index" +
-      "Length\030\002 \001(\004\022\022\n\ndataLength\030\003 \001(\004\022\024\n\014foot" +
-      "erLength\030\004 \001(\004\022\024\n\014numberOfRows\030\005 \001(\004\"/\n\020" +
-      "UserMetadataItem\022\014\n\004name\030\001 \001(\t\022\r\n\005value\030" +
-      "\002 \001(\014\"A\n\020StripeStatistics\022-\n\010colStats\030\001 " +
-      "\003(\0132\033.orc.proto.ColumnStatistics\"<\n\010Meta" +
-      "data\0220\n\013stripeStats\030\001 \003(\0132\033.orc.proto.St" +
-      "ripeStatistics\"\222\002\n\006Footer\022\024\n\014headerLengt",
-      "h\030\001 \001(\004\022\025\n\rcontentLength\030\002 \001(\004\022-\n\007stripe" +
-      "s\030\003 \003(\0132\034.orc.proto.StripeInformation\022\036\n" +
-      "\005types\030\004 \003(\0132\017.orc.proto.Type\022-\n\010metadat" +
-      "a\030\005 \003(\0132\033.orc.proto.UserMetadataItem\022\024\n\014" +
-      "numberOfRows\030\006 \001(\004\022/\n\nstatistics\030\007 \003(\0132\033" +
-      ".orc.proto.ColumnStatistics\022\026\n\016rowIndexS" +
-      "tride\030\010 \001(\r\"\305\001\n\nPostScript\022\024\n\014footerLeng" +
-      "th\030\001 \001(\004\022/\n\013compression\030\002 \001(\0162\032.orc.prot" +
-      "o.CompressionKind\022\034\n\024compressionBlockSiz" +
-      "e\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\026\n\016metadata",
-      "Length\030\005 \001(\004\022\025\n\rwriterVersion\030\006 \001(\r\022\016\n\005m" +
-      "agic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020\000" +
-      "\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003B\"\n org.a" +
-      "pache.hadoop.hive.ql.io.orc"
+      "lumns\030\002 \003(\0132\031.orc.proto.ColumnEncoding\022\026" +
+      "\n\016writerTimezone\030\003 \001(\t\"\341\002\n\004Type\022\"\n\004kind\030" +
+      "\001 \001(\0162\024.orc.proto.Type.Kind\022\024\n\010subtypes\030" +
+      "\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rmaximum" +
+      "Length\030\004 \001(\r\022\021\n\tprecision\030\005 \001(\r\022\r\n\005scale" +
+      "\030\006 \001(\r\"\321\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004BYTE\020\001\022\t" +
+      "\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005FLOAT\020\005\022" +
+      "\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINARY\020\010\022\r\n\tT",
+      "IMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n\006STRUCT\020" +
+      "\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DATE\020\017\022\013\n\007V" +
+      "ARCHAR\020\020\022\010\n\004CHAR\020\021\"x\n\021StripeInformation\022" +
+      "\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\n" +
+      "dataLength\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024" +
+      "\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMetadataItem" +
+      "\022\014\n\004name\030\001 \001(\t\022\r\n\005value\030\002 \001(\014\"A\n\020StripeS" +
+      "tatistics\022-\n\010colStats\030\001 \003(\0132\033.orc.proto." +
+      "ColumnStatistics\"<\n\010Metadata\0220\n\013stripeSt" +
+      "ats\030\001 \003(\0132\033.orc.proto.StripeStatistics\"\222",
+      "\002\n\006Footer\022\024\n\014headerLength\030\001 \001(\004\022\025\n\rconte" +
+      "ntLength\030\002 \001(\004\022-\n\007stripes\030\003 \003(\0132\034.orc.pr" +
+      "oto.StripeInformation\022\036\n\005types\030\004 \003(\0132\017.o" +
+      "rc.proto.Type\022-\n\010metadata\030\005 \003(\0132\033.orc.pr" +
+      "oto.UserMetadataItem\022\024\n\014numberOfRows\030\006 \001" +
+      "(\004\022/\n\nstatistics\030\007 \003(\0132\033.orc.proto.Colum" +
+      "nStatistics\022\026\n\016rowIndexStride\030\010 \001(\r\"\305\001\n\n" +
+      "PostScript\022\024\n\014footerLength\030\001 \001(\004\022/\n\013comp" +
+      "ression\030\002 \001(\0162\032.orc.proto.CompressionKin" +
+      "d\022\034\n\024compressionBlockSize\030\003 \001(\004\022\023\n\007versi",
+      "on\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 \001(\004\022\025\n\r" +
+      "writerVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t*:\n\017C" +
+      "ompressionKind\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SN" +
+      "APPY\020\002\022\007\n\003LZO\020\003B\"\n org.apache.hadoop.hiv" +
+      "e.ql.io.orc"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -19056,7 +19216,7 @@ public final class OrcProto {
           internal_static_orc_proto_StripeFooter_fieldAccessorTable = new
             com.google.protobuf.GeneratedMessage.FieldAccessorTable(
               internal_static_orc_proto_StripeFooter_descriptor,
-              new java.lang.String[] { "Streams", "Columns", });
+              new java.lang.String[] { "Streams", "Columns", "WriterTimezone", });
           internal_static_orc_proto_Type_descriptor =
             getDescriptor().getMessageTypes().get(16);
           internal_static_orc_proto_Type_fieldAccessorTable = new

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java?rev=1668942&r1=1668941&r2=1668942&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java Tue Mar 24 17:34:12 2015
@@ -50,7 +50,7 @@ import org.codehaus.jettison.json.JSONWr
  * A tool for printing out the file structure of ORC files.
  */
 public final class FileDump {
-  private static final String ROWINDEX_PREFIX = "--rowindex=";
+  private static final String UNKNOWN = "UNKNOWN";
 
   // not used
   private FileDump() {}
@@ -77,9 +77,13 @@ public final class FileDump {
       }
     }
 
+    boolean printTimeZone = false;
+    if (cli.hasOption('t')) {
+      printTimeZone = true;
+    }
     String[] files = cli.getArgs();
     if (dumpData) printData(Arrays.asList(files), conf);
-    else printMetaData(Arrays.asList(files), conf, rowIndexCols);
+    else printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone);
   }
 
   private static void printData(List<String> files, Configuration conf) throws IOException,
@@ -90,7 +94,7 @@ public final class FileDump {
   }
 
   private static void printMetaData(List<String> files, Configuration conf,
-                                    List<Integer> rowIndexCols) throws IOException {
+      List<Integer> rowIndexCols, boolean printTimeZone) throws IOException {
     for (String filename : files) {
       System.out.println("Structure for " + filename);
       Path path = new Path(filename);
@@ -125,11 +129,19 @@ public final class FileDump {
       for (StripeInformation stripe : reader.getStripes()) {
         ++stripeIx;
         long stripeStart = stripe.getOffset();
-        System.out.println("  Stripe: " + stripe.toString());
         OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
+        if (printTimeZone) {
+          String tz = footer.getWriterTimezone();
+          if (tz == null || tz.isEmpty()) {
+            tz = UNKNOWN;
+          }
+          System.out.println("  Stripe: " + stripe.toString() + " timezone: " + tz);
+        } else {
+          System.out.println("  Stripe: " + stripe.toString());
+        }
         long sectionStart = stripeStart;
         for(OrcProto.Stream section: footer.getStreamsList()) {
-          String kind = section.hasKind() ? section.getKind().name() : "UNKNOWN";
+          String kind = section.hasKind() ? section.getKind().name() : UNKNOWN;
           System.out.println("    Stream: column " + section.getColumn() +
               " section " + kind + " start: " + sectionStart +
               " length " + section.getLength());
@@ -278,6 +290,13 @@ public final class FileDump {
         .withDescription("Should the data be printed")
         .create('d'));
 
+    // to avoid breaking unit tests (when run in different time zones) for file dump, printing
+    // of timezone is made optional
+    result.addOption(OptionBuilder
+        .withLongOpt("timezone")
+        .withDescription("Print writer's time zone")
+        .create('t'));
+
     result.addOption(OptionBuilder
         .withLongOpt("help")
         .withDescription("print help message")

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1668942&r1=1668941&r2=1668942&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Mar 24 17:34:12 2015
@@ -26,13 +26,15 @@ import java.math.BigInteger;
 import java.nio.ByteBuffer;
 import java.sql.Date;
 import java.sql.Timestamp;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.TimeZone;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang3.exception.ExceptionUtils;
@@ -55,8 +57,8 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
-import org.apache.hadoop.hive.ql.io.orc.RecordReaderUtils.ByteBufferAllocatorPool;
 import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
+import org.apache.hadoop.hive.ql.io.orc.RecordReaderUtils.ByteBufferAllocatorPool;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -78,8 +80,6 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 
-import com.google.common.collect.Lists;
-
 class RecordReaderImpl implements RecordReader {
 
   static final Log LOG = LogFactory.getLog(RecordReaderImpl.class);
@@ -186,7 +186,7 @@ class RecordReaderImpl implements Record
                    int bufferSize,
                    long strideRate,
                    Configuration conf
-                  ) throws IOException {
+                   ) throws IOException {
     this.path = path;
     this.file = fileSystem.open(path);
     this.codec = codec;
@@ -291,9 +291,9 @@ class RecordReaderImpl implements Record
     }
 
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encoding
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      checkEncoding(encoding.get(columnId));
+      checkEncoding(stripeFooter.getColumnsList().get(columnId));
       InStream in = streams.get(new StreamName(columnId,
           OrcProto.Stream.Kind.PRESENT));
       if (in == null) {
@@ -390,9 +390,9 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                      ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
       reader = new BitFieldReader(streams.get(new StreamName(columnId,
           OrcProto.Stream.Kind.DATA)), 1);
     }
@@ -460,9 +460,9 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
       reader = new RunLengthByteReader(streams.get(new StreamName(columnId,
           OrcProto.Stream.Kind.DATA)));
     }
@@ -544,13 +544,13 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
       StreamName name = new StreamName(columnId,
           OrcProto.Stream.Kind.DATA);
-      reader = createIntegerReader(encodings.get(columnId).getKind(), streams.get(name), true,
-          false);
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(name), true, false);
     }
 
     @Override
@@ -630,13 +630,13 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
       StreamName name = new StreamName(columnId,
           OrcProto.Stream.Kind.DATA);
-      reader = createIntegerReader(encodings.get(columnId).getKind(), streams.get(name), true,
-          false);
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(name), true, false);
     }
 
     @Override
@@ -717,13 +717,13 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
       StreamName name = new StreamName(columnId,
           OrcProto.Stream.Kind.DATA);
-      reader = createIntegerReader(encodings.get(columnId).getKind(), streams.get(name), true,
-          false);
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(name), true, false);
     }
 
     @Override
@@ -791,9 +791,9 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
       StreamName name = new StreamName(columnId,
           OrcProto.Stream.Kind.DATA);
       stream = streams.get(name);
@@ -883,9 +883,9 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
       StreamName name =
         new StreamName(columnId,
           OrcProto.Stream.Kind.DATA);
@@ -989,14 +989,14 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
       StreamName name = new StreamName(columnId,
           OrcProto.Stream.Kind.DATA);
       stream = streams.get(name);
-      lengths = createIntegerReader(encodings.get(columnId).getKind(), streams.get(new
-          StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false, false);
+      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false, false);
     }
 
     @Override
@@ -1063,10 +1063,15 @@ class RecordReaderImpl implements Record
     }
   }
 
-  public static class TimestampTreeReader extends TreeReader {
-    protected IntegerReader data = null;
-    protected IntegerReader nanos = null;
+  private static class TimestampTreeReader extends TreeReader{
+    private IntegerReader data = null;
+    private IntegerReader nanos = null;
     private final boolean skipCorrupt;
+    private Map<String, Long> baseTimestampMap;
+    private long base_timestamp;
+    private final TimeZone readerTimeZone;
+    private TimeZone writerTimeZone;
+    private boolean hasSameTZRules;
 
     TimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
       this(columnId, null, null, null, null, skipCorrupt);
@@ -1077,6 +1082,11 @@ class RecordReaderImpl implements Record
         throws IOException {
       super(columnId, presentStream);
       this.skipCorrupt = skipCorrupt;
+      this.baseTimestampMap = new HashMap<>();
+      this.readerTimeZone = TimeZone.getDefault();
+      this.writerTimeZone = readerTimeZone;
+      this.hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone);
+      this.base_timestamp = getBaseTimestamp(readerTimeZone.getID());
       if (encoding != null) {
         checkEncoding(encoding);
 
@@ -1101,15 +1111,42 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
-      data = createIntegerReader(encodings.get(columnId).getKind(),
+      super.startStripe(streams, stripeFooter);
+      data = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
           streams.get(new StreamName(columnId,
               OrcProto.Stream.Kind.DATA)), true, skipCorrupt);
-      nanos = createIntegerReader(encodings.get(columnId).getKind(),
+      nanos = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
           streams.get(new StreamName(columnId,
               OrcProto.Stream.Kind.SECONDARY)), false, skipCorrupt);
+      base_timestamp = getBaseTimestamp(stripeFooter.getWriterTimezone());
+    }
+
+    private long getBaseTimestamp(String timeZoneId) throws IOException {
+      // to make sure new readers read old files in the same way
+      if (timeZoneId == null || timeZoneId.isEmpty()) {
+        timeZoneId = readerTimeZone.getID();
+      }
+
+      if (!baseTimestampMap.containsKey(timeZoneId)) {
+        writerTimeZone = TimeZone.getTimeZone(timeZoneId);
+        hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone);
+        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+        sdf.setTimeZone(writerTimeZone);
+        try {
+          long epoch =
+              sdf.parse(WriterImpl.BASE_TIMESTAMP_STRING).getTime() / WriterImpl.MILLIS_PER_SECOND;
+          baseTimestampMap.put(timeZoneId, epoch);
+          return epoch;
+        } catch (ParseException e) {
+          throw new IOException("Unable to create base timestamp", e);
+        } finally {
+          sdf.setTimeZone(readerTimeZone);
+        }
+      }
+
+      return baseTimestampMap.get(timeZoneId);
     }
 
     @Override
@@ -1134,9 +1171,7 @@ class RecordReaderImpl implements Record
         } else {
           result = (TimestampWritable) previous;
         }
-        Timestamp ts = new Timestamp(0);
-        long millis = (data.next() + WriterImpl.BASE_TIMESTAMP) *
-            WriterImpl.MILLIS_PER_SECOND;
+        long millis = (data.next() + base_timestamp) * WriterImpl.MILLIS_PER_SECOND;
         int newNanos = parseNanos(nanos.next());
         // fix the rounding when we divided by 1000.
         if (millis >= 0) {
@@ -1144,7 +1179,24 @@ class RecordReaderImpl implements Record
         } else {
           millis -= newNanos / 1000000;
         }
-        ts.setTime(millis);
+        long offset = 0;
+        // If reader and writer time zones have different rules, adjust the timezone difference
+        // between reader and writer taking day light savings into account.
+        if (!hasSameTZRules) {
+          offset = writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(millis);
+        }
+        long adjustedMillis = millis + offset;
+        Timestamp ts = new Timestamp(adjustedMillis);
+        // Sometimes the reader timezone might have changed after adding the adjustedMillis.
+        // To account for that change, check for any difference in reader timezone after
+        // adding adjustedMillis. If so use the new offset (offset at adjustedMillis point of time).
+        if (!hasSameTZRules &&
+            (readerTimeZone.getOffset(millis) != readerTimeZone.getOffset(adjustedMillis))) {
+          long newOffset =
+              writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(adjustedMillis);
+            adjustedMillis = millis + newOffset;
+            ts.setTime(adjustedMillis);
+        }
         ts.setNanos(newNanos);
         result.set(ts);
       }
@@ -1223,12 +1275,13 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
       StreamName name = new StreamName(columnId,
           OrcProto.Stream.Kind.DATA);
-      reader = createIntegerReader(encodings.get(columnId).getKind(), streams.get(name), true, false);
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(name), true, false);
     }
 
     @Override
@@ -1317,13 +1370,13 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
     ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
       valueStream = streams.get(new StreamName(columnId,
           OrcProto.Stream.Kind.DATA));
-      scaleReader = createIntegerReader(encodings.get(columnId).getKind(), streams.get(
-          new StreamName(columnId, OrcProto.Stream.Kind.SECONDARY)), true, false);
+      scaleReader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(new StreamName(columnId, OrcProto.Stream.Kind.SECONDARY)), true, false);
     }
 
     @Override
@@ -1449,11 +1502,11 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
       // For each stripe, checks the encoding and initializes the appropriate
       // reader
-      switch (encodings.get(columnId).getKind()) {
+      switch (stripeFooter.getColumnsList().get(columnId).getKind()) {
         case DIRECT:
         case DIRECT_V2:
           reader = new StringDirectTreeReader(columnId);
@@ -1464,9 +1517,9 @@ class RecordReaderImpl implements Record
           break;
         default:
           throw new IllegalArgumentException("Unsupported encoding " +
-              encodings.get(columnId).getKind());
+              stripeFooter.getColumnsList().get(columnId).getKind());
       }
-      reader.startStripe(streams, encodings);
+      reader.startStripe(streams, stripeFooter);
     }
 
     @Override
@@ -1599,13 +1652,13 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
       StreamName name = new StreamName(columnId,
           OrcProto.Stream.Kind.DATA);
       stream = streams.get(name);
-      lengths = createIntegerReader(encodings.get(columnId).getKind(),
+      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
           streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)),
           false, false);
     }
@@ -1720,9 +1773,9 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
 
       // read the dictionary blob
       StreamName name = new StreamName(columnId,
@@ -1733,11 +1786,11 @@ class RecordReaderImpl implements Record
       // read the lengths
       name = new StreamName(columnId, OrcProto.Stream.Kind.LENGTH);
       in = streams.get(name);
-      readDictionaryLengthStream(in, encodings.get(columnId));
+      readDictionaryLengthStream(in, stripeFooter.getColumnsList().get(columnId));
 
       // set up the row reader
       name = new StreamName(columnId, OrcProto.Stream.Kind.DATA);
-      reader = createIntegerReader(encodings.get(columnId).getKind(),
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
           streams.get(name), false, false);
     }
 
@@ -2108,12 +2161,12 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
       for(TreeReader field: fields) {
         if (field != null) {
-          field.startStripe(streams, encodings);
+          field.startStripe(streams, stripeFooter);
         }
       }
     }
@@ -2184,14 +2237,14 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                      ) throws IOException {
-      super.startStripe(streams, encodings);
+      super.startStripe(streams, stripeFooter);
       tags = new RunLengthByteReader(streams.get(new StreamName(columnId,
           OrcProto.Stream.Kind.DATA)));
       for(TreeReader field: fields) {
         if (field != null) {
-          field.startStripe(streams, encodings);
+          field.startStripe(streams, stripeFooter);
         }
       }
     }
@@ -2276,14 +2329,14 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
-      lengths = createIntegerReader(encodings.get(columnId).getKind(),
+      super.startStripe(streams, stripeFooter);
+      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
           streams.get(new StreamName(columnId,
               OrcProto.Stream.Kind.LENGTH)), false, false);
       if (elementReader != null) {
-        elementReader.startStripe(streams, encodings);
+        elementReader.startStripe(streams, stripeFooter);
       }
     }
 
@@ -2370,17 +2423,17 @@ class RecordReaderImpl implements Record
 
     @Override
     void startStripe(Map<StreamName, InStream> streams,
-                     List<OrcProto.ColumnEncoding> encodings
+                     OrcProto.StripeFooter stripeFooter
                     ) throws IOException {
-      super.startStripe(streams, encodings);
-      lengths = createIntegerReader(encodings.get(columnId).getKind(),
+      super.startStripe(streams, stripeFooter);
+      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
           streams.get(new StreamName(columnId,
               OrcProto.Stream.Kind.LENGTH)), false, false);
       if (keyReader != null) {
-        keyReader.startStripe(streams, encodings);
+        keyReader.startStripe(streams, stripeFooter);
       }
       if (valueReader != null) {
-        valueReader.startStripe(streams, encodings);
+        valueReader.startStripe(streams, stripeFooter);
       }
     }
 
@@ -2976,7 +3029,7 @@ class RecordReaderImpl implements Record
       } else {
         readPartialDataStreams(stripe);
       }
-      reader.startStripe(streams, stripeFooter.getColumnsList());
+      reader.startStripe(streams, stripeFooter);
       // if we skipped the first row group, move the pointers forward
       if (rowInStripe != 0) {
         seekToRowEntry(reader, (int) (rowInStripe / rowIndexStride));
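
The comments in the TimestampTreeReader changes above note that the adjustment must be computed per instant rather than once per file, because daylight saving makes the writer/reader offset difference vary through the year. A small standalone illustration using only java.util.TimeZone (the zone IDs and epoch values are arbitrary examples, not taken from the patch):

    import java.util.TimeZone;

    public class DstOffsetDemo {
      public static void main(String[] args) {
        TimeZone writer = TimeZone.getTimeZone("Europe/Berlin"); // observes DST
        TimeZone reader = TimeZone.getTimeZone("Asia/Kolkata");  // no DST
        long winter = 1421280000000L; // 2015-01-15 00:00:00 UTC
        long summer = 1436918400000L; // 2015-07-15 00:00:00 UTC
        // The correction the reader must apply is not a single constant:
        System.out.println(writer.getOffset(winter) - reader.getOffset(winter)); // -16200000 (-4.5 h)
        System.out.println(writer.getOffset(summer) - reader.getOffset(summer)); // -12600000 (-3.5 h)
      }
    }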

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1668942&r1=1668941&r2=1668942&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Tue Mar 24 17:34:12 2015
@@ -26,10 +26,10 @@ import java.lang.management.ManagementFa
 import java.nio.ByteBuffer;
 import java.sql.Timestamp;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.EnumSet;
 import java.util.List;
 import java.util.Map;
+import java.util.TimeZone;
 import java.util.TreeMap;
 
 import org.apache.commons.logging.Log;
@@ -796,6 +796,7 @@ public class WriterImpl implements Write
       foundNulls = false;
 
       builder.addColumns(getEncoding());
+      builder.setWriterTimezone(TimeZone.getDefault().getID());
       if (rowIndexStream != null) {
         if (rowIndex.getEntryCount() != requiredIndexEntries) {
           throw new IllegalArgumentException("Column has wrong number of " +
@@ -1511,13 +1512,13 @@ public class WriterImpl implements Write
   }
 
   static final int MILLIS_PER_SECOND = 1000;
-  static final long BASE_TIMESTAMP =
-      Timestamp.valueOf("2015-01-01 00:00:00").getTime() / MILLIS_PER_SECOND;
+  static final String BASE_TIMESTAMP_STRING = "2015-01-01 00:00:00";
 
   private static class TimestampTreeWriter extends TreeWriter {
     private final IntegerWriter seconds;
     private final IntegerWriter nanos;
     private final boolean isDirectV2;
+    private final long base_timestamp;
 
     TimestampTreeWriter(int columnId,
                      ObjectInspector inspector,
@@ -1530,6 +1531,8 @@ public class WriterImpl implements Write
       this.nanos = createIntegerWriter(writer.createStream(id,
           OrcProto.Stream.Kind.SECONDARY), false, isDirectV2, writer);
       recordPosition(rowIndexPosition);
+      // for unit tests to set different time zones
+      this.base_timestamp = Timestamp.valueOf(BASE_TIMESTAMP_STRING).getTime() / MILLIS_PER_SECOND;
     }
 
     @Override
@@ -1550,7 +1553,7 @@ public class WriterImpl implements Write
             ((TimestampObjectInspector) inspector).
                 getPrimitiveJavaObject(obj);
         indexStatistics.updateTimestamp(val);
-        seconds.write((val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP);
+        seconds.write((val.getTime() / MILLIS_PER_SECOND) - base_timestamp);
         nanos.write(formatNanos(val.getNanos()));
         if (createBloomFilter) {
           bloomFilter.addLong(val.getTime());

Modified: hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1668942&r1=1668941&r2=1668942&view=diff
==============================================================================
--- hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original)
+++ hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Tue Mar 24 17:34:12 2015
@@ -129,6 +129,7 @@ message ColumnEncoding {
 message StripeFooter {
   repeated Stream streams = 1;
   repeated ColumnEncoding columns = 2;
+  optional string writerTimezone = 3;
 }
 
 message Type {

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1668942&r1=1668941&r2=1668942&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Tue Mar 24 17:34:12 2015
@@ -1638,14 +1638,14 @@ public class TestInputOutputFormat {
     assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00000",
         split.getPath().toString());
     assertEquals(0, split.getStart());
-    assertEquals(607, split.getLength());
+    assertEquals(625, split.getLength());
     split = (HiveInputFormat.HiveInputSplit) splits[1];
     assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
         split.inputFormatClassName());
     assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00001",
         split.getPath().toString());
     assertEquals(0, split.getStart());
-    assertEquals(629, split.getLength());
+    assertEquals(647, split.getLength());
     CombineHiveInputFormat.CombineHiveInputSplit combineSplit =
         (CombineHiveInputFormat.CombineHiveInputSplit) splits[2];
     assertEquals(BUCKETS, combineSplit.getNumPaths());
@@ -1653,7 +1653,7 @@ public class TestInputOutputFormat {
       assertEquals("mock:/combinationAcid/p=1/00000" + bucket + "_0",
           combineSplit.getPath(bucket).toString());
       assertEquals(0, combineSplit.getOffset(bucket));
-      assertEquals(241, combineSplit.getLength(bucket));
+      assertEquals(253, combineSplit.getLength(bucket));
     }
     String[] hosts = combineSplit.getLocations();
     assertEquals(2, hosts.length);

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java?rev=1668942&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java Tue Mar 24 17:34:12 2015
@@ -0,0 +1,194 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertNotNull;
+
+import java.io.File;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.TimeZone;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hive.common.util.HiveTestUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.google.common.collect.Lists;
+
+/**
+ *
+ */
+@RunWith(Parameterized.class)
+public class TestOrcTimezone1 {
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+  String writerTimeZone;
+  String readerTimeZone;
+  static TimeZone defaultTimeZone = TimeZone.getDefault();
+
+  public TestOrcTimezone1(String writerTZ, String readerTZ) {
+    this.writerTimeZone = writerTZ;
+    this.readerTimeZone = readerTZ;
+  }
+
+  @Parameterized.Parameters
+  public static Collection<Object[]> data() {
+    List<Object[]> result = Arrays.asList(new Object[][]{
+        /* Extreme timezones */
+        {"GMT-12:00", "GMT+14:00"},
+        /* No difference in DST */
+        {"America/Los_Angeles", "America/Los_Angeles"}, /* same timezone both with DST */
+        {"Europe/Berlin", "Europe/Berlin"}, /* same as above but europe */
+        {"America/Phoenix", "Asia/Kolkata"} /* Writer no DST, Reader no DST */,
+        {"Europe/Berlin", "America/Los_Angeles"} /* Writer DST, Reader DST */,
+        {"Europe/Berlin", "America/Chicago"} /* Writer DST, Reader DST */,
+        /* With DST difference */
+        {"Europe/Berlin", "UTC"},
+        {"UTC", "Europe/Berlin"} /* Writer no DST, Reader DST */,
+        {"America/Los_Angeles", "Asia/Kolkata"} /* Writer DST, Reader no DST */,
+        {"Europe/Berlin", "Asia/Kolkata"} /* Writer DST, Reader no DST */,
+        /* Timezone offsets for the reader has changed historically */
+        {"Asia/Saigon", "Pacific/Enderbury"},
+        {"UTC", "Asia/Jerusalem"},
+
+        // NOTE:
+        // "1995-01-01 03:00:00.688888888" this is not a valid time in Pacific/Enderbury timezone.
+        // On 1995-01-01 00:00:00 GMT offset moved from -11:00 hr to +13:00 which makes all values
+        // on 1995-01-01 invalid. Try this with joda time
+        // new MutableDateTime("1995-01-01", DateTimeZone.forTimeZone(readerTimeZone));
+    });
+    return result;
+  }
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @After
+  public void restoreTimeZone() {
+    TimeZone.setDefault(defaultTimeZone);
+  }
+
+  @Test
+  public void testTimestampWriter() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Timestamp.class,
+          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+
+    TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone));
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
+    assertEquals(writerTimeZone, TimeZone.getDefault().getID());
+    List<String> ts = Lists.newArrayList();
+    ts.add("2003-01-01 01:00:00.000000222");
+    ts.add("1996-08-02 09:00:00.723100809");
+    ts.add("1999-01-01 02:00:00.999999999");
+    ts.add("1995-01-02 03:00:00.688888888");
+    ts.add("2002-01-01 04:00:00.1");
+    ts.add("2010-03-02 05:00:00.000009001");
+    ts.add("2005-01-01 06:00:00.000002229");
+    ts.add("2006-01-01 07:00:00.900203003");
+    ts.add("2003-01-01 08:00:00.800000007");
+    ts.add("1998-11-02 10:00:00.857340643");
+    ts.add("2008-10-02 11:00:00.0");
+    ts.add("9999-01-01 00:00:00.000999");
+    ts.add("2014-03-28 00:00:00.0");
+    for (String t : ts) {
+      writer.addRow(Timestamp.valueOf(t));
+    }
+    writer.close();
+
+    TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    assertEquals(readerTimeZone, TimeZone.getDefault().getID());
+    RecordReader rows = reader.rows(null);
+    int idx = 0;
+    while (rows.hasNext()) {
+      Object row = rows.next(null);
+      Timestamp got = ((TimestampWritable) row).getTimestamp();
+      assertEquals(ts.get(idx++), got.toString());
+    }
+    rows.close();
+  }
+
+  //@Test
+  public void testReadTimestampFormat_0_11() throws Exception {
+    TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
+    Path oldFilePath =
+        new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc"));
+    Reader reader = OrcFile.createReader(oldFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+
+    StructObjectInspector readerInspector = (StructObjectInspector) reader
+        .getObjectInspector();
+    List<? extends StructField> fields = readerInspector
+        .getAllStructFieldRefs();
+    TimestampObjectInspector tso = (TimestampObjectInspector) readerInspector
+        .getStructFieldRef("ts").getFieldObjectInspector();
+    
+    RecordReader rows = reader.rows();
+    Object row = rows.next(null);
+    assertNotNull(row);
+    assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
+        tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row,
+            fields.get(12))));
+    
+    // check the contents of second row
+    assertEquals(true, rows.hasNext());
+    rows.seekToRow(7499);
+    row = rows.next(null);
+    assertEquals(Timestamp.valueOf("2000-03-12 15:00:01"),
+        tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row,
+            fields.get(12))));
+
+    // no rows remain past 7499; verify that and close the reader
+    assertEquals(false, rows.hasNext());
+    rows.close();
+  }
+}
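
The data() comment above points out that an offset transition can make a local
wall-clock time such as midnight on 1995-01-01 nonexistent in some zones, and
suggests reproducing it with Joda-Time. A minimal JDK-only sketch of the same
check (not part of this patch; the zone/date pair in main() is only an
illustrative guess) uses the fact that a lenient GregorianCalendar silently
normalizes a nonexistent local time to the next valid instant:

  import java.util.Calendar;
  import java.util.GregorianCalendar;
  import java.util.TimeZone;

  public class LocalTimeGapSketch {
    // Returns true if local midnight on the given date does not exist in the
    // zone, i.e. the round trip through epoch millis lands on a different
    // wall-clock value than the one requested.
    static boolean midnightSkipped(String zoneId, int year, int month, int day) {
      TimeZone tz = TimeZone.getTimeZone(zoneId);
      Calendar requested = new GregorianCalendar(tz);
      requested.clear();
      requested.set(year, month - 1, day, 0, 0, 0);   // requested local midnight
      Calendar roundTrip = new GregorianCalendar(tz);
      roundTrip.setTimeInMillis(requested.getTimeInMillis());
      return roundTrip.get(Calendar.YEAR) != year
          || roundTrip.get(Calendar.MONTH) != month - 1
          || roundTrip.get(Calendar.DAY_OF_MONTH) != day
          || roundTrip.get(Calendar.HOUR_OF_DAY) != 0;
    }

    public static void main(String[] args) {
      // Zones that realigned across the International Date Line in the mid
      // 1990s skip whole local days; adjust the arguments to probe other zones.
      System.out.println(midnightSkipped("Pacific/Kiritimati", 1995, 1, 1));
    }
  }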

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java?rev=1668942&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java Tue Mar 24 17:34:12 2015
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.io.File;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Random;
+import java.util.TimeZone;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Tests ORC timestamps across randomly paired writer and reader time zones.
+ */
+@RunWith(Parameterized.class)
+public class TestOrcTimezone2 {
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+  String writerTimeZone;
+  String readerTimeZone;
+  static TimeZone defaultTimeZone = TimeZone.getDefault();
+
+  public TestOrcTimezone2(String writerTZ, String readerTZ) {
+    this.writerTimeZone = writerTZ;
+    this.readerTimeZone = readerTZ;
+  }
+
+  @Parameterized.Parameters
+  public static Collection<Object[]> data() {
+    String[] allTimeZones = TimeZone.getAvailableIDs();
+    Random rand = new Random(123);
+    int len = allTimeZones.length;
+    int n = 500;
+    Object[][] data = new Object[n][];
+    for (int i = 0; i < n; i++) {
+      int wIdx = rand.nextInt(len);
+      int rIdx = rand.nextInt(len);
+      data[i] = new Object[2];
+      data[i][0] = allTimeZones[wIdx];
+      data[i][1] = allTimeZones[rIdx];
+    }
+    return Arrays.asList(data);
+  }
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @After
+  public void restoreTimeZone() {
+    TimeZone.setDefault(defaultTimeZone);
+  }
+
+  @Test
+  public void testTimestampWriter() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Timestamp.class,
+          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+
+    TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone));
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
+    assertEquals(writerTimeZone, TimeZone.getDefault().getID());
+    List<String> ts = Lists.newArrayList();
+    ts.add("2003-01-01 01:00:00.000000222");
+    ts.add("1999-01-01 02:00:00.999999999");
+    ts.add("1995-01-02 03:00:00.688888888");
+    ts.add("2002-01-01 04:00:00.1");
+    ts.add("2010-03-02 05:00:00.000009001");
+    ts.add("2005-01-01 06:00:00.000002229");
+    ts.add("2006-01-01 07:00:00.900203003");
+    ts.add("2003-01-01 08:00:00.800000007");
+    ts.add("1996-08-02 09:00:00.723100809");
+    ts.add("1998-11-02 10:00:00.857340643");
+    ts.add("2008-10-02 11:00:00.0");
+    ts.add("9999-01-01 00:00:00.000999");
+    for (String t : ts) {
+      writer.addRow(Timestamp.valueOf(t));
+    }
+    writer.close();
+
+    TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    assertEquals(readerTimeZone, TimeZone.getDefault().getID());
+    RecordReader rows = reader.rows(null);
+    int idx = 0;
+    while (rows.hasNext()) {
+      Object row = rows.next(null);
+      Timestamp got = ((TimestampWritable) row).getTimestamp();
+      assertEquals(ts.get(idx++), got.toString());
+    }
+    rows.close();
+  }
+}
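
Both new tests flip the JVM default time zone between write and read and then
assert that Timestamp.toString() still matches the original literal. That is a
meaningful check because java.sql.Timestamp ties wall-clock text to the default
zone, so a naive epoch-millis round trip shifts the printed value. A minimal
sketch of that naive round trip (illustrative zone IDs, not code from this
patch):

  import java.sql.Timestamp;
  import java.util.TimeZone;

  public class NaiveTimestampRoundTrip {
    public static void main(String[] args) {
      // Parse a wall-clock literal while one zone is the JVM default ...
      TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"));
      Timestamp written = Timestamp.valueOf("1995-01-02 03:00:00.688888888");
      long millis = written.getTime();   // instant on the epoch timeline
      int nanos = written.getNanos();

      // ... then render the same instant while another zone is the default.
      TimeZone.setDefault(TimeZone.getTimeZone("Europe/Berlin"));
      Timestamp read = new Timestamp(millis);
      read.setNanos(nanos);
      // Prints a shifted wall clock (12:00 rather than 03:00 for this pair),
      // which is exactly the mismatch the assertions in the tests would flag.
      System.out.println(read);
    }
  }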

Modified: hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter.out?rev=1668942&r1=1668941&r2=1668942&view=diff
==============================================================================
--- hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter.out (original)
+++ hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter.out Tue Mar 24 17:34:12 2015
@@ -39,7 +39,7 @@ File Statistics:
   Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
 
 Stripes:
-  Stripe: offset: 3 data: 63765 rows: 5000 tail: 86 index: 845
+  Stripe: offset: 3 data: 63765 rows: 5000 tail: 98 index: 845
     Stream: column 0 section ROW_INDEX start: 3 length 17
     Stream: column 1 section ROW_INDEX start: 20 length 164
     Stream: column 2 section ROW_INDEX start: 184 length 173
@@ -67,17 +67,17 @@ Stripes:
       Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 64699 data: 63754 rows: 5000 tail: 86 index: 837
-    Stream: column 0 section ROW_INDEX start: 64699 length 17
-    Stream: column 1 section ROW_INDEX start: 64716 length 162
-    Stream: column 2 section ROW_INDEX start: 64878 length 171
-    Stream: column 3 section ROW_INDEX start: 65049 length 83
-    Stream: column 3 section BLOOM_FILTER start: 65132 length 404
-    Stream: column 1 section DATA start: 65536 length 20029
-    Stream: column 2 section DATA start: 85565 length 40035
-    Stream: column 3 section DATA start: 125600 length 3532
-    Stream: column 3 section LENGTH start: 129132 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 129157 length 133
+  Stripe: offset: 64711 data: 63754 rows: 5000 tail: 98 index: 837
+    Stream: column 0 section ROW_INDEX start: 64711 length 17
+    Stream: column 1 section ROW_INDEX start: 64728 length 162
+    Stream: column 2 section ROW_INDEX start: 64890 length 171
+    Stream: column 3 section ROW_INDEX start: 65061 length 83
+    Stream: column 3 section BLOOM_FILTER start: 65144 length 404
+    Stream: column 1 section DATA start: 65548 length 20029
+    Stream: column 2 section DATA start: 85577 length 40035
+    Stream: column 3 section DATA start: 125612 length 3532
+    Stream: column 3 section LENGTH start: 129144 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 129169 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -95,17 +95,17 @@ Stripes:
       Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 129376 data: 63766 rows: 5000 tail: 86 index: 841
-    Stream: column 0 section ROW_INDEX start: 129376 length 17
-    Stream: column 1 section ROW_INDEX start: 129393 length 159
-    Stream: column 2 section ROW_INDEX start: 129552 length 171
-    Stream: column 3 section ROW_INDEX start: 129723 length 90
-    Stream: column 3 section BLOOM_FILTER start: 129813 length 404
-    Stream: column 1 section DATA start: 130217 length 20029
-    Stream: column 2 section DATA start: 150246 length 40035
-    Stream: column 3 section DATA start: 190281 length 3544
-    Stream: column 3 section LENGTH start: 193825 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 193850 length 133
+  Stripe: offset: 129400 data: 63766 rows: 5000 tail: 98 index: 841
+    Stream: column 0 section ROW_INDEX start: 129400 length 17
+    Stream: column 1 section ROW_INDEX start: 129417 length 159
+    Stream: column 2 section ROW_INDEX start: 129576 length 171
+    Stream: column 3 section ROW_INDEX start: 129747 length 90
+    Stream: column 3 section BLOOM_FILTER start: 129837 length 404
+    Stream: column 1 section DATA start: 130241 length 20029
+    Stream: column 2 section DATA start: 150270 length 40035
+    Stream: column 3 section DATA start: 190305 length 3544
+    Stream: column 3 section LENGTH start: 193849 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 193874 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -123,17 +123,17 @@ Stripes:
       Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 194069 data: 63796 rows: 5000 tail: 86 index: 844
-    Stream: column 0 section ROW_INDEX start: 194069 length 17
-    Stream: column 1 section ROW_INDEX start: 194086 length 162
-    Stream: column 2 section ROW_INDEX start: 194248 length 170
-    Stream: column 3 section ROW_INDEX start: 194418 length 91
-    Stream: column 3 section BLOOM_FILTER start: 194509 length 404
-    Stream: column 1 section DATA start: 194913 length 20029
-    Stream: column 2 section DATA start: 214942 length 40035
-    Stream: column 3 section DATA start: 254977 length 3574
-    Stream: column 3 section LENGTH start: 258551 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 258576 length 133
+  Stripe: offset: 194105 data: 63796 rows: 5000 tail: 98 index: 844
+    Stream: column 0 section ROW_INDEX start: 194105 length 17
+    Stream: column 1 section ROW_INDEX start: 194122 length 162
+    Stream: column 2 section ROW_INDEX start: 194284 length 170
+    Stream: column 3 section ROW_INDEX start: 194454 length 91
+    Stream: column 3 section BLOOM_FILTER start: 194545 length 404
+    Stream: column 1 section DATA start: 194949 length 20029
+    Stream: column 2 section DATA start: 214978 length 40035
+    Stream: column 3 section DATA start: 255013 length 3574
+    Stream: column 3 section LENGTH start: 258587 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 258612 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -151,17 +151,17 @@ Stripes:
       Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 258795 data: 12940 rows: 1000 tail: 78 index: 432
-    Stream: column 0 section ROW_INDEX start: 258795 length 12
-    Stream: column 1 section ROW_INDEX start: 258807 length 38
-    Stream: column 2 section ROW_INDEX start: 258845 length 41
-    Stream: column 3 section ROW_INDEX start: 258886 length 40
-    Stream: column 3 section BLOOM_FILTER start: 258926 length 301
-    Stream: column 1 section DATA start: 259227 length 4007
-    Stream: column 2 section DATA start: 263234 length 8007
-    Stream: column 3 section DATA start: 271241 length 768
-    Stream: column 3 section LENGTH start: 272009 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 272034 length 133
+  Stripe: offset: 258843 data: 12940 rows: 1000 tail: 90 index: 432
+    Stream: column 0 section ROW_INDEX start: 258843 length 12
+    Stream: column 1 section ROW_INDEX start: 258855 length 38
+    Stream: column 2 section ROW_INDEX start: 258893 length 41
+    Stream: column 3 section ROW_INDEX start: 258934 length 40
+    Stream: column 3 section BLOOM_FILTER start: 258974 length 301
+    Stream: column 1 section DATA start: 259275 length 4007
+    Stream: column 2 section DATA start: 263282 length 8007
+    Stream: column 3 section DATA start: 271289 length 768
+    Stream: column 3 section LENGTH start: 272057 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 272082 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -172,6 +172,6 @@ Stripes:
       Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
 
-File length: 272790 bytes
+File length: 272850 bytes
 Padding length: 0 bytes
 Padding ratio: 0%

Modified: hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter2.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter2.out?rev=1668942&r1=1668941&r2=1668942&view=diff
==============================================================================
--- hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter2.out (original)
+++ hive/trunk/ql/src/test/resources/orc-file-dump-bloomfilter2.out Tue Mar 24 17:34:12 2015
@@ -39,7 +39,7 @@ File Statistics:
   Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
 
 Stripes:
-  Stripe: offset: 3 data: 63765 rows: 5000 tail: 85 index: 6935
+  Stripe: offset: 3 data: 63765 rows: 5000 tail: 97 index: 6935
     Stream: column 0 section ROW_INDEX start: 3 length 17
     Stream: column 1 section ROW_INDEX start: 20 length 164
     Stream: column 2 section ROW_INDEX start: 184 length 173
@@ -67,17 +67,17 @@ Stripes:
       Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772
       Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4949 loadFactor: 0.5155 expectedFpp: 0.009676614
       Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9347 loadFactor: 0.9736 expectedFpp: 0.829482
-  Stripe: offset: 70788 data: 63754 rows: 5000 tail: 85 index: 6917
-    Stream: column 0 section ROW_INDEX start: 70788 length 17
-    Stream: column 1 section ROW_INDEX start: 70805 length 162
-    Stream: column 2 section ROW_INDEX start: 70967 length 171
-    Stream: column 2 section BLOOM_FILTER start: 71138 length 6484
-    Stream: column 3 section ROW_INDEX start: 77622 length 83
-    Stream: column 1 section DATA start: 77705 length 20029
-    Stream: column 2 section DATA start: 97734 length 40035
-    Stream: column 3 section DATA start: 137769 length 3532
-    Stream: column 3 section LENGTH start: 141301 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 141326 length 133
+  Stripe: offset: 70800 data: 63754 rows: 5000 tail: 97 index: 6917
+    Stream: column 0 section ROW_INDEX start: 70800 length 17
+    Stream: column 1 section ROW_INDEX start: 70817 length 162
+    Stream: column 2 section ROW_INDEX start: 70979 length 171
+    Stream: column 2 section BLOOM_FILTER start: 71150 length 6484
+    Stream: column 3 section ROW_INDEX start: 77634 length 83
+    Stream: column 1 section DATA start: 77717 length 20029
+    Stream: column 2 section DATA start: 97746 length 40035
+    Stream: column 3 section DATA start: 137781 length 3532
+    Stream: column 3 section LENGTH start: 141313 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 141338 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -95,17 +95,17 @@ Stripes:
       Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4962 loadFactor: 0.5169 expectedFpp: 0.009855959
       Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4966 loadFactor: 0.5173 expectedFpp: 0.009911705
       Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9344 loadFactor: 0.9733 expectedFpp: 0.8276205
-  Stripe: offset: 141544 data: 63766 rows: 5000 tail: 85 index: 6924
-    Stream: column 0 section ROW_INDEX start: 141544 length 17
-    Stream: column 1 section ROW_INDEX start: 141561 length 159
-    Stream: column 2 section ROW_INDEX start: 141720 length 171
-    Stream: column 2 section BLOOM_FILTER start: 141891 length 6487
-    Stream: column 3 section ROW_INDEX start: 148378 length 90
-    Stream: column 1 section DATA start: 148468 length 20029
-    Stream: column 2 section DATA start: 168497 length 40035
-    Stream: column 3 section DATA start: 208532 length 3544
-    Stream: column 3 section LENGTH start: 212076 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 212101 length 133
+  Stripe: offset: 141568 data: 63766 rows: 5000 tail: 97 index: 6924
+    Stream: column 0 section ROW_INDEX start: 141568 length 17
+    Stream: column 1 section ROW_INDEX start: 141585 length 159
+    Stream: column 2 section ROW_INDEX start: 141744 length 171
+    Stream: column 2 section BLOOM_FILTER start: 141915 length 6487
+    Stream: column 3 section ROW_INDEX start: 148402 length 90
+    Stream: column 1 section DATA start: 148492 length 20029
+    Stream: column 2 section DATA start: 168521 length 40035
+    Stream: column 3 section DATA start: 208556 length 3544
+    Stream: column 3 section LENGTH start: 212100 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 212125 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -123,17 +123,17 @@ Stripes:
       Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4943 loadFactor: 0.5149 expectedFpp: 0.009594797
       Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4930 loadFactor: 0.5135 expectedFpp: 0.009419539
       Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9333 loadFactor: 0.9722 expectedFpp: 0.82082444
-  Stripe: offset: 212319 data: 63796 rows: 5000 tail: 85 index: 6925
-    Stream: column 0 section ROW_INDEX start: 212319 length 17
-    Stream: column 1 section ROW_INDEX start: 212336 length 162
-    Stream: column 2 section ROW_INDEX start: 212498 length 170
-    Stream: column 2 section BLOOM_FILTER start: 212668 length 6485
-    Stream: column 3 section ROW_INDEX start: 219153 length 91
-    Stream: column 1 section DATA start: 219244 length 20029
-    Stream: column 2 section DATA start: 239273 length 40035
-    Stream: column 3 section DATA start: 279308 length 3574
-    Stream: column 3 section LENGTH start: 282882 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 282907 length 133
+  Stripe: offset: 212355 data: 63796 rows: 5000 tail: 97 index: 6925
+    Stream: column 0 section ROW_INDEX start: 212355 length 17
+    Stream: column 1 section ROW_INDEX start: 212372 length 162
+    Stream: column 2 section ROW_INDEX start: 212534 length 170
+    Stream: column 2 section BLOOM_FILTER start: 212704 length 6485
+    Stream: column 3 section ROW_INDEX start: 219189 length 91
+    Stream: column 1 section DATA start: 219280 length 20029
+    Stream: column 2 section DATA start: 239309 length 40035
+    Stream: column 3 section DATA start: 279344 length 3574
+    Stream: column 3 section LENGTH start: 282918 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 282943 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -151,17 +151,17 @@ Stripes:
       Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4941 loadFactor: 0.5147 expectedFpp: 0.009567649
       Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4993 loadFactor: 0.5201 expectedFpp: 0.010295142
       Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9353 loadFactor: 0.9743 expectedFpp: 0.8332165
-  Stripe: offset: 283125 data: 12940 rows: 1000 tail: 78 index: 1468
-    Stream: column 0 section ROW_INDEX start: 283125 length 12
-    Stream: column 1 section ROW_INDEX start: 283137 length 38
-    Stream: column 2 section ROW_INDEX start: 283175 length 41
-    Stream: column 2 section BLOOM_FILTER start: 283216 length 1337
-    Stream: column 3 section ROW_INDEX start: 284553 length 40
-    Stream: column 1 section DATA start: 284593 length 4007
-    Stream: column 2 section DATA start: 288600 length 8007
-    Stream: column 3 section DATA start: 296607 length 768
-    Stream: column 3 section LENGTH start: 297375 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 297400 length 133
+  Stripe: offset: 283173 data: 12940 rows: 1000 tail: 90 index: 1468
+    Stream: column 0 section ROW_INDEX start: 283173 length 12
+    Stream: column 1 section ROW_INDEX start: 283185 length 38
+    Stream: column 2 section ROW_INDEX start: 283223 length 41
+    Stream: column 2 section BLOOM_FILTER start: 283264 length 1337
+    Stream: column 3 section ROW_INDEX start: 284601 length 40
+    Stream: column 1 section DATA start: 284641 length 4007
+    Stream: column 2 section DATA start: 288648 length 8007
+    Stream: column 3 section DATA start: 296655 length 768
+    Stream: column 3 section LENGTH start: 297423 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 297448 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -172,6 +172,6 @@ Stripes:
       Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294
       Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294
 
-File length: 298155 bytes
+File length: 298215 bytes
 Padding length: 0 bytes
 Padding ratio: 0%


