hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sp...@apache.org
Subject [1/4] hive git commit: Revert "HIVE-16231: Parquet timestamp may be stored differently since HIVE-12767 (Barna Zsombor Klara, reviewed by Sergio Pena)"
Date Wed, 17 May 2017 14:42:49 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-2.3 6afc1bed5 -> 32fd02b7c


Revert "HIVE-16231: Parquet timestamp may be stored differently since HIVE-12767 (Barna Zsombor Klara, reviewed by Sergio Pena)"

This reverts commit 8b866562b16a2b10880a4296fe133ef007a85c77.


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0907b411
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0907b411
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0907b411

Branch: refs/heads/branch-2.3
Commit: 0907b411e26bb94bbf6d0d6e10fb29973d2ed5f0
Parents: 6afc1be
Author: Sergio Pena <sergio.pena@cloudera.com>
Authored: Wed May 17 09:30:33 2017 -0500
Committer: Sergio Pena <sergio.pena@cloudera.com>
Committed: Wed May 17 09:30:33 2017 -0500

----------------------------------------------------------------------
 .../ql/io/parquet/MapredParquetOutputFormat.java     | 10 ++++++----
 .../hive/ql/io/parquet/ParquetRecordReaderBase.java  | 14 +++++++++-----
 .../hive/ql/io/parquet/timestamp/NanoTimeUtils.java  | 15 +--------------
 .../ql/io/parquet/timestamp/TestNanoTimeUtils.java   | 13 -------------
 .../queries/clientpositive/parquet_int96_timestamp.q |  2 +-
 5 files changed, 17 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0907b411/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java
index a7bb5ee..26f1e75 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java
@@ -21,7 +21,6 @@ import java.util.Properties;
 import java.util.TimeZone;
 
 import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils;
-import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
 import org.apache.parquet.Strings;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -140,11 +139,14 @@ public class MapredParquetOutputFormat extends FileOutputFormat<NullWritable, Pa
     String timeZoneID =
         tableProperties.getProperty(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY);
     if (!Strings.isNullOrEmpty(timeZoneID)) {
-
-      NanoTimeUtils.validateTimeZone(timeZoneID);
+      if (!Arrays.asList(TimeZone.getAvailableIDs()).contains(timeZoneID)) {
+        throw new IllegalStateException("Unexpected timezone id found for parquet int96 conversion: " + timeZoneID);
+      }
       return TimeZone.getTimeZone(timeZoneID);
     }
 
-    return TimeZone.getDefault();
+    // If no timezone is defined in table properties, then adjust timestamps using
+    // PARQUET_INT96_NO_ADJUSTMENT_ZONE timezone
+    return TimeZone.getTimeZone(ParquetTableUtils.PARQUET_INT96_NO_ADJUSTMENT_ZONE);
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/0907b411/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
index 2954601..8e33b7d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
@@ -20,7 +20,6 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
 import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
 import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils;
-import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
 import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -45,6 +44,7 @@ import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.TimeZone;
 
@@ -170,7 +170,7 @@ public class ParquetRecordReaderBase {
     boolean skipConversion = HiveConf.getBoolVar(configuration,
         HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION);
     FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
-    if (!Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr") &&
+    if (!Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr") ||
         skipConversion) {
       // Impala writes timestamp values using GMT only. We should not try to convert Impala
       // files to other type of timezones.
@@ -179,12 +179,16 @@ public class ParquetRecordReaderBase {
       // TABLE_PARQUET_INT96_TIMEZONE is a table property used to detect what timezone conversion
       // to use when reading Parquet timestamps.
       timeZoneID = configuration.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY,
-          TimeZone.getDefault().getID());
-      NanoTimeUtils.validateTimeZone(timeZoneID);
+          ParquetTableUtils.PARQUET_INT96_NO_ADJUSTMENT_ZONE);
+
+      if (!Arrays.asList(TimeZone.getAvailableIDs()).contains(timeZoneID)) {
+          throw new IllegalStateException("Unexpected timezone id found for parquet int96 conversion: " + timeZoneID);
+      }
     }
 
     // 'timeZoneID' should be valid, since we did not throw exception above
-    configuration.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY,timeZoneID);
+    configuration.set(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY,
+        TimeZone.getTimeZone(timeZoneID).getID());
   }
 
   public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) {

http://git-wip-us.apache.org/repos/asf/hive/blob/0907b411/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
index dbd6fb3..5dc8088 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
@@ -152,26 +152,13 @@ public class NanoTimeUtils {
 
     calendar.setTimeInMillis(utcCalendar.getTimeInMillis());
 
-    Calendar adjusterCalendar = copyToCalendarWithTZ(calendar, getLocalCalendar());
+    Calendar adjusterCalendar = copyToCalendarWithTZ(calendar, Calendar.getInstance());
 
     Timestamp ts = new Timestamp(adjusterCalendar.getTimeInMillis());
     ts.setNanos((int) nanos);
     return ts;
   }
 
-  /**
-   * Check if the string id is a valid java TimeZone id.
-   * TimeZone#getTimeZone will return "GMT" if the id cannot be understood.
-   * @param timeZoneID
-   */
-  public static void validateTimeZone(String timeZoneID) {
-    if (TimeZone.getTimeZone(timeZoneID).getID().equals("GMT")
-        && !"GMT".equals(timeZoneID)) {
-      throw new IllegalStateException(
-          "Unexpected timezone id found for parquet int96 conversion: " + timeZoneID);
-    }
-  }
-
   private static Calendar copyToCalendarWithTZ(Calendar from, Calendar to) {
     if(from.getTimeZone().getID().equals(to.getTimeZone().getID())) {
       return from;

http://git-wip-us.apache.org/repos/asf/hive/blob/0907b411/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/timestamp/TestNanoTimeUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/timestamp/TestNanoTimeUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/timestamp/TestNanoTimeUtils.java
index 1e10dbf..37cf0e2 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/timestamp/TestNanoTimeUtils.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/timestamp/TestNanoTimeUtils.java
@@ -230,17 +230,4 @@ public class TestNanoTimeUtils {
     Assert.assertEquals(newNTUTC.getJulianDay(), depNTUTC.getJulianDay());
     Assert.assertEquals(newNTUTC.getTimeOfDayNanos(), depNTUTC.getTimeOfDayNanos());
   }
-
-  @Test
-  public void testTimeZoneValidationWithCorrectZoneId() {
-    NanoTimeUtils.validateTimeZone("GMT");
-    NanoTimeUtils.validateTimeZone("UTC");
-    NanoTimeUtils.validateTimeZone("GMT+10");
-    NanoTimeUtils.validateTimeZone("Europe/Budapest");
-  }
-
-  @Test(expected = IllegalStateException.class)
-  public void testTimeZoneValidationWithIncorrectZoneId() {
-    NanoTimeUtils.validateTimeZone("UCC");
-  }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/0907b411/ql/src/test/queries/clientpositive/parquet_int96_timestamp.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_int96_timestamp.q b/ql/src/test/queries/clientpositive/parquet_int96_timestamp.q
index 6eadd1b..5de2c3f 100644
--- a/ql/src/test/queries/clientpositive/parquet_int96_timestamp.q
+++ b/ql/src/test/queries/clientpositive/parquet_int96_timestamp.q
@@ -2,7 +2,7 @@ create table dummy (id int);
 insert into table dummy values (1);
 
 set hive.parquet.mr.int96.enable.utc.write.zone=true;
-set hive.parquet.timestamp.skip.conversion=true;
+set hive.parquet.timestamp.skip.conversion=false;
 
 -- read/write timestamps using UTC as default write zone
 create table timestamps (ts timestamp) stored as parquet;


Mime
View raw message