hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ser...@apache.org
Subject [2/4] hive git commit: HIVE-13948 : Incorrect timezone handling in Writable results in wrong dates in queries (Sergey Shelukhin, reviewed by Jason Dere)
Date Tue, 07 Jun 2016 19:40:28 GMT
HIVE-13948 : Incorrect timezone handling in Writable results in wrong dates in queries (Sergey
Shelukhin, reviewed by Jason Dere)

Conflicts:
	common/src/java/org/apache/hive/common/util/DateParser.java


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/89574b36
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/89574b36
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/89574b36

Branch: refs/heads/branch-2.1
Commit: 89574b3627248afbb6acdcf198c3aa9b036ca7ee
Parents: 28f2068
Author: Sergey Shelukhin <sershe@apache.org>
Authored: Tue Jun 7 12:01:38 2016 -0700
Committer: Sergey Shelukhin <sershe@apache.org>
Committed: Tue Jun 7 12:31:24 2016 -0700

----------------------------------------------------------------------
 .../hive/ql/parse/BaseSemanticAnalyzer.java     |  2 +-
 .../hadoop/hive/ql/udf/UDFDayOfMonth.java       |  2 +-
 .../org/apache/hadoop/hive/ql/udf/UDFMonth.java |  2 +-
 .../hadoop/hive/ql/udf/UDFWeekOfYear.java       |  2 +-
 .../org/apache/hadoop/hive/ql/udf/UDFYear.java  |  2 +-
 .../hadoop/hive/serde2/io/TestDateWritable.java | 68 +++++++++++++-------
 .../apache/hive/service/cli/ColumnValue.java    |  7 --
 .../hadoop/hive/serde2/io/DateWritable.java     | 68 ++++++++++++++++----
 8 files changed, 105 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/89574b36/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index 4a9db9e..db7aeef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -1580,7 +1580,7 @@ public abstract class BaseSemanticAnalyzer {
       Object colValue, String originalColSpec) throws SemanticException {
     Date value;
     if (colValue instanceof DateWritable) {
-      value = ((DateWritable) colValue).get();
+      value = ((DateWritable) colValue).get(false); // Time doesn't matter.
     } else if (colValue instanceof Date) {
       value = (Date) colValue;
     } else {

http://git-wip-us.apache.org/repos/asf/hive/blob/89574b36/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java
index 6100cae..a578b0d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java
@@ -90,7 +90,7 @@ public class UDFDayOfMonth extends UDF {
       return null;
     }
 
-    calendar.setTime(d.get());
+    calendar.setTime(d.get(false)); // Time doesn't matter.
     result.set(calendar.get(Calendar.DAY_OF_MONTH));
     return result;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/89574b36/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
index 8c2b0e4..759d2b0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
@@ -88,7 +88,7 @@ public class UDFMonth extends UDF {
       return null;
     }
 
-    calendar.setTime(d.get());
+    calendar.setTime(d.get(false));  // Time doesn't matter.
     result.set(1 + calendar.get(Calendar.MONTH));
     return result;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/89574b36/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java
index e03c049..7deb61b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java
@@ -87,7 +87,7 @@ public class UDFWeekOfYear extends UDF {
       return null;
     }
 
-    calendar.setTime(d.get());
+    calendar.setTime(d.get(false));  // Time doesn't matter.
     result.set(calendar.get(Calendar.WEEK_OF_YEAR));
     return result;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/89574b36/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
index d7ecd8c..d1af121 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
@@ -90,7 +90,7 @@ public class UDFYear extends UDF {
       return null;
     }
 
-    calendar.setTime(d.get());
+    calendar.setTime(d.get(false));  // Time doesn't matter.
     result.set(calendar.get(Calendar.YEAR));
     return result;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/89574b36/serde/src/test/org/apache/hadoop/hive/serde2/io/TestDateWritable.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/io/TestDateWritable.java b/serde/src/test/org/apache/hadoop/hive/serde2/io/TestDateWritable.java
index fd95ccf..97eb967 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/io/TestDateWritable.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/io/TestDateWritable.java
@@ -21,6 +21,8 @@ package org.apache.hadoop.hive.serde2.io;
 import com.google.code.tempusfugit.concurrency.annotations.*;
 import com.google.code.tempusfugit.concurrency.*;
 import org.junit.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import static org.junit.Assert.*;
 import java.io.*;
@@ -28,6 +30,8 @@ import java.sql.Date;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.Calendar;
+import java.util.GregorianCalendar;
+import java.util.LinkedList;
 import java.util.TimeZone;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutionException;
@@ -36,6 +40,7 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 
 public class TestDateWritable {
+  private static final Logger LOG = LoggerFactory.getLogger(TestDateWritable.class);
 
   @Rule public ConcurrentRule concurrentRule = new ConcurrentRule();
   @Rule public RepeatingRule repeatingRule = new RepeatingRule();
@@ -160,24 +165,34 @@ public class TestDateWritable {
     return dateStrings[(int) (Math.random() * 365)];
   }
 
-  public static class DateTestCallable implements Callable<String> {
-    public DateTestCallable() {
+  public static class DateTestCallable implements Callable<Void> {
+    private LinkedList<DtMismatch> bad;
+    private String tz;
+
+    public DateTestCallable(LinkedList<DtMismatch> bad, String tz) {
+      this.bad = bad;
+      this.tz = tz;
     }
 
     @Override
-    public String call() throws Exception {
+    public Void call() throws Exception {
+      SimpleDateFormat sdf = new SimpleDateFormat("YYYY-MM-dd HH:mm:ss");
       // Iterate through each day of the year, make sure Date/DateWritable match
-      Date originalDate = Date.valueOf("2014-01-01");
+      Date originalDate = Date.valueOf("1900-01-01");
       Calendar cal = Calendar.getInstance();
       cal.setTimeInMillis(originalDate.getTime());
-      for (int idx = 0; idx < 365; ++idx) {
+      for (int idx = 0; idx < 365*200; ++idx) {
         originalDate = new Date(cal.getTimeInMillis());
         // Make sure originalDate is at midnight in the local time zone,
         // since DateWritable will generate dates at that time.
         originalDate = Date.valueOf(originalDate.toString());
         DateWritable dateWritable = new DateWritable(originalDate);
-        if (!originalDate.equals(dateWritable.get())) {
-          return originalDate.toString();
+        Date actual = dateWritable.get(false);
+        if (!originalDate.equals(actual)) {
+          String originalStr = sdf.format(originalDate);
+          String actualStr = sdf.format(actual);
+          if (originalStr.substring(0, 10).equals(actualStr.substring(0, 10))) continue;
+          bad.add(new DtMismatch(originalStr, actualStr, tz));
         }
         cal.add(Calendar.DAY_OF_YEAR, 1);
       }
@@ -186,34 +201,37 @@ public class TestDateWritable {
     }
   }
 
+  private static class DtMismatch {
+    String expected, found, tz;
+    public DtMismatch(String originalStr, String actualStr, String tz) {
+      this.expected = originalStr;
+      this.found = actualStr;
+      this.tz = tz;
+    }
+  }
+
   @Test
-  public void testDaylightSavingsTime() throws InterruptedException, ExecutionException {
-    String[] timeZones = {
-        "GMT",
-        "UTC",
-        "America/Godthab",
-        "America/Los_Angeles",
-        "Asia/Jerusalem",
-        "Australia/Melbourne",
-        "Europe/London",
-        // time zones with half hour boundaries
-        "America/St_Johns",
-        "Asia/Tehran",
-    };
-
-    for (String timeZone: timeZones) {
+  public void testDaylightSavingsTime() throws Exception {
+    LinkedList<DtMismatch> bad = new LinkedList<>();
+
+    for (String timeZone: TimeZone.getAvailableIDs()) {
       TimeZone previousDefault = TimeZone.getDefault();
       TimeZone.setDefault(TimeZone.getTimeZone(timeZone));
       assertEquals("Default timezone should now be " + timeZone,
           timeZone, TimeZone.getDefault().getID());
       ExecutorService threadPool = Executors.newFixedThreadPool(1);
       try {
-        Future<String> future = threadPool.submit(new DateTestCallable());
-        String result = future.get();
-        assertNull("Failed at timezone " + timeZone + ", date " + result, result);
+        // TODO: pointless
+        threadPool.submit(new DateTestCallable(bad, timeZone)).get();
       } finally {
         threadPool.shutdown(); TimeZone.setDefault(previousDefault);
       }
     }
+    StringBuilder errors = new StringBuilder("\nDATE MISMATCH:\n");
+    for (DtMismatch dm : bad) {
+      errors.append("E ").append(dm.tz).append(": ").append(dm.expected).append(" != ").append(dm.found).append("\n");
+    }
+    LOG.error(errors.toString());
+    if (!bad.isEmpty()) throw new Exception(bad.size() + " mismatches, see logs");
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/89574b36/service/src/java/org/apache/hive/service/cli/ColumnValue.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/ColumnValue.java b/service/src/java/org/apache/hive/service/cli/ColumnValue.java
index 28149e1..76e8c03 100644
--- a/service/src/java/org/apache/hive/service/cli/ColumnValue.java
+++ b/service/src/java/org/apache/hive/service/cli/ColumnValue.java
@@ -262,13 +262,6 @@ public class ColumnValue {
     return null;
   }
 
-  private static Date getDateValue(TStringValue tStringValue) {
-    if (tStringValue.isSetValue()) {
-      return Date.valueOf(tStringValue.getValue());
-    }
-    return null;
-  }
-
   private static Timestamp getTimestampValue(TStringValue tStringValue) {
     if (tStringValue.isSetValue()) {
       return Timestamp.valueOf(tStringValue.getValue());

http://git-wip-us.apache.org/repos/asf/hive/blob/89574b36/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java b/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
index dd2b1d9..637720a 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
@@ -22,6 +22,7 @@ import java.io.DataOutput;
 import java.io.IOException;
 import java.sql.Date;
 import java.util.Calendar;
+import java.util.GregorianCalendar;
 import java.util.TimeZone;
 import java.util.concurrent.TimeUnit;
 
@@ -41,7 +42,7 @@ public class DateWritable implements WritableComparable<DateWritable>
{
 
   private static final long MILLIS_PER_DAY = TimeUnit.DAYS.toMillis(1);
 
-  // Local time zone.
+  // Local time zone. Store separately because Calendar would clone it.
   // Java TimeZone has no mention of thread safety. Use thread local instance to be safe.
   private static final ThreadLocal<TimeZone> LOCAL_TIMEZONE = new ThreadLocal<TimeZone>()
{
     @Override
@@ -50,6 +51,19 @@ public class DateWritable implements WritableComparable<DateWritable>
{
     }
   };
 
+  private static final ThreadLocal<Calendar> UTC_CALENDAR = new ThreadLocal<Calendar>()
{
+    @Override
+    protected Calendar initialValue() {
+      return new GregorianCalendar(TimeZone.getTimeZone("UTC"));
+    }
+  };
+  private static final ThreadLocal<Calendar> LOCAL_CALENDAR = new ThreadLocal<Calendar>()
{
+    @Override
+    protected Calendar initialValue() {
+      return Calendar.getInstance();
+    }
+  };
+
   // Internal representation is an integer representing day offset from our epoch value 1970-01-01
   private int daysSinceEpoch = 0;
 
@@ -95,11 +109,16 @@ public class DateWritable implements WritableComparable<DateWritable>
{
   }
 
   /**
-   *
    * @return Date value corresponding to the date in the local time zone
    */
   public Date get() {
-    return new Date(daysToMillis(daysSinceEpoch));
+    return get(true);
+  }
+
+  // TODO: we should call this more often. In theory, for DATE type, time should never matter,
but
+  //       it's hard to tell w/some code paths like UDFs/OIs etc. that are used in many places.
+  public Date get(boolean doesTimeMatter) {
+    return new Date(daysToMillis(daysSinceEpoch, doesTimeMatter));
   }
 
   public int getDays() {
@@ -119,21 +138,47 @@ public class DateWritable implements WritableComparable<DateWritable>
{
   }
 
   public static long daysToMillis(int d) {
-    // Convert from day offset to ms in UTC, then apply local timezone offset.
-    long millisUtc = d * MILLIS_PER_DAY;
-    long tmp =  millisUtc - LOCAL_TIMEZONE.get().getOffset(millisUtc);
-    // Between millisUtc and tmp, the time zone offset may have changed due to DST.
-    // Look up the offset again.
-    return millisUtc - LOCAL_TIMEZONE.get().getOffset(tmp);
+    return daysToMillis(d, true);
+  }
+
+  public static long daysToMillis(int d, boolean doesTimeMatter) {
+    // What we are trying to get is the equivalent of new Date(ymd).getTime() in the local
tz,
+    // where ymd is whatever d represents. How it "works" is this.
+    // First we get the UTC midnight for that day (which always exists, a small island of
sanity).
+    long utcMidnight = d * MILLIS_PER_DAY;
+    // Now we take a local TZ offset at midnight UTC. Say we are in -4; that means (surprise
+    // surprise) that at midnight UTC it was 20:00 in local. So far we are on firm ground.
+    long utcMidnightOffset = LOCAL_TIMEZONE.get().getOffset(utcMidnight);
+    // And now we wander straight into the swamp, when instead of adding, we subtract it
from UTC
+    // midnight to supposedly get local midnight (in the above case, 4:00 UTC). Of course,
given
+    // all the insane DST variations, where we actually end up is anyone's guess.
+    long hopefullyMidnight = utcMidnight - utcMidnightOffset;
+    // Then we determine the local TZ offset at that magical time.
+    long offsetAtHM = LOCAL_TIMEZONE.get().getOffset(hopefullyMidnight);
+    // If the offsets are the same, we assume our initial jump did not cross any DST boundaries,
+    // and is thus valid. Both times flowed at the same pace. We congratulate ourselves and
bail.
+    if (utcMidnightOffset == offsetAtHM) return hopefullyMidnight;
+    // Alas, we crossed some DST boundary. If the time of day doesn't matter to the caller,
we'll
+    // simply get the next day and go back half a day. This is not ideal but seems to work.
+    if (!doesTimeMatter) return daysToMillis(d + 1) - (MILLIS_PER_DAY >> 1);
+    // Now, we could get previous and next day, figure our how many hours were inserted or
removed,
+    // and from which of the days, etc. But at this point our gun is pointing straight at
our foot,
+    // so let's just go the safe, expensive way.
+    Calendar utc = UTC_CALENDAR.get(), local = LOCAL_CALENDAR.get();
+    utc.setTimeInMillis(utcMidnight);
+    local.set(utc.get(Calendar.YEAR), utc.get(Calendar.MONTH), utc.get(Calendar.DAY_OF_MONTH));
+    return local.getTimeInMillis();
   }
 
   public static int millisToDays(long millisLocal) {
+    // We assume millisLocal is midnight of some date. What we are basically trying to do
+    // here is go from local-midnight to UTC-midnight (or whatever time that happens to be).
     long millisUtc = millisLocal + LOCAL_TIMEZONE.get().getOffset(millisLocal);
     int days;
     if (millisUtc >= 0L) {
       days = (int) (millisUtc / MILLIS_PER_DAY);
     } else {
-      days = (int) ((millisUtc - 86399999) / MILLIS_PER_DAY);
+      days = (int) ((millisUtc - 86399999 /*(MILLIS_PER_DAY - 1)*/) / MILLIS_PER_DAY);
     }
     return days;
   }
@@ -169,7 +214,8 @@ public class DateWritable implements WritableComparable<DateWritable>
{
 
   @Override
   public String toString() {
-    return get().toString();
+    // For toString, the time does not matter
+    return get(false).toString();
   }
 
   @Override


Mime
View raw message