hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject hive git commit: HIVE-10407. Separate out timestamp ranges for ORC test cases. (omalley reviewed by prasanthj)
Date Thu, 23 Apr 2015 17:35:01 GMT
Repository: hive
Updated Branches:
  refs/heads/master 5a576b6fb -> 44c16a2a8


HIVE-10407. Separate out timestamp ranges for ORC test cases. (omalley reviewed
by prasanthj)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/44c16a2a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/44c16a2a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/44c16a2a

Branch: refs/heads/master
Commit: 44c16a2a8c43a259b7f977cc1ca43d176730cd94
Parents: 5a576b6
Author: Owen O'Malley <omalley@apache.org>
Authored: Thu Apr 23 10:33:47 2015 -0700
Committer: Owen O'Malley <omalley@apache.org>
Committed: Thu Apr 23 10:33:47 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/orc/TestOrcFile.java      | 95 ++++++++++++++++----
 .../hadoop/hive/ql/io/orc/TestOrcTimezone1.java |  2 +-
 .../hadoop/hive/ql/io/orc/TestOrcTimezone2.java |  2 +-
 3 files changed, 82 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/44c16a2a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index 8577f41..255565e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@ -27,6 +27,7 @@ import java.io.File;
 import java.io.IOException;
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
+import java.sql.Date;
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -46,6 +47,7 @@ import org.apache.hadoop.hive.ql.io.orc.OrcFile.Version;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
@@ -489,7 +491,7 @@ public class TestOrcFile {
         OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000)
             .version(OrcFile.Version.V_0_11));
     List<Timestamp> tslist = Lists.newArrayList();
-    tslist.add(Timestamp.valueOf("9999-01-01 00:00:00.000999"));
+    tslist.add(Timestamp.valueOf("2037-01-01 00:00:00.000999"));
     tslist.add(Timestamp.valueOf("2003-01-01 00:00:00.000000222"));
     tslist.add(Timestamp.valueOf("1999-01-01 00:00:00.999999999"));
     tslist.add(Timestamp.valueOf("1995-01-01 00:00:00.688888888"));
@@ -689,7 +691,7 @@ public class TestOrcFile {
     assertEquals("two", ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getMinimum());
     assertEquals("three", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMinimum());
     assertEquals("one", ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getMaximum());
-    assertEquals("two", ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getMaximum());
+    assertEquals("two", ((StringColumnStatistics) ss2.getColumnStatistics()[2]).getMaximum());
     assertEquals("three", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMaximum());
     assertEquals(15000, ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getSum());
     assertEquals(15000, ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getSum());
@@ -710,7 +712,7 @@ public class TestOrcFile {
     assertEquals(3, index.length);
     items = index[1].getEntryList();
     assertEquals(2,
-                 items.get(0).getStatistics().getIntStatistics().getMaximum());
+        items.get(0).getStatistics().getIntStatistics().getMaximum());
   }
 
   @Test
@@ -1112,8 +1114,8 @@ public class TestOrcFile {
                                          .bufferSize(100));
     writer.addUserMetadata("my.meta", byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127,
                                               -128));
-    writer.addUserMetadata("clobber", byteBuf(1,2,3));
-    writer.addUserMetadata("clobber", byteBuf(4,3,2,1));
+    writer.addUserMetadata("clobber", byteBuf(1, 2, 3));
+    writer.addUserMetadata("clobber", byteBuf(4, 3, 2, 1));
     ByteBuffer bigBuf = ByteBuffer.allocate(40000);
     Random random = new Random(0);
     random.nextBytes(bigBuf.array());
@@ -1153,10 +1155,71 @@ public class TestOrcFile {
   }
 
   /**
-   * We test union, timestamp, and decimal separately since we need to make the
-   * object inspector manually. (The Hive reflection-based doesn't handle
-   * them properly.)
+   * Generate an ORC file with a range of dates and times.
    */
+  public void createOrcDateFile(Path file, int minYear, int maxYear
+                                ) throws IOException {
+    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT).
+        addFieldNames("time").addFieldNames("date").
+        addSubtypes(1).addSubtypes(2).build());
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.TIMESTAMP).
+        build());
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.DATE).
+        build());
+
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = OrcStruct.createObjectInspector(0, types);
+    }
+    Writer writer = OrcFile.createWriter(file,
+        OrcFile.writerOptions(conf)
+            .inspector(inspector)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .blockPadding(false));
+    OrcStruct row = new OrcStruct(2);
+    for (int year = minYear; year < maxYear; ++year) {
+      for (int ms = 1000; ms < 2000; ++ms) {
+        row.setFieldValue(0,
+            new TimestampWritable(Timestamp.valueOf(year + "-05-05 12:34:56."
+                + ms)));
+        row.setFieldValue(1,
+            new DateWritable(new Date(year - 1900, 11, 25)));
+        writer.addRow(row);
+      }
+    }
+    writer.close();
+    Reader reader = OrcFile.createReader(file,
+        OrcFile.readerOptions(conf));
+    RecordReader rows = reader.rows();
+    for (int year = minYear; year < maxYear; ++year) {
+      for(int ms = 1000; ms < 2000; ++ms) {
+        row = (OrcStruct) rows.next(row);
+        assertEquals(new TimestampWritable
+                (Timestamp.valueOf(year + "-05-05 12:34:56." + ms)),
+            row.getFieldValue(0));
+        assertEquals(new DateWritable(new Date(year - 1900, 11, 25)),
+            row.getFieldValue(1));
+      }
+    }
+  }
+
+  @Test
+  public void testDate1900() throws Exception {
+    createOrcDateFile(testFilePath, 1900, 1970);
+  }
+
+  @Test
+  public void testDate2038() throws Exception {
+    createOrcDateFile(testFilePath, 2038, 2250);
+  }
+
+  /**
+     * We test union, timestamp, and decimal separately since we need to make the
+     * object inspector manually. (The Hive reflection-based doesn't handle
+     * them properly.)
+     */
   @Test
   public void testUnionAndTimestamp() throws Exception {
     List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
@@ -1209,13 +1272,15 @@ public class TestOrcFile {
     union.set((byte) 1, null);
     writer.addRow(row);
     union.set((byte) 0, new IntWritable(200000));
-    row.setFieldValue(0, new TimestampWritable(Timestamp.valueOf("1900-01-01 00:00:00")));
+    row.setFieldValue(0, new TimestampWritable
+        (Timestamp.valueOf("1970-01-01 00:00:00")));
     value = HiveDecimal.create("10000000000000000000");
     row.setFieldValue(2, new HiveDecimalWritable(value));
     writer.addRow(row);
     Random rand = new Random(42);
-    for(int i=1900; i < 2200; ++i) {
-      row.setFieldValue(0, new TimestampWritable(Timestamp.valueOf(i + "-05-05 12:34:56." + i)));
+    for(int i=1970; i < 2038; ++i) {
+      row.setFieldValue(0, new TimestampWritable(Timestamp.valueOf(i +
+          "-05-05 12:34:56." + i)));
       if ((i & 1) == 0) {
         union.set((byte) 0, new IntWritable(i*i));
       } else {
@@ -1260,10 +1325,10 @@ public class TestOrcFile {
     assertEquals(true, Arrays.equals(expected, included));
 
     assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
-    assertEquals(5309, reader.getNumberOfRows());
+    assertEquals(5077, reader.getNumberOfRows());
     DecimalColumnStatistics stats =
         (DecimalColumnStatistics) reader.getStatistics()[5];
-    assertEquals(303, stats.getNumberOfValues());
+    assertEquals(71, stats.getNumberOfValues());
     assertEquals(HiveDecimal.create("-5643.234"), stats.getMinimum());
     assertEquals(maxValue, stats.getMaximum());
     // TODO: fix this
@@ -1324,13 +1389,13 @@ public class TestOrcFile {
     assertEquals(null, union.getObject());
     assertEquals(null, row.getFieldValue(2));
     row = (OrcStruct) rows.next(row);
-    assertEquals(new TimestampWritable(Timestamp.valueOf("1900-01-01 00:00:00")),
+    assertEquals(new TimestampWritable(Timestamp.valueOf("1970-01-01 00:00:00")),
         row.getFieldValue(0));
     assertEquals(new IntWritable(200000), union.getObject());
     assertEquals(new HiveDecimalWritable(HiveDecimal.create("10000000000000000000")),
                  row.getFieldValue(2));
     rand = new Random(42);
-    for(int i=1900; i < 2200; ++i) {
+    for(int i=1970; i < 2038; ++i) {
       row = (OrcStruct) rows.next(row);
       assertEquals(new TimestampWritable(Timestamp.valueOf(i + "-05-05 12:34:56." + i)),
           row.getFieldValue(0));

http://git-wip-us.apache.org/repos/asf/hive/blob/44c16a2a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java
index 91e9c8f..526c357 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java
@@ -136,7 +136,7 @@ public class TestOrcTimezone1 {
     ts.add("2003-01-01 08:00:00.800000007");
     ts.add("1998-11-02 10:00:00.857340643");
     ts.add("2008-10-02 11:00:00.0");
-    ts.add("9999-01-01 00:00:00.000999");
+    ts.add("2037-01-01 00:00:00.000999");
     ts.add("2014-03-28 00:00:00.0");
     for (String t : ts) {
       writer.addRow(Timestamp.valueOf(t));

http://git-wip-us.apache.org/repos/asf/hive/blob/44c16a2a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java
index 751e231..3eae4a9 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java
@@ -120,7 +120,7 @@ public class TestOrcTimezone2 {
     ts.add("1996-08-02 09:00:00.723100809");
     ts.add("1998-11-02 10:00:00.857340643");
     ts.add("2008-10-02 11:00:00.0");
-    ts.add("9999-01-01 00:00:00.000999");
+    ts.add("2037-01-01 00:00:00.000999");
     for (String t : ts) {
       writer.addRow(Timestamp.valueOf(t));
     }


Mime
View raw message