hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1519868 - in /hive/trunk/ql/src/test: org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java resources/orc-file-11-format.orc
Date Tue, 03 Sep 2013 22:11:23 GMT
Author: hashutosh
Date: Tue Sep  3 22:11:23 2013
New Revision: 1519868

URL: http://svn.apache.org/r1519868
Log:
HIVE-5049 : Create an ORC test case that has a 0.11 ORC file (Prasanth J via Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/test/resources/orc-file-11-format.orc   (with props)
Modified:
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1519868&r1=1519867&r2=1519868&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Tue Sep  3 22:11:23
2013
@@ -38,10 +38,12 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.FloatWritable;
@@ -130,6 +132,48 @@ public class TestOrcFile {
     }
   }
 
+  public static class AllTypesRow {
+    Boolean boolean1;
+    Byte byte1;
+    Short short1;
+    Integer int1;
+    Long long1;
+    Float float1;
+    Double double1;
+    BytesWritable bytes1;
+    Text string1;
+    MiddleStruct middle;
+    List<InnerStruct> list = new ArrayList<InnerStruct>();
+    Map<Text, InnerStruct> map = new HashMap<Text, InnerStruct>();
+    Timestamp ts;
+    HiveDecimal decimal1;
+
+    AllTypesRow(Boolean b1, Byte b2, Short s1, Integer i1, Long l1, Float f1,
+           Double d1,
+           BytesWritable b3, String s2, MiddleStruct m1,
+           List<InnerStruct> l2, Map<Text, InnerStruct> m2,
+           Timestamp ts1, HiveDecimal decimal) {
+      this.boolean1 = b1;
+      this.byte1 = b2;
+      this.short1 = s1;
+      this.int1 = i1;
+      this.long1 = l1;
+      this.float1 = f1;
+      this.double1 = d1;
+      this.bytes1 = b3;
+      if (s2 == null) {
+        this.string1 = null;
+      } else {
+        this.string1 = new Text(s2);
+      }
+      this.middle = m1;
+      this.list = l2;
+      this.map = m2;
+      this.ts = ts1;
+      this.decimal1 = decimal;
+    }
+  }
+
   private static InnerStruct inner(int i, String s) {
     return new InnerStruct(i, s);
   }
@@ -187,6 +231,290 @@ public class TestOrcFile {
   }
 
   @Test
+  public void testWriteFormat_0_11() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory
+          .getReflectionObjectInspector(AllTypesRow.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    conf.set("hive.exec.orc.write.format", "0.11");
+    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
+        100000, CompressionKind.NONE, 10000, 10000);
+    for(int i = 0; i < 7500; i++) {
+      if (i % 2 == 0) {
+        writer.addRow(new AllTypesRow(false, (byte) 1, (short) 1024, 65536,
+            Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0, 1, 2, 3, 4), "hi",
+            new MiddleStruct(inner(1, "bye"), inner(2, "sigh")), list(
+                inner(3, "good"), inner(4, "bad")), map(), Timestamp
+                .valueOf("2000-03-12 15:00:00"), new HiveDecimal(
+                "12345678.6547456")));
+      } else {
+        writer.addRow(new AllTypesRow(true, (byte) 100, (short) 2048, 65536,
+            Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye",
+            new MiddleStruct(inner(1, "bye"), inner(2, "sigh")), list(
+                inner(100000000, "cat"), inner(-100000, "in"),
+                inner(1234, "hat")),
+            map(inner(5, "chani"), inner(1, "mauddib")), Timestamp
+                .valueOf("2000-03-12 15:00:01"), new HiveDecimal(
+                "12345678.6547457")));
+      }
+    }
+    writer.close();
+  }
+
+  @Test
+  public void testReadFormat_0_11() throws Exception {
+    Path resourceDir = new Path(System.getProperty("test.build.resources", "ql"
+        + File.separator + "src" + File.separator + "test" + File.separator
+        + "resources"));
+    Path oldFilePath = new Path(resourceDir, "orc-file-11-format.orc");
+    Reader reader = OrcFile.createReader(fs, oldFilePath);
+
+    int stripeCount = 0;
+    int rowCount = 0;
+    long currentOffset = -1;
+    for(StripeInformation stripe : reader.getStripes()) {
+      stripeCount += 1;
+      rowCount += stripe.getNumberOfRows();
+      if (currentOffset < 0) {
+        currentOffset = stripe.getOffset() + stripe.getIndexLength()
+            + stripe.getDataLength() + stripe.getFooterLength();
+      } else {
+        assertEquals(currentOffset, stripe.getOffset());
+        currentOffset += stripe.getIndexLength() + stripe.getDataLength()
+            + stripe.getFooterLength();
+      }
+    }
+    assertEquals(reader.getNumberOfRows(), rowCount);
+    assertEquals(2, stripeCount);
+
+    // check the stats
+    ColumnStatistics[] stats = reader.getStatistics();
+    assertEquals(7500, stats[1].getNumberOfValues());
+    assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount());
+    assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getTrueCount());
+    assertEquals("count: 7500 true: 3750", stats[1].toString());
+
+    assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
+    assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
+    assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
+    assertEquals(11520000, ((IntegerColumnStatistics) stats[3]).getSum());
+    assertEquals("count: 7500 min: 1024 max: 2048 sum: 11520000",
+        stats[3].toString());
+
+    assertEquals(Long.MAX_VALUE,
+        ((IntegerColumnStatistics) stats[5]).getMaximum());
+    assertEquals(Long.MAX_VALUE,
+        ((IntegerColumnStatistics) stats[5]).getMinimum());
+    assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
+    assertEquals(
+        "count: 7500 min: 9223372036854775807 max: 9223372036854775807",
+        stats[5].toString());
+
+    assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
+    assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
+    assertEquals(-75000.0, ((DoubleColumnStatistics) stats[7]).getSum(),
+        0.00001);
+    assertEquals("count: 7500 min: -15.0 max: -5.0 sum: -75000.0",
+        stats[7].toString());
+
+    assertEquals("count: 7500 min: bye max: hi", stats[9].toString());
+
+    // check the inspectors
+    StructObjectInspector readerInspector = (StructObjectInspector) reader
+        .getObjectInspector();
+    assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
+    assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
+        + "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
+        + "binary,string1:string,middle:struct<list:array<struct<int1:int,"
+        + "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
+        + "map:map<string,struct<int1:int,string1:string>>,ts:timestamp,"
+        + "decimal1:decimal>", readerInspector.getTypeName());
+    List<? extends StructField> fields = readerInspector
+        .getAllStructFieldRefs();
+    BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector
+        .getStructFieldRef("boolean1").getFieldObjectInspector();
+    ByteObjectInspector by = (ByteObjectInspector) readerInspector
+        .getStructFieldRef("byte1").getFieldObjectInspector();
+    ShortObjectInspector sh = (ShortObjectInspector) readerInspector
+        .getStructFieldRef("short1").getFieldObjectInspector();
+    IntObjectInspector in = (IntObjectInspector) readerInspector
+        .getStructFieldRef("int1").getFieldObjectInspector();
+    LongObjectInspector lo = (LongObjectInspector) readerInspector
+        .getStructFieldRef("long1").getFieldObjectInspector();
+    FloatObjectInspector fl = (FloatObjectInspector) readerInspector
+        .getStructFieldRef("float1").getFieldObjectInspector();
+    DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector
+        .getStructFieldRef("double1").getFieldObjectInspector();
+    BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector
+        .getStructFieldRef("bytes1").getFieldObjectInspector();
+    StringObjectInspector st = (StringObjectInspector) readerInspector
+        .getStructFieldRef("string1").getFieldObjectInspector();
+    StructObjectInspector mid = (StructObjectInspector) readerInspector
+        .getStructFieldRef("middle").getFieldObjectInspector();
+    List<? extends StructField> midFields = mid.getAllStructFieldRefs();
+    ListObjectInspector midli = (ListObjectInspector) midFields.get(0)
+        .getFieldObjectInspector();
+    StructObjectInspector inner = (StructObjectInspector) midli
+        .getListElementObjectInspector();
+    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
+    ListObjectInspector li = (ListObjectInspector) readerInspector
+        .getStructFieldRef("list").getFieldObjectInspector();
+    MapObjectInspector ma = (MapObjectInspector) readerInspector
+        .getStructFieldRef("map").getFieldObjectInspector();
+    TimestampObjectInspector tso = (TimestampObjectInspector) readerInspector
+        .getStructFieldRef("ts").getFieldObjectInspector();
+    HiveDecimalObjectInspector dco = (HiveDecimalObjectInspector) readerInspector
+        .getStructFieldRef("decimal1").getFieldObjectInspector();
+    StringObjectInspector mk = (StringObjectInspector) ma
+        .getMapKeyObjectInspector();
+    RecordReader rows = reader.rows(null);
+    Object row = rows.next(null);
+    assertNotNull(row);
+    // check the contents of the first row
+    assertEquals(false,
+        bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
+    assertEquals(1,
+        by.get(readerInspector.getStructFieldData(row, fields.get(1))));
+    assertEquals(1024,
+        sh.get(readerInspector.getStructFieldData(row, fields.get(2))));
+    assertEquals(65536,
+        in.get(readerInspector.getStructFieldData(row, fields.get(3))));
+    assertEquals(Long.MAX_VALUE,
+        lo.get(readerInspector.getStructFieldData(row, fields.get(4))));
+    assertEquals(1.0,
+        fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001);
+    assertEquals(-15.0,
+        dbl.get(readerInspector.getStructFieldData(row, fields.get(6))),
+        0.00001);
+    assertEquals(bytes(0, 1, 2, 3, 4),
+        bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row,
+            fields.get(7))));
+    assertEquals("hi", st.getPrimitiveJavaObject(readerInspector
+        .getStructFieldData(row, fields.get(8))));
+    List<?> midRow = midli.getList(mid.getStructFieldData(
+        readerInspector.getStructFieldData(row, fields.get(9)),
+        midFields.get(0)));
+    assertNotNull(midRow);
+    assertEquals(2, midRow.size());
+    assertEquals(1,
+        in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0))));
+    assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData(
+        midRow.get(0), inFields.get(1))));
+    assertEquals(2,
+        in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0))));
+    assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData(
+        midRow.get(1), inFields.get(1))));
+    List<?> list = li.getList(readerInspector.getStructFieldData(row,
+        fields.get(10)));
+    assertEquals(2, list.size());
+    assertEquals(3,
+        in.get(inner.getStructFieldData(list.get(0), inFields.get(0))));
+    assertEquals("good", st.getPrimitiveJavaObject(inner.getStructFieldData(
+        list.get(0), inFields.get(1))));
+    assertEquals(4,
+        in.get(inner.getStructFieldData(list.get(1), inFields.get(0))));
+    assertEquals("bad", st.getPrimitiveJavaObject(inner.getStructFieldData(
+        list.get(1), inFields.get(1))));
+    Map<?, ?> map = ma.getMap(readerInspector.getStructFieldData(row,
+        fields.get(11)));
+    assertEquals(0, map.size());
+    assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
+        tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row,
+            fields.get(12))));
+    assertEquals(new HiveDecimal("12345678.6547456"),
+        dco.getPrimitiveJavaObject(readerInspector.getStructFieldData(row,
+            fields.get(13))));
+
+    // check the contents of second row
+    assertEquals(true, rows.hasNext());
+    rows.seekToRow(7499);
+    row = rows.next(null);
+    assertEquals(true,
+        bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
+    assertEquals(100,
+        by.get(readerInspector.getStructFieldData(row, fields.get(1))));
+    assertEquals(2048,
+        sh.get(readerInspector.getStructFieldData(row, fields.get(2))));
+    assertEquals(65536,
+        in.get(readerInspector.getStructFieldData(row, fields.get(3))));
+    assertEquals(Long.MAX_VALUE,
+        lo.get(readerInspector.getStructFieldData(row, fields.get(4))));
+    assertEquals(2.0,
+        fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001);
+    assertEquals(-5.0,
+        dbl.get(readerInspector.getStructFieldData(row, fields.get(6))),
+        0.00001);
+    assertEquals(bytes(), bi.getPrimitiveWritableObject(readerInspector
+        .getStructFieldData(row, fields.get(7))));
+    assertEquals("bye", st.getPrimitiveJavaObject(readerInspector
+        .getStructFieldData(row, fields.get(8))));
+    midRow = midli.getList(mid.getStructFieldData(
+        readerInspector.getStructFieldData(row, fields.get(9)),
+        midFields.get(0)));
+    assertNotNull(midRow);
+    assertEquals(2, midRow.size());
+    assertEquals(1,
+        in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0))));
+    assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData(
+        midRow.get(0), inFields.get(1))));
+    assertEquals(2,
+        in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0))));
+    assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData(
+        midRow.get(1), inFields.get(1))));
+    list = li.getList(readerInspector.getStructFieldData(row, fields.get(10)));
+    assertEquals(3, list.size());
+    assertEquals(100000000,
+        in.get(inner.getStructFieldData(list.get(0), inFields.get(0))));
+    assertEquals("cat", st.getPrimitiveJavaObject(inner.getStructFieldData(
+        list.get(0), inFields.get(1))));
+    assertEquals(-100000,
+        in.get(inner.getStructFieldData(list.get(1), inFields.get(0))));
+    assertEquals("in", st.getPrimitiveJavaObject(inner.getStructFieldData(
+        list.get(1), inFields.get(1))));
+    assertEquals(1234,
+        in.get(inner.getStructFieldData(list.get(2), inFields.get(0))));
+    assertEquals("hat", st.getPrimitiveJavaObject(inner.getStructFieldData(
+        list.get(2), inFields.get(1))));
+    map = ma.getMap(readerInspector.getStructFieldData(row, fields.get(11)));
+    assertEquals(2, map.size());
+    boolean[] found = new boolean[2];
+    for(Object key : map.keySet()) {
+      String str = mk.getPrimitiveJavaObject(key);
+      if (str.equals("chani")) {
+        assertEquals(false, found[0]);
+        assertEquals(5,
+            in.get(inner.getStructFieldData(map.get(key), inFields.get(0))));
+        assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData(
+            map.get(key), inFields.get(1))));
+        found[0] = true;
+      } else if (str.equals("mauddib")) {
+        assertEquals(false, found[1]);
+        assertEquals(1,
+            in.get(inner.getStructFieldData(map.get(key), inFields.get(0))));
+        assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData(
+            map.get(key), inFields.get(1))));
+        found[1] = true;
+      } else {
+        throw new IllegalArgumentException("Unknown key " + str);
+      }
+    }
+    assertEquals(true, found[0]);
+    assertEquals(true, found[1]);
+    assertEquals(Timestamp.valueOf("2000-03-12 15:00:01"),
+        tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row,
+            fields.get(12))));
+    assertEquals(new HiveDecimal("12345678.6547457"),
+        dco.getPrimitiveJavaObject(readerInspector.getStructFieldData(row,
+            fields.get(13))));
+
+    // handle the close up
+    assertEquals(false, rows.hasNext());
+    rows.close();
+  }
+
+  @Test
   public void test1() throws Exception {
     ObjectInspector inspector;
     synchronized (TestOrcFile.class) {

Added: hive/trunk/ql/src/test/resources/orc-file-11-format.orc
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/resources/orc-file-11-format.orc?rev=1519868&view=auto
==============================================================================
Binary file - no diff available.

Propchange: hive/trunk/ql/src/test/resources/orc-file-11-format.orc
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream



Mime
View raw message