hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gunt...@apache.org
Subject svn commit: r1540168 [2/2] - in /hive/trunk/ql/src: gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/ java/org/apache/hadoop/hive/ql/io/orc/ protobuf/org/apache/hadoop/hive/ql/io/orc/ test/org/apache/hadoop/hive/ql/io/orc/ test/resources/
Date Fri, 08 Nov 2013 20:02:51 GMT
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1540168&r1=1540167&r2=1540168&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Fri Nov  8 20:02:50 2013
@@ -539,6 +539,70 @@ public class TestOrcFile {
     rows.close();
   }
 
+
+  @Test
+  public void testStripeLevelStats() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (InnerStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .inspector(inspector)
+            .stripeSize(100000)
+            .bufferSize(10000));
+    for (int i = 0; i < 11000; i++) {
+      if (i >= 5000) {
+        if (i >= 10000) {
+          writer.addRow(new InnerStruct(3, "three"));
+        } else {
+          writer.addRow(new InnerStruct(2, "two"));
+        }
+      } else {
+        writer.addRow(new InnerStruct(1, "one"));
+      }
+    }
+
+    writer.close();
+    Reader reader = OrcFile.createReader(fs, testFilePath);
+    Metadata metadata = reader.getMetadata();
+    int numStripes = metadata.getStripeStatistics().size();
+    assertEquals(3, numStripes);
+    StripeStatistics ss1 = metadata.getStripeStatistics().get(0);
+    StripeStatistics ss2 = metadata.getStripeStatistics().get(1);
+    StripeStatistics ss3 = metadata.getStripeStatistics().get(2);
+    assertEquals(4996, ss1.getColumnStatistics()[0].getNumberOfValues());
+    assertEquals(5000, ss2.getColumnStatistics()[0].getNumberOfValues());
+    assertEquals(1004, ss3.getColumnStatistics()[0].getNumberOfValues());
+
+    assertEquals(4996, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getNumberOfValues());
+    assertEquals(5000, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getNumberOfValues());
+    assertEquals(1004, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getNumberOfValues());
+    assertEquals(1, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getMinimum());
+    assertEquals(1, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMinimum());
+    assertEquals(2, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMinimum());
+    assertEquals(1, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getMaximum());
+    assertEquals(2, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMaximum());
+    assertEquals(3, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMaximum());
+    assertEquals(4996, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getSum());
+    assertEquals(9996, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getSum());
+    assertEquals(3008, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getSum());
+
+    assertEquals(4996, ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getNumberOfValues());
+    assertEquals(5000, ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getNumberOfValues());
+    assertEquals(1004, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getNumberOfValues());
+    assertEquals("one", ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getMinimum());
+    assertEquals("one", ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getMinimum());
+    assertEquals("three", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMinimum());
+    assertEquals("one", ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getMaximum());
+    assertEquals("two", ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getMaximum());
+    assertEquals("two", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMaximum());
+    assertEquals(14988, ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getSum());
+    assertEquals(15000, ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getSum());
+    assertEquals(5012, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getSum());
+  }
+
   @Test
   public void test1() throws Exception {
     ObjectInspector inspector;
@@ -547,12 +611,12 @@ public class TestOrcFile {
           (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
     Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .inspector(inspector)
-                                         .stripeSize(100000)
-                                         .bufferSize(10000));
+        OrcFile.writerOptions(conf)
+            .inspector(inspector)
+            .stripeSize(100000)
+            .bufferSize(10000));
     writer.addRow(new BigRow(false, (byte) 1, (short) 1024, 65536,
-        Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0,1,2,3,4), "hi",
+        Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0, 1, 2, 3, 4), "hi",
         new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
         list(inner(3, "good"), inner(4, "bad")),
         map()));
@@ -560,10 +624,12 @@ public class TestOrcFile {
         Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye",
         new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
         list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
-        map(inner(5,"chani"), inner(1,"mauddib"))));
+        map(inner(5, "chani"), inner(1, "mauddib"))));
     writer.close();
     Reader reader = OrcFile.createReader(fs, testFilePath);
 
+    Metadata metadata = reader.getMetadata();
+
     // check the stats
     ColumnStatistics[] stats = reader.getStatistics();
     assertEquals(2, stats[1].getNumberOfValues());
@@ -578,14 +644,12 @@ public class TestOrcFile {
     assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",
         stats[3].toString());
 
-    assertEquals(Long.MAX_VALUE,
-        ((IntegerColumnStatistics) stats[5]).getMaximum());
-    assertEquals(Long.MAX_VALUE,
-        ((IntegerColumnStatistics) stats[5]).getMinimum());
-    assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
-    assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807",
-        stats[5].toString());
-
+    StripeStatistics ss = metadata.getStripeStatistics().get(0);
+    assertEquals(2, ss.getColumnStatistics()[0].getNumberOfValues());
+    assertEquals(1, ((BooleanColumnStatistics) ss.getColumnStatistics()[1]).getTrueCount());
+    assertEquals(1024, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getMinimum());
+    assertEquals(2048, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getMaximum());
+    assertEquals(3072, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getSum());
     assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
     assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
     assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
@@ -935,6 +999,9 @@ public class TestOrcFile {
       }
     }
     assertEquals(3, i);
+    Metadata metadata = reader.getMetadata();
+    int numStripes = metadata.getStripeStatistics().size();
+    assertEquals(1, numStripes);
   }
 
   /**

Modified: hive/trunk/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/resources/orc-file-dump-dictionary-threshold.out?rev=1540168&r1=1540167&r2=1540168&view=diff
==============================================================================
--- hive/trunk/ql/src/test/resources/orc-file-dump-dictionary-threshold.out (original)
+++ hive/trunk/ql/src/test/resources/orc-file-dump-dictionary-threshold.out Fri Nov  8 20:02:50 2013
@@ -4,7 +4,34 @@ Compression: ZLIB
 Compression size: 10000
 Type: struct<i:int,l:bigint,s:string>
 
-Statistics:
+Stripe Statistics:
+  Stripe 1:
+    Column 0: count: 4000
+    Column 1: count: 4000 min: -2147115959 max: 2145911404 sum: 71315665983
+    Column 2: count: 4000 min: -9211329013123260308 max: 9217851628057711416
+    Column 3: count: 4000 min: Darkness,-230 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788 sum: 245096
+  Stripe 2:
+    Column 0: count: 5000
+    Column 1: count: 5000 min: -2147390285 max: 2146838901 sum: 107869424275
+    Column 2: count: 5000 min: -9222178666167296739 max: 9221301751385928177
+    Column 3: count: 5000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984 sum: 972748
+  Stripe 3:
+    Column 0: count: 5000
+    Column 1: count: 5000 min: -2145928262 max: 2147224606 sum: 38276585043
+    Column 2: count: 5000 min: -9221963099397084326 max: 9222722740629726770
+    Column 3: count: 5000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766 sum: 1753024
+  Stripe 4:
+    Column 0: count: 5000
+    Column 1: count: 5000 min: -2145378214 max: 2147453086 sum: -43469576640
+    Column 2: count: 5000 min: -9222731174895935707 max: 9222919052987871506
+    Column 3: count: 5000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788 sum: 2636664
+  Stripe 5:
+    Column 0: count: 2000
+    Column 1: count: 2000 min: -2143595397 max: 2144595861 sum: -64863580335
+    Column 2: count: 2000 min: -9212379634781416464 max: 9208134757538374043
+    Column 3: count: 2000 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 1302706
+
+File Statistics:
   Column 0: count: 21000
   Column 1: count: 21000 min: -2147390285 max: 2147453086 sum: 109128518326
   Column 2: count: 21000 min: -9222731174895935707 max: 9222919052987871506

Modified: hive/trunk/ql/src/test/resources/orc-file-dump.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/resources/orc-file-dump.out?rev=1540168&r1=1540167&r2=1540168&view=diff
==============================================================================
--- hive/trunk/ql/src/test/resources/orc-file-dump.out (original)
+++ hive/trunk/ql/src/test/resources/orc-file-dump.out Fri Nov  8 20:02:50 2013
@@ -4,7 +4,34 @@ Compression: ZLIB
 Compression size: 10000
 Type: struct<i:int,l:bigint,s:string>
 
-Statistics:
+Stripe Statistics:
+  Stripe 1:
+    Column 0: count: 5000
+    Column 1: count: 5000 min: -2146021688 max: 2147223299 sum: 515792826
+    Column 2: count: 5000 min: -9218592812243954469 max: 9221614132680747961
+    Column 3: count: 5000 min: Darkness, max: worst sum: 19280
+  Stripe 2:
+    Column 0: count: 5000
+    Column 1: count: 5000 min: -2146733128 max: 2147001622 sum: 7673427
+    Column 2: count: 5000 min: -9220818777591257749 max: 9222259462014003839
+    Column 3: count: 5000 min: Darkness, max: worst sum: 19504
+  Stripe 3:
+    Column 0: count: 5000
+    Column 1: count: 5000 min: -2146993718 max: 2147378179 sum: 132660742551
+    Column 2: count: 5000 min: -9218342074710552826 max: 9222303228623055266
+    Column 3: count: 5000 min: Darkness, max: worst sum: 19641
+  Stripe 4:
+    Column 0: count: 5000
+    Column 1: count: 5000 min: -2146658006 max: 2145520931 sum: 8533549236
+    Column 2: count: 5000 min: -9222758097219661129 max: 9221043130193737406
+    Column 3: count: 5000 min: Darkness, max: worst sum: 19470
+  Stripe 5:
+    Column 0: count: 1000
+    Column 1: count: 1000 min: -2146245500 max: 2146378640 sum: 51299706363
+    Column 2: count: 1000 min: -9208193203370316142 max: 9218567213558056476
+    Column 3: count: 1000 min: Darkness, max: worst sum: 3866
+
+File Statistics:
   Column 0: count: 21000
   Column 1: count: 21000 min: -2146993718 max: 2147378179 sum: 193017464403
   Column 2: count: 21000 min: -9222758097219661129 max: 9222303228623055266



Mime
View raw message