hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ser...@apache.org
Subject [17/35] hive git commit: HIVE-14089 : complex type support in LLAP IO is broken (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Date Tue, 22 Nov 2016 02:46:26 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/6efa869f/ql/src/test/results/clientpositive/vector_complex_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_complex_all.q.out b/ql/src/test/results/clientpositive/vector_complex_all.q.out
index a54a371..7ce707a 100644
--- a/ql/src/test/results/clientpositive/vector_complex_all.q.out
+++ b/ql/src/test/results/clientpositive/vector_complex_all.q.out
@@ -34,8 +34,9 @@ PREHOOK: query: CREATE TABLE orc_create_complex (
   str STRING,
   mp  MAP<STRING,STRING>,
   lst ARRAY<STRING>,
-  strct STRUCT<A:STRING,B:STRING>
-) STORED AS ORC
+  strct STRUCT<A:STRING,B:STRING>,
+  val string
+) STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000")
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@orc_create_complex
@@ -43,16 +44,19 @@ POSTHOOK: query: CREATE TABLE orc_create_complex (
   str STRING,
   mp  MAP<STRING,STRING>,
   lst ARRAY<STRING>,
-  strct STRUCT<A:STRING,B:STRING>
-) STORED AS ORC
+  strct STRUCT<A:STRING,B:STRING>,
+  val string
+) STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000")
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@orc_create_complex
-PREHOOK: query: INSERT OVERWRITE TABLE orc_create_complex SELECT * FROM orc_create_staging
+PREHOOK: query: INSERT OVERWRITE TABLE orc_create_complex
+SELECT orc_create_staging.*, '0' FROM orc_create_staging
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_create_staging
 PREHOOK: Output: default@orc_create_complex
-POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_complex SELECT * FROM orc_create_staging
+POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_complex
+SELECT orc_create_staging.*, '0' FROM orc_create_staging
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_create_staging
 POSTHOOK: Output: default@orc_create_complex
@@ -60,45 +64,8 @@ POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create
 POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp,
type:map<string,string>, comment:null), ]
 POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str,
type:string, comment:null), ]
 POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct,
type:struct<A:string,B:string>, comment:null), ]
-orc_create_staging.str	orc_create_staging.mp	orc_create_staging.lst	orc_create_staging.strct
-PREHOOK: query: -- Since complex types are not supported, this query should not vectorize.
-EXPLAIN
-SELECT * FROM orc_create_complex
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Since complex types are not supported, this query should not vectorize.
-EXPLAIN
-SELECT * FROM orc_create_complex
-POSTHOOK: type: QUERY
-Explain
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: orc_create_complex
-            Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: str (type: string), mp (type: map<string,string>), lst (type:
array<string>), strct (type: struct<a:string,b:string>)
-              outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats:
NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats:
NONE
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
+POSTHOOK: Lineage: orc_create_complex.val SIMPLE []
+orc_create_staging.str	orc_create_staging.mp	orc_create_staging.lst	orc_create_staging.strct
c1
 PREHOOK: query: SELECT * FROM orc_create_complex
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_create_complex
@@ -107,129 +74,156 @@ POSTHOOK: query: SELECT * FROM orc_create_complex
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_create_complex
 #### A masked pattern was here ####
-orc_create_complex.str	orc_create_complex.mp	orc_create_complex.lst	orc_create_complex.strct
-line1	{"key13":"value13","key11":"value11","key12":"value12"}	["a","b","c"]	{"a":"one","b":"two"}
-line2	{"key21":"value21","key22":"value22","key23":"value23"}	["d","e","f"]	{"a":"three","b":"four"}
-line3	{"key31":"value31","key32":"value32","key33":"value33"}	["g","h","i"]	{"a":"five","b":"six"}
-PREHOOK: query: -- However, since this query is not referencing the complex fields, it should
vectorize.
-EXPLAIN
-SELECT COUNT(*) FROM orc_create_complex
+orc_create_complex.str	orc_create_complex.mp	orc_create_complex.lst	orc_create_complex.strct
orc_create_complex.val
+line1	{"key13":"value13","key11":"value11","key12":"value12"}	["a","b","c"]	{"a":"one","b":"two"}
0
+line2	{"key21":"value21","key22":"value22","key23":"value23"}	["d","e","f"]	{"a":"three","b":"four"}
0
+line3	{"key31":"value31","key32":"value32","key33":"value33"}	["g","h","i"]	{"a":"five","b":"six"}
0
+PREHOOK: query: SELECT str FROM orc_create_complex
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT str FROM orc_create_complex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+str
+line1
+line2
+line3
+PREHOOK: query: SELECT strct, mp, lst FROM orc_create_complex
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT strct, mp, lst FROM orc_create_complex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+strct	mp	lst
+{"a":"one","b":"two"}	{"key13":"value13","key11":"value11","key12":"value12"}	["a","b","c"]
+{"a":"three","b":"four"}	{"key21":"value21","key22":"value22","key23":"value23"}	["d","e","f"]
+{"a":"five","b":"six"}	{"key31":"value31","key32":"value32","key33":"value33"}	["g","h","i"]
+PREHOOK: query: SELECT lst, str FROM orc_create_complex
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT lst, str FROM orc_create_complex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+lst	str
+["a","b","c"]	line1
+["d","e","f"]	line2
+["g","h","i"]	line3
+PREHOOK: query: SELECT mp, str FROM orc_create_complex
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT mp, str FROM orc_create_complex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+mp	str
+{"key13":"value13","key11":"value11","key12":"value12"}	line1
+{"key21":"value21","key22":"value22","key23":"value23"}	line2
+{"key31":"value31","key32":"value32","key33":"value33"}	line3
+PREHOOK: query: SELECT strct, str FROM orc_create_complex
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT strct, str FROM orc_create_complex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+strct	str
+{"a":"one","b":"two"}	line1
+{"a":"three","b":"four"}	line2
+{"a":"five","b":"six"}	line3
+PREHOOK: query: SELECT strct.B, str FROM orc_create_complex
 PREHOOK: type: QUERY
-POSTHOOK: query: -- However, since this query is not referencing the complex fields, it should
vectorize.
-EXPLAIN
-SELECT COUNT(*) FROM orc_create_complex
+PREHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT strct.B, str FROM orc_create_complex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+b	str
+two	line1
+four	line2
+six	line3
+Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Stage-1:MAPRED'
is a cross product
+PREHOOK: query: INSERT INTO TABLE orc_create_complex
+SELECT orc_create_staging.*, src1.key FROM orc_create_staging cross join src src1 cross join
orc_create_staging spam1 cross join orc_create_staging spam2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_staging
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orc_create_complex
+POSTHOOK: query: INSERT INTO TABLE orc_create_complex
+SELECT orc_create_staging.*, src1.key FROM orc_create_staging cross join src src1 cross join
orc_create_staging spam1 cross join orc_create_staging spam2
 POSTHOOK: type: QUERY
-Explain
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: orc_create_complex
-            Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: COMPLETE
-            Select Operator
-              Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats:
COMPLETE
-              Group By Operator
-                aggregations: count()
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
COMPLETE
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
COMPLETE
-                  value expressions: _col0 (type: bigint)
-      Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: SELECT COUNT(*) FROM orc_create_complex
+POSTHOOK: Input: default@orc_create_staging
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orc_create_complex
+POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:lst,
type:array<string>, comment:null), ]
+POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp,
type:map<string,string>, comment:null), ]
+POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str,
type:string, comment:null), ]
+POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct,
type:struct<A:string,B:string>, comment:null), ]
+POSTHOOK: Lineage: orc_create_complex.val SIMPLE [(src)src1.FieldSchema(name:key, type:string,
comment:default), ]
+orc_create_staging.str	orc_create_staging.mp	orc_create_staging.lst	orc_create_staging.strct
src1.key
+PREHOOK: query: select count(*) from orc_create_complex
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_create_complex
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT COUNT(*) FROM orc_create_complex
+POSTHOOK: query: select count(*) from orc_create_complex
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_create_complex
 #### A masked pattern was here ####
 c0
-3
-PREHOOK: query: -- Also, since this query is not referencing the complex fields, it should
vectorize.
-EXPLAIN
-SELECT str FROM orc_create_complex ORDER BY str
+13503
+PREHOOK: query: SELECT distinct lst, strct FROM orc_create_complex
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT distinct lst, strct FROM orc_create_complex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+lst	strct
+["a","b","c"]	{"a":"one","b":"two"}
+["d","e","f"]	{"a":"three","b":"four"}
+["g","h","i"]	{"a":"five","b":"six"}
+PREHOOK: query: SELECT str, count(val)  FROM orc_create_complex GROUP BY str
 PREHOOK: type: QUERY
-POSTHOOK: query: -- Also, since this query is not referencing the complex fields, it should
vectorize.
-EXPLAIN
-SELECT str FROM orc_create_complex ORDER BY str
+PREHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT str, count(val)  FROM orc_create_complex GROUP BY str
 POSTHOOK: type: QUERY
-Explain
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: orc_create_complex
-            Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: str (type: string)
-              outputColumnNames: _col0
-              Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats:
NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                sort order: +
-                Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats:
NONE
-      Execution mode: vectorized
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
-          Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: SELECT str FROM orc_create_complex ORDER BY str
+POSTHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+str	c1
+line1	4501
+line2	4501
+line3	4501
+PREHOOK: query: SELECT strct.B, count(val) FROM orc_create_complex GROUP BY strct.B
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_create_complex
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT str FROM orc_create_complex ORDER BY str
+POSTHOOK: query: SELECT strct.B, count(val) FROM orc_create_complex GROUP BY strct.B
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_create_complex
 #### A masked pattern was here ####
-str
-line1
-line2
-line3
+strct.b	_c1
+four	4501
+six	4501
+two	4501
+PREHOOK: query: SELECT strct, mp, lst, str, count(val) FROM orc_create_complex GROUP BY strct, mp, lst, str
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT strct, mp, lst, str, count(val) FROM orc_create_complex GROUP BY strct, mp, lst, str
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+strct	mp	lst	str	c4
+{"a":"one","b":"two"}	{"key11":"value11","key12":"value12","key13":"value13"}	["a","b","c"]
line1	4501
+{"a":"three","b":"four"}	{"key21":"value21","key22":"value22","key23":"value23"}	["d","e","f"]
line2	4501
+{"a":"five","b":"six"}	{"key31":"value31","key32":"value32","key33":"value33"}	["g","h","i"]
line3	4501

http://git-wip-us.apache.org/repos/asf/hive/blob/6efa869f/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java b/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java
index 907181e..13772c9 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.common.io.encoded;
 
+import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -76,14 +77,17 @@ public class EncodedColumnBatch<BatchKey> {
   /** The key that is used to map this batch to source location. */
   protected BatchKey batchKey;
   /**
-   * Stream data for each stream, for each included column.
-   * For each column, streams are indexed by kind, with missing elements being null.
+   * Stream data for each column that has true in the corresponding hasData position.
+   * For each column, streams are indexed by kind (for ORC), with missing elements being null.
    */
   protected ColumnStreamData[][] columnData;
-  /** Column indexes included in the batch. Correspond to columnData elements. */
-  protected int[] columnIxs;
+  /** Indicates which columns have data. Correspond to columnData elements. */
+  protected boolean[] hasData;
 
   public void reset() {
+    if (hasData != null) {
+      Arrays.fill(hasData, false);
+    }
     if (columnData == null) return;
     for (int i = 0; i < columnData.length; ++i) {
       if (columnData[i] == null) continue;
@@ -93,37 +97,37 @@ public class EncodedColumnBatch<BatchKey> {
     }
   }
 
-  public void initColumn(int colIxMod, int colIx, int streamCount) {
-    columnIxs[colIxMod] = colIx;
-    if (columnData[colIxMod] == null || columnData[colIxMod].length != streamCount) {
-      columnData[colIxMod] = new ColumnStreamData[streamCount];
+  public void initColumn(int colIx, int streamCount) {
+    hasData[colIx] = true;
+    if (columnData[colIx] == null || columnData[colIx].length != streamCount) {
+      columnData[colIx] = new ColumnStreamData[streamCount];
     }
   }
 
-  public void setStreamData(int colIxMod, int streamKind, ColumnStreamData csd) {
-    columnData[colIxMod][streamKind] = csd;
-  }
-
-  public void setAllStreamsData(int colIxMod, int colIx, ColumnStreamData[] sbs) {
-    columnIxs[colIxMod] = colIx;
-    columnData[colIxMod] = sbs;
+  public void setStreamData(int colIx, int streamIx, ColumnStreamData csd) {
+    assert hasData[colIx];
+    columnData[colIx][streamIx] = csd;
   }
 
   public BatchKey getBatchKey() {
     return batchKey;
   }
 
-  public ColumnStreamData[][] getColumnData() {
-    return columnData;
+  public ColumnStreamData[] getColumnData(int colIx) {
+    if (!hasData[colIx]) throw new AssertionError("No data for column " + colIx);
+    return columnData[colIx];
   }
 
-  public int[] getColumnIxs() {
-    return columnIxs;
+  public int getTotalColCount() {
+    return columnData.length; // Includes the columns that have no data
   }
 
   protected void resetColumnArrays(int columnCount) {
-    if (columnIxs != null && columnCount == columnIxs.length) return;
-    columnIxs = new int[columnCount];
+    if (hasData != null && columnCount == hasData.length) {
+      Arrays.fill(hasData, false);
+      return;
+    }
+    hasData = new boolean[columnCount];
     ColumnStreamData[][] columnData = new ColumnStreamData[columnCount][];
     if (this.columnData != null) {
       for (int i = 0; i < Math.min(columnData.length, this.columnData.length); ++i) {
@@ -132,4 +136,8 @@ public class EncodedColumnBatch<BatchKey> {
     }
     this.columnData = columnData;
   }
-}
\ No newline at end of file
+
+  public boolean hasData(int colIx) {
+    return hasData[colIx];
+  }
+}


Mime
View raw message