Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 323AF200D48 for ; Wed, 15 Nov 2017 06:52:28 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 30C48160C07; Wed, 15 Nov 2017 05:52:28 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id C9701160C0E for ; Wed, 15 Nov 2017 06:52:25 +0100 (CET) Received: (qmail 28719 invoked by uid 500); 15 Nov 2017 05:52:24 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 27634 invoked by uid 99); 15 Nov 2017 05:52:23 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 15 Nov 2017 05:52:23 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id E4029F5E57; Wed, 15 Nov 2017 05:52:22 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: hashutosh@apache.org To: commits@hive.apache.org Date: Wed, 15 Nov 2017 05:52:30 -0000 Message-Id: In-Reply-To: <2aca5e91714c43fd8902af9014c41a7f@git.apache.org> References: <2aca5e91714c43fd8902af9014c41a7f@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [09/12] hive git commit: HIVE-18067 : Remove extraneous golden files archived-at: Wed, 15 Nov 2017 05:52:28 -0000 http://git-wip-us.apache.org/repos/asf/hive/blob/afa9ffee/ql/src/test/results/clientpositive/vector_windowing_expressions.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_windowing_expressions.q.out b/ql/src/test/results/clientpositive/vector_windowing_expressions.q.out deleted file mode 100644 index 7a26191..0000000 --- a/ql/src/test/results/clientpositive/vector_windowing_expressions.q.out +++ /dev/null @@ -1,2077 +0,0 @@ -PREHOOK: query: drop table over10k -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table over10k -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table over10k( - t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - `dec` decimal(4,2), - bin binary) - row format delimited - fields terminated by '|' -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over10k -POSTHOOK: query: create table over10k( - t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - `dec` decimal(4,2), - bin binary) - row format delimited - fields terminated by '|' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over10k -PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over10k -POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over10k -PREHOOK: query: explain vectorization detail -select p_mfgr, p_retailprice, p_size, -round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), -max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 -from part -window w1 as (distribute by p_mfgr sort by p_retailprice) -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select p_mfgr, p_retailprice, p_size, -round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), -max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 -from part -window w1 as (distribute by p_mfgr sort by p_retailprice) -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - projectedColumns: [p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string] - Reduce Output Operator - key expressions: p_mfgr (type: string), p_retailprice (type: double) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_size (type: int) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 9 - includeColumns: [2, 5, 7] - dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double) - outputColumnNames: _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: string, _col5: int, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col7 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col7 - name: sum - window function: GenericUDAFSumDouble - window frame: RANGE PRECEDING(MAX)~CURRENT - window function definition - alias: sum_window_1 - arguments: lag(...) - name: sum - window function: GenericUDAFSumDouble - window frame: RANGE PRECEDING(MAX)~CURRENT - window function definition - alias: last_value_window_2 - arguments: _col7 - name: last_value - window function: GenericUDAFLastValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - window function definition - alias: max_window_3 - arguments: _col7 - name: max - window function: GenericUDAFMaxEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - window function definition - alias: min_window_4 - arguments: _col7 - name: min - window function: GenericUDAFMinEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - window function definition - alias: first_value_window_5 - arguments: _col7 - name: first_value - window function: GenericUDAFFirstValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - Lead/Lag information: lag(...) (type: double) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), (round(sum_window_0, 2) = round((sum_window_1 + last_value_window_2), 2)) (type: boolean), ((max_window_3 - min_window_4) = (last_value_window_2 - first_value_window_5)) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select p_mfgr, p_retailprice, p_size, -round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), -max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 -from part -window w1 as (distribute by p_mfgr sort by p_retailprice) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, p_size, -round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), -max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 -from part -window w1 as (distribute by p_mfgr sort by p_retailprice) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -p_mfgr p_retailprice p_size _c3 _c4 -Manufacturer#1 1173.15 2 true true -Manufacturer#1 1173.15 2 true true -Manufacturer#1 1414.42 28 true true -Manufacturer#1 1602.59 6 true true -Manufacturer#1 1632.66 42 true true -Manufacturer#1 1753.76 34 true true -Manufacturer#2 1690.68 14 true true -Manufacturer#2 1698.66 25 true true -Manufacturer#2 1701.6 18 true true -Manufacturer#2 1800.7 40 true true -Manufacturer#2 2031.98 2 true true -Manufacturer#3 1190.27 14 true true -Manufacturer#3 1337.29 45 true true -Manufacturer#3 1410.39 19 true true -Manufacturer#3 1671.68 17 true true -Manufacturer#3 1922.98 1 true true -Manufacturer#4 1206.26 27 true true -Manufacturer#4 1290.35 12 true true -Manufacturer#4 1375.42 39 true true -Manufacturer#4 1620.67 10 true true -Manufacturer#4 1844.92 7 true true -Manufacturer#5 1018.1 46 true true -Manufacturer#5 1464.48 23 true true -Manufacturer#5 1611.66 6 true true -Manufacturer#5 1788.73 2 true true -Manufacturer#5 1789.69 31 true true -PREHOOK: query: explain vectorization detail -select p_mfgr, p_retailprice, p_size, -rank() over (distribute by p_mfgr sort by p_retailprice) as r, -sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, -sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 -from part -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select p_mfgr, p_retailprice, p_size, -rank() over (distribute by p_mfgr sort by p_retailprice) as r, -sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, -sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 -from part -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - projectedColumns: [p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string] - Reduce Output Operator - key expressions: p_mfgr (type: string), p_retailprice (type: double) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_size (type: int) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 9 - includeColumns: [2, 5, 7] - dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double) - outputColumnNames: _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: string, _col5: int, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col7 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col7 - name: rank - window function: GenericUDAFRankEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - window function definition - alias: sum_window_1 - arguments: _col7 - name: sum - window function: GenericUDAFSumDouble - window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), rank_window_0 (type: int), sum_window_1 (type: double), (sum_window_1 - 5.0) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select p_mfgr, p_retailprice, p_size, -rank() over (distribute by p_mfgr sort by p_retailprice) as r, -sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, -sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 -from part -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, p_size, -rank() over (distribute by p_mfgr sort by p_retailprice) as r, -sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, -sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 -from part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -p_mfgr p_retailprice p_size r s2 s1 -Manufacturer#1 1173.15 2 1 1173.15 1168.15 -Manufacturer#1 1173.15 2 1 2346.3 2341.3 -Manufacturer#1 1414.42 28 3 3760.7200000000003 3755.7200000000003 -Manufacturer#1 1602.59 6 4 5363.31 5358.31 -Manufacturer#1 1632.66 42 5 6995.97 6990.97 -Manufacturer#1 1753.76 34 6 8749.73 8744.73 -Manufacturer#2 1690.68 14 1 1690.68 1685.68 -Manufacturer#2 1698.66 25 2 3389.34 3384.34 -Manufacturer#2 1701.6 18 3 5090.9400000000005 5085.9400000000005 -Manufacturer#2 1800.7 40 4 6891.64 6886.64 -Manufacturer#2 2031.98 2 5 8923.62 8918.62 -Manufacturer#3 1190.27 14 1 1190.27 1185.27 -Manufacturer#3 1337.29 45 2 2527.56 2522.56 -Manufacturer#3 1410.39 19 3 3937.95 3932.95 -Manufacturer#3 1671.68 17 4 5609.63 5604.63 -Manufacturer#3 1922.98 1 5 7532.610000000001 7527.610000000001 -Manufacturer#4 1206.26 27 1 1206.26 1201.26 -Manufacturer#4 1290.35 12 2 2496.6099999999997 2491.6099999999997 -Manufacturer#4 1375.42 39 3 3872.0299999999997 3867.0299999999997 -Manufacturer#4 1620.67 10 4 5492.7 5487.7 -Manufacturer#4 1844.92 7 5 7337.62 7332.62 -Manufacturer#5 1018.1 46 1 1018.1 1013.1 -Manufacturer#5 1464.48 23 2 2482.58 2477.58 -Manufacturer#5 1611.66 6 3 4094.24 4089.24 -Manufacturer#5 1788.73 2 4 5882.969999999999 5877.969999999999 -Manufacturer#5 1789.69 31 5 7672.66 7667.66 -PREHOOK: query: explain vectorization detail -select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: over10k - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - projectedColumns: [t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary] - Reduce Output Operator - key expressions: t (type: tinyint), bo (type: boolean), s (type: string), si (type: smallint), f (type: float) - sort order: ++++- - Map-reduce partition columns: t (type: tinyint) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 11 - includeColumns: [0, 1, 4, 6, 7] - dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: string) - outputColumnNames: _col0, _col1, _col4, _col6, _col7 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: tinyint, _col1: smallint, _col4: float, _col6: boolean, _col7: string - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col6 ASC NULLS FIRST, _col7 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 DESC NULLS LAST - partition by: _col0 - raw input shape: - window functions: - window function definition - alias: lead_window_0 - arguments: _col4, 3 - name: lead - window function: GenericUDAFLeadEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col7 (type: string), _col1 (type: smallint), _col4 (type: float), (UDFToFloat(_col1) - lead_window_0) (type: float) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 100 - Processor Tree: - ListSink - -PREHOOK: query: select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k -#### A masked pattern was here #### -POSTHOOK: query: select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k -#### A masked pattern was here #### -s si f _c3 -alice allen 400 76.31 337.23 -alice davidson 384 71.97 357.79 -alice king 455 2.48 395.93 -alice king 458 62.77 384.16998 -alice xylophone 485 26.21 464.05 -bob falkner 260 59.07 242.4 -bob ichabod 454 73.83 381.7 -bob polk 264 20.95 257.17 -bob underhill 454 17.6 424.94 -bob underhill 465 72.3 453.17 -bob van buren 433 6.83 398.4 -calvin ichabod 431 29.06 334.22 -david garcia 485 11.83 421.51 -ethan steinbeck 298 34.6 288.14 -fred ellison 376 96.78 330.76 -holly steinbeck 384 63.49 293.7 -holly underhill 318 9.86 269.91 -irene ellison 458 45.24 365.29 -irene underhill 307 90.3 244.19 -jessica johnson 494 48.09 490.18 -jessica king 459 92.71 452.2 -jessica white 284 62.81 209.08 -luke garcia 311 3.82 267.27 -luke young 451 6.8 429.0 -mike king 275 74.92 211.81 -oscar garcia 362 43.73 340.66 -priscilla laertes 316 22.0 296.06 -priscilla quirinius 423 63.19 362.72 -priscilla zipper 485 21.34 400.61 -quinn ellison 266 19.94 209.95 -quinn polk 507 60.28 447.66 -sarah robinson 320 84.39 309.74 -tom polk 346 56.05 320.33 -ulysses ellison 381 59.34 358.66 -ulysses quirinius 303 10.26 259.6 -ulysses robinson 313 25.67 269.31 -ulysses steinbeck 333 22.34 270.61 -victor allen 337 43.4 311.5 -victor hernandez 447 43.69 375.22 -victor xylophone 438 62.39 424.33 -wendy quirinius 279 25.5 250.25 -wendy robinson 275 71.78 262.88 -wendy xylophone 314 13.67 295.73 -xavier garcia 493 28.75 474.56 -zach thompson 386 12.12 377.63 -zach young 286 18.27 263.65 -alice falkner 280 18.44 227.7 -bob ellison 339 8.37 300.95 -bob johnson 374 22.35 326.49 -calvin white 280 52.3 198.32 -david carson 270 38.05 255.77 -david falkner 469 47.51 388.35 -david hernandez 408 81.68 339.27 -ethan underhill 339 14.23 256.26 -gabriella brown 498 80.65 413.25 -holly nixon 505 68.73 440.71 -holly polk 268 82.74 182.04001 -holly thompson 387 84.75 298.22 -irene young 458 64.29 401.8 -jessica miller 299 85.96 243.41 -katie ichabod 469 88.78 385.61 -luke ichabod 289 56.2 286.74 -luke king 337 55.59 274.88 -mike allen 465 83.39 383.03 -mike polk 500 2.26 427.74 -mike white 454 62.12 430.78 -mike xylophone 448 81.97 447.17 -nick nixon 335 72.26 240.78 -nick robinson 350 23.22 294.59 -oscar davidson 432 0.83 420.93 -oscar johnson 315 94.22 233.05 -oscar johnson 469 55.41 468.44 -oscar miller 324 11.07 265.19 -rachel davidson 507 81.95 468.78 -rachel thompson 344 0.56 246.12 -sarah miller 386 58.81 304.36 -sarah xylophone 275 38.22 177.48999 -sarah zipper 376 97.88 294.61 -tom hernandez 467 81.64 459.9 -tom hernandez 477 97.51 415.19 -tom steinbeck 414 81.39 361.87 -ulysses carson 343 7.1 314.22 -victor robinson 415 61.81 349.5 -victor thompson 344 52.13 NULL -xavier ovid 280 28.78 NULL -yuri xylophone 430 65.5 NULL -alice underhill 389 26.68 368.06 -alice underhill 446 6.49 444.21 -bob ovid 331 67.12 236.43 -bob van buren 406 20.94 383.32 -david falkner 406 1.79 374.34 -david miller 450 94.57 380.13 -ethan allen 380 22.68 375.6 -ethan king 395 31.66 361.51 -ethan nixon 475 69.87 431.39 -ethan polk 283 4.4 243.82 -fred allen 331 33.49 281.68 -fred king 511 43.61 457.22 -fred polk 261 39.18 248.73 -fred young 303 49.32 221.51001 -PREHOOK: query: explain vectorization detail -select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: over10k - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - projectedColumns: [t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary] - Reduce Output Operator - key expressions: si (type: smallint), i (type: int), s (type: string) - sort order: +++ - Map-reduce partition columns: si (type: smallint) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 11 - includeColumns: [1, 2, 7] - dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string) - outputColumnNames: _col1, _col2, _col7 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: smallint, _col2: int, _col7: string - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS FIRST, _col7 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: lead_window_0 - arguments: _col2, 3, 0 - name: lead - window function: GenericUDAFLeadEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col7 (type: string), _col2 (type: int), (_col2 - lead_window_0) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 100 - Processor Tree: - ListSink - -PREHOOK: query: select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k -#### A masked pattern was here #### -POSTHOOK: query: select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k -#### A masked pattern was here #### -s i _c2 -wendy garcia 65540 -18 -ethan thompson 65543 -20 -zach nixon 65549 -31 -alice robinson 65558 -28 -wendy nixon 65563 -33 -victor robinson 65580 -19 -ethan falkner 65586 -18 -victor davidson 65596 -17 -xavier quirinius 65599 -14 -fred quirinius 65604 -11 -nick zipper 65613 -3 -xavier van buren 65613 -7 -victor johnson 65615 -12 -alice ovid 65616 -24 -xavier ovid 65620 -23 -ulysses white 65627 -24 -sarah white 65640 -13 -calvin young 65643 -25 -victor thompson 65651 -42 -calvin johnson 65653 -53 -irene polk 65668 -45 -zach underhill 65693 -38 -quinn hernandez 65706 -27 -rachel ovid 65713 -24 -gabriella falkner 65731 -7 -zach white 65733 -8 -fred hernandez 65737 -7 -rachel ellison 65738 -6 -oscar steinbeck 65741 -6 -alice ellison 65744 -8 -tom allen 65744 -19 -quinn quirinius 65747 -31 -victor hernandez 65752 -26 -holly xylophone 65763 -26 -david davidson 65778 65778 -ulysses young 65778 65778 -sarah brown 65789 65789 -xavier brown 65541 -16 -zach hernandez 65542 -18 -katie ichabod 65547 -19 -oscar young 65557 -15 -holly white 65560 -14 -priscilla laertes 65566 -9 -ethan king 65572 -6 -zach hernandez 65574 -10 -oscar thompson 65575 -13 -victor xylophone 65578 -16 -gabriella ellison 65584 -26 -nick quirinius 65588 -22 -holly robinson 65594 -18 -alice xylophone 65610 -16 -yuri brown 65610 -21 -sarah hernandez 65612 -26 -katie garcia 65626 -28 -jessica laertes 65631 -23 -ethan underhill 65638 -17 -irene young 65654 -37 -priscilla thompson 65654 -40 -luke quirinius 65655 -44 -david brown 65691 -20 -luke falkner 65694 -18 -priscilla miller 65699 -20 -rachel robinson 65711 -9 -ethan polk 65712 -10 -wendy brown 65719 -13 -mike underhill 65720 -18 -zach underhill 65722 -26 -nick zipper 65732 -20 -fred brown 65738 -18 -ulysses young 65748 -23 -nick davidson 65752 -19 -fred zipper 65756 -15 -yuri nixon 65771 -10 -zach hernandez 65771 -19 -zach zipper 65771 65771 -alice underhill 65781 65781 -oscar laertes 65790 65790 -sarah zipper 65546 -19 -bob falkner 65551 -17 -luke ovid 65551 -17 -katie allen 65565 -4 -nick falkner 65568 -5 -zach steinbeck 65568 -11 -oscar van buren 65569 -13 -gabriella young 65573 -11 -jessica ichabod 65579 -24 -david garcia 65582 -24 -nick xylophone 65584 -27 -calvin johnson 65603 -14 -xavier zipper 65606 -50 -alice nixon 65611 -58 -jessica laertes 65617 -62 -fred king 65656 -61 -priscilla underhill 65669 -48 -priscilla zipper 65679 -45 -nick king 65717 -11 -sarah polk 65717 -17 -irene quirinius 65724 -28 -tom laertes 65728 -25 -yuri johnson 65734 -27 -PREHOOK: query: explain vectorization detail -select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: over10k - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - projectedColumns: [t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary] - Reduce Output Operator - key expressions: b (type: bigint), si (type: smallint), s (type: string), d (type: double) - sort order: ++++ - Map-reduce partition columns: b (type: bigint) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 11 - includeColumns: [1, 3, 5, 7] - dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey2 (type: string) - outputColumnNames: _col1, _col3, _col5, _col7 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: smallint, _col3: bigint, _col5: double, _col7: string - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST, _col7 ASC NULLS FIRST, _col5 ASC NULLS FIRST - partition by: _col3 - raw input shape: - window functions: - window function definition - alias: lag_window_0 - arguments: _col5, 3 - name: lag - window function: GenericUDAFLagEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col7 (type: string), _col1 (type: smallint), _col5 (type: double), (UDFToDouble(_col1) - lag_window_0) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 100 - Processor Tree: - ListSink - -PREHOOK: query: select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k -#### A masked pattern was here #### -POSTHOOK: query: select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k -#### A masked pattern was here #### -s si d _c3 -jessica ellison 262 30.41 NULL -david young 266 45.12 NULL -jessica steinbeck 274 2.15 NULL -david zipper 275 43.45 244.59 -zach nixon 283 15.95 237.88 -holly allen 285 24.37 282.85 -irene garcia 292 33.54 248.55 -ulysses xylophone 292 44.66 276.05 -irene van buren 309 35.81 284.63 -sarah miller 312 6.65 278.46 -victor garcia 312 39.14 267.34000000000003 -ethan ichabod 319 29.4 283.19 -wendy falkner 322 10.02 315.35 -oscar miller 324 25.95 284.86 -david ovid 332 28.34 302.6 -alice zipper 333 3.38 322.98 -yuri nixon 333 8.28 307.05 -ulysses nixon 335 18.48 306.66 -david ovid 336 9.36 332.62 -calvin falkner 337 17.63 328.72 -katie quirinius 349 11.3 330.52 -quinn miller 351 22.46 341.64 -victor xylophone 357 38.58 339.37 -ethan garcia 368 9.2 356.7 -nick steinbeck 395 37.54 372.54 -ulysses ichabod 415 47.61 376.42 -rachel thompson 416 37.99 406.8 -calvin young 418 47.22 380.46 -katie xylophone 425 32.59 377.39 -nick quirinius 429 19.63 391.01 -ethan ellison 453 47.92 405.78 -irene nixon 454 48.03 421.40999999999997 -bob steinbeck 462 47.04 442.37 -luke robinson 462 47.48 414.08 -gabriella steinbeck 467 9.35 418.97 -tom hernandez 467 29.36 419.96 -irene polk 485 14.26 437.52 -mike xylophone 494 36.92 484.65 -calvin allen 499 39.99 469.64 -quinn steinbeck 503 16.62 488.74 -calvin thompson 263 30.87 NULL -rachel quirinius 263 29.46 NULL -ulysses garcia 263 31.85 NULL -mike steinbeck 266 48.57 235.13 -rachel young 275 14.75 245.54 -tom king 278 31.11 246.15 -oscar robinson 283 30.35 234.43 -zach allen 284 1.88 269.25 -bob king 308 27.61 276.89 -ulysses allen 310 22.77 279.65 -fred nixon 317 0.48 315.12 -gabriella robinson 321 0.33 293.39 -bob johnson 325 9.61 302.23 -rachel davidson 335 2.34 334.52 -fred brown 337 5.8 336.67 -wendy ellison 350 20.25 340.39 -zach falkner 391 13.67 388.66 -katie xylophone 410 39.09 404.2 -holly king 413 3.56 392.75 -sarah van buren 417 7.81 403.33 -calvin van buren 430 36.01 390.90999999999997 -katie white 434 33.56 430.44 -oscar quirinius 454 7.03 446.19 -zach young 505 18.19 468.99 -gabriella robinson 506 12.8 472.44 -sarah xylophone 507 16.09 499.97 -rachel thompson 267 46.87 NULL -gabriella van buren 271 41.04 NULL -mike steinbeck 284 11.44 NULL -ethan ovid 293 2.08 246.13 -luke falkner 293 40.67 251.96 -irene nixon 321 24.35 309.56 -mike van buren 327 2.58 324.92 -ulysses robinson 329 26.64 288.33 -quinn laertes 332 10.71 307.65 -tom polk 346 34.03 343.42 -jessica johnson 352 45.71 325.36 -xavier davidson 354 33.9 343.29 -wendy nixon 364 29.42 329.97 -jessica quirinius 375 47.33 329.29 -xavier brown 376 26.17 342.1 -gabriella davidson 383 18.87 353.58 -jessica brown 388 34.09 340.67 -gabriella garcia 391 32.44 364.83 -ethan miller 396 49.07 377.13 -bob garcia 416 7.82 381.90999999999997 -priscilla hernandez 416 29.94 383.56 -holly nixon 419 17.81 369.93 -nick underhill 429 39.54 421.18 -xavier falkner 434 0.88 404.06 -luke robinson 461 44.02 443.19 -bob underhill 465 22.58 425.46 -ulysses king 483 37.98 482.12 -jessica miller 486 26.14 441.98 -bob ovid 493 9.7 470.42 -alice falkner 500 37.85 462.02 -quinn xylophone 267 49.8 NULL -gabriella thompson 268 17.15 NULL -calvin xylophone 275 49.32 NULL -gabriella zipper 279 30.41 229.2 -PREHOOK: query: explain vectorization detail -select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: over10k - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - projectedColumns: [t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary] - Reduce Output Operator - key expressions: f (type: float), b (type: bigint) - sort order: ++ - Map-reduce partition columns: f (type: float) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - value expressions: s (type: string) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 11 - includeColumns: [3, 4, 7] - dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey0 (type: float), VALUE._col5 (type: string) - outputColumnNames: _col3, _col4, _col7 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col3: bigint, _col4: float, _col7: string - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col3 ASC NULLS FIRST - partition by: _col4 - raw input shape: - window functions: - window function definition - alias: lag_window_0 - arguments: _col7, 3, 'fred' - name: lag - window function: GenericUDAFLagEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col7 (type: string), lag_window_0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 100 - Processor Tree: - ListSink - -PREHOOK: query: select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100 -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k -#### A masked pattern was here #### -POSTHOOK: query: select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k -#### A masked pattern was here #### -s lag_window_0 -yuri thompson fred -bob ichabod fred -luke king fred -luke steinbeck fred -fred zipper fred -quinn miller fred -calvin van buren fred -holly steinbeck fred -david davidson fred -calvin thompson fred -calvin quirinius fred -david ovid fred -holly thompson fred -nick zipper fred -victor steinbeck fred -victor robinson fred -zach ovid fred -ulysses zipper fred -luke falkner fred -irene thompson fred -yuri johnson fred -ulysses falkner fred -gabriella robinson fred -alice robinson fred -priscilla xylophone fred -david laertes fred -mike underhill fred -victor van buren fred -holly falkner fred -priscilla falkner fred -ethan ovid fred -luke zipper fred -mike steinbeck fred -calvin white fred -alice quirinius fred -irene miller fred -wendy polk fred -nick young fred -yuri davidson fred -ethan ellison fred -zach hernandez fred -wendy miller fred -katie underhill fred -irene zipper fred -holly allen fred -quinn brown fred -calvin ovid fred -zach robinson fred -nick miller fred -mike allen fred -yuri van buren fred -priscilla young fred -zach miller fred -victor xylophone fred -sarah falkner fred -rachel ichabod fred -alice robinson fred -calvin ovid fred -calvin ovid fred -luke laertes fred -david hernandez fred -alice ovid fred -luke quirinius fred -oscar white fred -zach falkner fred -rachel thompson fred -priscilla king fred -xavier polk fred -wendy ichabod fred -rachel ovid fred -wendy allen fred -luke brown fred -mike brown fred -oscar ichabod fred -xavier garcia fred -yuri brown fred -bob xylophone fred -luke davidson fred -ethan quirinius fred -zach davidson fred -irene miller fred -wendy king fred -bob zipper fred -sarah thompson fred -bob carson fred -bob laertes fred -xavier allen fred -sarah robinson fred -david king fred -oscar davidson fred -victor hernandez fred -wendy polk fred -david ellison fred -ulysses johnson fred -jessica ovid fred -bob king fred -ulysses garcia fred -irene falkner fred -holly robinson fred -yuri white fred -PREHOOK: query: explain vectorization detail -select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - projectedColumns: [p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string] - Reduce Output Operator - key expressions: p_mfgr (type: string), p_type (type: string) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string), p_type (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_retailprice (type: double) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 9 - includeColumns: [2, 4, 7] - dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double) - outputColumnNames: _col2, _col4, _col7 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: string, _col4: string, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS FIRST - partition by: _col2, _col4 - raw input shape: - window functions: - window function definition - alias: avg_window_0 - arguments: _col7 - name: avg - window function: GenericUDAFAverageEvaluatorDouble - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), avg_window_0 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -p_mfgr avg_window_0 -Manufacturer#1 1753.76 -Manufacturer#1 1632.66 -Manufacturer#1 1602.59 -Manufacturer#1 1173.15 -Manufacturer#1 1173.15 -Manufacturer#1 1414.42 -Manufacturer#2 1800.7 -Manufacturer#2 1690.68 -Manufacturer#2 2031.98 -Manufacturer#2 1698.66 -Manufacturer#2 1701.6 -Manufacturer#3 1922.98 -Manufacturer#3 1410.39 -Manufacturer#3 1671.68 -Manufacturer#3 1190.27 -Manufacturer#3 1337.29 -Manufacturer#4 1844.92 -Manufacturer#4 1375.42 -Manufacturer#4 1620.67 -Manufacturer#4 1206.26 -Manufacturer#4 1290.35 -Manufacturer#5 1018.1 -Manufacturer#5 1464.48 -Manufacturer#5 1789.69 -Manufacturer#5 1788.73 -Manufacturer#5 1611.66 -PREHOOK: query: explain vectorization detail -select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - projectedColumns: [p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string] - Reduce Output Operator - key expressions: p_mfgr (type: string), p_type (type: string) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_retailprice (type: double) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 9 - includeColumns: [2, 4, 7] - dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double) - outputColumnNames: _col2, _col4, _col7 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: string, _col4: string, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: avg_window_0 - arguments: _col7 - name: avg - window function: GenericUDAFAverageEvaluatorDouble - window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), avg_window_0 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -p_mfgr avg_window_0 -Manufacturer#1 1753.76 -Manufacturer#1 1693.21 -Manufacturer#1 1663.0033333333333 -Manufacturer#1 1540.54 -Manufacturer#1 1467.062 -Manufacturer#1 1458.2883333333332 -Manufacturer#2 1800.7 -Manufacturer#2 1745.69 -Manufacturer#2 1841.1200000000001 -Manufacturer#2 1805.505 -Manufacturer#2 1784.7240000000002 -Manufacturer#3 1922.98 -Manufacturer#3 1666.685 -Manufacturer#3 1668.3500000000001 -Manufacturer#3 1548.83 -Manufacturer#3 1506.522 -Manufacturer#4 1844.92 -Manufacturer#4 1610.17 -Manufacturer#4 1613.67 -Manufacturer#4 1511.8175 -Manufacturer#4 1467.5240000000001 -Manufacturer#5 1018.1 -Manufacturer#5 1241.29 -Manufacturer#5 1424.0900000000001 -Manufacturer#5 1515.25 -Manufacturer#5 1534.532 -PREHOOK: query: create table t1 (a1 int, b1 string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t1 -POSTHOOK: query: create table t1 (a1 int, b1 string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t1 -PREHOOK: query: create table t2 (a1 int, b1 string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t2 -POSTHOOK: query: create table t2 (a1 int, b1 string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t2 -PREHOOK: query: explain vectorization detail -from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: over10k - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - projectedColumns: [t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary] - Reduce Output Operator - key expressions: ts (type: timestamp), i (type: int) - sort order: ++ - Map-reduce partition columns: ts (type: timestamp) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - value expressions: s (type: string) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 11 - includeColumns: [2, 7, 8] - dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: int), VALUE._col6 (type: string), KEY.reducesinkkey0 (type: timestamp) - outputColumnNames: _col2, _col7, _col8 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: int, _col7: string, _col8: timestamp - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col2 ASC NULLS FIRST - partition by: _col8 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumLong - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sum_window_0 (type: bigint), _col7 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t2 - - Stage: Stage-4 - Stats Work - Basic Stats Work: - -PREHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * -PREHOOK: type: QUERY -PREHOOK: Input: default@over10k -PREHOOK: Output: default@t1 -PREHOOK: Output: default@t2 -POSTHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over10k -POSTHOOK: Output: default@t1 -POSTHOOK: Output: default@t2 -POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] -POSTHOOK: Lineage: t1.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ] -POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] -POSTHOOK: Lineage: t2.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ] -_col0 _col1 -PREHOOK: query: select * from t1 limit 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -POSTHOOK: query: select * from t1 limit 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -#### A masked pattern was here #### -t1.a1 t1.b1 -65542 rachel thompson -131088 oscar brown -262258 wendy steinbeck -PREHOOK: query: select * from t2 limit 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: select * from t2 limit 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -t2.a1 t2.b1 -65542 rachel thompson -131088 oscar brown -262258 wendy steinbeck -PREHOOK: query: explain vectorization detail -select p_mfgr, p_retailprice, p_size, -round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) -from part -window w1 as (distribute by p_mfgr sort by p_retailprice) -limit 11 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select p_mfgr, p_retailprice, p_size, -round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) -from part -window w1 as (distribute by p_mfgr sort by p_retailprice) -limit 11 -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - projectedColumns: [p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string] - Reduce Output Operator - key expressions: p_mfgr (type: string), p_retailprice (type: double) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_size (type: int) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 9 - includeColumns: [2, 5, 7] - dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double) - outputColumnNames: _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: string, _col5: int, _col7: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col7 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col7 - name: sum - window function: GenericUDAFSumDouble - window frame: RANGE PRECEDING(MAX)~CURRENT - window function definition - alias: sum_window_1 - arguments: lag(...) - name: sum - window function: GenericUDAFSumDouble - window frame: RANGE PRECEDING(MAX)~CURRENT - window function definition - alias: last_value_window_2 - arguments: _col7 - name: last_value - window function: GenericUDAFLastValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - Lead/Lag information: lag(...) (type: double) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), ((round(sum_window_0, 2) + 50.0) = round((sum_window_1 + last_value_window_2), 2)) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 11 - Statistics: Num rows: 11 Data size: 1331 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 11 Data size: 1331 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 11 - Processor Tree: - ListSink - -PREHOOK: query: select p_mfgr, p_retailprice, p_size, -round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) -from part -window w1 as (distribute by p_mfgr sort by p_retailprice) -limit 11 -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, p_size, -round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) -from part -window w1 as (distribute by p_mfgr sort by p_retailprice) -limit 11 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -p_mfgr p_retailprice p_size _c3 -Manufacturer#1 1173.15 2 true -Manufacturer#1 1173.15 2 true -Manufacturer#1 1414.42 28 true -Manufacturer#1 1602.59 6 true -Manufacturer#1 1632.66 42 true -Manufacturer#1 1753.76 34 true -Manufacturer#2 1690.68 14 true -Manufacturer#2 1698.66 25 true -Manufacturer#2 1701.6 18 true -Manufacturer#2 1800.7 40 true -Manufacturer#2 2031.98 2 true -PREHOOK: query: select p_mfgr, p_retailprice, p_size, -round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), -max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 -from part -window w1 as (distribute by p_mfgr sort by p_retailprice) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, p_size, -round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), -max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 -from part -window w1 as (distribute by p_mfgr sort by p_retailprice) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -p_mfgr p_retailprice p_size _c3 _c4 -Manufacturer#1 1173.15 2 true true -Manufacturer#1 1173.15 2 true true -Manufacturer#1 1414.42 28 true true -Manufacturer#1 1602.59 6 true true -Manufacturer#1 1632.66 42 true true -Manufacturer#1 1753.76 34 true true -Manufacturer#2 1690.68 14 true true -Manufacturer#2 1698.66 25 true true -Manufacturer#2 1701.6 18 true true -Manufacturer#2 1800.7 40 true true -Manufacturer#2 2031.98 2 true true -Manufacturer#3 1190.27 14 true true -Manufacturer#3 1337.29 45 true true -Manufacturer#3 1410.39 19 true true -Manufacturer#3 1671.68 17 true true -Manufacturer#3 1922.98 1 true true -Manufacturer#4 1206.26 27 true true -Manufacturer#4 1290.35 12 true true -Manufacturer#4 1375.42 39 true true -Manufacturer#4 1620.67 10 true true -Manufacturer#4 1844.92 7 true true -Manufacturer#5 1018.1 46 true true -Manufacturer#5 1464.48 23 true true -Manufacturer#5 1611.66 6 true true -Manufacturer#5 1788.73 2 true true -Manufacturer#5 1789.69 31 true true -PREHOOK: query: select p_mfgr, p_retailprice, p_size, -rank() over (distribute by p_mfgr sort by p_retailprice) as r, -sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, -sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 -from part -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, p_size, -rank() over (distribute by p_mfgr sort by p_retailprice) as r, -sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, -sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 -from part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -p_mfgr p_retailprice p_size r s2 s1 -Manufacturer#1 1173.15 2 1 1173.15 1168.15 -Manufacturer#1 1173.15 2 1 2346.3 2341.3 -Manufacturer#1 1414.42 28 3 3760.7200000000003 3755.7200000000003 -Manufacturer#1 1602.59 6 4 5363.31 5358.31 -Manufacturer#1 1632.66 42 5 6995.97 6990.97 -Manufacturer#1 1753.76 34 6 8749.73 8744.73 -Manufacturer#2 1690.68 14 1 1690.68 1685.68 -Manufacturer#2 1698.66 25 2 3389.34 3384.34 -Manufacturer#2 1701.6 18 3 5090.9400000000005 5085.9400000000005 -Manufacturer#2 1800.7 40 4 6891.64 6886.64 -Manufacturer#2 2031.98 2 5 8923.62 8918.62 -Manufacturer#3 1190.27 14 1 1190.27 1185.27 -Manufacturer#3 1337.29 45 2 2527.56 2522.56 -Manufacturer#3 1410.39 19 3 3937.95 3932.95 -Manufacturer#3 1671.68 17 4 5609.63 5604.63 -Manufacturer#3 1922.98 1 5 7532.610000000001 7527.610000000001 -Manufacturer#4 1206.26 27 1 1206.26 1201.26 -Manufacturer#4 1290.35 12 2 2496.6099999999997 2491.6099999999997 -Manufacturer#4 1375.42 39 3 3872.0299999999997 3867.0299999999997 -Manufacturer#4 1620.67 10 4 5492.7 5487.7 -Manufacturer#4 1844.92 7 5 7337.62 7332.62 -Manufacturer#5 1018.1 46 1 1018.1 1013.1 -Manufacturer#5 1464.48 23 2 2482.58 2477.58 -Manufacturer#5 1611.66 6 3 4094.24 4089.24 -Manufacturer#5 1788.73 2 4 5882.969999999999 5877.969999999999 -Manufacturer#5 1789.69 31 5 7672.66 7667.66 -PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -p_mfgr avg_window_0 -Manufacturer#1 1753.76 -Manufacturer#1 1632.66 -Manufacturer#1 1602.59 -Manufacturer#1 1173.15 -Manufacturer#1 1173.15 -Manufacturer#1 1414.42 -Manufacturer#2 1800.7 -Manufacturer#2 1690.68 -Manufacturer#2 2031.98 -Manufacturer#2 1698.66 -Manufacturer#2 1701.6 -Manufacturer#3 1922.98 -Manufacturer#3 1410.39 -Manufacturer#3 1671.68 -Manufacturer#3 1190.27 -Manufacturer#3 1337.29 -Manufacturer#4 1844.92 -Manufacturer#4 1375.42 -Manufacturer#4 1620.67 -Manufacturer#4 1206.26 -Manufacturer#4 1290.35 -Manufacturer#5 1018.1 -Manufacturer#5 1464.48 -Manufacturer#5 1789.69 -Manufacturer#5 1788.73 -Manufacturer#5 1611.66 -PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -p_mfgr avg_window_0 -Manufacturer#1 1753.76 -Manufacturer#1 1693.21 -Manufacturer#1 1663.0033333333333 -Manufacturer#1 1540.54 -Manufacturer#1 1467.062 -Manufacturer#1 1458.2883333333332 -Manufacturer#2 1800.7 -Manufacturer#2 1745.69 -Manufacturer#2 1841.1200000000001 -Manufacturer#2 1805.505 -Manufacturer#2 1784.7240000000002 -Manufacturer#3 1922.98 -Manufacturer#3 1666.685 -Manufacturer#3 1668.3500000000001 -Manufacturer#3 1548.83 -Manufacturer#3 1506.522 -Manufacturer#4 18