hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jd...@apache.org
Subject hive git commit: HIVE-19335: Disable runtime filtering (semijoin reduction opt with bloomfilter) for external tables (Jason Dere, reviewed by Deepak Jaiswal)
Date Tue, 01 May 2018 22:23:03 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-3 7b7618652 -> 6d7236154


HIVE-19335: Disable runtime filtering (semijoin reduction opt with bloomfilter) for external
tables (Jason Dere, reviewed by Deepak Jaiswal)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6d723615
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6d723615
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6d723615

Branch: refs/heads/branch-3
Commit: 6d7236154c505c946996de13eec4eee456bb8795
Parents: 7b76186
Author: Jason Dere <jdere@hortonworks.com>
Authored: Tue May 1 15:09:40 2018 -0700
Committer: Jason Dere <jdere@hortonworks.com>
Committed: Tue May 1 15:22:56 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   5 +
 .../test/resources/testconfiguration.properties |   1 +
 .../DynamicPartitionPruningOptimization.java    |  35 ++
 .../dynamic_semijoin_reduction_4.q              |  65 +++
 .../llap/dynamic_semijoin_reduction_4.q.out     | 568 +++++++++++++++++++
 5 files changed, 674 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6d723615/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 0fb9e93..58f1e48 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2782,6 +2782,11 @@ public class HiveConf extends Configuration {
     HIVE_CLI_TEZ_SESSION_ASYNC("hive.cli.tez.session.async", true, "Whether to start Tez\n"
+
         "session in background when running CLI with Tez, allowing CLI to be available earlier."),
 
+    HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS("hive.disable.unsafe.external.table.operations",
true,
+        "Whether to disable certain optimizations and operations on external tables," +
+        " on the assumption that data changes by external applications may have negative
effects" +
+        " on these operations."),
+
     HIVE_ERROR_ON_EMPTY_PARTITION("hive.error.on.empty.partition", false,
         "Whether to throw an exception if dynamic partition insert generates empty results."),
 

http://git-wip-us.apache.org/repos/asf/hive/blob/6d723615/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 79140d3..7c52982 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -500,6 +500,7 @@ minillaplocal.query.files=\
   dynamic_semijoin_reduction.q,\
   dynamic_semijoin_reduction_2.q,\
   dynamic_semijoin_reduction_3.q,\
+  dynamic_semijoin_reduction_4.q,\
   dynamic_semijoin_reduction_sw.q,\
   dynpart_sort_opt_vectorization.q,\
   dynpart_sort_optimization.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/6d723615/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
index 9917330..caec2c0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
@@ -28,6 +28,7 @@ import java.util.Stack;
 
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
@@ -189,6 +190,7 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor
{
           }
         } else {
           LOG.debug("Column " + column + " is not a partition column");
+          semiJoin = semiJoin && !disableSemiJoinOptDueToExternalTable(parseContext.getConf(),
ts, ctx);
           if (semiJoin && ts.getConf().getFilterExpr() != null) {
             LOG.debug("Initiate semijoin reduction for " + column + " ("
                 + ts.getConf().getFilterExpr().getExprString());
@@ -279,6 +281,39 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor
{
     return false;
   }
 
+  private boolean disableSemiJoinOptDueToExternalTable(HiveConf conf, TableScanOperator ts,
DynamicListContext ctx) {
+    boolean disableSemiJoin = false;
+    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS))
{
+      // We already have the TableScan for one side of the join. Check this now.
+      if (MetaStoreUtils.isExternalTable(ts.getConf().getTableMetadata().getTTable())) {
+        LOG.debug("Disabling semijoin optimzation on {} since it is an external table.",
+            ts.getConf().getTableMetadata().getFullyQualifiedName());
+        disableSemiJoin = true;
+      } else {
+        // Check the other side of the join, using the DynamicListContext
+        ExprNodeDesc exprNodeDesc = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex());
+        ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(exprNodeDesc);
+
+        if (colExpr != null) {
+          // fetch table alias
+          ExprNodeDescUtils.ColumnOrigin columnOrigin =
+                  ExprNodeDescUtils.findColumnOrigin(exprNodeDesc, ctx.generator);
+          if (columnOrigin != null && columnOrigin.op instanceof TableScanOperator)
{
+            // Join key origin has been traced to a table column. Check if the table is external.
+            TableScanOperator joinKeyTs = (TableScanOperator) columnOrigin.op;
+            if (MetaStoreUtils.isExternalTable(joinKeyTs.getConf().getTableMetadata().getTTable()))
{
+              LOG.debug("Join key {} is from {} which is an external table. Disabling semijoin
optimization.",
+                  columnOrigin.col,
+                  joinKeyTs.getConf().getTableMetadata().getFullyQualifiedName());
+              disableSemiJoin = true;
+            }
+          }
+        }
+      }
+    }
+    return disableSemiJoin;
+  }
+
   // Given a key, find the corresponding column name.
   private boolean getColumnInfo(DynamicListContext ctx, StringBuilder internalColName,
                                 StringBuilder colName, StringBuilder tabAlias) {

http://git-wip-us.apache.org/repos/asf/hive/blob/6d723615/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_4.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_4.q
new file mode 100644
index 0000000..67bf7c8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_4.q
@@ -0,0 +1,65 @@
+--! qt:dataset:srcpart
+set hive.compute.query.using.stats=false;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+set hive.stats.autogather=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.stats.fetch.column.stats=true;
+set hive.tez.bloom.filter.factor=1.0f;
+set hive.disable.unsafe.external.table.operations=true;
+
+-- Create Tables
+create table srcpart_date (key string, value string) partitioned by (ds string ) stored as
ORC;
+CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds string) STORED
as ORC;
+create external table srcpart_date_ext (key string, value string) partitioned by (ds string
) stored as ORC;
+CREATE external TABLE srcpart_small_ext(key1 STRING, value1 STRING) partitioned by (ds string)
STORED as ORC;
+
+-- Add Partitions
+alter table srcpart_date add partition (ds = "2008-04-08");
+alter table srcpart_date add partition (ds = "2008-04-09");
+
+alter table srcpart_small add partition (ds = "2008-04-08");
+alter table srcpart_small add partition (ds = "2008-04-09");
+
+alter table srcpart_date_ext add partition (ds = "2008-04-08");
+alter table srcpart_date_ext add partition (ds = "2008-04-09");
+
+alter table srcpart_small_ext add partition (ds = "2008-04-08");
+alter table srcpart_small_ext add partition (ds = "2008-04-09");
+
+-- Load
+insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from
srcpart where ds = "2008-04-08";
+insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from
srcpart where ds = "2008-04-09";
+insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from
srcpart where ds = "2008-04-09" limit 20;
+
+insert overwrite table srcpart_date_ext partition (ds = "2008-04-08" ) select key, value
from srcpart where ds = "2008-04-08";
+insert overwrite table srcpart_date_ext partition (ds = "2008-04-09") select key, value from
srcpart where ds = "2008-04-09";
+insert overwrite table srcpart_small_ext partition (ds = "2008-04-09") select key, value
from srcpart where ds = "2008-04-09" limit 20;
+
+analyze table srcpart_date compute statistics for columns;
+analyze table srcpart_small compute statistics for columns;
+
+analyze table srcpart_date_ext compute statistics for columns;
+analyze table srcpart_small_ext compute statistics for columns;
+
+
+-- single column, single key
+set test.comment=This query should use semijoin reduction optimization;
+set test.comment;
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1);
+
+set test.comment=Big table is external table - no semijoin reduction opt;
+set test.comment;
+EXPLAIN select count(*) from srcpart_date_ext join srcpart_small on (srcpart_date_ext.key
= srcpart_small.key1);
+
+set test.comment=Small table is external table - no semijoin reduction opt;
+set test.comment;
+EXPLAIN select count(*) from srcpart_date join srcpart_small_ext on (srcpart_date.key = srcpart_small_ext.key1);
+

http://git-wip-us.apache.org/repos/asf/hive/blob/6d723615/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out
new file mode 100644
index 0000000..d33c6cd
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out
@@ -0,0 +1,568 @@
+PREHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds string
) stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds
string ) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_date
+PREHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds
string) STORED as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds
string) STORED as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_small
+PREHOOK: query: create external table srcpart_date_ext (key string, value string) partitioned
by (ds string ) stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_date_ext
+POSTHOOK: query: create external table srcpart_date_ext (key string, value string) partitioned
by (ds string ) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_date_ext
+PREHOOK: query: CREATE external TABLE srcpart_small_ext(key1 STRING, value1 STRING) partitioned
by (ds string) STORED as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_small_ext
+POSTHOOK: query: CREATE external TABLE srcpart_small_ext(key1 STRING, value1 STRING) partitioned
by (ds string) STORED as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_small_ext
+PREHOOK: query: alter table srcpart_date add partition (ds = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: alter table srcpart_date add partition (ds = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-08
+PREHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-09
+PREHOOK: query: alter table srcpart_small add partition (ds = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: alter table srcpart_small add partition (ds = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-08
+PREHOOK: query: alter table srcpart_small add partition (ds = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: alter table srcpart_small add partition (ds = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-09
+PREHOOK: query: alter table srcpart_date_ext add partition (ds = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date_ext
+POSTHOOK: query: alter table srcpart_date_ext add partition (ds = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date_ext
+POSTHOOK: Output: default@srcpart_date_ext@ds=2008-04-08
+PREHOOK: query: alter table srcpart_date_ext add partition (ds = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date_ext
+POSTHOOK: query: alter table srcpart_date_ext add partition (ds = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date_ext
+POSTHOOK: Output: default@srcpart_date_ext@ds=2008-04-09
+PREHOOK: query: alter table srcpart_small_ext add partition (ds = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small_ext
+POSTHOOK: query: alter table srcpart_small_ext add partition (ds = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small_ext
+POSTHOOK: Output: default@srcpart_small_ext@ds=2008-04-08
+PREHOOK: query: alter table srcpart_small_ext add partition (ds = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small_ext
+POSTHOOK: query: alter table srcpart_small_ext add partition (ds = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small_ext
+POSTHOOK: Output: default@srcpart_small_ext@ds=2008-04-09
+PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select
key, value from srcpart where ds = "2008-04-08"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@srcpart_date@ds=2008-04-08
+POSTHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select
key, value from srcpart where ds = "2008-04-08"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_date@ds=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09" limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_small@ds=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09" limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-09
+POSTHOOK: Lineage: srcpart_small PARTITION(ds=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_small PARTITION(ds=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_date_ext partition (ds = "2008-04-08" ) select
key, value from srcpart where ds = "2008-04-08"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@srcpart_date_ext@ds=2008-04-08
+POSTHOOK: query: insert overwrite table srcpart_date_ext partition (ds = "2008-04-08" ) select
key, value from srcpart where ds = "2008-04-08"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@srcpart_date_ext@ds=2008-04-08
+POSTHOOK: Lineage: srcpart_date_ext PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date_ext PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_date_ext partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_date_ext@ds=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_date_ext partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_date_ext@ds=2008-04-09
+POSTHOOK: Lineage: srcpart_date_ext PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date_ext PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_small_ext partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09" limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_small_ext@ds=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_small_ext partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09" limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_small_ext@ds=2008-04-09
+POSTHOOK: Lineage: srcpart_small_ext PARTITION(ds=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_small_ext PARTITION(ds=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: analyze table srcpart_date compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Output: default@srcpart_date
+PREHOOK: Output: default@srcpart_date@ds=2008-04-08
+PREHOOK: Output: default@srcpart_date@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_date compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-09
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcpart_small compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+PREHOOK: Output: default@srcpart_small
+PREHOOK: Output: default@srcpart_small@ds=2008-04-08
+PREHOOK: Output: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_small compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+POSTHOOK: Output: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcpart_date_ext compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_date_ext
+PREHOOK: Input: default@srcpart_date_ext@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date_ext@ds=2008-04-09
+PREHOOK: Output: default@srcpart_date_ext
+PREHOOK: Output: default@srcpart_date_ext@ds=2008-04-08
+PREHOOK: Output: default@srcpart_date_ext@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_date_ext compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_date_ext
+POSTHOOK: Input: default@srcpart_date_ext@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date_ext@ds=2008-04-09
+POSTHOOK: Output: default@srcpart_date_ext
+POSTHOOK: Output: default@srcpart_date_ext@ds=2008-04-08
+POSTHOOK: Output: default@srcpart_date_ext@ds=2008-04-09
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcpart_small_ext compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_small_ext
+PREHOOK: Input: default@srcpart_small_ext@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small_ext@ds=2008-04-09
+PREHOOK: Output: default@srcpart_small_ext
+PREHOOK: Output: default@srcpart_small_ext@ds=2008-04-08
+PREHOOK: Output: default@srcpart_small_ext@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_small_ext compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_small_ext
+POSTHOOK: Input: default@srcpart_small_ext@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small_ext@ds=2008-04-09
+POSTHOOK: Output: default@srcpart_small_ext
+POSTHOOK: Output: default@srcpart_small_ext@ds=2008-04-08
+POSTHOOK: Output: default@srcpart_small_ext@ds=2008-04-09
+#### A masked pattern was here ####
+test.comment=This query should use semijoin reduction optimization
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key
= srcpart_small.key1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key
= srcpart_small.key1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Reducer 5 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_date
+                  filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min)
AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter))))
(type: boolean)
+                  Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    predicate: ((key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND
DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter)))
and key is not null) (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE
Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE
Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_small
+                  filterExpr: key1 is not null (type: boolean)
+                  Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats:
PARTIAL
+                  Filter Operator
+                    predicate: key1 is not null (type: boolean)
+                    Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column
stats: PARTIAL
+                    Select Operator
+                      expressions: key1 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column
stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column
stats: PARTIAL
+                      Select Operator
+                        expressions: _col0 (type: string)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column
stats: PARTIAL
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column
stats: PARTIAL
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column
stats: PARTIAL
+                            value expressions: _col0 (type: string), _col1 (type: string),
_col2 (type: binary)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column
stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats:
NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats:
NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats:
NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2,
expectedEntries=20)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats:
PARTIAL
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats:
PARTIAL
+                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+test.comment=Big table is external table - no semijoin reduction opt
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date_ext join srcpart_small on (srcpart_date_ext.key
= srcpart_small.key1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date_ext join srcpart_small on (srcpart_date_ext.key
= srcpart_small.key1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_date_ext
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE
Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE
Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_small
+                  filterExpr: key1 is not null (type: boolean)
+                  Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats:
PARTIAL
+                  Filter Operator
+                    predicate: key1 is not null (type: boolean)
+                    Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column
stats: PARTIAL
+                    Select Operator
+                      expressions: key1 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column
stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column
stats: PARTIAL
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column
stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats:
NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats:
NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats:
NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+test.comment=Small table is external table - no semijoin reduction opt
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small_ext on (srcpart_date.key
= srcpart_small_ext.key1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small_ext on (srcpart_date.key
= srcpart_small_ext.key1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_date
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE
Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE
Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_small_ext
+                  filterExpr: key1 is not null (type: boolean)
+                  Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats:
PARTIAL
+                  Filter Operator
+                    predicate: key1 is not null (type: boolean)
+                    Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column
stats: PARTIAL
+                    Select Operator
+                      expressions: key1 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column
stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column
stats: PARTIAL
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column
stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats:
NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats:
NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats:
NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+


Mime
View raw message