From: jcamacho@apache.org
To: commits@hive.apache.org
Date: Tue, 03 Oct 2017 23:36:00 -0000
Subject: [4/4] hive git commit: HIVE-17432: Enable join and aggregate materialized view rewriting (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

HIVE-17432: Enable join and aggregate materialized view rewriting (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Close apache/hive#245

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/073e8473
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/073e8473
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/073e8473

Branch: refs/heads/master
Commit: 073e8473ea61da995c42847ea53909e77f7e76f2
Parents: 8ffd8ea
Author: Jesus Camacho Rodriguez
Authored: Thu Apr 20 17:47:12 2017 +0100
Committer: Jesus Camacho Rodriguez
Committed: Tue Oct 3 16:35:14 2017 -0700
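For context on what this patch enables: with hive.materializedview.rewriting on, the CBO can now answer a query from a materialized view whose definition contains joins and/or aggregates, not only filters and projections. A minimal HiveQL sketch of the behavior, modeled on the materialized_view_create_rewrite_2.q test added below (table names t1/t2 and view name mv1 are illustrative, not part of the patch):

set hive.materializedview.rewriting=true;

-- The view definition contains a join and a group by
create materialized view mv1 enable rewrite as
select t1.a, t2.c, sum(t1.d) as s
from t1 join t2 on (t1.a = t2.a)
group by t1.a, t2.c;

-- With the join/aggregate rules registered in CalcitePlanner below, EXPLAIN
-- on an equivalent query should show a scan over mv1 instead of the base join
explain
select t1.a, t2.c, sum(t1.d)
from t1 join t2 on (t1.a = t2.a)
group by t1.a, t2.c;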
----------------------------------------------------------------------
 .../hadoop/hive/druid/DruidStorageHandler.java  |    8 +-
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java |   49 +-
 .../apache/hadoop/hive/ql/metadata/Hive.java    |   94 +-
 .../metadata/HiveMaterializedViewsRegistry.java |    8 +-
 .../apache/hadoop/hive/ql/metadata/Table.java   |    5 +-
 ...tedDynPartitionTimeGranularityOptimizer.java |   12 +-
 .../ql/optimizer/calcite/HiveCalciteUtil.java   |   12 +-
 .../ql/optimizer/calcite/RelOptHiveTable.java   |   78 +-
 .../rules/views/HiveMaterializedViewRule.java   |   27 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  168 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |    5 +-
 .../hadoop/hive/ql/plan/CreateTableDesc.java    |   19 +-
 .../hadoop/hive/ql/plan/CreateViewDesc.java     |    1 +
 .../apache/hadoop/hive/ql/plan/PlanUtils.java   |   99 +-
 .../materialized_view_create_rewrite_2.q        |   88 +
 .../materialized_view_rewrite_ssb.q             |  346 ++++
 .../materialized_view_rewrite_ssb_2.q           |  347 ++++
 .../clientpositive/llap/subquery_notin.q.out    |  116 +-
 .../materialized_view_create_rewrite_2.q.out    |  574 ++++++
 .../materialized_view_describe.q.out            |   12 +-
 .../materialized_view_rewrite_ssb.q.out         | 1621 +++++++++++++++++
 .../materialized_view_rewrite_ssb_2.q.out       | 1627 ++++++++++++++++++
 .../outer_reference_windowed.q.out              |   34 +-
 .../vector_outer_reference_windowed.q.out       |   34 +-
 24 files changed, 5162 insertions(+), 222 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
index 62e146d..31e3ea1 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
@@ -95,7 +95,7 @@ import javax.annotation.Nullable;
 /**
  * DruidStorageHandler provides a HiveStorageHandler implementation for Druid.
  */
-@SuppressWarnings({ "deprecation", "rawtypes" })
+@SuppressWarnings({ "rawtypes" })
 public class DruidStorageHandler extends DefaultHiveMetaHook implements HiveStorageHandler {

   protected static final Logger LOG = LoggerFactory.getLogger(DruidStorageHandler.class);
@@ -527,8 +527,7 @@ public class DruidStorageHandler extends DefaultHiveMetaHook implements HiveStor
   }

   @Override
-  public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties
-  ) {
+  public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) {
     jobProperties.put(Constants.DRUID_SEGMENT_VERSION, new DateTime().toString());
     jobProperties.put(Constants.DRUID_JOB_WORKING_DIRECTORY, getStagingWorkingDir().toString());
     // DruidOutputFormat will write segments in an intermediate directory
@@ -537,8 +536,7 @@ public class DruidStorageHandler extends DefaultHiveMetaHook implements HiveStor
   }

   @Override
-  public void configureTableJobProperties(TableDesc tableDesc, Map jobProperties
-  ) {
+  public void configureTableJobProperties(TableDesc tableDesc, Map jobProperties) {
   }
http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 2e4e2fa..4b1df73 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -134,6 +134,7 @@ import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry;
 import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreChecker;
+import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
 import org.apache.hadoop.hive.ql.metadata.NotNullConstraint;
@@ -186,6 +187,7 @@ import org.apache.hadoop.hive.ql.plan.LockTableDesc;
 import org.apache.hadoop.hive.ql.plan.MsckDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.OrcFileMergeDesc;
+import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.PrincipalDesc;
 import org.apache.hadoop.hive.ql.plan.PrivilegeDesc;
 import org.apache.hadoop.hive.ql.plan.PrivilegeObjectDesc;
@@ -4692,11 +4694,11 @@ public class DDLTask extends Task implements Serializable {
     // create new view
     Table tbl = db.newTable(crtView.getViewName());
     tbl.setViewOriginalText(crtView.getViewOriginalText());
+    tbl.setViewExpandedText(crtView.getViewExpandedText());
     if (crtView.isMaterialized()) {
       tbl.setRewriteEnabled(crtView.isRewriteEnabled());
       tbl.setTableType(TableType.MATERIALIZED_VIEW);
     } else {
-      tbl.setViewExpandedText(crtView.getViewExpandedText());
       tbl.setTableType(TableType.VIRTUAL_VIEW);
     }
     tbl.setSerializationLib(null);
@@ -4725,12 +4727,45 @@ public class DDLTask extends Task implements Serializable {
       if (crtView.getLocation() != null) {
         tbl.setDataLocation(new Path(crtView.getLocation()));
       }
-      // Short circuit the checks that the input format is valid, this is configured for all
-      // materialized views and doesn't change so we don't need to check it constantly.
-      tbl.getSd().setInputFormat(crtView.getInputFormat());
-      tbl.getSd().setOutputFormat(crtView.getOutputFormat());
-      tbl.getSd().setSerdeInfo(new SerDeInfo(crtView.getSerde(), crtView.getSerde(),
-          crtView.getSerdeProps()));
+
+      if (crtView.getStorageHandler() != null) {
+        tbl.setProperty(
+            org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE,
+            crtView.getStorageHandler());
+      }
+      HiveStorageHandler storageHandler = tbl.getStorageHandler();
+
+      /*
+       * If the user didn't specify a SerDe, we use the default.
+       */
+      String serDeClassName;
+      if (crtView.getSerde() == null) {
+        if (storageHandler == null) {
+          serDeClassName = PlanUtils.getDefaultSerDe().getName();
+          LOG.info("Default to " + serDeClassName
+              + " for materialized view " + crtView.getViewName());
+        } else {
+          serDeClassName = storageHandler.getSerDeClass().getName();
+          LOG.info("Use StorageHandler-supplied " + serDeClassName
+              + " for materialized view " + crtView.getViewName());
+        }
+      } else {
+        // let's validate that the serde exists
+        serDeClassName = crtView.getSerde();
+        DDLTask.validateSerDe(serDeClassName, conf);
+      }
+      tbl.setSerializationLib(serDeClassName);
+
+      // To remain consistent, we need to set input and output formats both
+      // at the table level and the storage handler level.
+      tbl.setInputFormatClass(crtView.getInputFormat());
+      tbl.setOutputFormatClass(crtView.getOutputFormat());
+      if (crtView.getInputFormat() != null && !crtView.getInputFormat().isEmpty()) {
+        tbl.getSd().setInputFormat(tbl.getInputFormatClass().getName());
+      }
+      if (crtView.getOutputFormat() != null && !crtView.getOutputFormat().isEmpty()) {
+        tbl.getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
+      }
     }

     db.createTable(tbl, crtView.getIfNotExists());
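The DDLTask path above means a materialized view now gets a full storage descriptor: an explicit SerDe is validated, a storage handler can supply the SerDe and formats, and otherwise PlanUtils.getDefaultSerDe() applies. A hedged DDL sketch of the two shapes this code handles (source tables src_table and pageviews are hypothetical; the handler class name is the DruidStorageHandler shown earlier):

-- No SerDe given: falls back to the default (LazySimpleSerDe, see PlanUtils below)
create materialized view mv_orc enable rewrite
stored as orc
as select a, b from src_table;

-- Storage-handler-backed: the handler supplies SerDe plus input/output formats
-- (Druid-backed tables conventionally expose a `__time` timestamp column)
create materialized view mv_druid enable rewrite
stored by 'org.apache.hadoop.hive.druid.DruidStorageHandler'
as select `__time`, page, sum(views) as views_sum from pageviews group by `__time`, page;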
http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 436a2fe..b0e68b1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -51,6 +51,7 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;

 import javax.jdo.JDODataStoreException;

@@ -62,13 +63,13 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.hive.common.BlobStorageUtils;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.HiveStatsUtils;
 import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
 import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.hive.common.log.InPlaceUpdate;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.io.HdfsUtils;
@@ -138,7 +139,6 @@ import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.index.HiveIndexHandler;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.common.log.InPlaceUpdate;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
 import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
@@ -158,12 +158,12 @@ import org.apache.thrift.TException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import com.google.common.base.Splitter;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import com.google.common.base.Splitter;

 /**
  * This class has functions that implement meta data/DDL operations using calls
@@ -4291,8 +4291,30 @@ private void constructOneLBLocationMap(FileStatus fSta,
    * @throws HiveException
    */
   public PrimaryKeyInfo getPrimaryKeys(String dbName, String tblName) throws HiveException {
+    return getPrimaryKeys(dbName, tblName, false);
+  }
+
+  /**
+   * Get primary key columns associated with the table that are available for optimization.
+   *
+   * @param dbName Database Name
+   * @param tblName Table Name
+   * @return Primary Key associated with the table.
+   * @throws HiveException
+   */
+  public PrimaryKeyInfo getReliablePrimaryKeys(String dbName, String tblName) throws HiveException {
+    return getPrimaryKeys(dbName, tblName, true);
+  }
+
+  private PrimaryKeyInfo getPrimaryKeys(String dbName, String tblName, boolean onlyReliable)
+      throws HiveException {
     try {
       List primaryKeys = getMSC().getPrimaryKeys(new PrimaryKeysRequest(dbName, tblName));
+      if (onlyReliable && primaryKeys != null && !primaryKeys.isEmpty()) {
+        primaryKeys = primaryKeys.stream()
+            .filter(pk -> pk.isRely_cstr())
+            .collect(Collectors.toList());
+      }
       return new PrimaryKeyInfo(primaryKeys, tblName, dbName);
     } catch (Exception e) {
       throw new HiveException(e);
@@ -4308,8 +4330,30 @@ private void constructOneLBLocationMap(FileStatus fSta,
    * @throws HiveException
    */
   public ForeignKeyInfo getForeignKeys(String dbName, String tblName) throws HiveException {
+    return getForeignKeys(dbName, tblName, false);
+  }
+
+  /**
+   * Get foreign keys associated with the table that are available for optimization.
+   *
+   * @param dbName Database Name
+   * @param tblName Table Name
+   * @return Foreign keys associated with the table.
+   * @throws HiveException
+   */
+  public ForeignKeyInfo getReliableForeignKeys(String dbName, String tblName) throws HiveException {
+    return getForeignKeys(dbName, tblName, true);
+  }
+
+  private ForeignKeyInfo getForeignKeys(String dbName, String tblName, boolean onlyReliable)
+      throws HiveException {
     try {
       List foreignKeys = getMSC().getForeignKeys(new ForeignKeysRequest(null, null, dbName, tblName));
+      if (onlyReliable && foreignKeys != null && !foreignKeys.isEmpty()) {
+        foreignKeys = foreignKeys.stream()
+            .filter(fk -> fk.isRely_cstr())
+            .collect(Collectors.toList());
+      }
       return new ForeignKeyInfo(foreignKeys, tblName, dbName);
     } catch (Exception e) {
       throw new HiveException(e);
@@ -4325,9 +4369,31 @@ private void constructOneLBLocationMap(FileStatus fSta,
    * @throws HiveException
    */
   public UniqueConstraint getUniqueConstraints(String dbName, String tblName) throws HiveException {
+    return getUniqueConstraints(dbName, tblName, false);
+  }
+
+  /**
+   * Get unique constraints associated with the table that are available for optimization.
+   *
+   * @param dbName Database Name
+   * @param tblName Table Name
+   * @return Unique constraints associated with the table.
+   * @throws HiveException
+   */
+  public UniqueConstraint getReliableUniqueConstraints(String dbName, String tblName) throws HiveException {
+    return getUniqueConstraints(dbName, tblName, true);
+  }
+
+  private UniqueConstraint getUniqueConstraints(String dbName, String tblName, boolean onlyReliable)
+      throws HiveException {
     try {
       List uniqueConstraints = getMSC().getUniqueConstraints(
           new UniqueConstraintsRequest(dbName, tblName));
+      if (onlyReliable && uniqueConstraints != null && !uniqueConstraints.isEmpty()) {
+        uniqueConstraints = uniqueConstraints.stream()
+            .filter(uk -> uk.isRely_cstr())
+            .collect(Collectors.toList());
+      }
       return new UniqueConstraint(uniqueConstraints, tblName, dbName);
     } catch (Exception e) {
       throw new HiveException(e);
@@ -4343,9 +4409,31 @@ private void constructOneLBLocationMap(FileStatus fSta,
    * @throws HiveException
    */
   public NotNullConstraint getNotNullConstraints(String dbName, String tblName) throws HiveException {
+    return getNotNullConstraints(dbName, tblName, false);
+  }
+
+  /**
+   * Get not null constraints associated with the table that are available for optimization.
+   *
+   * @param dbName Database Name
+   * @param tblName Table Name
+   * @return Not null constraints associated with the table.
+   * @throws HiveException
+   */
+  public NotNullConstraint getReliableNotNullConstraints(String dbName, String tblName) throws HiveException {
+    return getNotNullConstraints(dbName, tblName, true);
+  }
+
+  private NotNullConstraint getNotNullConstraints(String dbName, String tblName, boolean onlyReliable)
+      throws HiveException {
     try {
       List notNullConstraints = getMSC().getNotNullConstraints(
           new NotNullConstraintsRequest(dbName, tblName));
+      if (onlyReliable && notNullConstraints != null && !notNullConstraints.isEmpty()) {
+        notNullConstraints = notNullConstraints.stream()
+            .filter(nnc -> nnc.isRely_cstr())
+            .collect(Collectors.toList());
+      }
       return new NotNullConstraint(notNullConstraints, tblName, dbName);
     } catch (Exception e) {
       throw new HiveException(e);
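Note the pattern shared by all four getReliable* accessors: Hive does not validate declared constraints, so only constraints marked RELY may be trusted by the optimizer, and the rest are filtered out. In DDL terms (illustrative tables; the DISABLE ... RELY spelling matches the SSB test schema near the end of this patch):

create table dim_t (d_key bigint, d_name string,
  primary key (d_key) disable rely);

create table fact_t (f_key bigint,
  f_dim bigint not null disable rely,
  constraint fk1 foreign key (f_dim) references dim_t(d_key) disable rely);

-- A constraint declared without RELY is still returned by
-- getPrimaryKeys/getForeignKeys, but getReliablePrimaryKeys and
-- getReliableForeignKeys drop it, keeping it out of CBO reasoning.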
http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
index 526d1dd..154ea68 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
@@ -54,6 +54,7 @@ import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.QueryState;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
@@ -174,7 +175,7 @@ public final class HiveMaterializedViewsRegistry {
       return null;
     }
     // Add to cache
-    final String viewQuery = materializedViewTable.getViewOriginalText();
+    final String viewQuery = materializedViewTable.getViewExpandedText();
     final RelNode tableRel = createTableScan(materializedViewTable);
     if (tableRel == null) {
       LOG.warn("Materialized view " + materializedViewTable.getCompleteName() +
@@ -226,7 +227,9 @@ public final class HiveMaterializedViewsRegistry {
   private static RelNode createTableScan(Table viewTable) {
     // 0. Recreate cluster
     final RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(null);
-    final RexBuilder rexBuilder = new RexBuilder(new JavaTypeFactoryImpl());
+    final RexBuilder rexBuilder = new RexBuilder(
+        new JavaTypeFactoryImpl(
+            new HiveTypeSystemImpl()));
     final RelOptCluster cluster = RelOptCluster.create(planner, rexBuilder);

     // 1. Create column schema
@@ -338,6 +341,7 @@ public final class HiveMaterializedViewsRegistry {
       return analyzer.genLogicalPlan(node);
     } catch (Exception e) {
       // We could not parse the view
+      LOG.error(e.getMessage());
       return null;
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index a53f774..43e202c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -211,12 +211,9 @@ public class Table implements Serializable {
       }
     }

-    if (isView()) {
+    if (isView() || isMaterializedView()) {
       assert (getViewOriginalText() != null);
       assert (getViewExpandedText() != null);
-    } else if (isMaterializedView()) {
-      assert(getViewOriginalText() != null);
-      assert(getViewExpandedText() == null);
     } else {
       assert(getViewOriginalText() == null);
       assert(getViewExpandedText() == null);
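The registry now caches the expanded view text rather than the original text, and the Table assertions above require materialized views to carry both. The distinction matters because rewriting compares plans built from arbitrary user queries, so the cached definition must be unambiguous. Roughly (hypothetical table db1.t; the quoting follows Hive's usual identifier expansion style):

-- Original text, as typed by the user:
--   select b from t where c > 10
-- Expanded text, with identifiers resolved and fully qualified:
--   select `t`.`b` from `db1`.`t` where `t`.`c` > 10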
http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java
index 7670dc1..4a248d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java
@@ -33,7 +33,6 @@ import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
-import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -134,10 +133,17 @@ public class SortedDynPartitionTimeGranularityOptimizer extends Transform {
       if (table != null) {
         // case the statement is an INSERT
         segmentGranularity = table.getParameters().get(Constants.DRUID_SEGMENT_GRANULARITY);
-      } else {
+      } else if (parseCtx.getCreateViewDesc() != null) {
+        // case the statement is a CREATE MATERIALIZED VIEW AS
+        segmentGranularity = parseCtx.getCreateViewDesc().getTblProps()
+            .get(Constants.DRUID_SEGMENT_GRANULARITY);
+      } else if (parseCtx.getCreateTable() != null) {
         // case the statement is a CREATE TABLE AS
-        segmentGranularity = parseCtx.getCreateTable().getTblProps()
+        segmentGranularity = parseCtx.getCreateTable().getTblProps()
             .get(Constants.DRUID_SEGMENT_GRANULARITY);
+      } else {
+        throw new SemanticException("Druid storage handler used but not an INSERT, "
+            + "CMVAS or CTAS statement");
       }
       segmentGranularity = !Strings.isNullOrEmpty(segmentGranularity) ? segmentGranularity

http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index 8428615..f70aa75 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -49,6 +49,7 @@ import org.apache.calcite.rex.RexLocalRef;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexOver;
 import org.apache.calcite.rex.RexPatternFieldRef;
+import org.apache.calcite.rex.RexTableInputRef;
 import org.apache.calcite.rex.RexRangeRef;
 import org.apache.calcite.rex.RexSubQuery;
 import org.apache.calcite.rex.RexUtil;
@@ -89,7 +90,6 @@ import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
-import org.apache.calcite.rex.RexTableInputRef;

 /**
  * Generic utility functions needed for Calcite based Hive CBO.
@@ -1034,6 +1034,11 @@ public class HiveCalciteUtil {
     }

     @Override
+    public Boolean visitTableInputRef(RexTableInputRef inputRef) {
+      return false;
+    }
+
+    @Override
     public Boolean visitLocalRef(RexLocalRef localRef) {
       throw new RuntimeException("Not expected to be called.");
     }
@@ -1082,11 +1087,6 @@ public class HiveCalciteUtil {
     public Boolean visitPatternFieldRef(RexPatternFieldRef fieldRef) {
       return false;
     }
-
-    @Override
-    public Boolean visitTableInputRef(RexTableInputRef fieldRef) {
-      return false;
-    }
   }

   public static Set getInputRefs(RexNode expr) {

http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 85aa9b3..044bba1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -35,16 +35,21 @@ import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.RelFieldCollation.Direction;
 import org.apache.calcite.rel.RelFieldCollation.NullDirection;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelReferentialConstraint;
+import org.apache.calcite.rel.RelReferentialConstraintImpl;
 import org.apache.calcite.rel.logical.LogicalTableScan;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.util.ImmutableBitSet;
-import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.calcite.util.mapping.IntPair;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo;
+import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo.ForeignKeyCol;
+import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.PartitionIterable;
 import org.apache.hadoop.hive.ql.metadata.Table;
@@ -76,6 +81,7 @@ public class RelOptHiveTable extends RelOptAbstractTable {
   private final ImmutableMap hivePartitionColsMap;
   private final ImmutableList hiveVirtualCols;
   private final int noOfNonVirtualCols;
+  private final List referentialConstraints;
   final HiveConf hiveConf;

   private double rowCount = -1;
@@ -106,6 +112,7 @@ public class RelOptHiveTable extends RelOptAbstractTable {
     this.partitionCache = partitionCache;
     this.colStatsCache = colStatsCache;
     this.noColsMissingStats = noColsMissingStats;
+    this.referentialConstraints = generateReferentialConstraints();
   }

   public RelOptHiveTable copy(RelDataType newRowType) {
@@ -150,6 +157,75 @@ public class RelOptHiveTable extends RelOptAbstractTable {
   }

   @Override
+  public List getReferentialConstraints() {
+    return referentialConstraints;
+  }
+
+  private List generateReferentialConstraints() {
+    final ForeignKeyInfo fki;
+    try {
+      fki = Hive.get().getReliableForeignKeys(
+          hiveTblMetadata.getDbName(), hiveTblMetadata.getTableName());
+    } catch (HiveException e) {
+      throw new RuntimeException(e);
+    }
+    ImmutableList.Builder builder = ImmutableList.builder();
+    for (List fkCols : fki.getForeignKeys().values()) {
+      List foreignKeyTableQualifiedName = Lists.newArrayList(name);
+      String parentDatabaseName = fkCols.get(0).parentDatabaseName;
+      String parentTableName = fkCols.get(0).parentTableName;
+      String parentFullyQualifiedName;
+      if (parentDatabaseName != null && !parentDatabaseName.isEmpty()) {
+        parentFullyQualifiedName = parentDatabaseName + "." + parentTableName;
+      }
+      else {
+        parentFullyQualifiedName = parentTableName;
+      }
+      List parentTableQualifiedName = Lists.newArrayList(parentFullyQualifiedName);
+      Table parentTab = null;
+      try {
+        // TODO: We have a cache for Table objects in SemanticAnalyzer::getTableObjectByName()
+        // We need to move that cache elsewhere and use it from places like this.
+        parentTab = Hive.get().getTable(parentDatabaseName, parentTableName);
+      } catch (HiveException e) {
+        throw new RuntimeException(e);
+      }
+      if (parentTab == null) {
+        LOG.error("Table for primary key not found: "
+            + "databaseName: " + parentDatabaseName + ", "
+            + "tableName: " + parentTableName);
+        return ImmutableList.of();
+      }
+      ImmutableList.Builder keys = ImmutableList.builder();
+      for (ForeignKeyCol fkCol : fkCols) {
+        int fkPos;
+        for (fkPos = 0; fkPos < rowType.getFieldNames().size(); fkPos++) {
+          String fkColName = rowType.getFieldNames().get(fkPos);
+          if (fkColName.equals(fkCol.childColName)) {
+            break;
+          }
+        }
+        int pkPos;
+        for (pkPos = 0; pkPos < parentTab.getAllCols().size(); pkPos++) {
+          String pkColName = parentTab.getAllCols().get(pkPos).getName();
+          if (pkColName.equals(fkCol.parentColName)) {
+            break;
+          }
+        }
+        if (fkPos == rowType.getFieldNames().size()
+            || pkPos == parentTab.getAllCols().size()) {
+          LOG.error("Column for foreign key definition " + fkCol + " not found");
+          return ImmutableList.of();
+        }
+        keys.add(IntPair.of(fkPos, pkPos));
+      }
+      builder.add(RelReferentialConstraintImpl.of(foreignKeyTableQualifiedName,
+          parentTableQualifiedName, keys.build()));
+    }
+    return builder.build();
+  }
+
+  @Override
   public RelNode toRel(ToRelContext context) {
     return new LogicalTableScan(context.getCluster(), this);
   }
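Exposing RELY foreign keys as Calcite RelReferentialConstraints is what allows the rewriting rules to prove that a join present in a view but absent from a query is lossless. A hedged sketch of the effect using the SSB schema added at the end of this patch (lineorder.lo_orderdate is NOT NULL and references the dates primary key, both RELY; mv_lo is an illustrative view name):

create materialized view mv_lo enable rewrite as
select d_year, lo_discount, lo_extendedprice
from lineorder join dates on (lo_orderdate = d_datekey);

-- The query never mentions dates, yet it can be answered from mv_lo:
-- the FK/PK pair guarantees the extra join neither drops nor duplicates
-- lineorder rows
select sum(lo_extendedprice * lo_discount) from lineorder;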
http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
index 2fa1138..a7c6fd0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
@@ -17,22 +17,35 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.rules.views;

-import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewOnlyFilterRule;
 import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewProjectFilterRule;
+import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewOnlyFilterRule;
+import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewProjectJoinRule;
+import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewOnlyJoinRule;
+import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewProjectAggregateRule;
+import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewOnlyAggregateRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;

 /**
- * Planner rule that replaces (if possible) a {@link org.apache.calcite.rel.core.Project}
- * on a {@link org.apache.calcite.rel.core.Filter} and {@link org.apache.calcite.rel.core.Filter}
- * or a {@link org.apache.calcite.rel.core.Filter} on a {@link org.apache.calcite.rel.core.TableScan}
- * to use a Materialized View.
+ * Enable join and aggregate materialized view rewriting
  */
 public class HiveMaterializedViewRule {

   public static final MaterializedViewProjectFilterRule INSTANCE_PROJECT_FILTER =
-      new MaterializedViewProjectFilterRule(HiveRelFactories.HIVE_BUILDER, false);
+      new MaterializedViewProjectFilterRule(HiveRelFactories.HIVE_BUILDER, true);

   public static final MaterializedViewOnlyFilterRule INSTANCE_FILTER =
-      new MaterializedViewOnlyFilterRule(HiveRelFactories.HIVE_BUILDER, false);
+      new MaterializedViewOnlyFilterRule(HiveRelFactories.HIVE_BUILDER, true);
+
+  public static final MaterializedViewProjectJoinRule INSTANCE_PROJECT_JOIN =
+      new MaterializedViewProjectJoinRule(HiveRelFactories.HIVE_BUILDER, true);
+
+  public static final MaterializedViewOnlyJoinRule INSTANCE_JOIN =
+      new MaterializedViewOnlyJoinRule(HiveRelFactories.HIVE_BUILDER, true);
+
+  public static final MaterializedViewProjectAggregateRule INSTANCE_PROJECT_AGGREGATE =
+      new MaterializedViewProjectAggregateRule(HiveRelFactories.HIVE_BUILDER, true);
+
+  public static final MaterializedViewOnlyAggregateRule INSTANCE_AGGREGATE =
+      new MaterializedViewOnlyAggregateRule(HiveRelFactories.HIVE_BUILDER, true);
 }
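All six instances are now constructed with the boolean flag flipped from false to true, and the join and aggregate variants are new. The aggregate rules also cover rollup, i.e. grouping by a subset of the view's grouping columns; this is exactly the cmv_mat_view_4 case in the materialized_view_create_rewrite_2.q test below:

create materialized view cmv_mat_view_4 enable rewrite
as select a, b from cmv_basetable group by a, b;

-- CAN BE TRIGGERED: group by b is a rollup of the view's group by a, b
explain
select b from cmv_basetable group by b;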
http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index e3ae296..6555269 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -76,6 +76,7 @@ import org.apache.calcite.rel.core.Aggregate;
 import org.apache.calcite.rel.core.AggregateCall;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.core.SetOp;
 import org.apache.calcite.rel.core.TableScan;
 import org.apache.calcite.rel.metadata.CachingRelMetadataProvider;
@@ -139,6 +140,8 @@ import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.NotNullConstraint;
+import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
@@ -146,9 +149,9 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.Unsu
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteViewSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
@@ -395,27 +398,34 @@ public class CalcitePlanner extends SemanticAnalyzer {
         } catch (SemanticException e) {
           throw new CalciteViewSemanticException(e.getMessage());
         }
+      } else if (cboCtx.type == PreCboCtx.Type.VIEW && materializedView) {
+        // Store text of the ORIGINAL QUERY
+        String originalText = ctx.getTokenRewriteStream().toString(
+            cboCtx.nodeOfInterest.getTokenStartIndex(),
+            cboCtx.nodeOfInterest.getTokenStopIndex());
+        unparseTranslator.applyTranslations(ctx.getTokenRewriteStream());
+        String expandedText = ctx.getTokenRewriteStream().toString(
+            cboCtx.nodeOfInterest.getTokenStartIndex(),
+            cboCtx.nodeOfInterest.getTokenStopIndex());
+        // Redo create-table/view analysis, because it's not part of
+        // doPhase1.
+        // Use the REWRITTEN AST
+        init(false);
+        setAST(newAST);
+        newAST = reAnalyzeViewAfterCbo(newAST);
+        viewSelect = newAST;
+        viewsExpanded = new ArrayList<>();
+        viewsExpanded.add(createVwDesc.getViewName());
+        createVwDesc.setViewOriginalText(originalText);
+        createVwDesc.setViewExpandedText(expandedText);
+      } else if (cboCtx.type == PreCboCtx.Type.CTAS) {
+        // CTAS
+        init(false);
+        setAST(newAST);
+        newAST = reAnalyzeCTASAfterCbo(newAST);
       } else {
+        // All others
         init(false);
-        if (cboCtx.type == PreCboCtx.Type.VIEW && materializedView) {
-          // Redo create-table/view analysis, because it's not part of
-          // doPhase1.
-          // Use the REWRITTEN AST
-          setAST(newAST);
-          newAST = reAnalyzeViewAfterCbo(newAST);
-          // Store text of the ORIGINAL QUERY
-          String originalText = ctx.getTokenRewriteStream().toString(
-              cboCtx.nodeOfInterest.getTokenStartIndex(),
-              cboCtx.nodeOfInterest.getTokenStopIndex());
-          createVwDesc.setViewOriginalText(originalText);
-          viewSelect = newAST;
-          viewsExpanded = new ArrayList<>();
-          viewsExpanded.add(createVwDesc.getViewName());
-        } else if (cboCtx.type == PreCboCtx.Type.CTAS) {
-          // CTAS
-          setAST(newAST);
-          newAST = reAnalyzeCTASAfterCbo(newAST);
-        }
       }
       if (oldHints.size() > 0) {
         if (getQB().getParseInfo().getHints() != null) {
@@ -1453,13 +1463,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
       disableSemJoinReordering = false;
     }

-    // 4. Run other optimizations that do not need stats
+    // 4.1. Run other optimizations that do not need stats
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
     calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
         HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, HiveUnionMergeRule.INSTANCE,
-        HiveProjectMergeRule.INSTANCE_NO_FORCE, HiveAggregateProjectMergeRule.INSTANCE,
-        HiveJoinCommuteRule.INSTANCE);
-    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Optimizations without stats");
+        HiveProjectMergeRule.INSTANCE_NO_FORCE, HiveJoinCommuteRule.INSTANCE);
+    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Optimizations without stats 1");
     // 5. Materialized view based rewriting
     // We disable it for CTAS and MV creation queries (trying to avoid any problem
@@ -1483,17 +1492,30 @@ public RelOptMaterialization apply(RelOptMaterialization materialization) {
             final RelNode viewScan = materialization.tableRel;
             final RelNode newViewScan;
-            if (viewScan instanceof DruidQuery) {
-              final DruidQuery dq = (DruidQuery) viewScan;
-              newViewScan = DruidQuery.create(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION),
-                  viewScan.getTable(), dq.getDruidTable(),
+            if (viewScan instanceof Project) {
+              // There is a Project on top (due to nullability)
+              final Project pq = (Project) viewScan;
+              newViewScan = HiveProject.create(optCluster, copyNodeScan(viewScan),
+                  pq.getChildExps(), pq.getRowType(), Collections.emptyList());
+            } else {
+              newViewScan = copyNodeScan(viewScan);
+            }
+            return new RelOptMaterialization(newViewScan, materialization.queryRel, null);
+          }
+
+          private RelNode copyNodeScan(RelNode scan) {
+            final RelNode newScan;
+            if (scan instanceof DruidQuery) {
+              final DruidQuery dq = (DruidQuery) scan;
+              newScan = DruidQuery.create(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION),
+                  scan.getTable(), dq.getDruidTable(),
                   ImmutableList.of(dq.getTableScan()));
             } else {
-              newViewScan = new HiveTableScan(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION),
-                  (RelOptHiveTable) viewScan.getTable(), viewScan.getTable().getQualifiedName().get(0),
+              newScan = new HiveTableScan(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION),
+                  (RelOptHiveTable) scan.getTable(), scan.getTable().getQualifiedName().get(0),
                   null, false, false);
             }
-            return new RelOptMaterialization(newViewScan, materialization.queryRel, null);
+            return newScan;
           }
         }
     );
@@ -1507,6 +1529,10 @@
         // Add view-based rewriting rules to planner
         planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_FILTER);
         planner.addRule(HiveMaterializedViewRule.INSTANCE_FILTER);
+        planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_JOIN);
+        planner.addRule(HiveMaterializedViewRule.INSTANCE_JOIN);
+        planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_AGGREGATE);
+        planner.addRule(HiveMaterializedViewRule.INSTANCE_AGGREGATE);
         // Optimize plan
         planner.setRoot(calciteOptimizedPlan);
         calciteOptimizedPlan = planner.findBestExp();
@@ -1518,6 +1544,12 @@
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: View-based rewriting");
     }

+    // 4.2. Run other optimizations that do not need stats
+    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
+    calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
+        HepMatchOrder.BOTTOM_UP, HiveAggregateProjectMergeRule.INSTANCE);
+    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Optimizations without stats 2");
+
     // 6. Run aggregate-join transpose (cost based)
     // If it failed because of missing stats, we continue with
     // the rest of optimizations
@@ -1557,9 +1589,7 @@
           "Calcite: Removing sq_count_check UDF ");
     }

-
-
-    // 8. Run rule to fix windowing issue when it is done over
+    // 9. Run rule to fix windowing issue when it is done over
     // aggregation columns (HIVE-10627)
     if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) {
       perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
@@ -1568,7 +1598,7 @@
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Window fixing rule");
     }

-    // 9. Apply Druid transformation rules
+    // 10. Apply Druid transformation rules
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
     calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
         HepMatchOrder.BOTTOM_UP,
@@ -1586,10 +1616,10 @@
     );
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Druid transformation rules");

-    // 10. Run rules to aid in translation from Calcite tree to Hive tree
+    // 11. Run rules to aid in translation from Calcite tree to Hive tree
     if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
       perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-      // 10.1. Merge join into multijoin operators (if possible)
+      // 11.1. Merge join into multijoin operators (if possible)
       calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), null,
           HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER,
           HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER,
@@ -1607,7 +1637,7 @@
           HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID,
           HiveProjectFilterPullUpConstantsRule.INSTANCE);

-      // 10.2. Introduce exchange operators below join/multijoin operators
+      // 11.2. Introduce exchange operators below join/multijoin operators
       calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
           HepMatchOrder.BOTTOM_UP, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN,
           HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN);
@@ -2382,7 +2412,7 @@
             optTable, druidTable, ImmutableList.of(scan));
       } else {
         // Build row type from field
-        RelDataType rowType = TypeConverter.getType(cluster, rr, null);
+        RelDataType rowType = inferNotNullableColumns(tabMetaData, TypeConverter.getType(cluster, rr, null));
         // Build RelOptAbstractTable
         String fullyQualifiedTabName = tabMetaData.getDbName();
         if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) {
@@ -2418,6 +2448,66 @@
       return tableRel;
     }

+    private RelDataType inferNotNullableColumns(Table tabMetaData, RelDataType rowType)
+        throws HiveException {
+      // Retrieve not null constraints
+      final NotNullConstraint nnc = Hive.get().getReliableNotNullConstraints(
+          tabMetaData.getDbName(), tabMetaData.getTableName());
+      // Retrieve primary key constraints (cannot be null)
+      final PrimaryKeyInfo pkc = Hive.get().getReliablePrimaryKeys(
+          tabMetaData.getDbName(), tabMetaData.getTableName());
+      if (nnc.getNotNullConstraints().isEmpty() && pkc.getColNames().isEmpty()) {
+        return rowType;
+      }
+
+      // Build the bitset with not null columns
+      ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+      for (String nnCol : nnc.getNotNullConstraints().values()) {
+        int nnPos = -1;
+        for (int i = 0; i < rowType.getFieldNames().size(); i++) {
+          if (rowType.getFieldNames().get(i).equals(nnCol)) {
+            nnPos = i;
+            break;
+          }
+        }
+        if (nnPos == -1) {
+          LOG.error("Column for not null constraint definition " + nnCol + " not found");
+          return rowType;
+        }
+        builder.set(nnPos);
+      }
+      for (String pkCol : pkc.getColNames().values()) {
+        int pkPos = -1;
+        for (int i = 0; i < rowType.getFieldNames().size(); i++) {
+          if (rowType.getFieldNames().get(i).equals(pkCol)) {
+            pkPos = i;
+            break;
+          }
+        }
+        if (pkPos == -1) {
+          LOG.error("Column for not null constraint definition " + pkCol + " not found");
+          return rowType;
+        }
+        builder.set(pkPos);
+      }
+      ImmutableBitSet bitSet = builder.build();
+
+      RexBuilder rexBuilder = cluster.getRexBuilder();
+      RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
+
+      List fieldTypes = new LinkedList();
+      List fieldNames = new LinkedList();
+      for (RelDataTypeField rdtf : rowType.getFieldList()) {
+        if (bitSet.indexOf(rdtf.getIndex()) != -1) {
+          fieldTypes.add(dtFactory.createTypeWithNullability(rdtf.getType(), false));
+        } else {
+          fieldTypes.add(rdtf.getType());
+        }
+        fieldNames.add(rdtf.getName());
+      }
+      return dtFactory.createStructType(fieldTypes, fieldNames);
+    }
+
     private TableType obtainTableType(Table tabMetaData) {
       if (tabMetaData.getStorageHandler() != null &&
           tabMetaData.getStorageHandler().toString().equals(
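inferNotNullableColumns tightens the Calcite row type using the reliable constraints fetched above: columns covered by a RELY primary key or NOT NULL constraint become non-nullable types, which the rewriting rules need when reasoning about whether joins can introduce nulls. In DDL terms (a sketch mirroring the lineorder definition in the SSB tests; the table name is illustrative):

create table lineorder_sketch (
  lo_orderkey bigint,
  lo_custkey bigint not null disable rely,
  lo_comment string,
  primary key (lo_orderkey) disable rely);

-- In the CBO row type, lo_orderkey (primary key) and lo_custkey (NOT NULL,
-- RELY) are modeled as non-nullable bigint; lo_comment stays nullable.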
http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 5e45142..9c6c556 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -11754,10 +11754,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     }

     createVwDesc.setSchema(derivedSchema);
-    if (!createVwDesc.isMaterialized()) {
-      // materialized views don't store the expanded text as they won't be rewritten at query time.
-      createVwDesc.setViewExpandedText(expandedText);
-    }
+    createVwDesc.setViewExpandedText(expandedText);
   }

   static List convertRowSchemaToViewSchema(RowResolver rr) throws SemanticException {

http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
index a05cb96..36ad5aa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
@@ -50,7 +50,6 @@ import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
 import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.mapred.OutputFormat;
@@ -715,26 +714,24 @@ public class CreateTableDesc extends DDLDesc implements Serializable {
     HiveStorageHandler storageHandler = tbl.getStorageHandler();

     /*
-     * We use LazySimpleSerDe by default.
-     *
-     * If the user didn't specify a SerDe, and any of the columns are not simple
-     * types, we will have to use DynamicSerDe instead.
+     * If the user didn't specify a SerDe, we use the default.
      */
+    String serDeClassName;
     if (getSerName() == null) {
       if (storageHandler == null) {
-        LOG.info("Default to LazySimpleSerDe for table " + tableName);
-        tbl.setSerializationLib(LazySimpleSerDe.class.getName());
+        serDeClassName = PlanUtils.getDefaultSerDe().getName();
+        LOG.info("Default to " + serDeClassName + " for table " + tableName);
       } else {
-        String serDeClassName = storageHandler.getSerDeClass().getName();
+        serDeClassName = storageHandler.getSerDeClass().getName();
         LOG.info("Use StorageHandler-supplied " + serDeClassName + " for table " + tableName);
-        tbl.setSerializationLib(serDeClassName);
       }
     } else {
       // let's validate that the serde exists
-      DDLTask.validateSerDe(getSerName(), conf);
-      tbl.setSerializationLib(getSerName());
+      serDeClassName = getSerName();
+      DDLTask.validateSerDe(serDeClassName, conf);
     }
+    tbl.setSerializationLib(serDeClassName);

     if (getFieldDelim() != null) {
       tbl.setSerdeParam(serdeConstants.FIELD_DELIM, getFieldDelim());

http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java
index 6cc4a40..9425f6e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java
@@ -76,6 +76,7 @@ public class CreateViewDesc extends DDLDesc implements Serializable {
    * @param inputFormat
    * @param outputFormat
    * @param location
+   * @param serName
    * @param serde
    * @param storageHandler
    * @param serdeProps

http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
index 8211c13..2baa1cb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
@@ -18,6 +18,17 @@
 package org.apache.hadoop.hive.ql.plan;

+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -32,7 +43,6 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
 import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
@@ -48,6 +58,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
 import org.apache.hadoop.hive.serde2.DelimitedJSONSerDe;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
@@ -67,17 +78,6 @@ import org.apache.hadoop.mapred.TextInputFormat;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-
 /**
  * PlanUtils.
  *
@@ -216,7 +216,7 @@ public final class PlanUtils {
    */
   public static TableDesc getDefaultTableDesc(String separatorCode, String columns,
       String columnTypes, boolean lastColumnTakesRestOfTheLine) {
-    return getTableDesc(LazySimpleSerDe.class, separatorCode, columns,
+    return getTableDesc(getDefaultSerDe(), separatorCode, columns,
         columnTypes, lastColumnTakesRestOfTheLine);
   }

@@ -317,7 +317,7 @@ public final class PlanUtils {
           SessionState.getSessionConf(), crtTblDesc.getStorageHandler());
     }

-    Class serdeClass = LazySimpleSerDe.class;
+    Class serdeClass = getDefaultSerDe();
     String separatorCode = Integer.toString(Utilities.ctrlaCode);
     String columns = cols;
     String columnTypes = colTypes;
@@ -412,36 +412,63 @@ public final class PlanUtils {
     TableDesc ret;
     try {
-      Class serdeClass = JavaUtils.loadClass(crtViewDesc.getSerde());
-      ret = getTableDesc(serdeClass, new String(LazySerDeParameters.DefaultSeparators), cols,
-          colTypes, false, false);
+      HiveStorageHandler storageHandler = null;
+      if (crtViewDesc.getStorageHandler() != null) {
+        storageHandler = HiveUtils.getStorageHandler(
+            SessionState.getSessionConf(), crtViewDesc.getStorageHandler());
+      }
+
+      Class serdeClass = getDefaultSerDe();
+      String separatorCode = Integer.toString(Utilities.ctrlaCode);
+      String columns = cols;
+      String columnTypes = colTypes;
+      boolean lastColumnTakesRestOfTheLine = false;
+
+      if (storageHandler != null) {
+        serdeClass = storageHandler.getSerDeClass();
+      } else if (crtViewDesc.getSerde() != null) {
+        serdeClass = JavaUtils.loadClass(crtViewDesc.getSerde());
+      }
+
+      ret = getTableDesc(serdeClass, separatorCode, columns, columnTypes,
+          lastColumnTakesRestOfTheLine, false);

       // set other table properties
-      /*
-      TODO - I don't think I need any of this
       Properties properties = ret.getProperties();

-      if (crtTblDesc.getTableName() != null && crtTblDesc.getDatabaseName() != null) {
-        properties.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
-            crtTblDesc.getTableName());
+      if (crtViewDesc.getStorageHandler() != null) {
+        properties.setProperty(
+            org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE,
+            crtViewDesc.getStorageHandler());
       }

-      if (crtTblDesc.getTblProps() != null) {
-        properties.putAll(crtTblDesc.getTblProps());
+      if (crtViewDesc.getTblProps() != null) {
+        properties.putAll(crtViewDesc.getTblProps());
+      }
+      if (crtViewDesc.getSerdeProps() != null) {
+        properties.putAll(crtViewDesc.getSerdeProps());
       }
-      */

       // replace the default input & output file format with those found in
       // crtTblDesc
-      Class inClass =
-          (Class)JavaUtils.loadClass(crtViewDesc.getInputFormat());
-      Class outClass =
-          (Class)JavaUtils.loadClass(crtViewDesc.getOutputFormat());
-
-      ret.setInputFileFormatClass(inClass);
-      ret.setOutputFileFormatClass(outClass);
+      Class in_class;
+      if (storageHandler != null) {
+        in_class = storageHandler.getInputFormatClass();
+      } else {
+        in_class = JavaUtils.loadClass(crtViewDesc.getInputFormat());
+      }
+      Class out_class;
+      if (storageHandler != null) {
+        out_class = storageHandler.getOutputFormatClass();
+      } else {
+        out_class = JavaUtils.loadClass(crtViewDesc.getOutputFormat());
+      }
+      ret.setInputFileFormatClass(in_class);
+      ret.setOutputFileFormatClass(out_class);
     } catch (ClassNotFoundException e) {
       throw new RuntimeException("Unable to find class in getTableDesc: " + e.getMessage(), e);
+    } catch (HiveException e) {
+      throw new RuntimeException("Error loading storage handler in getTableDesc: " + e.getMessage(), e);
     }
     return ret;
   }
@@ -1170,4 +1197,12 @@ public final class PlanUtils {
     return currentInput;
   }
+
+  /**
+   * Returns the default SerDe for table and materialized view creation
+   * if none is specified.
+   */
+  public static Class getDefaultSerDe() {
+    return LazySimpleSerDe.class;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_2.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_2.q
new file mode 100644
index 0000000..9983bae
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_2.q
@@ -0,0 +1,88 @@
+set hive.strict.checks.cartesian.product=false;
+set hive.materializedview.rewriting=true;
+set hive.stats.column.autogather=true;
+
+create table cmv_basetable (a int, b varchar(256), c decimal(10,2), d int);
+
+insert into cmv_basetable values
+ (1, 'alfred', 10.30, 2),
+ (2, 'bob', 3.14, 3),
+ (2, 'bonnie', 172342.2, 3),
+ (3, 'calvin', 978.76, 3),
+ (3, 'charlie', 9.8, 1);
+
+create materialized view cmv_mat_view enable rewrite
+as select b from cmv_basetable where c > 10.0 group by a, b, c;
+
+-- CANNOT BE TRIGGERED
+explain
+select b from cmv_basetable where c > 20.0 group by a, b;
+
+select b from cmv_basetable where c > 20.0 group by a, b;
+
+create materialized view cmv_mat_view_2 enable rewrite
+as select b, c from cmv_basetable where c > 10.0 group by a, b, c;
+
+-- CANNOT BE TRIGGERED
+explain
+select b from cmv_basetable where c > 20.0 group by a, b;
+
+select b from cmv_basetable where c > 20.0 group by a, b;
+
+create materialized view cmv_mat_view_3 enable rewrite
+as select a, b, c from cmv_basetable where c > 10.0 group by a, b, c;
+
+-- CAN BE TRIGGERED
+explain
+select b from cmv_basetable where c > 20.0 group by a, b;
+
+select b from cmv_basetable where c > 20.0 group by a, b;
+
+create materialized view cmv_mat_view_4 enable rewrite
+as select a, b from cmv_basetable group by a, b;
+
+-- CAN BE TRIGGERED
+explain
+select b from cmv_basetable group by b;
+
+select b from cmv_basetable group by b;
+
+create table cmv_basetable_2 (a int, b varchar(256), c decimal(10,2), d int);
+
+insert into cmv_basetable_2 values
+ (1, 'alfred', 10.30, 2),
+ (3, 'calvin', 978.76, 3);
+
+create materialized view cmv_mat_view_5 enable rewrite
+as select cmv_basetable.a, cmv_basetable_2.c
+   from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a)
+   where cmv_basetable_2.c > 10.0
+   group by cmv_basetable.a, cmv_basetable_2.c;
+
+explain
+select cmv_basetable.a
+from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a)
+where cmv_basetable_2.c > 10.10
+group by cmv_basetable.a, cmv_basetable_2.c;
+
+select cmv_basetable.a
+from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a)
+where cmv_basetable_2.c > 10.10
+group by cmv_basetable.a, cmv_basetable_2.c;
+
+explain
http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb.q
new file mode 100644
index 0000000..00da517
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb.q
@@ -0,0 +1,346 @@
+set hive.strict.checks.cartesian.product=false;
+set hive.materializedview.rewriting=true;
+set hive.stats.column.autogather=true;
+
+CREATE TABLE `customer`(
+  `c_custkey` bigint,
+  `c_name` string,
+  `c_address` string,
+  `c_city` string,
+  `c_nation` string,
+  `c_region` string,
+  `c_phone` string,
+  `c_mktsegment` string,
+  primary key (`c_custkey`) disable rely)
+STORED AS ORC;
+
+CREATE TABLE `dates`(
+  `d_datekey` bigint,
+  `d_date` string,
+  `d_dayofweek` string,
+  `d_month` string,
+  `d_year` int,
+  `d_yearmonthnum` int,
+  `d_yearmonth` string,
+  `d_daynuminweek` int,
+  `d_daynuminmonth` int,
+  `d_daynuminyear` int,
+  `d_monthnuminyear` int,
+  `d_weeknuminyear` int,
+  `d_sellingseason` string,
+  `d_lastdayinweekfl` int,
+  `d_lastdayinmonthfl` int,
+  `d_holidayfl` int,
+  `d_weekdayfl` int,
+  primary key (`d_datekey`) disable rely
+)
+STORED AS ORC;
+
+CREATE TABLE `ssb_part`(
+  `p_partkey` bigint,
+  `p_name` string,
+  `p_mfgr` string,
+  `p_category` string,
+  `p_brand1` string,
+  `p_color` string,
+  `p_type` string,
+  `p_size` int,
+  `p_container` string,
+  primary key (`p_partkey`) disable rely)
+STORED AS ORC;
+
+CREATE TABLE `supplier`(
+  `s_suppkey` bigint,
+  `s_name` string,
+  `s_address` string,
+  `s_city` string,
+  `s_nation` string,
+  `s_region` string,
+  `s_phone` string,
+  primary key (`s_suppkey`) disable rely)
+STORED AS ORC;
+
+CREATE TABLE `lineorder`(
+  `lo_orderkey` bigint,
+  `lo_linenumber` int,
+  `lo_custkey` bigint not null disable rely,
+  `lo_partkey` bigint not null disable rely,
+  `lo_suppkey` bigint not null disable rely,
+  `lo_orderdate` bigint not null disable rely,
+  `lo_ordpriority` string,
+  `lo_shippriority` string,
+  `lo_quantity` double,
+  `lo_extendedprice` double,
+  `lo_ordtotalprice` double,
+  `lo_discount` double,
+  `lo_revenue` double,
+  `lo_supplycost` double,
+  `lo_tax` double,
+  `lo_commitdate` bigint,
+  `lo_shipmode` string,
+  primary key (`lo_orderkey`) disable rely,
+  constraint fk1 foreign key (`lo_custkey`) references `customer`(`c_custkey`) disable rely,
+  constraint fk2 foreign key (`lo_orderdate`) references `dates`(`d_datekey`) disable rely,
+  constraint fk3 foreign key (`lo_partkey`) references `ssb_part`(`p_partkey`) disable rely,
+  constraint fk4 foreign key (`lo_suppkey`) references `supplier`(`s_suppkey`) disable rely)
+STORED AS ORC;
+
+analyze table customer compute statistics for columns;
+analyze table dates compute statistics for columns;
+analyze table ssb_part compute statistics for columns;
+analyze table supplier compute statistics for columns;
+analyze table lineorder compute statistics for columns;
+
+CREATE MATERIALIZED VIEW `ssb_mv` ENABLE REWRITE
+AS
+SELECT
+  c_city,
+  c_nation,
+  c_region,
+  d_weeknuminyear,
+  d_year,
+  d_yearmonth,
+  d_yearmonthnum,
+  lo_discount,
+  lo_quantity,
+  p_brand1,
+  p_category,
+  p_mfgr,
+  s_city,
+  s_nation,
+  s_region,
+  lo_revenue,
+  lo_extendedprice * lo_discount discounted_price,
+  lo_revenue - lo_supplycost net_revenue
+FROM
+  customer, dates, lineorder, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and lo_custkey = c_custkey;
+
+-- Q1.1
+explain
+select
+  sum(lo_extendedprice*lo_discount) as revenue
+from
+  lineorder, dates
+where
+  lo_orderdate = d_datekey
+  and d_year = 1993
+  and lo_discount between 1 and 3
+  and lo_quantity < 25;
+
+-- Q1.2
+explain
+select
+  sum(lo_extendedprice*lo_discount) as revenue
+from
+  lineorder, dates
+where
+  lo_orderdate = d_datekey
+  and d_yearmonthnum = 199401
+  and lo_discount between 4 and 6
+  and lo_quantity between 26 and 35;
+
+-- Q1.3
+explain
+select
+  sum(lo_extendedprice*lo_discount) as revenue
+from
+  lineorder, dates
+where
+  lo_orderdate = d_datekey
+  and d_weeknuminyear = 6
+  and d_year = 1994
+  and lo_discount between 5 and 7
+  and lo_quantity between 26 and 35;
+
+-- Q2.1
+explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_category = 'MFGR#12'
+  and s_region = 'AMERICA'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1;
+
+-- Q2.2
+explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_brand1 between 'MFGR#2221' and 'MFGR#2228'
+  and s_region = 'ASIA'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1;
+
+-- Q2.3
+explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_brand1 = 'MFGR#2239'
+  and s_region = 'EUROPE'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1;
+
+-- Q3.1
+explain
+select
+  c_nation, s_nation, d_year,
+  sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and c_region = 'ASIA'
+  and s_region = 'ASIA'
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_nation, s_nation, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q3.2
+explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and c_nation = 'UNITED STATES'
+  and s_nation = 'UNITED STATES'
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q3.3
+explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and (c_city='UNITED KI1' or c_city='UNITED KI5')
+  and (s_city='UNITED KI1' or s_city='UNITED KI5')
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q3.4
+explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and (c_city='UNITED KI1' or c_city='UNITED KI5')
+  and (s_city='UNITED KI1' or s_city='UNITED KI5')
+  and d_yearmonth = 'Dec1997'
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q4.1
+explain
+select
+  d_year, c_nation,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_region = 'AMERICA'
+  and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+group by
+  d_year, c_nation
+order by
+  d_year, c_nation;
+
+-- Q4.2
+explain
+select
+  d_year, s_nation, p_category,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_region = 'AMERICA'
+  and (d_year = 1997 or d_year = 1998)
+  and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+group by
+  d_year, s_nation, p_category
+order by
+  d_year, s_nation, p_category;
+
+-- Q4.3
+explain
+select
+  d_year, s_city, p_brand1,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_nation = 'UNITED STATES'
+  and (d_year = 1997 or d_year = 1998)
+  and p_category = 'MFGR#14'
+group by
+  d_year, s_city, p_brand1
+order by
+  d_year, s_city, p_brand1;
+
+DROP MATERIALIZED VIEW `ssb_mv`;
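Although ssb_mv joins all five SSB tables, queries Q1.1-Q1.3 touch only lineorder and dates. The rewrite can still use the view because the RELY primary-key/foreign-key constraints declared above let the planner prove that the extra joins to customer, ssb_part and supplier neither drop nor duplicate lineorder rows (each foreign key is non-null and joins to a key). Conceptually, Q1.1 then collapses to a scan of the view, roughly as sketched below (illustrative only; the real rewritten plans are in materialized_view_rewrite_ssb.q.out):

    select
      sum(discounted_price) as revenue
    from
      ssb_mv
    where
      d_year = 1993
      and lo_discount between 1 and 3
      and lo_quantity < 25;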
http://git-wip-us.apache.org/repos/asf/hive/blob/073e8473/ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb_2.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb_2.q
new file mode 100644
index 0000000..d38641f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb_2.q
@@ -0,0 +1,347 @@
+set hive.strict.checks.cartesian.product=false;
+set hive.materializedview.rewriting=true;
+set hive.stats.column.autogather=true;
+
+CREATE TABLE `customer`(
+  `c_custkey` bigint,
+  `c_name` string,
+  `c_address` string,
+  `c_city` string,
+  `c_nation` string,
+  `c_region` string,
+  `c_phone` string,
+  `c_mktsegment` string,
+  primary key (`c_custkey`) disable rely)
+STORED AS ORC;
+
+CREATE TABLE `dates`(
+  `d_datekey` bigint,
+  `d_date` string,
+  `d_dayofweek` string,
+  `d_month` string,
+  `d_year` int,
+  `d_yearmonthnum` int,
+  `d_yearmonth` string,
+  `d_daynuminweek` int,
+  `d_daynuminmonth` int,
+  `d_daynuminyear` int,
+  `d_monthnuminyear` int,
+  `d_weeknuminyear` int,
+  `d_sellingseason` string,
+  `d_lastdayinweekfl` int,
+  `d_lastdayinmonthfl` int,
+  `d_holidayfl` int,
+  `d_weekdayfl` int,
+  primary key (`d_datekey`) disable rely
+)
+STORED AS ORC;
+
+CREATE TABLE `ssb_part`(
+  `p_partkey` bigint,
+  `p_name` string,
+  `p_mfgr` string,
+  `p_category` string,
+  `p_brand1` string,
+  `p_color` string,
+  `p_type` string,
+  `p_size` int,
+  `p_container` string,
+  primary key (`p_partkey`) disable rely)
+STORED AS ORC;
+
+CREATE TABLE `supplier`(
+  `s_suppkey` bigint,
+  `s_name` string,
+  `s_address` string,
+  `s_city` string,
+  `s_nation` string,
+  `s_region` string,
+  `s_phone` string,
+  primary key (`s_suppkey`) disable rely)
+STORED AS ORC;
+
+CREATE TABLE `lineorder`(
+  `lo_orderkey` bigint,
+  `lo_linenumber` int,
+  `lo_custkey` bigint not null disable rely,
+  `lo_partkey` bigint not null disable rely,
+  `lo_suppkey` bigint not null disable rely,
+  `lo_orderdate` bigint not null disable rely,
+  `lo_ordpriority` string,
+  `lo_shippriority` string,
+  `lo_quantity` double,
+  `lo_extendedprice` double,
+  `lo_ordtotalprice` double,
+  `lo_discount` double,
+  `lo_revenue` double,
+  `lo_supplycost` double,
+  `lo_tax` double,
+  `lo_commitdate` bigint,
+  `lo_shipmode` string,
+  primary key (`lo_orderkey`) disable rely,
+  constraint fk1 foreign key (`lo_custkey`) references `customer`(`c_custkey`) disable rely,
+  constraint fk2 foreign key (`lo_orderdate`) references `dates`(`d_datekey`) disable rely,
+  constraint fk3 foreign key (`lo_partkey`) references `ssb_part`(`p_partkey`) disable rely,
+  constraint fk4 foreign key (`lo_suppkey`) references `supplier`(`s_suppkey`) disable rely)
+STORED AS ORC;
+
+analyze table customer compute statistics for columns;
+analyze table dates compute statistics for columns;
+analyze table ssb_part compute statistics for columns;
+analyze table supplier compute statistics for columns;
+analyze table lineorder compute statistics for columns;
+
+CREATE MATERIALIZED VIEW `ssb_mv` ENABLE REWRITE
+AS
+SELECT
+  cast(d_year || '-' || d_monthnuminyear || '-' || d_daynuminmonth as timestamp) as `__time`,
+  c_city,
+  c_nation,
+  c_region,
+  cast(d_weeknuminyear as string) d_weeknuminyear,
+  cast(d_year as string) d_year,
+  d_yearmonth,
+  cast(d_yearmonthnum as string) d_yearmonthnum,
+  cast(lo_discount as string) lo_discount,
+  cast(lo_quantity as string) lo_quantity,
+  p_brand1,
+  p_category,
+  p_mfgr,
+  s_city,
+  s_nation,
+  s_region,
+  lo_revenue,
+  lo_extendedprice * lo_discount discounted_price,
+  lo_revenue - lo_supplycost net_revenue
+FROM
+  customer, dates, lineorder, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and lo_custkey = c_custkey;
+
+-- Q1.1
+explain
+select
+  sum(lo_extendedprice*lo_discount) as revenue
+from
+  lineorder, dates
+where
+  lo_orderdate = d_datekey
+  and d_year = 1993
+  and lo_discount between 1 and 3
+  and lo_quantity < 25;
+
+-- Q1.2
+explain
+select
+  sum(lo_extendedprice*lo_discount) as revenue
+from
+  lineorder, dates
+where
+  lo_orderdate = d_datekey
+  and d_yearmonthnum = 199401
+  and lo_discount between 4 and 6
+  and lo_quantity between 26 and 35;
+
+-- Q1.3
+explain
+select
+  sum(lo_extendedprice*lo_discount) as revenue
+from
+  lineorder, dates
+where
+  lo_orderdate = d_datekey
+  and d_weeknuminyear = 6
+  and d_year = 1994
+  and lo_discount between 5 and 7
+  and lo_quantity between 26 and 35;
+
+-- Q2.1
+explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_category = 'MFGR#12'
+  and s_region = 'AMERICA'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1;
+
+-- Q2.2
+explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_brand1 between 'MFGR#2221' and 'MFGR#2228'
+  and s_region = 'ASIA'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1;
+
+-- Q2.3
+explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_brand1 = 'MFGR#2239'
+  and s_region = 'EUROPE'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1;
+
+-- Q3.1
+explain
+select
+  c_nation, s_nation, d_year,
+  sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and c_region = 'ASIA'
+  and s_region = 'ASIA'
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_nation, s_nation, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q3.2
+explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and c_nation = 'UNITED STATES'
+  and s_nation = 'UNITED STATES'
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q3.3
+explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and (c_city='UNITED KI1' or c_city='UNITED KI5')
+  and (s_city='UNITED KI1' or s_city='UNITED KI5')
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q3.4
+explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and (c_city='UNITED KI1' or c_city='UNITED KI5')
+  and (s_city='UNITED KI1' or s_city='UNITED KI5')
+  and d_yearmonth = 'Dec1997'
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q4.1
+explain
+select
+  d_year, c_nation,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_region = 'AMERICA'
+  and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+group by
+  d_year, c_nation
+order by
+  d_year, c_nation;
+
+-- Q4.2
+explain
+select
+  d_year, s_nation, p_category,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_region = 'AMERICA'
+  and (d_year = 1997 or d_year = 1998)
+  and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+group by
+  d_year, s_nation, p_category
+order by
+  d_year, s_nation, p_category;
+
+-- Q4.3
+explain
+select
+  d_year, s_city, p_brand1,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_nation = 'UNITED STATES'
+  and (d_year = 1997 or d_year = 1998)
+  and p_category = 'MFGR#14'
+group by
+  d_year, s_city, p_brand1
+order by
+  d_year, s_city, p_brand1;
+
+DROP MATERIALIZED VIEW `ssb_mv`;
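This second variant runs the same SSB queries but shapes the view like a Druid-style datasource: a synthesized `__time` timestamp plus dimension attributes cast to string. The queries can still be answered from the view, but the compensating predicates must then work against the stored string columns rather than the native numeric ones. A rough, illustrative sketch of what Q1.1 amounts to over this view (the exact rewritten plans, including how Calcite expresses the casts, are in materialized_view_rewrite_ssb_2.q.out):

    select
      sum(discounted_price) as revenue
    from
      ssb_mv
    where
      cast(d_year as int) = 1993
      and cast(lo_discount as double) between 1 and 3
      and cast(lo_quantity as double) < 25;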