quickstep-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jianq...@apache.org
Subject incubator-quickstep git commit: QUICKSTEP-125: Fixed the non-determinism in JoinReordering due to pointer cmp.
Date Mon, 07 May 2018 04:34:31 GMT
Repository: incubator-quickstep
Updated Branches:
  refs/heads/master 3a7b9c796 -> 185c17ca8


QUICKSTEP-125: Fixed the non-determinism in JoinReordering due to pointer cmp.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/185c17ca
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/185c17ca
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/185c17ca

Branch: refs/heads/master
Commit: 185c17ca8ac9a44413b678924b9e63ebdac664df
Parents: 3a7b9c7
Author: Zuyu Zhang <zuyu@cs.wisc.edu>
Authored: Sat May 5 14:21:53 2018 -0500
Committer: Zuyu Zhang <zuyu@cs.wisc.edu>
Committed: Sat May 5 14:29:42 2018 -0500

----------------------------------------------------------------------
 .../StarSchemaHashJoinOrderOptimization.cpp     | 30 +++++++------
 .../StarSchemaHashJoinOrderOptimization.hpp     | 45 +++++++++++++-------
 2 files changed, 48 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/185c17ca/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
index 1007dd5..78c0caf 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
@@ -30,6 +30,7 @@
 
 #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
 #include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
 #include "query_optimizer/expressions/NamedExpression.hpp"
 #include "query_optimizer/expressions/PatternMatcher.hpp"
 #include "query_optimizer/physical/Aggregate.hpp"
@@ -68,7 +69,7 @@ P::PhysicalPtr StarSchemaHashJoinOrderOptimization::applyInternal(const
P::Physi
     bool is_valid_cascading_hash_join = false;
     if (hash_join->residual_predicate() == nullptr) {
       is_valid_cascading_hash_join = true;
-      for (const E::NamedExpressionPtr expr : hash_join->project_expressions()) {
+      for (const E::NamedExpressionPtr &expr : hash_join->project_expressions()) {
         if (!E::SomeAttributeReference::Matches(expr)) {
           is_valid_cascading_hash_join = false;
           break;
@@ -95,8 +96,8 @@ P::PhysicalPtr StarSchemaHashJoinOrderOptimization::applyInternal(const
P::Physi
 
     // Gather join attribute pairs.
     for (std::size_t i = 0; i < hash_join->left_join_attributes().size(); ++i) {
-      const std::size_t left_attr_id = hash_join->left_join_attributes()[i]->id();
-      const std::size_t right_attr_id = hash_join->right_join_attributes()[i]->id();
+      const E::ExprId left_attr_id = hash_join->left_join_attributes()[i]->id();
+      const E::ExprId right_attr_id = hash_join->right_join_attributes()[i]->id();
 
       join_group->join_attribute_pairs.emplace_back(left_attr_id, right_attr_id);
     }
@@ -150,7 +151,7 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
 
   std::vector<TableInfo> table_info_storage;
   const std::vector<P::PhysicalPtr> &tables = join_group.tables;
-  for (std::size_t i = 0; i < join_group.tables.size(); ++i) {
+  for (std::size_t i = 0; i < tables.size(); ++i) {
     table_info_storage.emplace_back(
         i,
         tables[i],
@@ -279,16 +280,8 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
     remaining_tables.erase(selected_probe_table_info);
     remaining_tables.erase(selected_build_table_info);
 
-    // Figure out the output attributes.
     const P::PhysicalPtr &probe_child = selected_probe_table_info->table;
     const P::PhysicalPtr &build_child = selected_build_table_info->table;
-    std::vector<E::NamedExpressionPtr> output_attributes;
-    for (const E::AttributeReferencePtr &probe_attr : probe_child->getOutputAttributes())
{
-      output_attributes.emplace_back(probe_attr);
-    }
-    for (const E::AttributeReferencePtr &build_attr : build_child->getOutputAttributes())
{
-      output_attributes.emplace_back(build_attr);
-    }
 
     // Figure out the join attributes.
     std::vector<E::AttributeReferencePtr> probe_attributes;
@@ -311,6 +304,19 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
     // the table pool. Return the last table in the table pool if there is only
     // one table left.
     if (remaining_tables.size() > 0) {
+      const auto probe_output_attributes = probe_child->getOutputAttributes();
+      const auto build_output_attributes = build_child->getOutputAttributes();
+
+      // Figure out the output attributes.
+      std::vector<E::NamedExpressionPtr> output_attributes;
+      output_attributes.reserve(probe_output_attributes.size() + build_output_attributes.size());
+      for (const E::AttributeReferencePtr &probe_attr : probe_output_attributes) {
+        output_attributes.emplace_back(probe_attr);
+      }
+      for (const E::AttributeReferencePtr &build_attr : build_output_attributes) {
+        output_attributes.emplace_back(build_attr);
+      }
+
       P::PhysicalPtr output =
           P::HashJoin::Create(probe_child,
                               build_child,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/185c17ca/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
index 64e2478..4dc9397 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
@@ -109,15 +109,20 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical>
{
 
     inline bool isBetterThan(const JoinPair &rhs) const {
       const auto &lhs = *this;
+      const TableInfo &lhs_probe = *(lhs.probe);
+      const TableInfo &lhs_build = *(lhs.build);
+
+      const TableInfo &rhs_probe = *(rhs.probe);
+      const TableInfo &rhs_build = *(rhs.build);
 
       // Avoid carrying too many output attributes all the way through a long
       // chain of hash joins.
       const bool lhs_has_large_output =
-          lhs.build->estimated_num_output_attributes
-              + lhs.probe->estimated_num_output_attributes > 5;
+          lhs_build.estimated_num_output_attributes
+              + lhs_probe.estimated_num_output_attributes > 5;
       const bool rhs_has_large_output =
-          rhs.build->estimated_num_output_attributes
-              + rhs.probe->estimated_num_output_attributes > 5;
+          rhs_build.estimated_num_output_attributes
+              + rhs_probe.estimated_num_output_attributes > 5;
       if (lhs_has_large_output != rhs_has_large_output) {
         return rhs_has_large_output;
       }
@@ -127,33 +132,43 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical>
{
         return lhs.build_side_unique;
       }
 
+      // Prefer hash joins where the build side table has no output attributes.
+      const bool lhs_build_has_no_output = lhs_build.estimated_num_output_attributes == 0u;
+      const bool rhs_build_has_no_output = rhs_build.estimated_num_output_attributes == 0u;
+      if (lhs_build_has_no_output != rhs_build_has_no_output) {
+        return lhs_build_has_no_output;
+      }
+
       // Prefer hash joins where the build side table is small.
-      const bool lhs_has_small_build = lhs.build->estimated_cardinality < 0x100;
-      const bool rhs_has_small_build = rhs.build->estimated_cardinality < 0x100;
+      const bool lhs_has_small_build = lhs_build.estimated_cardinality < 0x100;
+      const bool rhs_has_small_build = rhs_build.estimated_cardinality < 0x100;
       if (lhs_has_small_build != rhs_has_small_build) {
         return lhs_has_small_build;
       }
 
       // Prefer hash joins where the probe side table is small. This is effective
       // for TPCH style (snowflake schema) queries, with the help of LIPFilters.
-      if (lhs.probe->estimated_cardinality != rhs.probe->estimated_cardinality) {
-        return lhs.probe->estimated_cardinality < rhs.probe->estimated_cardinality;
+      if (lhs_probe.estimated_cardinality != rhs_probe.estimated_cardinality) {
+        return lhs_probe.estimated_cardinality < rhs_probe.estimated_cardinality;
       }
 
       // Prefer build side tables with better selectivity. This is effective
       // for SSB style queries.
-      if (lhs.build->estimated_selectivity != rhs.build->estimated_selectivity) {
-        return lhs.build->estimated_selectivity < rhs.build->estimated_selectivity;
+      if (lhs_build.estimated_selectivity != rhs_build.estimated_selectivity) {
+        return lhs_build.estimated_selectivity < rhs_build.estimated_selectivity;
       }
 
       // Residual rules that help provide a total order.
-      if (lhs.build->estimated_cardinality != rhs.build->estimated_cardinality) {
-        return lhs.build->estimated_cardinality < rhs.build->estimated_cardinality;
+      if (lhs_build.estimated_cardinality != rhs_build.estimated_cardinality) {
+        return lhs_build.estimated_cardinality < rhs_build.estimated_cardinality;
       }
-      if (lhs.probe->table != rhs.probe->table) {
-        return lhs.probe->table < rhs.probe->table;
+
+      const std::size_t lhs_probe_table_info_id = lhs_probe.table_info_id;
+      const std::size_t rhs_probe_table_info_id = rhs_probe.table_info_id;
+      if (lhs_probe_table_info_id != rhs_probe_table_info_id) {
+        return lhs_probe_table_info_id < rhs_probe_table_info_id;
       } else {
-        return lhs.build->table < rhs.build->table;
+        return lhs_build.table_info_id < rhs_build.table_info_id;
       }
     }
 


Mime
View raw message