quickstep-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jianq...@apache.org
Subject [1/3] incubator-quickstep git commit: Minor bug fixes and refactors.
Date Wed, 09 May 2018 21:20:03 GMT
Repository: incubator-quickstep
Updated Branches:
  refs/heads/master 06982e9a6 -> 42588d433


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/42588d43/parser/preprocessed/SqlParser_gen.hpp
----------------------------------------------------------------------
diff --git a/parser/preprocessed/SqlParser_gen.hpp b/parser/preprocessed/SqlParser_gen.hpp
index 142059d..dd2728d 100644
--- a/parser/preprocessed/SqlParser_gen.hpp
+++ b/parser/preprocessed/SqlParser_gen.hpp
@@ -78,15 +78,15 @@ extern int quickstep_yydebug;
     TOKEN_BLOCKPROPERTIES = 288,
     TOKEN_BLOCKSAMPLE = 289,
     TOKEN_BLOOM_FILTER = 290,
-    TOKEN_CSB_TREE = 291,
-    TOKEN_BY = 292,
-    TOKEN_CASE = 293,
-    TOKEN_CHARACTER = 294,
-    TOKEN_CHECK = 295,
-    TOKEN_COLUMN = 296,
-    TOKEN_CONSTRAINT = 297,
-    TOKEN_COPY = 298,
-    TOKEN_CREATE = 299,
+    TOKEN_BY = 291,
+    TOKEN_CASE = 292,
+    TOKEN_CHARACTER = 293,
+    TOKEN_CHECK = 294,
+    TOKEN_COLUMN = 295,
+    TOKEN_CONSTRAINT = 296,
+    TOKEN_COPY = 297,
+    TOKEN_CREATE = 298,
+    TOKEN_CSB_TREE = 299,
     TOKEN_CURRENT = 300,
     TOKEN_DATE = 301,
     TOKEN_DATETIME = 302,
@@ -100,85 +100,85 @@ extern int quickstep_yydebug;
     TOKEN_DROP = 310,
     TOKEN_ELSE = 311,
     TOKEN_END = 312,
-    TOKEN_EXISTS = 313,
-    TOKEN_EXTRACT = 314,
-    TOKEN_FALSE = 315,
-    TOKEN_FIRST = 316,
-    TOKEN_FLOAT = 317,
-    TOKEN_FOLLOWING = 318,
-    TOKEN_FOR = 319,
-    TOKEN_FOREIGN = 320,
-    TOKEN_FROM = 321,
-    TOKEN_FULL = 322,
-    TOKEN_GROUP = 323,
-    TOKEN_HASH = 324,
-    TOKEN_HAVING = 325,
-    TOKEN_HOUR = 326,
-    TOKEN_IN = 327,
-    TOKEN_INDEX = 328,
-    TOKEN_INNER = 329,
-    TOKEN_INSERT = 330,
-    TOKEN_INTEGER = 331,
-    TOKEN_INTERVAL = 332,
-    TOKEN_INTO = 333,
-    TOKEN_JOIN = 334,
-    TOKEN_KEY = 335,
-    TOKEN_LAST = 336,
-    TOKEN_LEFT = 337,
-    TOKEN_LIMIT = 338,
-    TOKEN_LONG = 339,
-    TOKEN_MINUTE = 340,
-    TOKEN_MONTH = 341,
-    TOKEN_NULL = 342,
-    TOKEN_NULLS = 343,
-    TOKEN_OFF = 344,
-    TOKEN_ON = 345,
-    TOKEN_ORDER = 346,
-    TOKEN_OUTER = 347,
-    TOKEN_OVER = 348,
-    TOKEN_PARTITION = 349,
-    TOKEN_PARTITIONS = 350,
-    TOKEN_PERCENT = 351,
-    TOKEN_PRECEDING = 352,
-    TOKEN_PRIMARY = 353,
-    TOKEN_PRIORITY = 354,
-    TOKEN_QUIT = 355,
-    TOKEN_RANGE = 356,
-    TOKEN_REAL = 357,
-    TOKEN_REFERENCES = 358,
-    TOKEN_RIGHT = 359,
-    TOKEN_ROW = 360,
-    TOKEN_ROW_DELIMITER = 361,
-    TOKEN_ROWS = 362,
-    TOKEN_SECOND = 363,
-    TOKEN_SELECT = 364,
-    TOKEN_SET = 365,
-    TOKEN_SMA = 366,
-    TOKEN_SMALLINT = 367,
-    TOKEN_STDERR = 368,
-    TOKEN_STDOUT = 369,
-    TOKEN_SUBSTRING = 370,
-    TOKEN_TABLE = 371,
-    TOKEN_THEN = 372,
-    TOKEN_TIME = 373,
-    TOKEN_TIMESTAMP = 374,
-    TOKEN_TO = 375,
-    TOKEN_TRUE = 376,
-    TOKEN_TUPLESAMPLE = 377,
-    TOKEN_UNBOUNDED = 378,
-    TOKEN_UNIQUE = 379,
-    TOKEN_UPDATE = 380,
-    TOKEN_USING = 381,
-    TOKEN_VALUES = 382,
-    TOKEN_VARCHAR = 383,
-    TOKEN_WHEN = 384,
-    TOKEN_WHERE = 385,
-    TOKEN_WINDOW = 386,
-    TOKEN_WITH = 387,
-    TOKEN_YEAR = 388,
-    TOKEN_YEARMONTH = 389,
-    TOKEN_EOF = 390,
-    TOKEN_LEX_ERROR = 391
+    TOKEN_EOF = 313,
+    TOKEN_EXISTS = 314,
+    TOKEN_EXTRACT = 315,
+    TOKEN_FALSE = 316,
+    TOKEN_FIRST = 317,
+    TOKEN_FLOAT = 318,
+    TOKEN_FOLLOWING = 319,
+    TOKEN_FOR = 320,
+    TOKEN_FOREIGN = 321,
+    TOKEN_FROM = 322,
+    TOKEN_FULL = 323,
+    TOKEN_GROUP = 324,
+    TOKEN_HASH = 325,
+    TOKEN_HAVING = 326,
+    TOKEN_HOUR = 327,
+    TOKEN_IN = 328,
+    TOKEN_INDEX = 329,
+    TOKEN_INNER = 330,
+    TOKEN_INSERT = 331,
+    TOKEN_INTEGER = 332,
+    TOKEN_INTERVAL = 333,
+    TOKEN_INTO = 334,
+    TOKEN_JOIN = 335,
+    TOKEN_KEY = 336,
+    TOKEN_LAST = 337,
+    TOKEN_LEFT = 338,
+    TOKEN_LEX_ERROR = 339,
+    TOKEN_LIMIT = 340,
+    TOKEN_LONG = 341,
+    TOKEN_MINUTE = 342,
+    TOKEN_MONTH = 343,
+    TOKEN_NULL = 344,
+    TOKEN_NULLS = 345,
+    TOKEN_OFF = 346,
+    TOKEN_ON = 347,
+    TOKEN_ORDER = 348,
+    TOKEN_OUTER = 349,
+    TOKEN_OVER = 350,
+    TOKEN_PARTITION = 351,
+    TOKEN_PARTITIONS = 352,
+    TOKEN_PERCENT = 353,
+    TOKEN_PRECEDING = 354,
+    TOKEN_PRIMARY = 355,
+    TOKEN_PRIORITY = 356,
+    TOKEN_QUIT = 357,
+    TOKEN_RANGE = 358,
+    TOKEN_REAL = 359,
+    TOKEN_REFERENCES = 360,
+    TOKEN_RIGHT = 361,
+    TOKEN_ROW = 362,
+    TOKEN_ROWS = 363,
+    TOKEN_ROW_DELIMITER = 364,
+    TOKEN_SECOND = 365,
+    TOKEN_SELECT = 366,
+    TOKEN_SET = 367,
+    TOKEN_SMA = 368,
+    TOKEN_SMALLINT = 369,
+    TOKEN_STDERR = 370,
+    TOKEN_STDOUT = 371,
+    TOKEN_SUBSTRING = 372,
+    TOKEN_TABLE = 373,
+    TOKEN_THEN = 374,
+    TOKEN_TIME = 375,
+    TOKEN_TIMESTAMP = 376,
+    TOKEN_TO = 377,
+    TOKEN_TRUE = 378,
+    TOKEN_TUPLESAMPLE = 379,
+    TOKEN_UNBOUNDED = 380,
+    TOKEN_UNIQUE = 381,
+    TOKEN_UPDATE = 382,
+    TOKEN_USING = 383,
+    TOKEN_VALUES = 384,
+    TOKEN_VARCHAR = 385,
+    TOKEN_WHEN = 386,
+    TOKEN_WHERE = 387,
+    TOKEN_WINDOW = 388,
+    TOKEN_WITH = 389,
+    TOKEN_YEAR = 390,
+    TOKEN_YEARMONTH = 391
   };
 #endif
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/42588d43/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index 6ab86e5..6b506be 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -287,6 +287,7 @@ std::size_t StarSchemaSimpleCostModel::estimateNumDistinctValues(
         return static_cast<std::size_t>(
             left_child_num_distinct_values * right_child_selectivity + 0.5);
       }
+      break;
     }
     case P::PhysicalType::kHashJoin: {
       const P::HashJoinPtr &hash_join =
@@ -447,7 +448,7 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate(
           if (E::ContainsExprId(child->getOutputAttributes(), attr->id())) {
             const std::size_t child_num_distinct_values = estimateNumDistinctValues(attr->id(), child);
             if (comparison_expression->isEqualityComparisonPredicate()) {
-              return 1.0 / child_num_distinct_values;
+              return 1.0 / std::max(child_num_distinct_values, static_cast<std::size_t>(1u));
             } else {
               return 1.0 / std::max(std::min(child_num_distinct_values / 100.0, 10.0), 2.0);
             }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/42588d43/query_optimizer/resolver/Resolver.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/resolver/Resolver.cpp b/query_optimizer/resolver/Resolver.cpp
index b07cf10..6e03b83 100644
--- a/query_optimizer/resolver/Resolver.cpp
+++ b/query_optimizer/resolver/Resolver.cpp
@@ -1523,18 +1523,22 @@ L::LogicalPtr Resolver::resolveSetOperations(
   std::vector<const ParseSetOperation*> operands;
   CollapseSetOperation(parse_set_operations, parse_set_operations, &operands);
 
-  DCHECK_LT(1u, operands.size());
+  const std::size_t num_operands = operands.size();
+  DCHECK_LT(1u, num_operands);
   std::vector<L::LogicalPtr> resolved_operations;
+  resolved_operations.reserve(num_operands);
   std::vector<std::vector<E::AttributeReferencePtr>> attribute_matrix;
+  attribute_matrix.reserve(num_operands);
 
   // Resolve the first operation, and get the output attributes.
   auto iter = operands.begin();
   const ParseSetOperation &operation = static_cast<const ParseSetOperation&>(**iter);
   L::LogicalPtr operation_logical =
       resolveSetOperation(operation, set_operation_name, type_hints, parent_resolver);
-  const std::vector<E::AttributeReferencePtr> operation_attributes =
+  std::vector<E::AttributeReferencePtr> operation_attributes =
       operation_logical->getOutputAttributes();
-  attribute_matrix.push_back(operation_attributes);
+  const std::size_t num_operation_attributes = operation_attributes.size();
+  attribute_matrix.push_back(std::move(operation_attributes));
   resolved_operations.push_back(operation_logical);
 
   // Resolve the rest operations, and check the size of output attributes.
@@ -1547,22 +1551,25 @@ L::LogicalPtr Resolver::resolveSetOperations(
 
     // Check output attributes size.
     // Detailed type check and type cast will perform later.
-    if (attribute_matrix.back().size() != operation_attributes.size()) {
+    if (attribute_matrix.back().size() != num_operation_attributes) {
       THROW_SQL_ERROR_AT(&current_operation)
           << "Can not perform " << parse_set_operations.getName()
           << "opeartion between " << std::to_string(attribute_matrix.back().size())
-          << "and " << std::to_string(operation_attributes.size())
+          << "and " << std::to_string(num_operation_attributes)
           << "columns";
     }
 
     resolved_operations.push_back(current_logical);
   }
+  DCHECK_EQ(num_operands, attribute_matrix.size());
+  DCHECK_EQ(num_operands, resolved_operations.size());
 
   // Get the possible output attributes that the attributes of all operands can cast to.
   std::vector<E::AttributeReferencePtr> possible_attributes;
-  for (std::size_t aid = 0; aid < operation_attributes.size(); ++aid) {
+  possible_attributes.reserve(num_operation_attributes);
+  for (std::size_t aid = 0; aid < num_operation_attributes; ++aid) {
     E::AttributeReferencePtr possible_attribute = attribute_matrix[0][aid];
-    for (std::size_t opid = 1; opid < resolved_operations.size(); ++opid) {
+    for (std::size_t opid = 1; opid < num_operands; ++opid) {
       const Type &current_type = attribute_matrix[opid][aid]->getValueType();
       const Type &possible_type = possible_attribute->getValueType();
       if (!possible_type.equals(current_type)) {
@@ -1593,10 +1600,10 @@ L::LogicalPtr Resolver::resolveSetOperations(
     possible_attributes.push_back(possible_attribute);
   }
 
-  for (std::size_t opid = 0; opid < operation_attributes.size(); ++opid) {
+  for (std::size_t opid = 0; opid < num_operation_attributes; ++opid) {
     // Generate a cast operation if needed.
     std::vector<E::NamedExpressionPtr> cast_expressions;
-    for (std::size_t aid = 0; aid < operation_attributes.size(); ++aid) {
+    for (std::size_t aid = 0; aid < num_operation_attributes; ++aid) {
       const E::AttributeReferencePtr current_attr = attribute_matrix[opid][aid];
       const Type &current_type = current_attr->getValueType();
       const Type &possible_type = possible_attributes[aid]->getValueType();
@@ -1635,10 +1642,11 @@ L::LogicalPtr Resolver::resolveSetOperations(
     case ParseSetOperation::kUnionAll:
       return L::SetOperation::Create(
           L::SetOperation::kUnionAll, resolved_operations, output_attributes);
-    default:
-      LOG(FATAL) << "Unknown operation: " << parse_set_operations.toString();
-      return nullptr;
+    case ParseSetOperation::kSelect:
+      LOG(FATAL) << "Unexpected operation: " << parse_set_operations.toString();
   }
+  LOG(FATAL) << "Unreachable";
+  return nullptr;
 }
 
 L::LogicalPtr Resolver::resolveSetOperation(
@@ -1664,10 +1672,9 @@ L::LogicalPtr Resolver::resolveSetOperation(
                            type_hints,
                            parent_resolver);
     }
-    default:
-      LOG(FATAL) << "Unknown set operation: " << set_operation_query.toString();
-      return nullptr;
   }
+  LOG(FATAL) << "Unreachable";
+  return nullptr;
 }
 
 E::SubqueryExpressionPtr Resolver::resolveSubqueryExpression(

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/42588d43/query_optimizer/rules/InjectJoinFilters.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/InjectJoinFilters.cpp b/query_optimizer/rules/InjectJoinFilters.cpp
index 90e81d5..a165377 100644
--- a/query_optimizer/rules/InjectJoinFilters.cpp
+++ b/query_optimizer/rules/InjectJoinFilters.cpp
@@ -49,6 +49,28 @@ namespace optimizer {
 namespace E = ::quickstep::optimizer::expressions;
 namespace P = ::quickstep::optimizer::physical;
 
+namespace {
+
+P::PhysicalPtr wrapSelection(const P::PhysicalPtr &input) {
+  DCHECK(P::SomeTopLevelPlan::Matches(input));
+  const P::TopLevelPlanPtr &top_level_plan =
+      std::static_pointer_cast<const P::TopLevelPlan>(input);
+  const P::PhysicalPtr &plan = top_level_plan->plan();
+
+  if (P::SomeFilterJoin::Matches(plan)) {
+    return input;
+  }
+
+  const P::SelectionPtr selection =
+      P::Selection::Create(
+          plan,
+          E::ToNamedExpressions(top_level_plan->plan()->getOutputAttributes()),
+          nullptr /* filter_predicate */);
+  return input->copyWithNewChildren({ selection });
+}
+
+}  // namespace
+
 P::PhysicalPtr InjectJoinFilters::apply(const P::PhysicalPtr &input) {
   DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
 
@@ -62,17 +84,25 @@ P::PhysicalPtr InjectJoinFilters::apply(const P::PhysicalPtr &input) {
   // Step 1. Transform applicable HashJoin nodes to FilterJoin nodes.
   P::PhysicalPtr output = transformHashJoinToFilters(input);
 
-  // Step 2. Push down FilterJoin nodes to be evaluated early.
+  if (output == input) {
+    return input;
+  }
+
+  // Step 2. If the top level plan is a filter join, wrap it with a Selection
+  // to stabilize output columns.
+  output = wrapSelection(output);
+
+  // Step 3. Push down FilterJoin nodes to be evaluated early.
   output = pushDownFilters(output);
 
-  // Step 3. Add Selection nodes for attaching the LIPFilters, if necessary.
+  // Step 4. Add Selection nodes for attaching the LIPFilters, if necessary.
   output = addFilterAnchors(output, false);
 
-  // Step 4. Because of the pushdown of FilterJoin nodes, there are optimization
+  // Step 5. Because of the pushdown of FilterJoin nodes, there are optimization
   // opportunities for projecting columns early.
   output = PruneColumns().apply(output);
 
-  // Step 5. For each FilterJoin node, attach its corresponding LIPFilter to
+  // Step 6. For each FilterJoin node, attach its corresponding LIPFilter to
   // proper nodes.
   concretizeAsLIPFilters(output, nullptr);
 
@@ -146,13 +176,8 @@ bool InjectJoinFilters::isTransformable(
 P::PhysicalPtr InjectJoinFilters::transformHashJoinToFilters(
     const P::PhysicalPtr &input) const {
   std::vector<P::PhysicalPtr> new_children;
-  bool has_changed_children = false;
   for (const P::PhysicalPtr &child : input->children()) {
-    const P::PhysicalPtr new_child = transformHashJoinToFilters(child);
-    if (child != new_child && !has_changed_children) {
-      has_changed_children = true;
-    }
-    new_children.push_back(new_child);
+    new_children.emplace_back(transformHashJoinToFilters(child));
   }
 
   P::HashJoinPtr hash_join;
@@ -187,7 +212,7 @@ P::PhysicalPtr InjectJoinFilters::transformHashJoinToFilters(
                                  hash_join->cloneOutputPartitionSchemeHeader());
   }
 
-  if (has_changed_children) {
+  if (input->children() != new_children) {
     return input->copyWithNewChildren(new_children);
   } else {
     return input;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/42588d43/query_optimizer/rules/ReorderColumns.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ReorderColumns.cpp b/query_optimizer/rules/ReorderColumns.cpp
index 5f52938..6373f36 100644
--- a/query_optimizer/rules/ReorderColumns.cpp
+++ b/query_optimizer/rules/ReorderColumns.cpp
@@ -61,7 +61,9 @@ P::PhysicalPtr ReorderColumns::applyInternal(const P::PhysicalPtr &input,
   if (skip_transform) {
     std::vector<P::PhysicalPtr> new_children;
     for (const P::PhysicalPtr &child : input->children()) {
-      new_children.emplace_back(applyInternal(child, lock_ordering && is_not_transformable));
+      const bool child_lock_ordering =
+          (lock_ordering && is_not_transformable) || P::SomeUnionAll::Matches(child);
+      new_children.emplace_back(applyInternal(child, child_lock_ordering));
     }
 
     if (new_children != input->children()) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/42588d43/utility/BarrieredReadWriteConcurrentBitVector.hpp
----------------------------------------------------------------------
diff --git a/utility/BarrieredReadWriteConcurrentBitVector.hpp b/utility/BarrieredReadWriteConcurrentBitVector.hpp
index 0086c7f..dba5ff0 100644
--- a/utility/BarrieredReadWriteConcurrentBitVector.hpp
+++ b/utility/BarrieredReadWriteConcurrentBitVector.hpp
@@ -42,7 +42,7 @@ namespace quickstep {
  * @brief A bit vector that supports concurrent read/write operations, with a
  *        RESTRICTED CONCURRENCY LEVEL that the read operations and the write
  *        operations must be isolated with a (mostly implicit) barrier.
- * 
+ *
  * In other words, when using this bit vector, the read operations and write
  * operations must be grouped into phases. Within a phase there can be either
  * concurrent read operations or concurrent write operations, but not both (or

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/42588d43/utility/ExecutionDAGVisualizer.cpp
----------------------------------------------------------------------
diff --git a/utility/ExecutionDAGVisualizer.cpp b/utility/ExecutionDAGVisualizer.cpp
index 8059ef3..4b30492 100644
--- a/utility/ExecutionDAGVisualizer.cpp
+++ b/utility/ExecutionDAGVisualizer.cpp
@@ -292,19 +292,17 @@ void ExecutionDAGVisualizer::bindProfilingStats(
       node_info.labels.emplace_back(
           "effective concurrency: " + FormatDigits(concurrency, 2));
 
-      DCHECK(workorders_count.find(node_index) != workorders_count.end());
-      const std::size_t workorders_count_for_node = workorders_count.at(node_index);
-      if (workorders_count_for_node > 0) {
-        mean_time_per_workorder[node_index] =
-            mean_time_per_workorder[node_index] /
+      const auto cit = workorders_count.find(node_index);
+      if (cit != workorders_count.end()) {
+        const std::size_t workorders_count_for_node = cit->second;
+        mean_time_per_workorder[node_index] /=
             (1000 * static_cast<float>(workorders_count_for_node));
-      } else {
-        mean_time_per_workorder[node_index] = 0;
+
+        node_info.labels.emplace_back(std::to_string(workorders_count_for_node) + " work orders");
+        node_info.labels.emplace_back(
+            "Mean work order execution time: " +
+            FormatDigits(mean_time_per_workorder[node_index], 2) + " ms");
       }
-      node_info.labels.emplace_back(std::to_string(workorders_count_for_node) + " work orders");
-      node_info.labels.emplace_back(
-          "Mean work order execution time: " +
-          FormatDigits(mean_time_per_workorder[node_index], 2) + " ms");
     }
   }
 }


Mime
View raw message