quickstep-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jianq...@apache.org
Subject incubator-quickstep git commit: Updates
Date Sat, 28 Jan 2017 21:00:14 GMT
Repository: incubator-quickstep
Updated Branches:
  refs/heads/reorder-attrs 26c3db420 -> 4700b097b


Updates


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/4700b097
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/4700b097
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/4700b097

Branch: refs/heads/reorder-attrs
Commit: 4700b097bb95b663cf47282cc77031377e54d28c
Parents: 26c3db4
Author: Jianqiao Zhu <jianqiao@cs.wisc.edu>
Authored: Sat Jan 28 15:00:01 2017 -0600
Committer: Jianqiao Zhu <jianqiao@cs.wisc.edu>
Committed: Sat Jan 28 15:00:01 2017 -0600

----------------------------------------------------------------------
 query_optimizer/rules/ReorderColumns.cpp  |  28 ++---
 relational_operators/HashJoinOperator.cpp | 142 ++++++++++++++++++++++++-
 relational_operators/HashJoinOperator.hpp |   4 +
 3 files changed, 149 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4700b097/query_optimizer/rules/ReorderColumns.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ReorderColumns.cpp b/query_optimizer/rules/ReorderColumns.cpp
index e04b810..16a3750 100644
--- a/query_optimizer/rules/ReorderColumns.cpp
+++ b/query_optimizer/rules/ReorderColumns.cpp
@@ -54,17 +54,9 @@ P::PhysicalPtr ReorderColumns::applyInternal(const P::PhysicalPtr &input,
   // unchanged so that the output columns are ordered as specified by the user.
   // So here we use the flag "lock_ordering" to skip the first transformable
   // node (i.e. the first Selection or HashJoin).
-  bool skip_transform;
-  if (IsTransformable(input)) {
-    if (lock_ordering) {
-      skip_transform = true;
-      lock_ordering = false;
-    } else {
-      skip_transform = false;
-    }
-  } else {
-    skip_transform = true;
-  }
+  const bool is_not_transformable = !IsTransformable(input);
+  const bool skip_transform = lock_ordering || is_not_transformable;
+  lock_ordering = lock_ordering && is_not_transformable;
 
   if (skip_transform) {
     std::vector<P::PhysicalPtr> new_children;
@@ -86,7 +78,9 @@ P::PhysicalPtr ReorderColumns::applyInternal(const P::PhysicalPtr &input,
   }
   std::reverse(nodes.begin(), nodes.end());
 
-  // Analyze the attributes in the nodes.
+  // A greedy algorithm that reorders the output attributes based on the GEN/KILL
+  // intervals. This algorithm works well with SSB/TPCH queries and is not likely
+  // to make the plans worse for whatever queries.
   std::unordered_map<E::ExprId, std::size_t> base, gen, kill;
 
   const P::PhysicalPtr base_node =
@@ -111,16 +105,6 @@ P::PhysicalPtr ReorderColumns::applyInternal(const P::PhysicalPtr &input,
     }
   }
 
-//  std::cout << "gen: \n";
-//  for (const auto &pair : gen) {
-//    std::cout << pair.first << ": " << pair.second << "\n";
-//  }
-//
-//  std::cout << "kill: \n";
-//  for (const auto &pair : kill) {
-//    std::cout << pair.first << ": " << pair.second << "\n";
-//  }
-
   const auto comparator = [&gen, &kill, &base](const E::NamedExpressionPtr &lhs,
                                                const E::NamedExpressionPtr &rhs) ->
bool {
     const E::ExprId lhs_id = lhs->id();

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4700b097/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index cda1465..67e7b97 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -470,16 +470,24 @@ void HashInnerJoinWorkOrder::execute() {
         base_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
   }
 
+  if (probe_accessor->getImplementationType() == ValueAccessor::Implementation::kSplitRowStore)
{
+    executeWithCopyElision(probe_accessor.get());
+  } else {
+    executeWithoutCopyElision(probe_accessor.get());
+  }
+}
+
+void HashInnerJoinWorkOrder::executeWithoutCopyElision(ValueAccessor *probe_accessor) {
   VectorsOfPairsJoinedTuplesCollector collector;
   if (join_key_attributes_.size() == 1) {
     hash_table_.getAllFromValueAccessor(
-        probe_accessor.get(),
+        probe_accessor,
         join_key_attributes_.front(),
         any_join_key_attributes_nullable_,
         &collector);
   } else {
     hash_table_.getAllFromValueAccessorCompositeKey(
-        probe_accessor.get(),
+        probe_accessor,
         join_key_attributes_,
         any_join_key_attributes_nullable_,
         &collector);
@@ -530,7 +538,7 @@ void HashInnerJoinWorkOrder::execute() {
       temp_result.addColumn((*selection_cit)->getAllValuesForJoin(build_relation_id,
                                                                   build_accessor.get(),
                                                                   probe_relation_id,
-                                                                  probe_accessor.get(),
+                                                                  probe_accessor,
                                                                   build_block_entry.second));
     }
 
@@ -538,6 +546,134 @@ void HashInnerJoinWorkOrder::execute() {
   }
 }
 
+void HashInnerJoinWorkOrder::executeWithCopyElision(ValueAccessor *probe_accessor) {
+  PairsOfVectorsJoinedTuplesCollector collector;
+  if (join_key_attributes_.size() == 1) {
+    hash_table_.getAllFromValueAccessor(
+        probe_accessor,
+        join_key_attributes_.front(),
+        any_join_key_attributes_nullable_,
+        &collector);
+  } else {
+    hash_table_.getAllFromValueAccessorCompositeKey(
+        probe_accessor,
+        join_key_attributes_,
+        any_join_key_attributes_nullable_,
+        &collector);
+  }
+
+  const relation_id build_relation_id = build_relation_.getID();
+  const relation_id probe_relation_id = probe_relation_.getID();
+
+  // Create a map of ValueAccessors and what attributes we want to pick from them.
+  std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map;
+  const std::size_t build_index = 0, probe_index = 1, temp_index = 2;
+  for (std::size_t i = 0; i < 3; ++i) {
+    accessor_attribute_map.emplace_back(
+        nullptr /* place holder */,
+        std::vector<attribute_id>(selection_.size(), kInvalidCatalogId));
+  }
+
+  std::vector<const Scalar *> non_trivial_expressions;
+  attribute_id dest_attr = 0;
+
+  for (auto &scalar: selection_) {
+    // If the Scalar (column) is not an attribute in build/probe blocks, we will
+    // insert it into a ColumnVectorsValueAccessor.
+    if (scalar->getDataSource() != Scalar::ScalarDataSource::kAttribute) {
+      // Current destination attribute maps to the column we'll create now.
+      accessor_attribute_map[temp_index].second[dest_attr] = non_trivial_expressions.size();
+      non_trivial_expressions.emplace_back(scalar.get());
+    } else {
+      auto scalar_attr = static_cast<const ScalarAttribute *>(scalar.get());
+      const attribute_id attr_id = scalar_attr->getAttribute().getID();
+      if (scalar_attr->getAttribute().getParent().getID() == build_relation_id) {
+        accessor_attribute_map[build_index].second[dest_attr] = attr_id;
+      } else {
+        accessor_attribute_map[probe_index].second[dest_attr] = attr_id;
+      }
+    }
+    ++dest_attr;
+  }
+
+  for (std::pair<const block_id, PairOfVectors>
+           &build_block_entry : *collector.getJoinedTuples()) {
+    BlockReference build_block =
+        storage_manager_->getBlock(build_block_entry.first, build_relation_);
+    const TupleStorageSubBlock &build_store = build_block->getTupleStorageSubBlock();
+    std::unique_ptr<ValueAccessor> build_accessor(build_store.createValueAccessor());
+    const std::vector<tuple_id> &build_tids = build_block_entry.second.first;
+    const std::vector<tuple_id> &probe_tids = build_block_entry.second.second;
+
+    // Evaluate '*residual_predicate_', if any.
+    //
+    // TODO(chasseur): We might consider implementing true vectorized
+    // evaluation for join predicates that are not equijoins (although in
+    // general that would require evaluating and materializing some expressions
+    // over the cross-product of all tuples in a pair of blocks in order to
+    // evaluate the predicate). We could use a heuristic where we only do the
+    // vectorized materialization and evaluation if the set of matches from the
+    // hash join is below a reasonable threshold so that we don't blow up
+    // temporary memory requirements to an unreasonable degree.
+    if (residual_predicate_ != nullptr) {
+      std::pair<std::vector<tuple_id>, std::vector<tuple_id>> filtered_matches;
+      for (std::size_t i = 0; i < build_tids.size(); ++i) {
+        if (residual_predicate_->matchesForJoinedTuples(*build_accessor,
+                                                        build_relation_id,
+                                                        build_tids[i],
+                                                        *probe_accessor,
+                                                        probe_relation_id,
+                                                        probe_tids[i])) {
+          filtered_matches.first.push_back(build_tids[i]);
+          filtered_matches.second.push_back(probe_tids[i]);
+        }
+      }
+
+      build_block_entry.second = std::move(filtered_matches);
+    }
+
+    // TODO(chasseur): See TODO in NestedLoopsJoinOperator.cpp about limiting
+    // the size of materialized temporary results. In common usage, this
+    // probably won't be an issue for hash-joins, but in the worst case a hash
+    // join can still devolve into a cross-product.
+
+    // We now create ordered value accessors for both build and probe side,
+    // using the joined tuple TIDs.
+    std::unique_ptr<ValueAccessor> ordered_build_accessor(
+        build_accessor->createSharedOrderedTupleIdSequenceAdapterVirtual(build_tids));
+    std::unique_ptr<ValueAccessor> ordered_probe_accessor(
+        probe_accessor->createSharedOrderedTupleIdSequenceAdapterVirtual(probe_tids));
+
+    // We also need a temp value accessor to store results of any scalar expressions.
+    ColumnVectorsValueAccessor temp_result;
+    if (!non_trivial_expressions.empty()) {
+      VectorOfPairs zipped_joined_tuple_ids;
+      zipped_joined_tuple_ids.reserve(build_tids.size());
+      for (std::size_t i = 0; i < build_tids.size(); ++i) {
+        zipped_joined_tuple_ids.emplace_back(build_tids[i], probe_tids[i]);
+      }
+
+      for (const Scalar *scalar : non_trivial_expressions) {
+        temp_result.addColumn(scalar->getAllValuesForJoin(build_relation_id,
+                                                          build_accessor.get(),
+                                                          probe_relation_id,
+                                                          probe_accessor,
+                                                          zipped_joined_tuple_ids));
+      }
+    }
+
+    accessor_attribute_map[build_index].first = ordered_build_accessor.get();
+    accessor_attribute_map[probe_index].first = ordered_probe_accessor.get();
+    accessor_attribute_map[temp_index].first = &temp_result;
+
+    // NOTE(chasseur): calling the bulk-insert method of InsertDestination once
+    // for each pair of joined blocks incurs some extra overhead that could be
+    // avoided by keeping checked-out MutableBlockReferences across iterations
+    // of this loop, but that would get messy when combined with partitioning.
+    output_destination_->bulkInsertTuplesFromValueAccessors(accessor_attribute_map);
+  }
+}
+
 void HashSemiJoinWorkOrder::execute() {
   if (residual_predicate_ == nullptr) {
     executeWithoutResidualPredicate();

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4700b097/relational_operators/HashJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index acfe3d2..5e9c5d8 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -423,6 +423,10 @@ class HashInnerJoinWorkOrder : public WorkOrder {
   }
 
  private:
+  void executeWithoutCopyElision(ValueAccessor *probe_accesor);
+
+  void executeWithCopyElision(ValueAccessor *probe_accessor);
+
   const CatalogRelationSchema &build_relation_;
   const CatalogRelationSchema &probe_relation_;
   const std::vector<attribute_id> join_key_attributes_;


Mime
View raw message