quickstep-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From zuyu <...@git.apache.org>
Subject [GitHub] incubator-quickstep pull request #171: QUICKSTEP-68 Reorder intermediate rel...
Date Mon, 30 Jan 2017 06:54:54 GMT
Github user zuyu commented on a diff in the pull request:

    https://github.com/apache/incubator-quickstep/pull/171#discussion_r98385945
  
    --- Diff: relational_operators/HashJoinOperator.cpp ---
    @@ -504,123 +616,61 @@ void HashInnerJoinWorkOrder::execute() {
         // hash join is below a reasonable threshold so that we don't blow up
         // temporary memory requirements to an unreasonable degree.
         if (residual_predicate_ != nullptr) {
    -      std::pair<std::vector<tuple_id>, std::vector<tuple_id>> filtered_matches;
    +      PairOfVectors filtered_matches;
    +
           for (std::size_t i = 0; i < build_tids.size(); ++i) {
             if (residual_predicate_->matchesForJoinedTuples(*build_accessor,
                                                             build_relation_id,
                                                             build_tids[i],
                                                             *probe_accessor,
                                                             probe_relation_id,
                                                             probe_tids[i])) {
    -          filtered_matches.first.push_back(build_tids[i]);
    -          filtered_matches.second.push_back(probe_tids[i]);
    +          filtered_matches.first.emplace_back(build_tids[i]);
    +          filtered_matches.second.emplace_back(probe_tids[i]);
             }
           }
     
           build_block_entry.second = std::move(filtered_matches);
         }
     
    -    // TODO(chasseur): If all the output expressions are ScalarAttributes,
    -    // we could implement a similar fast-path to StorageBlock::selectSimple()
    -    // that avoids a copy.
    -    //
         // TODO(chasseur): See TODO in NestedLoopsJoinOperator.cpp about limiting
         // the size of materialized temporary results. In common usage, this
         // probably won't be an issue for hash-joins, but in the worst case a hash
         // join can still devolve into a cross-product.
    -    //
    -    // NOTE(chasseur): We could also create one big ColumnVectorsValueAccessor
    -    // and accumulate all the results across multiple block pairs into it
    -    // before inserting anything into output blocks, but this would require
    -    // some significant API extensions to the expressions system for a dubious
    -    // benefit (probably only a real performance win when there are very few
    -    // matching tuples in each individual inner block but very many inner
    -    // blocks with at least one match).
    -
    -    // We now create ordered value accessors for both build and probe side,
    -    // using the joined tuple TIDs. Note that we have to use this Lambda-based
    -    // invocation method here because the accessors don't have a virtual
    -    // function that creates such an OrderedTupleIdSequenceAdapterValueAccessor.
    -    std::unique_ptr<ValueAccessor> ordered_build_accessor, ordered_probe_accessor;
    -    InvokeOnValueAccessorNotAdapter(
    -        build_accessor.get(),
    -        [&](auto *accessor) -> void {  // NOLINT(build/c++11)
    -          ordered_build_accessor.reset(
    -              accessor->createSharedOrderedTupleIdSequenceAdapter(build_tids));
    -        });
    -
    -    if (probe_accessor->isTupleIdSequenceAdapter()) {
    -      InvokeOnTupleIdSequenceAdapterValueAccessor(
    -        probe_accessor.get(),
    -        [&](auto *accessor) -> void {  // NOLINT(build/c++11)
    -          ordered_probe_accessor.reset(
    -            accessor->createSharedOrderedTupleIdSequenceAdapter(probe_tids));
    -        });
    -    } else {
    -      InvokeOnValueAccessorNotAdapter(
    -        probe_accessor.get(),
    -        [&](auto *accessor) -> void {  // NOLINT(build/c++11)
    -          ordered_probe_accessor.reset(
    -            accessor->createSharedOrderedTupleIdSequenceAdapter(probe_tids));
    -        });
    -    }
    -
     
         // We also need a temp value accessor to store results of any scalar expressions.
         ColumnVectorsValueAccessor temp_result;
    +    if (!non_trivial_expressions.empty()) {
    +      // The getAllValuesForJoin function below needs joined tuple IDs as a
    +      // vector of pair of (build-tuple-ID, probe-tuple-ID), and we have a pair
    +      // of (build-tuple-IDs-vector, probe-tuple-IDs-vector). So we'll have to
    +      // zip our two vectors together.
    +      VectorOfPairs zipped_joined_tuple_ids;
    +      zipped_joined_tuple_ids.reserve(build_tids.size());
    +      for (std::size_t i = 0; i < build_tids.size(); ++i) {
    +        zipped_joined_tuple_ids.emplace_back(build_tids[i], probe_tids[i]);
    +      }
     
    -    // Create a map of ValueAccessors and what attributes we want to pick from them
    -    std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>>
accessor_attribute_map;
    -    const std::vector<ValueAccessor *> accessors{
    -        ordered_build_accessor.get(), ordered_probe_accessor.get(), &temp_result};
    -    const unsigned int build_index = 0, probe_index = 1, temp_index = 2;
    -    for (auto &accessor : accessors) {
    -      accessor_attribute_map.push_back(std::make_pair(
    -          accessor,
    -          std::vector<attribute_id>(selection_.size(), kInvalidCatalogId)));
    -    }
    -
    -    attribute_id dest_attr = 0;
    -    std::vector<std::pair<tuple_id, tuple_id>> zipped_joined_tuple_ids;
    -
    -    for (auto &selection_cit : selection_) {
    -      // If the Scalar (column) is not an attribute in build/probe blocks, then
    -      // insert it into a ColumnVectorsValueAccessor.
    -      if (selection_cit->getDataSource() != Scalar::ScalarDataSource::kAttribute)
{
    -        // Current destination attribute maps to the column we'll create now.
    -        accessor_attribute_map[temp_index].second[dest_attr] = temp_result.getNumColumns();
    -
    -        if (temp_result.getNumColumns() == 0) {
    -          // The getAllValuesForJoin function below needs joined tuple IDs as
    -          // a vector of pair of (build-tuple-ID, probe-tuple-ID), and we have
    -          // a pair of (build-tuple-IDs-vector, probe-tuple-IDs-vector). So
    -          // we'll have to zip our two vectors together. We do this inside
    -          // the loop because most queries don't exercise this code since
    -          // they don't have scalar expressions with attributes from both
    -          // build and probe relations (other expressions would have been
    -          // pushed down to before the join).
    -          zipped_joined_tuple_ids.reserve(build_tids.size());
    -          for (std::size_t i = 0; i < build_tids.size(); ++i) {
    -            zipped_joined_tuple_ids.push_back(std::make_pair(build_tids[i], probe_tids[i]));
    -          }
    -        }
    -        temp_result.addColumn(
    -            selection_cit
    -                ->getAllValuesForJoin(build_relation_id, build_accessor.get(),
    -                                      probe_relation_id, probe_accessor.get(),
    -                                      zipped_joined_tuple_ids));
    -      } else {
    -        auto scalar_attr = static_cast<const ScalarAttribute *>(selection_cit.get());
    -        const attribute_id attr_id = scalar_attr->getAttribute().getID();
    -        if (scalar_attr->getAttribute().getParent().getID() == build_relation_id)
{
    -          accessor_attribute_map[build_index].second[dest_attr] = attr_id;
    -        } else {
    -          accessor_attribute_map[probe_index].second[dest_attr] = attr_id;
    -        }
    +      for (const Scalar *scalar : non_trivial_expressions) {
    +        temp_result.addColumn(scalar->getAllValuesForJoin(build_relation_id,
    +                                                          build_accessor.get(),
    +                                                          probe_relation_id,
    +                                                          probe_accessor,
    +                                                          zipped_joined_tuple_ids));
           }
    -      ++dest_attr;
         }
     
    +    // We now create ordered value accessors for both build and probe side,
    +    // using the joined tuple TIDs.
    --- End diff --
    
    Change from `TIDs` to `IDs`? I guess `TID` means `tuple id`.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message