quickstep-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hbdeshmukh <...@git.apache.org>
Subject [GitHub] incubator-quickstep pull request #90: Quickstep 28 29
Date Fri, 09 Sep 2016 18:30:44 GMT
Github user hbdeshmukh commented on a diff in the pull request:

    https://github.com/apache/incubator-quickstep/pull/90#discussion_r78227477
  
    --- Diff: storage/StorageBlock.cpp ---
    @@ -494,6 +494,91 @@ void StorageBlock::aggregateGroupBy(
                                                  hash_table);
     }
     
    +
    +void StorageBlock::aggregateGroupByFast(
    +    const std::vector<std::vector<std::unique_ptr<const Scalar>>> &arguments,
    +    const std::vector<std::unique_ptr<const Scalar>> &group_by,
    +    const Predicate *predicate,
    +    AggregationStateHashTableBase *hash_table,
    +    std::unique_ptr<TupleIdSequence> *reuse_matches,
    +    std::vector<std::unique_ptr<ColumnVector>> *reuse_group_by_vectors) const
{
    +  DCHECK_GT(group_by.size(), 0u)
    +      << "Called aggregateGroupBy() with zero GROUP BY expressions";
    +
    +  SubBlocksReference sub_blocks_ref(*tuple_store_,
    +                                    indices_,
    +                                    indices_consistent_);
    +
    +  // IDs of 'arguments' as attributes in the ValueAccessor we create below.
    +  std::vector<attribute_id> arg_ids;
    +  std::vector<std::vector<attribute_id>> argument_ids;
    +
    +  // IDs of GROUP BY key element(s) in the ValueAccessor we create below.
    +  std::vector<attribute_id> key_ids;
    +
    +  // An intermediate ValueAccessor that stores the materialized 'arguments' for
    +  // this aggregate, as well as the GROUP BY expression values.
    +  ColumnVectorsValueAccessor temp_result;
    +  {
    +    std::unique_ptr<ValueAccessor> accessor;
    +    if (predicate) {
    +      if (!*reuse_matches) {
    +        // If there is a filter predicate that hasn't already been evaluated,
    +        // evaluate it now and save the results for other aggregates on this
    +        // same block.
    +        reuse_matches->reset(getMatchesForPredicate(predicate));
    +      }
    +
    +      // Create a filtered ValueAccessor that only iterates over predicate
    +      // matches.
    +      accessor.reset(tuple_store_->createValueAccessor(reuse_matches->get()));
    +    } else {
    +      // Create a ValueAccessor that iterates over all tuples in this block
    +      accessor.reset(tuple_store_->createValueAccessor());
    +    }
    +
    +    attribute_id attr_id = 0;
    +
    +    // First, put GROUP BY keys into 'temp_result'.
    +    if (reuse_group_by_vectors->empty()) {
    +      // Compute GROUP BY values from group_by Scalars, and store them in
    +      // reuse_group_by_vectors for reuse by other aggregates on this same
    +      // block.
    +      reuse_group_by_vectors->reserve(group_by.size());
    +      for (const std::unique_ptr<const Scalar> &group_by_element : group_by)
{
    +        reuse_group_by_vectors->emplace_back(
    +            group_by_element->getAllValues(accessor.get(), &sub_blocks_ref));
    +        temp_result.addColumn(reuse_group_by_vectors->back().get(), false);
    +        key_ids.push_back(attr_id++);
    +      }
    +    } else {
    +      // Reuse precomputed GROUP BY values from reuse_group_by_vectors.
    +      DCHECK_EQ(group_by.size(), reuse_group_by_vectors->size())
    +          << "Wrong number of reuse_group_by_vectors";
    +      for (const std::unique_ptr<ColumnVector> &reuse_cv : *reuse_group_by_vectors)
{
    +        temp_result.addColumn(reuse_cv.get(), false);
    +        key_ids.push_back(attr_id++);
    +      }
    +    }
    +
    +    // Compute argument vectors and add them to 'temp_result'.
    +    for (const std::vector<std::unique_ptr<const Scalar>> &argument :
arguments) {
    +        arg_ids.clear();
    +        for (const std::unique_ptr<const Scalar> &args : argument) {
    +          temp_result.addColumn(args->getAllValues(accessor.get(), &sub_blocks_ref));
    +          arg_ids.push_back(attr_id++);
    +        }
    +        argument_ids.push_back(arg_ids);
    +     }
    +  }
    +
    +  hash_table->upsertValueAccessorCompositeKeyFast(argument_ids,
    --- End diff --
    
    Due to this call, we have to create a virtual function ``upsertValueAccessorCompositeKeyFast``
in the HashTableBase class to avoid a circular dependency. An alternative is as follows:
    Make the aggregateGroupBy function return the value accessor and the other information required
for this call. The caller of aggregateGroupBy would then perform the upsert operation on the hash
table itself, thereby eliminating the need for this function call.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at infrastructure@apache.org or file a JIRA
ticket with INFRA.
---

Mime
View raw message