quickstep-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From zuyu <...@git.apache.org>
Subject [GitHub] incubator-quickstep pull request #179: QUICKSTEP-70-71 Improve aggregation p...
Date Mon, 06 Feb 2017 05:12:27 GMT
Github user zuyu commented on a diff in the pull request:

    https://github.com/apache/incubator-quickstep/pull/179#discussion_r99505506
  
    --- Diff: storage/AggregationOperationState.cpp ---
    @@ -353,187 +353,286 @@ bool AggregationOperationState::ProtoIsValid(
       return true;
     }
     
    -void AggregationOperationState::aggregateBlock(const block_id input_block,
    -                                               LIPFilterAdaptiveProber *lip_filter_adaptive_prober) {
    -  if (group_by_list_.empty()) {
    -    aggregateBlockSingleState(input_block);
    -  } else {
    -    aggregateBlockHashTable(input_block, lip_filter_adaptive_prober);
    +bool AggregationOperationState::checkAggregatePartitioned(
    +    const std::size_t estimated_num_groups,
    +    const std::vector<bool> &is_distinct,
    +    const std::vector<std::unique_ptr<const Scalar>> &group_by,
    +    const std::vector<const AggregateFunction *> &aggregate_functions) const {
    +  // If there's no aggregation, return false.
    +  if (aggregate_functions.empty()) {
    +    return false;
    +  }
    +  // Check if there's a distinct operation involved in any aggregate, if so
    +  // the aggregate can't be partitioned.
    +  for (auto distinct : is_distinct) {
    +    if (distinct) {
    +      return false;
    +    }
    +  }
    +  // There's no distinct aggregation involved, Check if there's at least one
    +  // GROUP BY operation.
    +  if (group_by.empty()) {
    +    return false;
    +  }
    +
    +  // Currently we require that all the group-by keys are ScalarAttributes for
    +  // the convenient of implementing copy elision.
    +  // TODO(jianqiao): relax this requirement.
    +  for (const auto &group_by_element : group_by) {
    +    if (group_by_element->getAttributeIdForValueAccessor() == kInvalidAttributeID) {
    +      return false;
    +    }
       }
    +
    +  // There are GROUP BYs without DISTINCT. Check if the estimated number of
    +  // groups is large enough to warrant a partitioned aggregation.
    +  return estimated_num_groups >
    +         static_cast<std::size_t>(
    +             FLAGS_partition_aggregation_num_groups_threshold);
    +  return false;
     }
     
    -void AggregationOperationState::finalizeAggregate(
    -    InsertDestination *output_destination) {
    -  if (group_by_list_.empty()) {
    -    finalizeSingleState(output_destination);
    +std::size_t AggregationOperationState::getNumInitializationPartitions() const {
    +  if (is_aggregate_collision_free_) {
    +    return static_cast<CollisionFreeVectorTable *>(
    +        collision_free_hashtable_.get())->getNumInitializationPartitions();
       } else {
    -    finalizeHashTable(output_destination);
    +    return 0u;
    --- End diff --
    
    Could you help me understand this return value?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message