quickstep-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jianqiao <...@git.apache.org>
Subject [GitHub] incubator-quickstep pull request #179: QUICKSTEP-70-71 Improve aggregation p...
Date Tue, 07 Feb 2017 20:06:02 GMT
Github user jianqiao commented on a diff in the pull request:

    https://github.com/apache/incubator-quickstep/pull/179#discussion_r99916031
  
    --- Diff: storage/AggregationOperationState.cpp ---
    @@ -353,187 +353,286 @@ bool AggregationOperationState::ProtoIsValid(
       return true;
     }
     
    -void AggregationOperationState::aggregateBlock(const block_id input_block,
    -                                               LIPFilterAdaptiveProber *lip_filter_adaptive_prober)
{
    -  if (group_by_list_.empty()) {
    -    aggregateBlockSingleState(input_block);
    -  } else {
    -    aggregateBlockHashTable(input_block, lip_filter_adaptive_prober);
    +bool AggregationOperationState::checkAggregatePartitioned(
    +    const std::size_t estimated_num_groups,
    +    const std::vector<bool> &is_distinct,
    +    const std::vector<std::unique_ptr<const Scalar>> &group_by,
    +    const std::vector<const AggregateFunction *> &aggregate_functions) const
{
    +  // If there's no aggregation, return false.
    +  if (aggregate_functions.empty()) {
    +    return false;
    +  }
    +  // Check if there's a distinct operation involved in any aggregate, if so
    +  // the aggregate can't be partitioned.
    +  for (auto distinct : is_distinct) {
    +    if (distinct) {
    +      return false;
    +    }
    +  }
    +  // There's no distinct aggregation involved, Check if there's at least one
    +  // GROUP BY operation.
    +  if (group_by.empty()) {
    +    return false;
    +  }
    +
    +  // Currently we require that all the group-by keys are ScalarAttributes for
    +  // the convenient of implementing copy elision.
    +  // TODO(jianqiao): relax this requirement.
    +  for (const auto &group_by_element : group_by) {
    +    if (group_by_element->getAttributeIdForValueAccessor() == kInvalidAttributeID)
{
    +      return false;
    +    }
       }
    +
    +  // There are GROUP BYs without DISTINCT. Check if the estimated number of
    +  // groups is large enough to warrant a partitioned aggregation.
    +  return estimated_num_groups >
    +         static_cast<std::size_t>(
    +             FLAGS_partition_aggregation_num_groups_threshold);
    +  return false;
     }
     
    -void AggregationOperationState::finalizeAggregate(
    -    InsertDestination *output_destination) {
    -  if (group_by_list_.empty()) {
    -    finalizeSingleState(output_destination);
    +std::size_t AggregationOperationState::getNumInitializationPartitions() const {
    +  if (is_aggregate_collision_free_) {
    +    return static_cast<CollisionFreeVectorTable *>(
    +        collision_free_hashtable_.get())->getNumInitializationPartitions();
       } else {
    -    finalizeHashTable(output_destination);
    +    return 0u;
       }
     }
     
    -void AggregationOperationState::mergeSingleState(
    -    const std::vector<std::unique_ptr<AggregationState>> &local_state)
{
    -  DEBUG_ASSERT(local_state.size() == single_states_.size());
    -  for (std::size_t agg_idx = 0; agg_idx < handles_.size(); ++agg_idx) {
    -    if (!is_distinct_[agg_idx]) {
    -      handles_[agg_idx]->mergeStates(*local_state[agg_idx],
    -                                     single_states_[agg_idx].get());
    -    }
    +std::size_t AggregationOperationState::getNumFinalizationPartitions() const {
    +  if (is_aggregate_collision_free_) {
    +    return static_cast<CollisionFreeVectorTable *>(
    +        collision_free_hashtable_.get())->getNumFinalizationPartitions();
    +  } else if (is_aggregate_partitioned_) {
    +    return partitioned_group_by_hashtable_pool_->getNumPartitions();
    +  } else  {
    +    return 1u;
       }
     }
     
    -void AggregationOperationState::aggregateBlockSingleState(
    -    const block_id input_block) {
    -  // Aggregate per-block state for each aggregate.
    -  std::vector<std::unique_ptr<AggregationState>> local_state;
    +void AggregationOperationState::initialize(const std::size_t partition_id) {
    --- End diff --
    
    The "partition" here is not related to the catalog or to relations; i.e., it refers to
    the manual segmentation of the memory / hash table entries, not to the partitioning of
    storage blocks.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message