Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id D4E26200C02 for ; Fri, 20 Jan 2017 19:04:46 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id D362E160B48; Fri, 20 Jan 2017 18:04:46 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id BE1A9160B34 for ; Fri, 20 Jan 2017 19:04:44 +0100 (CET) Received: (qmail 70422 invoked by uid 500); 20 Jan 2017 18:04:44 -0000 Mailing-List: contact commits-help@quickstep.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@quickstep.incubator.apache.org Delivered-To: mailing list commits@quickstep.incubator.apache.org Received: (qmail 70413 invoked by uid 99); 20 Jan 2017 18:04:43 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd4-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 20 Jan 2017 18:04:43 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd4-us-west.apache.org (ASF Mail Server at spamd4-us-west.apache.org) with ESMTP id 496EFC14DA for ; Fri, 20 Jan 2017 18:04:43 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd4-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -6.219 X-Spam-Level: X-Spam-Status: No, score=-6.219 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, RCVD_IN_DNSWL_HI=-5, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, RP_MATCHES_RCVD=-2.999] autolearn=disabled Received: from mx1-lw-eu.apache.org ([10.40.0.8]) by localhost (spamd4-us-west.apache.org [10.40.0.11]) (amavisd-new, port 10024) with ESMTP id I7-ErAV6prga for ; Fri, 20 Jan 2017 18:04:32 +0000 (UTC) Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx1-lw-eu.apache.org (ASF Mail Server at mx1-lw-eu.apache.org) with SMTP id 5D96D5F30B for ; Fri, 20 Jan 2017 18:04:30 +0000 (UTC) Received: (qmail 70354 invoked by uid 99); 20 Jan 2017 18:04:29 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 20 Jan 2017 18:04:29 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 6B8AEDFCC4; Fri, 20 Jan 2017 18:04:29 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: zuyuz@apache.org To: commits@quickstep.incubator.apache.org Date: Fri, 20 Jan 2017 18:04:30 -0000 Message-Id: In-Reply-To: <5b2bf36c78284715997b07af25791412@git.apache.org> References: <5b2bf36c78284715997b07af25791412@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [2/2] incubator-quickstep git commit: Added Operator support for Partitioned HashJoin. archived-at: Fri, 20 Jan 2017 18:04:47 -0000 Added Operator support for Partitioned HashJoin. Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/5af8904d Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/5af8904d Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/5af8904d Branch: refs/heads/partitioned_join_hash_table Commit: 5af8904d5fdd0e556141ffc595d7ddf9625cf474 Parents: ee3b7f0 Author: Zuyu Zhang Authored: Tue Jan 17 13:18:08 2017 -0800 Committer: Zuyu Zhang Committed: Fri Jan 20 10:02:48 2017 -0800 ---------------------------------------------------------------------- query_execution/QueryContext.cpp | 14 +- query_execution/QueryContext.hpp | 25 ++- query_execution/QueryContext.proto | 7 +- query_optimizer/ExecutionGenerator.cpp | 30 ++- relational_operators/BuildHashOperator.cpp | 76 ++++---- relational_operators/BuildHashOperator.hpp | 81 ++++++-- relational_operators/CMakeLists.txt | 9 +- relational_operators/DestroyHashOperator.cpp | 25 ++- relational_operators/DestroyHashOperator.hpp | 9 + relational_operators/HashJoinOperator.cpp | 192 ++++++++++--------- relational_operators/HashJoinOperator.hpp | 174 ++++++++++++++++- relational_operators/WorkOrder.proto | 7 + relational_operators/WorkOrderFactory.cpp | 38 +++- .../tests/HashJoinOperator_unittest.cpp | 42 ++-- storage/StorageBlockInfo.hpp | 3 + 15 files changed, 531 insertions(+), 201 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5af8904d/query_execution/QueryContext.cpp ---------------------------------------------------------------------- diff --git a/query_execution/QueryContext.cpp b/query_execution/QueryContext.cpp index 0e6636d..71839a7 100644 --- a/query_execution/QueryContext.cpp +++ b/query_execution/QueryContext.cpp @@ -79,9 +79,15 @@ QueryContext::QueryContext(const serialization::QueryContext &proto, } for (int i = 0; i < proto.join_hash_tables_size(); ++i) { - join_hash_tables_.emplace_back( - JoinHashTableFactory::CreateResizableFromProto(proto.join_hash_tables(i), - storage_manager)); + PartitionedJoinHashTables partitioned_join_hash_tables; + + const serialization::QueryContext::HashTableContext &hash_table_context_proto = proto.join_hash_tables(i); + for (std::uint64_t j = 0; j < hash_table_context_proto.num_partitions(); ++j) { + partitioned_join_hash_tables.emplace_back( + JoinHashTableFactory::CreateResizableFromProto(hash_table_context_proto.join_hash_table(), storage_manager)); + } + + join_hash_tables_.push_back(move(partitioned_join_hash_tables)); } for (int i = 0; i < proto.insert_destinations_size(); ++i) { @@ -178,7 +184,7 @@ bool QueryContext::ProtoIsValid(const serialization::QueryContext &proto, } for (int i = 0; i < proto.join_hash_tables_size(); ++i) { - if (!JoinHashTableFactory::ProtoIsValid(proto.join_hash_tables(i))) { + if (!JoinHashTableFactory::ProtoIsValid(proto.join_hash_tables(i).join_hash_table())) { return false; } } http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5af8904d/query_execution/QueryContext.hpp ---------------------------------------------------------------------- diff --git a/query_execution/QueryContext.hpp b/query_execution/QueryContext.hpp index 7ad8fa1..895c2ea 100644 --- a/query_execution/QueryContext.hpp +++ b/query_execution/QueryContext.hpp @@ -278,33 +278,37 @@ class QueryContext { * @brief Whether the given JoinHashTable id is valid. * * @param id The JoinHashTable id. + * @param part_id The partition id. * * @return True if valid, otherwise false. **/ - bool isValidJoinHashTableId(const join_hash_table_id id) const { - return id < join_hash_tables_.size(); + bool isValidJoinHashTableId(const join_hash_table_id id, const partition_id part_id) const { + return id < join_hash_tables_.size() && + part_id < join_hash_tables_[id].size(); } /** * @brief Get the JoinHashTable. * * @param id The JoinHashTable id in the query. + * @param part_id The partition id. * * @return The JoinHashTable, already created in the constructor. **/ - inline JoinHashTable* getJoinHashTable(const join_hash_table_id id) { - DCHECK_LT(id, join_hash_tables_.size()); - return join_hash_tables_[id].get(); + inline JoinHashTable* getJoinHashTable(const join_hash_table_id id, const partition_id part_id) { + DCHECK(isValidJoinHashTableId(id, part_id)); + return join_hash_tables_[id][part_id].get(); } /** * @brief Destory the given JoinHashTable. * * @param id The id of the JoinHashTable to destroy. + * @param part_id The partition id. **/ - inline void destroyJoinHashTable(const join_hash_table_id id) { - DCHECK_LT(id, join_hash_tables_.size()); - join_hash_tables_[id].reset(); + inline void destroyJoinHashTable(const join_hash_table_id id, const partition_id part_id) { + DCHECK(isValidJoinHashTableId(id, part_id)); + join_hash_tables_[id][part_id].reset(); } /** @@ -562,10 +566,13 @@ class QueryContext { } private: + // Per hash join, the index is the partition id. + typedef std::vector> PartitionedJoinHashTables; + std::vector> aggregation_states_; std::vector> generator_functions_; std::vector> insert_destinations_; - std::vector> join_hash_tables_; + std::vector join_hash_tables_; std::vector> lip_deployments_; std::vector> lip_filters_; std::vector> predicates_; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5af8904d/query_execution/QueryContext.proto ---------------------------------------------------------------------- diff --git a/query_execution/QueryContext.proto b/query_execution/QueryContext.proto index ab0f520..6dce6b8 100644 --- a/query_execution/QueryContext.proto +++ b/query_execution/QueryContext.proto @@ -30,6 +30,11 @@ import "utility/SortConfiguration.proto"; import "utility/lip_filter/LIPFilter.proto"; message QueryContext { + message HashTableContext { + required HashTable join_hash_table = 1; + optional uint64 num_partitions = 2 [default = 1]; + } + message ScalarGroup { repeated Scalar scalars = 1; } @@ -47,7 +52,7 @@ message QueryContext { repeated AggregationOperationState aggregation_states = 1; repeated GeneratorFunctionHandle generator_functions = 2; - repeated HashTable join_hash_tables = 3; + repeated HashTableContext join_hash_tables = 3; repeated InsertDestination insert_destinations = 4; repeated LIPFilter lip_filters = 5; repeated LIPFilterDeployment lip_filter_deployments = 6; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5af8904d/query_optimizer/ExecutionGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp index e0bfb3b..e25b8ad 100644 --- a/query_optimizer/ExecutionGenerator.cpp +++ b/query_optimizer/ExecutionGenerator.cpp @@ -694,15 +694,32 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) { build_physical->getOutputAttributes()))); } + const CatalogRelation *build_relation = build_relation_info->relation; + // FIXME(quickstep-team): Add support for self-join. - if (build_relation_info->relation == probe_operator_info->relation) { + if (build_relation == probe_operator_info->relation) { THROW_SQL_ERROR() << "Self-join is not supported"; } // Create join hash table proto. const QueryContext::join_hash_table_id join_hash_table_index = query_context_proto_->join_hash_tables_size(); - S::HashTable *hash_table_proto = query_context_proto_->add_join_hash_tables(); + S::QueryContext::HashTableContext *hash_table_context_proto = + query_context_proto_->add_join_hash_tables(); + + // No partition. + std::size_t num_partitions = 1; + if (build_relation->hasPartitionScheme() && + build_attribute_ids.size() == 1) { + const PartitionSchemeHeader &partition_scheme_header = + build_relation->getPartitionScheme()->getPartitionSchemeHeader(); + if (build_attribute_ids[0] == partition_scheme_header.getPartitionAttributeId()) { + // TODO(zuyu): add optimizer support for partitioned hash joins. + hash_table_context_proto->set_num_partitions(num_partitions); + } + } + + S::HashTable *hash_table_proto = hash_table_context_proto->mutable_join_hash_table(); // SimplifyHashTableImplTypeProto() switches the hash table implementation // from SeparateChaining to SimpleScalarSeparateChaining when there is a @@ -712,7 +729,6 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) { HashTableImplTypeProtoFromString(FLAGS_join_hashtable_type), key_types)); - const CatalogRelationSchema *build_relation = build_relation_info->relation; for (const attribute_id build_attribute : build_attribute_ids) { hash_table_proto->add_key_types()->CopyFrom( build_relation->getAttributeById(build_attribute)->getType().getProto()); @@ -725,10 +741,11 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) { execution_plan_->addRelationalOperator( new BuildHashOperator( query_handle_->query_id(), - *build_relation_info->relation, + *build_relation, build_relation_info->isStoredRelation(), build_attribute_ids, any_build_attributes_nullable, + num_partitions, join_hash_table_index)); // Create InsertDestination proto. @@ -766,11 +783,12 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) { execution_plan_->addRelationalOperator( new HashJoinOperator( query_handle_->query_id(), - *build_relation_info->relation, + *build_relation, *probe_operator_info->relation, probe_operator_info->isStoredRelation(), probe_attribute_ids, any_probe_attributes_nullable, + num_partitions, *output_relation, insert_destination_index, join_hash_table_index, @@ -782,7 +800,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) { const QueryPlan::DAGNodeIndex destroy_operator_index = execution_plan_->addRelationalOperator(new DestroyHashOperator( - query_handle_->query_id(), join_hash_table_index)); + query_handle_->query_id(), num_partitions, join_hash_table_index)); if (!build_relation_info->isStoredRelation()) { execution_plan_->addDirectDependency(build_operator_index, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5af8904d/relational_operators/BuildHashOperator.cpp ---------------------------------------------------------------------- diff --git a/relational_operators/BuildHashOperator.cpp b/relational_operators/BuildHashOperator.cpp index 60e091f..14ec204 100644 --- a/relational_operators/BuildHashOperator.cpp +++ b/relational_operators/BuildHashOperator.cpp @@ -70,38 +70,36 @@ bool BuildHashOperator::getAllWorkOrders( tmb::MessageBus *bus) { DCHECK(query_context != nullptr); - JoinHashTable *hash_table = query_context->getJoinHashTable(hash_table_index_); if (input_relation_is_stored_) { - if (!started_) { - for (const block_id input_block_id : input_relation_block_ids_) { + if (started_) { + return true; + } + + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + JoinHashTable *hash_table = query_context->getJoinHashTable(hash_table_index_, part_id); + for (const block_id block : input_relation_block_ids_[part_id]) { container->addNormalWorkOrder( - new BuildHashWorkOrder(query_id_, - input_relation_, - join_key_attributes_, - any_join_key_attributes_nullable_, - input_block_id, - hash_table, - storage_manager, + new BuildHashWorkOrder(query_id_, input_relation_, join_key_attributes_, any_join_key_attributes_nullable_, + num_partitions_, part_id, block, hash_table, storage_manager, CreateLIPFilterBuilderHelper(lip_deployment_index_, query_context)), op_index_); } - started_ = true; } - return started_; + started_ = true; + return true; } else { - while (num_workorders_generated_ < input_relation_block_ids_.size()) { - container->addNormalWorkOrder( - new BuildHashWorkOrder( - query_id_, - input_relation_, - join_key_attributes_, - any_join_key_attributes_nullable_, - input_relation_block_ids_[num_workorders_generated_], - hash_table, - storage_manager, - CreateLIPFilterBuilderHelper(lip_deployment_index_, query_context)), - op_index_); - ++num_workorders_generated_; + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + JoinHashTable *hash_table = query_context->getJoinHashTable(hash_table_index_, part_id); + while (num_workorders_generated_[part_id] < + input_relation_block_ids_[part_id].size()) { + container->addNormalWorkOrder( + new BuildHashWorkOrder(query_id_, input_relation_, join_key_attributes_, any_join_key_attributes_nullable_, + num_partitions_, part_id, + input_relation_block_ids_[part_id][num_workorders_generated_[part_id]], hash_table, + storage_manager, CreateLIPFilterBuilderHelper(lip_deployment_index_, query_context)), + op_index_); + ++num_workorders_generated_[part_id]; + } } return done_feeding_input_relation_; } @@ -109,25 +107,31 @@ bool BuildHashOperator::getAllWorkOrders( bool BuildHashOperator::getAllWorkOrderProtos(WorkOrderProtosContainer *container) { if (input_relation_is_stored_) { - if (!started_) { - for (const block_id input_block_id : input_relation_block_ids_) { - container->addWorkOrderProto(createWorkOrderProto(input_block_id), op_index_); + if (started_) { + return true; + } + + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + for (const block_id block : input_relation_block_ids_[part_id]) { + container->addWorkOrderProto(createWorkOrderProto(block, part_id), op_index_); } - started_ = true; } + started_ = true; return true; } else { - while (num_workorders_generated_ < input_relation_block_ids_.size()) { - container->addWorkOrderProto( - createWorkOrderProto(input_relation_block_ids_[num_workorders_generated_]), - op_index_); - ++num_workorders_generated_; + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + while (num_workorders_generated_[part_id] < input_relation_block_ids_[part_id].size()) { + container->addWorkOrderProto( + createWorkOrderProto(input_relation_block_ids_[part_id][num_workorders_generated_[part_id]], part_id), + op_index_); + ++num_workorders_generated_[part_id]; + } } return done_feeding_input_relation_; } } -serialization::WorkOrder* BuildHashOperator::createWorkOrderProto(const block_id block) { +serialization::WorkOrder* BuildHashOperator::createWorkOrderProto(const block_id block, const partition_id part_id) { serialization::WorkOrder *proto = new serialization::WorkOrder; proto->set_work_order_type(serialization::BUILD_HASH); proto->set_query_id(query_id_); @@ -138,7 +142,9 @@ serialization::WorkOrder* BuildHashOperator::createWorkOrderProto(const block_id } proto->SetExtension(serialization::BuildHashWorkOrder::any_join_key_attributes_nullable, any_join_key_attributes_nullable_); + proto->SetExtension(serialization::BuildHashWorkOrder::num_partitions, num_partitions_); proto->SetExtension(serialization::BuildHashWorkOrder::join_hash_table_index, hash_table_index_); + proto->SetExtension(serialization::BuildHashWorkOrder::partition_id, part_id); proto->SetExtension(serialization::BuildHashWorkOrder::block_id, block); proto->SetExtension(serialization::BuildHashWorkOrder::lip_deployment_index, lip_deployment_index_); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5af8904d/relational_operators/BuildHashOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/BuildHashOperator.hpp b/relational_operators/BuildHashOperator.hpp index dec121c..e163954 100644 --- a/relational_operators/BuildHashOperator.hpp +++ b/relational_operators/BuildHashOperator.hpp @@ -27,6 +27,7 @@ #include "catalog/CatalogRelation.hpp" #include "catalog/CatalogTypedefs.hpp" +#include "catalog/PartitionScheme.hpp" #include "query_execution/QueryContext.hpp" #include "relational_operators/RelationalOperator.hpp" #include "relational_operators/WorkOrder.hpp" @@ -75,6 +76,8 @@ class BuildHashOperator : public RelationalOperator { * @param join_key_attributes The IDs of equijoin attributes in * input_relation. * @param any_join_key_attributes_nullable If any attribute is nullable. + * @param num_partitions The number of partitions in 'input_relation'. If no + * partitions, it is one. * @param hash_table_index The index of the JoinHashTable in QueryContext. * The HashTable's key Type(s) should be the Type(s) of the * join_key_attributes in input_relation. @@ -84,17 +87,30 @@ class BuildHashOperator : public RelationalOperator { const bool input_relation_is_stored, const std::vector &join_key_attributes, const bool any_join_key_attributes_nullable, + const std::size_t num_partitions, const QueryContext::join_hash_table_id hash_table_index) - : RelationalOperator(query_id), - input_relation_(input_relation), - input_relation_is_stored_(input_relation_is_stored), - join_key_attributes_(join_key_attributes), - any_join_key_attributes_nullable_(any_join_key_attributes_nullable), - hash_table_index_(hash_table_index), - input_relation_block_ids_(input_relation_is_stored ? input_relation.getBlocksSnapshot() - : std::vector()), - num_workorders_generated_(0), - started_(false) {} + : RelationalOperator(query_id), + input_relation_(input_relation), + input_relation_is_stored_(input_relation_is_stored), + join_key_attributes_(join_key_attributes), + any_join_key_attributes_nullable_(any_join_key_attributes_nullable), + num_partitions_(num_partitions), + hash_table_index_(hash_table_index), + input_relation_block_ids_(num_partitions), + num_workorders_generated_(num_partitions), + started_(false) { + if (input_relation_is_stored) { + if (input_relation.hasPartitionScheme()) { + const PartitionScheme &part_scheme = *input_relation.getPartitionScheme(); + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + input_relation_block_ids_[part_id] = part_scheme.getBlocksInPartition(part_id); + } + } else { + // No partition. + input_relation_block_ids_[0] = input_relation.getBlocksSnapshot(); + } + } + } ~BuildHashOperator() override {} @@ -116,7 +132,7 @@ class BuildHashOperator : public RelationalOperator { void feedInputBlock(const block_id input_block_id, const relation_id input_relation_id, const partition_id part_id) override { - input_relation_block_ids_.push_back(input_block_id); + input_relation_block_ids_[part_id].push_back(input_block_id); } private: @@ -124,17 +140,20 @@ class BuildHashOperator : public RelationalOperator { * @brief Create Work Order proto. * * @param block The block id used in the Work Order. + * @param part_id The partition id of 'input_relation_'. **/ - serialization::WorkOrder* createWorkOrderProto(const block_id block); + serialization::WorkOrder* createWorkOrderProto(const block_id block, const partition_id part_id); const CatalogRelation &input_relation_; const bool input_relation_is_stored_; const std::vector join_key_attributes_; const bool any_join_key_attributes_nullable_; + const std::size_t num_partitions_; const QueryContext::join_hash_table_id hash_table_index_; - std::vector input_relation_block_ids_; - std::vector::size_type num_workorders_generated_; + // The index is the partition id. + std::vector input_relation_block_ids_; + std::vector num_workorders_generated_; bool started_; @@ -154,6 +173,9 @@ class BuildHashWorkOrder : public WorkOrder { * @param join_key_attributes The IDs of equijoin attributes in * input_relation. * @param any_join_key_attributes_nullable If any attribute is nullable. + * @param num_partitions The number of partitions in 'input_relation'. If no + * partitions, it is one. + * @param part_id The partition id of 'input_relation'. * @param build_block_id The block id. * @param hash_table The JoinHashTable to use. * @param storage_manager The StorageManager to use. @@ -163,6 +185,8 @@ class BuildHashWorkOrder : public WorkOrder { const CatalogRelationSchema &input_relation, const std::vector &join_key_attributes, const bool any_join_key_attributes_nullable, + const std::size_t num_partitions, + const partition_id part_id, const block_id build_block_id, JoinHashTable *hash_table, StorageManager *storage_manager, @@ -171,6 +195,8 @@ class BuildHashWorkOrder : public WorkOrder { input_relation_(input_relation), join_key_attributes_(join_key_attributes), any_join_key_attributes_nullable_(any_join_key_attributes_nullable), + num_partitions_(num_partitions), + part_id_(part_id), build_block_id_(build_block_id), hash_table_(DCHECK_NOTNULL(hash_table)), storage_manager_(DCHECK_NOTNULL(storage_manager)), @@ -184,6 +210,9 @@ class BuildHashWorkOrder : public WorkOrder { * @param join_key_attributes The IDs of equijoin attributes in * input_relation. * @param any_join_key_attributes_nullable If any attribute is nullable. + * @param num_partitions The number of partitions in 'input_relation'. If no + * partitions, it is one. + * @param part_id The partition id of 'input_relation'. * @param build_block_id The block id. * @param hash_table The JoinHashTable to use. * @param storage_manager The StorageManager to use. @@ -193,6 +222,8 @@ class BuildHashWorkOrder : public WorkOrder { const CatalogRelationSchema &input_relation, std::vector &&join_key_attributes, const bool any_join_key_attributes_nullable, + const std::size_t num_partitions, + const partition_id part_id, const block_id build_block_id, JoinHashTable *hash_table, StorageManager *storage_manager, @@ -201,6 +232,8 @@ class BuildHashWorkOrder : public WorkOrder { input_relation_(input_relation), join_key_attributes_(std::move(join_key_attributes)), any_join_key_attributes_nullable_(any_join_key_attributes_nullable), + num_partitions_(num_partitions), + part_id_(part_id), build_block_id_(build_block_id), hash_table_(DCHECK_NOTNULL(hash_table)), storage_manager_(DCHECK_NOTNULL(storage_manager)), @@ -214,10 +247,30 @@ class BuildHashWorkOrder : public WorkOrder { void execute() override; + /** + * @brief Get the number of partitions. + * + * @return The number of partitions. + */ + std::size_t num_partitions() const { + return num_partitions_; + } + + /** + * @brief Get the partition id. + * + * @return The partition id. + */ + partition_id getPartitionId() const { + return part_id_; + } + private: const CatalogRelationSchema &input_relation_; const std::vector join_key_attributes_; const bool any_join_key_attributes_nullable_; + const std::size_t num_partitions_; + const partition_id part_id_; const block_id build_block_id_; JoinHashTable *hash_table_; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5af8904d/relational_operators/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt index 526820e..66ea2d1 100644 --- a/relational_operators/CMakeLists.txt +++ b/relational_operators/CMakeLists.txt @@ -40,8 +40,8 @@ add_library(quickstep_relationaloperators_AggregationOperator AggregationOperato add_library(quickstep_relationaloperators_BuildHashOperator BuildHashOperator.cpp BuildHashOperator.hpp) add_library(quickstep_relationaloperators_CreateIndexOperator CreateIndexOperator.cpp CreateIndexOperator.hpp) add_library(quickstep_relationaloperators_CreateTableOperator CreateTableOperator.cpp CreateTableOperator.hpp) -add_library(quickstep_relationaloperators_DestroyAggregationStateOperator - DestroyAggregationStateOperator.cpp +add_library(quickstep_relationaloperators_DestroyAggregationStateOperator + DestroyAggregationStateOperator.cpp DestroyAggregationStateOperator.hpp) add_library(quickstep_relationaloperators_DeleteOperator DeleteOperator.cpp DeleteOperator.hpp) add_library(quickstep_relationaloperators_DestroyHashOperator DestroyHashOperator.cpp DestroyHashOperator.hpp) @@ -99,6 +99,7 @@ target_link_libraries(quickstep_relationaloperators_BuildHashOperator glog quickstep_catalog_CatalogRelation quickstep_catalog_CatalogTypedefs + quickstep_catalog_PartitionScheme quickstep_queryexecution_QueryContext quickstep_queryexecution_WorkOrderProtosContainer quickstep_queryexecution_WorkOrdersContainer @@ -162,6 +163,7 @@ target_link_libraries(quickstep_relationaloperators_DestroyAggregationStateOpera tmb) target_link_libraries(quickstep_relationaloperators_DestroyHashOperator glog + quickstep_catalog_CatalogTypedefs quickstep_queryexecution_QueryContext quickstep_queryexecution_WorkOrderProtosContainer quickstep_queryexecution_WorkOrdersContainer @@ -204,6 +206,7 @@ target_link_libraries(quickstep_relationaloperators_HashJoinOperator quickstep_catalog_CatalogRelation quickstep_catalog_CatalogRelationSchema quickstep_catalog_CatalogTypedefs + quickstep_catalog_PartitionScheme quickstep_expressions_predicate_Predicate quickstep_expressions_scalar_Scalar quickstep_queryexecution_QueryContext @@ -796,4 +799,4 @@ add_test(TextScanOperator_faulty_unittest TextScanOperator_unittest ${TEXT_SCAN_FAULTY_INPUT_FILE} ${TEXT_SCAN_FAULTY_GOLDEN_OUTPUT_FILE} - ${TEXT_SCAN_FAULTY_FAILURE_OUTPUT_FILE}) \ No newline at end of file + ${TEXT_SCAN_FAULTY_FAILURE_OUTPUT_FILE}) http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5af8904d/relational_operators/DestroyHashOperator.cpp ---------------------------------------------------------------------- diff --git a/relational_operators/DestroyHashOperator.cpp b/relational_operators/DestroyHashOperator.cpp index 96d2561..4827ef5 100644 --- a/relational_operators/DestroyHashOperator.cpp +++ b/relational_operators/DestroyHashOperator.cpp @@ -35,31 +35,36 @@ bool DestroyHashOperator::getAllWorkOrders( const tmb::client_id scheduler_client_id, tmb::MessageBus *bus) { if (blocking_dependencies_met_ && !work_generated_) { + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + container->addNormalWorkOrder( + new DestroyHashWorkOrder(query_id_, hash_table_index_, part_id, query_context), + op_index_); + } work_generated_ = true; - container->addNormalWorkOrder( - new DestroyHashWorkOrder(query_id_, hash_table_index_, query_context), - op_index_); } return work_generated_; } bool DestroyHashOperator::getAllWorkOrderProtos(WorkOrderProtosContainer *container) { if (blocking_dependencies_met_ && !work_generated_) { - work_generated_ = true; + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + serialization::WorkOrder *proto = new serialization::WorkOrder; + proto->set_work_order_type(serialization::DESTROY_HASH); + proto->set_query_id(query_id_); + proto->SetExtension(serialization::DestroyHashWorkOrder::join_hash_table_index, hash_table_index_); + proto->SetExtension(serialization::DestroyHashWorkOrder::partition_id, part_id); - serialization::WorkOrder *proto = new serialization::WorkOrder; - proto->set_work_order_type(serialization::DESTROY_HASH); - proto->set_query_id(query_id_); - proto->SetExtension(serialization::DestroyHashWorkOrder::join_hash_table_index, hash_table_index_); + container->addWorkOrderProto(proto, op_index_); + } - container->addWorkOrderProto(proto, op_index_); + work_generated_ = true; } return work_generated_; } void DestroyHashWorkOrder::execute() { - query_context_->destroyJoinHashTable(hash_table_index_); + query_context_->destroyJoinHashTable(hash_table_index_, part_id_); } } // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5af8904d/relational_operators/DestroyHashOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/DestroyHashOperator.hpp b/relational_operators/DestroyHashOperator.hpp index d402766..8a1fabd 100644 --- a/relational_operators/DestroyHashOperator.hpp +++ b/relational_operators/DestroyHashOperator.hpp @@ -22,6 +22,7 @@ #include +#include "catalog/CatalogTypedefs.hpp" #include "query_execution/QueryContext.hpp" #include "relational_operators/RelationalOperator.hpp" #include "relational_operators/WorkOrder.hpp" @@ -52,11 +53,14 @@ class DestroyHashOperator : public RelationalOperator { * @brief Constructor. * * @param query_id The ID of the query to which this operator belongs. + * @param num_partitions The number of partitions. * @param hash_table_index The index of the JoinHashTable in QueryContext. **/ DestroyHashOperator(const std::size_t query_id, + const std::size_t num_partitions, const QueryContext::join_hash_table_id hash_table_index) : RelationalOperator(query_id), + num_partitions_(num_partitions), hash_table_index_(hash_table_index), work_generated_(false) {} @@ -75,6 +79,7 @@ class DestroyHashOperator : public RelationalOperator { bool getAllWorkOrderProtos(WorkOrderProtosContainer *container) override; private: + const std::size_t num_partitions_; const QueryContext::join_hash_table_id hash_table_index_; bool work_generated_; @@ -91,13 +96,16 @@ class DestroyHashWorkOrder : public WorkOrder { * * @param query_id The ID of the query to which this WorkOrder belongs. * @param hash_table_index The index of the JoinHashTable in QueryContext. + * @param part_id The partition id. * @param query_context The QueryContext to use. **/ DestroyHashWorkOrder(const std::size_t query_id, const QueryContext::join_hash_table_id hash_table_index, + const partition_id part_id, QueryContext *query_context) : WorkOrder(query_id), hash_table_index_(hash_table_index), + part_id_(part_id), query_context_(DCHECK_NOTNULL(query_context)) {} ~DestroyHashWorkOrder() override {} @@ -106,6 +114,7 @@ class DestroyHashWorkOrder : public WorkOrder { private: const QueryContext::join_hash_table_id hash_table_index_; + const partition_id part_id_; QueryContext *query_context_; DISALLOW_COPY_AND_ASSIGN(DestroyHashWorkOrder); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5af8904d/relational_operators/HashJoinOperator.cpp ---------------------------------------------------------------------- diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp index 2028046..0062b93 100644 --- a/relational_operators/HashJoinOperator.cpp +++ b/relational_operators/HashJoinOperator.cpp @@ -207,49 +207,43 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders( query_context->getScalarGroup(selection_index_); InsertDestination *output_destination = query_context->getInsertDestination(output_destination_index_); - const JoinHashTable &hash_table = - *(query_context->getJoinHashTable(hash_table_index_)); if (probe_relation_is_stored_) { - if (!started_) { - for (const block_id probe_block_id : probe_relation_block_ids_) { + if (started_) { + return true; + } + + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + const JoinHashTable &hash_table = + *(query_context->getJoinHashTable(hash_table_index_, part_id)); + + for (const block_id probe_block_id : probe_relation_block_ids_[part_id]) { container->addNormalWorkOrder( - new JoinWorkOrderClass(query_id_, - build_relation_, - probe_relation_, - join_key_attributes_, - any_join_key_attributes_nullable_, - probe_block_id, - residual_predicate, - selection, - hash_table, - output_destination, - storage_manager, + new JoinWorkOrderClass(query_id_, build_relation_, probe_relation_, join_key_attributes_, + any_join_key_attributes_nullable_, num_partitions_, part_id, probe_block_id, + residual_predicate, selection, hash_table, output_destination, storage_manager, CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context)), op_index_); } - started_ = true; } - return started_; + started_ = true; + return true; } else { - while (num_workorders_generated_ < probe_relation_block_ids_.size()) { - container->addNormalWorkOrder( - new JoinWorkOrderClass( - query_id_, - build_relation_, - probe_relation_, - join_key_attributes_, - any_join_key_attributes_nullable_, - probe_relation_block_ids_[num_workorders_generated_], - residual_predicate, - selection, - hash_table, - output_destination, - storage_manager, - CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context)), - op_index_); - ++num_workorders_generated_; - } // end while + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + const JoinHashTable &hash_table = + *(query_context->getJoinHashTable(hash_table_index_, part_id)); + + while (num_workorders_generated_[part_id] < probe_relation_block_ids_[part_id].size()) { + container->addNormalWorkOrder( + new JoinWorkOrderClass(query_id_, build_relation_, probe_relation_, join_key_attributes_, + any_join_key_attributes_nullable_, num_partitions_, part_id, + probe_relation_block_ids_[part_id][num_workorders_generated_[part_id]], + residual_predicate, selection, hash_table, output_destination, storage_manager, + CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context)), + op_index_); + ++num_workorders_generated_[part_id]; + } // end while + } // end for return done_feeding_input_relation_; } // end else (probe_relation_is_stored_) } // end if (blocking_dependencies_met_) @@ -269,49 +263,44 @@ bool HashJoinOperator::getAllOuterJoinWorkOrders( InsertDestination *output_destination = query_context->getInsertDestination(output_destination_index_); - const JoinHashTable &hash_table = - *(query_context->getJoinHashTable(hash_table_index_)); if (probe_relation_is_stored_) { - if (!started_) { - for (const block_id probe_block_id : probe_relation_block_ids_) { + if (started_) { + return true; + } + + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + const JoinHashTable &hash_table = + *(query_context->getJoinHashTable(hash_table_index_, part_id)); + + for (const block_id probe_block_id : probe_relation_block_ids_[part_id]) { container->addNormalWorkOrder( - new HashOuterJoinWorkOrder( - query_id_, - build_relation_, - probe_relation_, - join_key_attributes_, - any_join_key_attributes_nullable_, - probe_block_id, - selection, - is_selection_on_build_, - hash_table, - output_destination, - storage_manager, - CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context)), + new HashOuterJoinWorkOrder(query_id_, build_relation_, probe_relation_, join_key_attributes_, + any_join_key_attributes_nullable_, num_partitions_, part_id, probe_block_id, + selection, is_selection_on_build_, hash_table, output_destination, + storage_manager, + CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context)), op_index_); } - started_ = true; } - return started_; + started_ = true; + return true; } else { - while (num_workorders_generated_ < probe_relation_block_ids_.size()) { - container->addNormalWorkOrder( - new HashOuterJoinWorkOrder( - query_id_, - build_relation_, - probe_relation_, - join_key_attributes_, - any_join_key_attributes_nullable_, - probe_relation_block_ids_[num_workorders_generated_], - selection, - is_selection_on_build_, - hash_table, - output_destination, - storage_manager, - CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context)), - op_index_); - ++num_workorders_generated_; + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + const JoinHashTable &hash_table = + *(query_context->getJoinHashTable(hash_table_index_, part_id)); + + while (num_workorders_generated_[part_id] < probe_relation_block_ids_[part_id].size()) { + container->addNormalWorkOrder( + new HashOuterJoinWorkOrder(query_id_, build_relation_, probe_relation_, join_key_attributes_, + any_join_key_attributes_nullable_, num_partitions_, part_id, + probe_relation_block_ids_[part_id][num_workorders_generated_[part_id]], + selection, is_selection_on_build_, hash_table, output_destination, + storage_manager, + CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context)), + op_index_); + ++num_workorders_generated_[part_id]; + } } return done_feeding_input_relation_; } // end else (probe_relation_is_stored_) @@ -343,22 +332,29 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrderProtos( } if (probe_relation_is_stored_) { - if (!started_) { - for (const block_id probe_block_id : probe_relation_block_ids_) { + if (started_) { + return true; + } + + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + for (const block_id probe_block_id : probe_relation_block_ids_[part_id]) { container->addWorkOrderProto( - createNonOuterJoinWorkOrderProto(hash_join_type, probe_block_id), + createNonOuterJoinWorkOrderProto(hash_join_type, probe_block_id, part_id), op_index_); } - started_ = true; } + started_ = true; return true; } else { - while (num_workorders_generated_ < probe_relation_block_ids_.size()) { - container->addWorkOrderProto( - createNonOuterJoinWorkOrderProto(hash_join_type, - probe_relation_block_ids_[num_workorders_generated_]), - op_index_); - ++num_workorders_generated_; + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + while (num_workorders_generated_[part_id] < probe_relation_block_ids_[part_id].size()) { + container->addWorkOrderProto( + createNonOuterJoinWorkOrderProto(hash_join_type, + probe_relation_block_ids_[part_id][num_workorders_generated_[part_id]], + part_id), + op_index_); + ++num_workorders_generated_[part_id]; + } } return done_feeding_input_relation_; @@ -367,7 +363,7 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrderProtos( serialization::WorkOrder* HashJoinOperator::createNonOuterJoinWorkOrderProto( const serialization::HashJoinWorkOrder::HashJoinWorkOrderType hash_join_type, - const block_id block) { + const block_id block, const partition_id part_id) { serialization::WorkOrder *proto = new serialization::WorkOrder; proto->set_work_order_type(serialization::HASH_JOIN); proto->set_query_id(query_id_); @@ -380,8 +376,10 @@ serialization::WorkOrder* HashJoinOperator::createNonOuterJoinWorkOrderProto( } proto->SetExtension(serialization::HashJoinWorkOrder::any_join_key_attributes_nullable, any_join_key_attributes_nullable_); + proto->SetExtension(serialization::HashJoinWorkOrder::num_partitions, num_partitions_); proto->SetExtension(serialization::HashJoinWorkOrder::insert_destination_index, output_destination_index_); proto->SetExtension(serialization::HashJoinWorkOrder::join_hash_table_index, hash_table_index_); + proto->SetExtension(serialization::HashJoinWorkOrder::partition_id, part_id); proto->SetExtension(serialization::HashJoinWorkOrder::selection_index, selection_index_); proto->SetExtension(serialization::HashJoinWorkOrder::block_id, block); proto->SetExtension(serialization::HashJoinWorkOrder::residual_predicate_index, residual_predicate_index_); @@ -397,26 +395,34 @@ bool HashJoinOperator::getAllOuterJoinWorkOrderProtos(WorkOrderProtosContainer * } if (probe_relation_is_stored_) { - if (!started_) { - for (const block_id probe_block_id : probe_relation_block_ids_) { - container->addWorkOrderProto(createOuterJoinWorkOrderProto(probe_block_id), op_index_); + if (started_) { + return true; + } + + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + for (const block_id probe_block_id : probe_relation_block_ids_[part_id]) { + container->addWorkOrderProto(createOuterJoinWorkOrderProto(probe_block_id, part_id), op_index_); } - started_ = true; } + started_ = true; return true; } else { - while (num_workorders_generated_ < probe_relation_block_ids_.size()) { - container->addWorkOrderProto( - createOuterJoinWorkOrderProto(probe_relation_block_ids_[num_workorders_generated_]), - op_index_); - ++num_workorders_generated_; + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + while (num_workorders_generated_[part_id] < probe_relation_block_ids_[part_id].size()) { + container->addWorkOrderProto( + createOuterJoinWorkOrderProto(probe_relation_block_ids_[part_id][num_workorders_generated_[part_id]], + part_id), + op_index_); + ++num_workorders_generated_[part_id]; + } } return done_feeding_input_relation_; } } -serialization::WorkOrder* HashJoinOperator::createOuterJoinWorkOrderProto(const block_id block) { +serialization::WorkOrder* HashJoinOperator::createOuterJoinWorkOrderProto(const block_id block, + const partition_id part_id) { serialization::WorkOrder *proto = new serialization::WorkOrder; proto->set_work_order_type(serialization::HASH_JOIN); proto->set_query_id(query_id_); @@ -430,10 +436,12 @@ serialization::WorkOrder* HashJoinOperator::createOuterJoinWorkOrderProto(const } proto->SetExtension(serialization::HashJoinWorkOrder::any_join_key_attributes_nullable, any_join_key_attributes_nullable_); + proto->SetExtension(serialization::HashJoinWorkOrder::num_partitions, num_partitions_); proto->SetExtension(serialization::HashJoinWorkOrder::insert_destination_index, output_destination_index_); proto->SetExtension(serialization::HashJoinWorkOrder::join_hash_table_index, hash_table_index_); proto->SetExtension(serialization::HashJoinWorkOrder::selection_index, selection_index_); proto->SetExtension(serialization::HashJoinWorkOrder::block_id, block); + proto->SetExtension(serialization::HashJoinWorkOrder::partition_id, part_id); proto->SetExtension(serialization::HashJoinWorkOrder::lip_deployment_index, lip_deployment_index_); for (const bool is_attribute_on_build : is_selection_on_build_) { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5af8904d/relational_operators/HashJoinOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp index 98c87bf..c117d0a 100644 --- a/relational_operators/HashJoinOperator.hpp +++ b/relational_operators/HashJoinOperator.hpp @@ -28,6 +28,7 @@ #include "catalog/CatalogRelation.hpp" #include "catalog/CatalogTypedefs.hpp" +#include "catalog/PartitionScheme.hpp" #include "query_execution/QueryContext.hpp" #include "relational_operators/RelationalOperator.hpp" #include "relational_operators/WorkOrder.hpp" @@ -100,6 +101,8 @@ class HashJoinOperator : public RelationalOperator { * @param join_key_attributes The IDs of equijoin attributes in * probe_relation. * @param any_join_key_attributes_nullable If any attribute is nullable. + * @param num_partitions The number of partitions in 'input_relation'. If no + * partitions, it is one. * @param output_relation The output relation. * @param output_destination_index The index of the InsertDestination in the * QueryContext to insert the join results. @@ -125,6 +128,7 @@ class HashJoinOperator : public RelationalOperator { const bool probe_relation_is_stored, const std::vector &join_key_attributes, const bool any_join_key_attributes_nullable, + const std::size_t num_partitions, const CatalogRelation &output_relation, const QueryContext::insert_destination_id output_destination_index, const QueryContext::join_hash_table_id hash_table_index, @@ -138,6 +142,7 @@ class HashJoinOperator : public RelationalOperator { probe_relation_is_stored_(probe_relation_is_stored), join_key_attributes_(join_key_attributes), any_join_key_attributes_nullable_(any_join_key_attributes_nullable), + num_partitions_(num_partitions), output_relation_(output_relation), output_destination_index_(output_destination_index), hash_table_index_(hash_table_index), @@ -147,14 +152,24 @@ class HashJoinOperator : public RelationalOperator { ? std::vector() : *is_selection_on_build), join_type_(join_type), - probe_relation_block_ids_(probe_relation_is_stored - ? probe_relation.getBlocksSnapshot() - : std::vector()), - num_workorders_generated_(0), + probe_relation_block_ids_(num_partitions), + num_workorders_generated_(num_partitions), started_(false) { DCHECK(join_type != JoinType::kLeftOuterJoin || (is_selection_on_build != nullptr && residual_predicate_index == QueryContext::kInvalidPredicateId)); + + if (probe_relation_is_stored) { + if (probe_relation.hasPartitionScheme()) { + const PartitionScheme &part_scheme = *probe_relation.getPartitionScheme(); + for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) { + probe_relation_block_ids_[part_id] = part_scheme.getBlocksInPartition(part_id); + } + } else { + // No partition. + probe_relation_block_ids_[0] = probe_relation.getBlocksSnapshot(); + } + } } ~HashJoinOperator() override {} @@ -192,8 +207,9 @@ class HashJoinOperator : public RelationalOperator { void feedInputBlock(const block_id input_block_id, const relation_id input_relation_id, const partition_id part_id) override { - DCHECK(input_relation_id == probe_relation_.getID()); - probe_relation_block_ids_.push_back(input_block_id); + DCHECK_EQ(probe_relation_.getID(), input_relation_id); + + probe_relation_block_ids_[part_id].push_back(input_block_id); } QueryContext::insert_destination_id getInsertDestinationID() const override { @@ -229,7 +245,7 @@ class HashJoinOperator : public RelationalOperator { serialization::WorkOrder* createNonOuterJoinWorkOrderProto( const serialization::HashJoinWorkOrder::HashJoinWorkOrderType hash_join_type, - const block_id block); + const block_id block, const partition_id part_id); bool getAllOuterJoinWorkOrderProtos(WorkOrderProtosContainer *container); @@ -238,13 +254,14 @@ class HashJoinOperator : public RelationalOperator { * * @param block The block id used in the Work Order. **/ - serialization::WorkOrder* createOuterJoinWorkOrderProto(const block_id block); + serialization::WorkOrder* createOuterJoinWorkOrderProto(const block_id block, const partition_id part_id); const CatalogRelation &build_relation_; const CatalogRelation &probe_relation_; const bool probe_relation_is_stored_; const std::vector join_key_attributes_; const bool any_join_key_attributes_nullable_; + const std::size_t num_partitions_; const CatalogRelation &output_relation_; const QueryContext::insert_destination_id output_destination_index_; const QueryContext::join_hash_table_id hash_table_index_; @@ -253,8 +270,9 @@ class HashJoinOperator : public RelationalOperator { const std::vector is_selection_on_build_; const JoinType join_type_; - std::vector probe_relation_block_ids_; - std::size_t num_workorders_generated_; + // The index is the partition id. + std::vector probe_relation_block_ids_; + std::vector num_workorders_generated_; bool started_; @@ -277,6 +295,9 @@ class HashInnerJoinWorkOrder : public WorkOrder { * @param join_key_attributes The IDs of equijoin attributes in \c * probe_relation. * @param any_join_key_attributes_nullable If any attribute is nullable. + * @param num_partitions The number of partitions in 'probe_relation'. If no + * partitions, it is one. + * @param part_id The partition id of 'probe_relation'. * @param lookup_block_id The block id of the probe_relation. * @param residual_predicate If non-null, apply as an additional filter to * pairs of tuples that match the hash-join (i.e. key equality) @@ -296,6 +317,8 @@ class HashInnerJoinWorkOrder : public WorkOrder { const CatalogRelationSchema &probe_relation, const std::vector &join_key_attributes, const bool any_join_key_attributes_nullable, + const std::size_t num_partitions, + const partition_id part_id, const block_id lookup_block_id, const Predicate *residual_predicate, const std::vector> &selection, @@ -308,6 +331,8 @@ class HashInnerJoinWorkOrder : public WorkOrder { probe_relation_(probe_relation), join_key_attributes_(join_key_attributes), any_join_key_attributes_nullable_(any_join_key_attributes_nullable), + num_partitions_(num_partitions), + part_id_(part_id), block_id_(lookup_block_id), residual_predicate_(residual_predicate), selection_(selection), @@ -327,6 +352,9 @@ class HashInnerJoinWorkOrder : public WorkOrder { * @param join_key_attributes The IDs of equijoin attributes in \c * probe_relation. * @param any_join_key_attributes_nullable If any attribute is nullable. + * @param num_partitions The number of partitions in 'probe_relation'. If no + * partitions, it is one. + * @param part_id The partition id of 'probe_relation'. * @param lookup_block_id The block id of the probe_relation. * @param residual_predicate If non-null, apply as an additional filter to * pairs of tuples that match the hash-join (i.e. key equality) @@ -346,6 +374,8 @@ class HashInnerJoinWorkOrder : public WorkOrder { const CatalogRelationSchema &probe_relation, std::vector &&join_key_attributes, const bool any_join_key_attributes_nullable, + const std::size_t num_partitions, + const partition_id part_id, const block_id lookup_block_id, const Predicate *residual_predicate, const std::vector> &selection, @@ -358,6 +388,8 @@ class HashInnerJoinWorkOrder : public WorkOrder { probe_relation_(probe_relation), join_key_attributes_(std::move(join_key_attributes)), any_join_key_attributes_nullable_(any_join_key_attributes_nullable), + num_partitions_(num_partitions), + part_id_(part_id), block_id_(lookup_block_id), residual_predicate_(residual_predicate), selection_(selection), @@ -378,11 +410,31 @@ class HashInnerJoinWorkOrder : public WorkOrder { **/ void execute() override; + /** + * @brief Get the number of partitions. + * + * @return The number of partitions. + */ + std::size_t num_partitions() const { + return num_partitions_; + } + + /** + * @brief Get the partition id. + * + * @return The partition id. + */ + partition_id getPartitionId() const { + return part_id_; + } + private: const CatalogRelationSchema &build_relation_; const CatalogRelationSchema &probe_relation_; const std::vector join_key_attributes_; const bool any_join_key_attributes_nullable_; + const std::size_t num_partitions_; + const partition_id part_id_; const block_id block_id_; const Predicate *residual_predicate_; const std::vector> &selection_; @@ -413,6 +465,9 @@ class HashSemiJoinWorkOrder : public WorkOrder { * @param join_key_attributes The IDs of equijoin attributes in \c * probe_relation. * @param any_join_key_attributes_nullable If any attribute is nullable. + * @param num_partitions The number of partitions in 'probe_relation'. If no + * partitions, it is one. + * @param part_id The partition id of 'probe_relation'. * @param lookup_block_id The block id of the probe_relation. * @param residual_predicate If non-null, apply as an additional filter to * pairs of tuples that match the hash-join (i.e. key equality) @@ -432,6 +487,8 @@ class HashSemiJoinWorkOrder : public WorkOrder { const CatalogRelationSchema &probe_relation, const std::vector &join_key_attributes, const bool any_join_key_attributes_nullable, + const std::size_t num_partitions, + const partition_id part_id, const block_id lookup_block_id, const Predicate *residual_predicate, const std::vector> &selection, @@ -444,6 +501,8 @@ class HashSemiJoinWorkOrder : public WorkOrder { probe_relation_(probe_relation), join_key_attributes_(join_key_attributes), any_join_key_attributes_nullable_(any_join_key_attributes_nullable), + num_partitions_(num_partitions), + part_id_(part_id), block_id_(lookup_block_id), residual_predicate_(residual_predicate), selection_(selection), @@ -463,6 +522,9 @@ class HashSemiJoinWorkOrder : public WorkOrder { * @param join_key_attributes The IDs of equijoin attributes in \c * probe_relation. * @param any_join_key_attributes_nullable If any attribute is nullable. + * @param num_partitions The number of partitions in 'probe_relation'. If no + * partitions, it is one. + * @param part_id The partition id of 'probe_relation'. * @param lookup_block_id The block id of the probe_relation. * @param residual_predicate If non-null, apply as an additional filter to * pairs of tuples that match the hash-join (i.e. key equality) @@ -482,6 +544,8 @@ class HashSemiJoinWorkOrder : public WorkOrder { const CatalogRelationSchema &probe_relation, std::vector &&join_key_attributes, const bool any_join_key_attributes_nullable, + const std::size_t num_partitions, + const partition_id part_id, const block_id lookup_block_id, const Predicate *residual_predicate, const std::vector> &selection, @@ -494,6 +558,8 @@ class HashSemiJoinWorkOrder : public WorkOrder { probe_relation_(probe_relation), join_key_attributes_(std::move(join_key_attributes)), any_join_key_attributes_nullable_(any_join_key_attributes_nullable), + num_partitions_(num_partitions), + part_id_(part_id), block_id_(lookup_block_id), residual_predicate_(residual_predicate), selection_(selection), @@ -506,6 +572,24 @@ class HashSemiJoinWorkOrder : public WorkOrder { void execute() override; + /** + * @brief Get the number of partitions. + * + * @return The number of partitions. + */ + std::size_t num_partitions() const { + return num_partitions_; + } + + /** + * @brief Get the partition id. + * + * @return The partition id. + */ + partition_id getPartitionId() const { + return part_id_; + } + private: void executeWithoutResidualPredicate(); @@ -515,6 +599,8 @@ class HashSemiJoinWorkOrder : public WorkOrder { const CatalogRelationSchema &probe_relation_; const std::vector join_key_attributes_; const bool any_join_key_attributes_nullable_; + const std::size_t num_partitions_; + const partition_id part_id_; const block_id block_id_; const Predicate *residual_predicate_; const std::vector> &selection_; @@ -545,6 +631,9 @@ class HashAntiJoinWorkOrder : public WorkOrder { * @param join_key_attributes The IDs of equijoin attributes in \c * probe_relation. * @param any_join_key_attributes_nullable If any attribute is nullable. + * @param num_partitions The number of partitions in 'probe_relation'. If no + * partitions, it is one. + * @param part_id The partition id of 'probe_relation'. * @param lookup_block_id The block id of the probe_relation. * @param residual_predicate If non-null, apply as an additional filter to * pairs of tuples that match the hash-join (i.e. key equality) @@ -564,6 +653,8 @@ class HashAntiJoinWorkOrder : public WorkOrder { const CatalogRelationSchema &probe_relation, const std::vector &join_key_attributes, const bool any_join_key_attributes_nullable, + const std::size_t num_partitions, + const partition_id part_id, const block_id lookup_block_id, const Predicate *residual_predicate, const std::vector> &selection, @@ -576,6 +667,8 @@ class HashAntiJoinWorkOrder : public WorkOrder { probe_relation_(probe_relation), join_key_attributes_(join_key_attributes), any_join_key_attributes_nullable_(any_join_key_attributes_nullable), + num_partitions_(num_partitions), + part_id_(part_id), block_id_(lookup_block_id), residual_predicate_(residual_predicate), selection_(selection), @@ -595,6 +688,9 @@ class HashAntiJoinWorkOrder : public WorkOrder { * @param join_key_attributes The IDs of equijoin attributes in \c * probe_relation. * @param any_join_key_attributes_nullable If any attribute is nullable. + * @param num_partitions The number of partitions in 'probe_relation'. If no + * partitions, it is one. + * @param part_id The partition id of 'probe_relation'. * @param lookup_block_id The block id of the probe_relation. * @param residual_predicate If non-null, apply as an additional filter to * pairs of tuples that match the hash-join (i.e. key equality) @@ -614,6 +710,8 @@ class HashAntiJoinWorkOrder : public WorkOrder { const CatalogRelationSchema &probe_relation, std::vector &&join_key_attributes, const bool any_join_key_attributes_nullable, + const std::size_t num_partitions, + const partition_id part_id, const block_id lookup_block_id, const Predicate *residual_predicate, const std::vector> &selection, @@ -626,6 +724,8 @@ class HashAntiJoinWorkOrder : public WorkOrder { probe_relation_(probe_relation), join_key_attributes_(std::move(join_key_attributes)), any_join_key_attributes_nullable_(any_join_key_attributes_nullable), + num_partitions_(num_partitions), + part_id_(part_id), block_id_(lookup_block_id), residual_predicate_(residual_predicate), selection_(selection), @@ -644,6 +744,24 @@ class HashAntiJoinWorkOrder : public WorkOrder { } } + /** + * @brief Get the number of partitions. + * + * @return The number of partitions. + */ + std::size_t num_partitions() const { + return num_partitions_; + } + + /** + * @brief Get the partition id. + * + * @return The partition id. + */ + partition_id getPartitionId() const { + return part_id_; + } + private: void executeWithoutResidualPredicate(); @@ -653,6 +771,8 @@ class HashAntiJoinWorkOrder : public WorkOrder { const CatalogRelationSchema &probe_relation_; const std::vector join_key_attributes_; const bool any_join_key_attributes_nullable_; + const std::size_t num_partitions_; + const partition_id part_id_; const block_id block_id_; const Predicate *residual_predicate_; const std::vector> &selection_; @@ -682,6 +802,9 @@ class HashOuterJoinWorkOrder : public WorkOrder { * @param join_key_attributes The IDs of equijoin attributes in \c * probe_relation. * @param any_join_key_attributes_nullable If any attribute is nullable. + * @param num_partitions The number of partitions in 'probe_relation'. If no + * partitions, it is one. + * @param part_id The partition id of 'probe_relation'. * @param lookup_block_id The block id of the probe_relation. * @param selection A list of Scalars corresponding to the relation attributes * in \c output_destination. Each Scalar is evaluated for the joined @@ -700,6 +823,8 @@ class HashOuterJoinWorkOrder : public WorkOrder { const CatalogRelationSchema &probe_relation, const std::vector &join_key_attributes, const bool any_join_key_attributes_nullable, + const std::size_t num_partitions, + const partition_id part_id, const block_id lookup_block_id, const std::vector> &selection, const std::vector &is_selection_on_build, @@ -712,6 +837,8 @@ class HashOuterJoinWorkOrder : public WorkOrder { probe_relation_(probe_relation), join_key_attributes_(join_key_attributes), any_join_key_attributes_nullable_(any_join_key_attributes_nullable), + num_partitions_(num_partitions), + part_id_(part_id), block_id_(lookup_block_id), selection_(selection), is_selection_on_build_(is_selection_on_build), @@ -731,6 +858,9 @@ class HashOuterJoinWorkOrder : public WorkOrder { * @param join_key_attributes The IDs of equijoin attributes in \c * probe_relation. * @param any_join_key_attributes_nullable If any attribute is nullable. + * @param num_partitions The number of partitions in 'probe_relation'. If no + * partitions, it is one. + * @param part_id The partition id of 'probe_relation'. * @param lookup_block_id The block id of the probe_relation. * @param selection A list of Scalars corresponding to the relation attributes * in \c output_destination. Each Scalar is evaluated for the joined @@ -748,6 +878,8 @@ class HashOuterJoinWorkOrder : public WorkOrder { const CatalogRelationSchema &probe_relation, std::vector &&join_key_attributes, const bool any_join_key_attributes_nullable, + const std::size_t num_partitions, + const partition_id part_id, const block_id lookup_block_id, const std::vector> &selection, std::vector &&is_selection_on_build, @@ -760,6 +892,8 @@ class HashOuterJoinWorkOrder : public WorkOrder { probe_relation_(probe_relation), join_key_attributes_(std::move(join_key_attributes)), any_join_key_attributes_nullable_(any_join_key_attributes_nullable), + num_partitions_(num_partitions), + part_id_(part_id), block_id_(lookup_block_id), selection_(selection), is_selection_on_build_(std::move(is_selection_on_build)), @@ -772,11 +906,31 @@ class HashOuterJoinWorkOrder : public WorkOrder { void execute() override; + /** + * @brief Get the number of partitions. + * + * @return The number of partitions. + */ + std::size_t num_partitions() const { + return num_partitions_; + } + + /** + * @brief Get the partition id. + * + * @return The partition id. + */ + partition_id getPartitionId() const { + return part_id_; + } + private: const CatalogRelationSchema &build_relation_; const CatalogRelationSchema &probe_relation_; const std::vector join_key_attributes_; const bool any_join_key_attributes_nullable_; + const std::size_t num_partitions_; + const partition_id part_id_; const block_id block_id_; const std::vector> &selection_; const std::vector is_selection_on_build_; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5af8904d/relational_operators/WorkOrder.proto ---------------------------------------------------------------------- diff --git a/relational_operators/WorkOrder.proto b/relational_operators/WorkOrder.proto index 86f34b8..b914fce 100644 --- a/relational_operators/WorkOrder.proto +++ b/relational_operators/WorkOrder.proto @@ -63,13 +63,16 @@ message AggregationWorkOrder { } } +// Next tag: 40. message BuildHashWorkOrder { extend WorkOrder { // All required. optional int32 relation_id = 32; repeated int32 join_key_attributes = 33; optional bool any_join_key_attributes_nullable = 34; + optional uint64 num_partitions = 38; optional uint32 join_hash_table_index = 35; + optional uint64 partition_id = 39; optional fixed64 block_id = 36; optional int32 lip_deployment_index = 37; } @@ -89,6 +92,7 @@ message DestroyHashWorkOrder { extend WorkOrder { // All required. optional uint32 join_hash_table_index = 112; + optional uint64 partition_id = 113; } } @@ -109,6 +113,7 @@ message FinalizeAggregationWorkOrder { } } +// Next tag: 174. message HashJoinWorkOrder { enum HashJoinWorkOrderType { HASH_ANTI_JOIN = 0; @@ -124,8 +129,10 @@ message HashJoinWorkOrder { optional int32 probe_relation_id = 162; repeated int32 join_key_attributes = 163; optional bool any_join_key_attributes_nullable = 164; + optional uint64 num_partitions = 172; optional int32 insert_destination_index = 165; optional uint32 join_hash_table_index = 166; + optional uint64 partition_id = 173; optional int32 selection_index = 167; optional fixed64 block_id = 168; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5af8904d/relational_operators/WorkOrderFactory.cpp ---------------------------------------------------------------------- diff --git a/relational_operators/WorkOrderFactory.cpp b/relational_operators/WorkOrderFactory.cpp index 871f74d..c09bcbe 100644 --- a/relational_operators/WorkOrderFactory.cpp +++ b/relational_operators/WorkOrderFactory.cpp @@ -98,15 +98,20 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder proto.GetExtension(serialization::BuildHashWorkOrder::join_key_attributes, i)); } + const partition_id part_id = + proto.GetExtension(serialization::BuildHashWorkOrder::partition_id); + return new BuildHashWorkOrder( proto.query_id(), catalog_database->getRelationSchemaById( proto.GetExtension(serialization::BuildHashWorkOrder::relation_id)), move(join_key_attributes), proto.GetExtension(serialization::BuildHashWorkOrder::any_join_key_attributes_nullable), + proto.GetExtension(serialization::BuildHashWorkOrder::num_partitions), + part_id, proto.GetExtension(serialization::BuildHashWorkOrder::block_id), query_context->getJoinHashTable( - proto.GetExtension(serialization::BuildHashWorkOrder::join_hash_table_index)), + proto.GetExtension(serialization::BuildHashWorkOrder::join_hash_table_index), part_id), storage_manager, CreateLIPFilterBuilderHelper( proto.GetExtension(serialization::BuildHashWorkOrder::lip_deployment_index), query_context)); @@ -139,6 +144,8 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder proto.query_id(), proto.GetExtension( serialization::DestroyHashWorkOrder::join_hash_table_index), + proto.GetExtension( + serialization::DestroyHashWorkOrder::partition_id), query_context); } case serialization::DROP_TABLE: { @@ -192,6 +199,9 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder const block_id lookup_block_id = proto.GetExtension(serialization::HashJoinWorkOrder::block_id); + const std::size_t num_partitions = + proto.GetExtension(serialization::HashJoinWorkOrder::num_partitions); + const Predicate *residual_predicate = nullptr; if (hash_join_work_order_type != serialization::HashJoinWorkOrder::HASH_OUTER_JOIN) { residual_predicate = @@ -202,9 +212,11 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder const std::vector> &selection = query_context->getScalarGroup( proto.GetExtension(serialization::HashJoinWorkOrder::selection_index)); + const partition_id part_id = + proto.GetExtension(serialization::HashJoinWorkOrder::partition_id); const JoinHashTable &hash_table = *query_context->getJoinHashTable( - proto.GetExtension(serialization::HashJoinWorkOrder::join_hash_table_index)); + proto.GetExtension(serialization::HashJoinWorkOrder::join_hash_table_index), part_id); InsertDestination *output_destination = query_context->getInsertDestination( proto.GetExtension(serialization::HashJoinWorkOrder::insert_destination_index)); @@ -221,6 +233,8 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder probe_relation, move(join_key_attributes), any_join_key_attributes_nullable, + num_partitions, + part_id, lookup_block_id, residual_predicate, selection, @@ -237,6 +251,8 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder probe_relation, move(join_key_attributes), any_join_key_attributes_nullable, + num_partitions, + part_id, lookup_block_id, residual_predicate, selection, @@ -261,6 +277,8 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder probe_relation, move(join_key_attributes), any_join_key_attributes_nullable, + num_partitions, + part_id, lookup_block_id, selection, move(is_selection_on_build), @@ -277,6 +295,8 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder probe_relation, move(join_key_attributes), any_join_key_attributes_nullable, + num_partitions, + part_id, lookup_block_id, residual_predicate, selection, @@ -524,8 +544,11 @@ bool WorkOrderFactory::ProtoIsValid(const serialization::WorkOrder &proto, return proto.HasExtension(serialization::BuildHashWorkOrder::any_join_key_attributes_nullable) && proto.HasExtension(serialization::BuildHashWorkOrder::block_id) && proto.HasExtension(serialization::BuildHashWorkOrder::join_hash_table_index) && + proto.HasExtension(serialization::BuildHashWorkOrder::num_partitions) && + proto.HasExtension(serialization::BuildHashWorkOrder::partition_id) && query_context.isValidJoinHashTableId( - proto.GetExtension(serialization::BuildHashWorkOrder::join_hash_table_index)); + proto.GetExtension(serialization::BuildHashWorkOrder::join_hash_table_index), + proto.GetExtension(serialization::BuildHashWorkOrder::partition_id)); } case serialization::DELETE: { return proto.HasExtension(serialization::DeleteWorkOrder::relation_id) && @@ -544,8 +567,10 @@ bool WorkOrderFactory::ProtoIsValid(const serialization::WorkOrder &proto, } case serialization::DESTROY_HASH: { return proto.HasExtension(serialization::DestroyHashWorkOrder::join_hash_table_index) && + proto.HasExtension(serialization::DestroyHashWorkOrder::partition_id) && query_context.isValidJoinHashTableId( - proto.GetExtension(serialization::DestroyHashWorkOrder::join_hash_table_index)); + proto.GetExtension(serialization::DestroyHashWorkOrder::join_hash_table_index), + proto.GetExtension(serialization::DestroyHashWorkOrder::partition_id)); } case serialization::DROP_TABLE: { return true; @@ -623,8 +648,11 @@ bool WorkOrderFactory::ProtoIsValid(const serialization::WorkOrder &proto, query_context.isValidInsertDestinationId( proto.GetExtension(serialization::HashJoinWorkOrder::insert_destination_index)) && proto.HasExtension(serialization::HashJoinWorkOrder::join_hash_table_index) && + proto.HasExtension(serialization::HashJoinWorkOrder::num_partitions) && + proto.HasExtension(serialization::HashJoinWorkOrder::partition_id) && query_context.isValidJoinHashTableId( - proto.GetExtension(serialization::HashJoinWorkOrder::join_hash_table_index)) && + proto.GetExtension(serialization::HashJoinWorkOrder::join_hash_table_index), + proto.GetExtension(serialization::HashJoinWorkOrder::partition_id)) && proto.HasExtension(serialization::HashJoinWorkOrder::selection_index) && query_context.isValidScalarGroupId( proto.GetExtension(serialization::HashJoinWorkOrder::selection_index)) &&