From: hbdeshmukh
To: dev@quickstep.incubator.apache.org
Reply-To: dev@quickstep.incubator.apache.org
Subject: [GitHub] incubator-quickstep pull request #181: Added limited optimizer support for P...
Message-Id: <20170307171648.807C8DFD9E@git1-us-west.apache.org>
Date: Tue, 7 Mar 2017 17:16:48 +0000 (UTC)

Github user hbdeshmukh commented on a diff in the pull request:

    https://github.com/apache/incubator-quickstep/pull/181#discussion_r104724405

--- Diff: query_optimizer/ExecutionGenerator.cpp ---
@@ -740,6 +805,202 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
     key_types.push_back(&left_attribute_type);
   }
+  const CatalogRelationInfo *build_relation_info =
+      findRelationInfoOutputByPhysical(build_physical);
+  const CatalogRelationInfo *probe_operator_info =
+      findRelationInfoOutputByPhysical(probe_physical);
+
+  const CatalogRelation *build_relation = build_relation_info->relation;
+  const CatalogRelation *probe_relation = probe_operator_info->relation;
+
+  // FIXME(quickstep-team): Add support for self-join.
+  if (build_relation == probe_relation) {
+    THROW_SQL_ERROR() << "Self-join is not supported";
+  }
+
+  const PartitionScheme *build_partition_scheme = build_relation->getPartitionScheme();
+  const PartitionScheme *probe_partition_scheme = probe_relation->getPartitionScheme();
+
+  bool build_needs_repartition = false;
+  bool probe_needs_repartition = false;
+  bool needs_swap = false;
+  if (build_partition_scheme && probe_partition_scheme) {
+    const PartitionSchemeHeader &build_partition_scheme_header = build_partition_scheme->getPartitionSchemeHeader();
+    const PartitionSchemeHeader &probe_partition_scheme_header = probe_partition_scheme->getPartitionSchemeHeader();
+
+    switch (build_partition_scheme_header.getPartitionType()) {
+      case PartitionSchemeHeader::PartitionType::kRange:
+        build_needs_repartition = true;
+
+        switch (probe_partition_scheme_header.getPartitionType()) {
+          case PartitionSchemeHeader::PartitionType::kRange:
+            probe_needs_repartition = true;
+            break;
+          case PartitionSchemeHeader::PartitionType::kHash: {
+            const attribute_id probe_partition_attr = probe_partition_scheme_header.getPartitionAttributeId();
+            if (find(probe_attribute_ids.begin(), probe_attribute_ids.end(), probe_partition_attr) !=
+                    probe_attribute_ids.end()) {
+              needs_swap = true;
+            } else {
+              probe_needs_repartition = true;
+            }
+            break;
+          }
+        }
+        break;
+      case PartitionSchemeHeader::PartitionType::kHash: {
+        const attribute_id build_partition_attr = build_partition_scheme_header.getPartitionAttributeId();
+        if (find(build_attribute_ids.begin(), build_attribute_ids.end(), build_partition_attr) !=
+                build_attribute_ids.end()) {
+          // BuildRelation has a useful partition.
+          switch (probe_partition_scheme_header.getPartitionType()) {
+            case PartitionSchemeHeader::PartitionType::kRange:
+              probe_needs_repartition = true;
+              break;
+            case PartitionSchemeHeader::PartitionType::kHash: {
+              if (areSamePartitionSchemeHeaders(build_partition_scheme_header, *build_relation,
+                                                probe_partition_scheme_header, *probe_relation)) {
+                if (cost_model_for_hash_join_->estimateCardinality(build_physical) >
+                        cost_model_for_hash_join_->estimateCardinality(probe_physical)) {
+                  needs_swap = true;
+                }
+              } else {
+                probe_needs_repartition = true;
+              }
+              break;
+            }
+          }
+        } else {
+          build_needs_repartition = true;
+
+          switch (probe_partition_scheme_header.getPartitionType()) {
+            case PartitionSchemeHeader::PartitionType::kRange:
+              probe_needs_repartition = true;
+              break;
+            case PartitionSchemeHeader::PartitionType::kHash: {
+              const attribute_id probe_partition_attr = probe_partition_scheme_header.getPartitionAttributeId();
+              if (find(probe_attribute_ids.begin(), probe_attribute_ids.end(), probe_partition_attr) !=
+                      probe_attribute_ids.end()) {
+                needs_swap = true;
+              } else {
+                probe_needs_repartition = true;
+              }
+              break;
+            }
+          }
+        }
+        break;
+      }
+    }
+  } else if (probe_partition_scheme) {
+    needs_swap = true;
+
+    const PartitionSchemeHeader &probe_partition_scheme_header = probe_partition_scheme->getPartitionSchemeHeader();
+    switch (probe_partition_scheme_header.getPartitionType()) {
+      case PartitionSchemeHeader::PartitionType::kRange:
+        probe_needs_repartition = true;
+        break;
+      case PartitionSchemeHeader::PartitionType::kHash: {
+        const attribute_id probe_partition_attr = probe_partition_scheme_header.getPartitionAttributeId();
+
+        probe_needs_repartition =
+            (find(probe_attribute_ids.begin(), probe_attribute_ids.end(), probe_partition_attr) ==
+             probe_attribute_ids.end());
+        break;
+      }
+    }
+  } else if (build_partition_scheme) {
+    const PartitionSchemeHeader &build_partition_scheme_header = build_partition_scheme->getPartitionSchemeHeader();
+    switch (build_partition_scheme_header.getPartitionType()) {
+      case PartitionSchemeHeader::PartitionType::kRange:
+        build_needs_repartition = true;
+        break;
+      case PartitionSchemeHeader::PartitionType::kHash: {
+        const attribute_id build_partition_attr = build_partition_scheme_header.getPartitionAttributeId();
+        build_needs_repartition =
+            (find(build_attribute_ids.begin(), build_attribute_ids.end(), build_partition_attr) ==
+             build_attribute_ids.end());
+        break;
+      }
+    }
+  }
+
+  if (needs_swap) {
+    swap(probe_physical, build_physical);
+    swap(probe_attribute_ids, build_attribute_ids);
+    swap(any_probe_attributes_nullable, any_build_attributes_nullable);
+    swap(left_join_attributes, right_join_attributes);
+    swap(probe_operator_info, build_relation_info);
+    swap(probe_relation, build_relation);
+    swap(probe_partition_scheme, build_partition_scheme);
+    swap(probe_needs_repartition, build_needs_repartition);
+  }
+
+  unique_ptr probe_repartition_scheme;
+  if (build_needs_repartition) {
+    const size_t repartition_attr_index =
+        chooseBestRepartitionAttributeIndex(build_relation->getStatistics(), build_attribute_ids);
+    auto build_repartition_scheme_header =
+        make_unique(FLAGS_num_repartitions,
+                    build_attribute_ids[repartition_attr_index]);
+    auto build_repartition_scheme = make_unique(build_repartition_scheme_header.release());
+
+    build_partition_scheme = build_repartition_scheme.get();
+
+    if (build_relation_info->isStoredRelation()) {
+      THROW_SQL_ERROR() << "Re-partition for the base build table is not supported";
+    } else {
+      S::InsertDestination *build_insert_destination_proto =
+          query_context_proto_->mutable_insert_destinations(build_relation_info->output_destination_index);
+
+      build_insert_destination_proto->set_insert_destination_type(S::InsertDestinationType::PARTITION_AWARE);
+      build_insert_destination_proto->MutableExtension(S::PartitionAwareInsertDestination::partition_scheme)
+          ->MergeFrom(build_repartition_scheme->getProto());
+
+      CatalogRelation *mutable_build_relation =
+          catalog_database_->getRelationByIdMutable(build_relation->getID());
+      mutable_build_relation->setPartitionScheme(build_repartition_scheme.release());
--- End diff --

Maybe I missed some of your older PRs, but can you explain how we will physically repartition the relations?
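
For context, the question concerns how tuples actually get moved into new partitions. As a rough, hedged sketch only (the Tuple struct and repartition() helper below are illustrative assumptions, not Quickstep types or anything from this PR), a hash-based repartition conceptually routes each tuple to a bucket chosen by hashing the selected partition attribute modulo the number of partitions:

    // Illustrative sketch only: Tuple and repartition() are hypothetical names,
    // not Quickstep APIs. Each tuple is routed to a bucket by hashing the
    // chosen repartition attribute modulo the number of partitions.
    #include <cstddef>
    #include <functional>
    #include <vector>

    struct Tuple {
      int partition_key;  // value of the attribute chosen for repartitioning
    };

    std::vector<std::vector<Tuple>> repartition(const std::vector<Tuple> &tuples,
                                                const std::size_t num_partitions) {
      std::vector<std::vector<Tuple>> buckets(num_partitions);
      for (const Tuple &t : tuples) {
        const std::size_t b = std::hash<int>{}(t.partition_key) % num_partitions;
        buckets[b].push_back(t);
      }
      return buckets;
    }

Whether that routing happens inside the PARTITION_AWARE insert destination for newly produced output, or by rewriting a stored relation's existing blocks, is exactly what the question above is asking the PR author to clarify.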