quickstep-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From zu...@apache.org
Subject incubator-quickstep git commit: Added limited optimizer support for Partitioned Hash Joins.
Date Wed, 25 Jan 2017 21:45:17 GMT
Repository: incubator-quickstep
Updated Branches:
  refs/heads/reorder-partitioned-hash-join [created] 0b73be174


Added limited optimizer support for Partitioned Hash Joins.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/0b73be17
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/0b73be17
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/0b73be17

Branch: refs/heads/reorder-partitioned-hash-join
Commit: 0b73be174d2c3efead2499a3a6258aac7b73b8b9
Parents: 7bce0b8
Author: Zuyu Zhang <zuyuz@apache.org>
Authored: Wed Jan 25 01:49:28 2017 -0800
Committer: Zuyu Zhang <zuyuz@apache.org>
Committed: Wed Jan 25 12:32:59 2017 -0800

----------------------------------------------------------------------
 query_optimizer/ExecutionGenerator.cpp          | 143 +++++++++++++++----
 .../tests/execution_generator/Partition.test    |  51 +++++++
 relational_operators/HashJoinOperator.cpp       |  32 ++---
 relational_operators/HashJoinOperator.hpp       | 136 ++++++++++--------
 relational_operators/WorkOrder.proto            |   2 +-
 relational_operators/WorkOrderFactory.cpp       |  14 +-
 types/TypedValue.hpp                            |  19 +++
 7 files changed, 284 insertions(+), 113 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0b73be17/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index e25b8ad..aced3e1 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -132,8 +132,10 @@
 #include "gflags/gflags.h"
 #include "glog/logging.h"
 
+using std::find;
 using std::move;
 using std::static_pointer_cast;
+using std::swap;
 using std::unique_ptr;
 using std::unordered_map;
 using std::vector;
@@ -608,6 +610,44 @@ void ExecutionGenerator::convertSharedSubplanReference(const physical::SharedSub
   }
 }
 
+namespace {
+
+bool areSamePartitionSchemeHeaders(const PartitionSchemeHeader &lhs_partition_header,
+                                   const CatalogRelationSchema &lhs_scheme,
+                                   const PartitionSchemeHeader &rhs_partition_header,
+                                   const CatalogRelationSchema &rhs_scheme) {
+  if (lhs_partition_header.getPartitionType() != rhs_partition_header.getPartitionType())
{
+    return false;
+  }
+
+  if (lhs_partition_header.getNumPartitions() != rhs_partition_header.getNumPartitions())
{
+    return false;
+  }
+
+  // Check whether the underlying types in CatalogAttribute are the same.
+  if (!lhs_scheme.getAttributeById(lhs_partition_header.getPartitionAttributeId())->getType().equals(
+       rhs_scheme.getAttributeById(rhs_partition_header.getPartitionAttributeId())->getType()))
{
+    return false;
+  }
+
+  switch (lhs_partition_header.getPartitionType()) {
+    case PartitionSchemeHeader::PartitionType::kHash:
+      return true;
+    case PartitionSchemeHeader::PartitionType::kRange: {
+      const vector<TypedValue> &lhs_ranges =
+          static_cast<const RangePartitionSchemeHeader&>(lhs_partition_header).getPartitionRangeBoundaries();
+      const vector<TypedValue> &rhs_ranges =
+          static_cast<const RangePartitionSchemeHeader&>(rhs_partition_header).getPartitionRangeBoundaries();
+
+      return lhs_ranges == rhs_ranges;
+    }
+  }
+
+  return false;
+}
+
+}  // namespace
+
 void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
   // HashJoin is converted to three operators:
   //     BuildHash, HashJoin, DestroyHash. The second is the primary operator.
@@ -618,13 +658,10 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan)
{
   std::vector<attribute_id> probe_attribute_ids;
   std::vector<attribute_id> build_attribute_ids;
 
-  std::size_t build_cardinality =
-      cost_model_for_hash_join_->estimateCardinality(build_physical);
-
   bool any_probe_attributes_nullable = false;
   bool any_build_attributes_nullable = false;
 
-  const std::vector<E::AttributeReferencePtr> &left_join_attributes =
+  std::vector<E::AttributeReferencePtr> left_join_attributes =
       physical_plan->left_join_attributes();
   for (const E::AttributeReferencePtr &left_join_attribute : left_join_attributes) {
     const CatalogAttribute *probe_catalog_attribute
@@ -636,7 +673,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan)
{
     }
   }
 
-  const std::vector<E::AttributeReferencePtr> &right_join_attributes =
+  std::vector<E::AttributeReferencePtr> right_join_attributes =
       physical_plan->right_join_attributes();
   for (const E::AttributeReferencePtr &right_join_attribute : right_join_attributes)
{
     const CatalogAttribute *build_catalog_attribute
@@ -648,6 +685,76 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan)
{
     }
   }
 
+  const CatalogRelationInfo *build_relation_info =
+      findRelationInfoOutputByPhysical(build_physical);
+  const CatalogRelationInfo *probe_operator_info =
+      findRelationInfoOutputByPhysical(probe_physical);
+
+  const CatalogRelation *build_relation = build_relation_info->relation;
+  const CatalogRelation *probe_relation = probe_operator_info->relation;
+
+  const PartitionScheme *build_partition_scheme = build_relation->getPartitionScheme();
+  const PartitionScheme *probe_partition_scheme = probe_relation->getPartitionScheme();
+
+  // Check whether we need repartition for both build and probe.
+  if (build_partition_scheme) {
+    const PartitionSchemeHeader &build_partition_scheme_header = build_partition_scheme->getPartitionSchemeHeader();
+    switch (build_partition_scheme_header.getPartitionType()) {
+      case PartitionSchemeHeader::PartitionType::kRange:
+        THROW_SQL_ERROR() << "Re-partition for range-based is not supported";
+      case PartitionSchemeHeader::PartitionType::kHash: {
+        const attribute_id build_partition_attr = build_partition_scheme_header.getPartitionAttributeId();
+        if (find(build_attribute_ids.begin(), build_attribute_ids.end(), build_partition_attr)
==
+                build_attribute_ids.end()) {
+          THROW_SQL_ERROR() << "Re-partition for hash-based is not supported";
+        }
+        break;
+      }
+    }
+  }
+
+  if (probe_partition_scheme) {
+    const PartitionSchemeHeader &probe_partition_scheme_header = probe_partition_scheme->getPartitionSchemeHeader();
+    switch (probe_partition_scheme_header.getPartitionType()) {
+      case PartitionSchemeHeader::PartitionType::kRange:
+        THROW_SQL_ERROR() << "Re-partition for range-based is not supported";
+      case PartitionSchemeHeader::PartitionType::kHash: {
+        const attribute_id probe_partition_attr = probe_partition_scheme_header.getPartitionAttributeId();
+        if (find(probe_attribute_ids.begin(), probe_attribute_ids.end(), probe_partition_attr)
==
+                probe_attribute_ids.end()) {
+          THROW_SQL_ERROR() << "Re-partition for hash-based is not supported";
+        }
+        break;
+      }
+    }
+  }
+
+  // No partition.
+  std::size_t num_partitions = 1;
+
+  // Set 'num_partitions', and swap the build and probe, if needed.
+  if (build_partition_scheme && probe_partition_scheme) {
+    const PartitionSchemeHeader &build_partition_scheme_header = build_partition_scheme->getPartitionSchemeHeader();
+    const PartitionSchemeHeader &probe_partition_scheme_header = probe_partition_scheme->getPartitionSchemeHeader();
+    if (!areSamePartitionSchemeHeaders(build_partition_scheme_header, *build_relation,
+                                       probe_partition_scheme_header, *probe_relation)) {
+      THROW_SQL_ERROR() << "Hash Join of two different HashPartitionSchemeHeaders is
not supported";
+    }
+
+    num_partitions = build_partition_scheme->getPartitionSchemeHeader().getNumPartitions();
+  } else if (build_partition_scheme) {
+    num_partitions = build_partition_scheme->getPartitionSchemeHeader().getNumPartitions();
+  } else if (probe_partition_scheme) {
+    num_partitions = probe_partition_scheme->getPartitionSchemeHeader().getNumPartitions();
+
+    swap(probe_physical, build_physical);
+    swap(probe_attribute_ids, build_attribute_ids);
+    swap(any_probe_attributes_nullable, any_build_attributes_nullable);
+    swap(left_join_attributes, right_join_attributes);
+    swap(probe_operator_info, build_relation_info);
+    swap(probe_relation, build_relation);
+  }
+
   // Remember key types for call to SimplifyHashTableImplTypeProto() below.
   std::vector<const Type*> key_types;
   for (std::vector<E::AttributeReferencePtr>::size_type attr_idx = 0;
@@ -677,11 +784,6 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan)
{
   convertNamedExpressions(physical_plan->project_expressions(),
                           query_context_proto_->add_scalar_groups());
 
-  const CatalogRelationInfo *build_relation_info =
-      findRelationInfoOutputByPhysical(build_physical);
-  const CatalogRelationInfo *probe_operator_info =
-      findRelationInfoOutputByPhysical(probe_physical);
-
   // Create a vector that indicates whether each project expression is using
   // attributes from the build relation as input. This information is required
   // by the current implementation of hash left outer join
@@ -694,10 +796,8 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan)
{
                 build_physical->getOutputAttributes())));
   }
 
-  const CatalogRelation *build_relation = build_relation_info->relation;
-
   // FIXME(quickstep-team): Add support for self-join.
-  if (build_relation == probe_operator_info->relation) {
+  if (build_relation == probe_relation) {
     THROW_SQL_ERROR() << "Self-join is not supported";
   }
 
@@ -706,18 +806,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan)
{
       query_context_proto_->join_hash_tables_size();
   S::QueryContext::HashTableContext *hash_table_context_proto =
       query_context_proto_->add_join_hash_tables();
-
-  // No partition.
-  std::size_t num_partitions = 1;
-  if (build_relation->hasPartitionScheme() &&
-      build_attribute_ids.size() == 1) {
-    const PartitionSchemeHeader &partition_scheme_header =
-        build_relation->getPartitionScheme()->getPartitionSchemeHeader();
-    if (build_attribute_ids[0] == partition_scheme_header.getPartitionAttributeId()) {
-      // TODO(zuyu): add optimizer support for partitioned hash joins.
-      hash_table_context_proto->set_num_partitions(num_partitions);
-    }
-  }
+  hash_table_context_proto->set_num_partitions(num_partitions);
 
   S::HashTable *hash_table_proto = hash_table_context_proto->mutable_join_hash_table();
 
@@ -734,6 +823,8 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan)
{
         build_relation->getAttributeById(build_attribute)->getType().getProto());
   }
 
+  const std::size_t build_cardinality =
+      cost_model_for_hash_join_->estimateCardinality(build_physical);
   hash_table_proto->set_estimated_num_entries(build_cardinality);
 
   // Create three operators.
@@ -784,7 +875,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan)
{
           new HashJoinOperator(
               query_handle_->query_id(),
               *build_relation,
-              *probe_operator_info->relation,
+              *probe_relation,
               probe_operator_info->isStoredRelation(),
               probe_attribute_ids,
               any_probe_attributes_nullable,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0b73be17/query_optimizer/tests/execution_generator/Partition.test
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/execution_generator/Partition.test b/query_optimizer/tests/execution_generator/Partition.test
index ab05391..8e94316 100644
--- a/query_optimizer/tests/execution_generator/Partition.test
+++ b/query_optimizer/tests/execution_generator/Partition.test
@@ -18,12 +18,20 @@
 CREATE TABLE foo (id INT NULL,
                   name CHAR(20))
 PARTITION BY HASH(id) PARTITIONS 4;
+CREATE TABLE bar (id INT NULL,
+                  score DOUBLE NULL)
+PARTITION BY HASH(id) PARTITIONS 4;
 
 INSERT INTO foo
 SELECT int_col, char_col
 FROM test
 WHERE int_col > 0 OR int_col < 0;
 
+INSERT INTO bar
+SELECT int_col, double_col
+FROM test
+WHERE int_col % 2 = 0;
+
 SELECT * FROM foo;
 --
 +-----------+--------------------+
@@ -52,3 +60,46 @@ SELECT * FROM foo;
 |        -17|        -17 4.123106|
 |        -21|        -21 4.582576|
 +-----------+--------------------+
+==
+
+# Partitioned Hash Join.
+SELECT bar.id, foo.name AS "char_col"
+FROM foo JOIN bar ON foo.id = bar.id;
+--
++-----------+--------------------+
+|id         |char_col            |
++-----------+--------------------+
+|          4|          4 2.000000|
+|          8|          8 2.828427|
+|         12|         12 3.464102|
+|         16|         16 4.000000|
+|         24|         24 4.898979|
+|          2|          2 1.414214|
+|          6|          6 2.449490|
+|         14|         14 3.741657|
+|         18|         18 4.242641|
+|         22|         22 4.690416|
++-----------+--------------------+
+==
+
+# Hash Join with two stored relations, one of which is partitioned.
+SELECT bar.id, test.char_col
+FROM test JOIN bar ON test.int_col = bar.id;
+--
+[same as above]
+==
+
+# Same as above, but has a different join order.
+SELECT bar.id, test.char_col
+FROM bar JOIN test ON bar.id = test.int_col;
+--
+[same as above]
+==
+
+# Hash Join with one stored, partitioned relation,
+# and a non-stored, non-partitioned one.
+SELECT bar.id, test.char_col
+FROM bar JOIN test ON bar.id = test.int_col
+WHERE test.int_col % 2 = 0;
+--
+[same as above]

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0b73be17/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index 0062b93..7fd46c9 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -213,14 +213,14 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
         return true;
       }
 
-      for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
+      for (std::size_t part_id = 0; part_id < build_num_partitions_; ++part_id) {
         const JoinHashTable &hash_table =
             *(query_context->getJoinHashTable(hash_table_index_, part_id));
 
         for (const block_id probe_block_id : probe_relation_block_ids_[part_id]) {
           container->addNormalWorkOrder(
               new JoinWorkOrderClass(query_id_, build_relation_, probe_relation_, join_key_attributes_,
-                                     any_join_key_attributes_nullable_, num_partitions_,
part_id, probe_block_id,
+                                     any_join_key_attributes_nullable_, build_num_partitions_,
part_id, probe_block_id,
                                      residual_predicate, selection, hash_table, output_destination,
storage_manager,
                                      CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_,
query_context)),
               op_index_);
@@ -229,14 +229,14 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
       started_ = true;
       return true;
     } else {
-      for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
+      for (std::size_t part_id = 0; part_id < build_num_partitions_; ++part_id) {
         const JoinHashTable &hash_table =
             *(query_context->getJoinHashTable(hash_table_index_, part_id));
 
         while (num_workorders_generated_[part_id] < probe_relation_block_ids_[part_id].size())
{
           container->addNormalWorkOrder(
               new JoinWorkOrderClass(query_id_, build_relation_, probe_relation_, join_key_attributes_,
-                                     any_join_key_attributes_nullable_, num_partitions_,
part_id,
+                                     any_join_key_attributes_nullable_, build_num_partitions_,
part_id,
                                      probe_relation_block_ids_[part_id][num_workorders_generated_[part_id]],
                                      residual_predicate, selection, hash_table, output_destination,
storage_manager,
                                      CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_,
query_context)),
@@ -269,16 +269,16 @@ bool HashJoinOperator::getAllOuterJoinWorkOrders(
         return true;
       }
 
-      for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
+      for (std::size_t part_id = 0; part_id < build_num_partitions_; ++part_id) {
         const JoinHashTable &hash_table =
             *(query_context->getJoinHashTable(hash_table_index_, part_id));
 
         for (const block_id probe_block_id : probe_relation_block_ids_[part_id]) {
           container->addNormalWorkOrder(
               new HashOuterJoinWorkOrder(query_id_, build_relation_, probe_relation_, join_key_attributes_,
-                                         any_join_key_attributes_nullable_, num_partitions_,
part_id, probe_block_id,
-                                         selection, is_selection_on_build_, hash_table, output_destination,
-                                         storage_manager,
+                                         any_join_key_attributes_nullable_, build_num_partitions_,
part_id,
+                                         probe_block_id, selection, is_selection_on_build_,
hash_table,
+                                         output_destination, storage_manager,
                                          CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_,
query_context)),
               op_index_);
         }
@@ -286,14 +286,14 @@ bool HashJoinOperator::getAllOuterJoinWorkOrders(
       started_ = true;
       return true;
     } else {
-      for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
+      for (std::size_t part_id = 0; part_id < build_num_partitions_; ++part_id) {
         const JoinHashTable &hash_table =
             *(query_context->getJoinHashTable(hash_table_index_, part_id));
 
         while (num_workorders_generated_[part_id] < probe_relation_block_ids_[part_id].size())
{
           container->addNormalWorkOrder(
               new HashOuterJoinWorkOrder(query_id_, build_relation_, probe_relation_, join_key_attributes_,
-                                         any_join_key_attributes_nullable_, num_partitions_,
part_id,
+                                         any_join_key_attributes_nullable_, build_num_partitions_,
part_id,
                                          probe_relation_block_ids_[part_id][num_workorders_generated_[part_id]],
                                          selection, is_selection_on_build_, hash_table, output_destination,
                                          storage_manager,
@@ -336,7 +336,7 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrderProtos(
       return true;
     }
 
-    for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
+    for (std::size_t part_id = 0; part_id < build_num_partitions_; ++part_id) {
       for (const block_id probe_block_id : probe_relation_block_ids_[part_id]) {
         container->addWorkOrderProto(
             createNonOuterJoinWorkOrderProto(hash_join_type, probe_block_id, part_id),
@@ -346,7 +346,7 @@ bool HashJoinOperator::getAllNonOuterJoinWorkOrderProtos(
     started_ = true;
     return true;
   } else {
-    for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
+    for (std::size_t part_id = 0; part_id < build_num_partitions_; ++part_id) {
       while (num_workorders_generated_[part_id] < probe_relation_block_ids_[part_id].size())
{
         container->addWorkOrderProto(
             createNonOuterJoinWorkOrderProto(hash_join_type,
@@ -376,7 +376,7 @@ serialization::WorkOrder* HashJoinOperator::createNonOuterJoinWorkOrderProto(
   }
   proto->SetExtension(serialization::HashJoinWorkOrder::any_join_key_attributes_nullable,
                       any_join_key_attributes_nullable_);
-  proto->SetExtension(serialization::HashJoinWorkOrder::num_partitions, num_partitions_);
+  proto->SetExtension(serialization::HashJoinWorkOrder::build_num_partitions, build_num_partitions_);
   proto->SetExtension(serialization::HashJoinWorkOrder::insert_destination_index, output_destination_index_);
   proto->SetExtension(serialization::HashJoinWorkOrder::join_hash_table_index, hash_table_index_);
   proto->SetExtension(serialization::HashJoinWorkOrder::partition_id, part_id);
@@ -399,7 +399,7 @@ bool HashJoinOperator::getAllOuterJoinWorkOrderProtos(WorkOrderProtosContainer
*
       return true;
     }
 
-    for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
+    for (std::size_t part_id = 0; part_id < build_num_partitions_; ++part_id) {
       for (const block_id probe_block_id : probe_relation_block_ids_[part_id]) {
         container->addWorkOrderProto(createOuterJoinWorkOrderProto(probe_block_id, part_id),
op_index_);
       }
@@ -407,7 +407,7 @@ bool HashJoinOperator::getAllOuterJoinWorkOrderProtos(WorkOrderProtosContainer
*
     started_ = true;
     return true;
   } else {
-    for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
+    for (std::size_t part_id = 0; part_id < build_num_partitions_; ++part_id) {
       while (num_workorders_generated_[part_id] < probe_relation_block_ids_[part_id].size())
{
         container->addWorkOrderProto(
             createOuterJoinWorkOrderProto(probe_relation_block_ids_[part_id][num_workorders_generated_[part_id]],
@@ -436,7 +436,7 @@ serialization::WorkOrder* HashJoinOperator::createOuterJoinWorkOrderProto(const
   }
   proto->SetExtension(serialization::HashJoinWorkOrder::any_join_key_attributes_nullable,
                       any_join_key_attributes_nullable_);
-  proto->SetExtension(serialization::HashJoinWorkOrder::num_partitions, num_partitions_);
+  proto->SetExtension(serialization::HashJoinWorkOrder::build_num_partitions, build_num_partitions_);
   proto->SetExtension(serialization::HashJoinWorkOrder::insert_destination_index, output_destination_index_);
   proto->SetExtension(serialization::HashJoinWorkOrder::join_hash_table_index, hash_table_index_);
   proto->SetExtension(serialization::HashJoinWorkOrder::selection_index, selection_index_);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0b73be17/relational_operators/HashJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index e655f70..5cf7cb2 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -101,8 +101,8 @@ class HashJoinOperator : public RelationalOperator {
    * @param join_key_attributes The IDs of equijoin attributes in
    *        probe_relation.
    * @param any_join_key_attributes_nullable If any attribute is nullable.
-   * @param num_partitions The number of partitions in 'input_relation'. If no
-   *        partitions, it is one.
+   * @param build_num_partitions The number of partitions in 'build_relation'.
+   *        If no partitions, it is one.
    * @param output_relation The output relation.
    * @param output_destination_index The index of the InsertDestination in the
    *        QueryContext to insert the join results.
@@ -128,7 +128,7 @@ class HashJoinOperator : public RelationalOperator {
       const bool probe_relation_is_stored,
       const std::vector<attribute_id> &join_key_attributes,
       const bool any_join_key_attributes_nullable,
-      const std::size_t num_partitions,
+      const std::size_t build_num_partitions,
       const CatalogRelation &output_relation,
       const QueryContext::insert_destination_id output_destination_index,
       const QueryContext::join_hash_table_id hash_table_index,
@@ -142,7 +142,7 @@ class HashJoinOperator : public RelationalOperator {
         probe_relation_is_stored_(probe_relation_is_stored),
         join_key_attributes_(join_key_attributes),
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
-        num_partitions_(num_partitions),
+        build_num_partitions_(build_num_partitions),
         output_relation_(output_relation),
         output_destination_index_(output_destination_index),
         hash_table_index_(hash_table_index),
@@ -152,8 +152,8 @@ class HashJoinOperator : public RelationalOperator {
                                    ? std::vector<bool>()
                                    : *is_selection_on_build),
         join_type_(join_type),
-        probe_relation_block_ids_(num_partitions),
-        num_workorders_generated_(num_partitions),
+        probe_relation_block_ids_(build_num_partitions),
+        num_workorders_generated_(build_num_partitions),
         started_(false) {
     DCHECK(join_type != JoinType::kLeftOuterJoin ||
                (is_selection_on_build != nullptr &&
@@ -162,12 +162,15 @@ class HashJoinOperator : public RelationalOperator {
     if (probe_relation_is_stored) {
       if (probe_relation.hasPartitionScheme()) {
         const PartitionScheme &part_scheme = *probe_relation.getPartitionScheme();
-        for (std::size_t part_id = 0; part_id < num_partitions_; ++part_id) {
+        DCHECK_EQ(build_num_partitions_, part_scheme.getPartitionSchemeHeader().getNumPartitions());
+        for (std::size_t part_id = 0; part_id < build_num_partitions_; ++part_id) {
           probe_relation_block_ids_[part_id] = part_scheme.getBlocksInPartition(part_id);
         }
       } else {
-        // No partition.
-        probe_relation_block_ids_[0] = probe_relation.getBlocksSnapshot();
+        // Broadcast join if probe has no partitions.
+        for (std::size_t part_id = 0; part_id < build_num_partitions_; ++part_id) {
+          probe_relation_block_ids_[part_id] = probe_relation.getBlocksSnapshot();
+        }
       }
     }
   }
@@ -209,7 +212,14 @@ class HashJoinOperator : public RelationalOperator {
                       const partition_id part_id) override {
     DCHECK_EQ(probe_relation_.getID(), input_relation_id);
 
-    probe_relation_block_ids_[part_id].push_back(input_block_id);
+    if (probe_relation_.hasPartitionScheme()) {
+      probe_relation_block_ids_[part_id].push_back(input_block_id);
+    } else {
+      // Broadcast join if probe has no partitions.
+      for (std::size_t build_part_id = 0; build_part_id < build_num_partitions_; ++build_part_id)
{
+        probe_relation_block_ids_[build_part_id].push_back(input_block_id);
+      }
+    }
   }
 
   QueryContext::insert_destination_id getInsertDestinationID() const override {
@@ -261,7 +271,7 @@ class HashJoinOperator : public RelationalOperator {
   const bool probe_relation_is_stored_;
   const std::vector<attribute_id> join_key_attributes_;
   const bool any_join_key_attributes_nullable_;
-  const std::size_t num_partitions_;
+  const std::size_t build_num_partitions_;
   const CatalogRelation &output_relation_;
   const QueryContext::insert_destination_id output_destination_index_;
   const QueryContext::join_hash_table_id hash_table_index_;
@@ -295,8 +305,8 @@ class HashInnerJoinWorkOrder : public WorkOrder {
    * @param join_key_attributes The IDs of equijoin attributes in \c
    *        probe_relation.
    * @param any_join_key_attributes_nullable If any attribute is nullable.
-   * @param num_partitions The number of partitions in 'probe_relation'. If no
-   *        partitions, it is one.
+   * @param build_num_partitions The number of partitions in 'build_relation'.
+   *        If no partitions, it is one.
    * @param part_id The partition id of 'probe_relation'.
    * @param lookup_block_id The block id of the probe_relation.
    * @param residual_predicate If non-null, apply as an additional filter to
@@ -317,7 +327,7 @@ class HashInnerJoinWorkOrder : public WorkOrder {
       const CatalogRelationSchema &probe_relation,
       const std::vector<attribute_id> &join_key_attributes,
       const bool any_join_key_attributes_nullable,
-      const std::size_t num_partitions,
+      const std::size_t build_num_partitions,
       const partition_id part_id,
       const block_id lookup_block_id,
       const Predicate *residual_predicate,
@@ -331,7 +341,7 @@ class HashInnerJoinWorkOrder : public WorkOrder {
         probe_relation_(probe_relation),
         join_key_attributes_(join_key_attributes),
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
-        num_partitions_(num_partitions),
+        build_num_partitions_(build_num_partitions),
         part_id_(part_id),
         block_id_(lookup_block_id),
         residual_predicate_(residual_predicate),
@@ -352,8 +362,8 @@ class HashInnerJoinWorkOrder : public WorkOrder {
    * @param join_key_attributes The IDs of equijoin attributes in \c
    *        probe_relation.
    * @param any_join_key_attributes_nullable If any attribute is nullable.
-   * @param num_partitions The number of partitions in 'probe_relation'. If no
-   *        partitions, it is one.
+   * @param build_num_partitions The number of partitions in 'build_relation'.
+   *        If no partitions, it is one.
    * @param part_id The partition id of 'probe_relation'.
    * @param lookup_block_id The block id of the probe_relation.
    * @param residual_predicate If non-null, apply as an additional filter to
@@ -374,7 +384,7 @@ class HashInnerJoinWorkOrder : public WorkOrder {
       const CatalogRelationSchema &probe_relation,
       std::vector<attribute_id> &&join_key_attributes,
       const bool any_join_key_attributes_nullable,
-      const std::size_t num_partitions,
+      const std::size_t build_num_partitions,
       const partition_id part_id,
       const block_id lookup_block_id,
       const Predicate *residual_predicate,
@@ -388,7 +398,7 @@ class HashInnerJoinWorkOrder : public WorkOrder {
         probe_relation_(probe_relation),
         join_key_attributes_(std::move(join_key_attributes)),
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
-        num_partitions_(num_partitions),
+        build_num_partitions_(build_num_partitions),
         part_id_(part_id),
         block_id_(lookup_block_id),
         residual_predicate_(residual_predicate),
@@ -411,12 +421,12 @@ class HashInnerJoinWorkOrder : public WorkOrder {
   void execute() override;
 
   /**
-   * @brief Get the number of partitions.
+   * @brief Get the number of partitions in 'build_relation'.
    *
-   * @return The number of partitions.
+   * @return The number of partitions in 'build_relation'.
    */
-  std::size_t num_partitions() const {
-    return num_partitions_;
+  std::size_t build_num_partitions() const {
+    return build_num_partitions_;
   }
 
   /**
@@ -433,7 +443,7 @@ class HashInnerJoinWorkOrder : public WorkOrder {
   const CatalogRelationSchema &probe_relation_;
   const std::vector<attribute_id> join_key_attributes_;
   const bool any_join_key_attributes_nullable_;
-  const std::size_t num_partitions_;
+  const std::size_t build_num_partitions_;
   const partition_id part_id_;
   const block_id block_id_;
   const Predicate *residual_predicate_;
@@ -465,8 +475,8 @@ class HashSemiJoinWorkOrder : public WorkOrder {
    * @param join_key_attributes The IDs of equijoin attributes in \c
    *        probe_relation.
    * @param any_join_key_attributes_nullable If any attribute is nullable.
-   * @param num_partitions The number of partitions in 'probe_relation'. If no
-   *        partitions, it is one.
+   * @param build_num_partitions The number of partitions in 'build_relation'.
+   *        If no partitions, it is one.
    * @param part_id The partition id of 'probe_relation'.
    * @param lookup_block_id The block id of the probe_relation.
    * @param residual_predicate If non-null, apply as an additional filter to
@@ -487,7 +497,7 @@ class HashSemiJoinWorkOrder : public WorkOrder {
       const CatalogRelationSchema &probe_relation,
       const std::vector<attribute_id> &join_key_attributes,
       const bool any_join_key_attributes_nullable,
-      const std::size_t num_partitions,
+      const std::size_t build_num_partitions,
       const partition_id part_id,
       const block_id lookup_block_id,
       const Predicate *residual_predicate,
@@ -501,7 +511,7 @@ class HashSemiJoinWorkOrder : public WorkOrder {
         probe_relation_(probe_relation),
         join_key_attributes_(join_key_attributes),
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
-        num_partitions_(num_partitions),
+        build_num_partitions_(build_num_partitions),
         part_id_(part_id),
         block_id_(lookup_block_id),
         residual_predicate_(residual_predicate),
@@ -522,8 +532,8 @@ class HashSemiJoinWorkOrder : public WorkOrder {
    * @param join_key_attributes The IDs of equijoin attributes in \c
    *        probe_relation.
    * @param any_join_key_attributes_nullable If any attribute is nullable.
-   * @param num_partitions The number of partitions in 'probe_relation'. If no
-   *        partitions, it is one.
+   * @param build_num_partitions The number of partitions in 'build_relation'.
+   *        If no partitions, it is one.
    * @param part_id The partition id of 'probe_relation'.
    * @param lookup_block_id The block id of the probe_relation.
    * @param residual_predicate If non-null, apply as an additional filter to
@@ -544,7 +554,7 @@ class HashSemiJoinWorkOrder : public WorkOrder {
       const CatalogRelationSchema &probe_relation,
       std::vector<attribute_id> &&join_key_attributes,
       const bool any_join_key_attributes_nullable,
-      const std::size_t num_partitions,
+      const std::size_t build_num_partitions,
       const partition_id part_id,
       const block_id lookup_block_id,
       const Predicate *residual_predicate,
@@ -558,7 +568,7 @@ class HashSemiJoinWorkOrder : public WorkOrder {
         probe_relation_(probe_relation),
         join_key_attributes_(std::move(join_key_attributes)),
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
-        num_partitions_(num_partitions),
+        build_num_partitions_(build_num_partitions),
         part_id_(part_id),
         block_id_(lookup_block_id),
         residual_predicate_(residual_predicate),
@@ -573,12 +583,12 @@ class HashSemiJoinWorkOrder : public WorkOrder {
   void execute() override;
 
   /**
-   * @brief Get the number of partitions.
+   * @brief Get the number of partitions in 'build_relation'.
    *
-   * @return The number of partitions.
+   * @return The number of partitions in 'build_relation'.
    */
-  std::size_t num_partitions() const {
-    return num_partitions_;
+  std::size_t build_num_partitions() const {
+    return build_num_partitions_;
   }
 
   /**
@@ -599,7 +609,7 @@ class HashSemiJoinWorkOrder : public WorkOrder {
   const CatalogRelationSchema &probe_relation_;
   const std::vector<attribute_id> join_key_attributes_;
   const bool any_join_key_attributes_nullable_;
-  const std::size_t num_partitions_;
+  const std::size_t build_num_partitions_;
   const partition_id part_id_;
   const block_id block_id_;
   const Predicate *residual_predicate_;
@@ -631,8 +641,8 @@ class HashAntiJoinWorkOrder : public WorkOrder {
    * @param join_key_attributes The IDs of equijoin attributes in \c
    *        probe_relation.
    * @param any_join_key_attributes_nullable If any attribute is nullable.
-   * @param num_partitions The number of partitions in 'probe_relation'. If no
-   *        partitions, it is one.
+   * @param build_num_partitions The number of partitions in 'build_relation'.
+   *        If no partitions, it is one.
    * @param part_id The partition id of 'probe_relation'.
    * @param lookup_block_id The block id of the probe_relation.
    * @param residual_predicate If non-null, apply as an additional filter to
@@ -653,7 +663,7 @@ class HashAntiJoinWorkOrder : public WorkOrder {
       const CatalogRelationSchema &probe_relation,
       const std::vector<attribute_id> &join_key_attributes,
       const bool any_join_key_attributes_nullable,
-      const std::size_t num_partitions,
+      const std::size_t build_num_partitions,
       const partition_id part_id,
       const block_id lookup_block_id,
       const Predicate *residual_predicate,
@@ -667,7 +677,7 @@ class HashAntiJoinWorkOrder : public WorkOrder {
         probe_relation_(probe_relation),
         join_key_attributes_(join_key_attributes),
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
-        num_partitions_(num_partitions),
+        build_num_partitions_(build_num_partitions),
         part_id_(part_id),
         block_id_(lookup_block_id),
         residual_predicate_(residual_predicate),
@@ -688,8 +698,8 @@ class HashAntiJoinWorkOrder : public WorkOrder {
    * @param join_key_attributes The IDs of equijoin attributes in \c
    *        probe_relation.
    * @param any_join_key_attributes_nullable If any attribute is nullable.
-   * @param num_partitions The number of partitions in 'probe_relation'. If no
-   *        partitions, it is one.
+   * @param build_num_partitions The number of partitions in 'build_relation'.
+   *        If no partitions, it is one.
    * @param part_id The partition id of 'probe_relation'.
    * @param lookup_block_id The block id of the probe_relation.
    * @param residual_predicate If non-null, apply as an additional filter to
@@ -710,7 +720,7 @@ class HashAntiJoinWorkOrder : public WorkOrder {
       const CatalogRelationSchema &probe_relation,
       std::vector<attribute_id> &&join_key_attributes,
       const bool any_join_key_attributes_nullable,
-      const std::size_t num_partitions,
+      const std::size_t build_num_partitions,
       const partition_id part_id,
       const block_id lookup_block_id,
       const Predicate *residual_predicate,
@@ -724,7 +734,7 @@ class HashAntiJoinWorkOrder : public WorkOrder {
         probe_relation_(probe_relation),
         join_key_attributes_(std::move(join_key_attributes)),
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
-        num_partitions_(num_partitions),
+        build_num_partitions_(build_num_partitions),
         part_id_(part_id),
         block_id_(lookup_block_id),
         residual_predicate_(residual_predicate),
@@ -745,12 +755,12 @@ class HashAntiJoinWorkOrder : public WorkOrder {
   }
 
   /**
-   * @brief Get the number of partitions.
+   * @brief Get the number of partitions in 'build_relation'.
    *
-   * @return The number of partitions.
+   * @return The number of partitions in 'build_relation'.
    */
-  std::size_t num_partitions() const {
-    return num_partitions_;
+  std::size_t build_num_partitions() const {
+    return build_num_partitions_;
   }
 
   /**
@@ -771,7 +781,7 @@ class HashAntiJoinWorkOrder : public WorkOrder {
   const CatalogRelationSchema &probe_relation_;
   const std::vector<attribute_id> join_key_attributes_;
   const bool any_join_key_attributes_nullable_;
-  const std::size_t num_partitions_;
+  const std::size_t build_num_partitions_;
   const partition_id part_id_;
   const block_id block_id_;
   const Predicate *residual_predicate_;
@@ -802,8 +812,8 @@ class HashOuterJoinWorkOrder : public WorkOrder {
    * @param join_key_attributes The IDs of equijoin attributes in \c
    *        probe_relation.
    * @param any_join_key_attributes_nullable If any attribute is nullable.
-   * @param num_partitions The number of partitions in 'probe_relation'. If no
-   *        partitions, it is one.
+   * @param build_num_partitions The number of partitions in 'build_relation'.
+   *        If no partitions, it is one.
    * @param part_id The partition id of 'probe_relation'.
    * @param lookup_block_id The block id of the probe_relation.
    * @param selection A list of Scalars corresponding to the relation attributes
@@ -823,7 +833,7 @@ class HashOuterJoinWorkOrder : public WorkOrder {
       const CatalogRelationSchema &probe_relation,
       const std::vector<attribute_id> &join_key_attributes,
       const bool any_join_key_attributes_nullable,
-      const std::size_t num_partitions,
+      const std::size_t build_num_partitions,
       const partition_id part_id,
       const block_id lookup_block_id,
       const std::vector<std::unique_ptr<const Scalar>> &selection,
@@ -837,7 +847,7 @@ class HashOuterJoinWorkOrder : public WorkOrder {
         probe_relation_(probe_relation),
         join_key_attributes_(join_key_attributes),
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
-        num_partitions_(num_partitions),
+        build_num_partitions_(build_num_partitions),
         part_id_(part_id),
         block_id_(lookup_block_id),
         selection_(selection),
@@ -858,8 +868,8 @@ class HashOuterJoinWorkOrder : public WorkOrder {
    * @param join_key_attributes The IDs of equijoin attributes in \c
    *        probe_relation.
    * @param any_join_key_attributes_nullable If any attribute is nullable.
-   * @param num_partitions The number of partitions in 'probe_relation'. If no
-   *        partitions, it is one.
+   * @param build_num_partitions The number of partitions in 'build_relation'.
+   *        If no partitions, it is one.
    * @param part_id The partition id of 'probe_relation'.
    * @param lookup_block_id The block id of the probe_relation.
    * @param selection A list of Scalars corresponding to the relation attributes
@@ -878,7 +888,7 @@ class HashOuterJoinWorkOrder : public WorkOrder {
       const CatalogRelationSchema &probe_relation,
       std::vector<attribute_id> &&join_key_attributes,
       const bool any_join_key_attributes_nullable,
-      const std::size_t num_partitions,
+      const std::size_t build_num_partitions,
       const partition_id part_id,
       const block_id lookup_block_id,
       const std::vector<std::unique_ptr<const Scalar>> &selection,
@@ -892,7 +902,7 @@ class HashOuterJoinWorkOrder : public WorkOrder {
         probe_relation_(probe_relation),
         join_key_attributes_(std::move(join_key_attributes)),
         any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
-        num_partitions_(num_partitions),
+        build_num_partitions_(build_num_partitions),
         part_id_(part_id),
         block_id_(lookup_block_id),
         selection_(selection),
@@ -907,12 +917,12 @@ class HashOuterJoinWorkOrder : public WorkOrder {
   void execute() override;
 
   /**
-   * @brief Get the number of partitions.
+   * @brief Get the number of partitions in 'build_relation'.
    *
-   * @return The number of partitions.
+   * @return The number of partitions in 'build_relation'.
    */
-  std::size_t num_partitions() const {
-    return num_partitions_;
+  std::size_t build_num_partitions() const {
+    return build_num_partitions_;
   }
 
   /**
@@ -929,7 +939,7 @@ class HashOuterJoinWorkOrder : public WorkOrder {
   const CatalogRelationSchema &probe_relation_;
   const std::vector<attribute_id> join_key_attributes_;
   const bool any_join_key_attributes_nullable_;
-  const std::size_t num_partitions_;
+  const std::size_t build_num_partitions_;
   const partition_id part_id_;
   const block_id block_id_;
   const std::vector<std::unique_ptr<const Scalar>> &selection_;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0b73be17/relational_operators/WorkOrder.proto
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrder.proto b/relational_operators/WorkOrder.proto
index b914fce..b42d377 100644
--- a/relational_operators/WorkOrder.proto
+++ b/relational_operators/WorkOrder.proto
@@ -129,7 +129,7 @@ message HashJoinWorkOrder {
     optional int32 probe_relation_id = 162;
     repeated int32 join_key_attributes = 163;
     optional bool any_join_key_attributes_nullable = 164;
-    optional uint64 num_partitions = 172;
+    optional uint64 build_num_partitions = 172;
     optional int32 insert_destination_index = 165;
     optional uint32 join_hash_table_index = 166;
     optional uint64 partition_id = 173;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0b73be17/relational_operators/WorkOrderFactory.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrderFactory.cpp b/relational_operators/WorkOrderFactory.cpp
index c09bcbe..d6d0818 100644
--- a/relational_operators/WorkOrderFactory.cpp
+++ b/relational_operators/WorkOrderFactory.cpp
@@ -199,8 +199,8 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder
       const block_id lookup_block_id =
           proto.GetExtension(serialization::HashJoinWorkOrder::block_id);
 
-      const std::size_t num_partitions =
-          proto.GetExtension(serialization::HashJoinWorkOrder::num_partitions);
+      const std::size_t build_num_partitions =
+          proto.GetExtension(serialization::HashJoinWorkOrder::build_num_partitions);
 
       const Predicate *residual_predicate = nullptr;
       if (hash_join_work_order_type != serialization::HashJoinWorkOrder::HASH_OUTER_JOIN)
{
@@ -233,7 +233,7 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder
               probe_relation,
               move(join_key_attributes),
               any_join_key_attributes_nullable,
-              num_partitions,
+              build_num_partitions,
               part_id,
               lookup_block_id,
               residual_predicate,
@@ -251,7 +251,7 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder
               probe_relation,
               move(join_key_attributes),
               any_join_key_attributes_nullable,
-              num_partitions,
+              build_num_partitions,
               part_id,
               lookup_block_id,
               residual_predicate,
@@ -277,7 +277,7 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder
               probe_relation,
               move(join_key_attributes),
               any_join_key_attributes_nullable,
-              num_partitions,
+              build_num_partitions,
               part_id,
               lookup_block_id,
               selection,
@@ -295,7 +295,7 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder
               probe_relation,
               move(join_key_attributes),
               any_join_key_attributes_nullable,
-              num_partitions,
+              build_num_partitions,
               part_id,
               lookup_block_id,
               residual_predicate,
@@ -648,7 +648,7 @@ bool WorkOrderFactory::ProtoIsValid(const serialization::WorkOrder &proto,
              query_context.isValidInsertDestinationId(
                  proto.GetExtension(serialization::HashJoinWorkOrder::insert_destination_index))
&&
              proto.HasExtension(serialization::HashJoinWorkOrder::join_hash_table_index)
&&
-             proto.HasExtension(serialization::HashJoinWorkOrder::num_partitions) &&
+             proto.HasExtension(serialization::HashJoinWorkOrder::build_num_partitions) &&
              proto.HasExtension(serialization::HashJoinWorkOrder::partition_id) &&
              query_context.isValidJoinHashTableId(
                  proto.GetExtension(serialization::HashJoinWorkOrder::join_hash_table_index),

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0b73be17/types/TypedValue.hpp
----------------------------------------------------------------------
diff --git a/types/TypedValue.hpp b/types/TypedValue.hpp
index d75720a..832690d 100644
--- a/types/TypedValue.hpp
+++ b/types/TypedValue.hpp
@@ -253,6 +253,25 @@ class TypedValue {
   }
 
   /**
+   * @brief Equal operator.
+   **/
+  bool operator==(const TypedValue &rhs) const {
+    if (getTypeID() != rhs.getTypeID()) {
+      return false;
+    }
+
+    if (isNull() != rhs.isNull()) {
+      return false;
+    }
+
+    if (isNull()) {
+      return true;
+    }
+
+    return fastEqualCheck(rhs);
+  }
+
+  /**
    * @brief Create a new literal TypedValue with pre-allocated out-of-line
    *        data.
    * @warning The memory at value_ptr must be allocated with malloc() or



Mime
View raw message