quickstep-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jianq...@apache.org
Subject incubator-quickstep git commit: Exact cardinality experiment
Date Thu, 18 Aug 2016 03:12:05 GMT
Repository: incubator-quickstep
Updated Branches:
  refs/heads/LIP-for-tpch-exact-card-experiment [created] 36932d5ee


Exact cardinality experiment


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/36932d5e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/36932d5e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/36932d5e

Branch: refs/heads/LIP-for-tpch-exact-card-experiment
Commit: 36932d5ee09100026ee1c01fcc6e15ca57a5c45f
Parents: e7524cb
Author: Jianqiao Zhu <jianqiao@cs.wisc.edu>
Authored: Wed Aug 17 16:50:59 2016 -0500
Committer: Jianqiao Zhu <jianqiao@cs.wisc.edu>
Committed: Wed Aug 17 16:50:59 2016 -0500

----------------------------------------------------------------------
 .../aggregation/AggregationConcreteHandle.hpp   |  1 +
 query_optimizer/ExecutionGenerator.cpp          | 26 ++++++++++++++++++--
 query_optimizer/ExecutionGenerator.hpp          |  2 ++
 query_optimizer/ExecutionHeuristics.cpp         | 12 ++++-----
 relational_operators/BuildHashOperator.cpp      |  2 +-
 .../FinalizeAggregationOperator.cpp             |  1 +
 storage/AggregationOperationState.cpp           |  1 +
 7 files changed, 36 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/36932d5e/expressions/aggregation/AggregationConcreteHandle.hpp
----------------------------------------------------------------------
diff --git a/expressions/aggregation/AggregationConcreteHandle.hpp b/expressions/aggregation/AggregationConcreteHandle.hpp
index c5ca061..23d22d9 100644
--- a/expressions/aggregation/AggregationConcreteHandle.hpp
+++ b/expressions/aggregation/AggregationConcreteHandle.hpp
@@ -420,6 +420,7 @@ ColumnVector* AggregationConcreteHandle::finalizeHashTableHelper(
     std::vector<std::vector<TypedValue>> *group_by_keys) const {
   const HandleT &handle = static_cast<const HandleT&>(*this);
   const HashTableT &hash_table_concrete = static_cast<const HashTableT&>(hash_table);
+//  std::cerr << " # " << hash_table_concrete.numEntries() << "\n";
 
   if (group_by_keys->empty()) {
     if (NativeColumnVector::UsableForType(result_type)) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/36932d5e/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index f8559ec..27e31d5 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -22,6 +22,7 @@
 #include <algorithm>
 #include <cstddef>
 #include <memory>
+#include <sstream>
 #include <string>
 #include <type_traits>
 #include <unordered_map>
@@ -156,6 +157,9 @@ DEFINE_bool(parallelize_load, true, "Parallelize loading data files.");
 DEFINE_bool(optimize_joins, false,
             "Enable post execution plan generation optimizations for joins.");
 
+DEFINE_string(preset_hash_table_size, "",
+              "Prior knowledge of the hash tables' size. For testing only.");
+
 namespace E = ::quickstep::optimizer::expressions;
 namespace P = ::quickstep::optimizer::physical;
 namespace S = ::quickstep::serialization;
@@ -171,6 +175,17 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) {
   star_schema_cost_model_.reset(
       new cost::StarSchemaSimpleCostModel(top_level_physical_plan_->shared_subplans()));
 
+  if (!FLAGS_preset_hash_table_size.empty()) {
+    std::string preset_str = FLAGS_preset_hash_table_size;
+    std::replace(preset_str.begin(), preset_str.end(), ',', ' ');
+    std::istringstream cards(preset_str);
+    int op_index;
+    std::size_t hash_table_size;
+    while (cards >> op_index >> hash_table_size) {
+      preset_hash_table_size_.emplace(op_index, hash_table_size);
+    }
+  }
+
   const CatalogRelation *result_relation = nullptr;
 
   try {
@@ -739,8 +754,6 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
         build_relation->getAttributeById(build_attribute)->getType().getProto());
   }
 
-  hash_table_proto->set_estimated_num_entries(build_cardinality);
-
   // Create three operators.
   const QueryPlan::DAGNodeIndex build_operator_index =
       execution_plan_->addRelationalOperator(
@@ -752,6 +765,15 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
               any_build_attributes_nullable,
               join_hash_table_index));
 
+  if (preset_hash_table_size_.empty()) {
+    hash_table_proto->set_estimated_num_entries(build_cardinality);
+  } else {
+    std::cerr << "Set " << build_operator_index
+              << " with " << preset_hash_table_size_.at(build_operator_index) << "\n";
+    hash_table_proto->set_estimated_num_entries(
+      static_cast<std::size_t>(preset_hash_table_size_.at(build_operator_index) * 1.2 + 8));
+  }
+
   // Create InsertDestination proto.
   const CatalogRelation *output_relation = nullptr;
   const QueryContext::insert_destination_id insert_destination_index =

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/36932d5e/query_optimizer/ExecutionGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp
index 8358233..a23bbf1 100644
--- a/query_optimizer/ExecutionGenerator.hpp
+++ b/query_optimizer/ExecutionGenerator.hpp
@@ -426,6 +426,8 @@ class ExecutionGenerator {
 
   physical::TopLevelPlanPtr top_level_physical_plan_;
 
+  std::unordered_map<int, std::size_t> preset_hash_table_size_;
+
   DISALLOW_COPY_AND_ASSIGN(ExecutionGenerator);
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/36932d5e/query_optimizer/ExecutionHeuristics.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.cpp b/query_optimizer/ExecutionHeuristics.cpp
index 81e7362..b674c29 100644
--- a/query_optimizer/ExecutionHeuristics.cpp
+++ b/query_optimizer/ExecutionHeuristics.cpp
@@ -84,8 +84,8 @@ void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
       auto *probe_side_bloom_filter = hash_table_proto->add_probe_side_bloom_filters();
       const auto &probe_side_bf =
           bloom_filter_config.probe_side_bloom_filters[i];
-      std::cerr << "HashJoin probe " << probe_side_bf.attribute->toString()
-                << " @" << probe_side_bf.builder << "\n";
+//      std::cerr << "HashJoin probe " << probe_side_bf.attribute->toString()
+//                << " @" << probe_side_bf.builder << "\n";
 
       const auto &build_side_info =
            bloom_filter_map.at(
@@ -110,8 +110,8 @@ void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
       auto *bloom_filter = aggregate_proto->add_bloom_filters();
       const auto &bf =
           bloom_filter_config.probe_side_bloom_filters[i];
-      std::cerr << "Aggregate probe " << bf.attribute->toString()
-                << " @" << bf.builder << "\n";
+//      std::cerr << "Aggregate probe " << bf.attribute->toString()
+//                << " @" << bf.builder << "\n";
 
       const auto &build_side_info =
            bloom_filter_map.at(
@@ -134,8 +134,8 @@ void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
     for (std::size_t i = 0; i < info.bloom_filter_ids_.size(); ++i) {
       const auto &bf =
           bloom_filter_config.probe_side_bloom_filters[i];
-      std::cerr << "Select probe " << bf.attribute->toString()
-                << " @" << bf.builder << "\n";
+//      std::cerr << "Select probe " << bf.attribute->toString()
+//                << " @" << bf.builder << "\n";
 
       const auto &build_side_info =
            bloom_filter_map.at(

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/36932d5e/relational_operators/BuildHashOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/BuildHashOperator.cpp b/relational_operators/BuildHashOperator.cpp
index d9d2db2..f2daf05 100644
--- a/relational_operators/BuildHashOperator.cpp
+++ b/relational_operators/BuildHashOperator.cpp
@@ -144,7 +144,7 @@ serialization::WorkOrder* BuildHashOperator::createWorkOrderProto(const block_id
 }
 
 void BuildHashOperator::actionOnCompletion() {
-//  hash_table_->finalizeBuildSideThreadLocalBloomFilters();
+//  std::cerr << getOperatorIndex() << " : " << hash_table_->numEntries() << "\n";
 }
 
 void BuildHashWorkOrder::execute() {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/36932d5e/relational_operators/FinalizeAggregationOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/FinalizeAggregationOperator.cpp b/relational_operators/FinalizeAggregationOperator.cpp
index 65e62c4..74d1bc3 100644
--- a/relational_operators/FinalizeAggregationOperator.cpp
+++ b/relational_operators/FinalizeAggregationOperator.cpp
@@ -41,6 +41,7 @@ bool FinalizeAggregationOperator::getAllWorkOrders(
 
   if (blocking_dependencies_met_ && !started_) {
     started_ = true;
+//    std::cerr << getOperatorIndex();
     container->addNormalWorkOrder(
         new FinalizeAggregationWorkOrder(
             query_id_,

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/36932d5e/storage/AggregationOperationState.cpp
----------------------------------------------------------------------
diff --git a/storage/AggregationOperationState.cpp b/storage/AggregationOperationState.cpp
index d85b5c4..83815c8 100644
--- a/storage/AggregationOperationState.cpp
+++ b/storage/AggregationOperationState.cpp
@@ -530,6 +530,7 @@ void AggregationOperationState::finalizeSingleState(InsertDestination *output_de
 
     attribute_values.emplace_back(handles_[agg_idx]->finalize(*single_states_[agg_idx]));
   }
+//  std::cerr << " : 1\n";
 
   output_destination->insertTuple(Tuple(std::move(attribute_values)));
 }


Mime
View raw message