quickstep-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jianq...@apache.org
Subject incubator-quickstep git commit: updates
Date Thu, 07 Jul 2016 22:07:33 GMT
Repository: incubator-quickstep
Updated Branches:
  refs/heads/adaptive-bloom-filters 32608da27 -> 9cbd3e847


updates


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/9cbd3e84
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/9cbd3e84
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/9cbd3e84

Branch: refs/heads/adaptive-bloom-filters
Commit: 9cbd3e84786c6dc4280ddb048d8d3f8feef646a6
Parents: 32608da
Author: Jianqiao Zhu <jianqiao@cs.wisc.edu>
Authored: Thu Jul 7 17:07:18 2016 -0500
Committer: Jianqiao Zhu <jianqiao@cs.wisc.edu>
Committed: Thu Jul 7 17:07:18 2016 -0500

----------------------------------------------------------------------
 query_optimizer/CMakeLists.txt               |   1 +
 query_optimizer/PhysicalGenerator.cpp        |   3 +
 query_optimizer/rules/AttachBloomFilters.cpp | 118 ++++++++++++++++++++++
 query_optimizer/rules/AttachBloomFilters.hpp | 100 ++++++++++++++++++
 query_optimizer/rules/CMakeLists.txt         |  17 ++++
 storage/HashTable.hpp                        |  28 ++---
 utility/CMakeLists.txt                       |   1 +
 7 files changed, 249 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 8912414..ff48a54 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -193,6 +193,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
                       quickstep_queryoptimizer_LogicalToPhysicalMapper
                       quickstep_queryoptimizer_logical_Logical
                       quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_rules_AttachBloomFilters
                       quickstep_queryoptimizer_rules_PruneColumns
                       quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
                       quickstep_queryoptimizer_strategy_Aggregate

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 5f97d6f..50e1d86 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -26,6 +26,7 @@
 #include "query_optimizer/Validator.hpp"
 #include "query_optimizer/logical/Logical.hpp"
 #include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/AttachBloomFilters.hpp"
 #include "query_optimizer/rules/PruneColumns.hpp"
 #include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
 #include "query_optimizer/strategy/Aggregate.hpp"
@@ -99,6 +100,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
     rules.emplace_back(new StarSchemaHashJoinOrderOptimization());
   }
   rules.emplace_back(new PruneColumns());
+  rules.emplace_back(new AttachBloomFilters());
 
   for (std::unique_ptr<Rule<P::Physical>> &rule : rules) {
     physical_plan_ = rule->apply(physical_plan_);
@@ -112,6 +114,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
     quickstep::PlanVisualizer plan_visualizer;
     std::cerr << "\n" << plan_visualizer.visualize(physical_plan_) << "\n";
   }
+  exit(0);
 
 #ifdef QUICKSTEP_DEBUG
   Validate(physical_plan_);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/query_optimizer/rules/AttachBloomFilters.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachBloomFilters.cpp b/query_optimizer/rules/AttachBloomFilters.cpp
new file mode 100644
index 0000000..34822e5
--- /dev/null
+++ b/query_optimizer/rules/AttachBloomFilters.cpp
@@ -0,0 +1,118 @@
+/**
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin—Madison.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ **/
+
+#include "query_optimizer/rules/AttachBloomFilters.hpp"
+
+#include <memory>
+#include <set>
+#include <unordered_set>
+#include <unordered_map>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr AttachBloomFilters::apply(const P::PhysicalPtr &input) {
+  DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+  cost_model_.reset(
+      new cost::StarSchemaSimpleCostModel(
+          std::static_pointer_cast<const P::TopLevelPlan>(input)->shared_subplans()));
+
+  visitProducer(input);
+  visitConsumer(input);
+
+  for (const auto &info_vec_pair : producers_) {
+    std::cerr << "--------\n"
+              << "Node " << info_vec_pair.first->getName()
+              << info_vec_pair.first << "\n";
+    for (const auto &info : info_vec_pair.second) {
+      std::cerr << info.attribute->attribute_alias() << "@"
+                << info.source
+                << ": " << info.selectivity << "\n";
+    }
+    std::cerr << "********\n";
+  }
+
+  return input;
+}
+
+void AttachBloomFilters::visitProducer(const P::PhysicalPtr &node) {
+  std::vector<const BloomFilterInfo> bloom_filters;
+
+  // First check inherited bloom filters
+  std::vector<const BloomFilterInfo*> candidates;
+  switch (node->getPhysicalType()) {
+    case P::PhysicalType::kAggregate:
+    case P::PhysicalType::kSelection:
+    case P::PhysicalType::kHashJoin: {
+      for (const P::PhysicalPtr &child : node->children()) {
+        const auto bloom_filters_it = producers_.find(child);
+        if (bloom_filters_it != producers_.end()) {
+          for (const BloomFilterInfo info : bloom_filters_it->second) {
+            candidates.emplace_back(&info);
+          }
+        }
+      }
+    }
+    default:
+      break;
+  }
+
+  const std::vector<E::AttributeReferencePtr> output_attributes(
+      node->getOutputAttributes());
+
+  std::unordered_set<E::ExprId> output_attribute_ids;
+  for (const auto &attr : output_attributes) {
+    output_attribute_ids.emplace(attr->id());
+  }
+  for (const BloomFilterInfo *info : candidates) {
+    if (output_attribute_ids.find(info->attribute->id()) != output_attribute_ids.end())
{
+      bloom_filters.emplace_back(*info);
+    }
+  }
+
+  // Self-produced bloom filters
+  double selectivity = cost_model_->estimateSelectivity(node);
+  if (selectivity < 1.0) {
+    for (const auto &attr : output_attributes) {
+      bloom_filters.emplace_back(node, attr, selectivity, false);
+    }
+  }
+
+  producers_.emplace(node, std::move(bloom_filters));
+}
+
+void AttachBloomFilters::visitConsumer(const P::PhysicalPtr &node) {
+}
+
+}  // namespace optimizer
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/query_optimizer/rules/AttachBloomFilters.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachBloomFilters.hpp b/query_optimizer/rules/AttachBloomFilters.hpp
new file mode 100644
index 0000000..dfcc7f5
--- /dev/null
+++ b/query_optimizer/rules/AttachBloomFilters.hpp
@@ -0,0 +1,100 @@
+/**
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin—Madison.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_BLOOM_FILTERS_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_BLOOM_FILTERS_HPP_
+
+#include <algorithm>
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ *  @{
+ */
+
+/**
+ * @brief TODO
+ */
+class AttachBloomFilters : public Rule<physical::Physical> {
+ public:
+  AttachBloomFilters() {}
+
+  ~AttachBloomFilters() override {}
+
+  std::string getName() const override {
+    return "AttachBloomFilters";
+  }
+
+  physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+  struct BloomFilterInfo {
+    BloomFilterInfo(const physical::PhysicalPtr &source_in,
+                    const expressions::AttributeReferencePtr &attribute_in,
+                    const double selectivity_in,
+                    const bool from_sibling_in)
+        : source(source_in),
+          attribute(attribute_in),
+          selectivity(selectivity_in),
+          from_sibling(from_sibling_in) {
+    }
+    BloomFilterInfo(const BloomFilterInfo &info)
+        : source(info.source),
+          attribute(info.attribute),
+          selectivity(info.selectivity),
+          from_sibling(info.from_sibling) {
+    }
+    physical::PhysicalPtr source;
+    expressions::AttributeReferencePtr attribute;
+    double selectivity;
+    bool from_sibling;
+  };
+
+  void visitProducer(const physical::PhysicalPtr &node);
+
+  void visitConsumer(const physical::PhysicalPtr &node);
+
+  std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
+
+  std::map<physical::PhysicalPtr, std::vector<const BloomFilterInfo>> producers_;
+  std::map<physical::PhysicalPtr, std::vector<const BloomFilterInfo>> consumers_;
+
+  DISALLOW_COPY_AND_ASSIGN(AttachBloomFilters);
+};
+
+/** @} */
+
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif /* QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_BLOOM_FILTERS_HPP_ */

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 1990174..a943ef7 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -18,6 +18,7 @@
 add_subdirectory(tests)
 
 # Declare micro-libs:
+add_library(quickstep_queryoptimizer_rules_AttachBloomFilters AttachBloomFilters.cpp AttachBloomFilters.hpp)
 add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp)
 add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp)
 add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
@@ -35,6 +36,20 @@ add_library(quickstep_queryoptimizer_rules_UnnestSubqueries UnnestSubqueries.cpp
 
 
 # Link dependencies:
+target_link_libraries(quickstep_queryoptimizer_rules_AttachBloomFilters
+                      quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+                      quickstep_queryoptimizer_expressions_AttributeReference
+                      quickstep_queryoptimizer_expressions_ExprId
+                      quickstep_queryoptimizer_expressions_NamedExpression
+                      quickstep_queryoptimizer_expressions_PatternMatcher
+                      quickstep_queryoptimizer_expressions_Predicate
+                      quickstep_queryoptimizer_physical_HashJoin
+                      quickstep_queryoptimizer_physical_PatternMatcher
+                      quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_physical_PhysicalType
+                      quickstep_queryoptimizer_physical_TopLevelPlan
+                      quickstep_queryoptimizer_rules_Rule
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_rules_BottomUpRule
                       glog
                       quickstep_queryoptimizer_rules_Rule
@@ -126,6 +141,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOpti
                       quickstep_queryoptimizer_physical_PhysicalType
                       quickstep_queryoptimizer_physical_TopLevelPlan
                       quickstep_queryoptimizer_rules_Rule
+                      quickstep_utility_DisjointTreeForest
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_rules_TopDownRule
                       quickstep_queryoptimizer_rules_Rule
@@ -176,6 +192,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_UpdateExpression
 # Module all-in-one library:
 add_library(quickstep_queryoptimizer_rules ../../empty_src.cpp OptimizerRulesModule.hpp)
 target_link_libraries(quickstep_queryoptimizer_rules
+                      quickstep_queryoptimizer_rules_AttachBloomFilters
                       quickstep_queryoptimizer_rules_BottomUpRule
                       quickstep_queryoptimizer_rules_CollapseProject
                       quickstep_queryoptimizer_rules_GenerateJoins

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/storage/HashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index be31fd9..740e678 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -23,6 +23,7 @@
 #include <atomic>
 #include <cstddef>
 #include <cstdlib>
+#include <memory>
 #include <type_traits>
 #include <vector>
 
@@ -39,6 +40,7 @@
 #include "types/Type.hpp"
 #include "types/TypedValue.hpp"
 #include "utility/BloomFilter.hpp"
+#include "utility/BloomFilterAdapter.hpp"
 #include "utility/HashPair.hpp"
 #include "utility/Macros.hpp"
 
@@ -2246,26 +2248,14 @@ void HashTable<ValueT, resizable, serializable, force_key_copy,
allow_duplicate_
   InvokeOnAnyValueAccessor(
       accessor,
       [&](auto *accessor) -> void {  // NOLINT(build/c++11)
+    std::unique_ptr<BloomFilterAdapter> bloom_filter_adapter;
+    if (has_probe_side_bloom_filter_) {
+      bloom_filter_adapter.reset(
+          new BloomFilterAdapter(probe_bloom_filters_, probe_attribute_ids_));
+    }
     while (accessor->next()) {
-      // Probe any bloom filters, if enabled.
-      if (has_probe_side_bloom_filter_) {
-        DCHECK_EQ(probe_bloom_filters_.size(), probe_attribute_ids_.size());
-        // Check if the key is contained in the BloomFilters or not.
-        bool bloom_miss = false;
-        for (std::size_t i = 0; i < probe_bloom_filters_.size() && !bloom_miss;
++i) {
-          const BloomFilter *bloom_filter = probe_bloom_filters_[i];
-          for (const attribute_id &attr_id : probe_attribute_ids_[i]) {
-            TypedValue bloom_key = accessor->getTypedValue(attr_id);
-            if (!bloom_filter->contains(static_cast<const std::uint8_t*>(bloom_key.getDataPtr()),
-                                        bloom_key.getDataSize())) {
-              bloom_miss = true;
-              break;
-            }
-          }
-        }
-        if (bloom_miss) {
-          continue;  // On a bloom filter miss, probing the hash table can be skipped.
-        }
+      if (has_probe_side_bloom_filter_ && bloom_filter_adapter->miss(accessor))
{
+        continue;  // On a bloom filter miss, probing the hash table can be skipped.
       }
 
       TypedValue key = accessor->getTypedValue(key_attr_id);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index 8582981..5883470 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -320,6 +320,7 @@ target_link_libraries(quickstep_utility
                       quickstep_utility_CheckSnprintf
                       quickstep_utility_DAG
                       quickstep_utility_DAGVisualizer
+                      quickstep_utility_DisjointTreeForest
                       quickstep_utility_EventProfiler
                       quickstep_utility_EqualsAnyConstant
                       quickstep_utility_Glob


Mime
View raw message