quickstep-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jianq...@apache.org
Subject [08/22] incubator-quickstep git commit: Initial commit
Date Mon, 22 Aug 2016 18:56:17 GMT
Initial commit


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/b430e77a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/b430e77a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/b430e77a

Branch: refs/heads/LIP-for-tpch-merged
Commit: b430e77adace7a04870d3e6c9c805c43c4a8b5d7
Parents: cdc1e05
Author: Hakan Memisoglu <hakanmemisoglu@apache.org>
Authored: Sun Aug 21 12:16:21 2016 -0500
Committer: Hakan Memisoglu <hakanmemisoglu@apache.org>
Committed: Sun Aug 21 12:16:21 2016 -0500

----------------------------------------------------------------------
 relational_operators/HashJoinOperator.cpp       | 20 +++++----
 types/containers/ColumnVector.hpp               | 43 +++++++++++++++++++-
 types/containers/ColumnVectorsValueAccessor.hpp | 14 +++++++
 3 files changed, 68 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b430e77a/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index 779c0fe..14fc0f6 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -441,6 +441,7 @@ void HashInnerJoinWorkOrder::execute() {
   const relation_id build_relation_id = build_relation_.getID();
   const relation_id probe_relation_id = probe_relation_.getID();
 
+  ColumnVectorsValueAccessor temp_result;
   for (std::pair<const block_id, std::vector<std::pair<tuple_id, tuple_id>>>
            &build_block_entry : *collector.getJoinedTuples()) {
     BlockReference build_block =
@@ -492,23 +493,26 @@ void HashInnerJoinWorkOrder::execute() {
     // benefit (probably only a real performance win when there are very few
     // matching tuples in each individual inner block but very many inner
     // blocks with at least one match).
-    ColumnVectorsValueAccessor temp_result;
+    //ColumnVectorsValueAccessor temp_result;
+    std::size_t i = 0;
     for (vector<unique_ptr<const Scalar>>::const_iterator selection_cit = selection_.begin();
          selection_cit != selection_.end();
-         ++selection_cit) {
-      temp_result.addColumn((*selection_cit)->getAllValuesForJoin(build_relation_id,
-                                                                  build_accessor.get(),
-                                                                  probe_relation_id,
-                                                                  probe_accessor.get(),
-                                                                  build_block_entry.second));
+         ++selection_cit, ++i) {
+      temp_result.appendColumn((*selection_cit)->getAllValuesForJoin(build_relation_id,
+                                                                     build_accessor.get(),
+                                                                     probe_relation_id,
+                                                                     probe_accessor.get(),
+                                                                     build_block_entry.second),
+                               i);
     }
 
     // NOTE(chasseur): calling the bulk-insert method of InsertDestination once
     // for each pair of joined blocks incurs some extra overhead that could be
     // avoided by keeping checked-out MutableBlockReferences across iterations
     // of this loop, but that would get messy when combined with partitioning.
-    output_destination_->bulkInsertTuples(&temp_result);
+    //        output_destination_->bulkInsertTuples(&temp_result);
   }
+  output_destination_->bulkInsertTuples(&temp_result);
 }
 
 void HashSemiJoinWorkOrder::execute() {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b430e77a/types/containers/ColumnVector.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVector.hpp b/types/containers/ColumnVector.hpp
index fc65656..3953a7f 100644
--- a/types/containers/ColumnVector.hpp
+++ b/types/containers/ColumnVector.hpp
@@ -107,6 +107,8 @@ class ColumnVector {
    **/
   virtual bool isNative() const = 0;
 
+  virtual bool append(ColumnVector *column_vector) = 0;  // Appends column_vector's values to this vector; returns false if that cannot be done.
+
  protected:
   const Type &type_;
 
@@ -383,10 +385,45 @@ class NativeColumnVector : public ColumnVector {
     }
   }
 
+  /**
+   * @brief Append all values of another NativeColumnVector to the end of this
+   *        one, growing this vector's buffer when necessary.
+   *
+   * @param column_vector The ColumnVector whose values are copied. It is not
+   *        modified by this call.
+   * @return true if the values were appended; false if column_vector is null
+   *         or not native, if its type or type length differs from this
+   *         vector's, or if the buffer could not be grown. On failure this
+   *         vector is left unchanged.
+   **/
+  bool append(ColumnVector *column_vector) override {
+    // Other ColumnVector also has to be native.
+    if (column_vector == nullptr || !column_vector->isNative()) {
+      return false;
+    }
+    NativeColumnVector *other = static_cast<NativeColumnVector*>(column_vector);
+    // Both ColumnVectors have to have the same type to be appended.
+    if (!type_.equals(other->type_) || type_length_ != other->type_length_) {
+      return false;
+    }
+
+    const std::size_t new_actual_length = actual_length_ + other->actual_length_;
+    if (new_actual_length > reserved_length_) {
+      // Let's be generous about new reserved space, so that a series of
+      // appends does not have to reallocate every time.
+      const std::size_t new_reserved_length = 2 * new_actual_length;
+      // The lengths count values, but realloc() takes a size in bytes.
+      // NOTE(review): assumes values_ came from malloc()/realloc() -- confirm
+      // against the constructor and destructor.
+      void *new_buffer = std::realloc(values_, type_length_ * new_reserved_length);
+      if (new_buffer == nullptr) {
+        // realloc() leaves the old buffer untouched when it fails.
+        return false;
+      }
+      values_ = new_buffer;
+      reserved_length_ = new_reserved_length;
+    }
+
+    if (other->actual_length_ > 0) {
+      std::memcpy(static_cast<char*>(values_)
+                      + (type_length_ * actual_length_),  // First empty position of this' buffer
+                  other->values_,                         // First position of other's buffer
+                  type_length_ * other->actual_length_);  // Number of bytes
+    }
+
+    // NOTE(review): null_bitmap_ is neither resized nor merged with other's
+    // bitmap here; presumably nullable columns need that before appended
+    // positions can be read safely -- TODO confirm and implement.
+    actual_length_ = new_actual_length;
+    return true;
+  }
+
  private:
   const std::size_t type_length_;
   void *values_;
-  const std::size_t reserved_length_;
+  std::size_t reserved_length_;
   std::size_t actual_length_;
   std::unique_ptr<BitVector<false>> null_bitmap_;
 
@@ -556,6 +593,10 @@ class IndirectColumnVector : public ColumnVector {
     values_[position] = std::move(value);
   }
 
+  /**
+   * @brief Appending to an IndirectColumnVector is not implemented: no values
+   *        are copied from column_vector.
+   *
+   * @param column_vector Ignored.
+   * @return Always false, so that callers are not told that an append which
+   *         did not happen succeeded.
+   **/
+  bool append(ColumnVector *column_vector) override {
+    // TODO: copy column_vector's values into this vector. Until then report
+    // failure instead of silently dropping the values.
+    return false;
+  }
+
  private:
   const bool type_is_nullable_;
   const std::size_t reserved_length_;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b430e77a/types/containers/ColumnVectorsValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVectorsValueAccessor.hpp b/types/containers/ColumnVectorsValueAccessor.hpp
index 2300f3b..4f37eb8 100644
--- a/types/containers/ColumnVectorsValueAccessor.hpp
+++ b/types/containers/ColumnVectorsValueAccessor.hpp
@@ -92,6 +92,20 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
           : static_cast<const IndirectColumnVector*>(column)->size();
   }
 
+  /**
+   * @brief Append the values of a column to the column already stored at
+   *        position index, or add it as a new column when no column exists
+   *        at that position yet.
+   *
+   * @param column The column whose values are appended (or the column to add).
+   * @param index The position of the column to extend.
+   * @param owns Passed through to addColumn() when a new column is added.
+   *        When the values are appended to an existing column instead and
+   *        owns is true, column is deleted before returning.
+   *        NOTE(review): presumably owns means this accessor is responsible
+   *        for freeing column, as with addColumn() -- confirm.
+   **/
+  void appendColumn(ColumnVector *column, const std::size_t index, const bool owns = true) {
+    if (index >= columns_.size()) {
+      addColumn(column, owns);
+      return;
+    }
+
+    ColumnVector *existing_column = columns_[index];
+    if (existing_column->append(column)) {
+      // Take the length from the merged column rather than adding the
+      // appended length: this method is called once per column, so adding
+      // would count the same appended tuples once for every column.
+      column_length_
+          = existing_column->isNative()
+              ? static_cast<const NativeColumnVector*>(existing_column)->size()
+              : static_cast<const IndirectColumnVector*>(existing_column)->size();
+    }
+    // NOTE(review): a failed append cannot be reported through the void
+    // return type; column_length_ is simply left unchanged in that case.
+
+    // The values were copied (or rejected), so the appended column itself is
+    // not stored anywhere; free it if ownership was handed to us.
+    // NOTE(review): relies on ColumnVector having a virtual destructor --
+    // confirm.
+    if (owns) {
+      delete column;
+    }
+  }
+
   inline void beginIteration() {
     current_position_ = std::numeric_limits<std::size_t>::max();
   }


Mime
View raw message