quickstep-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hbdeshm...@apache.org
Subject [3/6] incubator-quickstep git commit: Provide method to get estimated size of hash table
Date Tue, 01 Nov 2016 21:57:00 GMT
Provide method to get estimated size of hash table

- Static API that doesn't need instantiation of the HashTable object.
- Supports LinearOpenAddressing, SeparateChaining and
  SimpleScalarSeparateChaining hash table variants.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/f54d1a2f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/f54d1a2f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/f54d1a2f

Branch: refs/heads/delay-hashtable-memory-alloc
Commit: f54d1a2f126c49a05584ccb616b9c770f019c357
Parents: 1d486ae
Author: Harshad Deshmukh <hbdeshmukh@apache.org>
Authored: Tue Nov 1 14:42:11 2016 -0500
Committer: Harshad Deshmukh <hbdeshmukh@apache.org>
Committed: Tue Nov 1 16:56:38 2016 -0500

----------------------------------------------------------------------
 storage/HashTableFactory.hpp                    | 69 +++++++++++++++++
 storage/HashTableKeyManager.hpp                 | 81 ++++++++++++++++++--
 storage/LinearOpenAddressingHashTable.hpp       | 16 ++--
 storage/SeparateChainingHashTable.hpp           | 19 +++--
 .../SimpleScalarSeparateChainingHashTable.hpp   |  4 +
 5 files changed, 171 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f54d1a2f/storage/HashTableFactory.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTableFactory.hpp b/storage/HashTableFactory.hpp
index d690557..58c4d82 100644
--- a/storage/HashTableFactory.hpp
+++ b/storage/HashTableFactory.hpp
@@ -139,6 +139,75 @@ template <typename ValueT,
 class HashTableFactory {
  public:
   /**
+   * @brief Find the estimated memory in bytes for the given hash table.
+   *
+   * @param proto A protobuf description of a HashTable.
+   *
+   * @return The estimated memory in bytes for the hash table.
+   **/
+  static std::size_t GetHashTableEstimatedMemorySize(
+      const serialization::HashTable &proto) {
+    auto hash_table_type =
+        HashTableImplTypeFromProto(proto.hash_table_impl_type());
+    switch (hash_table_type) {
+      case HashTableImplType::kLinearOpenAddressing: {
+        std::vector<const Type *> key_types;
+        for (int i = 0; i < proto.key_types_size(); ++i) {
+          key_types.emplace_back(
+              &TypeFactory::ReconstructFromProto(proto.key_types(i)));
+        } const std::size_t key_start_in_bucket_offset =
+            HashTableKeyManager<serializable,
+                                force_key_copy>::GetKeyStartInBucketOffset();
+        std::vector<std::size_t> key_offsets;
+        const std::size_t fixed_key_size =
+            HashTableKeyManager<serializable, force_key_copy>::
+                CalculateFixedKeySize(
+                    key_types, key_start_in_bucket_offset, &key_offsets);
+        return proto.estimated_num_entries() *
+               LinearOpenAddressingHashTable<
+                   ValueT,
+                   resizable,
+                   serializable,
+                   force_key_copy,
+                   allow_duplicate_keys>::ComputeBucketSize(fixed_key_size);
+      }
+      case HashTableImplType::kSeparateChaining: {
+        std::vector<const Type*> key_types;
+        for (int i = 0; i < proto.key_types_size(); ++i) {
+          key_types.emplace_back(
+              &TypeFactory::ReconstructFromProto(proto.key_types(i)));
+        }
+        const std::size_t key_start_in_bucket_offset =
+            HashTableKeyManager<serializable,
+                                force_key_copy>::GetKeyStartInBucketOffset();
+        std::vector<std::size_t> key_offsets;
+        const std::size_t fixed_key_size =
+            HashTableKeyManager<serializable, force_key_copy>::
+                CalculateFixedKeySize(
+                    key_types, key_start_in_bucket_offset, &key_offsets);
+        return proto.estimated_num_entries() *
+               SeparateChainingHashTable<
+                   ValueT,
+                   resizable,
+                   serializable,
+                   force_key_copy,
+                   allow_duplicate_keys>::ComputeBucketSize(fixed_key_size);
+      }
+      case HashTableImplType::kSimpleScalarSeparateChaining:
+        return proto.estimated_num_entries() *
+               SimpleScalarSeparateChainingHashTable<
+                   ValueT,
+                   resizable,
+                   serializable,
+                   force_key_copy,
+                   allow_duplicate_keys>::GetBucketSize();
+      default: {
+        LOG(FATAL) << "Unrecognized HashTableImplType in HashTableFactory::createResizable()\n";
+      }
+    }
+  }
+
+  /**
    * @brief Create a new resizable HashTable, with the type selected by
    *        hash_table_type. Other parameters are forwarded to the HashTable's
    *        constructor.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f54d1a2f/storage/HashTableKeyManager.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTableKeyManager.hpp b/storage/HashTableKeyManager.hpp
index 0295ad4..db68d98 100644
--- a/storage/HashTableKeyManager.hpp
+++ b/storage/HashTableKeyManager.hpp
@@ -62,6 +62,50 @@ template <bool serializable, bool force_key_copy>
 class HashTableKeyManager {
  public:
   /**
+   * @brief Calculate the fixed key size for the hash table.
+   *
+   * @param key_types A vector of pointers to the Types of key components for
+   *        a HashTable.
+   * @param key_start_in_bucket The position in the HashTable's buckets where
+   *        storage for keys begins, measured as the number of bytes from the
+   *        start of the bucket.
+   * @param key_offsets The offsets of the keys with respect to the beginning.
+   *
+   * @return The fixed key size for the HashTable.
+   **/
+  static std::size_t CalculateFixedKeySize(
+      const std::vector<const Type *> &key_types,
+      const std::size_t key_start_in_bucket,
+      std::vector<std::size_t> *key_offsets) {
+    DCHECK(!key_types.empty());
+    DCHECK_NOTNULL(key_offsets);
+    std::size_t fixed_key_size = 0;
+
+    for (const Type *subkey_type : key_types) {
+      key_offsets->push_back(key_start_in_bucket + fixed_key_size);
+
+      if (force_key_copy) {
+        if (subkey_type->isVariableLength()) {
+          fixed_key_size += sizeof(std::size_t) * 2;
+        } else {
+          fixed_key_size += subkey_type->maximumByteLength();
+        }
+      } else {
+        constexpr std::size_t ptr_size = serializable ? sizeof(std::ptrdiff_t)
+                                                      : sizeof(const void*);
+        if ((!subkey_type->isVariableLength())
+            && (subkey_type->maximumByteLength() <= ptr_size + sizeof(std::size_t)))
{
+          fixed_key_size += subkey_type->maximumByteLength();
+        } else {
+          // A pointer to external memory, paired with a length.
+          fixed_key_size += ptr_size + sizeof(std::size_t);
+        }
+      }
+    }
+    return fixed_key_size;
+  }
+
+  /**
    * @brief Constructor.
    *
    * @param key_types A vector of pointers to the Types of key components for
@@ -308,17 +352,44 @@ HashTableKeyManager<serializable, force_key_copy>::HashTableKeyManager(
       next_variable_length_key_offset_(0) {
   DCHECK(!key_types_.empty());
 
-  for (const Type *subkey_type : key_types_) {
-    key_offsets_.push_back(key_start_in_bucket + fixed_key_size_);
+  switch(serializable) {
+    case true:
+      switch(force_key_copy) {
+        case true:
+          fixed_key_size_ =
+              HashTableKeyManager<true, true>::CalculateFixedKeySize(
+                  key_types, key_start_in_bucket, &key_offsets_);
+          break;
+        case false:
+          fixed_key_size_ =
+              HashTableKeyManager<true, false>::CalculateFixedKeySize(
+                  key_types, key_start_in_bucket, &key_offsets_);
+          break;
+      }
+      break;
+    case false:
+      switch(force_key_copy) {
+        case true:
+          fixed_key_size_ =
+              HashTableKeyManager<false, true>::CalculateFixedKeySize(
+                  key_types, key_start_in_bucket, &key_offsets_);
+          break;
+        case false:
+          fixed_key_size_ =
+              HashTableKeyManager<false, false>::CalculateFixedKeySize(
+                  key_types, key_start_in_bucket, &key_offsets_);
+          break;
+      }
+      break;
+  }
 
+  for (const Type *subkey_type : key_types_) {
     if (force_key_copy) {
       if (subkey_type->isVariableLength()) {
         // An offset into the variable length storage region, paired with a length.
-        fixed_key_size_ += sizeof(std::size_t) * 2;
         estimated_variable_key_size_ += subkey_type->estimateAverageByteLength();
         key_inline_.push_back(false);
       } else {
-        fixed_key_size_ += subkey_type->maximumByteLength();
         key_inline_.push_back(true);
       }
     } else {
@@ -326,11 +397,9 @@ HashTableKeyManager<serializable, force_key_copy>::HashTableKeyManager(
                                                     : sizeof(const void*);
       if ((!subkey_type->isVariableLength())
           && (subkey_type->maximumByteLength() <= ptr_size + sizeof(std::size_t)))
{
-        fixed_key_size_ += subkey_type->maximumByteLength();
         key_inline_.push_back(true);
       } else {
         // A pointer to external memory, paired with a length.
-        fixed_key_size_ += ptr_size + sizeof(std::size_t);
         key_inline_.push_back(false);
       }
     }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f54d1a2f/storage/LinearOpenAddressingHashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/LinearOpenAddressingHashTable.hpp b/storage/LinearOpenAddressingHashTable.hpp
index 4953b4d..4a6d63e 100644
--- a/storage/LinearOpenAddressingHashTable.hpp
+++ b/storage/LinearOpenAddressingHashTable.hpp
@@ -71,6 +71,16 @@ class LinearOpenAddressingHashTable : public HashTable<ValueT,
   static constexpr std::size_t kPendingHash
       = HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_keys>::kPendingHash;
 
+  // Round bucket size up to a multiple of kBucketAlignment.
+  static constexpr std::size_t ComputeBucketSize(const std::size_t fixed_key_size) {
+    return (((kValueOffset + sizeof(ValueT) + fixed_key_size - 1) / kBucketAlignment) + 1)
+           * kBucketAlignment;
+  }
+
+  static constexpr std::size_t GetKeyStartInBucketOffset() {
+    return kValueOffset + sizeof(ValueT);
+  }
+
   LinearOpenAddressingHashTable(const std::vector<const Type*> &key_types,
                                 const std::size_t num_entries,
                                 StorageManager *storage_manager);
@@ -211,12 +221,6 @@ class LinearOpenAddressingHashTable : public HashTable<ValueT,
   static constexpr std::size_t kValueOffset
       = (((sizeof(std::atomic<std::size_t>) - 1) / alignof(ValueT)) + 1) * alignof(ValueT);
 
-  // Round bucket size up to a multiple of kBucketAlignment.
-  static constexpr std::size_t ComputeBucketSize(const std::size_t fixed_key_size) {
-    return (((kValueOffset + sizeof(ValueT) + fixed_key_size - 1) / kBucketAlignment) + 1)
-           * kBucketAlignment;
-  }
-
   // Attempt to find an empty bucket to insert 'hash_code' into, starting from
   // '*bucket_num'. Returns true and stores kPendingHash in bucket if an empty
   // bucket is found. Returns false if 'allow_duplicate_keys' is false and a

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f54d1a2f/storage/SeparateChainingHashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/SeparateChainingHashTable.hpp b/storage/SeparateChainingHashTable.hpp
index 3ca060d..e696464 100644
--- a/storage/SeparateChainingHashTable.hpp
+++ b/storage/SeparateChainingHashTable.hpp
@@ -63,6 +63,19 @@ class SeparateChainingHashTable : public HashTable<ValueT,
                                                    force_key_copy,
                                                    allow_duplicate_keys> {
  public:
+  // Round bucket size up to a multiple of kBucketAlignment.
+  static constexpr std::size_t ComputeBucketSize(
+      const std::size_t fixed_key_size) {
+    return (((kValueOffset + sizeof(ValueT) + fixed_key_size - 1) /
+             kBucketAlignment) +
+            1) *
+           kBucketAlignment;
+  }
+
+  static constexpr std::size_t GetKeyStartInBucketOffset() {
+    return kValueOffset + sizeof(ValueT);
+  }
+
   SeparateChainingHashTable(const std::vector<const Type*> &key_types,
                             const std::size_t num_entries,
                             StorageManager *storage_manager);
@@ -188,12 +201,6 @@ class SeparateChainingHashTable : public HashTable<ValueT,
   static constexpr std::size_t kValueOffset
       = (((sizeof(std::atomic<std::size_t>) + sizeof(std::size_t) - 1) / alignof(ValueT))
+ 1) * alignof(ValueT);
 
-  // Round bucket size up to a multiple of kBucketAlignment.
-  static constexpr std::size_t ComputeBucketSize(const std::size_t fixed_key_size) {
-    return (((kValueOffset + sizeof(ValueT) + fixed_key_size - 1) / kBucketAlignment) + 1)
-           * kBucketAlignment;
-  }
-
   // If ValueT is not trivially destructible, invoke its destructor for all
   // values held in the specified buckets (including those in "empty" buckets
   // that were default constructed). If ValueT is trivially destructible, this

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f54d1a2f/storage/SimpleScalarSeparateChainingHashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/SimpleScalarSeparateChainingHashTable.hpp b/storage/SimpleScalarSeparateChainingHashTable.hpp
index 8448896..d527aeb 100644
--- a/storage/SimpleScalarSeparateChainingHashTable.hpp
+++ b/storage/SimpleScalarSeparateChainingHashTable.hpp
@@ -77,6 +77,10 @@ class SimpleScalarSeparateChainingHashTable : public HashTable<ValueT,
                                                                force_key_copy,
                                                                allow_duplicate_keys> {
  public:
+  static const std::size_t GetBucketSize() {
+    return sizeof(Bucket);
+  }
+
   SimpleScalarSeparateChainingHashTable(const std::vector<const Type*> &key_types,
                                         const std::size_t num_entries,
                                         StorageManager *storage_manager);


Mime
View raw message