From commits-return-15467-archive-asf-public=cust-asf.ponee.io@tvm.apache.org Tue Jun 16 07:46:16 2020 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id BCE57180621 for ; Tue, 16 Jun 2020 09:46:15 +0200 (CEST) Received: (qmail 47842 invoked by uid 500); 16 Jun 2020 07:46:15 -0000 Mailing-List: contact commits-help@tvm.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@tvm.apache.org Delivered-To: mailing list commits@tvm.apache.org Received: (qmail 47833 invoked by uid 99); 16 Jun 2020 07:46:15 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 16 Jun 2020 07:46:15 +0000 From: =?utf-8?q?GitBox?= To: commits@tvm.apache.org Subject: =?utf-8?q?=5BGitHub=5D_=5Bincubator-tvm=5D_junrushao1994_commented_on_a_chan?= =?utf-8?q?ge_in_pull_request_=235740=3A_=5BObject=5D_Introduce_POD-C_Compli?= =?utf-8?q?ant_tvm=3A=3AMap?= Message-ID: <159229357505.8807.8238362893786108761.asfpy@gitbox.apache.org> Date: Tue, 16 Jun 2020 07:46:15 -0000 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit In-Reply-To: References: junrushao1994 commented on a change in pull request #5740: URL: https://github.com/apache/incubator-tvm/pull/5740#discussion_r440651195 ########## File path: include/tvm/node/container.h ########## @@ -43,51 +44,1188 @@ using runtime::Downcast; using runtime::IterAdapter; using runtime::make_object; using runtime::Object; +using runtime::ObjectEqual; +using runtime::ObjectHash; using runtime::ObjectPtr; using runtime::ObjectPtrEqual; using runtime::ObjectPtrHash; using runtime::ObjectRef; using runtime::String; using runtime::StringObj; -/*! \brief String-aware ObjectRef hash functor */ -struct ObjectHash { - size_t operator()(const ObjectRef& a) const { - if (const auto* str = a.as()) { - return String::HashBytes(str->data, str->size); - } - return ObjectPtrHash()(a); +#if (TVM_USE_STL_MAP != 0) + +/*! \brief Shared content of all specializations of hash map */ +class MapNode : public Object { + public: + /*! \brief Type of the keys in the hash map */ + using key_type = ObjectRef; + /*! \brief Type of the values in the hash map */ + using mapped_type = ObjectRef; + /*! \brief Type of the actual underlying container */ + using ContainerType = std::unordered_map; + /*! \brief Iterator class */ + using iterator = ContainerType::iterator; + /*! \brief Iterator class */ + using const_iterator = ContainerType::const_iterator; + /*! \brief Type of value stored in the hash map */ + using KVType = ContainerType::value_type; + + static_assert(std::is_standard_layout::value, "KVType is not standard layout"); + static_assert(sizeof(KVType) == 16 || sizeof(KVType) == 8, "sizeof(KVType) incorrect"); + + static constexpr const uint32_t _type_index = runtime::TypeIndex::kRuntimeMap; + static constexpr const char* _type_key = "Map"; + TVM_DECLARE_FINAL_OBJECT_INFO(MapNode, Object); + + /*! + * \brief Number of elements in the SmallMapNode + * \return The result + */ + size_t size() const { return data_.size(); } + /*! + * \brief Count the number of times a key exists in the hash map + * \param key The indexing key + * \return The result, 0 or 1 + */ + size_t count(const key_type& key) const { return data_.count(key); } + /*! + * \brief Index value associated with a key, throw exception if the key does not exist + * \param key The indexing key + * \return The const reference to the value + */ + const mapped_type& at(const key_type& key) const { return data_.at(key); } + /*! + * \brief Index value associated with a key, throw exception if the key does not exist + * \param key The indexing key + * \return The mutable reference to the value + */ + mapped_type& at(const key_type& key) { return data_.at(key); } + /*! \return begin iterator */ + iterator begin() { return data_.begin(); } + /*! \return const begin iterator */ + const_iterator begin() const { return data_.begin(); } + /*! \return end iterator */ + iterator end() { return data_.end(); } + /*! \return end iterator */ + const_iterator end() const { return data_.end(); } + /*! + * \brief Index value associated with a key + * \param key The indexing key + * \return The iterator of the entry associated with the key, end iterator if not exists + */ + const_iterator find(const key_type& key) const { return data_.find(key); } + /*! + * \brief Index value associated with a key + * \param key The indexing key + * \return The iterator of the entry associated with the key, end iterator if not exists + */ + iterator find(const key_type& key) { return data_.find(key); } + /*! + * \brief Erase the entry associated with the iterator + * \param position The iterator + */ + void erase(const iterator& position) { data_.erase(position); } + /*! + * \brief Erase the entry associated with the key, do nothing if not exists + * \param key The indexing key + */ + void erase(const key_type& key) { data_.erase(key); } + + protected: + /*! + * \brief Create an empty container + * \return The object created + */ + static ObjectPtr Empty() { return make_object(); } + /*! + * \brief Create the map using contents from the given iterators. + * \param first Begin of iterator + * \param last End of iterator + * \tparam IterType The type of iterator + * \return ObjectPtr to the map created + */ + template + static ObjectPtr CreateFromRange(IterType first, IterType last) { + ObjectPtr p = make_object(); + p->data_ = std::unordered_map(first, last); + return p; + } + /*! + * \brief InsertMaybeReHash an entry into the given hash map + * \param kv The entry to be inserted + * \param map The pointer to the map, can be changed if re-hashing happens + */ + static void InsertMaybeReHash(const KVType& kv, ObjectPtr* map) { + MapNode* m = static_cast(map->get()); + m->data_[kv.first] = kv.second; } + /*! + * \brief Create an empty container with elements copying from another MapNode + * \param m The source container + * \return The object created + */ + static ObjectPtr CopyFrom(MapNode* m) { + ObjectPtr p = make_object(); + p->data_ = std::unordered_map(m->data_.begin(), + m->data_.end()); + return p; + } + /*! \brief The real container storing data */ + ContainerType data_; + template + friend class Map; }; -/*! \brief String-aware ObjectRef equal functor */ -struct ObjectEqual { - bool operator()(const ObjectRef& a, const ObjectRef& b) const { - if (a.same_as(b)) { - return true; +#else + +/*! \brief Shared content of all specializations of hash map */ +class MapNode : public Object { + public: + /*! \brief Type of the keys in the hash map */ + using key_type = ObjectRef; + /*! \brief Type of the values in the hash map */ + using mapped_type = ObjectRef; + /*! \brief Type of value stored in the hash map */ + using KVType = std::pair; + /*! \brief Iterator class */ + class iterator; + + static_assert(std::is_standard_layout::value, "KVType is not standard layout"); + static_assert(sizeof(KVType) == 16 || sizeof(KVType) == 8, "sizeof(KVType) incorrect"); + + static constexpr const uint32_t _type_index = runtime::TypeIndex::kRuntimeMap; + static constexpr const char* _type_key = "Map"; + TVM_DECLARE_FINAL_OBJECT_INFO(MapNode, Object); + + /*! + * \brief Number of elements in the SmallMapNode + * \return The result + */ + size_t size() const { return size_; } + /*! + * \brief Count the number of times a key exists in the hash map + * \param key The indexing key + * \return The result, 0 or 1 + */ + size_t count(const key_type& key) const; + /*! + * \brief Index value associated with a key, throw exception if the key does not exist + * \param key The indexing key + * \return The const reference to the value + */ + const mapped_type& at(const key_type& key) const; + /*! + * \brief Index value associated with a key, throw exception if the key does not exist + * \param key The indexing key + * \return The mutable reference to the value + */ + mapped_type& at(const key_type& key); + /*! \return begin iterator */ + iterator begin() const; + /*! \return end iterator */ + iterator end() const; + /*! + * \brief Index value associated with a key + * \param key The indexing key + * \return The iterator of the entry associated with the key, end iterator if not exists + */ + iterator find(const key_type& key) const; + /*! + * \brief Erase the entry associated with the iterator + * \param position The iterator + */ + void erase(const iterator& position); + /*! + * \brief Erase the entry associated with the key, do nothing if not exists + * \param key The indexing key + */ + void erase(const key_type& key) { erase(find(key)); } + + class iterator { + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = int64_t; + using value_type = KVType; + using pointer = KVType*; + using reference = KVType&; + /*! \brief Default constructor */ + iterator() : i(0), self(nullptr) {} + /*! \brief Compare iterators */ + bool operator==(const iterator& other) const { return i == other.i && self == other.self; } + /*! \brief Compare iterators */ + bool operator!=(const iterator& other) const { return !(*this == other); } + /*! \brief De-reference iterators */ + pointer operator->() const; + /*! \brief De-reference iterators */ + reference operator*() const { return *((*this).operator->()); } + /*! \brief Prefix self increment, e.g. ++iter */ + iterator& operator++(); + /*! \brief Prefix self decrement, e.g. --iter */ + iterator& operator--(); + /*! \brief Suffix self increment */ + iterator operator++(int) { + iterator copy = *this; + ++(*this); + return copy; } - if (const auto* str_a = a.as()) { - if (const auto* str_b = b.as()) { - return String::memncmp(str_a->data, str_b->data, str_a->size, str_b->size) == 0; + /*! \brief Suffix self decrement */ + iterator operator--(int) { + iterator copy = *this; + --(*this); + return copy; + } + + protected: + /*! \brief Construct by value */ + iterator(uint64_t i, const MapNode* self) : i(i), self(self) {} + /*! \brief The position on the array */ + uint64_t i; + /*! \brief The container it points to */ + const MapNode* self; + + friend class DenseMapNode; + friend class SmallMapNode; + }; + + protected: + /*! + * \brief Create an empty container + * \return The object created + */ + static ObjectPtr Empty(); + /*! + * \brief Create the map using contents from the given iterators. + * \param first Begin of iterator + * \param last End of iterator + * \tparam IterType The type of iterator + * \return ObjectPtr to the map created + */ + template + static ObjectPtr CreateFromRange(IterType first, IterType last); + /*! + * \brief InsertMaybeReHash an entry into the given hash map + * \param kv The entry to be inserted + * \param map The pointer to the map, can be changed if re-hashing happens + */ + static void InsertMaybeReHash(const KVType& kv, ObjectPtr* map); + /*! + * \brief Create an empty container with elements copying from another SmallMapNode + * \param m The source container + * \return The object created + */ + static ObjectPtr CopyFrom(MapNode* m); + /*! \brief number of slots minus 1 */ + uint64_t slots_; + /*! \brief number of entries in the container */ + uint64_t size_; + // Reference class + template + friend class Map; +}; + +/*! \brief A specialization of small-sized hash map */ +class SmallMapNode : public MapNode, + public runtime::InplaceArrayBase { + private: + static constexpr uint64_t kInitSize = 2; + static constexpr uint64_t kMaxSize = 4; + + public: + using MapNode::iterator; + using MapNode::KVType; + + /*! \brief Defaults to the destructor of InplaceArrayBase */ + ~SmallMapNode() = default; + /*! + * \brief Count the number of times a key exists in the SmallMapNode + * \param key The indexing key + * \return The result, 0 or 1 + */ + size_t count(const key_type& key) const { return find(key).i < size_; } + /*! + * \brief Index value associated with a key, throw exception if the key does not exist + * \param key The indexing key + * \return The const reference to the value + */ + const mapped_type& at(const key_type& key) const { + iterator itr = find(key); + CHECK(itr.i < size_) << "IndexError: key is not in Map"; + return itr->second; + } + /*! + * \brief Index value associated with a key, throw exception if the key does not exist + * \param key The indexing key + * \return The mutable reference to the value + */ + mapped_type& at(const key_type& key) { + iterator itr = find(key); + CHECK(itr.i < size_) << "IndexError: key is not in Map"; + return itr->second; + } + /*! \return begin iterator */ + iterator begin() const { return iterator(0, this); } + /*! \return end iterator */ + iterator end() const { return iterator(size_, this); } + /*! + * \brief Index value associated with a key + * \param key The indexing key + * \return The iterator of the entry associated with the key, end iterator if not exists + */ + iterator find(const key_type& key) const { + KVType* ptr = static_cast(AddressOf(0)); + for (uint64_t i = 0; i < size_; ++i, ++ptr) { + if (ObjectEqual()(ptr->first, key)) { + return iterator(i, this); } } - return false; + return iterator(size_, this); + } + /*! + * \brief Erase the entry associated with the iterator + * \param position The iterator + */ + void erase(const iterator& position) { Erase(position.i); } + + private: + /*! + * \brief Remove a position in SmallMapNode + * \param i The position to be removed + */ + void Erase(const uint64_t i) { + if (i >= size_) { + return; + } + KVType* begin = static_cast(AddressOf(0)); + KVType* last = begin + (size_ - 1); + if (i + 1 == size_) { + last->first.ObjectRef::~ObjectRef(); + last->second.ObjectRef::~ObjectRef(); + } else { + *(begin + i) = std::move(*last); + } + size_ -= 1; + } + /*! + * \brief Create an empty container + * \param n Number of empty slots + * \return The object created + */ + static ObjectPtr Empty(uint64_t n = kInitSize) { + using ::tvm::runtime::make_inplace_array_object; + ObjectPtr p = make_inplace_array_object(n); + p->size_ = 0; + p->slots_ = n; + return p; + } + /*! + * \brief Create an empty container initialized with a given range + * \param n Number of empty slots + * \param first begin of iterator + * \param last end of iterator + * \tparam IterType The type of iterator + * \return The object created + */ + template + static ObjectPtr CreateFromRange(uint64_t n, IterType first, IterType last) { + ObjectPtr p = Empty(n); + KVType* ptr = static_cast(p->AddressOf(0)); + for (; first != last; ++first, ++p->size_) { + new (ptr++) KVType(*first); + } + return p; + } + /*! + * \brief Create an empty container with elements copying from another SmallMapNode + * \param m The source container + * \return The object created + */ + static ObjectPtr CopyFrom(SmallMapNode* m) { + KVType* first = static_cast(m->AddressOf(0)); + KVType* last = first + m->size_; + return CreateFromRange(m->size_, first, last); + } + /*! + * \brief InsertMaybeReHash an entry into the given hash map + * \param kv The entry to be inserted + * \param map The pointer to the map, can be changed if re-hashing happens + */ + static void InsertMaybeReHash(const KVType& kv, ObjectPtr* map) { + SmallMapNode* m = static_cast(map->get()); + iterator itr = m->find(kv.first); + if (itr.i < m->size_) { + itr->second = kv.second; + return; + } + if (m->size_ < m->slots_) { + KVType* ptr = static_cast(m->AddressOf(m->size_)); + new (ptr) KVType(kv); + ++m->size_; + return; + } + uint64_t next_size = std::max(m->slots_ * 2, uint64_t(kInitSize)); + next_size = std::min(next_size, uint64_t(kMaxSize)); + CHECK_GT(next_size, m->slots_); + ObjectPtr n = CreateFromRange(next_size, m->begin(), m->end()); + InsertMaybeReHash(kv, &n); + *map = std::move(n); } + /*! + * \brief Increment the pointer + * \param i The pointer to be incremented + * \return The increased pointer + */ + uint64_t IncItr(uint64_t i) const { return i + 1 < size_ ? i + 1 : size_; } + /*! + * \brief Decrement the pointer + * \param i The pointer to be decremented + * \return The decreased pointer + */ + uint64_t DecItr(uint64_t i) const { return i > 0 ? i - 1 : size_; } + /*! + * \brief De-reference the pointer + * \param i The pointer to be dereferenced + * \return The result + */ + KVType* DeRefItr(uint64_t i) const { return static_cast(AddressOf(i)); } + /*! \brief A size function used by InplaceArrayBase */ + uint64_t GetSize() const { return size_; } + + protected: + friend class MapNode; + friend class DenseMapNode; + friend ObjectPtr runtime::make_object<>(); + friend class runtime::InplaceArrayBase; }; -/*! \brief map node content */ -class MapNode : public Object { +/*! \brief A specialization of hash map that implements the idea of array-based hash map. + * Another reference implementation can be found [1]. + * + * A. Overview + * + * DenseMapNode did several improvements over traditional separate chaining hash, + * in terms of cache locality, memory footprints and data organization. + * + * A1. Implicit linked list. For better cache locality, instead of using linked list + * explicitly for each bucket, we store list data into a single array that spans contiguously + * in memory, and then carefully design access patterns to make sure most of them fall into + * a single cache line. + * + * A2. 1-byte metadata. There is only 1 byte overhead for each slot in the array to indexing and + * traversal. This can be divided in 3 parts. + * 1) Reserved code: (0b11111111)_2 indicates a slot is empty; (0b11111110)_2 indicates protected, + * which means the slot is empty but not allowed to be written. + * 2) If not empty or protected, the highest bit is used to indicate whether data in the slot is + * head of a linked list. + * 3) The rest 7 bits are used as the "next pointer" (i.e. pointer to the next element). On 64-bit + * architecture, an ordinary pointer can take up to 8 bytes, which is not acceptable overhead when + * dealing with 16-byte ObjectRef pairs. Based on a commonly noticed fact that the lists are + * relatively short (length <= 3) in hash maps, we follow [1]'s idea that only allows the pointer to + * be one of the 126 possible values, i.e. if the next element of i-th slot is (i + x)-th element, + * then x must be one of the 126 pre-defined values. + * + * A3. Data blocking. We organize the array in the way that every 16 elements forms a data block. + * The 16-byte metadata of those 16 elements are stored together, followed by the real data, i.e. + * 16 key-value pairs. + * + * B. Implementation details + * + * B1. Power-of-2 table size and Fibonacci Hashing. We use power-of-two as table size to avoid + * modulo for more efficient arithmetics. To make the hash-to-slot mapping distribute more evenly, + * we use the Fibonacci Hashing [2] trick. + * + * B2. Traverse a linked list in the array. + * 1) List head. Assume Fibonacci Hashing maps a given key to slot i, if metadata at slot i + * indicates that it is list head, then we found the head; otherwise the list is empty. No probing + * is done in this procedure. 2) Next element. To find the next element of a non-empty slot i, we + * look at the last 7 bits of the metadata at slot i. If they are all zeros, then it is the end of + * list; otherwise, we know that the next element is (i + candidates[the-last-7-bits]). + * + * B3. InsertMaybeReHash an element. Following B2, we first traverse the linked list to see if this + * element is in the linked list, and if not, we put it at the end by probing the next empty + * position in one of the 126 candidate positions. If the linked list does not even exist, but the + * slot for list head has been occupied by another linked list, we should find this intruder another + * place. + * + * B4. Quadratic probing with triangle numbers. In open address hashing, it is provable that probing + * with triangle numbers can traverse power-of-2-sized table [3]. In our algorithm, we follow the + * suggestion in [1] that also use triangle numbers for "next pointer" as well as sparing for list + * head. + * + * [1] https://github.com/skarupke/flat_hash_map + * [2] https://programmingpraxis.com/2018/06/19/fibonacci-hash/ + * [3] https://fgiesen.wordpress.com/2015/02/22/triangular-numbers-mod-2n/ + */ +class DenseMapNode : public MapNode { + private: + /*! \brief The number of elements in a memory block */ + static constexpr int kBlockCap = 16; + /*! \brief Maximum load factor of the hash map */ + static constexpr double kMaxLoadFactor = 0.99; + /*! \brief Binary representation of the metadata of an empty slot */ + static constexpr uint8_t kEmptySlot = uint8_t(0b11111111); + /*! \brief Binary representation of the metadata of a protected slot */ + static constexpr uint8_t kProtectedSlot = uint8_t(0b11111110); + /*! \brief Number of probing choices available */ + static constexpr int kNumJumpDists = 126; + /*! \brief Head of the implicit linked list */ + struct ListNode; + /*! \brief POD type of a block of memory */ + struct Block { + uint8_t b[kBlockCap + kBlockCap * sizeof(KVType)]; + }; + static_assert(sizeof(Block) == kBlockCap * (sizeof(KVType) + 1), "sizeof(Block) incorrect"); + static_assert(std::is_standard_layout::value, "Block is not standard layout"); + public: - /*! \brief The corresponding conatiner type */ - using ContainerType = std::unordered_map; + using MapNode::iterator; - /*! \brief the data content */ - ContainerType data; + /*! + * \brief Destroy the DenseMapNode + */ + ~DenseMapNode() { this->Reset(); } + /*! \return The number of elements of the key */ + size_t count(const key_type& key) const { return !Search(key).IsNone(); } + /*! + * \brief Index value associated with a key, throw exception if the key does not exist + * \param key The indexing key + * \return The const reference to the value + */ + const mapped_type& at(const key_type& key) const { return At(key); } + /*! + * \brief Index value associated with a key, throw exception if the key does not exist + * \param key The indexing key + * \return The mutable reference to the value + */ + mapped_type& at(const key_type& key) { return At(key); } + /*! + * \brief Index value associated with a key + * \param key The indexing key + * \return The iterator of the entry associated with the key, end iterator if not exists + */ + iterator find(const key_type& key) const { + ListNode n = Search(key); + return n.IsNone() ? end() : iterator(n.index, this); + } + /*! + * \brief Erase the entry associated with the iterator + * \param position The iterator + */ + void erase(const iterator& position) { + uint64_t i = position.i; + if (position.self != nullptr && i <= this->slots_) { + Erase(ListNode(i, this)); + } + } + /*! \return begin iterator */ + iterator begin() const { + if (slots_ == 0) { + return iterator(0, this); + } + for (uint64_t i = 0; i <= slots_; ++i) { + if (!ListNode(i, this).IsEmpty()) { + return iterator(i, this); + } + } + return iterator(slots_ + 1, this); + } + /*! \return end iterator */ + iterator end() const { return slots_ == 0 ? iterator(0, this) : iterator(slots_ + 1, this); } - static constexpr const char* _type_key = "Map"; - TVM_DECLARE_FINAL_OBJECT_INFO(MapNode, Object); + private: + /*! + * \brief Search for the given key + * \param key The key + * \return ListNode that associated with the key + */ + ListNode Search(const key_type& key) const { + if (this->size_ == 0) { + return ListNode(); + } + for (ListNode n = GetListHead(ObjectHash()(key)); !n.IsNone(); n.MoveToNext(this)) { + if (ObjectEqual()(key, n.Key())) { + return n; + } + } + return ListNode(); + } + /*! + * \brief Search for the given key, throw exception if not exists + * \param key The key + * \return ListNode that associated with the key + */ + mapped_type& At(const key_type& key) const { + ListNode n = Search(key); + CHECK(!n.IsNone()) << "IndexError: key is not in Map"; + return n.Val(); + } + /*! + * \brief Try to insert a key, or do nothing if already exists + * \param key The indexing key + * \param result The linked-list entry found or just constructed + * \return A boolean, indicating if actual insertion happens + */ + bool TryInsert(const key_type& key, ListNode* result) { + if (slots_ == 0) { + return false; + } + // required that `m` to be the head of a linked list through which we can iterator + ListNode m = IndexFromhash(ObjectHash()(key)); + // `m` can be: 1) empty; 2) body of an irrelevant list; 3) head of the relevant list + // Case 1: empty + if (m.IsEmpty()) { + m.NewHead(KVType(key, ObjectRef(nullptr))); + this->size_ += 1; + *result = m; + return true; + } + // Case 2: body of an irrelevant list + if (!m.IsHead()) { + // we move the elements around and construct the single-element linked list + return IsFull() ? false : TrySpareListHead(m, key, result); + } + // Case 3: head of the relevant list + // we iterate through the linked list until the end + ListNode n = m; + do { + // find equal item, do not insert + if (ObjectEqual()(key, n.Key())) { + *result = n; + return true; + } + // make sure `m` is the previous element of `n` + m = n; + } while (n.MoveToNext(this)); + // `m` is the tail of the linked list + // always check capacity before insertion + if (IsFull()) { + return false; + } + // find the next empty slot + uint8_t jump; + if (!m.GetNextEmpty(this, &jump, result)) { + return false; + } + result->NewTail(KVType(key, ObjectRef(nullptr))); + // link `n` to `empty`, and move forward + m.SetJump(jump); + this->size_ += 1; + return true; + } + /*! + * \brief Spare an entry to be the head of a linked list. + * As described in B3, during insertion, it is possible that the entire linked list does not + * exist, but the slot of its head has been occupied by other linked lists. In this case, we need + * to spare the slot by moving away the elements to another valid empty one to make insertion + * possible. \param n The given entry to be spared \param key The indexing key \param result The + * linked-list entry constructed as the head \return A boolean, if actual insertion happens + */ + bool TrySpareListHead(ListNode n, const key_type& key, ListNode* result) { + // `n` is not the head of the linked list + // move the original item of `n` (if any) + // and construct new item on the position `n` + // To make `n` empty, we + // 1) find `w` the previous element of `n` in the linked list + // 2) copy the linked list starting from `r = n` + // 3) paste them after `w` + // read from the linked list after `r` + ListNode r = n; + // write to the tail of `w` + ListNode w = n.GetPrev(this); + // after `n` is moved, we disallow writing to the slot + bool is_first = true; + uint8_t r_meta, jump; + ListNode empty; + do { + // `jump` describes how `w` is jumped to `empty` + // rehash if there is no empty space after `w` + if (!w.GetNextEmpty(this, &jump, &empty)) { + return false; + } + // move `r` to `empty` + empty.NewTail(std::move(r.Data())); + // clear the metadata of `r` + r_meta = r.Meta(); + if (is_first) { + is_first = false; + r.SetProtected(); + } else { + r.SetEmpty(); + } + // link `w` to `empty`, and move forward + w.SetJump(jump); + w = empty; + // move `r` forward as well + } while (r.MoveToNext(this, r_meta)); + // finally we have done moving the linked list + // fill data_ into `n` + n.NewHead(KVType(key, ObjectRef(nullptr))); + this->size_ += 1; + *result = n; + return true; + } + /*! + * \brief Remove a ListNode + * \param n The node to be removed + */ + void Erase(const ListNode& n) { + this->size_ -= 1; + if (!n.HasNext()) { + // `n` is the last + if (!n.IsHead()) { + // cut the link if there is any + n.GetPrev(this).SetJump(0); + } + n.Data().KVType::~KVType(); + n.SetEmpty(); + } else { + ListNode last = n, prev = n; + for (last.MoveToNext(this); last.HasNext(); prev = last, last.MoveToNext(this)) { + } + n.Data() = std::move(last.Data()); + last.SetEmpty(); + prev.SetJump(0); + } + } + /*! \brief Clear the container to empty, release all entries and memory acquired */ + void Reset() { + uint64_t n_blocks = CalcNumBlocks(this->slots_); + DenseMapNode* m = this; + for (uint64_t bi = 0; bi < n_blocks; ++bi) { + uint8_t* m_m = m->data_[bi].b; + KVType* m_d = reinterpret_cast(m->data_[bi].b + kBlockCap); + for (int j = 0; j < kBlockCap; ++j, ++m_m, ++m_d) { + uint8_t& meta = *m_m; + if (meta != uint8_t(kProtectedSlot) && meta != uint8_t(kEmptySlot)) { + meta = uint8_t(kEmptySlot); + m_d->KVType::~KVType(); + } + } + } + delete[] data_; + data_ = nullptr; + slots_ = 0; + size_ = 0; + fib_shift_ = 63; + } + /*! + * \brief Create an empty container + * \param fib_shift The fib shift provided + * \param n_slots Number of slots required, should be power-of-two + * \return The object created + */ + static ObjectPtr Empty(uint32_t fib_shift, uint64_t n_slots) { + CHECK_GT(n_slots, uint64_t(SmallMapNode::kMaxSize)); + CHECK_EQ((n_slots & -n_slots), n_slots); + ObjectPtr p = make_object(); + uint64_t n_blocks = CalcNumBlocks(n_slots - 1); + Block* block = p->data_ = new Block[n_blocks]; + p->slots_ = n_slots - 1; + p->size_ = 0; + p->fib_shift_ = fib_shift; + for (uint64_t i = 0; i < n_blocks; ++i, ++block) { + std::fill(block->b, block->b + kBlockCap, uint8_t(kEmptySlot)); + } + return p; + } + /*! + * \brief Create an empty container with elements copying from another DenseMapNode + * \param m The source container + * \return The object created + */ + static ObjectPtr CopyFrom(DenseMapNode* m) { + ObjectPtr p = make_object(); + uint64_t n_blocks = CalcNumBlocks(m->slots_); + p->data_ = new Block[n_blocks]; + p->slots_ = m->slots_; + p->size_ = m->size_; + p->fib_shift_ = m->fib_shift_; + for (uint64_t bi = 0; bi < n_blocks; ++bi) { + uint8_t* m_m = m->data_[bi].b; + uint8_t* p_m = p->data_[bi].b; + KVType* m_d = reinterpret_cast(m->data_[bi].b + kBlockCap); + KVType* p_d = reinterpret_cast(p->data_[bi].b + kBlockCap); + for (int j = 0; j < kBlockCap; ++j, ++m_m, ++m_d, ++p_m, ++p_d) { + uint8_t& meta = *p_m = *m_m; + CHECK(meta != kProtectedSlot); + if (meta != uint8_t(kEmptySlot)) { + new (p_d) KVType(*m_d); + } + } + } + return p; + } + /*! + * \brief InsertMaybeReHash an entry into the given hash map + * \param kv The entry to be inserted + * \param map The pointer to the map, can be changed if re-hashing happens + */ + static void InsertMaybeReHash(const KVType& kv, ObjectPtr* map) { + DenseMapNode* m = static_cast(map->get()); + ListNode n; + if (m->TryInsert(kv.first, &n)) { + n.Val() = kv.second; + return; + } + CHECK_GT(m->slots_, uint64_t(SmallMapNode::kMaxSize)); + ObjectPtr p = Empty(m->fib_shift_ - 1, m->slots_ * 2 + 2); + InsertMaybeReHash(kv, &p); + uint64_t n_blocks = CalcNumBlocks(m->slots_); + for (uint64_t bi = 0; bi < n_blocks; ++bi) { Review comment: It is an infrequent but performance critical path (10%-20% time), using iterators may introduce some code that cannot be optimized out by the compiler. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org