kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ale...@apache.org
Subject [kudu] 01/03: [tablet] Optimize TabletMetadata::CollectBlockIds efficiency
Date Sat, 16 Nov 2019 00:44:48 GMT
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 3ca7db0dbce89b2938d4c2cd5f2189767d5676f3
Author: Yingchun Lai <405403881@qq.com>
AuthorDate: Tue Oct 15 16:50:04 2019 +0800

    [tablet] Optimize TabletMetadata::CollectBlockIds efficiency
    
    Container size may reallocate and elements will be moved or copied
    several times if using std::vector and inserting elements at the
    beginning in TabletMetadata::CollectBlockIds, we'd better to insert
    elements at the end.
    
    We did some simple benchmarks for TabletMetadata::CollectBlockIds()
    operation in TestTabletMetadata.BenchmarkCollectBlockIds, compared
    with old version, result shows that inserting at the end provides a
    linear time cost when block count per RowSet or RowSet count
    increasing.
    And also, we ran benchmarks to compare std::vector with std::list
    and std::deque, both of them have worse efficiency.
    Result details as follow:
    (The first column is FLAGS_test_row_set_count/FLAGS_test_block_count_per_rs,
    the other columns show 10 times total TabletMetadata::CollectBlockIds()
    time cost, in millisecond.)
               vector          list    deque   vector
               (head insert)                   (rear insert)
    1000/1000  3464            687     334     321
    2000/1000  15238           1390    647     613
    4000/1000  75139           3057    1392    1212
    8000/1000  328808          6593    2805    2736
    4000/2000  159517          6488    2965    2462
    4000/4000  348471          11967   5513    5141
    4000/8000  -(too long)     23706   11704   10806
    
    Change-Id: I7ce853e35eb7dfa9f9a099e465ea8edfaa7c4aa9
    Reviewed-on: http://gerrit.cloudera.org:8080/14445
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <adar@cloudera.com>
---
 src/kudu/fs/block_id.h                  |  2 +-
 src/kudu/tablet/delta_tracker.cc        |  2 +-
 src/kudu/tablet/diskrowset.cc           |  2 +-
 src/kudu/tablet/metadata-test.cc        | 16 +++++++-------
 src/kudu/tablet/rowset_metadata.cc      |  8 +++----
 src/kudu/tablet/rowset_metadata.h       |  6 +++---
 src/kudu/tablet/tablet_metadata-test.cc | 38 +++++++++++++++++++++++++++++++++
 src/kudu/tablet/tablet_metadata.cc      | 22 +++++++++----------
 src/kudu/tablet/tablet_metadata.h       |  9 ++++----
 src/kudu/tools/kudu-tool-test.cc        |  9 +++++---
 src/kudu/tools/tool_action_fs.cc        |  4 ++--
 11 files changed, 80 insertions(+), 38 deletions(-)

diff --git a/src/kudu/fs/block_id.h b/src/kudu/fs/block_id.h
index b1ebe03..5e7f5a2 100644
--- a/src/kudu/fs/block_id.h
+++ b/src/kudu/fs/block_id.h
@@ -101,6 +101,6 @@ struct BlockIdEqual {
 };
 
 typedef std::unordered_set<BlockId, BlockIdHash, BlockIdEqual> BlockIdSet;
-
+typedef std::vector<BlockId> BlockIdContainer;
 } // namespace kudu
 #endif /* KUDU_FS_BLOCK_ID_H */
diff --git a/src/kudu/tablet/delta_tracker.cc b/src/kudu/tablet/delta_tracker.cc
index 770e3aa..0c0b0d7 100644
--- a/src/kudu/tablet/delta_tracker.cc
+++ b/src/kudu/tablet/delta_tracker.cc
@@ -367,7 +367,7 @@ Status DeltaTracker::CommitDeltaStoreMetadataUpdate(const RowSetMetadataUpdate&
                                          &new_stores, type),
                         "Unable to open delta blocks");
 
-  vector<BlockId> removed_blocks;
+  BlockIdContainer removed_blocks;
   rowset_metadata_->CommitUpdate(update, &removed_blocks);
   rowset_metadata_->AddOrphanedBlocks(removed_blocks);
   // Once we successfully commit to the rowset metadata, let's ensure we update
diff --git a/src/kudu/tablet/diskrowset.cc b/src/kudu/tablet/diskrowset.cc
index b6a9dc3..64ec1e1 100644
--- a/src/kudu/tablet/diskrowset.cc
+++ b/src/kudu/tablet/diskrowset.cc
@@ -601,7 +601,7 @@ Status DiskRowSet::MajorCompactDeltaStoresWithColumnIds(const vector<ColumnId>&
   // Prepare the changes to the metadata.
   RowSetMetadataUpdate update;
   compaction->CreateMetadataUpdate(&update);
-  vector<BlockId> removed_blocks;
+  BlockIdContainer removed_blocks;
   rowset_metadata_->CommitUpdate(update, &removed_blocks);
 
   // Now that the metadata has been updated, open a new cfile set with the
diff --git a/src/kudu/tablet/metadata-test.cc b/src/kudu/tablet/metadata-test.cc
index e67529d..ad538b8 100644
--- a/src/kudu/tablet/metadata-test.cc
+++ b/src/kudu/tablet/metadata-test.cc
@@ -63,13 +63,13 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_1) {
   to_replace.emplace_back(2);
   to_replace.emplace_back(3);
 
-  vector<BlockId> removed;
+  BlockIdContainer removed;
   meta_->CommitUpdate(
       RowSetMetadataUpdate()
       .ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed);
   ASSERT_EQ(vector<BlockId>({ BlockId(1), BlockId(123), BlockId(4) }),
             meta_->redo_delta_blocks());
-  ASSERT_EQ(vector<BlockId>({ BlockId(2), BlockId(3) }),
+  ASSERT_EQ(BlockIdContainer({ BlockId(2), BlockId(3) }),
             removed);
 }
 
@@ -79,13 +79,13 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_2) {
   to_replace.emplace_back(1);
   to_replace.emplace_back(2);
 
-  vector<BlockId> removed;
+  BlockIdContainer removed;
   meta_->CommitUpdate(
       RowSetMetadataUpdate()
       .ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed);
   ASSERT_EQ(vector<BlockId>({ BlockId(123), BlockId(3), BlockId(4) }),
             meta_->redo_delta_blocks());
-  ASSERT_EQ(vector<BlockId>({ BlockId(1), BlockId(2) }),
+  ASSERT_EQ(BlockIdContainer({ BlockId(1), BlockId(2) }),
             removed);
 }
 
@@ -95,13 +95,13 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_3) {
   to_replace.emplace_back(3);
   to_replace.emplace_back(4);
 
-  vector<BlockId> removed;
+  BlockIdContainer removed;
   meta_->CommitUpdate(
       RowSetMetadataUpdate()
       .ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed);
   ASSERT_EQ(vector<BlockId>({ BlockId(1), BlockId(2), BlockId(123) }),
             meta_->redo_delta_blocks());
-  ASSERT_EQ(vector<BlockId>({ BlockId(3), BlockId(4) }),
+  ASSERT_EQ(BlockIdContainer({ BlockId(3), BlockId(4) }),
             removed);
 }
 
@@ -112,7 +112,7 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_Bad_NonContiguous) {
   to_replace.emplace_back(4);
 
   EXPECT_DEATH({
-    vector<BlockId> removed;
+    BlockIdContainer removed;
     meta_->CommitUpdate(
         RowSetMetadataUpdate().ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }),
         &removed);
@@ -127,7 +127,7 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_Bad_DoesntExist) {
   to_replace.emplace_back(555);
 
   EXPECT_DEATH({
-    vector<BlockId> removed;
+    BlockIdContainer removed;
     meta_->CommitUpdate(
         RowSetMetadataUpdate().ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }),
         &removed);
diff --git a/src/kudu/tablet/rowset_metadata.cc b/src/kudu/tablet/rowset_metadata.cc
index 6c9705c..46bd932 100644
--- a/src/kudu/tablet/rowset_metadata.cc
+++ b/src/kudu/tablet/rowset_metadata.cc
@@ -56,7 +56,7 @@ Status RowSetMetadata::CreateNew(TabletMetadata* tablet_metadata,
   return Status::OK();
 }
 
-void RowSetMetadata::AddOrphanedBlocks(const vector<BlockId>& blocks) {
+void RowSetMetadata::AddOrphanedBlocks(const BlockIdContainer& blocks) {
   tablet_metadata_->AddOrphanedBlocks(blocks);
 }
 
@@ -203,7 +203,7 @@ Status RowSetMetadata::CommitUndoDeltaDataBlock(const BlockId& block_id)
{
 }
 
 void RowSetMetadata::CommitUpdate(const RowSetMetadataUpdate& update,
-                                  vector<BlockId>* removed) {
+                                  BlockIdContainer* removed) {
   removed->clear();
   {
     std::lock_guard<LockType> l(lock_);
@@ -298,8 +298,8 @@ int64_t RowSetMetadata::live_row_count() const {
   return live_row_count_;
 }
 
-vector<BlockId> RowSetMetadata::GetAllBlocks() {
-  vector<BlockId> blocks;
+BlockIdContainer RowSetMetadata::GetAllBlocks() {
+  BlockIdContainer blocks;
   std::lock_guard<LockType> l(lock_);
   if (!adhoc_index_block_.IsNull()) {
     blocks.push_back(adhoc_index_block_);
diff --git a/src/kudu/tablet/rowset_metadata.h b/src/kudu/tablet/rowset_metadata.h
index 7f67b63..c4be77d 100644
--- a/src/kudu/tablet/rowset_metadata.h
+++ b/src/kudu/tablet/rowset_metadata.h
@@ -93,7 +93,7 @@ class RowSetMetadata {
 
   Status Flush();
 
-  void AddOrphanedBlocks(const std::vector<BlockId>& blocks);
+  void AddOrphanedBlocks(const BlockIdContainer& blocks);
 
   const std::string ToString() const;
 
@@ -222,11 +222,11 @@ class RowSetMetadata {
   // Returns the blocks removed from the rowset metadata during the update.
   // These blocks must be added to the TabletMetadata's orphaned blocks list.
   void CommitUpdate(const RowSetMetadataUpdate& update,
-                    std::vector<BlockId>* removed);
+                    BlockIdContainer* removed);
 
   void ToProtobuf(RowSetDataPB *pb);
 
-  std::vector<BlockId> GetAllBlocks();
+  BlockIdContainer GetAllBlocks();
 
   // Increase the row count.
   // Note:
diff --git a/src/kudu/tablet/tablet_metadata-test.cc b/src/kudu/tablet/tablet_metadata-test.cc
index dc482eb..1e69ce7 100644
--- a/src/kudu/tablet/tablet_metadata-test.cc
+++ b/src/kudu/tablet/tablet_metadata-test.cc
@@ -18,29 +18,42 @@
 #include "kudu/tablet/tablet_metadata.h"
 
 #include <cstdint>
+#include <map>
 #include <memory>
 #include <ostream>
 #include <string>
 
 #include <boost/optional/optional.hpp>
+#include <gflags/gflags.h>
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
 #include "kudu/common/common.pb.h"
 #include "kudu/common/partial_row.h"
+#include "kudu/common/schema.h"
 #include "kudu/common/wire_protocol-test-util.h"
+#include "kudu/fs/block_id.h"
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/tablet/local_tablet_writer.h"
 #include "kudu/tablet/metadata.pb.h"
+#include "kudu/tablet/rowset_metadata.h"
 #include "kudu/tablet/tablet-harness.h"
 #include "kudu/tablet/tablet-test-util.h"
 #include "kudu/tablet/tablet.h"
 #include "kudu/util/pb_util.h"
 #include "kudu/util/status.h"
+#include "kudu/util/stopwatch.h"
 #include "kudu/util/test_macros.h"
 
+DEFINE_int64(test_row_set_count, 1000, "");
+DEFINE_int64(test_block_count_per_rs, 1000, "");
+
+using std::map;
+using std::shared_ptr;
+using std::unique_ptr;
+
 namespace kudu {
 namespace tablet {
 
@@ -183,5 +196,30 @@ TEST_F(TestTabletMetadata, TestOnDiskSize) {
   ASSERT_GE(final_size, superblock_pb.ByteSize());
 }
 
+TEST_F(TestTabletMetadata, BenchmarkCollectBlockIds) {
+  auto tablet_meta = harness_->tablet()->metadata();
+  RowSetMetadataVector rs_metas;
+  for (int i = 0; i < FLAGS_test_row_set_count; ++i) {
+    unique_ptr<RowSetMetadata> meta;
+    ASSERT_OK(RowSetMetadata::CreateNew(tablet_meta, i, &meta));
+
+    map<ColumnId, BlockId> block_by_column;
+    for (int j = 0; j < FLAGS_test_block_count_per_rs; ++j) {
+      block_by_column[ColumnId(j)] = BlockId(j);
+    }
+    meta->SetColumnDataBlocks(block_by_column);
+    rs_metas.emplace_back(shared_ptr<RowSetMetadata>(meta.release()));
+  }
+  tablet_meta->UpdateAndFlush(RowSetMetadataIds(), rs_metas, TabletMetadata::kNoMrsFlushed);
+
+  for (int i = 0; i < 10; ++i) {
+    BlockIdContainer block_ids;
+    LOG_TIMING(INFO, "collecting BlockIds") {
+      block_ids = tablet_meta->CollectBlockIds();
+    }
+    LOG(INFO) << "block_ids size: " << block_ids.size();
+  }
+}
+
 } // namespace tablet
 } // namespace kudu
diff --git a/src/kudu/tablet/tablet_metadata.cc b/src/kudu/tablet/tablet_metadata.cc
index 8189085..129da86 100644
--- a/src/kudu/tablet/tablet_metadata.cc
+++ b/src/kudu/tablet/tablet_metadata.cc
@@ -189,11 +189,11 @@ vector<BlockIdPB> TabletMetadata::CollectBlockIdPBs(const TabletSuperBlockPB&
su
   return block_ids;
 }
 
-vector<BlockId> TabletMetadata::CollectBlockIds() {
-  vector<BlockId> block_ids;
+BlockIdContainer TabletMetadata::CollectBlockIds() {
+  BlockIdContainer block_ids;
   for (const auto& r : rowsets_) {
-    vector<BlockId> rowset_block_ids = r->GetAllBlocks();
-    block_ids.insert(block_ids.begin(),
+    BlockIdContainer rowset_block_ids = r->GetAllBlocks();
+    block_ids.insert(block_ids.end(),
                      rowset_block_ids.begin(),
                      rowset_block_ids.end());
   }
@@ -348,7 +348,7 @@ Status TabletMetadata::UpdateOnDiskSize() {
 }
 
 Status TabletMetadata::LoadFromSuperBlock(const TabletSuperBlockPB& superblock) {
-  vector<BlockId> orphaned_blocks;
+  BlockIdContainer orphaned_blocks;
 
   VLOG(2) << "Loading TabletMetadata from SuperBlockPB:" << std::endl
           << SecureDebugString(superblock);
@@ -497,17 +497,17 @@ Status TabletMetadata::UpdateAndFlush(const RowSetMetadataIds& to_remove,
   return Flush();
 }
 
-void TabletMetadata::AddOrphanedBlocks(const vector<BlockId>& blocks) {
+void TabletMetadata::AddOrphanedBlocks(const BlockIdContainer& block_ids) {
   std::lock_guard<LockType> l(data_lock_);
-  AddOrphanedBlocksUnlocked(blocks);
+  AddOrphanedBlocksUnlocked(block_ids);
 }
 
-void TabletMetadata::AddOrphanedBlocksUnlocked(const vector<BlockId>& blocks) {
+void TabletMetadata::AddOrphanedBlocksUnlocked(const BlockIdContainer& block_ids) {
   DCHECK(data_lock_.is_locked());
-  orphaned_blocks_.insert(blocks.begin(), blocks.end());
+  orphaned_blocks_.insert(block_ids.begin(), block_ids.end());
 }
 
-void TabletMetadata::DeleteOrphanedBlocks(const vector<BlockId>& blocks) {
+void TabletMetadata::DeleteOrphanedBlocks(const BlockIdContainer& blocks) {
   if (PREDICT_FALSE(!FLAGS_enable_tablet_orphaned_block_deletion)) {
     LOG_WITH_PREFIX(WARNING) << "Not deleting " << blocks.size()
         << " block(s) from disk. Block deletion disabled via "
@@ -559,7 +559,7 @@ Status TabletMetadata::Flush() {
                "tablet_id", tablet_id_);
 
   MutexLock l_flush(flush_lock_);
-  vector<BlockId> orphaned;
+  BlockIdContainer orphaned;
   TabletSuperBlockPB pb;
   {
     std::lock_guard<LockType> l(data_lock_);
diff --git a/src/kudu/tablet/tablet_metadata.h b/src/kudu/tablet/tablet_metadata.h
index 060c071..fae2a3c 100644
--- a/src/kudu/tablet/tablet_metadata.h
+++ b/src/kudu/tablet/tablet_metadata.h
@@ -113,7 +113,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata>
{
   static std::vector<BlockIdPB> CollectBlockIdPBs(
       const TabletSuperBlockPB& superblock);
 
-  std::vector<BlockId> CollectBlockIds();
+  BlockIdContainer CollectBlockIds();
 
   const std::string& tablet_id() const {
     DCHECK_NE(state_, kNotLoadedYet);
@@ -190,7 +190,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata>
{
   //
   // Blocks are removed from this set after they are successfully deleted
   // in a call to DeleteOrphanedBlocks().
-  void AddOrphanedBlocks(const std::vector<BlockId>& block_ids);
+  void AddOrphanedBlocks(const BlockIdContainer& block_ids);
 
   // Mark the superblock to be in state 'delete_type', sync it to disk, and
   // then delete all of the rowsets in this tablet.
@@ -289,6 +289,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata>
{
  private:
   friend class RefCountedThreadSafe<TabletMetadata>;
   friend class MetadataTest;
+  friend class TestTabletMetadataBenchmark;
 
   // Compile time assert that no one deletes TabletMetadata objects.
   ~TabletMetadata();
@@ -331,7 +332,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata>
{
                               const RowSetMetadataVector& rowsets) const;
 
   // Requires 'data_lock_'.
-  void AddOrphanedBlocksUnlocked(const std::vector<BlockId>& block_ids);
+  void AddOrphanedBlocksUnlocked(const BlockIdContainer& block_ids);
 
   // Deletes the provided 'blocks' on disk.
   //
@@ -339,7 +340,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata>
{
   // 'orphaned_blocks_' set.
   //
   // Failures are logged, but are not fatal.
-  void DeleteOrphanedBlocks(const std::vector<BlockId>& blocks);
+  void DeleteOrphanedBlocks(const BlockIdContainer& blocks);
 
   enum State {
     kNotLoadedYet,
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index d1f96e2..d5fb05c 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -1198,7 +1198,7 @@ TEST_F(ToolTest, TestFsCheck) {
 
   // Create a local replica, flush some rows a few times, and collect all
   // of the created block IDs.
-  vector<BlockId> block_ids;
+  BlockIdContainer block_ids;
   {
     TabletHarness::Options opts(kTestDir);
     opts.tablet_id = kTabletId;
@@ -1235,8 +1235,11 @@ TEST_F(ToolTest, TestFsCheck) {
     ASSERT_OK(fs.Open(&report));
     std::shared_ptr<BlockDeletionTransaction> deletion_transaction =
         fs.block_manager()->NewDeletionTransaction();
-    for (int i = 0; i < block_ids.size(); i += 2) {
-      deletion_transaction->AddDeletedBlock(block_ids[i]);
+    int index = 0;
+    for (const auto& block_id : block_ids) {
+      if (index++ % 2 == 0) {
+        deletion_transaction->AddDeletedBlock(block_id);
+      }
     }
     deletion_transaction->CommitDeletedBlocks(&missing_ids);
   }
diff --git a/src/kudu/tools/tool_action_fs.cc b/src/kudu/tools/tool_action_fs.cc
index e650c7f..c72247f 100644
--- a/src/kudu/tools/tool_action_fs.cc
+++ b/src/kudu/tools/tool_action_fs.cc
@@ -140,14 +140,14 @@ Status Check(const RunnerContext& /*context*/) {
   }
 
   // Get the "live" block IDs (i.e. those referenced by a tablet).
-  vector<BlockId> live_block_ids;
+  BlockIdContainer live_block_ids;
   unordered_map<BlockId, string, BlockIdHash, BlockIdEqual> live_block_id_to_tablet;
   vector<string> tablet_ids;
   RETURN_NOT_OK(fs_manager.ListTabletIds(&tablet_ids));
   for (const auto& t : tablet_ids) {
     scoped_refptr<TabletMetadata> meta;
     RETURN_NOT_OK(TabletMetadata::Load(&fs_manager, t, &meta));
-    vector<BlockId> tablet_live_block_ids = meta->CollectBlockIds();
+    BlockIdContainer tablet_live_block_ids = meta->CollectBlockIds();
     live_block_ids.insert(live_block_ids.end(),
                           tablet_live_block_ids.begin(),
                           tablet_live_block_ids.end());


Mime
View raw message