From commits-return-8144-archive-asf-public=cust-asf.ponee.io@kudu.apache.org Sat Nov 16 00:44:48 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id 3FBD418067C for ; Sat, 16 Nov 2019 01:44:48 +0100 (CET) Received: (qmail 94751 invoked by uid 500); 16 Nov 2019 00:44:47 -0000 Mailing-List: contact commits-help@kudu.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@kudu.apache.org Delivered-To: mailing list commits@kudu.apache.org Received: (qmail 94702 invoked by uid 99); 16 Nov 2019 00:44:47 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 16 Nov 2019 00:44:47 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 1CE3E8B689; Sat, 16 Nov 2019 00:44:47 +0000 (UTC) Date: Sat, 16 Nov 2019 00:44:48 +0000 To: "commits@kudu.apache.org" Subject: [kudu] 01/03: [tablet] Optimize TabletMetadata::CollectBlockIds efficiency MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit From: alexey@apache.org In-Reply-To: <157386508702.23632.10115024226944839454@gitbox.apache.org> References: <157386508702.23632.10115024226944839454@gitbox.apache.org> X-Git-Host: gitbox.apache.org X-Git-Repo: kudu X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Rev: 3ca7db0dbce89b2938d4c2cd5f2189767d5676f3 X-Git-NotificationType: diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated Message-Id: <20191116004447.1CE3E8B689@gitbox.apache.org> This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git commit 3ca7db0dbce89b2938d4c2cd5f2189767d5676f3 Author: Yingchun Lai <405403881@qq.com> AuthorDate: Tue Oct 15 16:50:04 2019 +0800 [tablet] Optimize TabletMetadata::CollectBlockIds efficiency Container size may reallocate and elements will be moved or copied several times if using std::vector and inserting elements at the beginning in TabletMetadata::CollectBlockIds, we'd better to insert elements at the end. We did some simple benchmarks for TabletMetadata::CollectBlockIds() operation in TestTabletMetadata.BenchmarkCollectBlockIds, compared with old version, result shows that inserting at the end provides a linear time cost when block count per RowSet or RowSet count increasing. And also, we ran benchmarks to compare std::vector with std::list and std::deque, both of them have worse efficiency. Result details as follow: (The first column is FLAGS_test_row_set_count/FLAGS_test_block_count_per_rs, the other columns show 10 times total TabletMetadata::CollectBlockIds() time cost, in millisecond.) vector list deque vector (head insert) (rear insert) 1000/1000 3464 687 334 321 2000/1000 15238 1390 647 613 4000/1000 75139 3057 1392 1212 8000/1000 328808 6593 2805 2736 4000/2000 159517 6488 2965 2462 4000/4000 348471 11967 5513 5141 4000/8000 -(too long) 23706 11704 10806 Change-Id: I7ce853e35eb7dfa9f9a099e465ea8edfaa7c4aa9 Reviewed-on: http://gerrit.cloudera.org:8080/14445 Tested-by: Kudu Jenkins Reviewed-by: Adar Dembo --- src/kudu/fs/block_id.h | 2 +- src/kudu/tablet/delta_tracker.cc | 2 +- src/kudu/tablet/diskrowset.cc | 2 +- src/kudu/tablet/metadata-test.cc | 16 +++++++------- src/kudu/tablet/rowset_metadata.cc | 8 +++---- src/kudu/tablet/rowset_metadata.h | 6 +++--- src/kudu/tablet/tablet_metadata-test.cc | 38 +++++++++++++++++++++++++++++++++ src/kudu/tablet/tablet_metadata.cc | 22 +++++++++---------- src/kudu/tablet/tablet_metadata.h | 9 ++++---- src/kudu/tools/kudu-tool-test.cc | 9 +++++--- src/kudu/tools/tool_action_fs.cc | 4 ++-- 11 files changed, 80 insertions(+), 38 deletions(-) diff --git a/src/kudu/fs/block_id.h b/src/kudu/fs/block_id.h index b1ebe03..5e7f5a2 100644 --- a/src/kudu/fs/block_id.h +++ b/src/kudu/fs/block_id.h @@ -101,6 +101,6 @@ struct BlockIdEqual { }; typedef std::unordered_set BlockIdSet; - +typedef std::vector BlockIdContainer; } // namespace kudu #endif /* KUDU_FS_BLOCK_ID_H */ diff --git a/src/kudu/tablet/delta_tracker.cc b/src/kudu/tablet/delta_tracker.cc index 770e3aa..0c0b0d7 100644 --- a/src/kudu/tablet/delta_tracker.cc +++ b/src/kudu/tablet/delta_tracker.cc @@ -367,7 +367,7 @@ Status DeltaTracker::CommitDeltaStoreMetadataUpdate(const RowSetMetadataUpdate& &new_stores, type), "Unable to open delta blocks"); - vector removed_blocks; + BlockIdContainer removed_blocks; rowset_metadata_->CommitUpdate(update, &removed_blocks); rowset_metadata_->AddOrphanedBlocks(removed_blocks); // Once we successfully commit to the rowset metadata, let's ensure we update diff --git a/src/kudu/tablet/diskrowset.cc b/src/kudu/tablet/diskrowset.cc index b6a9dc3..64ec1e1 100644 --- a/src/kudu/tablet/diskrowset.cc +++ b/src/kudu/tablet/diskrowset.cc @@ -601,7 +601,7 @@ Status DiskRowSet::MajorCompactDeltaStoresWithColumnIds(const vector& // Prepare the changes to the metadata. RowSetMetadataUpdate update; compaction->CreateMetadataUpdate(&update); - vector removed_blocks; + BlockIdContainer removed_blocks; rowset_metadata_->CommitUpdate(update, &removed_blocks); // Now that the metadata has been updated, open a new cfile set with the diff --git a/src/kudu/tablet/metadata-test.cc b/src/kudu/tablet/metadata-test.cc index e67529d..ad538b8 100644 --- a/src/kudu/tablet/metadata-test.cc +++ b/src/kudu/tablet/metadata-test.cc @@ -63,13 +63,13 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_1) { to_replace.emplace_back(2); to_replace.emplace_back(3); - vector removed; + BlockIdContainer removed; meta_->CommitUpdate( RowSetMetadataUpdate() .ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed); ASSERT_EQ(vector({ BlockId(1), BlockId(123), BlockId(4) }), meta_->redo_delta_blocks()); - ASSERT_EQ(vector({ BlockId(2), BlockId(3) }), + ASSERT_EQ(BlockIdContainer({ BlockId(2), BlockId(3) }), removed); } @@ -79,13 +79,13 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_2) { to_replace.emplace_back(1); to_replace.emplace_back(2); - vector removed; + BlockIdContainer removed; meta_->CommitUpdate( RowSetMetadataUpdate() .ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed); ASSERT_EQ(vector({ BlockId(123), BlockId(3), BlockId(4) }), meta_->redo_delta_blocks()); - ASSERT_EQ(vector({ BlockId(1), BlockId(2) }), + ASSERT_EQ(BlockIdContainer({ BlockId(1), BlockId(2) }), removed); } @@ -95,13 +95,13 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_3) { to_replace.emplace_back(3); to_replace.emplace_back(4); - vector removed; + BlockIdContainer removed; meta_->CommitUpdate( RowSetMetadataUpdate() .ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed); ASSERT_EQ(vector({ BlockId(1), BlockId(2), BlockId(123) }), meta_->redo_delta_blocks()); - ASSERT_EQ(vector({ BlockId(3), BlockId(4) }), + ASSERT_EQ(BlockIdContainer({ BlockId(3), BlockId(4) }), removed); } @@ -112,7 +112,7 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_Bad_NonContiguous) { to_replace.emplace_back(4); EXPECT_DEATH({ - vector removed; + BlockIdContainer removed; meta_->CommitUpdate( RowSetMetadataUpdate().ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed); @@ -127,7 +127,7 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_Bad_DoesntExist) { to_replace.emplace_back(555); EXPECT_DEATH({ - vector removed; + BlockIdContainer removed; meta_->CommitUpdate( RowSetMetadataUpdate().ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed); diff --git a/src/kudu/tablet/rowset_metadata.cc b/src/kudu/tablet/rowset_metadata.cc index 6c9705c..46bd932 100644 --- a/src/kudu/tablet/rowset_metadata.cc +++ b/src/kudu/tablet/rowset_metadata.cc @@ -56,7 +56,7 @@ Status RowSetMetadata::CreateNew(TabletMetadata* tablet_metadata, return Status::OK(); } -void RowSetMetadata::AddOrphanedBlocks(const vector& blocks) { +void RowSetMetadata::AddOrphanedBlocks(const BlockIdContainer& blocks) { tablet_metadata_->AddOrphanedBlocks(blocks); } @@ -203,7 +203,7 @@ Status RowSetMetadata::CommitUndoDeltaDataBlock(const BlockId& block_id) { } void RowSetMetadata::CommitUpdate(const RowSetMetadataUpdate& update, - vector* removed) { + BlockIdContainer* removed) { removed->clear(); { std::lock_guard l(lock_); @@ -298,8 +298,8 @@ int64_t RowSetMetadata::live_row_count() const { return live_row_count_; } -vector RowSetMetadata::GetAllBlocks() { - vector blocks; +BlockIdContainer RowSetMetadata::GetAllBlocks() { + BlockIdContainer blocks; std::lock_guard l(lock_); if (!adhoc_index_block_.IsNull()) { blocks.push_back(adhoc_index_block_); diff --git a/src/kudu/tablet/rowset_metadata.h b/src/kudu/tablet/rowset_metadata.h index 7f67b63..c4be77d 100644 --- a/src/kudu/tablet/rowset_metadata.h +++ b/src/kudu/tablet/rowset_metadata.h @@ -93,7 +93,7 @@ class RowSetMetadata { Status Flush(); - void AddOrphanedBlocks(const std::vector& blocks); + void AddOrphanedBlocks(const BlockIdContainer& blocks); const std::string ToString() const; @@ -222,11 +222,11 @@ class RowSetMetadata { // Returns the blocks removed from the rowset metadata during the update. // These blocks must be added to the TabletMetadata's orphaned blocks list. void CommitUpdate(const RowSetMetadataUpdate& update, - std::vector* removed); + BlockIdContainer* removed); void ToProtobuf(RowSetDataPB *pb); - std::vector GetAllBlocks(); + BlockIdContainer GetAllBlocks(); // Increase the row count. // Note: diff --git a/src/kudu/tablet/tablet_metadata-test.cc b/src/kudu/tablet/tablet_metadata-test.cc index dc482eb..1e69ce7 100644 --- a/src/kudu/tablet/tablet_metadata-test.cc +++ b/src/kudu/tablet/tablet_metadata-test.cc @@ -18,29 +18,42 @@ #include "kudu/tablet/tablet_metadata.h" #include +#include #include #include #include #include +#include #include #include #include "kudu/common/common.pb.h" #include "kudu/common/partial_row.h" +#include "kudu/common/schema.h" #include "kudu/common/wire_protocol-test-util.h" +#include "kudu/fs/block_id.h" #include "kudu/gutil/gscoped_ptr.h" #include "kudu/gutil/port.h" #include "kudu/gutil/ref_counted.h" #include "kudu/tablet/local_tablet_writer.h" #include "kudu/tablet/metadata.pb.h" +#include "kudu/tablet/rowset_metadata.h" #include "kudu/tablet/tablet-harness.h" #include "kudu/tablet/tablet-test-util.h" #include "kudu/tablet/tablet.h" #include "kudu/util/pb_util.h" #include "kudu/util/status.h" +#include "kudu/util/stopwatch.h" #include "kudu/util/test_macros.h" +DEFINE_int64(test_row_set_count, 1000, ""); +DEFINE_int64(test_block_count_per_rs, 1000, ""); + +using std::map; +using std::shared_ptr; +using std::unique_ptr; + namespace kudu { namespace tablet { @@ -183,5 +196,30 @@ TEST_F(TestTabletMetadata, TestOnDiskSize) { ASSERT_GE(final_size, superblock_pb.ByteSize()); } +TEST_F(TestTabletMetadata, BenchmarkCollectBlockIds) { + auto tablet_meta = harness_->tablet()->metadata(); + RowSetMetadataVector rs_metas; + for (int i = 0; i < FLAGS_test_row_set_count; ++i) { + unique_ptr meta; + ASSERT_OK(RowSetMetadata::CreateNew(tablet_meta, i, &meta)); + + map block_by_column; + for (int j = 0; j < FLAGS_test_block_count_per_rs; ++j) { + block_by_column[ColumnId(j)] = BlockId(j); + } + meta->SetColumnDataBlocks(block_by_column); + rs_metas.emplace_back(shared_ptr(meta.release())); + } + tablet_meta->UpdateAndFlush(RowSetMetadataIds(), rs_metas, TabletMetadata::kNoMrsFlushed); + + for (int i = 0; i < 10; ++i) { + BlockIdContainer block_ids; + LOG_TIMING(INFO, "collecting BlockIds") { + block_ids = tablet_meta->CollectBlockIds(); + } + LOG(INFO) << "block_ids size: " << block_ids.size(); + } +} + } // namespace tablet } // namespace kudu diff --git a/src/kudu/tablet/tablet_metadata.cc b/src/kudu/tablet/tablet_metadata.cc index 8189085..129da86 100644 --- a/src/kudu/tablet/tablet_metadata.cc +++ b/src/kudu/tablet/tablet_metadata.cc @@ -189,11 +189,11 @@ vector TabletMetadata::CollectBlockIdPBs(const TabletSuperBlockPB& su return block_ids; } -vector TabletMetadata::CollectBlockIds() { - vector block_ids; +BlockIdContainer TabletMetadata::CollectBlockIds() { + BlockIdContainer block_ids; for (const auto& r : rowsets_) { - vector rowset_block_ids = r->GetAllBlocks(); - block_ids.insert(block_ids.begin(), + BlockIdContainer rowset_block_ids = r->GetAllBlocks(); + block_ids.insert(block_ids.end(), rowset_block_ids.begin(), rowset_block_ids.end()); } @@ -348,7 +348,7 @@ Status TabletMetadata::UpdateOnDiskSize() { } Status TabletMetadata::LoadFromSuperBlock(const TabletSuperBlockPB& superblock) { - vector orphaned_blocks; + BlockIdContainer orphaned_blocks; VLOG(2) << "Loading TabletMetadata from SuperBlockPB:" << std::endl << SecureDebugString(superblock); @@ -497,17 +497,17 @@ Status TabletMetadata::UpdateAndFlush(const RowSetMetadataIds& to_remove, return Flush(); } -void TabletMetadata::AddOrphanedBlocks(const vector& blocks) { +void TabletMetadata::AddOrphanedBlocks(const BlockIdContainer& block_ids) { std::lock_guard l(data_lock_); - AddOrphanedBlocksUnlocked(blocks); + AddOrphanedBlocksUnlocked(block_ids); } -void TabletMetadata::AddOrphanedBlocksUnlocked(const vector& blocks) { +void TabletMetadata::AddOrphanedBlocksUnlocked(const BlockIdContainer& block_ids) { DCHECK(data_lock_.is_locked()); - orphaned_blocks_.insert(blocks.begin(), blocks.end()); + orphaned_blocks_.insert(block_ids.begin(), block_ids.end()); } -void TabletMetadata::DeleteOrphanedBlocks(const vector& blocks) { +void TabletMetadata::DeleteOrphanedBlocks(const BlockIdContainer& blocks) { if (PREDICT_FALSE(!FLAGS_enable_tablet_orphaned_block_deletion)) { LOG_WITH_PREFIX(WARNING) << "Not deleting " << blocks.size() << " block(s) from disk. Block deletion disabled via " @@ -559,7 +559,7 @@ Status TabletMetadata::Flush() { "tablet_id", tablet_id_); MutexLock l_flush(flush_lock_); - vector orphaned; + BlockIdContainer orphaned; TabletSuperBlockPB pb; { std::lock_guard l(data_lock_); diff --git a/src/kudu/tablet/tablet_metadata.h b/src/kudu/tablet/tablet_metadata.h index 060c071..fae2a3c 100644 --- a/src/kudu/tablet/tablet_metadata.h +++ b/src/kudu/tablet/tablet_metadata.h @@ -113,7 +113,7 @@ class TabletMetadata : public RefCountedThreadSafe { static std::vector CollectBlockIdPBs( const TabletSuperBlockPB& superblock); - std::vector CollectBlockIds(); + BlockIdContainer CollectBlockIds(); const std::string& tablet_id() const { DCHECK_NE(state_, kNotLoadedYet); @@ -190,7 +190,7 @@ class TabletMetadata : public RefCountedThreadSafe { // // Blocks are removed from this set after they are successfully deleted // in a call to DeleteOrphanedBlocks(). - void AddOrphanedBlocks(const std::vector& block_ids); + void AddOrphanedBlocks(const BlockIdContainer& block_ids); // Mark the superblock to be in state 'delete_type', sync it to disk, and // then delete all of the rowsets in this tablet. @@ -289,6 +289,7 @@ class TabletMetadata : public RefCountedThreadSafe { private: friend class RefCountedThreadSafe; friend class MetadataTest; + friend class TestTabletMetadataBenchmark; // Compile time assert that no one deletes TabletMetadata objects. ~TabletMetadata(); @@ -331,7 +332,7 @@ class TabletMetadata : public RefCountedThreadSafe { const RowSetMetadataVector& rowsets) const; // Requires 'data_lock_'. - void AddOrphanedBlocksUnlocked(const std::vector& block_ids); + void AddOrphanedBlocksUnlocked(const BlockIdContainer& block_ids); // Deletes the provided 'blocks' on disk. // @@ -339,7 +340,7 @@ class TabletMetadata : public RefCountedThreadSafe { // 'orphaned_blocks_' set. // // Failures are logged, but are not fatal. - void DeleteOrphanedBlocks(const std::vector& blocks); + void DeleteOrphanedBlocks(const BlockIdContainer& blocks); enum State { kNotLoadedYet, diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc index d1f96e2..d5fb05c 100644 --- a/src/kudu/tools/kudu-tool-test.cc +++ b/src/kudu/tools/kudu-tool-test.cc @@ -1198,7 +1198,7 @@ TEST_F(ToolTest, TestFsCheck) { // Create a local replica, flush some rows a few times, and collect all // of the created block IDs. - vector block_ids; + BlockIdContainer block_ids; { TabletHarness::Options opts(kTestDir); opts.tablet_id = kTabletId; @@ -1235,8 +1235,11 @@ TEST_F(ToolTest, TestFsCheck) { ASSERT_OK(fs.Open(&report)); std::shared_ptr deletion_transaction = fs.block_manager()->NewDeletionTransaction(); - for (int i = 0; i < block_ids.size(); i += 2) { - deletion_transaction->AddDeletedBlock(block_ids[i]); + int index = 0; + for (const auto& block_id : block_ids) { + if (index++ % 2 == 0) { + deletion_transaction->AddDeletedBlock(block_id); + } } deletion_transaction->CommitDeletedBlocks(&missing_ids); } diff --git a/src/kudu/tools/tool_action_fs.cc b/src/kudu/tools/tool_action_fs.cc index e650c7f..c72247f 100644 --- a/src/kudu/tools/tool_action_fs.cc +++ b/src/kudu/tools/tool_action_fs.cc @@ -140,14 +140,14 @@ Status Check(const RunnerContext& /*context*/) { } // Get the "live" block IDs (i.e. those referenced by a tablet). - vector live_block_ids; + BlockIdContainer live_block_ids; unordered_map live_block_id_to_tablet; vector tablet_ids; RETURN_NOT_OK(fs_manager.ListTabletIds(&tablet_ids)); for (const auto& t : tablet_ids) { scoped_refptr meta; RETURN_NOT_OK(TabletMetadata::Load(&fs_manager, t, &meta)); - vector tablet_live_block_ids = meta->CollectBlockIds(); + BlockIdContainer tablet_live_block_ids = meta->CollectBlockIds(); live_block_ids.insert(live_block_ids.end(), tablet_live_block_ids.begin(), tablet_live_block_ids.end());