From commits-return-6922-archive-asf-public=cust-asf.ponee.io@kudu.apache.org Sat Feb 2 18:49:24 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 7BFDC18077F for ; Sat, 2 Feb 2019 19:49:23 +0100 (CET) Received: (qmail 30010 invoked by uid 500); 2 Feb 2019 18:49:22 -0000 Mailing-List: contact commits-help@kudu.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@kudu.apache.org Delivered-To: mailing list commits@kudu.apache.org Received: (qmail 30001 invoked by uid 99); 2 Feb 2019 18:49:22 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 02 Feb 2019 18:49:22 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 64DA0857AE; Sat, 2 Feb 2019 18:49:21 +0000 (UTC) Date: Sat, 02 Feb 2019 18:49:24 +0000 To: "commits@kudu.apache.org" Subject: [kudu] 03/03: KUDU-2665: LBM may delete containers with live blocks MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit From: adar@apache.org In-Reply-To: <154913336128.10708.12006904578464468831@gitbox.apache.org> References: <154913336128.10708.12006904578464468831@gitbox.apache.org> X-Git-Host: gitbox.apache.org X-Git-Repo: kudu X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Rev: 2107f23064704bee07bee317b6b131fdf63b151f X-Git-NotificationType: diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated Message-Id: <20190202184921.64DA0857AE@gitbox.apache.org> This is an automated email from the ASF dual-hosted git repository. adar pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git commit 2107f23064704bee07bee317b6b131fdf63b151f Author: helifu AuthorDate: Wed Jan 30 22:55:30 2019 +0800 KUDU-2665: LBM may delete containers with live blocks Due to the nature of WritableBlock finalization/closing, it's possible for a container with outstanding WritableBlock instances to briefly appear as dead. That's because: 1) The container's next block offset (responsible for determining fullness) is incrmented when the WritableBlock is finalized, but 2) The container's live block count is incremented when the WritableBlock is closed. Thus, if the "last" block in a container is deleted after a WritableBlock was finalized but before it has been closed, the container will be erroneously marked as dead. When the container's last referrent disappears, it will be deleted from disk despite having live blocks in it. Change-Id: I894f32b1c164ae7770c92171850edd167dfaf8ad Reviewed-on: http://gerrit.cloudera.org:8080/12308 Reviewed-by: Hao Hao Tested-by: Kudu Jenkins Reviewed-by: Adar Dembo --- src/kudu/fs/log_block_manager-test.cc | 65 +++++++++++++++++++++++++++++++++++ src/kudu/fs/log_block_manager.cc | 57 ++++++++++++++++++++---------- 2 files changed, 103 insertions(+), 19 deletions(-) diff --git a/src/kudu/fs/log_block_manager-test.cc b/src/kudu/fs/log_block_manager-test.cc index 1c52d74..c82b7ff 100644 --- a/src/kudu/fs/log_block_manager-test.cc +++ b/src/kudu/fs/log_block_manager-test.cc @@ -1867,6 +1867,71 @@ TEST_F(LogBlockManagerTest, TestDeleteDeadContainersByDeletionTransaction) { } } +// Test for KUDU-2665 to ensure that once the container is full and has no live +// blocks but with a reference by WritableBlock, it will not be deleted. +TEST_F(LogBlockManagerTest, TestDoNotDeleteFakeDeadContainer) { + // Lower the max container size. + FLAGS_log_container_max_size = 64 * 1024; + + const auto Process = [&] (bool close_block) { + // Create a bunch of blocks on the same container. + vector blocks; + for (int i = 0; i < 10; ++i) { + unique_ptr transaction = bm_->NewCreationTransaction(); + unique_ptr writer; + ASSERT_OK(bm_->CreateBlock(test_block_opts_, &writer)); + blocks.emplace_back(writer->id()); + ASSERT_OK(writer->Append("a")); + ASSERT_OK(writer->Finalize()); + transaction->AddCreatedBlock(std::move(writer)); + ASSERT_OK(transaction->CommitCreatedBlocks()); + } + + // Create a special block. + unique_ptr writer; + ASSERT_OK(bm_->CreateBlock(test_block_opts_, &writer)); + BlockId block_id = writer->id(); + unique_ptr data(new uint8_t[FLAGS_log_container_max_size]); + ASSERT_OK(writer->Append({ data.get(), FLAGS_log_container_max_size })); + ASSERT_OK(writer->Finalize()); + // Do not close and reset the writer. + // Now the container is full and has no live blocks. + + // Delete the bunch of blocks. + { + vector deleted; + shared_ptr transaction = bm_->NewDeletionTransaction(); + for (const auto& e : blocks) { + transaction->AddDeletedBlock(e); + } + ASSERT_OK(transaction->CommitDeletedBlocks(&deleted)); + transaction.reset(); + for (const auto& data_dir : dd_manager_->data_dirs()) { + data_dir->WaitOnClosures(); + } + } + + // Close and reset the writer. + // It's going to test Abort() when 'close_block' is false. + if (close_block) { + ASSERT_OK(writer->Close()); + } + writer.reset(); + + // Open the special block after restart. + ASSERT_OK(ReopenBlockManager()); + unique_ptr block; + if (close_block) { + ASSERT_OK(bm_->OpenBlock(block_id, &block)); + } else { + ASSERT_TRUE(bm_->OpenBlock(block_id, &block).IsNotFound()); + } + }; + + Process(true); + Process(false); +} + TEST_F(LogBlockManagerTest, TestHalfPresentContainer) { BlockId block_id; string data_file_name; diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc index 8d9bfaf..a389b59 100644 --- a/src/kudu/fs/log_block_manager.cc +++ b/src/kudu/fs/log_block_manager.cc @@ -520,22 +520,45 @@ class LogBlockContainer: public RefCountedThreadSafe { int64_t live_bytes() const { return live_bytes_.Load(); } int64_t live_bytes_aligned() const { return live_bytes_aligned_.Load(); } int64_t live_blocks() const { return live_blocks_.Load(); } + int32_t blocks_being_written() const { return blocks_being_written_.Load(); } bool full() const { return next_block_offset() >= FLAGS_log_container_max_size || (max_num_blocks_ && (total_blocks() >= max_num_blocks_)); } + bool dead() const { return dead_.Load(); } const LogBlockManagerMetrics* metrics() const { return metrics_; } DataDir* data_dir() const { return data_dir_; } const PathInstanceMetadataPB* instance() const { return data_dir_->instance()->metadata(); } - // Tries to mark the container as 'dead'. A dead container is one that is full - // and has no live blocks; it is deleted when the container goes out of scope. + // Adjusts the number of blocks being written. + // Positive means increase, negative means decrease. + int32_t blocks_being_written_incr(int32_t value) { + return blocks_being_written_.IncrementBy(value); + } + + // Check that the container meets the death condition. // - // If successful, returns true; otherwise returns false. - bool TrySetDead(); + // Although this code looks like a TOCTOU violation, it is safe because of + // some additional LBM invariants: + // 1) When a container becomes full, it stays full for the process' lifetime. + // 2) A full container will never accrue another live block. Meaning, losing + // its last live block is a terminal state for a full container. + // 3) The only exception to #2 is if the container currently has a finalized + // but not-yet-closed WritableBlock. In this case the container became full + // when the WritableBlock was finalized, but the live block counter only + // reflects the new block when it is closed. + bool check_death_condition() const { + return (full() && live_blocks() == 0 && blocks_being_written() == 0); + } - // Returns whether the container has been marked as dead. - bool dead() { return dead_.Load(); } + // Tries to mark the container as 'dead', which means it will be deleted + // when it goes out of scope. Can only be set dead once. + // + // If successful, returns true; otherwise returns false. + bool TrySetDead() { + if (dead()) return false; + return dead_.CompareAndSet(false, true); + } private: LogBlockContainer(LogBlockManager* block_manager, DataDir* data_dir, @@ -630,6 +653,9 @@ class LogBlockContainer: public RefCountedThreadSafe { // The number of not-yet-deleted blocks in the container. AtomicInt live_blocks_; + // The number of LogWritableBlocks currently open for this container. + AtomicInt blocks_being_written_; + // Whether or not this container has been marked as dead. AtomicBool dead_; @@ -670,6 +696,7 @@ LogBlockContainer::LogBlockContainer( live_bytes_(0), live_bytes_aligned_(0), live_blocks_(0), + blocks_being_written_(0), dead_(false), metrics_(block_manager->metrics()) { } @@ -1290,18 +1317,6 @@ void LogBlockContainer::ContainerDeletionAsync(int64_t offset, int64_t length) { data_dir()->dir())); } -bool LogBlockContainer::TrySetDead() { - // Although this code looks like a TOCTOU violation, it is safe because of - // some additional LBM invariants: - // 1. When a container becomes full, it stays full for the process' lifetime. - // 2. A full container will never accrue another live block. Meaning, losing - // its last live block is a terminal state for a full container. - if (full() && live_blocks() == 0) { - return dead_.CompareAndSet(false, true); - } - return false; -} - /////////////////////////////////////////////////////////// // LogBlockCreationTransaction //////////////////////////////////////////////////////////// @@ -1407,7 +1422,7 @@ LogBlockDeletionTransaction::~LogBlockDeletionTransaction() { // For the full and dead containers, it is much cheaper // to delete the container files outright, rather than // punching holes. - if (container->full() && container->live_blocks() == 0) { + if (container->check_death_condition()) { // Mark the container as deleted and remove it from the global map. // // It's possible for multiple deletion transactions to end up here. For @@ -1523,6 +1538,7 @@ LogWritableBlock::LogWritableBlock(LogBlockContainerRefPtr container, state_(CLEAN) { DCHECK_GE(block_offset, 0); DCHECK_EQ(0, block_offset % container_->instance()->filesystem_block_size_bytes()); + container_->blocks_being_written_incr(1); if (container_->metrics()) { container_->metrics()->generic_metrics.blocks_open_writing->Increment(); container_->metrics()->generic_metrics.total_writable_blocks->Increment(); @@ -1530,6 +1546,9 @@ LogWritableBlock::LogWritableBlock(LogBlockContainerRefPtr container, } LogWritableBlock::~LogWritableBlock() { + // Put the decrement 'blocks_being_written_' at the beginning of this + // function can help to avoid unnecessary hole punch. + container_->blocks_being_written_incr(-1); if (state_ != CLOSED) { WARN_NOT_OK(Abort(), Substitute("Failed to abort block $0", id().ToString()));