kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject [kudu] 03/03: KUDU-2665: LBM may delete containers with live blocks
Date Sat, 02 Feb 2019 18:49:24 GMT
This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 2107f23064704bee07bee317b6b131fdf63b151f
Author: helifu <hzhelifu@corp.netease.com>
AuthorDate: Wed Jan 30 22:55:30 2019 +0800

    KUDU-2665: LBM may delete containers with live blocks
    
    Due to the nature of WritableBlock finalization/closing, it's
    possible for a container with outstanding WritableBlock instances
    to briefly appear as dead.
    That's because:
    1) The container's next block offset (responsible for determining
       fullness) is incrmented when the WritableBlock is finalized, but
    2) The container's live block count is incremented when the
       WritableBlock is closed.
    Thus, if the "last" block in a container is deleted after a
    WritableBlock was finalized but before it has been closed, the
    container will be erroneously marked as dead. When the container's
    last referrent disappears, it will be deleted from disk despite
    having live blocks in it.
    
    Change-Id: I894f32b1c164ae7770c92171850edd167dfaf8ad
    Reviewed-on: http://gerrit.cloudera.org:8080/12308
    Reviewed-by: Hao Hao <hao.hao@cloudera.com>
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <adar@cloudera.com>
---
 src/kudu/fs/log_block_manager-test.cc | 65 +++++++++++++++++++++++++++++++++++
 src/kudu/fs/log_block_manager.cc      | 57 ++++++++++++++++++++----------
 2 files changed, 103 insertions(+), 19 deletions(-)

diff --git a/src/kudu/fs/log_block_manager-test.cc b/src/kudu/fs/log_block_manager-test.cc
index 1c52d74..c82b7ff 100644
--- a/src/kudu/fs/log_block_manager-test.cc
+++ b/src/kudu/fs/log_block_manager-test.cc
@@ -1867,6 +1867,71 @@ TEST_F(LogBlockManagerTest, TestDeleteDeadContainersByDeletionTransaction)
{
   }
 }
 
+// Test for KUDU-2665 to ensure that once the container is full and has no live
+// blocks but with a reference by WritableBlock, it will not be deleted.
+TEST_F(LogBlockManagerTest, TestDoNotDeleteFakeDeadContainer) {
+  // Lower the max container size.
+  FLAGS_log_container_max_size = 64 * 1024;
+
+  const auto Process = [&] (bool close_block) {
+    // Create a bunch of blocks on the same container.
+    vector<BlockId> blocks;
+    for (int i = 0; i < 10; ++i) {
+      unique_ptr<BlockCreationTransaction> transaction = bm_->NewCreationTransaction();
+      unique_ptr<WritableBlock> writer;
+      ASSERT_OK(bm_->CreateBlock(test_block_opts_, &writer));
+      blocks.emplace_back(writer->id());
+      ASSERT_OK(writer->Append("a"));
+      ASSERT_OK(writer->Finalize());
+      transaction->AddCreatedBlock(std::move(writer));
+      ASSERT_OK(transaction->CommitCreatedBlocks());
+    }
+
+    // Create a special block.
+    unique_ptr<WritableBlock> writer;
+    ASSERT_OK(bm_->CreateBlock(test_block_opts_, &writer));
+    BlockId block_id = writer->id();
+    unique_ptr<uint8_t[]> data(new uint8_t[FLAGS_log_container_max_size]);
+    ASSERT_OK(writer->Append({ data.get(), FLAGS_log_container_max_size }));
+    ASSERT_OK(writer->Finalize());
+    // Do not close and reset the writer.
+    // Now the container is full and has no live blocks.
+
+    // Delete the bunch of blocks.
+    {
+      vector<BlockId> deleted;
+      shared_ptr<BlockDeletionTransaction> transaction = bm_->NewDeletionTransaction();
+      for (const auto& e : blocks) {
+        transaction->AddDeletedBlock(e);
+      }
+      ASSERT_OK(transaction->CommitDeletedBlocks(&deleted));
+      transaction.reset();
+      for (const auto& data_dir : dd_manager_->data_dirs()) {
+        data_dir->WaitOnClosures();
+      }
+    }
+
+    // Close and reset the writer.
+    // It's going to test Abort() when 'close_block' is false.
+    if (close_block) {
+      ASSERT_OK(writer->Close());
+    }
+    writer.reset();
+
+    // Open the special block after restart.
+    ASSERT_OK(ReopenBlockManager());
+    unique_ptr<ReadableBlock> block;
+    if (close_block) {
+      ASSERT_OK(bm_->OpenBlock(block_id, &block));
+    } else {
+      ASSERT_TRUE(bm_->OpenBlock(block_id, &block).IsNotFound());
+    }
+  };
+
+  Process(true);
+  Process(false);
+}
+
 TEST_F(LogBlockManagerTest, TestHalfPresentContainer) {
   BlockId block_id;
   string data_file_name;
diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc
index 8d9bfaf..a389b59 100644
--- a/src/kudu/fs/log_block_manager.cc
+++ b/src/kudu/fs/log_block_manager.cc
@@ -520,22 +520,45 @@ class LogBlockContainer: public RefCountedThreadSafe<LogBlockContainer>
{
   int64_t live_bytes() const { return live_bytes_.Load(); }
   int64_t live_bytes_aligned() const { return live_bytes_aligned_.Load(); }
   int64_t live_blocks() const { return live_blocks_.Load(); }
+  int32_t blocks_being_written() const { return blocks_being_written_.Load(); }
   bool full() const {
     return next_block_offset() >= FLAGS_log_container_max_size ||
         (max_num_blocks_ && (total_blocks() >= max_num_blocks_));
   }
+  bool dead() const { return dead_.Load(); }
   const LogBlockManagerMetrics* metrics() const { return metrics_; }
   DataDir* data_dir() const { return data_dir_; }
   const PathInstanceMetadataPB* instance() const { return data_dir_->instance()->metadata();
}
 
-  // Tries to mark the container as 'dead'. A dead container is one that is full
-  // and has no live blocks; it is deleted when the container goes out of scope.
+  // Adjusts the number of blocks being written.
+  // Positive means increase, negative means decrease.
+  int32_t blocks_being_written_incr(int32_t value) {
+    return blocks_being_written_.IncrementBy(value);
+  }
+
+  // Check that the container meets the death condition.
   //
-  // If successful, returns true; otherwise returns false.
-  bool TrySetDead();
+  // Although this code looks like a TOCTOU violation, it is safe because of
+  // some additional LBM invariants:
+  // 1) When a container becomes full, it stays full for the process' lifetime.
+  // 2) A full container will never accrue another live block. Meaning, losing
+  //    its last live block is a terminal state for a full container.
+  // 3) The only exception to #2 is if the container currently has a finalized
+  //    but not-yet-closed WritableBlock. In this case the container became full
+  //    when the WritableBlock was finalized, but the live block counter only
+  //    reflects the new block when it is closed.
+  bool check_death_condition() const {
+    return (full() && live_blocks() == 0 && blocks_being_written() == 0);
+  }
 
-  // Returns whether the container has been marked as dead.
-  bool dead() { return dead_.Load(); }
+  // Tries to mark the container as 'dead', which means it will be deleted
+  // when it goes out of scope. Can only be set dead once.
+  //
+  // If successful, returns true; otherwise returns false.
+  bool TrySetDead() {
+    if (dead()) return false;
+    return dead_.CompareAndSet(false, true);
+  }
 
  private:
   LogBlockContainer(LogBlockManager* block_manager, DataDir* data_dir,
@@ -630,6 +653,9 @@ class LogBlockContainer: public RefCountedThreadSafe<LogBlockContainer>
{
   // The number of not-yet-deleted blocks in the container.
   AtomicInt<int64_t> live_blocks_;
 
+  // The number of LogWritableBlocks currently open for this container.
+  AtomicInt<int32_t> blocks_being_written_;
+
   // Whether or not this container has been marked as dead.
   AtomicBool dead_;
 
@@ -670,6 +696,7 @@ LogBlockContainer::LogBlockContainer(
       live_bytes_(0),
       live_bytes_aligned_(0),
       live_blocks_(0),
+      blocks_being_written_(0),
       dead_(false),
       metrics_(block_manager->metrics()) {
 }
@@ -1290,18 +1317,6 @@ void LogBlockContainer::ContainerDeletionAsync(int64_t offset, int64_t
length) {
                             data_dir()->dir()));
 }
 
-bool LogBlockContainer::TrySetDead() {
-  // Although this code looks like a TOCTOU violation, it is safe because of
-  // some additional LBM invariants:
-  // 1. When a container becomes full, it stays full for the process' lifetime.
-  // 2. A full container will never accrue another live block. Meaning, losing
-  //    its last live block is a terminal state for a full container.
-  if (full() && live_blocks() == 0) {
-    return dead_.CompareAndSet(false, true);
-  }
-  return false;
-}
-
 ///////////////////////////////////////////////////////////
 // LogBlockCreationTransaction
 ////////////////////////////////////////////////////////////
@@ -1407,7 +1422,7 @@ LogBlockDeletionTransaction::~LogBlockDeletionTransaction() {
     // For the full and dead containers, it is much cheaper
     // to delete the container files outright, rather than
     // punching holes.
-    if (container->full() && container->live_blocks() == 0) {
+    if (container->check_death_condition()) {
       // Mark the container as deleted and remove it from the global map.
       //
       // It's possible for multiple deletion transactions to end up here. For
@@ -1523,6 +1538,7 @@ LogWritableBlock::LogWritableBlock(LogBlockContainerRefPtr container,
       state_(CLEAN) {
   DCHECK_GE(block_offset, 0);
   DCHECK_EQ(0, block_offset % container_->instance()->filesystem_block_size_bytes());
+  container_->blocks_being_written_incr(1);
   if (container_->metrics()) {
     container_->metrics()->generic_metrics.blocks_open_writing->Increment();
     container_->metrics()->generic_metrics.total_writable_blocks->Increment();
@@ -1530,6 +1546,9 @@ LogWritableBlock::LogWritableBlock(LogBlockContainerRefPtr container,
 }
 
 LogWritableBlock::~LogWritableBlock() {
+  // Put the decrement 'blocks_being_written_' at the beginning of this
+  // function can help to avoid unnecessary hole punch.
+  container_->blocks_being_written_incr(-1);
   if (state_ != CLOSED) {
     WARN_NOT_OK(Abort(), Substitute("Failed to abort block $0",
                                     id().ToString()));


Mime
View raw message