kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject [kudu] branch master updated: KUDU-2636: LBM supports deleting dead containers
Date Thu, 17 Jan 2019 21:55:53 GMT
This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 0c50197  KUDU-2636: LBM supports deleting dead containers
0c50197 is described below

commit 0c501979bea01cfb834be0e2c9e456c6d5751e53
Author: helifu <hzhelifu@corp.netease.com>
AuthorDate: Sat Dec 22 22:03:20 2018 +0800

    KUDU-2636: LBM supports deleting dead containers
    
    this patch intends to support deleting the full containers
    that are dead at runtime. It can help to ease disk pressure,
    save tserver startup time and etc. It uses the container
    size and live blocks to determine whether the container
    is dead or live.
    
    Step1: wrapping up containers in scoped_refptr. [V]
    Step2: supporting dead containers removal.      [V]
    
    Change-Id: I7d3672ae3c3dd9acd489120d653c44a641537f10
    Reviewed-on: http://gerrit.cloudera.org:8080/12075
    Reviewed-by: Adar Dembo <adar@cloudera.com>
    Tested-by: Kudu Jenkins
    Reviewed-by: Hao Hao <hao.hao@cloudera.com>
---
 src/kudu/fs/log_block_manager-test.cc | 157 +++++++++++++++++++++++++++++---
 src/kudu/fs/log_block_manager.cc      | 164 +++++++++++++++++++++++++---------
 src/kudu/fs/log_block_manager.h       |   9 +-
 3 files changed, 269 insertions(+), 61 deletions(-)

diff --git a/src/kudu/fs/log_block_manager-test.cc b/src/kudu/fs/log_block_manager-test.cc
index be997c3..3a16e90 100644
--- a/src/kudu/fs/log_block_manager-test.cc
+++ b/src/kudu/fs/log_block_manager-test.cc
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include "kudu/fs/log_block_manager.h"
+
 #include <algorithm>
 #include <cstdint>
 #include <cstdlib>
@@ -42,7 +44,6 @@
 #include "kudu/fs/fs.pb.h"
 #include "kudu/fs/fs_report.h"
 #include "kudu/fs/log_block_manager-test-util.h"
-#include "kudu/fs/log_block_manager.h"
 #include "kudu/gutil/bind.h"
 #include "kudu/gutil/bind_helpers.h"
 #include "kudu/gutil/casts.h"
@@ -95,6 +96,7 @@ METRIC_DECLARE_gauge_uint64(log_block_manager_blocks_under_management);
 METRIC_DECLARE_counter(log_block_manager_holes_punched);
 METRIC_DECLARE_gauge_uint64(log_block_manager_containers);
 METRIC_DECLARE_gauge_uint64(log_block_manager_full_containers);
+METRIC_DECLARE_gauge_uint64(log_block_manager_dead_containers_deleted);
 
 namespace kudu {
 namespace fs {
@@ -292,7 +294,8 @@ TEST_F(LogBlockManagerTest, MetricsTest) {
         {0, &METRIC_log_block_manager_containers},
         {0, &METRIC_log_block_manager_full_containers} },
       { {0, &METRIC_log_block_manager_holes_punched},
-        {0, &METRIC_block_manager_total_blocks_deleted} }));
+        {0, &METRIC_block_manager_total_blocks_deleted},
+        {0, &METRIC_log_block_manager_dead_containers_deleted} }));
 
   // Lower the max container size so that we can more easily test full
   // container metrics.
@@ -307,7 +310,8 @@ TEST_F(LogBlockManagerTest, MetricsTest) {
         {1, &METRIC_log_block_manager_containers},
         {0, &METRIC_log_block_manager_full_containers} },
       { {0, &METRIC_log_block_manager_holes_punched},
-        {0, &METRIC_block_manager_total_blocks_deleted} }));
+        {0, &METRIC_block_manager_total_blocks_deleted},
+        {0, &METRIC_log_block_manager_dead_containers_deleted} }));
 
   // And when the block is closed, it becomes "under management".
   ASSERT_OK(writer->Close());
@@ -317,12 +321,14 @@ TEST_F(LogBlockManagerTest, MetricsTest) {
         {1, &METRIC_log_block_manager_containers},
         {0, &METRIC_log_block_manager_full_containers} },
       { {0, &METRIC_log_block_manager_holes_punched},
-        {0, &METRIC_block_manager_total_blocks_deleted} }));
+        {0, &METRIC_block_manager_total_blocks_deleted},
+        {0, &METRIC_log_block_manager_dead_containers_deleted} }));
 
   // Create 10 blocks concurrently. We reuse the existing container and
   // create 9 new ones. All of them get filled.
   BlockId saved_id;
   {
+    uint8_t data[1024];
     Random rand(SeedRandom());
     unique_ptr<BlockCreationTransaction> transaction = bm_->NewCreationTransaction();
     for (int i = 0; i < 10; i++) {
@@ -331,9 +337,8 @@ TEST_F(LogBlockManagerTest, MetricsTest) {
       if (saved_id.IsNull()) {
         saved_id = b->id();
       }
-      uint8_t data[1024];
-      for (int i = 0; i < sizeof(data); i += sizeof(uint32_t)) {
-        data[i] = rand.Next();
+      for (int j = 0; j < sizeof(data); j += sizeof(uint32_t)) {
+        data[j] = rand.Next();
       }
       b->Append(Slice(data, sizeof(data)));
       ASSERT_OK(b->Finalize());
@@ -346,7 +351,8 @@ TEST_F(LogBlockManagerTest, MetricsTest) {
           {10, &METRIC_log_block_manager_containers},
           {10, &METRIC_log_block_manager_full_containers} },
         { {0, &METRIC_log_block_manager_holes_punched},
-          {0, &METRIC_block_manager_total_blocks_deleted} }));
+          {0, &METRIC_block_manager_total_blocks_deleted},
+          {0, &METRIC_log_block_manager_dead_containers_deleted} }));
 
     ASSERT_OK(transaction->CommitCreatedBlocks());
     NO_FATALS(CheckLogMetrics(entity,
@@ -355,7 +361,8 @@ TEST_F(LogBlockManagerTest, MetricsTest) {
           {10, &METRIC_log_block_manager_containers},
           {10, &METRIC_log_block_manager_full_containers} },
         { {0, &METRIC_log_block_manager_holes_punched},
-          {0, &METRIC_block_manager_total_blocks_deleted} }));
+          {0, &METRIC_block_manager_total_blocks_deleted},
+          {0, &METRIC_log_block_manager_dead_containers_deleted} }));
   }
 
   // Reopen the block manager and test the metrics. They're all based on
@@ -369,7 +376,8 @@ TEST_F(LogBlockManagerTest, MetricsTest) {
         {10, &METRIC_log_block_manager_containers},
         {10, &METRIC_log_block_manager_full_containers} },
       { {0, &METRIC_log_block_manager_holes_punched},
-        {0, &METRIC_block_manager_total_blocks_deleted} }));
+        {0, &METRIC_block_manager_total_blocks_deleted},
+        {0, &METRIC_log_block_manager_dead_containers_deleted} }));
 
   // Delete a block. Its contents should no longer be under management.
   {
@@ -384,7 +392,8 @@ TEST_F(LogBlockManagerTest, MetricsTest) {
           {10, &METRIC_log_block_manager_containers},
           {10, &METRIC_log_block_manager_full_containers} },
         { {0, &METRIC_log_block_manager_holes_punched},
-          {1, &METRIC_block_manager_total_blocks_deleted} }));
+          {1, &METRIC_block_manager_total_blocks_deleted},
+          {0, &METRIC_log_block_manager_dead_containers_deleted} }));
   }
   // Wait for the actual hole punching to take place.
   for (const auto& data_dir : dd_manager_->data_dirs()) {
@@ -396,7 +405,8 @@ TEST_F(LogBlockManagerTest, MetricsTest) {
         {10, &METRIC_log_block_manager_containers},
         {10, &METRIC_log_block_manager_full_containers} },
       { {1, &METRIC_log_block_manager_holes_punched},
-        {1, &METRIC_block_manager_total_blocks_deleted} }));
+        {1, &METRIC_block_manager_total_blocks_deleted},
+        {0, &METRIC_log_block_manager_dead_containers_deleted} }));
 
   // Set the max container size to default so that we can create a bunch of blocks
   // in the same container. Delete those created blocks afterwards to verify only
@@ -429,7 +439,8 @@ TEST_F(LogBlockManagerTest, MetricsTest) {
           {11, &METRIC_log_block_manager_containers},
           {10, &METRIC_log_block_manager_full_containers} },
         { {1, &METRIC_log_block_manager_holes_punched},
-          {11, &METRIC_block_manager_total_blocks_deleted} }));
+          {11, &METRIC_block_manager_total_blocks_deleted},
+          {0, &METRIC_log_block_manager_dead_containers_deleted} }));
   }
   // Wait for the actual hole punching to take place.
   for (const auto& data_dir : dd_manager_->data_dirs()) {
@@ -441,7 +452,8 @@ TEST_F(LogBlockManagerTest, MetricsTest) {
         {11, &METRIC_log_block_manager_containers},
         {10, &METRIC_log_block_manager_full_containers} },
       { {2, &METRIC_log_block_manager_holes_punched},
-        {11, &METRIC_block_manager_total_blocks_deleted} }));
+        {11, &METRIC_block_manager_total_blocks_deleted},
+        {0, &METRIC_log_block_manager_dead_containers_deleted} }));
 }
 
 TEST_F(LogBlockManagerTest, ContainerPreallocationTest) {
@@ -1738,5 +1750,122 @@ TEST_F(LogBlockManagerTest, TestAbortBlock) {
   ASSERT_EQ(1, bm_->available_containers_by_data_dir_.begin()->second.size());
 }
 
+TEST_F(LogBlockManagerTest, TestDeleteDeadContainersByDeletionTransaction) {
+  const auto TestProcess = [&] (int block_num) {
+    ASSERT_GT(block_num, 0);
+    MetricRegistry registry;
+    scoped_refptr<MetricEntity> entity = METRIC_ENTITY_server.Instantiate(
+        &registry, Substitute("test-$0", block_num));
+
+    ASSERT_OK(ReopenBlockManager(entity));
+    NO_FATALS(CheckLogMetrics(entity,
+        { {0, &METRIC_log_block_manager_bytes_under_management},
+          {0, &METRIC_log_block_manager_blocks_under_management},
+          {0, &METRIC_log_block_manager_containers},
+          {0, &METRIC_log_block_manager_full_containers} },
+        { {0, &METRIC_log_block_manager_holes_punched},
+          {0, &METRIC_block_manager_total_blocks_deleted},
+          {0, &METRIC_log_block_manager_dead_containers_deleted} }));
+
+    // Create a bunch of blocks -> one container.
+    vector<BlockId> blocks;
+    for (int i = 0; i < block_num - 1; ++i) {
+      unique_ptr<WritableBlock> writer;
+      ASSERT_OK(bm_->CreateBlock(test_block_opts_, &writer));
+      blocks.emplace_back(writer->id());
+      ASSERT_OK(writer->Finalize());
+      ASSERT_OK(writer->Close());
+      NO_FATALS(CheckLogMetrics(entity,
+          { {0, &METRIC_log_block_manager_bytes_under_management},
+            {i + 1, &METRIC_log_block_manager_blocks_under_management},
+            {1, &METRIC_log_block_manager_containers},
+            {0, &METRIC_log_block_manager_full_containers} },
+          { {0, &METRIC_log_block_manager_holes_punched},
+            {0, &METRIC_block_manager_total_blocks_deleted},
+            {0, &METRIC_log_block_manager_dead_containers_deleted} }));
+    }
+    {
+      // The last block makes a full container.
+      FLAGS_log_container_max_size = 1;
+      unique_ptr<WritableBlock> writer;
+      ASSERT_OK(bm_->CreateBlock(test_block_opts_, &writer));
+      blocks.emplace_back(writer->id());
+      ASSERT_OK(writer->Append("a"));
+      ASSERT_OK(writer->Finalize());
+      ASSERT_OK(writer->Close());
+      NO_FATALS(CheckLogMetrics(entity,
+          { {1, &METRIC_log_block_manager_bytes_under_management},
+            {block_num, &METRIC_log_block_manager_blocks_under_management},
+            {1, &METRIC_log_block_manager_containers},
+            {1, &METRIC_log_block_manager_full_containers} },
+          { {0, &METRIC_log_block_manager_holes_punched},
+            {0, &METRIC_block_manager_total_blocks_deleted},
+            {0, &METRIC_log_block_manager_dead_containers_deleted} }));
+    }
+    ASSERT_EQ(block_num, blocks.size());
+
+    // Check the container files.
+    string data_file_name;
+    string metadata_file_name;
+    NO_FATALS(GetOnlyContainerDataFile(&data_file_name));
+    NO_FATALS(GetOnlyContainerMetadataFile(&metadata_file_name));
+
+    // Open the last block for reading.
+    unique_ptr<ReadableBlock> reader;
+    ASSERT_OK(bm_->OpenBlock(blocks[block_num-1], &reader));
+    uint64_t size;
+    ASSERT_OK(reader->Size(&size));
+    ASSERT_EQ(1, size);
+
+    // Delete all of the blocks, which makes a dead container.
+    {
+      vector<BlockId> deleted;
+      shared_ptr<BlockDeletionTransaction> deletion_transaction =
+        this->bm_->NewDeletionTransaction();
+      for (const auto& block : blocks) {
+        deletion_transaction->AddDeletedBlock(block);
+      }
+      ASSERT_OK(deletion_transaction->CommitDeletedBlocks(&deleted));
+      ASSERT_EQ(block_num, deleted.size());
+      NO_FATALS(CheckLogMetrics(entity,
+          { {0, &METRIC_log_block_manager_bytes_under_management},
+            {0, &METRIC_log_block_manager_blocks_under_management},
+            {1, &METRIC_log_block_manager_containers},
+            {1, &METRIC_log_block_manager_full_containers} },
+          { {0, &METRIC_log_block_manager_holes_punched},
+            {block_num, &METRIC_block_manager_total_blocks_deleted},
+            {0, &METRIC_log_block_manager_dead_containers_deleted} }));
+    }
+    // The container is still alive, because there is a opened block previously.
+    NO_FATALS(CheckLogMetrics(entity,
+        { {0, &METRIC_log_block_manager_bytes_under_management},
+          {0, &METRIC_log_block_manager_blocks_under_management},
+          {1, &METRIC_log_block_manager_containers},
+          {1, &METRIC_log_block_manager_full_containers} },
+        { {0, &METRIC_log_block_manager_holes_punched},
+          {block_num, &METRIC_block_manager_total_blocks_deleted},
+          {0, &METRIC_log_block_manager_dead_containers_deleted} }));
+
+    // After the reader is closed, the container is actually deleted.
+    reader->Close();
+    NO_FATALS(CheckLogMetrics(entity,
+        { {0, &METRIC_log_block_manager_bytes_under_management},
+          {0, &METRIC_log_block_manager_blocks_under_management},
+          {0, &METRIC_log_block_manager_containers},
+          {0, &METRIC_log_block_manager_full_containers} },
+        { {0, &METRIC_log_block_manager_holes_punched},
+          {block_num, &METRIC_block_manager_total_blocks_deleted},
+          {1, &METRIC_log_block_manager_dead_containers_deleted} }));
+
+    // The container files should have been deleted.
+    ASSERT_FALSE(env_->FileExists(data_file_name));
+    ASSERT_FALSE(env_->FileExists(metadata_file_name));
+  };
+
+  for (int i = 1; i < 4; ++i) {
+    NO_FATALS(TestProcess(i));
+  }
+}
+
 } // namespace fs
 } // namespace kudu
diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc
index 7f1669f..335dee7 100644
--- a/src/kudu/fs/log_block_manager.cc
+++ b/src/kudu/fs/log_block_manager.cc
@@ -136,6 +136,11 @@ METRIC_DEFINE_counter(server, log_block_manager_holes_punched,
                       kudu::MetricUnit::kHoles,
                       "Number of holes punched since service start");
 
+METRIC_DEFINE_counter(server, log_block_manager_dead_containers_deleted,
+                      "Number of Dead Block Containers Deleted",
+                      kudu::MetricUnit::kLogBlockContainers,
+                      "Number of full (but dead) block containers that were deleted");
+
 namespace kudu {
 
 namespace fs {
@@ -180,6 +185,7 @@ struct LogBlockManagerMetrics {
   scoped_refptr<AtomicGauge<uint64_t>> full_containers;
 
   scoped_refptr<Counter> holes_punched;
+  scoped_refptr<Counter> dead_containers_deleted;
 };
 
 #define MINIT(x) x(METRIC_log_block_manager_##x.Instantiate(metric_entity))
@@ -190,7 +196,8 @@ LogBlockManagerMetrics::LogBlockManagerMetrics(const scoped_refptr<MetricEntity>
     GINIT(blocks_under_management),
     GINIT(containers),
     GINIT(full_containers),
-    MINIT(holes_punched) {
+    MINIT(holes_punched),
+    MINIT(dead_containers_deleted) {
 }
 #undef GINIT
 
@@ -362,6 +369,9 @@ class LogBlockContainer: public RefCountedThreadSafe<LogBlockContainer>
{
                      const string& id,
                      LogBlockContainerRefPtr* container);
 
+  // The destructor will delete files of this container if it is dead.
+  ~LogBlockContainer();
+
   // Closes a set of blocks belonging to this container, possibly synchronizing
   // the dirty data and metadata to disk.
   //
@@ -518,6 +528,15 @@ class LogBlockContainer: public RefCountedThreadSafe<LogBlockContainer>
{
   DataDir* data_dir() const { return data_dir_; }
   const PathInstanceMetadataPB* instance() const { return data_dir_->instance()->metadata();
}
 
+  // Tries to mark the container as 'dead'. A dead container is one that is full
+  // and has no live blocks; it is deleted when the container goes out of scope.
+  //
+  // If successful, returns true; otherwise returns false.
+  bool TrySetDead();
+
+  // Returns whether the container has been marked as dead.
+  bool dead() { return dead_.Load(); }
+
  private:
   LogBlockContainer(LogBlockManager* block_manager, DataDir* data_dir,
                     unique_ptr<WritablePBContainerFile> metadata_file,
@@ -588,6 +607,9 @@ class LogBlockContainer: public RefCountedThreadSafe<LogBlockContainer>
{
   // The number of not-yet-deleted blocks in the container.
   AtomicInt<int64_t> live_blocks_;
 
+  // Whether or not this container has been marked as dead.
+  AtomicBool dead_;
+
   // The metrics. Not owned by the log container; it has the same lifespan
   // as the block manager.
   const LogBlockManagerMetrics* metrics_;
@@ -601,6 +623,13 @@ class LogBlockContainer: public RefCountedThreadSafe<LogBlockContainer>
{
   DISALLOW_COPY_AND_ASSIGN(LogBlockContainer);
 };
 
+#define CONTAINER_DISK_FAILURE(status_expr, msg) do { \
+  Status s_ = (status_expr); \
+  HANDLE_DISK_FAILURE(s_, block_manager_->error_manager_->RunErrorNotificationCb( \
+      ErrorHandlerType::DISK_ERROR, data_dir_)); \
+  WARN_NOT_OK(s_, msg); \
+} while (0);
+
 LogBlockContainer::LogBlockContainer(
     LogBlockManager* block_manager,
     DataDir* data_dir,
@@ -618,9 +647,22 @@ LogBlockContainer::LogBlockContainer(
       live_bytes_(0),
       live_bytes_aligned_(0),
       live_blocks_(0),
+      dead_(false),
       metrics_(block_manager->metrics()) {
 }
 
+LogBlockContainer::~LogBlockContainer() {
+  if (dead()) {
+    CHECK(!block_manager_->opts_.read_only);
+    string data_file_name = data_file_->filename();
+    string metadata_file_name = metadata_file_->filename();
+    CONTAINER_DISK_FAILURE(block_manager_->file_cache_.DeleteFile(data_file_name),
+        "Could not delete dead container data file " + data_file_name);
+    CONTAINER_DISK_FAILURE(block_manager_->file_cache_.DeleteFile(metadata_file_name),
+        "Could not delete dead container metadata file " + metadata_file_name);
+  }
+}
+
 void LogBlockContainer::HandleError(const Status& s) const {
   HANDLE_DISK_FAILURE(s,
       block_manager()->error_manager()->RunErrorNotificationCb(ErrorHandlerType::DISK_ERROR,
@@ -1159,6 +1201,12 @@ void LogBlockContainer::SetReadOnly(const Status& error) {
 }
 
 void LogBlockContainer::ContainerDeletionAsync(int64_t offset, int64_t length) {
+  if (dead()) {
+    // Don't bother punching holes; the container's destructor will delete the
+    // container's files outright.
+    return;
+  }
+
   VLOG(3) << "Freeing space belonging to container " << ToString();
   Status s = PunchHole(offset, length);
   if (s.ok() && metrics_) metrics_->holes_punched->Increment();
@@ -1166,6 +1214,18 @@ void LogBlockContainer::ContainerDeletionAsync(int64_t offset, int64_t
length) {
                             data_dir()->dir()));
 }
 
+bool LogBlockContainer::TrySetDead() {
+  // Although this code looks like a TOCTOU violation, it is safe because of
+  // some additional LBM invariants:
+  // 1. When a container becomes full, it stays full for the process' lifetime.
+  // 2. A full container will never accrue another live block. Meaning, losing
+  //    its last live block is a terminal state for a full container.
+  if (full() && live_blocks() == 0) {
+    return dead_.CompareAndSet(false, true);
+  }
+  return false;
+}
+
 ///////////////////////////////////////////////////////////
 // LogBlockCreationTransaction
 ////////////////////////////////////////////////////////////
@@ -1229,9 +1289,11 @@ class LogBlockDeletionTransaction : public BlockDeletionTransaction,
       : lbm_(lbm) {
   }
 
-  // Given the shared ownership of LogBlockDeletionTransaction, at this point
-  // all registered blocks should be destructed. Thus, coalescing deletions
-  // for blocks that are contiguous in a container and schedules hole punching.
+  // Due to the shared ownership of LogBlockDeletionTransaction,
+  // the destructor is responsible for destroying all registered
+  // blocks. This includes:
+  // 1. Punching holes in deleted blocks, and
+  // 2. Deleting dead containers outright.
   virtual ~LogBlockDeletionTransaction();
 
   virtual void AddDeletedBlock(BlockId block) override;
@@ -1265,6 +1327,27 @@ void LogBlockDeletionTransaction::AddDeletedBlock(BlockId block) {
 LogBlockDeletionTransaction::~LogBlockDeletionTransaction() {
   for (auto& entry : deleted_interval_map_) {
     LogBlockContainer* container = entry.first.get();
+
+    // For the full and dead containers, it is much cheaper
+    // to delete the container files outright, rather than
+    // punching holes.
+    if (container->full() && container->live_blocks() == 0) {
+      // Mark the container as deleted and remove it from the global map.
+      //
+      // It's possible for multiple deletion transactions to end up here. For
+      // example, if one transaction deletes the second to last block in a
+      // container and the second deletes its last block, the destructors of
+      // both transactions could run at a time that container's internal
+      // bookkeeping reflects the deadness of the container.
+      //
+      // The function 'TrySetDead()' can only be called successfully once,
+      // because there is a compare-and-swap operation inside.
+      if (container->TrySetDead()) {
+        lbm_->RemoveDeadContainer(container->ToString());
+      }
+      continue;
+    }
+
     CHECK_OK_PREPEND(CoalesceIntervals<int64_t>(&entry.second),
                      Substitute("could not coalesce hole punching for container: $0",
                                 container->ToString()));
@@ -1913,19 +1996,34 @@ void LogBlockManager::AddNewContainerUnlocked(const LogBlockContainerRefPtr&
con
   }
 }
 
-void LogBlockManager::RemoveFullContainerUnlocked(const string& container_name) {
+void LogBlockManager::RemoveDeadContainerUnlocked(const string& container_name) {
   DCHECK(lock_.is_locked());
   LogBlockContainerRefPtr to_delete(EraseKeyReturnValuePtr(
       &all_containers_by_name_, container_name));
   CHECK(to_delete);
   CHECK(to_delete->full())
       << Substitute("Container $0 is not full", container_name);
+  CHECK(to_delete->dead())
+      << Substitute("Container $0 is not dead", container_name);
   if (metrics()) {
     metrics()->containers->Decrement();
     metrics()->full_containers->Decrement();
   }
 }
 
+void LogBlockManager::RemoveDeadContainer(const string& container_name) {
+  // Remove the container from memory.
+  {
+    std::lock_guard<simple_spinlock> l(lock_);
+    RemoveDeadContainerUnlocked(container_name);
+  }
+
+  // Update the metrics if necessary.
+  if (metrics()) {
+    metrics()->dead_containers_deleted->Increment();
+  }
+}
+
 Status LogBlockManager::GetOrCreateContainer(const CreateBlockOptions& opts,
                                              LogBlockContainerRefPtr* container) {
   DataDir* dir;
@@ -2173,7 +2271,7 @@ void LogBlockManager::OpenDataDir(DataDir* dir,
 
   // Keep track of containers that have nothing but dead blocks; they will be
   // deleted during repair.
-  vector<string> dead_containers;
+  vector<LogBlockContainerRefPtr> dead_containers;
 
   // Keep track of containers whose live block ratio is low; their metadata
   // files will be compacted during repair.
@@ -2211,6 +2309,11 @@ void LogBlockManager::OpenDataDir(DataDir* dir,
       continue;
     }
     if (!s.ok()) {
+      if (opts_.read_only && s.IsNotFound()) {
+        // Skip the container while the operation is read-only and the files are away,
+        // especially for the kudu cli tool.
+        continue;
+      }
       *result_status = s.CloneAndPrepend(Substitute(
           "Could not open container $0", container_name));
       return;
@@ -2268,7 +2371,7 @@ void LogBlockManager::OpenDataDir(DataDir* dir,
       // confusing to report it as such since it'll be a natural event at startup.
       if (container->live_blocks() == 0) {
         DCHECK(live_blocks.empty());
-        dead_containers.emplace_back(container->ToString());
+        dead_containers.emplace_back(container);
       } else if (static_cast<double>(container->live_blocks()) /
           container->total_blocks() <= FLAGS_log_container_live_metadata_before_compact_ratio)
{
         // Metadata files of containers with very few live blocks will be compacted.
@@ -2437,7 +2540,7 @@ Status LogBlockManager::Repair(
     DataDir* dir,
     FsReport* report,
     vector<LogBlockRefPtr> need_repunching,
-    vector<string> dead_containers,
+    vector<LogBlockContainerRefPtr> dead_containers,
     unordered_map<string, vector<BlockRecordPB>> low_live_block_containers) {
   if (opts_.read_only) {
     LOG(INFO) << "Read-only block manager, skipping repair";
@@ -2458,7 +2561,12 @@ Status LogBlockManager::Repair(
     // Remove all of the dead containers from the block manager. They will be
     // deleted from disk shortly thereafter, outside of the lock.
     for (const auto& d : dead_containers) {
-      RemoveFullContainerUnlocked(d);
+      CHECK(d->TrySetDead());
+      RemoveDeadContainerUnlocked(d->ToString());
+
+      // This must be true if clearing dead_containers below is to delete the
+      // container files.
+      DCHECK(d->HasOneRef());
     }
 
     // Fetch all the containers we're going to need.
@@ -2489,41 +2597,9 @@ Status LogBlockManager::Repair(
     }
   }
 
-
-  // Delete all dead containers.
-  //
-  // After the deletions, the data directory is sync'ed to reduce the chance
-  // of a data file existing without its corresponding metadata file (or vice
-  // versa) in the event of a crash. The block manager would treat such a case
-  // as corruption and require manual intervention.
-  //
-  // TODO(adar) the above is not fool-proof; a crash could manifest in between
-  // any pair of deletions. That said, the odds of it happening are incredibly
-  // rare, and manual resolution isn't hard (just delete the existing file).
-  int64_t deleted_metadata_bytes = 0;
-  for (const auto& d : dead_containers) {
-    string data_file_name = StrCat(d, kContainerDataFileSuffix);
-    string metadata_file_name = StrCat(d, kContainerMetadataFileSuffix);
-
-    uint64_t metadata_size;
-    Status s = env_->GetFileSize(metadata_file_name, &metadata_size);
-    if (s.ok()) {
-      deleted_metadata_bytes += metadata_size;
-    } else {
-      WARN_NOT_OK_LBM_DISK_FAILURE(s,
-          "Could not get size of dead container metadata file " + metadata_file_name);
-    }
-
-    WARN_NOT_OK_LBM_DISK_FAILURE(file_cache_.DeleteFile(data_file_name),
-                "Could not delete dead container data file " + data_file_name);
-    WARN_NOT_OK_LBM_DISK_FAILURE(file_cache_.DeleteFile(metadata_file_name),
-                "Could not delete dead container metadata file " + metadata_file_name);
-  }
-  if (!dead_containers.empty()) {
-    WARN_NOT_OK_LBM_DISK_FAILURE(env_->SyncDir(dir->dir()), "Could not sync data directory");
-    LOG(INFO) << Substitute("Deleted $0 dead containers ($1 metadata bytes)",
-                            dead_containers.size(), deleted_metadata_bytes);
-  }
+  // Clearing this vector drops the last references to the containers within,
+  // triggering their deletion.
+  dead_containers.clear();
 
   // Truncate partial metadata records.
   //
diff --git a/src/kudu/fs/log_block_manager.h b/src/kudu/fs/log_block_manager.h
index 4ca3ecc..f6b1fbd 100644
--- a/src/kudu/fs/log_block_manager.h
+++ b/src/kudu/fs/log_block_manager.h
@@ -264,10 +264,13 @@ class LogBlockManager : public BlockManager {
   void AddNewContainerUnlocked(const LogBlockContainerRefPtr& container);
 
   // Removes a previously added container from this block manager. The
-  // container must be full.
+  // container must be dead (i.e. full and without any live blocks).
   //
   // Must be called with 'lock_' held.
-  void RemoveFullContainerUnlocked(const std::string& container_name);
+  void RemoveDeadContainerUnlocked(const std::string& container_name);
+
+  // Variant of RemoveDeadContainerUnlocked that acquires 'lock_'.
+  void RemoveDeadContainer(const std::string& container_name);
 
   // Returns a container appropriate for the given CreateBlockOptions, creating
   // a new container if necessary.
@@ -339,7 +342,7 @@ class LogBlockManager : public BlockManager {
   Status Repair(DataDir* dir,
                 FsReport* report,
                 std::vector<LogBlockRefPtr> need_repunching,
-                std::vector<std::string> dead_containers,
+                std::vector<LogBlockContainerRefPtr> dead_containers,
                 std::unordered_map<
                     std::string,
                     std::vector<BlockRecordPB>> low_live_block_containers);


Mime
View raw message