kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject kudu git commit: KUDU-2131: switch to LIFO log container retrieval
Date Tue, 05 Sep 2017 05:13:07 GMT
Repository: kudu
Updated Branches:
  refs/heads/branch-1.5.x 9f2e32533 -> 9101f85fa


KUDU-2131: switch to LIFO log container retrieval

Currently, LBM selects available log container from available container
queue using FIFO, given the intention to spread the load across all
available containers. However, this may cause inefficiency especially
in the case of tablet copy if there is a long available containers
queue. One failed test case is copying tablet of size 26.81GB on a
tablet server with 14 disks and 19199 containers (only 97 of them are
full). In this case, there are too many containers involved, so that
DownloadBlocks took longer that 3m, the default idle timeout for a
tablet copy session, to commit the new blocks such that the session is
terminated before DownloadWALs.

This patch switches to LIFO for log container selection, so that
finalized blocks will exhaust one container at a time. Thus, when
batching disk synchronization for a groups of created blocks, the
number of fsync() operations will be significant lower for tablet
servers with plenty of available log containers.

It also marks '--tablet_copy_idle_timeout_ms', the flag that controls
the amount of time without activity before a tablet copy session
expires, as a user facing flag. And renames it to
'--tablet_copy_idle_timeout_sec'.

Change-Id: I6eb0bc99422c293417b64e0b6e86c87a3d55e687
Reviewed-on: http://gerrit.cloudera.org:8080/7956
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <adar@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/9101f85f
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/9101f85f
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/9101f85f

Branch: refs/heads/branch-1.5.x
Commit: 9101f85fa4ec465b490720629eb5a0caba2563dd
Parents: 9f2e325
Author: hahao <hao.hao@cloudera.com>
Authored: Sun Sep 3 01:14:39 2017 -0700
Committer: Adar Dembo <adar@cloudera.com>
Committed: Tue Sep 5 05:12:47 2017 +0000

----------------------------------------------------------------------
 src/kudu/fs/log_block_manager-test.cc        | 41 +++++++++++++++++++++++
 src/kudu/fs/log_block_manager.cc             |  2 +-
 src/kudu/fs/log_block_manager.h              |  1 +
 src/kudu/tserver/tablet_copy_service-test.cc |  6 ++--
 src/kudu/tserver/tablet_copy_service.cc      | 11 +++---
 5 files changed, 52 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/9101f85f/src/kudu/fs/log_block_manager-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/log_block_manager-test.cc b/src/kudu/fs/log_block_manager-test.cc
index d7844ed..9401758 100644
--- a/src/kudu/fs/log_block_manager-test.cc
+++ b/src/kudu/fs/log_block_manager-test.cc
@@ -91,6 +91,10 @@ METRIC_DECLARE_gauge_uint64(log_block_manager_full_containers);
 namespace kudu {
 namespace fs {
 
+namespace internal {
+class LogBlockContainer;
+} // namespace internal
+
 class LogBlockManagerTest : public KuduTest {
  public:
   LogBlockManagerTest() :
@@ -1373,6 +1377,43 @@ TEST_F(LogBlockManagerTest, TestFinalizeBlock) {
   ASSERT_EQ(1, bm_->available_containers_by_data_dir_.begin()->second.size());
 }
 
+// Test available log container selection is LIFO.
+TEST_F(LogBlockManagerTest, TestLIFOContainerSelection) {
+  // Create 4 blocks and 4 opened containers that are not full.
+  vector<unique_ptr<WritableBlock>> blocks;
+  for (int i = 0; i < 4; i++) {
+    unique_ptr<WritableBlock> writer;
+    ASSERT_OK(bm_->CreateBlock(test_block_opts_, &writer));
+    writer->Append("test data");
+    blocks.emplace_back(std::move(writer));
+  }
+  for (const auto& block : blocks) {
+    ASSERT_OK(block->Close());
+  }
+  ASSERT_EQ(4, bm_->all_containers_by_name_.size());
+
+  blocks.clear();
+  // Create some other blocks, and finalize each block after write.
+  // The first available container in the queue will be reused every time.
+  internal::LogBlockContainer* container =
+      bm_->available_containers_by_data_dir_.begin()->second.front();
+  for (int i = 0; i < 4; i++) {
+    unique_ptr<WritableBlock> writer;
+    ASSERT_OK(bm_->CreateBlock(test_block_opts_, &writer));
+    writer->Append("test data");
+    ASSERT_OK(writer->Finalize());
+    // After finalizing the written block, the used container will be
+    // available again and can be reused for the following created block.
+    ASSERT_EQ(container,
+              bm_->available_containers_by_data_dir_.begin()->second.front());
+    blocks.emplace_back(std::move(writer));
+  }
+  for (const auto& block : blocks) {
+    ASSERT_OK(block->Close());
+  }
+  ASSERT_EQ(4, bm_->all_containers_by_name_.size());
+}
+
 TEST_F(LogBlockManagerTest, TestAbortBlock) {
   unique_ptr<WritableBlock> writer;
   ASSERT_OK(bm_->CreateBlock(test_block_opts_, &writer));

http://git-wip-us.apache.org/repos/asf/kudu/blob/9101f85f/src/kudu/fs/log_block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc
index 2b06afa..38e853a 100644
--- a/src/kudu/fs/log_block_manager.cc
+++ b/src/kudu/fs/log_block_manager.cc
@@ -1876,7 +1876,7 @@ void LogBlockManager::MakeContainerAvailableUnlocked(LogBlockContainer*
containe
   if (container->full() || container->read_only()) {
     return;
   }
-  available_containers_by_data_dir_[container->data_dir()].push_back(container);
+  available_containers_by_data_dir_[container->data_dir()].push_front(container);
 }
 
 Status LogBlockManager::SyncContainer(const LogBlockContainer& container) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/9101f85f/src/kudu/fs/log_block_manager.h
----------------------------------------------------------------------
diff --git a/src/kudu/fs/log_block_manager.h b/src/kudu/fs/log_block_manager.h
index 71afa20..1f3194b 100644
--- a/src/kudu/fs/log_block_manager.h
+++ b/src/kudu/fs/log_block_manager.h
@@ -190,6 +190,7 @@ class LogBlockManager : public BlockManager {
   FRIEND_TEST(LogBlockManagerTest, TestAbortBlock);
   FRIEND_TEST(LogBlockManagerTest, TestCloseFinalizedBlock);
   FRIEND_TEST(LogBlockManagerTest, TestFinalizeBlock);
+  FRIEND_TEST(LogBlockManagerTest, TestLIFOContainerSelection);
   FRIEND_TEST(LogBlockManagerTest, TestLookupBlockLimit);
   FRIEND_TEST(LogBlockManagerTest, TestMetadataTruncation);
   FRIEND_TEST(LogBlockManagerTest, TestParseKernelRelease);

http://git-wip-us.apache.org/repos/asf/kudu/blob/9101f85f/src/kudu/tserver/tablet_copy_service-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_copy_service-test.cc b/src/kudu/tserver/tablet_copy_service-test.cc
index f469212..0ff01ae 100644
--- a/src/kudu/tserver/tablet_copy_service-test.cc
+++ b/src/kudu/tserver/tablet_copy_service-test.cc
@@ -58,7 +58,7 @@
 #define ASSERT_REMOTE_ERROR(status, err, code, str) \
     ASSERT_NO_FATAL_FAILURE(AssertRemoteError(status, err, code, str))
 
-DECLARE_uint64(tablet_copy_idle_timeout_ms);
+DECLARE_uint64(tablet_copy_idle_timeout_sec);
 DECLARE_uint64(tablet_copy_timeout_poll_period_ms);
 
 using std::string;
@@ -223,7 +223,7 @@ TEST_F(TabletCopyServiceTest, TestSimpleBeginEndSession) {
                                                &segment_seqnos));
   // Basic validation of returned params.
   ASSERT_FALSE(session_id.empty());
-  ASSERT_EQ(FLAGS_tablet_copy_idle_timeout_ms, idle_timeout_millis);
+  ASSERT_EQ(FLAGS_tablet_copy_idle_timeout_sec * 1000, idle_timeout_millis);
   ASSERT_TRUE(superblock.IsInitialized());
   // We should have number of segments = number of rolls + 1 (due to the active segment).
   ASSERT_EQ(kNumLogRolls + 1, segment_seqnos.size());
@@ -489,7 +489,7 @@ TEST_F(TabletCopyServiceTest, TestFetchLog) {
 TEST_F(TabletCopyServiceTest, TestSessionTimeout) {
   // This flag should be seen by the service due to TSO.
   // We have also reduced the timeout polling frequency in SetUp().
-  FLAGS_tablet_copy_idle_timeout_ms = 1; // Expire the session almost immediately.
+  FLAGS_tablet_copy_idle_timeout_sec = 0; // Expire the session immediately.
 
   // Start session.
   string session_id;

http://git-wip-us.apache.org/repos/asf/kudu/blob/9101f85f/src/kudu/tserver/tablet_copy_service.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_copy_service.cc b/src/kudu/tserver/tablet_copy_service.cc
index 588b7d4..644454b 100644
--- a/src/kudu/tserver/tablet_copy_service.cc
+++ b/src/kudu/tserver/tablet_copy_service.cc
@@ -56,10 +56,11 @@
     } \
   } while (false)
 
-DEFINE_uint64(tablet_copy_idle_timeout_ms, 180000,
+DEFINE_uint64(tablet_copy_idle_timeout_sec, 600,
               "Amount of time without activity before a tablet copy "
-              "session will expire, in millis");
-TAG_FLAG(tablet_copy_idle_timeout_ms, hidden);
+              "session will expire, in seconds");
+TAG_FLAG(tablet_copy_idle_timeout_sec, advanced);
+TAG_FLAG(tablet_copy_idle_timeout_sec, evolving);
 
 DEFINE_uint64(tablet_copy_timeout_poll_period_ms, 10000,
               "How often the tablet_copy service polls for expired "
@@ -97,7 +98,7 @@ using tablet::TabletReplica;
 namespace tserver {
 
 static MonoTime GetNewExpireTime() {
-  return MonoTime::Now() + MonoDelta::FromMilliseconds(FLAGS_tablet_copy_idle_timeout_ms);
+  return MonoTime::Now() + MonoDelta::FromSeconds(FLAGS_tablet_copy_idle_timeout_sec);
 }
 
 TabletCopyServiceImpl::TabletCopyServiceImpl(
@@ -169,7 +170,7 @@ void TabletCopyServiceImpl::BeginTabletCopySession(
 
   resp->set_responder_uuid(fs_manager_->uuid());
   resp->set_session_id(session_id);
-  resp->set_session_idle_timeout_millis(FLAGS_tablet_copy_idle_timeout_ms);
+  resp->set_session_idle_timeout_millis(FLAGS_tablet_copy_idle_timeout_sec * 1000);
   resp->mutable_superblock()->CopyFrom(session->tablet_superblock());
   resp->mutable_initial_cstate()->CopyFrom(session->initial_cstate());
 


Mime
View raw message