From commits-return-5145-archive-asf-public=cust-asf.ponee.io@kudu.apache.org Sat Jan 6 04:23:08 2018 Return-Path: X-Original-To: archive-asf-public@eu.ponee.io Delivered-To: archive-asf-public@eu.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by mx-eu-01.ponee.io (Postfix) with ESMTP id 2BA31180647 for ; Sat, 6 Jan 2018 04:23:08 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 1B617160C28; Sat, 6 Jan 2018 03:23:08 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 12597160C27 for ; Sat, 6 Jan 2018 04:23:06 +0100 (CET) Received: (qmail 79955 invoked by uid 500); 6 Jan 2018 03:23:06 -0000 Mailing-List: contact commits-help@kudu.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@kudu.apache.org Delivered-To: mailing list commits@kudu.apache.org Received: (qmail 79945 invoked by uid 99); 6 Jan 2018 03:23:06 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 06 Jan 2018 03:23:06 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 1A018DFC3E; Sat, 6 Jan 2018 03:23:03 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: alexey@apache.org To: commits@kudu.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: kudu git commit: [tests] update for 3-4-3 replica management scheme (3/3) Date: Sat, 6 Jan 2018 03:23:03 +0000 (UTC) Repository: kudu Updated Branches: refs/heads/master 36995e260 -> 391e3255d [tests] update for 3-4-3 replica management scheme (3/3) Updated scenarios of the following tests to run with 3-4-3 replica management scheme: * DiskFailureITest * TabletCopyFailureITest * TabletCopyITest * TsTabletManagerITest This patch is a part of the work 
done in the context of KUDU-1097. Change-Id: Id68e29fcee4e6dbd166ada9f18dac2d8fe351a3b Reviewed-on: http://gerrit.cloudera.org:8080/8858 Reviewed-by: Mike Percy Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/391e3255 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/391e3255 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/391e3255 Branch: refs/heads/master Commit: 391e3255d6f0ae59f375e5532cea985384f636f8 Parents: 36995e2 Author: Alexey Serbin Authored: Fri Dec 15 15:09:24 2017 -0800 Committer: Alexey Serbin Committed: Sat Jan 6 03:22:24 2018 +0000 ---------------------------------------------------------------------- .../integration-tests/disk_failure-itest.cc | 61 +++++++++---- src/kudu/integration-tests/tablet_copy-itest.cc | 18 +++- .../ts_tablet_manager-itest.cc | 90 +++++++++++++------- 3 files changed, 120 insertions(+), 49 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/391e3255/src/kudu/integration-tests/disk_failure-itest.cc ---------------------------------------------------------------------- diff --git a/src/kudu/integration-tests/disk_failure-itest.cc b/src/kudu/integration-tests/disk_failure-itest.cc index 3657846..8d79884 100644 --- a/src/kudu/integration-tests/disk_failure-itest.cc +++ b/src/kudu/integration-tests/disk_failure-itest.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -29,6 +30,7 @@ #include "kudu/integration-tests/external_mini_cluster-itest-base.h" #include "kudu/integration-tests/test_workload.h" #include "kudu/mini-cluster/external_mini_cluster.h" +#include "kudu/tserver/tserver.pb.h" #include "kudu/util/metrics.h" #include "kudu/util/monotime.h" #include "kudu/util/path_util.h" @@ -38,19 +40,19 @@ METRIC_DECLARE_gauge_uint64(data_dirs_failed); -namespace kudu { - -using cluster::ExternalMiniClusterOptions; -using 
cluster::ExternalTabletServer; -using fs::BlockManager; +using kudu::cluster::ExternalMiniClusterOptions; +using kudu::cluster::ExternalTabletServer; +using kudu::fs::BlockManager; using std::string; using std::vector; using strings::Substitute; +namespace kudu { + const MonoDelta kAgreementTimeout = MonoDelta::FromSeconds(30); class DiskFailureITest : public ExternalMiniClusterITestBase, - public ::testing::WithParamInterface { + public ::testing::WithParamInterface> { public: // Waits for 'ext_tserver' to experience 'target_failed_disks' disk failures. @@ -70,14 +72,24 @@ class DiskFailureITest : public ExternalMiniClusterITestBase, // shut down and restarted. Errors are injected to one of the directories while // it is shut down. TEST_P(DiskFailureITest, TestFailDuringServerStartup) { - // Set up a cluster with three servers with five disks each. + const string block_manager_type = std::get<0>(GetParam()); + const bool is_3_4_3_mode = std::get<1>(GetParam()); + // Set up a cluster with five disks at each tablet server. In case of 3-4-3 + // replication scheme one more tablet server is needed to put the replacement + // non-voter replica there. + const auto kNumTabletServers = is_3_4_3_mode ? 4 : 3; + const auto kNumTablets = 5; + const auto kNumRows = 100; + ExternalMiniClusterOptions opts; - opts.num_tablet_servers = 3; + opts.num_tablet_servers = kNumTabletServers; opts.num_data_dirs = 5; - opts.block_manager_type = GetParam(); + opts.block_manager_type = block_manager_type; + opts.extra_master_flags.push_back( + Substitute("--raft_prepare_replacement_before_eviction=$0", is_3_4_3_mode)); + opts.extra_tserver_flags.push_back( + Substitute("--raft_prepare_replacement_before_eviction=$0", is_3_4_3_mode)); NO_FATALS(StartClusterWithOpts(opts)); - const int kNumTablets = 5; - const int kNumRows = 100; // Write some data to a tablet. This will spread blocks across all // directories. 
@@ -90,14 +102,25 @@ TEST_P(DiskFailureITest, TestFailDuringServerStartup) { }); write_workload.StopAndJoin(); - // Ensure the tablets get to a running state. - ExternalTabletServer* ts = cluster_->tablet_server(0); - ASSERT_OK(cluster_->WaitForTabletsRunning(ts, kNumTablets, kAgreementTimeout)); + // Arbitrarily select one tablet server which hosts a replica of the tablet. + ExternalTabletServer* ts = nullptr; + for (const auto& e : ts_map_) { + vector tablets; + ASSERT_OK(itest::ListTablets(e.second, kAgreementTimeout, &tablets)); + if (!tablets.empty()) { + ts = cluster_->tablet_server_by_uuid(e.first); + break; + } + } + ASSERT_NE(nullptr, ts); + + // Ensure at least one tablet gets to a running state at one of the tablet servers. + ASSERT_OK(cluster_->WaitForTabletsRunning(ts, 1, kAgreementTimeout)); // Introduce flags to fail one of the directories, avoiding the metadata // directory, the next time the tablet server starts. - string failed_dir = ts->data_dirs()[1]; - vector extra_flags = { + const string& failed_dir = ts->data_dirs()[1]; + const vector extra_flags = { Substitute("--env_inject_eio_globs=$0", JoinPathSegments(failed_dir, "**")), "--env_inject_eio=1.0", "--crash_on_eio=false", @@ -107,7 +130,7 @@ TEST_P(DiskFailureITest, TestFailDuringServerStartup) { // Restart the tablet server with disk failures and ensure it can startup. ASSERT_OK(ts->Restart()); - NO_FATALS(WaitForDiskFailures(cluster_->tablet_server(0))); + NO_FATALS(WaitForDiskFailures(ts)); // Ensure that the tablets are successfully evicted and copied.
ClusterVerifier v(cluster_.get()); @@ -117,6 +140,8 @@ TEST_P(DiskFailureITest, TestFailDuringServerStartup) { } INSTANTIATE_TEST_CASE_P(DiskFailure, DiskFailureITest, - ::testing::ValuesIn(BlockManager::block_manager_types())); + ::testing::Combine( + ::testing::ValuesIn(BlockManager::block_manager_types()), + ::testing::Bool())); } // namespace kudu http://git-wip-us.apache.org/repos/asf/kudu/blob/391e3255/src/kudu/integration-tests/tablet_copy-itest.cc ---------------------------------------------------------------------- diff --git a/src/kudu/integration-tests/tablet_copy-itest.cc b/src/kudu/integration-tests/tablet_copy-itest.cc index f977f18..3be6d0f 100644 --- a/src/kudu/integration-tests/tablet_copy-itest.cc +++ b/src/kudu/integration-tests/tablet_copy-itest.cc @@ -1138,11 +1138,25 @@ INSTANTIATE_TEST_CASE_P(FailureCause, TabletCopyFailureITest, TEST_P(TabletCopyFailureITest, TestTabletCopyNewReplicaFailureCanVote) { const MonoDelta kTimeout = MonoDelta::FromSeconds(30); const string& failure_flag = GetParam(); - NO_FATALS(StartCluster({failure_flag}, {}, /*num_tablet_servers=*/ 4)); + const vector kMasterFlags = { + // If running with the 3-4-3 replication scheme, it's necessary to disable + // the default catalog manager's behavior of adding and evicting tablet + // replicas: this test scenario manages replicas on its own.
+ "--catalog_manager_evict_excess_replicas=false", + "--master_add_server_when_underreplicated=false", + }; + const vector kTserverFlags = { + failure_flag + }; + constexpr auto kNumReplicas = 3; + constexpr auto kNumTabletServers = kNumReplicas + 1; + + NO_FATALS(StartCluster(kTserverFlags, kMasterFlags, kNumTabletServers)); + TestWorkload workload(cluster_.get()); workload.Setup(); - ASSERT_OK(inspect_->WaitForReplicaCount(3)); + ASSERT_OK(inspect_->WaitForReplicaCount(kNumReplicas)); master::GetTableLocationsResponsePB table_locations; ASSERT_OK(itest::GetTableLocations(cluster_->master_proxy(), TestWorkload::kDefaultTableName, kTimeout, master::VOTER_REPLICA, &table_locations)); http://git-wip-us.apache.org/repos/asf/kudu/blob/391e3255/src/kudu/integration-tests/ts_tablet_manager-itest.cc ---------------------------------------------------------------------- diff --git a/src/kudu/integration-tests/ts_tablet_manager-itest.cc b/src/kudu/integration-tests/ts_tablet_manager-itest.cc index ee5a957..c4ff736 100644 --- a/src/kudu/integration-tests/ts_tablet_manager-itest.cc +++ b/src/kudu/integration-tests/ts_tablet_manager-itest.cc @@ -110,10 +110,8 @@ class TsTabletManagerITest : public KuduTest { ASSERT_OK(bld.Build(&client_messenger_)); } - void StartCluster(int num_replicas) { - InternalMiniClusterOptions opts; - opts.num_tablet_servers = num_replicas; - cluster_.reset(new InternalMiniCluster(env_, opts)); + void StartCluster(InternalMiniClusterOptions opts) { + cluster_.reset(new InternalMiniCluster(env_, std::move(opts))); ASSERT_OK(cluster_->Start()); ASSERT_OK(cluster_->CreateClient(nullptr, &client_)); } @@ -185,61 +183,90 @@ void TsTabletManagerITest::GetIncrementalTabletReports( reports->swap(r); } -// Test that when a tablet is marked as failed, it will eventually be evicted -// and replaced. 
-TEST_F(TsTabletManagerITest, TestFailedTabletsAreReplaced) { - const int kNumReplicas = 3; - NO_FATALS(StartCluster(kNumReplicas)); - - InternalMiniClusterOptions opts; - opts.num_tablet_servers = kNumReplicas; - unique_ptr cluster(new InternalMiniCluster(env_, opts)); - ASSERT_OK(cluster->Start()); - TestWorkload work(cluster.get()); - work.set_num_replicas(3); +class FailedTabletsAreReplacedITest : + public TsTabletManagerITest, + public ::testing::WithParamInterface { +}; +// Test that when a tablet replica is marked as failed, it will eventually be +// evicted and replaced. +TEST_P(FailedTabletsAreReplacedITest, OneReplica) { + const bool is_3_4_3_mode = GetParam(); + FLAGS_raft_prepare_replacement_before_eviction = is_3_4_3_mode; + const auto kNumReplicas = 3; + const auto kNumTabletServers = kNumReplicas + (is_3_4_3_mode ? 1 : 0); + + { + InternalMiniClusterOptions opts; + opts.num_tablet_servers = kNumTabletServers; + NO_FATALS(StartCluster(std::move(opts))); + } + TestWorkload work(cluster_.get()); + work.set_num_replicas(kNumReplicas); work.Setup(); work.Start(); // Insert data until the tablet becomes visible to the server. - // We'll operate on the first tablet server, chosen arbitrarily. - MiniTabletServer* ts = cluster->mini_tablet_server(0); string tablet_id; ASSERT_EVENTUALLY([&] { - vector tablet_ids = ts->ListTablets(); + auto idx = rand() % kNumTabletServers; + vector tablet_ids = cluster_->mini_tablet_server(idx)->ListTablets(); ASSERT_EQ(1, tablet_ids.size()); tablet_id = tablet_ids[0]; }); - // Wait until the replica is running before failing it. + // Wait until all the replicas are running before failing one arbitrarily. 
const auto wait_until_running = [&]() { AssertEventually([&]{ - scoped_refptr replica; - ASSERT_OK(ts->server()->tablet_manager()->GetTabletReplica(tablet_id, &replica)); - ASSERT_EQ(replica->state(), tablet::RUNNING); + auto num_replicas_running = 0; + for (auto idx = 0; idx < cluster_->num_tablet_servers(); ++idx) { + MiniTabletServer* ts = cluster_->mini_tablet_server(idx); + scoped_refptr replica; + Status s = ts->server()->tablet_manager()->GetTabletReplica(tablet_id, &replica); + if (s.IsNotFound()) { + continue; + } + ASSERT_OK(s); + if (tablet::RUNNING == replica->state()) { + ++num_replicas_running; + } + } + ASSERT_EQ(kNumReplicas, num_replicas_running); }, MonoDelta::FromSeconds(60)); NO_PENDING_FATALS(); }; - wait_until_running(); + NO_FATALS(wait_until_running()); { - // Inject an error to the replica. Shutting it down will leave it FAILED. + // Inject an error into one of the replicas. Shutting it down will leave it in + // the FAILED state. scoped_refptr replica; - ASSERT_OK(ts->server()->tablet_manager()->GetTabletReplica(tablet_id, &replica)); + ASSERT_EVENTUALLY([&] { + auto idx = rand() % kNumTabletServers; + MiniTabletServer* ts = cluster_->mini_tablet_server(idx); + ASSERT_OK(ts->server()->tablet_manager()->GetTabletReplica(tablet_id, &replica)); + }); replica->SetError(Status::IOError("INJECTED ERROR: tablet failed")); replica->Shutdown(); ASSERT_EQ(tablet::FAILED, replica->state()); } // Ensure the tablet eventually is replicated. - wait_until_running(); + NO_FATALS(wait_until_running()); work.StopAndJoin(); } +INSTANTIATE_TEST_CASE_P(, + FailedTabletsAreReplacedITest, + ::testing::Bool()); // Test that when the leader changes, the tablet manager gets notified and // includes that information in the next tablet report.
TEST_F(TsTabletManagerITest, TestReportNewLeaderOnLeaderChange) { const int kNumReplicas = 2; - NO_FATALS(StartCluster(kNumReplicas)); + { + InternalMiniClusterOptions opts; + opts.num_tablet_servers = kNumReplicas; + NO_FATALS(StartCluster(std::move(opts))); + } // We need to control elections precisely for this test since we're using // EmulateElection() with a distributed consensus configuration. @@ -331,8 +358,13 @@ TEST_F(TsTabletManagerITest, ReportOnReplicaHealthStatus) { constexpr int kNumReplicas = 3; const auto kTimeout = MonoDelta::FromSeconds(60); + // This test is specific to the 3-4-3 replica management scheme. FLAGS_raft_prepare_replacement_before_eviction = true; - NO_FATALS(StartCluster(kNumReplicas)); + { + InternalMiniClusterOptions opts; + opts.num_tablet_servers = kNumReplicas; + NO_FATALS(StartCluster(std::move(opts))); + } // Create the table. client::sp::shared_ptr table;