kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ale...@apache.org
Subject kudu git commit: [ExternalDaemon] return non-OK if no process to pause/resume
Date Tue, 06 Jun 2017 06:57:54 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 68962df0e -> 86116e4c5


[ExternalDaemon] return non-OK if no process to pause/resume

Prior to this patch, ExternalDaemon::{Pause,Resume}() methods
returned Status::OK() if there were no process to pause/resume.

This patch addresses that: now both methods return Status::NotFound()
if there is no process to pause/resume.  Also, added WARN_UNUSED_RESULT
attribute for both methods and updated the call sites to handle their
return status correspondingly.  Besides, replaced gscoped_ptr with
std::unique_ptr in external_minicluster.{cc,h}.

Change-Id: Ibce641a29ffc43391f54c6a8aacf6ba91ae93ae9
Reviewed-on: http://gerrit.cloudera.org:8080/7092
Reviewed-by: Todd Lipcon <todd@apache.org>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/86116e4c
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/86116e4c
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/86116e4c

Branch: refs/heads/master
Commit: 86116e4c515a9c89e728dd699decaf20d097edac
Parents: 68962df
Author: Alexey Serbin <aserbin@cloudera.com>
Authored: Mon Jun 5 17:46:41 2017 -0700
Committer: Alexey Serbin <aserbin@cloudera.com>
Committed: Tue Jun 6 06:55:18 2017 +0000

----------------------------------------------------------------------
 .../integration-tests/client-stress-test.cc     | 24 +++++++--------
 .../integration-tests/external_mini_cluster.cc  | 32 ++++++++++++++------
 .../integration-tests/external_mini_cluster.h   | 10 +++---
 .../integration-tests/master_failover-itest.cc  | 10 +++---
 .../integration-tests/master_migration-itest.cc |  2 +-
 .../integration-tests/raft_consensus-itest.cc   | 10 +++---
 6 files changed, 50 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/86116e4c/src/kudu/integration-tests/client-stress-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/client-stress-test.cc b/src/kudu/integration-tests/client-stress-test.cc
index e909b1a..2718889 100644
--- a/src/kudu/integration-tests/client-stress-test.cc
+++ b/src/kudu/integration-tests/client-stress-test.cc
@@ -181,19 +181,19 @@ TEST_F(ClientStressTest_MultiMaster, TestLeaderResolutionTimeout) {
 
   work.Start();
 
-  cluster_->tablet_server(0)->Pause();
-  cluster_->tablet_server(1)->Pause();
-  cluster_->tablet_server(2)->Pause();
-  cluster_->master(0)->Pause();
-  cluster_->master(1)->Pause();
-  cluster_->master(2)->Pause();
+  ASSERT_OK(cluster_->tablet_server(0)->Pause());
+  ASSERT_OK(cluster_->tablet_server(1)->Pause());
+  ASSERT_OK(cluster_->tablet_server(2)->Pause());
+  ASSERT_OK(cluster_->master(0)->Pause());
+  ASSERT_OK(cluster_->master(1)->Pause());
+  ASSERT_OK(cluster_->master(2)->Pause());
   SleepFor(MonoDelta::FromMilliseconds(300));
-  cluster_->tablet_server(0)->Resume();
-  cluster_->tablet_server(1)->Resume();
-  cluster_->tablet_server(2)->Resume();
-  cluster_->master(0)->Resume();
-  cluster_->master(1)->Resume();
-  cluster_->master(2)->Resume();
+  ASSERT_OK(cluster_->tablet_server(0)->Resume());
+  ASSERT_OK(cluster_->tablet_server(1)->Resume());
+  ASSERT_OK(cluster_->tablet_server(2)->Resume());
+  ASSERT_OK(cluster_->master(0)->Resume());
+  ASSERT_OK(cluster_->master(1)->Resume());
+  ASSERT_OK(cluster_->master(2)->Resume());
   SleepFor(MonoDelta::FromMilliseconds(100));
 
   // Set an explicit timeout. This test has caused deadlocks in the past.

http://git-wip-us.apache.org/repos/asf/kudu/blob/86116e4c/src/kudu/integration-tests/external_mini_cluster.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/external_mini_cluster.cc b/src/kudu/integration-tests/external_mini_cluster.cc
index 85f28dc..bc9ab1b 100644
--- a/src/kudu/integration-tests/external_mini_cluster.cc
+++ b/src/kudu/integration-tests/external_mini_cluster.cc
@@ -19,12 +19,13 @@
 
 #include <algorithm>
 #include <functional>
-#include <gtest/gtest.h>
 #include <memory>
-#include <rapidjson/document.h>
 #include <string>
 #include <unordered_set>
 
+#include <gtest/gtest.h>
+#include <rapidjson/document.h>
+
 #include "kudu/client/client.h"
 #include "kudu/client/master_rpc.h"
 #include "kudu/common/wire_protocol.h"
@@ -33,10 +34,10 @@
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/strings/util.h"
 #include "kudu/master/master.proxy.h"
+#include "kudu/rpc/messenger.h"
 #include "kudu/server/server_base.pb.h"
 #include "kudu/server/server_base.proxy.h"
 #include "kudu/tserver/tserver_service.proxy.h"
-#include "kudu/rpc/messenger.h"
 #include "kudu/util/async_util.h"
 #include "kudu/util/curl_util.h"
 #include "kudu/util/env.h"
@@ -48,6 +49,7 @@
 #include "kudu/util/net/sockaddr.h"
 #include "kudu/util/path_util.h"
 #include "kudu/util/pb_util.h"
+#include "kudu/util/status.h"
 #include "kudu/util/stopwatch.h"
 #include "kudu/util/subprocess.h"
 #include "kudu/util/test_util.h"
@@ -741,7 +743,7 @@ Status ExternalDaemon::StartProcess(const vector<string>& user_flags)
{
   ignore_result(Env::Default()->DeleteFile(info_path));
 
   // Start the daemon.
-  gscoped_ptr<Subprocess> p(new Subprocess(argv));
+  unique_ptr<Subprocess> p(new Subprocess(argv));
   p->ShareParentStdout(false);
   p->SetEnvVars(extra_env_);
   string env_str;
@@ -822,21 +824,31 @@ void ExternalDaemon::SetExePath(string exe) {
 }
 
 Status ExternalDaemon::Pause() {
-  if (!process_) return Status::OK();
+  if (!process_) {
+    return Status::IllegalState(Substitute(
+        "Request to pause '$0' but the process is not there", exe_));
+  }
   VLOG(1) << "Pausing " << exe_ << " with pid " << process_->pid();
+  const Status s = process_->Kill(SIGSTOP);
+  RETURN_NOT_OK(s);
   paused_ = true;
-  return process_->Kill(SIGSTOP);
+  return s;
 }
 
 Status ExternalDaemon::Resume() {
-  if (!process_) return Status::OK();
+  if (!process_) {
+    return Status::IllegalState(Substitute(
+        "Request to resume '$0' but the process is not there", exe_));
+  }
   VLOG(1) << "Resuming " << exe_ << " with pid " << process_->pid();
+  const Status s = process_->Kill(SIGCONT);
+  RETURN_NOT_OK(s);
   paused_ = false;
-  return process_->Kill(SIGCONT);
+  return s;
 }
 
 bool ExternalDaemon::IsShutdown() const {
-  return process_.get() == nullptr;
+  return !process_;
 }
 
 bool ExternalDaemon::IsProcessAlive() const {
@@ -1094,7 +1106,7 @@ ScopedResumeExternalDaemon::ScopedResumeExternalDaemon(ExternalDaemon*
daemon)
 }
 
 ScopedResumeExternalDaemon::~ScopedResumeExternalDaemon() {
-  daemon_->Resume();
+  WARN_NOT_OK(daemon_->Resume(), "Could not resume external daemon");
 }
 
 //------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/kudu/blob/86116e4c/src/kudu/integration-tests/external_mini_cluster.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/external_mini_cluster.h b/src/kudu/integration-tests/external_mini_cluster.h
index e8d25c8..edcc41e 100644
--- a/src/kudu/integration-tests/external_mini_cluster.h
+++ b/src/kudu/integration-tests/external_mini_cluster.h
@@ -14,6 +14,7 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
+
 #pragma once
 
 #include <sys/types.h>
@@ -27,7 +28,6 @@
 #include <boost/optional.hpp>
 
 #include "kudu/client/client.h"
-#include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/integration-tests/mini_cluster_base.h"
@@ -392,10 +392,10 @@ class ExternalDaemon : public RefCountedThreadSafe<ExternalDaemon>
{
   Status EnableKerberos(MiniKdc* kdc, const std::string& bind_host);
 
   // Sends a SIGSTOP signal to the daemon.
-  Status Pause();
+  Status Pause() WARN_UNUSED_RESULT;
 
   // Sends a SIGCONT signal to the daemon.
-  Status Resume();
+  Status Resume() WARN_UNUSED_RESULT;
 
   // Return true if we have explicitly shut down the process.
   bool IsShutdown() const;
@@ -499,12 +499,12 @@ class ExternalDaemon : public RefCountedThreadSafe<ExternalDaemon>
{
   std::vector<std::string> extra_flags_;
   std::map<std::string, std::string> extra_env_;
 
-  gscoped_ptr<Subprocess> process_;
+  std::unique_ptr<Subprocess> process_;
   bool paused_ = false;
 
   std::unique_ptr<Subprocess> perf_record_process_;
 
-  gscoped_ptr<server::ServerStatusPB> status_;
+  std::unique_ptr<server::ServerStatusPB> status_;
   std::string rpc_bind_address_;
 
   // These capture the daemons parameters and running ports and

http://git-wip-us.apache.org/repos/asf/kudu/blob/86116e4c/src/kudu/integration-tests/master_failover-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_failover-itest.cc b/src/kudu/integration-tests/master_failover-itest.cc
index ed96b7f..5026e75 100644
--- a/src/kudu/integration-tests/master_failover-itest.cc
+++ b/src/kudu/integration-tests/master_failover-itest.cc
@@ -154,7 +154,7 @@ TEST_F(MasterFailoverTest, TestCreateTableSync) {
   LOG(INFO) << "Pausing leader master";
   int leader_idx;
   ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_idx));
-  cluster_->master(leader_idx)->Pause();
+  ASSERT_OK(cluster_->master(leader_idx)->Pause());
   ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx));
 
   // As Pause() is asynchronous, the following sequence of events is possible:
@@ -189,7 +189,7 @@ TEST_F(MasterFailoverTest, TestPauseAfterCreateTableIssued) {
   LOG(INFO) << "Pausing leader master";
   int leader_idx;
   ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_idx));
-  cluster_->master(leader_idx)->Pause();
+  ASSERT_OK(cluster_->master(leader_idx)->Pause());
   ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx));
 
   MonoTime deadline = MonoTime::Now() + MonoDelta::FromSeconds(90);
@@ -215,7 +215,7 @@ TEST_F(MasterFailoverTest, TestDeleteTableSync) {
   LOG(INFO) << "Pausing leader master";
   int leader_idx;
   ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_idx));
-  cluster_->master(leader_idx)->Pause();
+  ASSERT_OK(cluster_->master(leader_idx)->Pause());
   ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx));
 
   // It's possible for DeleteTable() to delete the table and still return
@@ -249,7 +249,7 @@ TEST_F(MasterFailoverTest, TestRenameTableSync) {
   LOG(INFO) << "Pausing leader master";
   int leader_idx;
   ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_idx));
-  cluster_->master(leader_idx)->Pause();
+  ASSERT_OK(cluster_->master(leader_idx)->Pause());
   ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx));
 
   // It's possible for AlterTable() to rename the table and still return
@@ -449,7 +449,7 @@ TEST_F(MasterFailoverTest, TestMasterPermanentFailure) {
     // Only in slow mode.
     if (AllowSlowTests()) {
       for (int j = 0; j < cluster_->num_masters(); j++) {
-        cluster_->master(j)->Pause();
+        ASSERT_OK(cluster_->master(j)->Pause());
         ScopedResumeExternalDaemon resume_daemon(cluster_->master(j));
         string table_name = Substitute("table-$0-$1", i, j);
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/86116e4c/src/kudu/integration-tests/master_migration-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_migration-itest.cc b/src/kudu/integration-tests/master_migration-itest.cc
index 29f2b2d..234f1ce 100644
--- a/src/kudu/integration-tests/master_migration-itest.cc
+++ b/src/kudu/integration-tests/master_migration-itest.cc
@@ -208,7 +208,7 @@ TEST_F(MasterMigrationTest, TestEndToEndMigration) {
   // Only in slow mode.
   if (AllowSlowTests()) {
     for (int i = 0; i < migrated_cluster.num_masters(); i++) {
-      migrated_cluster.master(i)->Pause();
+      ASSERT_OK(migrated_cluster.master(i)->Pause());
       ScopedResumeExternalDaemon resume_daemon(migrated_cluster.master(i));
       ASSERT_OK(client->OpenTable(kTableName, &table));
       ASSERT_EQ(0, CountTableRows(table.get()));

http://git-wip-us.apache.org/repos/asf/kudu/blob/86116e4c/src/kudu/integration-tests/raft_consensus-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc b/src/kudu/integration-tests/raft_consensus-itest.cc
index ded7710..1e48a65 100644
--- a/src/kudu/integration-tests/raft_consensus-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus-itest.cc
@@ -1639,7 +1639,7 @@ TEST_F(RaftConsensusITest, TestStepDownWithSlowFollower) {
 
   // Stop both followers.
   for (int i = 1; i < 3; i++) {
-    cluster_->tablet_server_by_uuid(tservers[i]->uuid())->Pause();
+    ASSERT_OK(cluster_->tablet_server_by_uuid(tservers[i]->uuid())->Pause());
   }
 
   // Sleep a little bit of time to make sure that the leader has outstanding heartbeats
@@ -1838,7 +1838,7 @@ TEST_F(RaftConsensusITest, TestReplaceChangeConfigOperation) {
   ASSERT_TRUE(s.IsTimedOut());
 
   // Pause the leader, and restart the other servers.
-  cluster_->tablet_server_by_uuid(tservers[0]->uuid())->Pause();
+  ASSERT_OK(cluster_->tablet_server_by_uuid(tservers[0]->uuid())->Pause());
   ASSERT_OK(cluster_->tablet_server_by_uuid(tservers[1]->uuid())->Restart());
   ASSERT_OK(cluster_->tablet_server_by_uuid(tservers[2]->uuid())->Restart());
 
@@ -1852,7 +1852,7 @@ TEST_F(RaftConsensusITest, TestReplaceChangeConfigOperation) {
   // Resume the original leader. Its change-config operation will now be aborted
   // since it was never replicated to the majority, and the new leader will have
   // replaced the operation.
-  cluster_->tablet_server_by_uuid(tservers[0]->uuid())->Resume();
+  ASSERT_OK(cluster_->tablet_server_by_uuid(tservers[0]->uuid())->Resume());
 
   // Insert some data and verify that it propagates to all servers.
   NO_FATALS(InsertTestRowsRemoteThread(0, 10, 1, vector<CountDownLatch*>()));
@@ -2632,7 +2632,7 @@ TEST_F(RaftConsensusITest, TestCommitIndexFarBehindAfterLeaderElection)
{
 
   // Pause the initial leader and wait for the replica to elect itself. The third TS participates
   // here by voting.
-  first_leader_ts->Pause();
+  ASSERT_OK(first_leader_ts->Pause());
   ASSERT_OK(WaitUntilLeader(tablet_servers_[second_leader_ts->uuid()], tablet_id_, kTimeout));
 
   // The voter TS has done its duty. Shut it down to avoid log spam where it tries to run
@@ -2659,7 +2659,7 @@ TEST_F(RaftConsensusITest, TestCommitIndexFarBehindAfterLeaderElection)
{
 
   // Now, when we resume the original leader, we expect them to recover properly.
   // Previously this triggered KUDU-1469.
-  first_leader_ts->Resume();
+  ASSERT_OK(first_leader_ts->Resume());
 
   TabletServerMap active_tservers = tablet_servers_;
   active_tservers.erase(only_vote_ts->uuid());


Mime
View raw message