kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject [1/4] incubator-kudu git commit: Send back an error when UpdateConsensus cannot prepare a single transaction
Date Fri, 22 Jan 2016 04:25:54 GMT
Repository: incubator-kudu
Updated Branches:
  refs/heads/master 8260093b9 -> 3d4e0ce8a


Send back an error when UpdateConsensus cannot prepare a single transaction

If for any reason an UpdateConsensus() call fails to prepare a single op,
it will still return OK. The leader doesn't detect that no progress
was made, so it sends a new batch right away... and it's the same one.

This patch makes it so that we detect this situation and so that we
go into the error-handling path on response. It also adds a test
where we manufacture the same conditions.

Change-Id: I546fd3069af974383c23acb7406ea621e6962bb3
Reviewed-on: http://gerrit.cloudera.org:8080/1785
Reviewed-by: Mike Percy <mpercy@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/ae865a53
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/ae865a53
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/ae865a53

Branch: refs/heads/master
Commit: ae865a536a50e4c12daec39c24e053fe768e9c01
Parents: 8260093
Author: Jean-Daniel Cryans <jdcryans@cloudera.com>
Authored: Thu Jan 14 10:44:56 2016 -0800
Committer: Jean-Daniel Cryans <jdcryans@gerrit.cloudera.org>
Committed: Fri Jan 22 02:33:10 2016 +0000

----------------------------------------------------------------------
 src/kudu/consensus/consensus.proto              |  3 ++
 src/kudu/consensus/consensus_peers.cc           |  6 ++-
 src/kudu/consensus/raft_consensus.cc            | 25 +++++++++++
 .../integration-tests/raft_consensus-itest.cc   | 44 ++++++++++++++++++++
 4 files changed, 76 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/ae865a53/src/kudu/consensus/consensus.proto
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/consensus.proto b/src/kudu/consensus/consensus.proto
index 8fe879d..cb10bde 100644
--- a/src/kudu/consensus/consensus.proto
+++ b/src/kudu/consensus/consensus.proto
@@ -65,6 +65,9 @@ message ConsensusErrorPB {
     // The local replica is in the middle of servicing either another vote
     // or an update from a valid leader.
     CONSENSUS_BUSY = 8;
+
+    // The local replica was unable to prepare a single transaction.
+    CANNOT_PREPARE = 9;
   }
 
   // The error code.

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/ae865a53/src/kudu/consensus/consensus_peers.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/consensus_peers.cc b/src/kudu/consensus/consensus_peers.cc
index 6879451..c27ae83 100644
--- a/src/kudu/consensus/consensus_peers.cc
+++ b/src/kudu/consensus/consensus_peers.cc
@@ -244,8 +244,10 @@ void Peer::ProcessResponse() {
 
   // Pass through errors we can respond to, like not found, since in that case
   // we will need to remotely bootstrap. TODO: Handle DELETED response once implemented.
-  if (response_.has_error() &&
-      response_.error().code() != tserver::TabletServerErrorPB::TABLET_NOT_FOUND) {
+  if ((response_.has_error() &&
+      response_.error().code() != tserver::TabletServerErrorPB::TABLET_NOT_FOUND) ||
+      (response_.status().has_error() &&
+          response_.status().error().code() == consensus::ConsensusErrorPB::CANNOT_PREPARE))
{
     // Again, let the queue know that the remote is still responsive, since we
     // will not be sending this error response through to the queue.
     queue_->NotifyPeerIsResponsiveDespiteError(peer_pb_.permanent_uuid());

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/ae865a53/src/kudu/consensus/raft_consensus.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/raft_consensus.cc b/src/kudu/consensus/raft_consensus.cc
index 45567d8..58909f8 100644
--- a/src/kudu/consensus/raft_consensus.cc
+++ b/src/kudu/consensus/raft_consensus.cc
@@ -95,6 +95,11 @@ DEFINE_bool(follower_reject_update_consensus_requests, false,
             "Warning! This is only intended for testing.");
 TAG_FLAG(follower_reject_update_consensus_requests, unsafe);
 
+DEFINE_bool(follower_fail_all_prepare, false,
+            "Whether a follower will fail preparing all transactions. "
+            "Warning! This is only intended for testing.");
+TAG_FLAG(follower_fail_all_prepare, unsafe);
+
 DECLARE_int32(memory_limit_warn_threshold_percentage);
 
 METRIC_DEFINE_counter(tablet, follower_memory_pressure_rejections,
@@ -690,6 +695,11 @@ Status RaftConsensus::StartReplicaTransactionUnlocked(const ReplicateRefPtr&
msg
     return StartConsensusOnlyRoundUnlocked(msg);
   }
 
+  if (PREDICT_FALSE(FLAGS_follower_fail_all_prepare)) {
+    return Status::IllegalState("Rejected: --follower_fail_all_prepare "
+                                "is set to true.");
+  }
+
   VLOG_WITH_PREFIX_UNLOCKED(1) << "Starting transaction: " << msg->get()->id().ShortDebugString();
   scoped_refptr<ConsensusRound> round(new ConsensusRound(this, msg));
   ConsensusRound* round_ptr = round.get();
@@ -1114,6 +1124,21 @@ Status RaftConsensus::UpdateReplica(const ConsensusRequestPB* request,
               " other warnings. Status for this op: " << prepare_status.ToString();
         }
       }
+
+      // If this is empty, it means we couldn't prepare a single de-duped message. There
is nothing
+      // else we can do. The leader will detect this and retry later.
+      if (deduped_req.messages.empty()) {
+        string msg = Substitute("Rejecting Update request from peer $0 for term $1. "
+                                "Could not prepare a single transaction due to: $2",
+                                request->caller_uuid(),
+                                request->caller_term(),
+                                prepare_status.ToString());
+        LOG_WITH_PREFIX_UNLOCKED(INFO) << msg;
+        FillConsensusResponseError(response, ConsensusErrorPB::CANNOT_PREPARE,
+                                   Status::IllegalState(msg));
+        FillConsensusResponseOKUnlocked(response);
+        return Status::OK();
+      }
     }
 
     OpId last_from_leader;

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/ae865a53/src/kudu/integration-tests/raft_consensus-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc b/src/kudu/integration-tests/raft_consensus-itest.cc
index 7a0ccd4..b49dc24 100644
--- a/src/kudu/integration-tests/raft_consensus-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus-itest.cc
@@ -2417,6 +2417,50 @@ TEST_F(RaftConsensusITest, TestChangeConfigRejectedUnlessNoopReplicated)
{
   ASSERT_STR_CONTAINS(s.ToString(), "Latest committed op is not from this term");
 }
 
+// Test that if for some reason none of the transactions can be prepared, that it will come
+// back as an error in UpdateConsensus().
+TEST_F(RaftConsensusITest, TestUpdateConsensusErrorNonePrepared) {
+  const int kNumOps = 10;
+
+  vector<string> ts_flags, master_flags;
+  ts_flags.push_back("--enable_leader_failure_detection=false");
+  master_flags.push_back("--catalog_manager_wait_for_new_tablets_to_elect_leader=false");
+  BuildAndStart(ts_flags, master_flags);
+
+  vector<TServerDetails*> tservers;
+  AppendValuesFromMap(tablet_servers_, &tservers);
+  ASSERT_EQ(3, tservers.size());
+
+  // Shutdown the other servers so they don't get chatty.
+  cluster_->tablet_server_by_uuid(tservers[1]->uuid())->Shutdown();
+  cluster_->tablet_server_by_uuid(tservers[2]->uuid())->Shutdown();
+
+  // Configure the first server to fail all on prepare.
+  TServerDetails *replica_ts = tservers[0];
+  ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(0),
+                "follower_fail_all_prepare", "true"));
+
+  // Pretend to be the leader and send a request that should return an error.
+  ConsensusRequestPB req;
+  ConsensusResponsePB resp;
+  RpcController rpc;
+  req.set_dest_uuid(replica_ts->uuid());
+  req.set_tablet_id(tablet_id_);
+  req.set_caller_uuid(tservers[2]->instance_id.permanent_uuid());
+  req.set_caller_term(0);
+  req.mutable_committed_index()->CopyFrom(MakeOpId(0, 0));
+  req.mutable_preceding_id()->CopyFrom(MakeOpId(0, 0));
+  for (int i = 0; i < kNumOps; i++) {
+    AddOp(MakeOpId(0, 1 + i), &req);
+  }
+
+  ASSERT_OK(replica_ts->consensus_proxy->UpdateConsensus(req, &resp, &rpc));
+  LOG(INFO) << resp.ShortDebugString();
+  ASSERT_TRUE(resp.status().has_error());
+  ASSERT_EQ(consensus::ConsensusErrorPB::CANNOT_PREPARE, resp.status().error().code());
+  ASSERT_STR_CONTAINS(resp.ShortDebugString(), "Could not prepare a single transaction");
+}
+
 }  // namespace tserver
 }  // namespace kudu
 


Mime
View raw message