kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jdcry...@apache.org
Subject [2/5] kudu git commit: [tools] Manual recovery tools (part 1)
Date Wed, 09 Nov 2016 14:09:43 GMT
[tools] Manual recovery tools (part 1)

This change introduces two recovery tools:
1) 'kudu remote_replica copy' copies the given tablet onto
   a destination server irrespective of the state of the
   destination replica (provided the --force_copy option is given).
   This is a port from https://gerrit.cloudera.org/#/c/3582/
2) 'kudu local_replica delete' (KUDU-1618) deletes a local replica
   of a tablet when the tablet server cannot come up due to a
   bad replica of a tablet. This tool should be used with caution,
   so a --clean_unsafe flag was added whose description warns the
   user of the consequences. As of this change, the tool supports
   deleting the tablet permanently if used with the --clean_unsafe flag,
   and fails with an 'unsupported action' error without the flag.
   A future patch will add a default action of tombstoning
   the tablet instead of permanently deleting it.

Also added a few tests exercising typical usage scenarios for these tools.
Added the tool's stderr output to the trace in the existing RunAction
routines so that more information appears in the log if an action fails.

Change-Id: I113a25e9b6c14f7c3814140917b61e35030b58d0
Reviewed-on: http://gerrit.cloudera.org:8080/4834
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mpercy@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/4a6493d4
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/4a6493d4
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/4a6493d4

Branch: refs/heads/master
Commit: 4a6493d4db30de9dffde8003c1d844320f1b00d2
Parents: 3c68dea
Author: Dinesh Bhat <dinesh@cloudera.com>
Authored: Tue Oct 25 03:37:40 2016 -0700
Committer: Alexey Serbin <aserbin@cloudera.com>
Committed: Tue Nov 8 23:41:25 2016 +0000

----------------------------------------------------------------------
 .../integration-tests/cluster_itest_util.cc     |  27 +-
 src/kudu/integration-tests/cluster_itest_util.h |   8 +
 src/kudu/integration-tests/delete_table-test.cc |  30 +-
 src/kudu/tools/CMakeLists.txt                   |   2 +-
 src/kudu/tools/ksck_remote.cc                   |   2 +-
 src/kudu/tools/kudu-tool-test.cc                | 312 ++++++++++++++++++-
 src/kudu/tools/tool_action_common.cc            |   3 +-
 src/kudu/tools/tool_action_local_replica.cc     |  42 ++-
 src/kudu/tools/tool_action_remote_replica.cc    |  63 ++++
 src/kudu/tserver/ts_tablet_manager.cc           |  16 +-
 src/kudu/tserver/ts_tablet_manager.h            |  16 +-
 11 files changed, 456 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/4a6493d4/src/kudu/integration-tests/cluster_itest_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/cluster_itest_util.cc b/src/kudu/integration-tests/cluster_itest_util.cc
index 898117a..0368546 100644
--- a/src/kudu/integration-tests/cluster_itest_util.cc
+++ b/src/kudu/integration-tests/cluster_itest_util.cc
@@ -39,6 +39,7 @@
 #include "kudu/tserver/tserver_service.pb.h"
 #include "kudu/tserver/tserver_service.proxy.h"
 #include "kudu/util/net/net_util.h"
+#include "kudu/util/test_macros.h"
 
 namespace kudu {
 namespace itest {
@@ -76,6 +77,7 @@ using std::string;
 using std::unordered_map;
 using std::vector;
 using strings::Substitute;
+using tablet::TabletDataState;
 using tserver::CreateTsClientProxies;
 using tserver::ListTabletsResponsePB;
 using tserver::DeleteTabletRequestPB;
@@ -222,8 +224,8 @@ Status WaitUntilAllReplicasHaveOp(const int64_t log_index,
     replicas_str += "{ " + replica->ToString() + " }";
   }
   return Status::TimedOut(Substitute("Index $0 not available on all replicas after $1. "
-                                              "Replicas: [ $2 ]",
-                                              log_index, passed.ToString()));
+                                     "Replicas: [ $2 ]",
+                                     log_index, passed.ToString(), replicas_str));
 }
 
 Status CreateTabletServerMap(MasterServiceProxy* master_proxy,
@@ -741,7 +743,7 @@ Status WaitUntilTabletRunning(TServerDetails* ts,
 
 Status DeleteTablet(const TServerDetails* ts,
                     const std::string& tablet_id,
-                    const tablet::TabletDataState delete_type,
+                    const TabletDataState delete_type,
                     const boost::optional<int64_t>& cas_config_opid_index_less_or_equal,
                     const MonoDelta& timeout,
                     tserver::TabletServerErrorPB::Code* error_code) {
@@ -767,6 +769,25 @@ Status DeleteTablet(const TServerDetails* ts,
   return Status::OK();
 }
 
+void DeleteTabletWithRetries(const TServerDetails* ts,
+                             const string& tablet_id,
+                             TabletDataState delete_type,
+                             const boost::optional<int64_t>& config_opid_index,
+                             const MonoDelta& timeout) {
+  MonoTime start(MonoTime::Now());
+  MonoTime deadline = start + timeout;
+  Status s;
+  while (true) {
+    s = DeleteTablet(ts, tablet_id, delete_type, config_opid_index, timeout);
+    if (s.ok()) return;
+    if (deadline < MonoTime::Now()) {
+      break;
+    }
+    SleepFor(MonoDelta::FromMilliseconds(10));
+  }
+  ASSERT_OK(s);
+}
+
 Status StartTabletCopy(const TServerDetails* ts,
                        const string& tablet_id,
                        const string& copy_source_uuid,

http://git-wip-us.apache.org/repos/asf/kudu/blob/4a6493d4/src/kudu/integration-tests/cluster_itest_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/cluster_itest_util.h b/src/kudu/integration-tests/cluster_itest_util.h
index 3722b23..8f8571e 100644
--- a/src/kudu/integration-tests/cluster_itest_util.h
+++ b/src/kudu/integration-tests/cluster_itest_util.h
@@ -288,6 +288,14 @@ Status DeleteTablet(const TServerDetails* ts,
                     const MonoDelta& timeout,
                     tserver::TabletServerErrorPB::Code* error_code = NULL);
 
+// Repeatedly try to delete the tablet, retrying on failure up to the
+// specified timeout. Deletion can fail when other operations, such as
+// bootstrap or tablet copy, are running.
+void DeleteTabletWithRetries(const TServerDetails* ts, const std::string& tablet_id,
+                             tablet::TabletDataState delete_type,
+                             const boost::optional<int64_t>& config_opid_index,
+                             const MonoDelta& timeout);
+
 // Cause the remote to initiate tablet copy using the specified host as a
 // source.
 Status StartTabletCopy(const TServerDetails* ts,

http://git-wip-us.apache.org/repos/asf/kudu/blob/4a6493d4/src/kudu/integration-tests/delete_table-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/delete_table-test.cc b/src/kudu/integration-tests/delete_table-test.cc
index fd1803d..0665d0f 100644
--- a/src/kudu/integration-tests/delete_table-test.cc
+++ b/src/kudu/integration-tests/delete_table-test.cc
@@ -113,12 +113,6 @@ class DeleteTableTest : public ExternalMiniClusterITestBase {
   // Delete the given table. If the operation times out, dumps the master stacks
   // to help debug master-side deadlocks.
   void DeleteTable(const string& table_name);
-
-  // Repeatedly try to delete the tablet, retrying on failure up to the
-  // specified timeout. Deletion can fail when other operations, such as
-  // bootstrap or tablet copy, are running.
-  void DeleteTabletWithRetries(const TServerDetails* ts, const string& tablet_id,
-                               TabletDataState delete_type, const MonoDelta& timeout);
 };
 
 string DeleteTableTest::GetLeaderUUID(const string& ts_uuid, const string& tablet_id) {
@@ -219,24 +213,6 @@ void DeleteTableTest::DeleteTable(const string& table_name) {
   ASSERT_OK(s);
 }
 
-void DeleteTableTest::DeleteTabletWithRetries(const TServerDetails* ts,
-                                              const string& tablet_id,
-                                              TabletDataState delete_type,
-                                              const MonoDelta& timeout) {
-  MonoTime start(MonoTime::Now());
-  MonoTime deadline = start + timeout;
-  Status s;
-  while (true) {
-    s = itest::DeleteTablet(ts, tablet_id, delete_type, boost::none, timeout);
-    if (s.ok()) return;
-    if (deadline < MonoTime::Now()) {
-      break;
-    }
-    SleepFor(MonoDelta::FromMilliseconds(10));
-  }
-  ASSERT_OK(s);
-}
-
 // Test deleting an empty table, and ensure that the tablets get removed,
 // and the master no longer shows the table as existing.
 TEST_F(DeleteTableTest, TestDeleteEmptyTable) {
@@ -778,7 +754,8 @@ TEST_F(DeleteTableTest, TestMergeConsensusMetadata) {
   cluster_->tablet_server(2)->Shutdown();
 
   // Delete with retries because the tablet might still be bootstrapping.
-  NO_FATALS(DeleteTabletWithRetries(ts, tablet_id, TABLET_DATA_TOMBSTONED, timeout));
+  NO_FATALS(itest::DeleteTabletWithRetries(ts, tablet_id,
+                                           TABLET_DATA_TOMBSTONED, boost::none, timeout));
   NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED));
 
   ASSERT_OK(cluster_->tablet_server(1)->Restart());
@@ -1222,7 +1199,8 @@ TEST_P(DeleteTableTombstonedParamTest, TestTabletTombstone) {
     string tablet_id = tablet.tablet_status().tablet_id();
     // We need retries here, since some of the tablets may still be
     // bootstrapping after being restarted above.
-    NO_FATALS(DeleteTabletWithRetries(ts, tablet_id, TABLET_DATA_DELETED, timeout));
+    NO_FATALS(itest::DeleteTabletWithRetries(ts, tablet_id,
+                                             TABLET_DATA_DELETED, boost::none, timeout));
   }
   ASSERT_OK(inspect_->WaitForNoDataOnTS(kTsIndex));
 }

http://git-wip-us.apache.org/repos/asf/kudu/blob/4a6493d4/src/kudu/tools/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/tools/CMakeLists.txt b/src/kudu/tools/CMakeLists.txt
index 61951b5..53886df 100644
--- a/src/kudu/tools/CMakeLists.txt
+++ b/src/kudu/tools/CMakeLists.txt
@@ -32,6 +32,7 @@ set(LINK_LIBS
 add_library(kudu_tools_util
   color.cc
   data_gen_util.cc
+  tool_action_common.cc
 )
 target_link_libraries(kudu_tools_util
   ${LINK_LIBS})
@@ -57,7 +58,6 @@ target_link_libraries(ksck
 add_executable(kudu
   tool_action.cc
   tool_action_cluster.cc
-  tool_action_common.cc
   tool_action_fs.cc
   tool_action_local_replica.cc
   tool_action_master.cc

http://git-wip-us.apache.org/repos/asf/kudu/blob/4a6493d4/src/kudu/tools/ksck_remote.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck_remote.cc b/src/kudu/tools/ksck_remote.cc
index 1dddfec..a17c5ea 100644
--- a/src/kudu/tools/ksck_remote.cc
+++ b/src/kudu/tools/ksck_remote.cc
@@ -27,8 +27,8 @@
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/net/sockaddr.h"
 
+DECLARE_int64(timeout_ms); // defined in tool_action_common
 DEFINE_bool(checksum_cache_blocks, false, "Should the checksum scanners cache the read blocks");
-DEFINE_int64(timeout_ms, 1000 * 60, "RPC timeout in milliseconds");
 
 namespace kudu {
 namespace tools {

http://git-wip-us.apache.org/repos/asf/kudu/blob/4a6493d4/src/kudu/tools/kudu-tool-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 6ad0d08..03adaee 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -37,6 +37,7 @@
 #include "kudu/consensus/consensus.pb.h"
 #include "kudu/consensus/log.h"
 #include "kudu/consensus/log_util.h"
+#include "kudu/consensus/metadata.pb.h"
 #include "kudu/consensus/opid.pb.h"
 #include "kudu/consensus/opid_util.h"
 #include "kudu/consensus/ref_counted_replicate.h"
@@ -46,12 +47,23 @@
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/integration-tests/cluster_itest_util.h"
 #include "kudu/integration-tests/external_mini_cluster.h"
+#include "kudu/integration-tests/external_mini_cluster_fs_inspector.h"
+#include "kudu/integration-tests/mini_cluster.h"
+#include "kudu/integration-tests/test_workload.h"
 #include "kudu/tablet/local_tablet_writer.h"
 #include "kudu/tablet/tablet-harness.h"
 #include "kudu/tablet/tablet_metadata.h"
+#include "kudu/tablet/tablet_peer.h"
+#include "kudu/tablet/tablet.h"
+#include "kudu/tools/tool_action_common.h"
 #include "kudu/tools/tool_test_util.h"
+#include "kudu/tserver/mini_tablet_server.h"
+#include "kudu/tserver/tablet_server.h"
+#include "kudu/tserver/ts_tablet_manager.h"
 #include "kudu/tserver/tserver.pb.h"
+#include "kudu/tserver/tserver_service.proxy.h"
 #include "kudu/util/async_util.h"
 #include "kudu/util/env.h"
 #include "kudu/util/metrics.h"
@@ -74,8 +86,11 @@ using consensus::OpId;
 using consensus::ReplicateRefPtr;
 using consensus::ReplicateMsg;
 using fs::WritableBlock;
+using itest::ExternalMiniClusterFsInspector;
+using itest::TServerDetails;
 using log::Log;
 using log::LogOptions;
+using rpc::RpcController;
 using std::back_inserter;
 using std::copy;
 using std::ostringstream;
@@ -84,10 +99,19 @@ using std::unique_ptr;
 using std::vector;
 using strings::Substitute;
 using tablet::LocalTabletWriter;
+using tablet::Tablet;
+using tablet::TabletDataState;
 using tablet::TabletHarness;
 using tablet::TabletMetadata;
+using tablet::TabletPeer;
 using tablet::TabletSuperBlockPB;
+using tserver::DeleteTabletRequestPB;
+using tserver::DeleteTabletResponsePB;
+using tserver::MiniTabletServer;
 using tserver::WriteRequestPB;
+using tserver::ListTabletsRequestPB;
+using tserver::ListTabletsResponsePB;
+using tserver::TabletServerServiceProxy;
 
 class ToolTest : public KuduTest {
  public:
@@ -95,6 +119,16 @@ class ToolTest : public KuduTest {
       : tool_path_(GetKuduCtlAbsolutePath()) {
   }
 
+  ~ToolTest() {
+    STLDeleteValues(&ts_map_);
+  }
+
+  virtual void TearDown() OVERRIDE {
+    if (cluster_) cluster_->Shutdown();
+    if (mini_cluster_) mini_cluster_->Shutdown();
+    KuduTest::TearDown();
+  }
+
   Status RunTool(const string& arg_str,
                  string* stdout,
                  string* stderr,
@@ -127,21 +161,27 @@ class ToolTest : public KuduTest {
 
   void RunActionStdoutNone(const string& arg_str) const {
     string stdout;
-    Status s = RunTool(arg_str, &stdout, nullptr, nullptr, nullptr);
+    string stderr;
+    Status s = RunTool(arg_str, &stdout, &stderr, nullptr, nullptr);
     SCOPED_TRACE(stdout);
+    SCOPED_TRACE(stderr);
     ASSERT_OK(s);
     ASSERT_TRUE(stdout.empty());
   }
 
   void RunActionStdoutString(const string& arg_str, string* stdout) const {
-    Status s = RunTool(arg_str, stdout, nullptr, nullptr, nullptr);
+    string stderr;
+    Status s = RunTool(arg_str, stdout, &stderr, nullptr, nullptr);
     SCOPED_TRACE(*stdout);
+    SCOPED_TRACE(stderr);
     ASSERT_OK(s);
   }
 
   void RunActionStdoutLines(const string& arg_str, vector<string>* stdout_lines) const {
-    Status s = RunTool(arg_str, nullptr, nullptr, stdout_lines, nullptr);
+    string stderr;
+    Status s = RunTool(arg_str, nullptr, &stderr, stdout_lines, nullptr);
     SCOPED_TRACE(*stdout_lines);
+    SCOPED_TRACE(stderr);
     ASSERT_OK(s);
   }
 
@@ -175,10 +215,45 @@ class ToolTest : public KuduTest {
     }
   }
 
- private:
+ protected:
+  void RunLoadgen(int num_tservers = 1,
+                  const vector<string>& tool_args = {},
+                  const string& table_name = "");
+  void StartExternalMiniCluster(const vector<string>& extra_master_flags = {},
+                                const vector<string>& extra_tserver_flags = {},
+                                int num_tablet_servers = 1);
+  void StartMiniCluster(int num_masters = 1,
+                        int num_tablet_servers = 1);
+  unique_ptr<ExternalMiniCluster> cluster_;
+  unique_ptr<ExternalMiniClusterFsInspector> inspect_;
+  unordered_map<string, TServerDetails*> ts_map_;
+  unique_ptr<MiniCluster> mini_cluster_;
   string tool_path_;
 };
 
+void ToolTest::StartExternalMiniCluster(const vector<string>& extra_master_flags,
+                                        const vector<string>& extra_tserver_flags,
+                                        int num_tablet_servers) {
+  ExternalMiniClusterOptions opts;
+  opts.extra_master_flags = extra_master_flags;
+  opts.extra_tserver_flags = extra_tserver_flags;
+  opts.num_tablet_servers = num_tablet_servers;
+  cluster_.reset(new ExternalMiniCluster(opts));
+  ASSERT_OK(cluster_->Start());
+  inspect_.reset(new ExternalMiniClusterFsInspector(cluster_.get()));
+  ASSERT_OK(CreateTabletServerMap(cluster_->master_proxy().get(),
+                                  cluster_->messenger(), &ts_map_));
+}
+
+void ToolTest::StartMiniCluster(int num_masters,
+                                int num_tablet_servers) {
+  MiniClusterOptions opts;
+  opts.num_masters = num_masters;
+  opts.num_tablet_servers = num_tablet_servers;
+  mini_cluster_.reset(new MiniCluster(env_.get(), opts));
+  ASSERT_OK(mini_cluster_->Start());
+}
+
 TEST_F(ToolTest, TestTopLevelHelp) {
   const vector<string> kTopLevelRegexes = {
       "cluster.*Kudu cluster",
@@ -223,6 +298,7 @@ TEST_F(ToolTest, TestModeHelp) {
         "cmeta.*Operate on a local Kudu replica's consensus",
         "dump.*Dump a Kudu filesystem",
         "copy_from_remote.*Copy a replica",
+        "delete.*Delete Kudu replica from the local filesystem",
         "list.*Show list of Kudu replicas"
     };
     NO_FATALS(RunTestHelp("local_replica", kLocalReplicaModeRegexes));
@@ -267,6 +343,7 @@ TEST_F(ToolTest, TestModeHelp) {
   {
     const vector<string> kRemoteReplicaModeRegexes = {
         "check.*Check if all replicas",
+        "copy.*Copy a replica from one Kudu Tablet Server to another",
         "delete.*Delete a replica",
         "dump.*Dump the data of a replica",
         "list.*List all replicas"
@@ -762,16 +839,11 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
 
 // Create and start Kudu mini cluster, optionally creating a table in the DB,
 // and then run 'kudu test loadgen ...' utility against it.
-void RunLoadgen(size_t num_tservers = 1,
-                const vector<string>& tool_args = {},
-                const string& table_name = "") {
-  kudu::ExternalMiniClusterOptions opts;
-  opts.num_tablet_servers = num_tservers;
+void ToolTest::RunLoadgen(int num_tservers,
+                          const vector<string>& tool_args,
+                          const string& table_name) {
   // fsync causes flakiness on EC2
-  opts.extra_tserver_flags.push_back("--never_fsync");
-
-  unique_ptr<ExternalMiniCluster> cluster(new ExternalMiniCluster(opts));
-  ASSERT_OK(cluster->Start());
+  NO_FATALS(StartExternalMiniCluster({}, {"--never_fsync"}, num_tservers));
   if (!table_name.empty()) {
     static const string kKeyColumnName = "key";
     static const Schema kSchema = Schema(
@@ -790,21 +862,21 @@ void RunLoadgen(size_t num_tservers = 1,
       }, 1);
 
     shared_ptr<client::KuduClient> client;
-    ASSERT_OK(cluster->CreateClient(nullptr, &client));
+    ASSERT_OK(cluster_->CreateClient(nullptr, &client));
     client::KuduSchema client_schema(client::KuduSchemaFromSchema(kSchema));
     unique_ptr<client::KuduTableCreator> table_creator(
         client->NewTableCreator());
     ASSERT_OK(table_creator->table_name(table_name)
               .schema(&client_schema)
               .add_hash_partitions({kKeyColumnName}, 2)
-              .num_replicas(cluster->num_tablet_servers())
+              .num_replicas(cluster_->num_tablet_servers())
               .Create());
   }
   vector<string> args = {
     GetKuduCtlAbsolutePath(),
     "test",
     "loadgen",
-    cluster->master()->bound_rpc_addr().ToString(),
+    cluster_->master()->bound_rpc_addr().ToString(),
   };
   if (!table_name.empty()) {
     args.push_back(Substitute("-table_name=$0", table_name));
@@ -866,5 +938,213 @@ TEST_F(ToolTest, TestLoadgenManualFlush) {
       "bench_manual_flush"));
 }
 
+// Test 'kudu remote_replica copy' tool when the destination tablet server is online.
+// 1. Test the copy tool when the destination replica is healthy
+// 2. Test the copy tool when the destination replica is tombstoned
+// 3. Test the copy tool when the destination replica is deleted
+TEST_F(ToolTest, TestRemoteReplicaCopy) {
+  const string kTestDir = GetTestPath("test");
+  MonoDelta kTimeout = MonoDelta::FromSeconds(30);
+  const int kSrcTsIndex = 0;
+  const int kDstTsIndex = 1;
+  const int kNumTservers = 3;
+  const int kNumTablets = 3;
+  NO_FATALS(StartExternalMiniCluster(
+      {"--catalog_manager_wait_for_new_tablets_to_elect_leader=false"},
+      {"--enable_leader_failure_detection=false"}, kNumTservers));
+
+  // TestWorkLoad.Setup() internally generates a table.
+  TestWorkload workload(cluster_.get());
+  workload.set_num_tablets(kNumTablets);
+  workload.set_num_replicas(3);
+  workload.Setup();
+
+  // Choose source and destination tablet servers for tablet_copy.
+  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
+  TServerDetails* src_ts = ts_map_[cluster_->tablet_server(kSrcTsIndex)->uuid()];
+  ASSERT_OK(WaitForNumTabletsOnTS(src_ts, kNumTablets, kTimeout, &tablets));
+  TServerDetails* dst_ts = ts_map_[cluster_->tablet_server(kDstTsIndex)->uuid()];
+  ASSERT_OK(WaitForNumTabletsOnTS(dst_ts, kNumTablets, kTimeout, &tablets));
+
+  // Test 1: Test when the destination replica is healthy with and without --force_copy flag.
+  // This is an 'online tablet copy'. i.e, when the tool initiates a copy,
+  // the internal machinery of tablet-copy deletes the existing healthy
+  // replica on destination and copies the replica if --force_copy is specified.
+  // Without --force_copy flag, the test fails to copy since there is a healthy
+  // replica already present on the destination tserver.
+  string stderr;
+  const string& src_ts_addr = cluster_->tablet_server(kSrcTsIndex)->bound_rpc_addr().ToString();
+  const string& dst_ts_addr = cluster_->tablet_server(kDstTsIndex)->bound_rpc_addr().ToString();
+  const string& healthy_tablet_id = tablets[0].tablet_status().tablet_id();
+  Status s = RunTool(
+      Substitute("remote_replica copy $0 $1 $2",
+                 healthy_tablet_id, src_ts_addr, dst_ts_addr),
+                 nullptr, &stderr, nullptr, nullptr);
+  ASSERT_TRUE(s.IsRuntimeError());
+  SCOPED_TRACE(stderr);
+  ASSERT_STR_CONTAINS(stderr, "Rejecting tablet copy request");
+
+  NO_FATALS(RunActionStdoutNone(Substitute("remote_replica copy $0 $1 $2 --force_copy",
+                                           healthy_tablet_id, src_ts_addr, dst_ts_addr)));
+  ASSERT_OK(WaitUntilTabletInState(dst_ts, healthy_tablet_id,
+                                   tablet::RUNNING, kTimeout));
+
+  // Test 2 and 3: Test when the destination replica is tombstoned or deleted
+  DeleteTabletRequestPB req;
+  DeleteTabletResponsePB resp;
+  RpcController rpc;
+  rpc.set_timeout(kTimeout);
+  req.set_dest_uuid(dst_ts->uuid());
+  const string& tombstoned_tablet_id = tablets[2].tablet_status().tablet_id();
+  req.set_tablet_id(tombstoned_tablet_id);
+  req.set_delete_type(TabletDataState::TABLET_DATA_TOMBSTONED);
+  ASSERT_OK(dst_ts->tserver_admin_proxy->DeleteTablet(req, &resp, &rpc));
+  ASSERT_FALSE(resp.has_error());
+
+  // Shut down the destination server and delete one tablet from
+  // local fs on destination tserver while it is offline.
+  cluster_->tablet_server(kDstTsIndex)->Shutdown();
+  const string& tserver_dir = cluster_->tablet_server(kDstTsIndex)->data_dir();
+  const string& deleted_tablet_id = tablets[1].tablet_status().tablet_id();
+  NO_FATALS(RunActionStdoutNone(Substitute("local_replica delete $0 --fs_wal_dir=$1 "
+                                           "--fs_data_dirs=$1 --clean_unsafe",
+                                           deleted_tablet_id, tserver_dir)));
+
+  // At this point, we expect only 2 tablets to show up on destination when
+  // we restart the destination tserver. deleted_tablet_id should not be found on
+  // destination tserver until we do a copy from the tool again.
+  ASSERT_OK(cluster_->tablet_server(kDstTsIndex)->Restart());
+  vector<ListTabletsResponsePB::StatusAndSchemaPB> dst_tablets;
+  ASSERT_OK(WaitForNumTabletsOnTS(dst_ts, kNumTablets-1, kTimeout, &dst_tablets));
+  bool found_tombstoned_tablet = false;
+  for (const auto& t : dst_tablets) {
+    if (t.tablet_status().tablet_id() == tombstoned_tablet_id) {
+      found_tombstoned_tablet = true;
+    }
+    ASSERT_NE(t.tablet_status().tablet_id(), deleted_tablet_id);
+  }
+  ASSERT_TRUE(found_tombstoned_tablet);
+  // Wait until destination tserver has tombstoned_tablet_id in tombstoned state.
+  NO_FATALS(inspect_->WaitForTabletDataStateOnTS(kDstTsIndex, tombstoned_tablet_id,
+                                                 { TabletDataState::TABLET_DATA_TOMBSTONED },
+                                                 kTimeout));
+  // Copy tombstoned_tablet_id from source to destination.
+  NO_FATALS(RunActionStdoutNone(Substitute("remote_replica copy $0 $1 $2 --force_copy",
+                                           tombstoned_tablet_id, src_ts_addr, dst_ts_addr)));
+  ASSERT_OK(WaitUntilTabletInState(dst_ts, tombstoned_tablet_id,
+                                   tablet::RUNNING, kTimeout));
+  // Copy deleted_tablet_id from source to destination.
+  NO_FATALS(RunActionStdoutNone(Substitute("remote_replica copy $0 $1 $2",
+                                           deleted_tablet_id, src_ts_addr, dst_ts_addr)));
+  ASSERT_OK(WaitUntilTabletInState(dst_ts, deleted_tablet_id,
+                                   tablet::RUNNING, kTimeout));
+}
+
+// Test 'kudu local_replica delete' tool when the tablet server is offline.
+TEST_F(ToolTest, TestLocalReplicaDelete) {
+  MonoDelta kTimeout = MonoDelta::FromSeconds(30);
+  NO_FATALS(StartMiniCluster());
+
+  // TestWorkLoad.Setup() internally generates a table.
+  TestWorkload workload(mini_cluster_.get());
+  workload.set_num_replicas(1);
+  workload.Setup();
+  workload.Start();
+
+  // There is only one tserver in the minicluster.
+  ASSERT_OK(mini_cluster_->WaitForTabletServerCount(1));
+  MiniTabletServer* ts = mini_cluster_->mini_tablet_server(0);
+
+  // Grab the tablet_id to delete
+  ListTabletsRequestPB req;
+  ListTabletsResponsePB resp;
+  RpcController rpc;
+  rpc.set_timeout(kTimeout);
+  {
+    unique_ptr<TabletServerServiceProxy> ts_proxy;
+    ASSERT_OK(BuildProxy(ts->bound_rpc_addr().ToString(),
+                         tserver::TabletServer::kDefaultPort, &ts_proxy));
+    ASSERT_OK(ts_proxy->ListTablets(req, &resp, &rpc));
+  }
+  ASSERT_FALSE(resp.has_error());
+  ASSERT_EQ(resp.status_and_schema_size(), 1);
+  const string& tablet_id = resp.status_and_schema(0).tablet_status().tablet_id();
+
+  // Generate some workload followed by a flush to grow the size of the tablet on disk.
+  while (workload.rows_inserted() < 10000) {
+    SleepFor(MonoDelta::FromMilliseconds(10));
+  }
+  workload.StopAndJoin();
+
+  // Make sure the tablet data is flushed to disk. This is needed
+  // so that we can compare the size of the data on disk before and
+  // after the deletion of local_replica to check if the size-on-disk is reduced at all.
+  {
+    scoped_refptr<TabletPeer> tablet_peer;
+    ASSERT_TRUE(ts->server()->tablet_manager()->LookupTablet(tablet_id, &tablet_peer));
+    Tablet* tablet = tablet_peer->tablet();
+    ASSERT_OK(tablet->Flush());
+  }
+  const string& tserver_dir = ts->options()->fs_opts.wal_path;
+
+  // Using the delete tool with tablet server running fails.
+  string stderr;
+  Status s = RunTool(
+      Substitute("local_replica delete $0 --fs_wal_dir=$1 --fs_data_dirs=$1 "
+                 "--clean_unsafe", tablet_id, tserver_dir),
+                 nullptr, &stderr, nullptr, nullptr);
+  ASSERT_TRUE(s.IsRuntimeError());
+  SCOPED_TRACE(stderr);
+  ASSERT_STR_CONTAINS(stderr, "Resource temporarily unavailable");
+
+  // Shut down tablet server and use the delete tool again.
+  ts->Shutdown();
+
+  // Run the tool without --clean_unsafe flag first.
+  s = RunTool(Substitute("local_replica delete $0 --fs_wal_dir=$1 --fs_data_dirs=$1",
+                         tablet_id, tserver_dir),
+              nullptr, &stderr, nullptr, nullptr);
+  ASSERT_TRUE(s.IsRuntimeError());
+  SCOPED_TRACE(stderr);
+  ASSERT_STR_CONTAINS(stderr, "currently not supported without --clean_unsafe flag");
+
+  const string& data_dir = JoinPathSegments(tserver_dir, "data");
+  uint64_t size_before_delete;
+  ASSERT_OK(env_->GetFileSizeOnDiskRecursively(data_dir, &size_before_delete));
+  NO_FATALS(RunActionStdoutNone(Substitute("local_replica delete $0 --fs_wal_dir=$1 "
+                                           "--fs_data_dirs=$1 --clean_unsafe",
+                                           tablet_id, tserver_dir)));
+  // Verify metadata and WAL segments for the tablet_id are gone.
+  const string& wal_dir = JoinPathSegments(tserver_dir,
+                                           Substitute("wals/$0", tablet_id));
+  ASSERT_FALSE(env_->FileExists(wal_dir));
+  const string& meta_dir = JoinPathSegments(tserver_dir,
+                                            Substitute("tablet-meta/$0", tablet_id));
+  ASSERT_FALSE(env_->FileExists(meta_dir));
+
+  // Verify that the total size of the data on disk after 'delete' action
+  // is less than before. Although this doesn't necessarily check
+  // for orphan data blocks left behind for the given tablet, it certainly
+  // indicates that some data has been deleted from disk.
+  uint64_t size_after_delete;
+  ASSERT_OK(env_->GetFileSizeOnDiskRecursively(data_dir, &size_after_delete));
+  ASSERT_LT(size_after_delete, size_before_delete);
+
+  // Since there was only one tablet on the node which was deleted by tool,
+  // we can expect the tablet server to have nothing after it comes up.
+  ASSERT_OK(ts->Start());
+  ASSERT_OK(ts->WaitStarted());
+  rpc.Reset();
+  rpc.set_timeout(kTimeout);
+  {
+    unique_ptr<TabletServerServiceProxy> ts_proxy;
+    ASSERT_OK(BuildProxy(ts->bound_rpc_addr().ToString(),
+                         tserver::TabletServer::kDefaultPort, &ts_proxy));
+    ASSERT_OK(ts_proxy->ListTablets(req, &resp, &rpc));
+  }
+  ASSERT_FALSE(resp.has_error());
+  ASSERT_EQ(resp.status_and_schema_size(), 0);
+}
+
 } // namespace tools
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/4a6493d4/src/kudu/tools/tool_action_common.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_common.cc b/src/kudu/tools/tool_action_common.cc
index bd0db19..80c14ba 100644
--- a/src/kudu/tools/tool_action_common.cc
+++ b/src/kudu/tools/tool_action_common.cc
@@ -50,8 +50,6 @@
 #include "kudu/util/pb_util.h"
 #include "kudu/util/status.h"
 
-DECLARE_int64(timeout_ms); // defined in ksck
-
 DEFINE_bool(force, false, "If true, allows the set_flag command to set a flag "
             "which is not explicitly marked as runtime-settable. Such flag "
             "changes may be simply ignored on the server, or may cause the "
@@ -63,6 +61,7 @@ DEFINE_string(print_entries, "decoded",
               "  true|1|yes|decoded = print them decoded\n"
               "  pb = print the raw protobuf\n"
               "  id = print only their ids");
+DEFINE_int64(timeout_ms, 1000 * 60, "RPC timeout in milliseconds");
 DEFINE_int32(truncate_data, 100,
              "Truncate the data fields to the given number of bytes "
              "before printing. Set to 0 to disable");

http://git-wip-us.apache.org/repos/asf/kudu/blob/4a6493d4/src/kudu/tools/tool_action_local_replica.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc
index 431f7ad..21f0e23 100644
--- a/src/kudu/tools/tool_action_local_replica.cc
+++ b/src/kudu/tools/tool_action_local_replica.cc
@@ -50,10 +50,12 @@
 #include "kudu/rpc/messenger.h"
 #include "kudu/tablet/cfile_set.h"
 #include "kudu/tablet/deltafile.h"
+#include "kudu/tablet/metadata.pb.h"
 #include "kudu/tablet/rowset_metadata.h"
 #include "kudu/tablet/tablet.h"
 #include "kudu/tools/tool_action_common.h"
 #include "kudu/tserver/tablet_copy_client.h"
+#include "kudu/tserver/ts_tablet_manager.h"
 #include "kudu/tserver/tserver.pb.h"
 #include "kudu/util/env.h"
 #include "kudu/util/env_util.h"
@@ -75,6 +77,11 @@ DEFINE_bool(list_detail, false,
 DEFINE_int64(rowset_index, -1,
              "Index of the rowset in local replica, default value(-1) "
              "will dump all the rowsets of the local replica");
+DEFINE_bool(clean_unsafe, false,
+            "Delete the local replica completely, not leaving a tombstone. "
+            "This is not guaranteed to be safe because it also removes the "
+            "consensus metadata (including Raft voting record) for the "
+            "specified tablet, which violates the Raft vote durability requirements.");
 
 namespace kudu {
 namespace tools {
@@ -110,10 +117,10 @@ using tablet::DeltaKeyAndUpdate;
 using tablet::DeltaType;
 using tablet::MvccSnapshot;
 using tablet::RowSetMetadata;
-using tablet::Tablet;
 using tablet::TabletMetadata;
+using tablet::TabletDataState;
 using tserver::TabletCopyClient;
-using tserver::WriteRequestPB;
+using tserver::TSTabletManager;
 
 namespace {
 
@@ -257,6 +264,27 @@ Status CopyFromRemote(const RunnerContext& context) {
   return client.Finish();
 }
 
+Status DeleteLocalReplica(const RunnerContext& context) {
+  const string& tablet_id = FindOrDie(context.required_args, kTabletIdArg);
+  if (!FLAGS_clean_unsafe) {
+    return Status::NotSupported(Substitute("Deletion of local replica $0 is "
+        "currently not supported without --clean_unsafe flag", tablet_id));
+  }
+  FsManager fs_manager(Env::Default(), FsManagerOpts());
+  RETURN_NOT_OK(fs_manager.Open());
+
+  // This action cleans up metadata/data/WAL for a tablet on this node when
+  // the tablet server is offline. i.e, this tool is an alternative to
+  // actions like 'rm -rf <tablet-meta>' which could orphan the
+  // tablet data blocks with no means to clean them up later.
+  scoped_refptr<TabletMetadata> meta;
+  RETURN_NOT_OK(TabletMetadata::Load(&fs_manager, tablet_id, &meta));
+
+  RETURN_NOT_OK(TSTabletManager::DeleteTabletData(
+      meta, TabletDataState::TABLET_DATA_DELETED, boost::none));
+  return Status::OK();
+}
+
 Status DumpWals(const RunnerContext& context) {
   unique_ptr<FsManager> fs_manager;
   RETURN_NOT_OK(FsInit(&fs_manager));
@@ -712,10 +740,20 @@ unique_ptr<Mode> BuildLocalReplicaMode() {
       .AddOptionalParameter("list_detail")
       .Build();
 
+  unique_ptr<Action> delete_local_replica =
+      ActionBuilder("delete", &DeleteLocalReplica)
+      .Description("Delete Kudu replica from the local filesystem")
+      .AddRequiredParameter({ kTabletIdArg, kTabletIdArgDesc })
+      .AddOptionalParameter("fs_wal_dir")
+      .AddOptionalParameter("fs_data_dirs")
+      .AddOptionalParameter("clean_unsafe")
+      .Build();
+
   return ModeBuilder("local_replica")
       .Description("Operate on local Kudu replicas via the local filesystem")
       .AddMode(std::move(cmeta))
       .AddAction(std::move(copy_from_remote))
+      .AddAction(std::move(delete_local_replica))
       .AddAction(std::move(list))
       .AddMode(BuildDumpMode())
       .Build();

http://git-wip-us.apache.org/repos/asf/kudu/blob/4a6493d4/src/kudu/tools/tool_action_remote_replica.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_remote_replica.cc b/src/kudu/tools/tool_action_remote_replica.cc
index 0e117a9..625afb0 100644
--- a/src/kudu/tools/tool_action_remote_replica.cc
+++ b/src/kudu/tools/tool_action_remote_replica.cc
@@ -29,6 +29,8 @@
 #include "kudu/client/row_result.h"
 #include "kudu/client/scan_batch.h"
 #include "kudu/client/scanner-internal.h"
+#include "kudu/consensus/consensus.pb.h"
+#include "kudu/consensus/consensus.proxy.h"
 #include "kudu/common/partition.h"
 #include "kudu/common/schema.h"
 #include "kudu/common/wire_protocol.h"
@@ -47,6 +49,8 @@
 #include "kudu/util/net/sockaddr.h"
 #include "kudu/util/status.h"
 
+DEFINE_bool(force_copy, false,
+            "Force the copy when the destination tablet server has this replica");
 DECLARE_int64(timeout_ms); // defined in ksck
 
 namespace kudu {
@@ -55,6 +59,9 @@ namespace tools {
 using client::KuduRowResult;
 using client::KuduScanBatch;
 using client::KuduSchema;
+using consensus::ConsensusServiceProxy;
+using consensus::StartTabletCopyRequestPB;
+using consensus::StartTabletCopyResponsePB;
 using rpc::RpcController;
 using server::ServerStatusPB;
 using std::cerr;
@@ -71,6 +78,8 @@ using tserver::ListTabletsResponsePB;
 using tserver::NewScanRequestPB;
 using tserver::ScanRequestPB;
 using tserver::ScanResponsePB;
+using tserver::TabletServer;
+using tserver::TabletServerErrorPB;
 using tserver::TabletServerAdminServiceProxy;
 using tserver::TabletServerServiceProxy;
 
@@ -140,6 +149,8 @@ const char* const kTServerAddressArg = "tserver_address";
 const char* const kTServerAddressDesc = "Address of a Kudu Tablet Server of "
     "form 'hostname:port'. Port may be omitted if the Tablet Server is bound "
     "to the default port.";
+const char* const kSrcAddressArg = "src_address";
+const char* const kDstAddressArg = "dst_address";
 
 Status GetReplicas(TabletServerServiceProxy* proxy,
                    vector<ListTabletsResponsePB::StatusAndSchemaPB>* replicas) {
@@ -280,6 +291,49 @@ Status ListReplicas(const RunnerContext& context) {
   return Status::OK();
 }
 
+Status CopyReplica(const RunnerContext& context) {
+  const string& src_address = FindOrDie(context.required_args, kSrcAddressArg);
+  const string& dst_address = FindOrDie(context.required_args, kDstAddressArg);
+  const string& tablet_id = FindOrDie(context.required_args, kTabletIdArg);
+
+  ServerStatusPB dst_status;
+  RETURN_NOT_OK(GetServerStatus(dst_address, TabletServer::kDefaultPort,
+                                &dst_status));
+  ServerStatusPB src_status;
+  RETURN_NOT_OK(GetServerStatus(src_address, TabletServer::kDefaultPort,
+                                &src_status));
+
+  unique_ptr<ConsensusServiceProxy> proxy;
+  RETURN_NOT_OK(BuildProxy(dst_address, TabletServer::kDefaultPort, &proxy));
+
+  StartTabletCopyRequestPB req;
+  StartTabletCopyResponsePB resp;
+  RpcController rpc;
+  req.set_dest_uuid(dst_status.node_instance().permanent_uuid());
+  req.set_tablet_id(tablet_id);
+  req.set_copy_peer_uuid(src_status.node_instance().permanent_uuid());
+  *req.mutable_copy_peer_addr() = src_status.bound_rpc_addresses(0);
+  // Provide a force option if the destination tablet server has the
+  // replica otherwise tablet-copy will fail.
+  if (FLAGS_force_copy) {
+    req.set_caller_term(std::numeric_limits<int64_t>::max());
+  }
+
+  LOG(INFO) << "Sending copy replica request:\n" << req.DebugString();
+  LOG(WARNING) << "NOTE: this copy may happen asynchronously "
+               << "and may timeout if the tablet size is large. Watch the logs on "
+               << "the target tablet server for indication of progress.";
+
+  RETURN_NOT_OK(proxy->StartTabletCopy(req, &resp, &rpc));
+  if (resp.has_error()) {
+    RETURN_NOT_OK_PREPEND(
+        StatusFromPB(resp.error().status()),
+        strings::Substitute("Remote server returned error code $0",
+                            TabletServerErrorPB::Code_Name(resp.error().code())));
+  }
+  return Status::OK();
+}
+
 } // anonymous namespace
 
 unique_ptr<Mode> BuildRemoteReplicaMode() {
@@ -289,6 +343,14 @@ unique_ptr<Mode> BuildRemoteReplicaMode() {
       .AddRequiredParameter({ kTServerAddressArg, kTServerAddressDesc })
       .Build();
 
+  unique_ptr<Action> copy_replica =
+      ActionBuilder("copy", &CopyReplica)
+      .Description("Copy a replica from one Kudu Tablet Server to another")
+      .AddRequiredParameter({ kTabletIdArg, kTabletIdArgDesc })
+      .AddRequiredParameter({ kSrcAddressArg, kTServerAddressDesc })
+      .AddRequiredParameter({ kDstAddressArg, kTServerAddressDesc })
+      .Build();
+
   unique_ptr<Action> delete_replica =
       ActionBuilder("delete", &DeleteReplica)
       .Description("Delete a replica from a Kudu Tablet Server")
@@ -313,6 +375,7 @@ unique_ptr<Mode> BuildRemoteReplicaMode() {
   return ModeBuilder("remote_replica")
       .Description("Operate on replicas on a Kudu Tablet Server")
       .AddAction(std::move(check_replicas))
+      .AddAction(std::move(copy_replica))
       .AddAction(std::move(delete_replica))
       .AddAction(std::move(dump_replica))
       .AddAction(std::move(list))

http://git-wip-us.apache.org/repos/asf/kudu/blob/4a6493d4/src/kudu/tserver/ts_tablet_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/ts_tablet_manager.cc b/src/kudu/tserver/ts_tablet_manager.cc
index bcce21c..fc637e7 100644
--- a/src/kudu/tserver/ts_tablet_manager.cc
+++ b/src/kudu/tserver/ts_tablet_manager.cc
@@ -559,8 +559,9 @@ Status TSTabletManager::DeleteTablet(
   return Status::OK();
 }
 
-string TSTabletManager::LogPrefix(const string& tablet_id) const {
-  return "T " + tablet_id + " P " + fs_manager_->uuid() + ": ";
+string TSTabletManager::LogPrefix(const string& tablet_id, FsManager *fs_manager) {
+  DCHECK(fs_manager != nullptr);
+  return Substitute("T $0 P $1: ", tablet_id, fs_manager->uuid());
 }
 
 Status TSTabletManager::CheckRunningUnlocked(
@@ -920,7 +921,8 @@ Status TSTabletManager::DeleteTabletData(const scoped_refptr<TabletMetadata>& me
                                          TabletDataState data_state,
                                          const boost::optional<OpId>& last_logged_opid) {
   const string& tablet_id = meta->tablet_id();
-  LOG(INFO) << LogPrefix(tablet_id) << "Deleting tablet data with delete state "
+  LOG(INFO) << LogPrefix(tablet_id, meta->fs_manager())
+            << "Deleting tablet data with delete state "
             << TabletDataState_Name(data_state);
   CHECK(data_state == TABLET_DATA_DELETED ||
         data_state == TABLET_DATA_TOMBSTONED)
@@ -930,7 +932,8 @@ Status TSTabletManager::DeleteTabletData(const scoped_refptr<TabletMetadata>& me
   // Note: Passing an unset 'last_logged_opid' will retain the last_logged_opid
   // that was previously in the metadata.
   RETURN_NOT_OK(meta->DeleteTabletData(data_state, last_logged_opid));
-  LOG(INFO) << LogPrefix(tablet_id) << "Tablet deleted. Last logged OpId: "
+  LOG(INFO) << LogPrefix(tablet_id, meta->fs_manager())
+            << "Tablet deleted. Last logged OpId: "
             << meta->tombstone_last_logged_opid();
   MAYBE_FAULT(FLAGS_fault_crash_after_blocks_deleted);
 
@@ -953,14 +956,13 @@ void TSTabletManager::LogAndTombstone(const scoped_refptr<TabletPeer>& peer,
                                       const std::string& msg,
                                       const Status& s) {
   const string& tablet_id = peer->tablet_id();
-  const string kLogPrefix = "T " + tablet_id + " P " + fs_manager_->uuid() + ": ";
-  LOG(WARNING) << kLogPrefix << msg << ": " << s.ToString();
+  LOG(WARNING) << LogPrefix(tablet_id) << msg << ": " << s.ToString();
 
   Status delete_status = DeleteTabletData(
       peer->tablet_metadata(), TABLET_DATA_TOMBSTONED, boost::optional<OpId>());
   if (PREDICT_FALSE(!delete_status.ok())) {
     // This failure should only either indicate a bug or an IO error.
-    LOG(FATAL) << kLogPrefix << "Failed to tombstone tablet after tablet copy: "
+    LOG(FATAL) << LogPrefix(tablet_id) << "Failed to tombstone tablet after tablet copy: "
                << delete_status.ToString();
   }
   peer->StatusMessage(Substitute("Tombstoned tablet: $0 ($1)", msg, s.ToString()));

http://git-wip-us.apache.org/repos/asf/kudu/blob/4a6493d4/src/kudu/tserver/ts_tablet_manager.h
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/ts_tablet_manager.h b/src/kudu/tserver/ts_tablet_manager.h
index dcdc76e..b09bfe1 100644
--- a/src/kudu/tserver/ts_tablet_manager.h
+++ b/src/kudu/tserver/ts_tablet_manager.h
@@ -175,6 +175,11 @@ class TSTabletManager : public tserver::TabletPeerLookupIf {
 
   Status RunAllLogGC();
 
+  // Delete the tablet using the specified delete_type as the final metadata
+  // state. Deletes the on-disk data, metadata, as well as all WAL segments.
+  static Status DeleteTabletData(const scoped_refptr<tablet::TabletMetadata>& meta,
+                                 tablet::TabletDataState delete_type,
+                                 const boost::optional<consensus::OpId>& last_logged_opid);
  private:
   FRIEND_TEST(TsTabletManagerTest, TestPersistBlocks);
 
@@ -185,7 +190,10 @@ class TSTabletManager : public tserver::TabletPeerLookupIf {
   };
 
   // Standard log prefix, given a tablet id.
-  std::string LogPrefix(const std::string& tablet_id) const;
+  static std::string LogPrefix(const string& tablet_id, FsManager *fs_manager);
+  std::string LogPrefix(const std::string& tablet_id) const {
+    return LogPrefix(tablet_id, fs_manager_);
+  }
 
   // Returns Status::OK() iff state_ == MANAGER_RUNNING.
   Status CheckRunningUnlocked(boost::optional<TabletServerErrorPB::Code>* error_code) const;
@@ -252,12 +260,6 @@ class TSTabletManager : public tserver::TabletPeerLookupIf {
   // TABLET_DATA_READY state. Generally, we tombstone the replica.
   Status HandleNonReadyTabletOnStartup(const scoped_refptr<tablet::TabletMetadata>& meta);
 
-  // Delete the tablet using the specified delete_type as the final metadata
-  // state. Deletes the on-disk data, as well as all WAL segments.
-  Status DeleteTabletData(const scoped_refptr<tablet::TabletMetadata>& meta,
-                          tablet::TabletDataState delete_type,
-                          const boost::optional<consensus::OpId>& last_logged_opid);
-
   // Return Status::IllegalState if leader_term < last_logged_term.
   // Helper function for use with tablet copy.
   Status CheckLeaderTermNotLower(const std::string& tablet_id,


Mime
View raw message