kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject kudu git commit: KUDU-1125 (part 1) catalog_manager: try to avoid unnecessarily rewriting tablet info
Date Wed, 31 May 2017 18:30:31 GMT
Repository: kudu
Updated Branches:
  refs/heads/master f5e0024e0 -> 3b6ab6449


KUDU-1125 (part 1) catalog_manager: try to avoid unnecessarily rewriting tablet info

This adds some checks in SysCatalogTable so that, if the updated version
of a catalog table entry is identical to the previous version, we avoid
writing anything to the table.

This is a simple way of short-circuiting a common case of unnecessary
slowness in tablet report processing. For example, when the master
restarts or fails over, all of the tablet servers perform a full tablet
report. However, most of the tablets will not have changed any
information since their prior report, in which case the writes can be
safely skipped on the master.

This doesn't achieve all of the goals of KUDU-1125 (we still do separate
sync writes for each _changed_ reported tablet) but should still be a
substantial reduction. Perhaps most importantly, if a heartbeat from a
tserver times out due to long processing, the retry from the TS should
hit this code path and therefore be processed quickly.

Change-Id: I0de7189b8f1dbeea55172929396b73fd92fd62ba
Reviewed-on: http://gerrit.cloudera.org:8080/6916
Reviewed-by: Adar Dembo <adar@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/3b6ab644
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/3b6ab644
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/3b6ab644

Branch: refs/heads/master
Commit: 3b6ab64497bb091d0a94d6085c4fbd68da5cecda
Parents: f5e0024
Author: Todd Lipcon <todd@apache.org>
Authored: Wed May 17 18:51:22 2017 -0700
Committer: Todd Lipcon <todd@apache.org>
Committed: Wed May 31 18:24:50 2017 +0000

----------------------------------------------------------------------
 src/kudu/integration-tests/registration-test.cc | 47 +++++++++++++++---
 src/kudu/master/catalog_manager.cc              |  7 ++-
 src/kudu/master/master-test-util.h              |  8 ++--
 src/kudu/master/sys_catalog.cc                  | 50 ++++++++++++++++++++
 src/kudu/master/sys_catalog.h                   | 11 +++--
 5 files changed, 106 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/3b6ab644/src/kudu/integration-tests/registration-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/registration-test.cc b/src/kudu/integration-tests/registration-test.cc
index 1290710..6492adf 100644
--- a/src/kudu/integration-tests/registration-test.cc
+++ b/src/kudu/integration-tests/registration-test.cc
@@ -30,20 +30,26 @@
 #include "kudu/master/master.h"
 #include "kudu/master/master.pb.h"
 #include "kudu/master/mini_master.h"
+#include "kudu/master/sys_catalog.h"
 #include "kudu/master/ts_descriptor.h"
 #include "kudu/security/test/test_certs.h"
 #include "kudu/security/tls_context.h"
 #include "kudu/security/token_verifier.h"
+#include "kudu/tablet/tablet.h"
+#include "kudu/tablet/tablet_replica.h"
 #include "kudu/tserver/mini_tablet_server.h"
 #include "kudu/tserver/tablet_server.h"
 #include "kudu/util/curl_util.h"
 #include "kudu/util/faststring.h"
+#include "kudu/util/metrics.h"
 #include "kudu/util/pb_util.h"
 #include "kudu/util/stopwatch.h"
 #include "kudu/util/test_util.h"
 #include "kudu/util/version_info.h"
 
 DECLARE_int32(heartbeat_interval_ms);
+METRIC_DECLARE_counter(rows_inserted);
+METRIC_DECLARE_counter(rows_updated);
 
 namespace kudu {
 
@@ -186,16 +192,29 @@ TEST_F(RegistrationTest, TestTabletReports) {
   MiniTabletServer* ts = cluster_->mini_tablet_server(0);
   string ts_root = cluster_->GetTabletServerFsRoot(0);
 
-  // Add a tablet, make sure it reports itself.
-  CreateTabletForTesting(cluster_->mini_master(), "fake-table", schema_, &tablet_id_1);
+  auto GetCatalogMetric = [&](CounterPrototype& prototype) {
+    auto metrics = cluster_->mini_master()->master()->catalog_manager()->sys_catalog()
+        ->tablet_replica()->tablet()->GetMetricEntity();
+    return prototype.Instantiate(metrics)->value();
+  };
+  int startup_rows_inserted = GetCatalogMetric(METRIC_rows_inserted);
+
+  // Add a table, make sure it reports itself.
+  CreateTableForTesting(cluster_->mini_master(), "fake-table", schema_, &tablet_id_1);
 
   TabletLocationsPB locs;
   ASSERT_OK(WaitForReplicaCount(tablet_id_1, 1, &locs));
   ASSERT_EQ(1, locs.replicas_size());
   LOG(INFO) << "Tablet successfully reported on " << locs.replicas(0).ts_info().permanent_uuid();
 
-  // Add another tablet, make sure it is reported via incremental.
-  CreateTabletForTesting(cluster_->mini_master(), "fake-table2", schema_, &tablet_id_2);
+  // Check that we inserted the right number of rows for the new single-tablet table
+  // (one for the table, one for the tablet).
+  int post_create_rows_inserted = GetCatalogMetric(METRIC_rows_inserted);
+  EXPECT_EQ(2, post_create_rows_inserted - startup_rows_inserted)
+      << "Should have inserted one row each for the table and tablet";
+
+  // Add another table, make sure it is reported via incremental.
+  CreateTableForTesting(cluster_->mini_master(), "fake-table2", schema_, &tablet_id_2);
   ASSERT_OK(WaitForReplicaCount(tablet_id_2, 1, &locs));
 
   // Shut down the whole system, bring it back up, and make sure the tablets
@@ -208,9 +227,23 @@ TEST_F(RegistrationTest, TestTabletReports) {
   ASSERT_OK(WaitForReplicaCount(tablet_id_1, 1, &locs));
   ASSERT_OK(WaitForReplicaCount(tablet_id_2, 1, &locs));
 
-  // TODO: KUDU-870: once the master supports detecting failed/lost replicas,
-  // we should add a test case here which removes or corrupts metadata, restarts
-  // the TS, and verifies that the master notices the issue.
+  SleepFor(MonoDelta::FromSeconds(1));
+
+  // After restart, check that the tablet reports produced the expected number of
+  // writes to the catalog table:
+  // - No inserts, because there are no new tablets.
+  // - Two updates, since both replicas should have increased their term on restart.
+  EXPECT_EQ(0, GetCatalogMetric(METRIC_rows_inserted));
+  EXPECT_EQ(2, GetCatalogMetric(METRIC_rows_updated));
+
+  // If we restart just the master, it should not write any data to the catalog, since the
+  // tablets themselves are not changing term, etc.
+  cluster_->mini_master()->Shutdown();
+  ASSERT_OK(cluster_->mini_master()->Restart());
+  // Sleep for a second to make sure the TS has plenty of time to re-heartbeat.
+  SleepFor(MonoDelta::FromSeconds(1));
+  EXPECT_EQ(0, GetCatalogMetric(METRIC_rows_inserted));
+  EXPECT_EQ(0, GetCatalogMetric(METRIC_rows_updated));
 }
 
 // Check that after the tablet server registers, it gets a signed cert

http://git-wip-us.apache.org/repos/asf/kudu/blob/3b6ab644/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index e61d167..c67b423 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -2581,8 +2581,11 @@ Status CatalogManager::HandleReportedTablet(TSDescriptor* ts_desc,
   }
 
   table_lock.Unlock();
-  // We update the tablets each time that someone reports it.
-  // This shouldn't be very frequent and should only happen when something in fact changed.
+
+  // We update the tablets each time they are reported.
+  // SysCatalogTable::Write will short-circuit the case where the data
+  // has not in fact changed since the previous version and avoid any
+  // unnecessary operations.
   SysCatalogTable::Actions actions;
   actions.tablets_to_update.push_back(tablet.get());
   Status s = sys_catalog_->Write(actions);

http://git-wip-us.apache.org/repos/asf/kudu/blob/3b6ab644/src/kudu/master/master-test-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/master/master-test-util.h b/src/kudu/master/master-test-util.h
index dc8b0eb..44a2874 100644
--- a/src/kudu/master/master-test-util.h
+++ b/src/kudu/master/master-test-util.h
@@ -68,10 +68,10 @@ Status WaitForRunningTabletCount(MiniMaster* mini_master,
   return Status::RuntimeError("Unreachable statement"); // Suppress compiler warnings.
 }
 
-void CreateTabletForTesting(MiniMaster* mini_master,
-                            const string& table_name,
-                            const Schema& schema,
-                            string *tablet_id) {
+void CreateTableForTesting(MiniMaster* mini_master,
+                           const string& table_name,
+                           const Schema& schema,
+                           string *tablet_id) {
   {
     CreateTableRequestPB req;
     CreateTableResponsePB resp;

http://git-wip-us.apache.org/repos/asf/kudu/blob/3b6ab644/src/kudu/master/sys_catalog.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/sys_catalog.cc b/src/kudu/master/sys_catalog.cc
index 3aa736c..3d75e98 100644
--- a/src/kudu/master/sys_catalog.cc
+++ b/src/kudu/master/sys_catalog.cc
@@ -27,6 +27,7 @@
 
 #include <gflags/gflags.h>
 #include <glog/logging.h>
+#include <google/protobuf/util/message_differencer.h>
 
 #include "kudu/common/key_encoder.h"
 #include "kudu/common/partial_row.h"
@@ -78,6 +79,7 @@ using kudu::tserver::WriteResponsePB;
 using std::function;
 using std::shared_ptr;
 using std::unique_ptr;
+using std::string;
 using std::vector;
 using strings::Substitute;
 
@@ -95,6 +97,26 @@ const char* const SysCatalogTable::kSysCertAuthorityEntryId =
 const char* const SysCatalogTable::kInjectedFailureStatusMsg =
     "INJECTED FAILURE";
 
+
+namespace {
+
+// Return true if the two PBs are equal.
+//
+// If 'diff_str' is not null, stores a textual description of the
+// difference.
+bool ArePBsEqual(const google::protobuf::Message& prev_pb,
+                 const google::protobuf::Message& new_pb,
+                 string* diff_str) {
+  google::protobuf::util::MessageDifferencer md;
+  if (diff_str) {
+    md.ReportDifferencesToString(diff_str);
+  }
+  return md.Compare(prev_pb, new_pb);
+}
+
+} // anonymous namespace
+
+
 SysCatalogTable::SysCatalogTable(Master* master, MetricRegistry* metrics,
                                  ElectedLeaderCallback leader_cb)
     : metric_registry_(metrics),
@@ -434,6 +456,11 @@ Status SysCatalogTable::Write(const Actions& actions) {
   ReqUpdateTablets(&req, actions.tablets_to_update);
   ReqDeleteTablets(&req, actions.tablets_to_delete);
 
+  if (req.row_operations().rows().empty()) {
+    // No actual changes were written (i.e the data to be updated matched the
+    // previous version of the data).
+    return Status::OK();
+  }
   RETURN_NOT_OK(SyncWrite(&req, &resp));
   return Status::OK();
 }
@@ -443,6 +470,9 @@ Status SysCatalogTable::Write(const Actions& actions) {
 // ==================================================================
 
 void SysCatalogTable::ReqAddTable(WriteRequestPB* req, const TableInfo* table) {
+  VLOG(2) << "Adding table " << table->id() << " in catalog: " <<
+      SecureShortDebugString(table->metadata().dirty().pb);
+
   faststring metadata_buf;
   pb_util::SerializeToString(table->metadata().dirty().pb, &metadata_buf);
 
@@ -455,6 +485,15 @@ void SysCatalogTable::ReqAddTable(WriteRequestPB* req, const TableInfo*
table) {
 }
 
 void SysCatalogTable::ReqUpdateTable(WriteRequestPB* req, const TableInfo* table) {
+  string diff;
+  if (ArePBsEqual(table->metadata().state().pb,
+                  table->metadata().dirty().pb,
+                  VLOG_IS_ON(2) ? &diff : nullptr)) {
+    // Short-circuit empty updates.
+    return;
+  }
+  VLOG(2) << "Updating table " << table->id() << " in catalog: " <<
diff;
+
   faststring metadata_buf;
   pb_util::SerializeToString(table->metadata().dirty().pb, &metadata_buf);
 
@@ -653,6 +692,8 @@ void SysCatalogTable::ReqAddTablets(WriteRequestPB* req,
   KuduPartialRow row(&schema_);
   RowOperationsPBEncoder enc(req->mutable_row_operations());
   for (auto tablet : tablets) {
+    VLOG(2) << "Adding tablet " << tablet->tablet_id() << " in catalog:
"
+            << SecureShortDebugString(tablet->metadata().dirty().pb);
     pb_util::SerializeToString(tablet->metadata().dirty().pb, &metadata_buf);
     CHECK_OK(row.SetInt8(kSysCatalogTableColType, TABLETS_ENTRY));
     CHECK_OK(row.SetStringNoCopy(kSysCatalogTableColId, tablet->tablet_id()));
@@ -667,6 +708,15 @@ void SysCatalogTable::ReqUpdateTablets(WriteRequestPB* req,
   KuduPartialRow row(&schema_);
   RowOperationsPBEncoder enc(req->mutable_row_operations());
   for (auto tablet : tablets) {
+    string diff;
+    if (ArePBsEqual(tablet->metadata().state().pb,
+                    tablet->metadata().dirty().pb,
+                    VLOG_IS_ON(2) ? &diff : nullptr)) {
+      // Short-circuit empty updates.
+      continue;
+    }
+    VLOG(2) << "Updating tablet " << tablet->tablet_id() << " in catalog:
"
+            << diff;
     pb_util::SerializeToString(tablet->metadata().dirty().pb, &metadata_buf);
     CHECK_OK(row.SetInt8(kSysCatalogTableColType, TABLETS_ENTRY));
     CHECK_OK(row.SetStringNoCopy(kSysCatalogTableColId, tablet->tablet_id()));

http://git-wip-us.apache.org/repos/asf/kudu/blob/3b6ab644/src/kudu/master/sys_catalog.h
----------------------------------------------------------------------
diff --git a/src/kudu/master/sys_catalog.h b/src/kudu/master/sys_catalog.h
index 4197288..1f306b6 100644
--- a/src/kudu/master/sys_catalog.h
+++ b/src/kudu/master/sys_catalog.h
@@ -158,6 +158,13 @@ class SysCatalogTable {
   // (as in 'entry_id' column) from the system table.
   Status RemoveTskEntries(const std::set<std::string>& entry_ids);
 
+  // Return the underlying TabletReplica instance hosting the metadata.
+  // This should be used with caution -- typically the various methods
+  // above should be used rather than directly accessing the replica.
+  const scoped_refptr<tablet::TabletReplica>& tablet_replica() const {
+    return tablet_replica_;
+  }
+
  private:
   FRIEND_TEST(MasterTest, TestMasterMetadataConsistentDespiteFailures);
   DISALLOW_COPY_AND_ASSIGN(SysCatalogTable);
@@ -183,10 +190,6 @@ class SysCatalogTable {
   Status CreateDistributedConfig(const MasterOptions& options,
                                  consensus::RaftConfigPB* committed_config);
 
-  const scoped_refptr<tablet::TabletReplica>& tablet_replica() const {
-    return tablet_replica_;
-  }
-
   std::string tablet_id() const {
     return tablet_replica_->tablet_id();
   }


Mime
View raw message