kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From laiyingc...@apache.org
Subject [kudu] 12/23: [collector] collect server entity metrics
Date Sat, 15 Aug 2020 14:59:40 GMT
This is an automated email from the ASF dual-hosted git repository.

laiyingchun pushed a commit to tag kudu-1.12.0-mdh1.0.0-4c2c075-centos-release
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 66681c688240449ccbe95e2af3519803070f0e84
Author: Yingchun Lai <405403881@qq.com>
AuthorDate: Tue Dec 31 18:27:49 2019 +0800

    [collector] collect server entity metrics
    
    1. Cherry-pick a commit from the community master branch to fix bugs
       that occur when metrics are merged.
    2. The collector now supports collecting 'server' type entity metrics
       on tserver and master.
    3. The collector now supports collecting the catalog table's metrics on master.
    4. Update the associated scripts used to create or update falcon screens.
---
 src/kudu/collector/cluster_rebalancer.cc     |   4 +-
 src/kudu/collector/collector-test.cc         |  46 ++-
 src/kudu/collector/collector.cc              |  52 +--
 src/kudu/collector/collector.h               |   2 +-
 src/kudu/collector/collector_util.cc         |   2 +-
 src/kudu/collector/falcon_reporter-test.cc   |   2 +-
 src/kudu/collector/falcon_reporter.cc        |  26 +-
 src/kudu/collector/metrics_collector-test.cc | 452 +++++++++++----------------
 src/kudu/collector/metrics_collector.cc      | 279 +++++++++++------
 src/kudu/collector/metrics_collector.h       |  54 +++-
 src/kudu/collector/nodes_checker.cc          |  19 +-
 src/kudu/collector/nodes_checker.h           |   4 +-
 src/kudu/scripts/build_kudu.sh               |  33 +-
 src/kudu/scripts/falcon_screen.json          | 348 ++++++++++++++++++++-
 src/kudu/scripts/falcon_screen.py            |  43 ++-
 src/kudu/scripts/kudu_falcon_screen.sh       |  49 +--
 src/kudu/util/metrics.h                      |   1 +
 17 files changed, 918 insertions(+), 498 deletions(-)

diff --git a/src/kudu/collector/cluster_rebalancer.cc b/src/kudu/collector/cluster_rebalancer.cc
index 0015544..b46baa4 100644
--- a/src/kudu/collector/cluster_rebalancer.cc
+++ b/src/kudu/collector/cluster_rebalancer.cc
@@ -17,8 +17,8 @@
 
 #include "kudu/collector/cluster_rebalancer.h"
 
-#include <stdio.h>
-#include <time.h>
+#include <cstdio>
+#include <ctime>
 
 #include <ostream>
 #include <vector>
diff --git a/src/kudu/collector/collector-test.cc b/src/kudu/collector/collector-test.cc
index e8e1298..76e2a2f 100644
--- a/src/kudu/collector/collector-test.cc
+++ b/src/kudu/collector/collector-test.cc
@@ -17,27 +17,49 @@
 
 #include "kudu/collector/collector.h"
 
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <gflags/gflags_declare.h>
 #include <gtest/gtest.h>
 
 #include "kudu/util/status.h"
 #include "kudu/util/test_macros.h"
 
+DECLARE_string(collector_cluster_name);
+DECLARE_string(collector_master_addrs);
+DECLARE_uint32(collector_interval_sec);
+DECLARE_uint32(collector_timeout_sec);
+
+using std::vector;
+
 namespace kudu {
 namespace collector {
 
 TEST(TestCollector, TestValidateIntervalAndTimeout) {
-  // 'interval' in error range.
-  ASSERT_TRUE(Collector::ValidateIntervalAndTimeout(9, 1).IsInvalidArgument());
-  ASSERT_TRUE(Collector::ValidateIntervalAndTimeout(61, 1).IsInvalidArgument());
-
-  // 'timeout' in error range.
-  ASSERT_TRUE(Collector::ValidateIntervalAndTimeout(10, 0).IsInvalidArgument());
-  ASSERT_TRUE(Collector::ValidateIntervalAndTimeout(10, 10).IsInvalidArgument());
-
-  // Both 'interval' and 'timeout' are in valid range.
-  ASSERT_OK(Collector::ValidateIntervalAndTimeout(10, 9));
-  ASSERT_OK(Collector::ValidateIntervalAndTimeout(60, 9));
-  ASSERT_OK(Collector::ValidateIntervalAndTimeout(60, 59));
+  FLAGS_collector_cluster_name = "test";
+  FLAGS_collector_master_addrs = "127.0.0.1:1234";
+  vector<std::pair<uint32_t, uint32_t>> invalid_arguments({{9, 1},
+                                                           {61, 1},
+                                                           {10, 0},
+                                                           {10, 10}});
+  for (const auto& arguments : invalid_arguments) {
+    FLAGS_collector_interval_sec = arguments.first;
+    FLAGS_collector_timeout_sec = arguments.second;
+    ASSERT_TRUE(Collector::ValidateFlags().IsInvalidArgument())
+        << FLAGS_collector_interval_sec << ", " << FLAGS_collector_timeout_sec;
+  }
+
+  vector<std::pair<uint32_t, uint32_t>> valid_arguments({{10, 9},
+                                                         {60, 9},
+                                                         {60, 59}});
+  for (const auto& arguments : valid_arguments) {
+    FLAGS_collector_interval_sec = arguments.first;
+    FLAGS_collector_timeout_sec = arguments.second;
+    ASSERT_OK(Collector::ValidateFlags());
+  }
 }
 }  // namespace collector
 }  // namespace kudu
diff --git a/src/kudu/collector/collector.cc b/src/kudu/collector/collector.cc
index 1c930e9..59f36df 100644
--- a/src/kudu/collector/collector.cc
+++ b/src/kudu/collector/collector.cc
@@ -42,15 +42,15 @@ DEFINE_string(collector_cluster_name, "",
 DEFINE_string(collector_master_addrs, "",
               "Comma-separated list of Kudu master addresses where each address is of "
               "form 'hostname:port");
-DEFINE_int32(collector_interval_sec, 60,
-             "Number of interval seconds to collect metrics");
-DEFINE_string(collector_report_method, "",
-              "Which monitor system the metrics reported to. Now supported system: falcon");
-DEFINE_int32(collector_timeout_sec, 10,
-             "Number of seconds to wait for a master, tserver, or CLI tool to return metrics");
-DEFINE_int32(collector_warn_threshold_ms, 1000,
-             "If a task takes more than this number of milliseconds, issue a warning with a "
-             "trace.");
+DEFINE_uint32(collector_interval_sec, 60,
+              "Number of interval seconds to collect metrics");
+DEFINE_string(collector_report_method, "falcon",
+              "Which monitor system the metrics reported to. Now supported system: local, falcon");
+DEFINE_uint32(collector_timeout_sec, 10,
+              "Number of seconds to wait for a master, tserver, or CLI tool to return metrics");
+DEFINE_uint32(collector_warn_threshold_ms, 1000,
+              "If a task takes more than this number of milliseconds, issue a warning with a "
+              "trace.");
 
 DECLARE_string(principal);
 DECLARE_string(keytab_file);
@@ -73,8 +73,7 @@ Collector::~Collector() {
 Status Collector::Init() {
   CHECK(!initialized_);
 
-  RETURN_NOT_OK(ValidateIntervalAndTimeout(FLAGS_collector_interval_sec,
-                                           FLAGS_collector_timeout_sec));
+  RETURN_NOT_OK(ValidateFlags());
   RETURN_NOT_OK(security::InitKerberosForServer(FLAGS_principal, FLAGS_keytab_file));
 
   if (FLAGS_collector_report_method == "falcon") {
@@ -82,9 +81,9 @@ Status Collector::Init() {
   } else if (FLAGS_collector_report_method == "local") {
     reporter_.reset(new LocalReporter());
   } else {
-    LOG(FATAL) << Substitute("Unsupported FLAGS_collector_report_method $0",
-                             FLAGS_collector_report_method);
+    __builtin_unreachable();
   }
+
   CHECK_OK(reporter_->Init());
   nodes_checker_.reset(new NodesChecker(reporter_));
   CHECK_OK(nodes_checker_->Init());
@@ -156,15 +155,28 @@ void Collector::ExcessLogFileDeleterThread() {
   }
 }
 
-Status Collector::ValidateIntervalAndTimeout(int interval, int timeout) {
-  if (10 <= interval && interval <= 60 &&
-      0 < timeout && timeout < interval) {
-    return Status::OK();
+Status Collector::ValidateFlags() {
+  if (FLAGS_collector_interval_sec < 10 ||
+      FLAGS_collector_interval_sec > 60 ||
+      FLAGS_collector_timeout_sec < 1 ||
+      FLAGS_collector_timeout_sec >= FLAGS_collector_interval_sec) {
+    return Status::InvalidArgument("--collector_interval_sec should in range [10, 60], and "
+                                   "--collector_timeout_sec should in range "
+                                   "(0, collector_interval_sec)");
+  }
+
+  if (FLAGS_collector_report_method != "local" &&
+      FLAGS_collector_report_method != "falcon") {
+    return Status::InvalidArgument("--collector_report_method only support 'local' and 'falcon'.");
   }
 
-  return Status::InvalidArgument(
-      Substitute("Invalid interval '$0'(should in range [10, 60]), "
-                 "or invalid timeout '$1'(should in range (0, interval))", interval, timeout));
+  if (FLAGS_collector_cluster_name.empty() ||
+      FLAGS_collector_master_addrs.empty()) {
+    return Status::InvalidArgument("--collector_cluster_name and --collector_master_addrs should "
+                                   "not be empty.");
+  }
+
+  return Status::OK();
 }
 } // namespace collector
 } // namespace kudu
diff --git a/src/kudu/collector/collector.h b/src/kudu/collector/collector.h
index 8e4e236..e135a3c 100644
--- a/src/kudu/collector/collector.h
+++ b/src/kudu/collector/collector.h
@@ -56,7 +56,7 @@ class Collector {
   Status StartExcessLogFileDeleterThread();
   void ExcessLogFileDeleterThread();
 
-  static Status ValidateIntervalAndTimeout(int interval, int timeout);
+  static Status ValidateFlags();
 
   bool initialized_;
 
diff --git a/src/kudu/collector/collector_util.cc b/src/kudu/collector/collector_util.cc
index aa79c40..ffbf0e1 100644
--- a/src/kudu/collector/collector_util.cc
+++ b/src/kudu/collector/collector_util.cc
@@ -19,7 +19,7 @@
 
 #include "kudu/collector/collector_util.h"
 
-#include <stddef.h>
+#include <cstddef>
 
 #include <gflags/gflags_declare.h>
 
diff --git a/src/kudu/collector/falcon_reporter-test.cc b/src/kudu/collector/falcon_reporter-test.cc
index 85810c7..3699b09 100644
--- a/src/kudu/collector/falcon_reporter-test.cc
+++ b/src/kudu/collector/falcon_reporter-test.cc
@@ -31,7 +31,7 @@
 
 DECLARE_string(collector_cluster_name);
 DECLARE_int32(collector_falcon_metrics_version);
-DECLARE_int32(collector_interval_sec);
+DECLARE_uint32(collector_interval_sec);
 
 using std::list;
 using std::string;
diff --git a/src/kudu/collector/falcon_reporter.cc b/src/kudu/collector/falcon_reporter.cc
index e492fd7..efe387b 100644
--- a/src/kudu/collector/falcon_reporter.cc
+++ b/src/kudu/collector/falcon_reporter.cc
@@ -17,10 +17,8 @@
 
 #include "kudu/collector/falcon_reporter.h"
 
-#include <kudu/util/curl_util.h>
-#include <stddef.h>
-
 #include <algorithm>
+#include <cstddef>
 #include <functional>
 #include <iterator>
 #include <mutex>
@@ -32,6 +30,7 @@
 #include <glog/logging.h>
 
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/curl_util.h"
 #include "kudu/util/debug/trace_event.h"
 #include "kudu/util/faststring.h"
 #include "kudu/util/jsonwriter.h"
@@ -49,17 +48,17 @@ DEFINE_string(collector_falcon_agent, "http://127.0.0.1:1988/v1/push",
 DEFINE_int32(collector_falcon_metrics_version, 4,
              "Version of metrics pushed to falcon, it will be tagged in "
              "'tag' section of an item");
-DEFINE_int32(collector_falcon_pusher_count, 4,
-             "Thread count to push collected items to falcon agent");
-DEFINE_int32(collector_report_batch_size, 1000,
-            "Count of items will be pushed to falcon agent by batch");
-DEFINE_int32(collector_push_timeout_ms, 20,
-             "Timeout for pushing items to falcon agent");
+DEFINE_uint32(collector_falcon_pusher_count, 4,
+              "Thread count to push collected items to falcon agent");
+DEFINE_uint32(collector_report_batch_size, 1000,
+             "Count of items will be pushed to falcon agent by batch");
+DEFINE_uint32(collector_push_timeout_ms, 20,
+              "Timeout for pushing items to falcon agent");
 
 DECLARE_string(collector_cluster_name);
-DECLARE_int32(collector_interval_sec);
-DECLARE_int32(collector_timeout_sec);
-DECLARE_int32(collector_warn_threshold_ms);
+DECLARE_uint32(collector_interval_sec);
+DECLARE_uint32(collector_timeout_sec);
+DECLARE_uint32(collector_warn_threshold_ms);
 
 using std::list;
 using std::string;
@@ -214,7 +213,8 @@ Status FalconReporter::PushToAgent(list<scoped_refptr<ItemBase>> falcon_items) {
   EasyCurl curl;
   faststring dst;
   curl.set_timeout(MonoDelta::FromMilliseconds(FLAGS_collector_push_timeout_ms));
-  RETURN_NOT_OK(curl.PostToURL(FLAGS_collector_falcon_agent, data, &dst));
+  RETURN_NOT_OK_PREPEND(curl.PostToURL(FLAGS_collector_falcon_agent, data, &dst),
+      Substitute("Failed to pushed items to agent, size $0", data.size()));
   TRACE(Substitute("Pushed items to agent, size $0", data.size()));
   return Status::OK();
 }
diff --git a/src/kudu/collector/metrics_collector-test.cc b/src/kudu/collector/metrics_collector-test.cc
index 19c00cf..865f821 100644
--- a/src/kudu/collector/metrics_collector-test.cc
+++ b/src/kudu/collector/metrics_collector-test.cc
@@ -17,9 +17,8 @@
 
 #include "kudu/collector/metrics_collector.h"
 
-#include <stdint.h>
+#include <cstdint>
 
-#include <map>
 #include <set>
 #include <string>
 #include <unordered_map>
@@ -27,14 +26,11 @@
 
 #include <gflags/gflags_declare.h>
 #include <gtest/gtest.h>
-#include <rapidjson/document.h>
 
 #include "kudu/collector/local_reporter.h"
 #include "kudu/collector/nodes_checker.h"
 #include "kudu/collector/reporter_base.h"
 #include "kudu/gutil/ref_counted.h"
-#include "kudu/util/jsonreader.h"
-#include "kudu/util/status.h"
 #include "kudu/util/test_macros.h"
 
 DECLARE_bool(collector_request_merged_metrics);
@@ -43,7 +39,6 @@ DECLARE_string(collector_cluster_level_metrics);
 DECLARE_string(collector_metrics);
 DECLARE_string(collector_metrics_types_for_test);
 
-using std::map;
 using std::set;
 using std::string;
 using std::unordered_map;
@@ -392,190 +387,158 @@ TEST(TestMetricsCollector, TestMergeToClusterLevelMetrics) {
 }
 
 TEST(TestMetricsCollector, TestParseMetrics) {
-  // Check ParseServerMetrics and ParseTabletMetrics.
-  {
-    string data;
-    JsonReader r(data);
-    const rapidjson::Value entity;
-    ASSERT_TRUE(MetricsCollector::ParseServerMetrics(r, &entity).IsNotSupported());
-    ASSERT_TRUE(MetricsCollector::ParseTabletMetrics(r, &entity).IsNotSupported());
-  }
-  // Check ParseTableMetrics.
-  {
-    auto collector = BuildCollector();
-    collector->metric_types_by_entity_type_["tablet"] = {
-        {"test_metric", "COUNTER"},
-        {"metric_counter1", "COUNTER"},
-        {"metric_counter2", "COUNTER"},
-        {"metric_histogram1", "HISTOGRAM"},
-        {"metric_histogram2", "HISTOGRAM"}
-    };
-    string data(
-        R"*([                                             )*"
-        R"*(  {                                           )*"
-        R"*(    "type": "server",                         )*"
-        R"*(    "id": "server1",                          )*"
-        R"*(    "attributes": {                           )*"
-        R"*(      "attrA": "val1",                        )*"
-        R"*(      "attrB": "val2"                         )*"
-        R"*(    },                                        )*"
-        R"*(    "metrics": [                              )*"
-        R"*(      {                                       )*"
-        R"*(        "name": "test_metric",                )*"
-        R"*(        "value": 123                          )*"
-        R"*(      }                                       )*"
-        R"*(    ]                                         )*"
-        R"*(  },                                          )*"
-        R"*(  {                                           )*"
-        R"*(    "type": "tablet",                         )*"
-        R"*(    "id": "tablet1",                          )*"
-        R"*(    "attributes": {                           )*"
-        R"*(      "attr1": "val1",                        )*"
-        R"*(      "attr2": "val2"                         )*"
-        R"*(    },                                        )*"
-        R"*(    "metrics": [                              )*"
-        R"*(      {                                       )*"
-        R"*(        "name": "test_metric",                )*"
-        R"*(        "value": 321                          )*"
-        R"*(      }                                       )*"
-        R"*(    ]                                         )*"
-        R"*(  },                                          )*"
-        R"*(  {                                           )*"
-        R"*(    "type": "table",                          )*"
-        R"*(    "id": "table1",                           )*"
-        R"*(    "attributes": {                           )*"
-        R"*(      "attr1": "val2",                        )*"
-        R"*(      "attr2": "val3"                         )*"
-        R"*(    },                                        )*"
-        R"*(    "metrics": [                              )*"
-        R"*(      {                                       )*"
-        R"*(        "name": "metric_counter1",            )*"
-        R"*(        "value": 10                           )*"
-        R"*(      },                                      )*"
-        R"*(      {                                       )*"
-        R"*(        "name": "metric_counter2",            )*"
-        R"*(        "value": 20                           )*"
-        R"*(      },                                      )*"
-        R"*(      {                                       )*"
-        R"*(        "name": "metric_histogram1",          )*"
-        R"*(        "total_count": 17,                    )*"
-        R"*(        "min": 6,                             )*"
-        R"*(        "mean": 47.8235,                      )*"
-        R"*(        "percentile_75": 62,                  )*"
-        R"*(        "percentile_95": 72,                  )*"
-        R"*(        "percentile_99": 73,                  )*"
-        R"*(        "percentile_99_9": 73,                )*"
-        R"*(        "percentile_99_99": 73,               )*"
-        R"*(        "max": 73,                            )*"
-        R"*(        "total_sum": 813                      )*"
-        R"*(      }                                       )*"
-        R"*(    ]                                         )*"
-        R"*(  },                                          )*"
-        R"*(  {                                           )*"
-        R"*(    "type": "table",                          )*"
-        R"*(    "id": "table2",                           )*"
-        R"*(    "attributes": {                           )*"
-        R"*(      "attr1": "val3",                        )*"
-        R"*(      "attr2": "val2"                         )*"
-        R"*(    },                                        )*"
-        R"*(    "metrics": [                              )*"
-        R"*(      {                                       )*"
-        R"*(        "name": "metric_counter1",            )*"
-        R"*(        "value": 100                          )*"
-        R"*(      },                                      )*"
-        R"*(      {                                       )*"
-        R"*(        "name": "metric_histogram1",          )*"
-        R"*(        "total_count": 170,                   )*"
-        R"*(        "min": 60,                            )*"
-        R"*(        "mean": 478.235,                      )*"
-        R"*(        "percentile_75": 620,                 )*"
-        R"*(        "percentile_95": 720,                 )*"
-        R"*(        "percentile_99": 730,                 )*"
-        R"*(        "percentile_99_9": 735,               )*"
-        R"*(        "percentile_99_99": 735,              )*"
-        R"*(        "max": 735,                           )*"
-        R"*(        "total_sum": 8130                     )*"
-        R"*(      },                                      )*"
-        R"*(      {                                       )*"
-        R"*(        "name": "metric_histogram2",          )*"
-        R"*(        "total_count": 34,                    )*"
-        R"*(        "min": 6,                             )*"
-        R"*(        "mean": 47.8235,                      )*"
-        R"*(        "percentile_75": 62,                  )*"
-        R"*(        "percentile_95": 72,                  )*"
-        R"*(        "percentile_99": 72,                  )*"
-        R"*(        "percentile_99_9": 73,                )*"
-        R"*(        "percentile_99_99": 73,               )*"
-        R"*(        "max": 73,                            )*"
-        R"*(        "total_sum": 813                      )*"
-        R"*(      }                                       )*"
-        R"*(    ]                                         )*"
-        R"*(  }                                           )*"
-        R"*(]                                             )*");
+  auto collector = BuildCollector();
+  collector->metric_types_ = {
+      {"server_metric", "COUNTER"},
+      {"metric_counter1", "COUNTER"},
+      {"metric_counter2", "COUNTER"},
+      {"server_metric_histogram", "HISTOGRAM"},
+      {"metric_histogram1", "HISTOGRAM"},
+      {"metric_histogram2", "HISTOGRAM"}
+  };
+  string data(
+      R"*([                                             )*"
+      R"*(  {                                           )*"
+      R"*(    "type": "server",                         )*"
+      R"*(    "id": "kudu.tabletserver",                )*"
+      R"*(    "attributes": {                           )*"
+      R"*(      "attrA": "val1",                        )*"
+      R"*(      "attrB": "val2"                         )*"
+      R"*(    },                                        )*"
+      R"*(    "metrics": [                              )*"
+      R"*(      {                                       )*"
+      R"*(        "name": "server_metric",              )*"
+      R"*(        "value": 123                          )*"
+      R"*(      },                                      )*"
+      R"*(      {                                       )*"
+      R"*(        "name": "server_metric_histogram",    )*"
+      R"*(        "total_count": 60,                    )*"
+      R"*(        "min": 4,                             )*"
+      R"*(        "mean": 76.16666666666667,            )*"
+      R"*(        "percentile_75": 25,                  )*"
+      R"*(        "percentile_95": 66,                  )*"
+      R"*(        "percentile_99": 79,                  )*"
+      R"*(        "percentile_99_9": 3486,              )*"
+      R"*(        "percentile_99_99": 3486,             )*"
+      R"*(        "max": 3486,                          )*"
+      R"*(        "total_sum": 4570                     )*"
+      R"*(      }                                       )*"
+      R"*(    ]                                         )*"
+      R"*(  },                                          )*"
+      R"*(  {                                           )*"
+      R"*(    "type": "table",                          )*"
+      R"*(    "id": "table1",                           )*"
+      R"*(    "attributes": {                           )*"
+      R"*(      "attr1": "val2",                        )*"
+      R"*(      "attr2": "val3"                         )*"
+      R"*(    },                                        )*"
+      R"*(    "metrics": [                              )*"
+      R"*(      {                                       )*"
+      R"*(        "name": "metric_counter1",            )*"
+      R"*(        "value": 10                           )*"
+      R"*(      },                                      )*"
+      R"*(      {                                       )*"
+      R"*(        "name": "metric_counter2",            )*"
+      R"*(        "value": 20                           )*"
+      R"*(      },                                      )*"
+      R"*(      {                                       )*"
+      R"*(        "name": "metric_histogram1",          )*"
+      R"*(        "total_count": 17,                    )*"
+      R"*(        "min": 6,                             )*"
+      R"*(        "mean": 47.8235,                      )*"
+      R"*(        "percentile_75": 62,                  )*"
+      R"*(        "percentile_95": 72,                  )*"
+      R"*(        "percentile_99": 73,                  )*"
+      R"*(        "percentile_99_9": 73,                )*"
+      R"*(        "percentile_99_99": 73,               )*"
+      R"*(        "max": 73,                            )*"
+      R"*(        "total_sum": 813                      )*"
+      R"*(      }                                       )*"
+      R"*(    ]                                         )*"
+      R"*(  },                                          )*"
+      R"*(  {                                           )*"
+      R"*(    "type": "table",                          )*"
+      R"*(    "id": "table2",                           )*"
+      R"*(    "attributes": {                           )*"
+      R"*(      "attr1": "val3",                        )*"
+      R"*(      "attr2": "val2"                         )*"
+      R"*(    },                                        )*"
+      R"*(    "metrics": [                              )*"
+      R"*(      {                                       )*"
+      R"*(        "name": "metric_counter1",            )*"
+      R"*(        "value": 100                          )*"
+      R"*(      },                                      )*"
+      R"*(      {                                       )*"
+      R"*(        "name": "metric_histogram1",          )*"
+      R"*(        "total_count": 170,                   )*"
+      R"*(        "min": 60,                            )*"
+      R"*(        "mean": 478.235,                      )*"
+      R"*(        "percentile_75": 620,                 )*"
+      R"*(        "percentile_95": 720,                 )*"
+      R"*(        "percentile_99": 730,                 )*"
+      R"*(        "percentile_99_9": 735,               )*"
+      R"*(        "percentile_99_99": 735,              )*"
+      R"*(        "max": 735,                           )*"
+      R"*(        "total_sum": 8130                     )*"
+      R"*(      },                                      )*"
+      R"*(      {                                       )*"
+      R"*(        "name": "metric_histogram2",          )*"
+      R"*(        "total_count": 34,                    )*"
+      R"*(        "min": 6,                             )*"
+      R"*(        "mean": 47.8235,                      )*"
+      R"*(        "percentile_75": 62,                  )*"
+      R"*(        "percentile_95": 72,                  )*"
+      R"*(        "percentile_99": 72,                  )*"
+      R"*(        "percentile_99_9": 73,                )*"
+      R"*(        "percentile_99_99": 73,               )*"
+      R"*(        "max": 73,                            )*"
+      R"*(        "total_sum": 813                      )*"
+      R"*(      }                                       )*"
+      R"*(    ]                                         )*"
+      R"*(  }                                           )*"
+      R"*(]                                             )*");
 
-    // Attribute filter is empty.
-    {
-      MetricsCollector::TablesMetrics tables_metrics;
-      MetricsCollector::TablesHistMetrics tables_hist_metrics;
-      MetricsCollector::Metrics host_metrics;
-      MetricsCollector::HistMetrics host_hist_metrics;
-      ASSERT_OK(collector->ParseMetrics(data,
-                                       &tables_metrics, &host_metrics,
-                                       &tables_hist_metrics, &host_hist_metrics));
-      ASSERT_EQ(tables_metrics, MetricsCollector::TablesMetrics({
-          {
-            "table1",
-            {
-              {"metric_counter1", 10},
-              {"metric_counter2", 20},
-            }
-          },
-          {
-            "table2",
-            {
-              {"metric_counter1", 100}
-            }
-          }
-      }));
-      ASSERT_EQ(tables_hist_metrics, MetricsCollector::TablesHistMetrics({
-          {
-            "table1",
-            {
-              {
-                "metric_histogram1_percentile_99",
-                {
-                  {17, 73}
-                }
-              }
-            }
-          },
+  MetricsCollector::TablesMetrics tables_metrics;
+  MetricsCollector::TablesHistMetrics tables_hist_metrics;
+  MetricsCollector::Metrics host_metrics;
+  MetricsCollector::HistMetrics host_hist_metrics;
+  ASSERT_OK(collector->ParseMetrics(MetricsCollector::NodeType::kTServer,
+                                    data,
+                                    &tables_metrics, &host_metrics,
+                                    &tables_hist_metrics, &host_hist_metrics));
+  ASSERT_EQ(tables_metrics, MetricsCollector::TablesMetrics({
+      {
+        "table1",
+        {
+          {"metric_counter1", 10},
+          {"metric_counter2", 20},
+        }
+      },
+      {
+        "table2",
+        {
+          {"metric_counter1", 100}
+        }
+      }
+  }));
+  ASSERT_EQ(tables_hist_metrics, MetricsCollector::TablesHistMetrics({
+      {
+        "table1",
+        {
           {
-            "table2",
+            "metric_histogram1_percentile_99",
             {
-              {
-                "metric_histogram1_percentile_99",
-                {
-                  {170, 730}
-                }
-              },
-              {
-                "metric_histogram2_percentile_99",
-                {
-                  {34, 72}
-                }
-              }
+              {17, 73}
             }
           }
-      }));
-      ASSERT_EQ(host_metrics, MetricsCollector::Metrics({
-          {"metric_counter1", 110},
-          {"metric_counter2", 20}
-      }));
-      ASSERT_EQ(host_hist_metrics, MetricsCollector::HistMetrics({
+        }
+      },
+      {
+        "table2",
+        {
           {
             "metric_histogram1_percentile_99",
             {
-              {17, 73},
               {170, 730}
             }
           },
@@ -585,63 +548,42 @@ TEST(TestMetricsCollector, TestParseMetrics) {
               {34, 72}
             }
           }
-      }));
-    }
-
-    // Attribute filter is not empty.
-    {
-      collector->attributes_filter_ = {{"attr1", {"val1", "val2"}}};
-
-      MetricsCollector::TablesMetrics tables_metrics;
-      MetricsCollector::TablesHistMetrics tables_hist_metrics;
-      MetricsCollector::Metrics host_metrics;
-      MetricsCollector::HistMetrics host_hist_metrics;
-      ASSERT_OK(collector->ParseMetrics(data,
-                                       &tables_metrics, &host_metrics,
-                                       &tables_hist_metrics, &host_hist_metrics));
-      ASSERT_EQ(tables_metrics, MetricsCollector::TablesMetrics({
-          {
-            "table1",
-            {
-              {"metric_counter1", 10},
-              {"metric_counter2", 20},
-            }
-          }
-      }));
-      ASSERT_EQ(tables_hist_metrics, MetricsCollector::TablesHistMetrics({
-          {
-            "table1",
-            {
-              {
-                "metric_histogram1_percentile_99",
-                {
-                  {17, 73}
-                }
-              }
-            }
-          }
-      }));
-      ASSERT_EQ(host_metrics, MetricsCollector::Metrics({
-          {"metric_counter1", 10},
-          {"metric_counter2", 20}
-      }));
-      ASSERT_EQ(host_hist_metrics, MetricsCollector::HistMetrics({
-          {
-            "metric_histogram1_percentile_99",
-            {
-              {17, 73},
-            }
-          }
-      }));
-    }
-  }
+        }
+      }
+  }));
+  ASSERT_EQ(host_metrics, MetricsCollector::Metrics({
+      {"metric_counter1", 110},
+      {"metric_counter2", 20},
+      {"server_metric", 123}
+  }));
+  ASSERT_EQ(host_hist_metrics, MetricsCollector::HistMetrics({
+      {
+        "metric_histogram1_percentile_99",
+        {
+          {17, 73},
+          {170, 730}
+        }
+      },
+      {
+        "metric_histogram2_percentile_99",
+        {
+          {34, 72}
+        }
+      },
+      {
+        "server_metric_histogram_percentile_99",
+        {
+          {60, 79}
+        }
+      }
+  }));
 }
 
 TEST(TestMetricsCollector, TestInitMetrics) {
   FLAGS_collector_metrics_types_for_test =
       R"*([                                                       )*"
       R"*(  {                                                     )*"
-      R"*(    "type": "tablet",                                   )*"
+      R"*(    "type": "table",                                   )*"
       R"*(    "id": "table1",                                     )*"
       R"*(    "metrics": [                                        )*"
       R"*(      {                                                 )*"
@@ -659,24 +601,6 @@ TEST(TestMetricsCollector, TestInitMetrics) {
       R"*(    ]                                                   )*"
       R"*(  },                                                    )*"
       R"*(  {                                                     )*"
-      R"*(    "type": "tablet",                                   )*"
-      R"*(    "id": "table2",                                     )*"
-      R"*(    "metrics": [                                        )*"
-      R"*(      {                                                 )*"
-      R"*(        "name": "counter_metric1",                      )*"
-      R"*(        "type": "counter"                               )*"
-      R"*(      },                                                )*"
-      R"*(      {                                                 )*"
-      R"*(        "name": "histogram_metric1",                    )*"
-      R"*(        "type": "histogram"                             )*"
-      R"*(      },                                                )*"
-      R"*(      {                                                 )*"
-      R"*(        "name": "gauge_metric1",                        )*"
-      R"*(        "type": "gauge"                                 )*"
-      R"*(      }                                                 )*"
-      R"*(    ]                                                   )*"
-      R"*(  },                                                    )*"
-      R"*(  {                                                     )*"
       R"*(    "type": "server",                                   )*"
       R"*(    "metrics": [                                        )*"
       R"*(      {                                                 )*"
@@ -696,25 +620,15 @@ TEST(TestMetricsCollector, TestInitMetrics) {
       R"*(]                                                       )*";
   auto collector = BuildCollector();
   ASSERT_OK(collector->InitMetrics());
-  map<string, MetricsCollector::MetricTypes> expect_metric_types({
-      {
-        "tablet",
-        {
-          {"counter_metric1", "COUNTER"},
-          {"histogram_metric1", "HISTOGRAM"},
-          {"gauge_metric1", "GAUGE"},
-        }
-      },
-      {
-        "server",
-        {
-          {"counter_metric2", "COUNTER"},
-          {"histogram_metric2", "HISTOGRAM"},
-          {"gauge_metric2", "GAUGE"},
-        }
-      }
+  MetricsCollector::MetricTypes expect_metric_types({
+      {"counter_metric1", "COUNTER"},
+      {"histogram_metric1", "HISTOGRAM"},
+      {"gauge_metric1", "GAUGE"},
+      {"counter_metric2", "COUNTER"},
+      {"histogram_metric2", "HISTOGRAM"},
+      {"gauge_metric2", "GAUGE"}
   });
-  ASSERT_EQ(collector->metric_types_by_entity_type_, expect_metric_types);
+  ASSERT_EQ(collector->metric_types_, expect_metric_types);
 }
 
 TEST(TestMetricsCollector, TestInitFilters) {
diff --git a/src/kudu/collector/metrics_collector.cc b/src/kudu/collector/metrics_collector.cc
index 688ea70..fc9f20a 100644
--- a/src/kudu/collector/metrics_collector.cc
+++ b/src/kudu/collector/metrics_collector.cc
@@ -17,9 +17,9 @@
 
 #include "kudu/collector/metrics_collector.h"
 
-#include <string.h>
-
+#include <algorithm>
 #include <cmath>
+#include <cstring>
 #include <functional>
 #include <list>
 #include <ostream>
@@ -59,27 +59,27 @@ DEFINE_string(collector_attributes, "",
 DEFINE_string(collector_cluster_level_metrics, "on_disk_size,on_disk_data_size",
               "Metric names which should be merged and pushed to cluster level view "
               "(comma-separated list of metric names)");
-DEFINE_bool(collector_ignore_hosttable_level_metrics, false,
-            "Whether to ignore to report host-table level metrics.");
+DEFINE_string(collector_hosttable_level_metrics, "merged_entities_count_of_tablet",
+              "Host-table level metrics need to report (comma-separated list of metric names).");
 DEFINE_string(collector_metrics, "",
               "Metrics to collect (comma-separated list of metric names)");
 DEFINE_string(collector_metrics_types_for_test, "",
-              "Only for test, used to initialize metric_types_by_entity_type_");
+              "Only for test, used to initialize metric_types_");
 DEFINE_bool(collector_request_merged_metrics, true,
             "Whether to request merged metrics and exclude unmerged metrics from server");
 
 DECLARE_string(collector_cluster_name);
-DECLARE_int32(collector_interval_sec);
-DECLARE_int32(collector_timeout_sec);
-DECLARE_int32(collector_warn_threshold_ms);
+DECLARE_uint32(collector_interval_sec);
+DECLARE_uint32(collector_timeout_sec);
+DECLARE_uint32(collector_warn_threshold_ms);
 
 using rapidjson::Value;
 using std::list;
-using std::map;
 using std::set;
 using std::string;
 using std::vector;
 using std::unordered_map;
+using std::unordered_set;
 using strings::Substitute;
 
 namespace kudu {
@@ -105,6 +105,7 @@ Status MetricsCollector::Init() {
   RETURN_NOT_OK(InitMetrics());
   RETURN_NOT_OK(InitFilters());
   RETURN_NOT_OK(InitMetricsUrlParameters());
+  RETURN_NOT_OK(InitHostTableLevelMetrics());
   RETURN_NOT_OK(InitClusterLevelMetrics());
 
   initialized_ = true;
@@ -147,7 +148,8 @@ void MetricsCollector::MetricCollectorThread() {
   MonoTime collect_time;
   do {
     collect_time = MonoTime::Now();
-    WARN_NOT_OK(CollectAndReportMetrics(), "Unable to collect metrics");
+    WARN_NOT_OK(CollectAndReportTServerMetrics(), "Unable to collect tserver metrics");
+    WARN_NOT_OK(CollectAndReportMasterMetrics(), "Unable to collect master metrics");
     collect_time += MonoDelta::FromSeconds(FLAGS_collector_interval_sec);
   } while (!RunOnceMode() && !stop_background_threads_latch_.WaitUntil(collect_time));
   LOG(INFO) << "MetricCollectorThread exit";
@@ -175,41 +177,61 @@ Status MetricsCollector::UpdateThreadPool(int32_t thread_count) {
 }
 
 Status MetricsCollector::InitMetrics() {
+  // Load types from a master and a tserver; any fetch/parse error aborts initialization.
+  MetricTypes metric_types;
+  RETURN_NOT_OK(InitMetricsFromNode(NodeType::kMaster, &metric_types));
+  MetricTypes tserver_metric_types;
+  RETURN_NOT_OK(InitMetricsFromNode(NodeType::kTServer, &tserver_metric_types));
+  // Fold the tserver types into the master's; a name known to both must have one type.
+  // TODO(yingchun): check values in debug mode.
+  for (const auto& metric_type : tserver_metric_types) {
+    const auto* type = FindOrNull(metric_types, metric_type.first);
+    if (type) {
+      CHECK_EQ(*type, metric_type.second);
+    } else {
+      EmplaceOrDie(&metric_types, std::make_pair(metric_type.first, metric_type.second));
+    }
+  }
+
+  metric_types_.swap(metric_types);
+  return Status::OK();
+}
+
+Status MetricsCollector::InitMetricsFromNode(NodeType node_type, MetricTypes* metric_types) const {
+  DCHECK(metric_types);
+  // Source of the schema JSON: the first master/tserver over HTTP, or the test-only flag.
   string resp;
   if (PREDICT_TRUE(FLAGS_collector_metrics_types_for_test.empty())) {
+    auto node_addr = node_type == NodeType::kMaster ?
+        nodes_checker_->GetFirstMaster() : nodes_checker_->GetFirstTServer();
     RETURN_NOT_OK(GetMetrics(
-        nodes_checker_->GetFirstMaster() + "/metrics?include_schema=1", &resp));
+        node_addr + "/metrics?include_schema=1&merge_rules=tablet|table|table_name", &resp));
   } else {
     resp = FLAGS_collector_metrics_types_for_test;
   }
+  // Only the first "table" and the first "server" entity contribute metric types.
   JsonReader r(resp);
   RETURN_NOT_OK(r.Init());
   vector<const Value*> entities;
   RETURN_NOT_OK(r.ExtractObjectArray(r.root(), nullptr, &entities));
 
-  map<string, MetricTypes> metric_types_by_entity_type;
-  bool tablet_entity_inited = false;
+  bool table_entity_inited = false;
   bool server_entity_inited = false;
   for (const Value* entity : entities) {
     string entity_type;
     CHECK_OK(r.ExtractString(entity, "type", &entity_type));
-    if (entity_type == "tablet") {
-      if (tablet_entity_inited) continue;
-      EmplaceOrDie(&metric_types_by_entity_type, std::make_pair("tablet", MetricTypes()));
-      auto& tablet_metric_types = FindOrDie(metric_types_by_entity_type, "tablet");
-      ExtractMetricTypes(r, entity, &tablet_metric_types);
-      tablet_entity_inited = true;
+    if (entity_type == "table") {
+      if (table_entity_inited) continue;
+      RETURN_NOT_OK(ExtractMetricTypes(r, entity, metric_types));
+      table_entity_inited = true;
     } else if (entity_type == "server") {
       if (server_entity_inited) continue;
-      EmplaceOrDie(&metric_types_by_entity_type, std::make_pair("server", MetricTypes()));
-      auto& server_metric_types = FindOrDie(metric_types_by_entity_type, "server");
-      ExtractMetricTypes(r, entity, &server_metric_types);
+      RETURN_NOT_OK(ExtractMetricTypes(r, entity, metric_types));
       server_entity_inited = true;
     } else {
       LOG(WARNING) << "unhandled entity type " << entity_type;
     }
   }
-  metric_types_by_entity_type_.swap(metric_types_by_entity_type);
   return Status::OK();
 }
 
@@ -260,7 +282,6 @@ Status MetricsCollector::InitMetricsUrlParameters() {
                   "want collector work well";
   }
 
-  // TODO(yingchun) This is supported since version 1.10
   if (!attributes_filter_.empty()) {
     metric_url_parameters_ += "&attributes=";
   }
@@ -272,6 +293,13 @@ Status MetricsCollector::InitMetricsUrlParameters() {
   return Status::OK();
 }
 
+Status MetricsCollector::InitHostTableLevelMetrics() {
+  const auto& names_csv = FLAGS_collector_hosttable_level_metrics;  // Comma-separated names.
+  unordered_set<string> parsed_names(Split(names_csv, ",", strings::SkipEmpty()));
+  hosttable_metrics_.swap(parsed_names);  // Replace the old set with the parsed one.
+  return Status::OK();
+}
+
 Status MetricsCollector::InitClusterLevelMetrics() {
   Metrics cluster_metrics;
   vector<string> metric_names =
@@ -283,14 +311,52 @@ Status MetricsCollector::InitClusterLevelMetrics() {
   return Status::OK();
 }
 
-Status MetricsCollector::CollectAndReportMetrics() {
-  LOG(INFO) << "Start to CollectAndReportMetrics";
+Status MetricsCollector::CollectAndReportMasterMetrics() {
+  LOG(INFO) << "Start to CollectAndReportMasterMetrics";
   MonoTime start(MonoTime::Now());
   scoped_refptr<Trace> trace(new Trace);
   ADOPT_TRACE(trace.get());
-  TRACE_EVENT0("collector", "MetricsCollector::CollectAndReportMetrics");
+  TRACE_EVENT0("collector", "MetricsCollector::CollectAndReportMasterMetrics");
   TRACE("init");
-  vector<string> tserver_http_addrs = nodes_checker_->GetNodes();
+  vector<string> master_http_addrs = nodes_checker_->GetMasters();
+  TRACE("Nodes got");
+  if (master_http_addrs.empty()) {
+    return Status::OK();
+  }
+  RETURN_NOT_OK(UpdateThreadPool(std::max(host_metric_collector_thread_pool_->num_threads(),
+                                          static_cast<int32_t>(master_http_addrs.size()))));
+  // One job per master; the table-level output maps are passed as null.
+  for (const auto& master_http_addr : master_http_addrs) {
+    RETURN_NOT_OK(host_metric_collector_thread_pool_->SubmitFunc(
+      std::bind(&MetricsCollector::CollectAndReportHostLevelMetrics,
+                this,
+                NodeType::kMaster,
+                master_http_addr + metric_url_parameters_,
+                nullptr,
+                nullptr)));
+  }
+  TRACE("Thead pool jobs submitted");
+  host_metric_collector_thread_pool_->Wait();
+  TRACE("Thead pool jobs done");
+
+  // Dump the trace only when this round took longer than the warning threshold.
+  int64_t elapsed_ms = (MonoTime::Now() - start).ToMilliseconds();
+  if (elapsed_ms > FLAGS_collector_warn_threshold_ms && Trace::CurrentTrace()) {
+    LOG(WARNING) << "Trace:" << std::endl
+                 << Trace::CurrentTrace()->DumpToString();
+  }
+
+  return Status::OK();
+}
+
+Status MetricsCollector::CollectAndReportTServerMetrics() {
+  LOG(INFO) << "Start to CollectAndReportTServerMetrics";
+  MonoTime start(MonoTime::Now());
+  scoped_refptr<Trace> trace(new Trace);
+  ADOPT_TRACE(trace.get());
+  TRACE_EVENT0("collector", "MetricsCollector::CollectAndReportTServerMetrics");
+  TRACE("init");
+  vector<string> tserver_http_addrs = nodes_checker_->GetTServers();
   TRACE("Nodes got");
   if (tserver_http_addrs.empty()) {
     return Status::OK();
@@ -302,6 +368,7 @@ Status MetricsCollector::CollectAndReportMetrics() {
     RETURN_NOT_OK(host_metric_collector_thread_pool_->SubmitFunc(
       std::bind(&MetricsCollector::CollectAndReportHostLevelMetrics,
                 this,
+                NodeType::kTServer,
                 tserver_http_addrs[i] + metric_url_parameters_,
                 &hosts_metrics_by_table_name[i],
                 &hosts_hist_metrics_by_table_name[i])));
@@ -481,26 +548,21 @@ Status MetricsCollector::ConvertStateToInt(const string& value, int64_t* result)
   return Status::OK();
 }
 
-bool MetricsCollector::FilterByAttribute(const JsonReader& r,
-                                         const rapidjson::Value* entity) const {
-  if (attributes_filter_.empty()) {
-    return false;
-  }
-  const Value* attributes;
-  CHECK_OK(r.ExtractObject(entity, "attributes", &attributes));
-  for (const auto& name_values : attributes_filter_) {
-    string value;
-    Status s = r.ExtractString(attributes, name_values.first.c_str(), &value);
-    if (s.ok() && ContainsKey(name_values.second, value)) {
-      return false;
-    }
-  }
-  return true;
-}
+Status MetricsCollector::ParseServerMetrics(const JsonReader& r,
+                                            const rapidjson::Value* entity,
+                                            Metrics* host_metrics,
+                                            HistMetrics* host_hist_metrics) const {
+  CHECK(entity);
+  CHECK(host_metrics);
+  CHECK(host_hist_metrics);
+  // The entity's "id" must be the tserver or master id; anything else is fatal.
+  string server_type;
+  CHECK_OK(r.ExtractString(entity, "id", &server_type));
+  CHECK(server_type == "kudu.tabletserver" || server_type == "kudu.master");
+  // Values go straight into the host-level maps; no merged outputs are requested.
+  CHECK_OK(ParseEntityMetrics(r, entity, host_metrics, nullptr, host_hist_metrics, nullptr));
 
-Status MetricsCollector::ParseServerMetrics(const JsonReader& /*r*/,
-                                            const rapidjson::Value* /*entity*/) {
-  return Status::NotSupported("server entity is not supported");
+  return Status::OK();
 }
 
 Status MetricsCollector::ParseTableMetrics(const JsonReader& r,
@@ -509,6 +571,7 @@ Status MetricsCollector::ParseTableMetrics(const JsonReader& r,
                                            Metrics* host_metrics,
                                            TablesHistMetrics* hist_metrics_by_table_name,
                                            HistMetrics* host_hist_metrics) const {
+  CHECK(entity);
   CHECK(metrics_by_table_name);
   CHECK(host_metrics);
   CHECK(hist_metrics_by_table_name);
@@ -525,14 +588,47 @@ Status MetricsCollector::ParseTableMetrics(const JsonReader& r,
   EmplaceOrDie(hist_metrics_by_table_name, std::make_pair(table_name, HistMetrics()));
   auto& table_hist_metrics = FindOrDie(*hist_metrics_by_table_name, table_name);
 
+  CHECK_OK(ParseEntityMetrics(r, entity,
+      &table_metrics, host_metrics, &table_hist_metrics, host_hist_metrics));
+
+  return Status::OK();
+}
+
+Status MetricsCollector::ParseCatalogMetrics(const JsonReader& r,
+                                             const rapidjson::Value* entity,
+                                             Metrics* tablet_metrics,
+                                             HistMetrics* tablet_hist_metrics) const {
+  CHECK(entity);
+  CHECK(tablet_metrics);
+  CHECK(tablet_hist_metrics);
+
+  // Entities whose id is not 'sys.catalog' are accepted but contribute nothing.
+  string entity_id;
+  CHECK_OK(r.ExtractString(entity, "id", &entity_id));
+  if (entity_id == "sys.catalog") {
+    // No merged outputs are requested: values go only into the two maps given.
+    CHECK_OK(ParseEntityMetrics(r, entity, tablet_metrics, nullptr, tablet_hist_metrics, nullptr));
+  }
+
+  return Status::OK();
+}
+
+Status MetricsCollector::ParseEntityMetrics(const JsonReader& r,
+                                            const rapidjson::Value* entity,
+                                            Metrics* kv_metrics,
+                                            Metrics* merged_kv_metrics,
+                                            HistMetrics* hist_metrics,
+                                            HistMetrics* merged_hist_metrics) const {
+  CHECK(entity);
+  CHECK(kv_metrics);
+  CHECK(hist_metrics);
+
   vector<const Value*> metrics;
   CHECK_OK(r.ExtractObjectArray(entity, "metrics", &metrics));
   for (const Value* metric : metrics) {
     string name;
     CHECK_OK(r.ExtractString(metric, "name", &name));
-    const auto* tablet_metric_types = FindOrNull(metric_types_by_entity_type_, "tablet");
-    CHECK(tablet_metric_types);
-    const auto* known_type = FindOrNull(*tablet_metric_types, name);
+    const auto* known_type = FindOrNull(metric_types_, name);
     if (!known_type) {
       LOG(ERROR) << Substitute("metric $0 has unknown type, ignore it", name);
       continue;
@@ -554,10 +650,11 @@ Status MetricsCollector::ParseTableMetrics(const JsonReader& r,
           LOG(FATAL) << "Unknown type, metrics name: " << name;
       }
 
-      EmplaceOrDie(&table_metrics, std::make_pair(name, value));
-      if (!EmplaceIfNotPresent(host_metrics, std::make_pair(name, value))) {
-        auto& host_metric = FindOrDie(*host_metrics, name);
-        host_metric += value;
+      EmplaceOrDie(kv_metrics, std::make_pair(name, value));
+      if (merged_kv_metrics &&
+          !EmplaceIfNotPresent(merged_kv_metrics, std::make_pair(name, value))) {
+        auto& found_metric = FindOrDie(*merged_kv_metrics, name);
+        found_metric += value;
       }
     } else if (*known_type == "HISTOGRAM") {
       for (const auto& percentile : kRegisterPercentiles) {
@@ -568,10 +665,11 @@ Status MetricsCollector::ParseTableMetrics(const JsonReader& r,
         int64_t percentile_value;
         CHECK_OK(r.ExtractInt64(metric, percentile.c_str(), &percentile_value));
         vector<SimpleHistogram> tmp({{total_count, percentile_value}});
-        EmplaceOrDie(&table_hist_metrics, std::make_pair(hist_metric_name, tmp));
-        if (!EmplaceIfNotPresent(host_hist_metrics, std::make_pair(hist_metric_name, tmp))) {
-          auto& host_hist_metric = FindOrDie(*host_hist_metrics, hist_metric_name);
-          host_hist_metric.emplace_back(tmp[0]);
+        EmplaceOrDie(hist_metrics, std::make_pair(hist_metric_name, tmp));
+        if (merged_hist_metrics &&
+            !EmplaceIfNotPresent(merged_hist_metrics, std::make_pair(hist_metric_name, tmp))) {
+          auto& found_hist_metric = FindOrDie(*merged_hist_metrics, hist_metric_name);
+          found_hist_metric.emplace_back(tmp[0]);
         }
       }
     } else {
@@ -582,12 +680,8 @@ Status MetricsCollector::ParseTableMetrics(const JsonReader& r,
   return Status::OK();
 }
 
-Status MetricsCollector::ParseTabletMetrics(const JsonReader& /*r*/,
-                                            const rapidjson::Value* /*entity*/) {
-  return Status::NotSupported("tablet entity is not supported");
-}
-
 Status MetricsCollector::CollectAndReportHostLevelMetrics(
+    NodeType node_type,
     const string& url,
     TablesMetrics* metrics_by_table_name,
     TablesHistMetrics* hist_metrics_by_table_name) {
@@ -597,8 +691,6 @@ Status MetricsCollector::CollectAndReportHostLevelMetrics(
   TRACE_EVENT1("collector", "MetricsCollector::CollectAndReportHostLevelMetrics",
                "url", url);
   TRACE("init");
-  CHECK(metrics_by_table_name);
-  CHECK(hist_metrics_by_table_name);
 
   // Get metrics from server.
   string resp;
@@ -607,14 +699,14 @@ Status MetricsCollector::CollectAndReportHostLevelMetrics(
   // Merge metrics by table and metric type.
   Metrics host_metrics;
   HistMetrics host_hist_metrics;
-  RETURN_NOT_OK(ParseMetrics(resp, metrics_by_table_name, &host_metrics,
+  RETURN_NOT_OK(ParseMetrics(node_type, resp, metrics_by_table_name, &host_metrics,
                              hist_metrics_by_table_name, &host_hist_metrics));
 
   string host_name = ExtractHostName(url);
   auto timestamp = static_cast<uint64_t>(WallTime_Now());
 
   // Host table level.
-  if (!FLAGS_collector_ignore_hosttable_level_metrics) {
+  if (metrics_by_table_name && hist_metrics_by_table_name) {
     RETURN_NOT_OK(ReportHostTableLevelMetrics(host_name, timestamp,
                                               *metrics_by_table_name,
                                               *hist_metrics_by_table_name));
@@ -635,7 +727,8 @@ Status MetricsCollector::CollectAndReportHostLevelMetrics(
   return Status::OK();
 }
 
-Status MetricsCollector::ParseMetrics(const string& data,
+Status MetricsCollector::ParseMetrics(NodeType node_type,
+                                      const string& data,
                                       TablesMetrics* metrics_by_table_name,
                                       Metrics* host_metrics,
                                       TablesHistMetrics* hist_metrics_by_table_name,
@@ -646,19 +739,19 @@ Status MetricsCollector::ParseMetrics(const string& data,
   RETURN_NOT_OK(r.ExtractObjectArray(r.root(), nullptr, &entities));
 
   for (const Value* entity : entities) {
-    if (FilterByAttribute(r, entity)) {
-      continue;
-    }
     string entity_type;
     CHECK_OK(r.ExtractString(entity, "type", &entity_type));
     if (entity_type == "server") {
-      CHECK(ParseServerMetrics(r, entity).IsNotSupported());
+      CHECK_OK(ParseServerMetrics(r, entity, host_metrics, host_hist_metrics));
     } else if (entity_type == "table") {
-      CHECK_OK(ParseTableMetrics(r, entity,
-                                 metrics_by_table_name, host_metrics,
-                                 hist_metrics_by_table_name, host_hist_metrics));
-    } else if (entity_type == "tablet") {
-      CHECK(ParseTabletMetrics(r, entity).IsNotSupported());
+      if (NodeType::kMaster == node_type) {
+        CHECK_OK(ParseCatalogMetrics(r, entity, host_metrics, host_hist_metrics));
+      } else {
+        CHECK(NodeType::kTServer == node_type);
+        CHECK_OK(ParseTableMetrics(r, entity,
+                                   metrics_by_table_name, host_metrics,
+                                   hist_metrics_by_table_name, host_hist_metrics));
+      }
     } else {
       LOG(FATAL) << "Unknown entity_type: " << entity_type;
     }
@@ -681,17 +774,17 @@ void MetricsCollector::CollectMetrics(const string& endpoint,
                                level,
                                timestamp,
                                metric.second,
-                               FindOrDie(metric_types_by_entity_type_["tablet"], metric.first),
+                               metric_types_[metric.first],
                                extra_tags));
   }
 }
 
 void MetricsCollector::CollectMetrics(const string& endpoint,
-                      const HistMetrics& metrics,
-                      const string& level,
-                      uint64_t timestamp,
-                      const string& extra_tags,
-                      list<scoped_refptr<ItemBase>>* items) {
+                                      const HistMetrics& metrics,
+                                      const string& level,
+                                      uint64_t timestamp,
+                                      const string& extra_tags,
+                                      list<scoped_refptr<ItemBase>>* items) {
   for (const auto& metric : metrics) {
     items->emplace_back(
       reporter_->ConstructItem(endpoint,
@@ -714,8 +807,14 @@ Status MetricsCollector::ReportHostTableLevelMetrics(
   int metrics_count = 0;
   for (const auto& table_metrics : metrics_by_table_name) {
     const auto extra_tag = Substitute("table=$0", table_metrics.first);
-    metrics_count += table_metrics.second.size();
-    CollectMetrics(host_name, table_metrics.second, "host_table", timestamp, extra_tag, &items);
+    Metrics filtered_metrics;
+    for (const auto& metric : table_metrics.second) {
+      if (ContainsKey(hosttable_metrics_, metric.first)) {
+        filtered_metrics.insert(metric);
+      }
+    }
+    metrics_count += filtered_metrics.size();
+    CollectMetrics(host_name, filtered_metrics, "host_table", timestamp, extra_tag, &items);
   }
   TRACE(Substitute("Host-table GAUGE/COUNTER type metrics collected, count $0", metrics_count));
 
@@ -723,10 +822,14 @@ Status MetricsCollector::ReportHostTableLevelMetrics(
   int hist_metrics_count = 0;
   for (const auto& table_hist_metrics : hist_metrics_by_table_name) {
     const auto extra_tag = Substitute("table=$0", table_hist_metrics.first);
+    HistMetrics filtered_metrics;
+    for (const auto& metric : table_hist_metrics.second) {
+      if (ContainsKey(hosttable_metrics_, metric.first)) {
+        filtered_metrics.insert(metric);
+      }
+    }
-    hist_metrics_count += table_hist_metrics.second.size();
+    hist_metrics_count += filtered_metrics.size();  // Count only what is actually reported.
-    CollectMetrics(host_name, table_hist_metrics.second,
-                   "host_table", timestamp, extra_tag,
-                   &items);
+    CollectMetrics(host_name, filtered_metrics, "host_table", timestamp, extra_tag, &items);
   }
   TRACE(Substitute("Host-table HISTOGRAM type metrics collected, count $0", hist_metrics_count));
 
diff --git a/src/kudu/collector/metrics_collector.h b/src/kudu/collector/metrics_collector.h
index 05673ef..2401030 100644
--- a/src/kudu/collector/metrics_collector.h
+++ b/src/kudu/collector/metrics_collector.h
@@ -18,11 +18,11 @@
 
 #include <cstdint>
 #include <list>
-#include <map>
 #include <memory>
 #include <set>
 #include <string>
 #include <unordered_map>
+#include <unordered_set>
 #include <vector>
 
 #include <gtest/gtest_prod.h>
@@ -70,15 +70,23 @@ class MetricsCollector : public RefCounted<MetricsCollector> {
   FRIEND_TEST(TestMetricsCollector, TestMergeToTableLevelMetrics);
   FRIEND_TEST(TestMetricsCollector, TestMergeToClusterLevelMetrics);
   FRIEND_TEST(TestMetricsCollector, TestParseMetrics);
+  FRIEND_TEST(TestMetricsCollector, TestParseTypesOfMetrics);
   FRIEND_TEST(TestMetricsCollector, TestInitMetrics);
   FRIEND_TEST(TestMetricsCollector, TestInitFilters);
   FRIEND_TEST(TestMetricsCollector, TestInitMetricsUrlParameters);
   FRIEND_TEST(TestMetricsCollector, TestInitClusterLevelMetrics);
 
+  // Metric name --> value, metric is in type of GAUGE or COUNTER.
   typedef std::unordered_map<std::string, int64_t> Metrics;
+  // Table name --> metric name-value pairs.
   typedef std::unordered_map<std::string, Metrics> TablesMetrics;
+
+  // Simple struct to collect histogram metrics.
   struct SimpleHistogram {
+    // 'total_count' value in histogram metric.
     int64_t count;
+    // 'percentile_xxx' value in histogram metric; percentile_xxx is specified
+    // by kRegisterPercentiles.
     int64_t value;
     SimpleHistogram(int64_t c, int64_t v) : count(c), value(v) {
     }
@@ -86,27 +94,37 @@ class MetricsCollector : public RefCounted<MetricsCollector> {
       return count == rhs.count && value == rhs.value;
     }
   };
-
+  // Metric name --> SimpleHistogram, metric is in type of HISTOGRAM.
   typedef std::unordered_map<std::string, std::vector<SimpleHistogram>> HistMetrics;
+  // Table name --> metric name-struct pairs.
   typedef std::unordered_map<std::string, HistMetrics> TablesHistMetrics;
 
+  // Metric name --> type, where type is one of 'COUNTER', 'GAUGE' or 'HISTOGRAM'.
   typedef std::unordered_map<std::string, std::string> MetricTypes;
 
   Status InitMetrics();
+  enum class NodeType {  // Kind of node being queried: master or tablet server.
+    kMaster,   // 0
+    kTServer,  // 1
+  };
+  Status InitMetricsFromNode(NodeType node_type, MetricTypes* metric_types) const;
   static Status ExtractMetricTypes(const JsonReader& r,
                                    const rapidjson::Value* entity,
                                    MetricTypes* metric_types);
   Status InitFilters();
   Status InitMetricsUrlParameters();
+  Status InitHostTableLevelMetrics();
   Status InitClusterLevelMetrics();
 
   Status StartMetricCollectorThread();
   void MetricCollectorThread();
-  Status CollectAndReportMetrics();
+  Status CollectAndReportMasterMetrics();
+  Status CollectAndReportTServerMetrics();
 
   Status UpdateThreadPool(int32_t thread_count);
 
-  Status CollectAndReportHostLevelMetrics(const std::string& url,
+  Status CollectAndReportHostLevelMetrics(NodeType node_type,
+                                          const std::string& url,
                                           TablesMetrics* metrics_by_table_name,
                                           TablesHistMetrics* hist_metrics_by_table_name);
 
@@ -152,26 +170,33 @@ class MetricsCollector : public RefCounted<MetricsCollector> {
   static Status GetMetrics(const std::string& url, std::string* resp);
 
   // Parse metrics from http response, entities may be in different types.
-  Status ParseMetrics(const std::string& data,
+  Status ParseMetrics(NodeType node_type,
+                      const std::string& data,
                       TablesMetrics* metrics_by_table_name,
                       Metrics* host_metrics,
                       TablesHistMetrics* hist_metrics_by_table_name,
                       HistMetrics* host_hist_metrics);
-  static Status ParseServerMetrics(const JsonReader& r,
-                                   const rapidjson::Value* entity);
+  Status ParseServerMetrics(const JsonReader& r,
+                            const rapidjson::Value* entity,
+                            Metrics* host_metrics,
+                            HistMetrics* host_hist_metrics) const;
   Status ParseTableMetrics(const JsonReader& r,
                            const rapidjson::Value* entity,
                            TablesMetrics* metrics_by_table_name,
                            Metrics* host_metrics,
                            TablesHistMetrics* hist_metrics_by_table_name,
                            HistMetrics* host_hist_metrics) const;
-  static Status ParseTabletMetrics(const JsonReader& r,
-                                   const rapidjson::Value* entity);
+  Status ParseCatalogMetrics(const JsonReader& r,
+                             const rapidjson::Value* entity,
+                             Metrics* tablet_metrics,
+                             HistMetrics* tablet_hist_metrics) const;
+  Status ParseEntityMetrics(const JsonReader& r,
+                            const rapidjson::Value* entity,
+                            Metrics* kv_metrics,
+                            Metrics* merged_kv_metrics,
+                            HistMetrics* hist_metrics,
+                            HistMetrics* merged_hist_metrics) const;
 
-  // Return true when this entity could be filtered.
-  // When server side support attributes filter, this function has no effect.
-  bool FilterByAttribute(const JsonReader& r,
-                         const rapidjson::Value* entity) const;
   Status GetNumberMetricValue(const rapidjson::Value* metric,
                               const std::string& metric_name,
                               int64_t* result) const;
@@ -187,11 +212,12 @@ class MetricsCollector : public RefCounted<MetricsCollector> {
   scoped_refptr<NodesChecker> nodes_checker_;
   scoped_refptr<ReporterBase> reporter_;
 
-  std::map<std::string, MetricTypes> metric_types_by_entity_type_;
+  MetricTypes metric_types_;
   // Attribute filter, attributes not in this map will be filtered if it's not empty.
   // attribute name ---> attribute values
   std::unordered_map<std::string, std::set<std::string>> attributes_filter_;
   std::string metric_url_parameters_;
+  std::unordered_set<std::string> hosttable_metrics_;
   Metrics cluster_metrics_;
 
   CountDownLatch stop_background_threads_latch_;
diff --git a/src/kudu/collector/nodes_checker.cc b/src/kudu/collector/nodes_checker.cc
index 8b64c29..b2dacd8 100644
--- a/src/kudu/collector/nodes_checker.cc
+++ b/src/kudu/collector/nodes_checker.cc
@@ -43,9 +43,9 @@
 
 DECLARE_string(collector_cluster_name);
 DECLARE_string(collector_master_addrs);
-DECLARE_int32(collector_interval_sec);
-DECLARE_int32(collector_timeout_sec);
-DECLARE_int32(collector_warn_threshold_ms);
+DECLARE_uint32(collector_interval_sec);
+DECLARE_uint32(collector_timeout_sec);
+DECLARE_uint32(collector_warn_threshold_ms);
 
 using rapidjson::Value;
 using std::list;
@@ -108,7 +108,12 @@ string NodesChecker::ToString() const {
   return "NodesChecker";
 }
 
-vector<string> NodesChecker::GetNodes() {
+vector<string> NodesChecker::GetMasters() {  // Returns master_http_addrs_ by value.
+  shared_lock<RWMutex> l(nodes_lock_);  // Hold nodes_lock_ in shared mode while reading.
+  return master_http_addrs_;
+}
+
+vector<string> NodesChecker::GetTServers() {  // Returns tserver_http_addrs_ by value.
   shared_lock<RWMutex> l(nodes_lock_);
   return tserver_http_addrs_;
 }
@@ -119,6 +124,12 @@ string NodesChecker::GetFirstMaster() {
   return master_http_addrs_[0];
 }
 
+string NodesChecker::GetFirstTServer() {  // Returns the first entry of tserver_http_addrs_.
+  shared_lock<RWMutex> l(nodes_lock_);  // Hold nodes_lock_ in shared mode while reading.
+  CHECK(!tserver_http_addrs_.empty());  // The CHECK fails if no tserver address is recorded.
+  return tserver_http_addrs_[0];
+}
+
 Status NodesChecker::StartNodesCheckerThread() {
   return Thread::Create("collector", "nodes-checker", &NodesChecker::NodesCheckerThread,
                         this, &nodes_checker_thread_);
diff --git a/src/kudu/collector/nodes_checker.h b/src/kudu/collector/nodes_checker.h
index 26aee89..8189271 100644
--- a/src/kudu/collector/nodes_checker.h
+++ b/src/kudu/collector/nodes_checker.h
@@ -49,8 +49,10 @@ class NodesChecker : public RefCounted<NodesChecker> {
 
   std::string ToString() const;
 
-  std::vector<std::string> GetNodes();
+  std::vector<std::string> GetMasters();
+  std::vector<std::string> GetTServers();
   std::string GetFirstMaster();
+  std::string GetFirstTServer();
 
  private:
   friend class RefCounted<NodesChecker>;
diff --git a/src/kudu/scripts/build_kudu.sh b/src/kudu/scripts/build_kudu.sh
index 1016697..58e92f4 100755
--- a/src/kudu/scripts/build_kudu.sh
+++ b/src/kudu/scripts/build_kudu.sh
@@ -42,6 +42,35 @@ function get_stdcpp_lib()
     fi
 }
 
+function get_system_lib()  # $1: short library name (e.g. crypto); prints a path to that shared object, or nothing
+{
+    libname=`ldd ${BASE_DIR}/build/latest/bin/kudu 2>/dev/null | grep "lib${1}\.so"`
+    libname=`echo $libname | cut -f1 -d" "`
+    libs=(`ldconfig -p|grep $libname|awk '{print $NF}'`)
+    # Prefer the first candidate from the ldconfig cache for which check_bit prints "true".
+    bit_mode=`getconf LONG_BIT`
+    for lib in ${libs[*]}; do
+        if [ "`check_bit $lib`" = "true" ]; then
+            echo "$lib"
+            return
+        fi
+    done;
+
+    # Fallback when ldconfig gave no usable candidate: take the third field of the ldd line for lib$1.
+    libname=`ldd ${BASE_DIR}/build/latest/bin/kudu 2>/dev/null | grep "lib${1}\.so"`
+    libname=`echo $libname | cut -f3 -d" "`
+    if echo "$libname" | grep -q "lib${1}\.so"; then  # filter on $1: this function is called with one argument, so $2 is empty
+        echo "$libname"
+    fi
+}
+
+function get_system_libname()  # $1: short library name (e.g. crypto); prints the first field of the matching ldd line
+{
+    libname=`ldd ${BASE_DIR}/build/latest/bin/kudu 2>/dev/null | grep "lib${1}\.so"`  # ldd line(s) of the kudu binary that mention lib$1.so
+    libname=`echo $libname | cut -f1 -d" "`  # unquoted echo collapses whitespace; keep the first space-separated field
+    echo "$libname"
+}
+
 function check_bit()
 {
     bit_mode=`getconf LONG_BIT`
@@ -130,6 +159,7 @@ copy_file ${BASE_DIR}/build/latest/bin/kudu-master ${PACK_DIR}/kudu_master
 copy_file ${BASE_DIR}/build/latest/bin/kudu-tserver ${PACK_DIR}/kudu_tablet_server
 copy_file ${BASE_DIR}/build/latest/bin/kudu ${PACK_DIR}/
 copy_file `get_stdcpp_lib $custom_gcc` ${PACK_DIR}/
+copy_file `get_system_lib crypto` ${PACK_DIR}/`get_system_libname crypto`
 copy_file ${BASE_DIR}/src/kudu/scripts/batch_operate_on_tables.sh ${PACK_DIR}/
 copy_file ${BASE_DIR}/src/kudu/scripts/falcon_screen.json ${PACK_DIR}/
 copy_file ${BASE_DIR}/src/kudu/scripts/falcon_screen.py ${PACK_DIR}/
@@ -137,7 +167,8 @@ copy_file ${BASE_DIR}/src/kudu/scripts/kudu_falcon_screen.sh ${PACK_DIR}/
 copy_file ${BASE_DIR}/src/kudu/scripts/minos_control_server.py ${PACK_DIR}/
 copy_file ${BASE_DIR}/src/kudu/scripts/cal_bill_daily.py ${PACK_DIR}/
 copy_file ${BASE_DIR}/src/kudu/scripts/kudu_utils.py ${PACK_DIR}/
-copy_file ${BASE_DIR}/src/kudu/scripts/start_local_kudu.sh ${PACK_DIR}/
+copy_file ${BASE_DIR}/src/kudu/scripts/start_kudu.sh ${PACK_DIR}/
+copy_file ${BASE_DIR}/src/kudu/scripts/stop_kudu.sh ${PACK_DIR}/
 copy_file ${BASE_DIR}/src/kudu/scripts/kudurc ${PACK_DIR}/
 copy_file -r ${BASE_DIR}/www ${PACK_DIR}/
 cd ${BASE_DIR}/build
diff --git a/src/kudu/scripts/falcon_screen.json b/src/kudu/scripts/falcon_screen.json
index b15125c..68911d9 100644
--- a/src/kudu/scripts/falcon_screen.json
+++ b/src/kudu/scripts/falcon_screen.json
@@ -1,3 +1,4 @@
+
 {
   "comments": [
     {
@@ -16,9 +17,7 @@
   ],
   "version": "20180827",
   "counter_templates": {
-      "full": [
-          "metric=kudu-tserver-health service=kudu cluster=${cluster.name} level=${level} v=4",
-          "metric=kudu-table-health service=kudu cluster=${cluster.name} level=${level} v=4",
+      "tablet_metrics": [
           "metric=all_transactions_inflight service=kudu cluster=${cluster.name} level=${level} v=4",
           "metric=alter_schema_transactions_inflight service=kudu cluster=${cluster.name} level=${level} v=4",
           "metric=average_diskrowset_height service=kudu cluster=${cluster.name} level=${level} v=4",
@@ -45,6 +44,7 @@
           "metric=key_file_lookups_per_op_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
           "metric=key_file_lookups service=kudu cluster=${cluster.name} level=${level} v=4",
           "metric=leader_memory_pressure_rejections service=kudu cluster=${cluster.name} level=${level} v=4",
+          "metric=live_row_count service=kudu cluster=${cluster.name} level=${level} v=4",
           "metric=log_append_latency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
           "metric=log_bytes_logged service=kudu cluster=${cluster.name} level=${level} v=4",
           "metric=log_cache_num_ops service=kudu cluster=${cluster.name} level=${level} v=4",
@@ -69,7 +69,6 @@
           "metric=op_prepare_run_time_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
           "metric=ops_behind_leader service=kudu cluster=${cluster.name} level=${level} v=4",
           "metric=raft_term service=kudu cluster=${cluster.name} level=${level} v=4",
-          "metric=replica_count service=kudu cluster=${cluster.name} level=${level} v=4",
           "metric=rows_deleted service=kudu cluster=${cluster.name} level=${level} v=4",
           "metric=rows_inserted service=kudu cluster=${cluster.name} level=${level} v=4",
           "metric=rows_updated service=kudu cluster=${cluster.name} level=${level} v=4",
@@ -97,6 +96,19 @@
           "metric=write_op_duration_commit_wait_consistency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
           "metric=write_transactions_inflight service=kudu cluster=${cluster.name} level=${level} v=4"
       ],
+      "master_health": [
+          "metric=kudu-master-health service=kudu cluster=${cluster.name} level=host v=4"
+      ],
+      "tserver_health": [
+          "metric=kudu-tserver-health service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=merged_entities_count_of_tablet service=kudu cluster=${cluster.name} level=host v=4"
+      ],
+      "table_health": [
+          "metric=kudu-table-health service=kudu cluster=${cluster.name} level=table v=4"
+      ],
+      "host_table_distribution": [
+          "metric=merged_entities_count_of_tablet service=kudu cluster=${cluster.name} level=host_table v=4"
+      ],
       "table_ab": [
         "metric=all_transactions_inflight service=kudu cluster=${cluster.name} level=${level} v=4",
         "metric=alter_schema_transactions_inflight service=kudu cluster=${cluster.name} level=${level} v=4",
@@ -200,9 +212,6 @@
         "metric=write_op_duration_commit_wait_consistency_percentile_99 service=kudu cluster=${cluster.name} level=${level} v=4",
         "metric=write_transactions_inflight service=kudu cluster=${cluster.name} level=${level} v=4"
       ],
-      "replica_count" : [
-          "metric=replica_count service=kudu cluster=${cluster.name} level=${level} v=4"
-      ],
       "cluster" : [
           "metric=kudu.success service=kudu level=${level}",
           "metric=kudu.writeLatency service=kudu level=${level}",
@@ -218,6 +227,227 @@
         "metric=on_disk_size service=kudu cluster=${cluster.name} level=${level} v=4",
         "metric=on_disk_data_size service=kudu cluster=${cluster.name} level=${level} v=4"
       ],
+      "server_master": [
+          "metric=block_cache_evictions service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_hits_caching service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_hits service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_inserts service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_lookups service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_misses_caching service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_misses service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_usage service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_blocks_open_reading service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_blocks_open_writing service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_blocks_created service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_blocks_deleted service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_bytes_read service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_bytes_written service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_disk_sync service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_readable_blocks service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_writable_blocks service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=cluster_replica_skew service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=code_cache_hits service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=code_cache_queries service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=cpu_stime service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=cpu_utime service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=data_dirs_failed service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=data_dirs_full service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_evictions service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_hits_caching service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_hits service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_inserts service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_lookups service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_misses_caching service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_misses service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_usage service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=generic_current_allocated_bytes service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=generic_heap_size service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=glog_error_messages service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=glog_info_messages service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=glog_warning_messages service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_BulkChangeConfig_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_ChangeConfig_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_GetConsensusState_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_GetLastOpId_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_GetNodeInstance_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_LeaderStepDown_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_RequestConsensusVote_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_RunLeaderElection_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_StartTabletCopy_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_UnsafeChangeConfig_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_UpdateConsensus_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_AlterTable_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_ChangeTServerState_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_ConnectToMaster_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_CreateTable_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_DeleteTable_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_GetMasterRegistration_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_GetTableLocations_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_GetTableSchema_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_GetTableStatistics_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_GetTabletLocations_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_IsAlterTableDone_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_IsCreateTableDone_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_ListMasters_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_ListTables_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_ListTabletServers_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_Ping_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_ReplaceTablet_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_ResetAuthzCache_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_master_MasterService_TSHeartbeat_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_CheckLeaks_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_DumpMemTrackers_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_FlushCoverage_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_GetFlags_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_GetStatus_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_ServerClock_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_SetFlag_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_SetServerWallClockForTests_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletCopyService_BeginTabletCopySession_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletCopyService_CheckSessionActive_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletCopyService_EndTabletCopySession_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletCopyService_FetchData_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=hybrid_clock_error service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=hybrid_clock_timestamp service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=involuntary_context_switches service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=log_block_manager_blocks_under_management service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=log_block_manager_bytes_under_management service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=log_block_manager_containers service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=log_block_manager_dead_containers_deleted service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=log_block_manager_full_containers service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=log_block_manager_holes_punched service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=op_apply_queue_length_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=op_apply_queue_time_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=op_apply_run_time_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=reactor_active_latency_us_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=reactor_load_percent_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=rpc_connections_accepted service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=rpc_incoming_queue_time_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=rpcs_queue_overflow service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=rpcs_timed_out_in_queue service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=spinlock_contention_time service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablet_copy_bytes_sent service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablet_copy_open_source_sessions service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tcmalloc_current_total_thread_cache_bytes service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tcmalloc_max_total_thread_cache_bytes service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tcmalloc_pageheap_free_bytes service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tcmalloc_pageheap_unmapped_bytes service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=threads_running service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=threads_started service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=voluntary_context_switches service=kudu cluster=${cluster.name} level=host v=4"
+      ],
+      "server_tserver": [
+          "metric=active_scanners service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_evictions service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_hits_caching service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_hits service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_inserts service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_lookups service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_misses_caching service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_misses service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_cache_usage service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_blocks_open_reading service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_blocks_open_writing service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_blocks_created service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_blocks_deleted service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_bytes_read service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_bytes_written service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_disk_sync service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_readable_blocks service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=block_manager_total_writable_blocks service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=code_cache_hits service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=code_cache_queries service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=cpu_stime service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=cpu_utime service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=data_dirs_failed service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=data_dirs_full service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_evictions service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_hits_caching service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_hits service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_inserts service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_lookups service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_misses_caching service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_misses service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=file_cache_usage service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=generic_current_allocated_bytes service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=generic_heap_size service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=glog_error_messages service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=glog_info_messages service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=glog_warning_messages service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_BulkChangeConfig_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_ChangeConfig_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_GetConsensusState_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_GetLastOpId_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_GetNodeInstance_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_LeaderStepDown_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_RequestConsensusVote_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_RunLeaderElection_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_StartTabletCopy_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_UnsafeChangeConfig_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_consensus_ConsensusService_UpdateConsensus_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_CheckLeaks_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_DumpMemTrackers_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_FlushCoverage_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_GetFlags_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_GetStatus_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_ServerClock_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_SetFlag_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_server_GenericService_SetServerWallClockForTests_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletCopyService_BeginTabletCopySession_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletCopyService_CheckSessionActive_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletCopyService_EndTabletCopySession_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletCopyService_FetchData_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletServerAdminService_AlterSchema_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletServerAdminService_CreateTablet_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletServerAdminService_DeleteTablet_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletServerService_Checksum_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletServerService_ListTablets_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletServerService_Ping_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletServerService_ScannerKeepAlive_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletServerService_Scan_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletServerService_SplitKeyRange_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=handler_latency_kudu_tserver_TabletServerService_Write_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=hybrid_clock_error service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=hybrid_clock_timestamp service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=involuntary_context_switches service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=log_block_manager_blocks_under_management service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=log_block_manager_bytes_under_management service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=log_block_manager_containers service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=log_block_manager_dead_containers_deleted service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=log_block_manager_full_containers service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=log_block_manager_holes_punched service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=op_apply_queue_length_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=op_apply_queue_time_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=op_apply_run_time_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=reactor_active_latency_us_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=reactor_load_percent_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=rpc_connections_accepted service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=rpc_incoming_queue_time_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=rpcs_queue_overflow service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=rpcs_timed_out_in_queue service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=scanner_duration_percentile_99 service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=scanners_expired service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=spinlock_contention_time service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablet_copy_bytes_fetched service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablet_copy_bytes_sent service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablet_copy_open_client_sessions service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablet_copy_open_source_sessions service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablets_num_bootstrapping service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablets_num_failed service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablets_num_initialized service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablets_num_not_initialized service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablets_num_running service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablets_num_shutdown service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablets_num_stopped service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tablets_num_stopping service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tcmalloc_current_total_thread_cache_bytes service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tcmalloc_max_total_thread_cache_bytes service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tcmalloc_pageheap_free_bytes service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=tcmalloc_pageheap_unmapped_bytes service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=threads_running service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=threads_started service=kudu cluster=${cluster.name} level=host v=4",
+          "metric=voluntary_context_switches service=kudu cluster=${cluster.name} level=host v=4"
+      ],
       "sys" : [
           "cpu.busy",
           "load.15min",
@@ -535,6 +765,60 @@
       ]
     },
     {
+      "screen": "${cluster.name} [nodes_healthy]",
+      "graphs": [
+        {
+          "title": "tserver健康状态",
+          "endpoints": ["${for.each.tserver}"],
+          "counters": {
+              "level": "host",
+              "template": "tserver_health"
+          },
+          "graph_type": "h",
+          "method": "",
+          "timespan": 86400
+        },
+        {
+          "title": "master健康状态",
+          "endpoints": ["${for.each.master}"],
+          "counters": {
+              "level": "host",
+              "template": "master_health"
+          },
+          "graph_type": "h",
+          "method": "",
+          "timespan": 86400
+        },
+        {
+          "title": "table健康状态",
+          "endpoints": ["${for.each.table}"],
+          "counters": {
+              "level": "table",
+              "template": "table_health"
+          },
+          "graph_type": "h",
+          "method": "",
+          "timespan": 86400
+        }
+      ]
+    },
+    {
+      "screen": "${cluster.name} [host_table_distribution]",
+      "graphs": [
+        {
+          "title": "tserver各表tablet分布",
+          "endpoints": ["${for.each.tserver} ${for.each.table}"],
+          "counters": {
+              "level": "host",
+              "template": "host_table_distribution"
+          },
+          "graph_type": "h",
+          "method": "",
+          "timespan": 86400
+        }
+      ]
+    },
+    {
       "screen": "${cluster.name} [tserver]",
       "graphs": [
         {
@@ -542,7 +826,55 @@
           "endpoints": ["${for.each.tserver}"],
           "counters": {
               "level": "host",
-              "template": "full"
+              "template": "tablet_metrics"
+          },
+          "graph_type": "h",
+          "method": "",
+          "timespan": 86400
+        }
+      ]
+    },
+    {
+      "screen": "${cluster.name} [server_tserver]",
+      "graphs": [
+        {
+          "title": "tserver Kudu服务指标",
+          "endpoints": ["${for.each.tserver}"],
+          "counters": {
+              "level": "host",
+              "template": "server_tserver"
+          },
+          "graph_type": "h",
+          "method": "",
+          "timespan": 86400
+        }
+      ]
+    },
+    {
+      "screen": "${cluster.name} [server_master]",
+      "graphs": [
+        {
+          "title": "master Kudu服务指标",
+          "endpoints": ["${for.each.master}"],
+          "counters": {
+              "level": "host",
+              "template": "server_master"
+          },
+          "graph_type": "h",
+          "method": "",
+          "timespan": 86400
+        }
+      ]
+    },
+    {
+      "screen": "${cluster.name} [master_catalog]",
+      "graphs": [
+        {
+          "title": "master catalog表指标",
+          "endpoints": ["${for.each.master}"],
+          "counters": {
+              "level": "host",
+              "template": "tablet_metrics"
           },
           "graph_type": "h",
           "method": "",
diff --git a/src/kudu/scripts/falcon_screen.py b/src/kudu/scripts/falcon_screen.py
index 26f330a..1695ca5 100755
--- a/src/kudu/scripts/falcon_screen.py
+++ b/src/kudu/scripts/falcon_screen.py
@@ -25,6 +25,11 @@ KUDU_CLUSTER_ID = 37613
 KUDU_TABLES_ID = 37638
 KUDU_TSERVER_ID = 37639
 KUDU_SYS_ID = 37640
+KUDU_SERVER_TSERVER = 40185
+KUDU_SERVER_MASTER = 40186
+KUDU_MASTER_CATALOG = 40207
+KUDU_NODES_HEALTHY = 40229
+KUDU_TSERVER_TABLET_DISTRIBUTION = 40258
 screenIdList = {
     KUDU_CLUSTER_ID: "[cluster]",
     KUDU_TABLES_ID: [
@@ -41,7 +46,13 @@ screenIdList = {
         "[metrics_u]",
         "[metrics_tw]"],
     KUDU_TSERVER_ID: "[tserver]",
-    KUDU_SYS_ID: "[server-sys]"}
+    KUDU_SYS_ID: "[server-sys]",
+    KUDU_SERVER_TSERVER: "[server_tserver]",
+    KUDU_SERVER_MASTER: "[server_master]",
+    KUDU_MASTER_CATALOG: "[master_catalog]",
+    KUDU_NODES_HEALTHY: "[nodes_healthy]",
+    KUDU_TSERVER_TABLET_DISTRIBUTION: "[host_table_distribution]"
+}
 # kuduScreenId = 351
 sessionId = ""
 metaPort = ""
@@ -185,7 +196,6 @@ def parse_lines(file_name):
 # return: screenConfigs
 def prepare_screen_config(
         clusterName,
-        templateName,
         screenTemplateFile,
         tableListFile,
         masterListFile,
@@ -276,7 +286,7 @@ def prepare_screen_config(
                     "ERROR: bad json: [details][%s][graphs][%s]: [counters] should be provided as non-empty list/dict" %
                     (screen, title))
                 sys.exit(1)
-            for counter in templateJson[counters["template"] if counters.has_key("template") else templateName]:
+            for counter in templateJson[counters["template"]]:
                 newCounters.append(
                     counter.replace(
                         "${cluster.name}",
@@ -550,31 +560,30 @@ if __name__ == '__main__':
             sys.argv[0])
         sys.exit(1)
 
-    if len(sys.argv) != 7:
+    if len(sys.argv) != 6:
         print(
-            "USAGE: python %s <cluster_name> <template_name> <screen_template_file> <master_list_file> <tserver_list_file> <table_list_file>" %
+            "USAGE: python %s <cluster_name> <screen_template_file> <master_list_file> <tserver_list_file> <table_list_file>" %
             sys.argv[0])
         sys.exit(1)
 
     clusterName = sys.argv[1]
-    templateName = sys.argv[2]
-    screenTemplateFile = sys.argv[3]
-    masterListFile = sys.argv[4]
-    tserverListFile = sys.argv[5]
-    tableListFile = sys.argv[6]
+    screenTemplateFile = sys.argv[2]
+    masterListFile = sys.argv[3]
+    tserverListFile = sys.argv[4]
+    tableListFile = sys.argv[5]
+
+    screenConfigs = prepare_screen_config(
+        clusterName,
+        screenTemplateFile,
+        tableListFile,
+        masterListFile,
+        tserverListFile)
 
     login()
 
     for scrid, scrNames in screenIdList.items():
         oldKuduScreens = get_kudu_screens(scrid)
         oldScreenName2Id = {}
-        screenConfigs = prepare_screen_config(
-            clusterName,
-            templateName,
-            screenTemplateFile,
-            tableListFile,
-            masterListFile,
-            tserverListFile)
         for oldScreen in oldKuduScreens:
             oldScreenName2Id[oldScreen['name']] = oldScreen['id']
         if scrid == KUDU_TABLES_ID:
diff --git a/src/kudu/scripts/kudu_falcon_screen.sh b/src/kudu/scripts/kudu_falcon_screen.sh
index 046352b..7dd2f90 100755
--- a/src/kudu/scripts/kudu_falcon_screen.sh
+++ b/src/kudu/scripts/kudu_falcon_screen.sh
@@ -3,11 +3,6 @@
 PID=$$
 BASE_DIR="$( cd "$( dirname "$0" )" && pwd )"
 KUDU=${KUDU_HOME}/kudu
-COLLECTOR=${KUDU_HOME}/kudu_collector
-if [[ ! -f ${KUDU} || ! -f ${COLLECTOR} ]]; then
-  echo "ERROR: ${KUDU} or ${COLLECTOR} not found"
-  exit 1
-fi
 KUDURC=${KUDU_CONFIG}/kudurc
 if [[ ! -f ${KUDURC} ]]; then
   echo "ERROR: ${KUDURC} not found"
@@ -17,10 +12,8 @@ fi
 function usage() {
 cat << EOF
 This tool is for update falcon screen for specified kudu cluster.
-USAGE: $0 <cluster_name> [table_count] [metrics_template]
+USAGE: $0 <cluster_name>
        cluster_name       Cluster name operated on, should be configurated in $KUDU_CONFIG/kudurc
-       table_count        An indicator of how many tables will be monitored, actual monitored table count is in range [table_count, 3*table_count]
-       metrics_template   Which metric template will be used, 'simple' or 'full'
 EOF
 }
 
@@ -31,28 +24,10 @@ then
 fi
 
 CLUSTER=$1
-TABLE_COUNT=9999
-if [[ $# -ge 2 ]]
-then
-  TABLE_COUNT=$2
-fi
-
-TEMPLATE_NAME='full'
-if [[ $# -ge 3 ]]
-then
-  TEMPLATE_NAME=$3
-fi
-if [[ "${TEMPLATE_NAME}"x != "simple"x && "${TEMPLATE_NAME}"x != "full"x ]]
-then
-  usage
-  exit 1
-fi
 
 echo "UID: ${UID}"
 echo "PID: ${PID}"
 echo "cluster: ${CLUSTER}"
-echo "top n table: ${TABLE_COUNT}"
-echo "metric template: ${TEMPLATE_NAME}"
 echo "Start time: `date`"
 ALL_START_TIME=$((`date +%s`))
 echo
@@ -83,29 +58,11 @@ if [[ ${TSERVER_COUNT} -eq 0 ]]; then
     exit 1
 fi
 
-function parse_yaml() {
-  python -c "import yaml;print(yaml.load(open('$1').read(), Loader=yaml.FullLoader)['clusters_info']['$2']['master_addresses'])"
-}
-MASTERS=$(parse_yaml ${KUDURC} ${CLUSTER})
-
 # get table list
-${COLLECTOR} -collector_master_addrs=${MASTERS} -collector_cluster_name=${CLUSTER} -collector_report_method=local -collector_metrics=bytes_flushed,on_disk_size,scanner_bytes_returned -log_dir=./log > /tmp/${UID}.${PID}.kudu.metric_table_value
-if [[ $? -ne 0 ]]; then
-    echo "ERROR: ${COLLECTOR} execute failed"
-    exit 1
-fi
-
-cat /tmp/${UID}.${PID}.kudu.metric_table_value | egrep "^table bytes_flushed " | sort -rnk4 | head -n ${TABLE_COUNT} | awk '{print $3}' > /tmp/${UID}.${PID}.kudu.top.bytes_flushed
-cat /tmp/${UID}.${PID}.kudu.metric_table_value | egrep "^table on_disk_size " | sort -rnk4 | head -n ${TABLE_COUNT} | awk '{print $3}' > /tmp/${UID}.${PID}.kudu.top.on_disk_size
-cat /tmp/${UID}.${PID}.kudu.metric_table_value | egrep "^table scanner_bytes_returned " | sort -rnk4 | head -n ${TABLE_COUNT} | awk '{print $3}' > /tmp/${UID}.${PID}.kudu.top.scanner_bytes_returned
-cat /tmp/${UID}.${PID}.kudu.top.* | sort -n | uniq > /tmp/${UID}.${PID}.kudu.table.list
+${KUDU} table list @${CLUSTER} | sort -n &>/tmp/${UID}.${PID}.kudu.table.list
 echo "total `wc -l /tmp/${UID}.${PID}.kudu.table.list | awk '{print $1}'` tables to monitor"
-echo -e "\033[32m Please set the following one line to the kudu collector's \`collector_attributes\` argument manually\033[0m"
-echo -n "table_name:"
-awk BEGIN{RS=EOF}'{gsub(/\n/,",");print}' /tmp/${UID}.${PID}.kudu.table.list
-echo ""
 
-python ${BASE_DIR}/falcon_screen.py ${CLUSTER} ${TEMPLATE_NAME} ${BASE_DIR}/falcon_screen.json /tmp/${UID}.${PID}.kudu.master.list /tmp/${UID}.${PID}.kudu.tserver.list /tmp/${UID}.${PID}.kudu.table.list
+python ${BASE_DIR}/falcon_screen.py ${CLUSTER} ${BASE_DIR}/falcon_screen.json /tmp/${UID}.${PID}.kudu.master.list /tmp/${UID}.${PID}.kudu.tserver.list /tmp/${UID}.${PID}.kudu.table.list
 if [[ $? -ne 0 ]]; then
     echo "ERROR: falcon screen operate failed"
     exit 1
diff --git a/src/kudu/util/metrics.h b/src/kudu/util/metrics.h
index ab31113..ea010b2 100644
--- a/src/kudu/util/metrics.h
+++ b/src/kudu/util/metrics.h
@@ -1239,6 +1239,7 @@ class FunctionGauge : public Gauge {
   // This should be used during destruction. If you want a settable
   // Gauge, use a normal Gauge instead of a FunctionGauge.
   void DetachToConstant(T v) {
+    UpdateModificationEpoch();
     std::lock_guard<simple_spinlock> l(lock_);
     function_ = [v]() { return v; };
   }


Mime
View raw message