kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jdcry...@apache.org
Subject [8/9] incubator-kudu git commit: ksck: improve filtering capability
Date Fri, 22 Jul 2016 22:02:28 GMT
ksck: improve filtering capability

- filters can now use glob-like pattern syntax
- filters now apply for the metadata checks, not just the checksums

Change-Id: Ic6ef8ab20679a9967c321cd4f8412ea4ea5fd50d
Reviewed-on: http://gerrit.cloudera.org:8080/3716
Tested-by: Kudu Jenkins
Reviewed-by: Jean-Daniel Cryans <jdcryans@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/e8ddee80
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/e8ddee80
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/e8ddee80

Branch: refs/heads/master
Commit: e8ddee8047b33e3dd1d97f25ef8773242f64212d
Parents: f9928e8
Author: Todd Lipcon <todd@apache.org>
Authored: Wed Jul 20 19:06:23 2016 -0700
Committer: Jean-Daniel Cryans <jdcryans@apache.org>
Committed: Fri Jul 22 20:43:08 2016 +0000

----------------------------------------------------------------------
 src/kudu/integration-tests/cluster_verifier.cc |  5 +-
 src/kudu/tools/ksck-test.cc                    | 30 +++++++-
 src/kudu/tools/ksck.cc                         | 85 ++++++++++++---------
 src/kudu/tools/ksck.h                          | 29 +++++--
 src/kudu/tools/ksck_remote-test.cc             | 14 +---
 src/kudu/tools/kudu-ksck.cc                    |  7 +-
 6 files changed, 110 insertions(+), 60 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/e8ddee80/src/kudu/integration-tests/cluster_verifier.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/cluster_verifier.cc b/src/kudu/integration-tests/cluster_verifier.cc
index 64d406a..f156073 100644
--- a/src/kudu/integration-tests/cluster_verifier.cc
+++ b/src/kudu/integration-tests/cluster_verifier.cc
@@ -94,10 +94,7 @@ Status ClusterVerifier::DoKsck() {
   RETURN_NOT_OK(ksck->FetchTableAndTabletInfo());
   RETURN_NOT_OK(ksck->FetchInfoFromTabletServers());
   RETURN_NOT_OK(ksck->CheckTablesConsistency());
-
-  vector<string> tables;
-  vector<string> tablets;
-  RETURN_NOT_OK(ksck->ChecksumData(tables, tablets, checksum_options_));
+  RETURN_NOT_OK(ksck->ChecksumData(checksum_options_));
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/e8ddee80/src/kudu/tools/ksck-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck-test.cc b/src/kudu/tools/ksck-test.cc
index acc1ea4..f542c14 100644
--- a/src/kudu/tools/ksck-test.cc
+++ b/src/kudu/tools/ksck-test.cc
@@ -321,7 +321,7 @@ TEST_F(KsckTest, TestZeroTableCheck) {
 TEST_F(KsckTest, TestOneTableCheck) {
   CreateOneTableOneTablet();
   ASSERT_OK(RunKsck());
-  ASSERT_OK(ksck_->ChecksumData({}, {}, ChecksumOptions()));
+  ASSERT_OK(ksck_->ChecksumData(ChecksumOptions()));
   ASSERT_STR_CONTAINS(err_stream_.str(),
                       "0/1 replicas remaining (20B from disk, 10 rows summed)");
 }
@@ -329,9 +329,35 @@ TEST_F(KsckTest, TestOneTableCheck) {
 TEST_F(KsckTest, TestOneSmallReplicatedTable) {
   CreateOneSmallReplicatedTable();
   ASSERT_OK(RunKsck());
-  ASSERT_OK(ksck_->ChecksumData({}, {}, ChecksumOptions()));
+  ASSERT_OK(ksck_->ChecksumData(ChecksumOptions()));
   ASSERT_STR_CONTAINS(err_stream_.str(),
                       "0/9 replicas remaining (180B from disk, 90 rows summed)");
+
+  // Test filtering (a non-matching pattern)
+  err_stream_.str("");
+  ksck_->set_table_filters({"xyz"});
+  ASSERT_OK(RunKsck());
+  Status s = ksck_->ChecksumData(ChecksumOptions());
+  EXPECT_EQ("Not found: No tablet replicas found. Filter: table_filters=xyz", s.ToString());
+  ASSERT_STR_CONTAINS(err_stream_.str(),
+                      "INFO: The cluster doesn't have any matching tables");
+
+  // Test filtering with a matching table pattern.
+  err_stream_.str("");
+  ksck_->set_table_filters({"te*"});
+  ASSERT_OK(RunKsck());
+  ASSERT_OK(ksck_->ChecksumData(ChecksumOptions()));
+  ASSERT_STR_CONTAINS(err_stream_.str(),
+                      "0/9 replicas remaining (180B from disk, 90 rows summed)");
+
+  // Test filtering with a matching tablet ID pattern.
+  err_stream_.str("");
+  ksck_->set_table_filters({});
+  ksck_->set_tablet_id_filters({"*-id-2"});
+  ASSERT_OK(RunKsck());
+  ASSERT_OK(ksck_->ChecksumData(ChecksumOptions()));
+  ASSERT_STR_CONTAINS(err_stream_.str(),
+                      "0/3 replicas remaining (60B from disk, 30 rows summed)");
 }
 
 TEST_F(KsckTest, TestOneOneTabletBrokenTable) {

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/e8ddee80/src/kudu/tools/ksck.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.cc b/src/kudu/tools/ksck.cc
index 06eb3cd..3a21711 100644
--- a/src/kudu/tools/ksck.cc
+++ b/src/kudu/tools/ksck.cc
@@ -21,7 +21,6 @@
 #include <glog/logging.h>
 #include <iostream>
 #include <mutex>
-#include <unordered_set>
 
 #include "kudu/consensus/quorum_util.h"
 #include "kudu/gutil/map-util.h"
@@ -29,6 +28,7 @@
 #include "kudu/gutil/strings/join.h"
 #include "kudu/gutil/strings/human_readable.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/gutil/strings/util.h"
 #include "kudu/util/atomic.h"
 #include "kudu/util/blocking_queue.h"
 #include "kudu/util/locks.h"
@@ -82,6 +82,20 @@ static ostream& Error() {
   return Out() << "ERROR: ";
 }
 
+namespace {
+// Return true if 'str' matches any of the patterns in 'patterns', or if
+// 'patterns' is empty.
+bool MatchesAnyPattern(const vector<string>& patterns, const string& str) {
+  // Consider no filter a wildcard.
+  if (patterns.empty()) return true;
+
+  for (const auto& p : patterns) {
+    if (MatchPattern(str, p)) return true;
+  }
+  return false;
+}
+} // anonymous namespace
+
 ChecksumOptions::ChecksumOptions()
     : timeout(MonoDelta::FromSeconds(FLAGS_checksum_timeout_sec)),
       scan_concurrency(FLAGS_checksum_scan_concurrency),
@@ -196,28 +210,30 @@ Status Ksck::ConnectToTabletServer(const shared_ptr<KsckTabletServer>& ts) {
 }
 
 Status Ksck::CheckTablesConsistency() {
-  VLOG(1) << "Getting the tables list";
-  int tables_count = cluster_->tables().size();
-  VLOG(1) << Substitute("List of $0 table(s) retrieved", tables_count);
-
-  if (tables_count == 0) {
-    Info() << "The cluster doesn't have any tables" << endl;
-    return Status::OK();
-  }
-
-  VLOG(1) << "Verifying each table";
+  int tables_checked = 0;
   int bad_tables_count = 0;
   for (const shared_ptr<KsckTable> &table : cluster_->tables()) {
+    if (!MatchesAnyPattern(table_filters_, table->name())) {
+      VLOG(1) << "Skipping table " << table->name();
+      continue;
+    }
+    tables_checked++;
     if (!VerifyTable(table)) {
       bad_tables_count++;
     }
   }
+
+  if (tables_checked == 0) {
+    Info() << "The cluster doesn't have any matching tables" << endl;
+    return Status::OK();
+  }
+
   if (bad_tables_count == 0) {
-    Info() << Substitute("The metadata for $0 table(s) is HEALTHY", tables_count) << endl;
+    Info() << Substitute("The metadata for $0 table(s) is HEALTHY", tables_checked) << endl;
     return Status::OK();
   } else {
     Warn() << Substitute("$0 out of $1 table(s) are not in a healthy state",
-                         bad_tables_count, tables_count) << endl;
+                         bad_tables_count, tables_checked) << endl;
     return Status::Corruption(Substitute("$0 table(s) are bad", bad_tables_count));
   }
 }
@@ -359,12 +375,7 @@ class TabletServerChecksumCallbacks : public ChecksumProgressCallbacks {
   std::string tablet_id_;
 };
 
-Status Ksck::ChecksumData(const vector<string>& tables,
-                          const vector<string>& tablets,
-                          const ChecksumOptions& opts) {
-  const unordered_set<string> tables_filter(tables.begin(), tables.end());
-  const unordered_set<string> tablets_filter(tablets.begin(), tablets.end());
-
+Status Ksck::ChecksumData(const ChecksumOptions& opts) {
   // Copy options so that local modifications can be made and passed on.
   ChecksumOptions options = opts;
 
@@ -374,23 +385,23 @@ Status Ksck::ChecksumData(const vector<string>& tables,
   int num_tablet_replicas = 0;
   for (const shared_ptr<KsckTable>& table : cluster_->tables()) {
     VLOG(1) << "Table: " << table->name();
-    if (!tables_filter.empty() && !ContainsKey(tables_filter, table->name())) continue;
+    if (!MatchesAnyPattern(table_filters_, table->name())) continue;
     for (const shared_ptr<KsckTablet>& tablet : table->tablets()) {
       VLOG(1) << "Tablet: " << tablet->id();
-      if (!tablets_filter.empty() && !ContainsKey(tablets_filter, tablet->id())) continue;
+      if (!MatchesAnyPattern(tablet_id_filters_, tablet->id())) continue;
       InsertOrDie(&tablet_table_map, tablet, table);
       num_tablet_replicas += tablet->replicas().size();
     }
   }
   if (num_tablet_replicas == 0) {
     string msg = "No tablet replicas found.";
-    if (!tables.empty() || !tablets.empty()) {
+    if (!table_filters_.empty() || !tablet_id_filters_.empty()) {
       msg += " Filter: ";
-      if (!tables.empty()) {
-        msg += "tables=" + JoinStrings(tables, ",") + ".";
+      if (!table_filters_.empty()) {
+        msg += "table_filters=" + JoinStrings(table_filters_, ",");
       }
-      if (!tablets.empty()) {
-        msg += "tablets=" + JoinStrings(tablets, ",") + ".";
+      if (!tablet_id_filters_.empty()) {
+        msg += "tablet_id_filters=" + JoinStrings(tablet_id_filters_, ",");
       }
     }
     return Status::NotFound(msg);
@@ -522,24 +533,30 @@ Status Ksck::ChecksumData(const vector<string>& tables,
 
 bool Ksck::VerifyTable(const shared_ptr<KsckTable>& table) {
   bool good_table = true;
-  vector<shared_ptr<KsckTablet> > tablets = table->tablets();
-  int tablets_count = tablets.size();
-  if (tablets_count == 0) {
-    Warn() << Substitute("Table $0 has 0 tablets", table->name()) << endl;
-    return false;
+  const auto all_tablets = table->tablets();
+  vector<shared_ptr<KsckTablet>> tablets;
+  std::copy_if(all_tablets.begin(), all_tablets.end(), std::back_inserter(tablets),
+                 [&](const shared_ptr<KsckTablet>& t) {
+                   return MatchesAnyPattern(tablet_id_filters_, t->id());
+                 });
+
+  if (tablets.empty()) {
+    Info() << Substitute("Table $0 has 0 matching tablets", table->name()) << endl;
+    return true;
   }
   int table_num_replicas = table->num_replicas();
   VLOG(1) << Substitute("Verifying $0 tablets for table $1 configured with num_replicas = $2",
-                        tablets_count, table->name(), table_num_replicas);
+                        tablets.size(), table->name(), table_num_replicas);
+
   int bad_tablets_count = 0;
-  // TODO check if the tablets are contiguous and in order.
   for (const shared_ptr<KsckTablet> &tablet : tablets) {
     if (!VerifyTablet(tablet, table_num_replicas)) {
       bad_tablets_count++;
     }
   }
   if (bad_tablets_count == 0) {
-    Info() << Substitute("Table $0 is HEALTHY", table->name()) << endl;
+    Info() << Substitute("Table $0 is HEALTHY ($1 tablets checked)",
+                         table->name(), tablets.size()) << endl;
   } else {
     Warn() << Substitute("Table $0 has $1 bad tablets", table->name(), bad_tablets_count) << endl;
     good_table = false;

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/e8ddee80/src/kudu/tools/ksck.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.h b/src/kudu/tools/ksck.h
index 2976944..cb78686 100644
--- a/src/kudu/tools/ksck.h
+++ b/src/kudu/tools/ksck.h
@@ -332,6 +332,24 @@ class Ksck {
     check_replica_count_ = check;
   }
 
+  // Setters for filtering the tables/tablets to be checked.
+  //
+  // Filter strings are glob-style patterns. For example, 'Foo*' matches
+  // all tables whose name begins with 'Foo'.
+  //
+  // If tables is not empty, checks only the named tables.
+  // If tablets is not empty, checks only the specified tablet IDs.
+  // If both are specified, takes the intersection.
+  // If both are empty (unset), all tables and tablets are checked.
+  void set_table_filters(vector<string> table_names) {
+    table_filters_ = std::move(table_names);
+  }
+
+  // See above.
+  void set_tablet_id_filters(vector<string> tablet_ids) {
+    tablet_id_filters_ = std::move(tablet_ids);
+  }
+
   // Verifies that it can connect to the master.
   Status CheckMasterRunning();
 
@@ -353,14 +371,8 @@ class Ksck {
   Status CheckTablesConsistency();
 
   // Verifies data checksums on all tablets by doing a scan of the database on each replica.
-  // If tables is not empty, checks only the named tables.
-  // If tablets is not empty, checks only the specified tablets.
-  // If both are specified, takes the intersection.
-  // If both are empty, all tables and tablets are checked.
   // Must first call FetchTableAndTabletInfo().
-  Status ChecksumData(const std::vector<std::string>& tables,
-                      const std::vector<std::string>& tablets,
-                      const ChecksumOptions& options);
+  Status ChecksumData(const ChecksumOptions& options);
 
  private:
   bool VerifyTable(const std::shared_ptr<KsckTable>& table);
@@ -372,6 +384,9 @@ class Ksck {
   const std::shared_ptr<KsckCluster> cluster_;
 
   bool check_replica_count_ = true;
+  vector<string> table_filters_;
+  vector<string> tablet_id_filters_;
+
   DISALLOW_COPY_AND_ASSIGN(Ksck);
 };
 } // namespace tools

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/e8ddee80/src/kudu/tools/ksck_remote-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck_remote-test.cc b/src/kudu/tools/ksck_remote-test.cc
index fd22975..1528930 100644
--- a/src/kudu/tools/ksck_remote-test.cc
+++ b/src/kudu/tools/ksck_remote-test.cc
@@ -235,9 +235,7 @@ TEST_F(RemoteKsckTest, TestChecksum) {
     ASSERT_OK(ksck_->FetchTableAndTabletInfo());
 
     err_stream_.str("");
-    s = ksck_->ChecksumData(vector<string>(),
-                            vector<string>(),
-                            ChecksumOptions(MonoDelta::FromSeconds(1), 16, false, 0));
+    s = ksck_->ChecksumData(ChecksumOptions(MonoDelta::FromSeconds(1), 16, false, 0));
     if (s.ok()) {
       // Check the status message at the end of the checksum.
       // We expect '0B from disk' because we didn't write enough data to trigger a flush
@@ -259,9 +257,7 @@ TEST_F(RemoteKsckTest, TestChecksumTimeout) {
   ASSERT_OK(GenerateRowWrites(num_writes));
   ASSERT_OK(ksck_->FetchTableAndTabletInfo());
   // Use an impossibly low timeout value of zero!
-  Status s = ksck_->ChecksumData(vector<string>(),
-                                 vector<string>(),
-                                 ChecksumOptions(MonoDelta::FromNanoseconds(0), 16, false, 0));
+  Status s = ksck_->ChecksumData(ChecksumOptions(MonoDelta::FromNanoseconds(0), 16, false, 0));
   ASSERT_TRUE(s.IsTimedOut()) << "Expected TimedOut Status, got: " << s.ToString();
 }
 
@@ -286,8 +282,7 @@ TEST_F(RemoteKsckTest, TestChecksumSnapshot) {
   // Remove this loop when that is done. See KUDU-1056.
   while (true) {
     ASSERT_OK(ksck_->FetchTableAndTabletInfo());
-    Status s = ksck_->ChecksumData(vector<string>(), vector<string>(),
-                                   ChecksumOptions(MonoDelta::FromSeconds(10), 16, true, ts));
+    Status s = ksck_->ChecksumData(ChecksumOptions(MonoDelta::FromSeconds(10), 16, true, ts));
     if (s.ok()) break;
     if (deadline.ComesBefore(MonoTime::Now(MonoTime::FINE))) break;
     SleepFor(MonoDelta::FromMilliseconds(10));
@@ -319,8 +314,7 @@ TEST_F(RemoteKsckTest, DISABLED_TestChecksumSnapshotCurrentTimestamp) {
   CHECK(started_writing.WaitFor(MonoDelta::FromSeconds(30)));
 
   ASSERT_OK(ksck_->FetchTableAndTabletInfo());
-  ASSERT_OK(ksck_->ChecksumData(vector<string>(), vector<string>(),
-                                ChecksumOptions(MonoDelta::FromSeconds(10), 16, true,
+  ASSERT_OK(ksck_->ChecksumData(ChecksumOptions(MonoDelta::FromSeconds(10), 16, true,
                                                 ChecksumOptions::kCurrentTimestamp)));
   continue_writing.Store(false);
   ASSERT_OK(promise.Get());

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/e8ddee80/src/kudu/tools/kudu-ksck.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-ksck.cc b/src/kudu/tools/kudu-ksck.cc
index e704f2e..70a6ce1 100644
--- a/src/kudu/tools/kudu-ksck.cc
+++ b/src/kudu/tools/kudu-ksck.cc
@@ -90,6 +90,9 @@ static void RunKsck(vector<string>* error_messages) {
   shared_ptr<KsckCluster> cluster(new KsckCluster(master));
   shared_ptr<Ksck> ksck(new Ksck(cluster));
 
+  ksck->set_table_filters(strings::Split(FLAGS_tables, ",", strings::SkipEmpty()));
+  ksck->set_tablet_id_filters(strings::Split(FLAGS_tablets, ",", strings::SkipEmpty()));
+
   // This is required for everything below.
   PUSH_PREPEND_NOT_OK(ksck->CheckMasterRunning(), error_messages,
                       "Master aliveness check error");
@@ -108,9 +111,7 @@ static void RunKsck(vector<string>* error_messages) {
                       "Table consistency check error");
 
   if (FLAGS_checksum_scan) {
-    vector<string> tables = strings::Split(FLAGS_tables, ",", strings::SkipEmpty());
-    vector<string> tablets = strings::Split(FLAGS_tablets, ",", strings::SkipEmpty());
-    PUSH_PREPEND_NOT_OK(ksck->ChecksumData(tables, tablets, ChecksumOptions()),
+    PUSH_PREPEND_NOT_OK(ksck->ChecksumData(ChecksumOptions()),
                         error_messages, "Checksum scan error");
   }
 }


Mime
View raw message