kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From danburk...@apache.org
Subject kudu git commit: hms-tool: filter non-Kudu tables in the HMS
Date Fri, 13 Jul 2018 02:34:52 GMT
Repository: kudu
Updated Branches:
  refs/heads/master f62bbdd64 -> d074de85d


hms-tool: filter non-Kudu tables in the HMS

Renames HmsCatalog::RetrieveTables to GetKuduTables and changes the
semantics such that only Kudu tables are returned. The only caller was
the HMS tool, and it only needs to inspect Kudu tables.

Special APIs are added to the HMS client and HMS catalog layers that
allow sending a filter to the HMS to strip out non-Kudu tables when
listing tables, as well as a bulk-get API. The combination of these APIs
should be significantly more efficient than issuing a get for every
single table in the HMS and doing Kudu-side filtering.

Also included are some style and formatting fixups in HMS tool.

This patch includes no functional changes to the HMS tool.

Change-Id: I5f83d2e705ea6910a9aa0a1eda0d30b5feb2607b
Reviewed-on: http://gerrit.cloudera.org:8080/10934
Reviewed-by: Adar Dembo <adar@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d074de85
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d074de85
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d074de85

Branch: refs/heads/master
Commit: d074de85dd19697444bb48eabe5b5c577a579d07
Parents: f62bbdd
Author: Dan Burkert <danburkert@apache.org>
Authored: Tue Jul 3 17:12:59 2018 -0700
Committer: Dan Burkert <danburkert@apache.org>
Committed: Fri Jul 13 02:34:34 2018 +0000

----------------------------------------------------------------------
 src/kudu/hms/hms_catalog-test.cc                | 37 +++++-----
 src/kudu/hms/hms_catalog.cc                     | 27 +++++--
 src/kudu/hms/hms_catalog.h                      | 12 ++-
 src/kudu/hms/hms_client-test.cc                 | 49 ++++++++++---
 src/kudu/hms/hms_client.cc                      | 30 +++++++-
 src/kudu/hms/hms_client.h                       | 19 ++++-
 src/kudu/integration-tests/master_hms-itest.cc  |  2 +-
 .../mini-cluster/external_mini_cluster-test.cc  |  2 +-
 src/kudu/tools/kudu-tool-test.cc                |  6 +-
 src/kudu/tools/tool_action_hms.cc               | 77 +++++++++-----------
 10 files changed, 167 insertions(+), 94 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/d074de85/src/kudu/hms/hms_catalog-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_catalog-test.cc b/src/kudu/hms/hms_catalog-test.cc
index 472f3a8..4574402 100644
--- a/src/kudu/hms/hms_catalog-test.cc
+++ b/src/kudu/hms/hms_catalog-test.cc
@@ -396,36 +396,39 @@ TEST_F(HmsCatalogTest, TestExternalTable) {
   NO_FATALS(CheckTableDoesNotExist("default", "bogus_table_name"));
 }
 
-TEST_F(HmsCatalogTest, TestRetrieveTables) {
+TEST_F(HmsCatalogTest, TestGetKuduTables) {
   const string kHmsDatabase = "db";
   const string kManagedTableName = "managed_table";
   const string kExternalTableName = "external_table";
+  const string kTableName = "external_table";
   const string kNonKuduTableName = "non_kudu_table";
 
-  // Create a Impala managed table, a external table and a non Kudu table.
+  // Create a legacy Impala managed table, a legacy Impala external table, a
+  // Kudu table, and a non Kudu table.
   hive::Database db;
-  db.name = kHmsDatabase;
+  db.name = "db";
   ASSERT_OK(hms_client_->CreateDatabase(db));
-  ASSERT_OK(CreateLegacyTable(kHmsDatabase,
-                              kManagedTableName,
-                              HmsClient::kManagedTable));
+  ASSERT_OK(CreateLegacyTable("db", "managed_table", HmsClient::kManagedTable));
   hive::Table table;
-  ASSERT_OK(hms_client_->GetTable(kHmsDatabase, kManagedTableName, &table));
-  ASSERT_OK(CreateLegacyTable(kHmsDatabase,
-                              kExternalTableName,
-                              HmsClient::kExternalTable));
-  ASSERT_OK(hms_client_->GetTable(kHmsDatabase, kExternalTableName, &table));
+  ASSERT_OK(hms_client_->GetTable(kHmsDatabase, "managed_table", &table));
+  ASSERT_OK(CreateLegacyTable("db", "external_table", HmsClient::kExternalTable));
+  ASSERT_OK(hms_client_->GetTable("db", "external_table", &table));
+
+  ASSERT_OK(hms_catalog_->CreateTable("fake-id", "db.table", Schema()));
 
   hive::Table non_kudu_table;
-  non_kudu_table.dbName = kHmsDatabase;
-  non_kudu_table.tableName = kNonKuduTableName;
+  non_kudu_table.dbName = "db";
+  non_kudu_table.tableName = "non_kudu_table";
   ASSERT_OK(hms_client_->CreateTable(non_kudu_table));
-  ASSERT_OK(hms_client_->GetTable(kHmsDatabase, kNonKuduTableName, &table));
+  ASSERT_OK(hms_client_->GetTable("db", "non_kudu_table", &table));
 
   // Retrieve all tables and ensure all entries are found.
-  vector<hive::Table> hms_tables;
-  ASSERT_OK(hms_catalog_->RetrieveTables(&hms_tables));
-  ASSERT_EQ(3, hms_tables.size());
+  vector<hive::Table> kudu_tables;
+  ASSERT_OK(hms_catalog_->GetKuduTables(&kudu_tables));
+  ASSERT_EQ(3, kudu_tables.size());
+  for (const auto& kudu_table : kudu_tables) {
+    ASSERT_FALSE(kudu_table.tableName == "non_kudu_table") << kudu_table;
+  }
 }
 
 // Checks that the HmsCatalog handles reconnecting to the metastore after a connection failure.

http://git-wip-us.apache.org/repos/asf/kudu/blob/d074de85/src/kudu/hms/hms_catalog.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_catalog.cc b/src/kudu/hms/hms_catalog.cc
index fe904cd..11faec0 100644
--- a/src/kudu/hms/hms_catalog.cc
+++ b/src/kudu/hms/hms_catalog.cc
@@ -209,17 +209,28 @@ Status HmsCatalog::DowngradeToLegacyImpalaTable(const std::string&
name) {
   });
 }
 
-Status HmsCatalog::RetrieveTables(vector<hive::Table>* hms_tables) {
+Status HmsCatalog::GetKuduTables(vector<hive::Table>* kudu_tables) {
   return Execute([&] (HmsClient* client) {
     vector<string> database_names;
     RETURN_NOT_OK(client->GetAllDatabases(&database_names));
-    for (const auto &database_name : database_names) {
-      vector<string> table_names;
-      RETURN_NOT_OK(client->GetAllTables(database_name, &table_names));
-      for (const auto &table_name : table_names) {
-        hive::Table hms_table;
-        RETURN_NOT_OK(client->GetTable(database_name, table_name, &hms_table));
-        hms_tables->emplace_back(move(hms_table));
+    vector<string> table_names;
+    vector<hive::Table> tables;
+
+    for (const auto& database_name : database_names) {
+      table_names.clear();
+      tables.clear();
+      RETURN_NOT_OK(client->GetTableNames(
+            database_name,
+            Substitute("$0$1 = \"$2\" OR $0$1 = \"$3\"",
+              HmsClient::kHiveFilterFieldParams,
+              HmsClient::kStorageHandlerKey,
+              HmsClient::kKuduStorageHandler,
+              HmsClient::kLegacyKuduStorageHandler),
+            &table_names));
+
+      if (!table_names.empty()) {
+        RETURN_NOT_OK(client->GetTables(database_name, table_names, &tables));
+        std::move(tables.begin(), tables.end(), std::back_inserter(*kudu_tables));
       }
     }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/d074de85/src/kudu/hms/hms_catalog.h
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_catalog.h b/src/kudu/hms/hms_catalog.h
index d40fb3c..f75ee3e 100644
--- a/src/kudu/hms/hms_catalog.h
+++ b/src/kudu/hms/hms_catalog.h
@@ -103,17 +103,21 @@ class HmsCatalog {
   // Kudu table, or if the table entry in not in the HMS.
   Status DowngradeToLegacyImpalaTable(const std::string& name) WARN_UNUSED_RESULT;
 
-  // Retrieves all tables in the HMS.
+  // Retrieves all Kudu tables in the HMS.
+  //
+  // Tables are considered to be Kudu tables if their storage handler matches
+  // the legacy Kudu storage handler used by Impala, or the new Kudu storage
+  // handler.
   //
   // This method will fail if the HMS is unreachable.
-  Status RetrieveTables(std::vector<hive::Table>* hms_tables) WARN_UNUSED_RESULT;
+  Status GetKuduTables(std::vector<hive::Table>* kudu_tables) WARN_UNUSED_RESULT;
 
   // Retrieves notification log events from the HMS.
   //
   // The events will begin at id 'last_event_id + 1', and at most 'max_events'
   // events are returned.
   Status GetNotificationEvents(int64_t last_event_id, int max_events,
-                               std::vector<hive::NotificationEvent>* events);
+                               std::vector<hive::NotificationEvent>* events) WARN_UNUSED_RESULT;
 
   // Validates the hive_metastore_uris gflag.
   static bool ValidateUris(const char* flag_name, const std::string& metastore_uris);
@@ -145,7 +149,7 @@ class HmsCatalog {
   // Hive handles validating and canonicalizing table names in
   // org.apache.hadoop.hive.metastore.MetaStoreUtils.validateName and
   // org.apache.hadoop.hive.common.util.normalizeIdentifier.
-  static Status NormalizeTableName(std::string* table_name);
+  static Status NormalizeTableName(std::string* table_name) WARN_UNUSED_RESULT;
 
  private:
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/d074de85/src/kudu/hms/hms_client-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_client-test.cc b/src/kudu/hms/hms_client-test.cc
index 2cb6a4c..fa8d9bf 100644
--- a/src/kudu/hms/hms_client-test.cc
+++ b/src/kudu/hms/hms_client-test.cc
@@ -31,6 +31,7 @@
 #include <glog/stl_logging.h>
 #include <gtest/gtest.h>
 
+#include "kudu/gutil/strings/substitute.h"
 #include "kudu/hms/hive_metastore_types.h"
 #include "kudu/hms/mini_hms.h"
 #include "kudu/rpc/sasl_common.h"
@@ -48,6 +49,7 @@ using kudu::rpc::SaslProtection;
 using std::make_pair;
 using std::string;
 using std::vector;
+using strings::Substitute;
 
 namespace kudu {
 namespace hms {
@@ -146,7 +148,7 @@ TEST_P(HmsClientTest, TestHmsOperations) {
   std::sort(databases.begin(), databases.end());
   EXPECT_EQ(vector<string>({ "default", database_name }), databases) << "Databases:
" << databases;
 
-  // Get a specific database..
+  // Get a specific database.
   hive::Database my_db;
   ASSERT_OK(client.GetDatabase(database_name, &my_db));
   EXPECT_EQ(database_name, my_db.name) << "my_db: " << my_db;
@@ -203,12 +205,36 @@ TEST_P(HmsClientTest, TestHmsOperations) {
   // Create a table with an illegal utf-8 name.
   ASSERT_TRUE(CreateTable(&client, database_name, "☃ sculptures 😉", table_id).IsInvalidArgument());
 
-  // Get all tables.
-  vector<string> tables;
-  ASSERT_OK(client.GetAllTables(database_name, &tables));
-  std::sort(tables.begin(), tables.end());
-  EXPECT_EQ(vector<string>({ new_table_name, "my_uppercase_table" }), tables)
-      << "Tables: " << tables;
+  // Create a non-Kudu table.
+  hive::Table non_kudu_table;
+  non_kudu_table.dbName = database_name;
+  non_kudu_table.tableName = "non_kudu_table";
+  non_kudu_table.parameters[HmsClient::kStorageHandlerKey] = "bogus.storage.Handler";
+  ASSERT_OK(client.CreateTable(non_kudu_table));
+
+  // Get all table names.
+  vector<string> table_names;
+  ASSERT_OK(client.GetTableNames(database_name, &table_names));
+  std::sort(table_names.begin(), table_names.end());
+  EXPECT_EQ(vector<string>({ new_table_name, "my_uppercase_table", "non_kudu_table"
}), table_names)
+      << "table names: " << table_names;
+
+  // Get filtered table names.
+  table_names.clear();
+  string filter = Substitute(
+      "$0$1 = \"$2\"", HmsClient::kHiveFilterFieldParams,
+      HmsClient::kStorageHandlerKey, HmsClient::kKuduStorageHandler);
+  ASSERT_OK(client.GetTableNames(database_name, filter, &table_names))
+  std::sort(table_names.begin(), table_names.end());
+  EXPECT_EQ(vector<string>({ new_table_name, "my_uppercase_table" }), table_names)
+      << "table names: " << table_names;
+
+  // Get multiple tables.
+  vector<hive::Table> tables;
+  ASSERT_OK(client.GetTables(database_name, table_names, &tables));
+  ASSERT_EQ(2, tables.size());
+  EXPECT_EQ(new_table_name, tables[0].tableName);
+  EXPECT_EQ("my_uppercase_table", tables[1].tableName);
 
   // Check that the HMS rejects Kudu table drops with a bogus table ID.
   ASSERT_TRUE(DropTable(&client, database_name, new_table_name, "bogus-table-id").IsRemoteError());
@@ -232,15 +258,16 @@ TEST_P(HmsClientTest, TestHmsOperations) {
   vector<hive::NotificationEvent> events;
   ASSERT_OK(client.GetNotificationEvents(-1, 100, &events));
 
-  ASSERT_EQ(5, events.size());
+  ASSERT_EQ(6, events.size());
   EXPECT_EQ("CREATE_DATABASE", events[0].eventType);
   EXPECT_EQ("CREATE_TABLE", events[1].eventType);
   EXPECT_EQ("ALTER_TABLE", events[2].eventType);
   EXPECT_EQ("CREATE_TABLE", events[3].eventType);
-  EXPECT_EQ("DROP_TABLE", events[4].eventType);
+  EXPECT_EQ("CREATE_TABLE", events[4].eventType);
+  EXPECT_EQ("DROP_TABLE", events[5].eventType);
   // TODO(HIVE-17008)
-  //EXPECT_EQ("DROP_TABLE", events[5].eventType);
-  //EXPECT_EQ("DROP_DATABASE", events[6].eventType);
+  //EXPECT_EQ("DROP_TABLE", events[6].eventType);
+  //EXPECT_EQ("DROP_DATABASE", events[7].eventType);
 
   // Retrieve a specific notification log.
   events.clear();

http://git-wip-us.apache.org/repos/asf/kudu/blob/d074de85/src/kudu/hms/hms_client.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_client.cc b/src/kudu/hms/hms_client.cc
index 3d398c5..257e150 100644
--- a/src/kudu/hms/hms_client.cc
+++ b/src/kudu/hms/hms_client.cc
@@ -131,6 +131,7 @@ const char* const HmsClient::kExternalTableKey = "EXTERNAL";
 const char* const HmsClient::kStorageHandlerKey = "storage_handler";
 const char* const HmsClient::kKuduMetastorePlugin =
   "org.apache.kudu.hive.metastore.KuduMetastorePlugin";
+const char* const HmsClient::kHiveFilterFieldParams = "hive_filter_field_params__";
 
 const char* const HmsClient::kManagedTable = "MANAGED_TABLE";
 const char* const HmsClient::kExternalTable = "EXTERNAL_TABLE";
@@ -300,15 +301,26 @@ Status HmsClient::DropTable(const string& database_name,
   return Status::OK();
 }
 
-Status HmsClient::GetAllTables(const string& database_name,
-                               vector<string>* tables) {
-  DCHECK(tables);
+Status HmsClient::GetTableNames(const string& database_name,
+                                vector<string>* table_names) {
+  DCHECK(table_names);
   SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "get all HMS tables");
-  HMS_RET_NOT_OK(client_.get_all_tables(*tables, database_name),
+  HMS_RET_NOT_OK(client_.get_all_tables(*table_names, database_name),
                  "failed to get Hive Metastore tables");
   return Status::OK();
 }
 
+Status HmsClient::GetTableNames(const std::string& database_name,
+                                const std::string& filter,
+                                std::vector<std::string>* table_names) {
+  DCHECK(table_names);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "get filtered HMS
tables");
+  HMS_RET_NOT_OK(client_.get_table_names_by_filter(*table_names, database_name,
+                                                   filter, /*max_tables*/ -1),
+                 "failed to get filtered Hive Metastore tables");
+  return Status::OK();
+}
+
 Status HmsClient::GetTable(const string& database_name,
                            const string& table_name,
                            hive::Table* table) {
@@ -319,6 +331,16 @@ Status HmsClient::GetTable(const string& database_name,
   return Status::OK();
 }
 
+Status HmsClient::GetTables(const string& database_name,
+                            const vector<string>& table_names,
+                            vector<hive::Table>* tables) {
+  DCHECK(tables);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "get HMS tables");
+  HMS_RET_NOT_OK(client_.get_table_objects_by_name(*tables, database_name, table_names),
+                 "failed to get Hive Metastore tables");
+  return Status::OK();
+}
+
 Status HmsClient::GetCurrentNotificationEventId(int64_t* event_id) {
   DCHECK(event_id);
   SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs,

http://git-wip-us.apache.org/repos/asf/kudu/blob/d074de85/src/kudu/hms/hms_client.h
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_client.h b/src/kudu/hms/hms_client.h
index 157043a..82ac324 100644
--- a/src/kudu/hms/hms_client.h
+++ b/src/kudu/hms/hms_client.h
@@ -96,6 +96,7 @@ class HmsClient {
   static const char* const kKuduMasterEventKey;;
   static const char* const kStorageHandlerKey;
   static const char* const kKuduStorageHandler;
+  static const char* const kHiveFilterFieldParams;
 
   static const char* const kTransactionalEventListeners;
   static const char* const kDisallowIncompatibleColTypeChanges;
@@ -175,9 +176,21 @@ class HmsClient {
                   const std::string& table_name,
                   hive::Table* table) WARN_UNUSED_RESULT;
 
-  // Retrieves all tables in an HMS database.
-  Status GetAllTables(const std::string& database_name,
-                      std::vector<std::string>* tables) WARN_UNUSED_RESULT;
+  // Retrieves HMS table metadata for all tables in 'table_names'.
+  Status GetTables(const std::string& database_name,
+                   const std::vector<std::string>& table_names,
+                   std::vector<hive::Table>* tables) WARN_UNUSED_RESULT;
+
+  // Retrieves all table names in an HMS database.
+  Status GetTableNames(const std::string& database_name,
+                       std::vector<std::string>* table_names) WARN_UNUSED_RESULT;
+
+  // Retrieves all table names in an HMS database matching a filter. See the
+  // docs for 'get_table_names_by_filter' in hive_metastore.thrift for filter
+  // syntax examples.
+  Status GetTableNames(const std::string& database_name,
+                       const std::string& filter,
+                       std::vector<std::string>* table_names) WARN_UNUSED_RESULT;
 
   // Retrieves a the current HMS notification event ID.
   Status GetCurrentNotificationEventId(int64_t* event_id) WARN_UNUSED_RESULT;

http://git-wip-us.apache.org/repos/asf/kudu/blob/d074de85/src/kudu/integration-tests/master_hms-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_hms-itest.cc b/src/kudu/integration-tests/master_hms-itest.cc
index 2fd415c..4706a9d 100644
--- a/src/kudu/integration-tests/master_hms-itest.cc
+++ b/src/kudu/integration-tests/master_hms-itest.cc
@@ -327,7 +327,7 @@ TEST_F(MasterHmsTest, TestRenameTable) {
 
   // Check that the two tables still exist.
   vector<string> tables;
-  ASSERT_OK(hms_client_->GetAllTables("db", &tables));
+  ASSERT_OK(hms_client_->GetTableNames("db", &tables));
   std::sort(tables.begin(), tables.end());
   ASSERT_EQ(tables, vector<string>({ "b", "d" })) << tables;
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/d074de85/src/kudu/mini-cluster/external_mini_cluster-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/external_mini_cluster-test.cc b/src/kudu/mini-cluster/external_mini_cluster-test.cc
index 7eb209e..ff99c80 100644
--- a/src/kudu/mini-cluster/external_mini_cluster-test.cc
+++ b/src/kudu/mini-cluster/external_mini_cluster-test.cc
@@ -198,7 +198,7 @@ TEST_P(ExternalMiniClusterTest, TestBasicOperation) {
     hms::HmsClient hms_client(cluster.hms()->address(), hms_client_opts);
     ASSERT_OK(hms_client.Start());
     vector<string> tables;
-    ASSERT_OK(hms_client.GetAllTables("default", &tables));
+    ASSERT_OK(hms_client.GetTableNames("default", &tables));
     ASSERT_TRUE(tables.empty()) << "tables: " << tables;
   }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/d074de85/src/kudu/tools/kudu-tool-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 84d8c5c..d19dd15 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -2249,7 +2249,7 @@ TEST_F(ToolTest, TestHmsUpgrade) {
 
   {
     vector<string> table_names;
-    ASSERT_OK(hms_client.GetAllTables(kDatabaseName, &table_names));
+    ASSERT_OK(hms_client.GetTableNames(kDatabaseName, &table_names));
     ASSERT_EQ(2, table_names.size());
   }
 
@@ -2285,10 +2285,10 @@ TEST_F(ToolTest, TestHmsUpgrade) {
     vector<string> db_names;
     ASSERT_OK(hms_client.GetAllDatabases(&db_names));
     ASSERT_EQ(2, db_names.size());
-    ASSERT_OK(hms_client.GetAllTables(kDatabaseName, &table_names));
+    ASSERT_OK(hms_client.GetTableNames(kDatabaseName, &table_names));
     ASSERT_EQ(2, table_names.size());
     table_names.clear();
-    ASSERT_OK(hms_client.GetAllTables(kDefaultDatabaseName, &table_names));
+    ASSERT_OK(hms_client.GetTableNames(kDefaultDatabaseName, &table_names));
     ASSERT_EQ(6, table_names.size());
 
     string out;

http://git-wip-us.apache.org/repos/asf/kudu/blob/d074de85/src/kudu/tools/tool_action_hms.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_hms.cc b/src/kudu/tools/tool_action_hms.cc
index f06367b..70a1aeb 100644
--- a/src/kudu/tools/tool_action_hms.cc
+++ b/src/kudu/tools/tool_action_hms.cc
@@ -87,14 +87,11 @@ const char* const kInvalidNameError = "is not a valid object name";
 unordered_map<string, hive::Table> RetrieveTablesMap(vector<hive::Table> hms_tables)
{
   unordered_map<string, hive::Table> hms_tables_map;
   for (auto& hms_table : hms_tables) {
-    if (hms_table.parameters[HmsClient::kStorageHandlerKey] ==
-        HmsClient::kLegacyKuduStorageHandler) {
-      hms_tables_map.emplace(hms_table.parameters[HmsClient::kLegacyKuduTableNameKey],
-                             hms_table);
-    } else if (hms_table.parameters[HmsClient::kStorageHandlerKey] ==
-               HmsClient::kKuduStorageHandler) {
-      hms_tables_map.emplace(Substitute("$0.$1", hms_table.dbName, hms_table.tableName),
-                             hms_table);
+    const string& storage_handler = hms_table.parameters[HmsClient::kStorageHandlerKey];
+    if (storage_handler == HmsClient::kLegacyKuduStorageHandler) {
+      hms_tables_map.emplace(hms_table.parameters[HmsClient::kLegacyKuduTableNameKey], hms_table);
+    } else if (storage_handler == HmsClient::kKuduStorageHandler) {
+      hms_tables_map.emplace(Substitute("$0.$1", hms_table.dbName, hms_table.tableName),
hms_table);
     }
   }
   return hms_tables_map;
@@ -213,22 +210,20 @@ Status AlterLegacyKuduTables(KuduClient* kudu_client,
 Status Init(const RunnerContext& context,
             shared_ptr<KuduClient>* kudu_client,
             unique_ptr<HmsCatalog>* hms_catalog) {
-  const string& master_addresses_str = FindOrDie(context.required_args,
-                                                 kMasterAddressesArg);
-  vector<string> master_addresses = Split(master_addresses_str, ",");
+  const string& master_addresses = FindOrDie(context.required_args, kMasterAddressesArg);
 
   if (!hms::HmsCatalog::IsEnabled()) {
     return Status::IllegalState("HMS URIs cannot be empty!");
   }
 
   // Create Hms Catalog.
-  hms_catalog->reset(new hms::HmsCatalog(master_addresses_str));
+  hms_catalog->reset(new hms::HmsCatalog(master_addresses));
   RETURN_NOT_OK((*hms_catalog)->Start());
 
   // Create a Kudu Client.
   return KuduClientBuilder()
       .default_rpc_timeout(MonoDelta::FromMilliseconds(FLAGS_timeout_ms))
-      .master_server_addrs(master_addresses)
+      .master_server_addrs(Split(master_addresses, ","))
       .Build(kudu_client);
 }
 
@@ -270,7 +265,7 @@ Status HmsUpgrade(const RunnerContext& context) {
 
   // 1. Identify all Kudu tables in the HMS entries.
   vector<hive::Table> hms_tables;
-  RETURN_NOT_OK(hms_catalog->RetrieveTables(&hms_tables));
+  RETURN_NOT_OK(hms_catalog->GetKuduTables(&hms_tables));
 
   // 2. Rename all existing Kudu tables to have Hive-compatible table names.
   //    Also, correct all out of sync metadata in HMS entries.
@@ -286,7 +281,7 @@ Status HmsDowngrade(const RunnerContext& context) {
 
   // 1. Identify all Kudu tables in the HMS entries.
   vector<hive::Table> hms_tables;
-  RETURN_NOT_OK(hms_catalog->RetrieveTables(&hms_tables));
+  RETURN_NOT_OK(hms_catalog->GetKuduTables(&hms_tables));
 
   // 2. Downgrades all Kudu tables to legacy table format.
   for (auto& hms_table : hms_tables) {
@@ -301,14 +296,14 @@ Status HmsDowngrade(const RunnerContext& context) {
 }
 
 // Given a kudu table and a hms table, checks if their metadata is in sync.
-bool isSynced(const string& master_addresses,
-              const shared_ptr<KuduTable>& kudu_table,
+bool IsSynced(const string& master_addresses,
+              const KuduTable& kudu_table,
               const hive::Table& hms_table) {
-  Schema schema(client::SchemaFromKuduSchema(kudu_table->schema()));
+  Schema schema(client::SchemaFromKuduSchema(kudu_table.schema()));
   hive::Table hms_table_copy(hms_table);
-  Status s = HmsCatalog::PopulateTable(kudu_table->id(), kudu_table->name(),
+  Status s = HmsCatalog::PopulateTable(kudu_table.id(), kudu_table.name(),
                                        schema, master_addresses, &hms_table_copy);
-  return hms_table_copy == hms_table && s.ok();
+  return s.ok() && hms_table_copy == hms_table;
 }
 
 // Filter orphan tables from the unsynchronized tables map.
@@ -360,8 +355,7 @@ Status PrintUnsyncedTables(const string& master_addresses,
   return table.PrintTo(out);
 }
 
-Status PrintLegacyTables(const vector<hive::Table>& tables,
-                         ostream& out) {
+Status PrintLegacyTables(const vector<hive::Table>& tables, ostream& out) {
   cout << "Found legacy tables in the Hive Metastore, "
        << "use metadata upgrade tool first: 'kudu hms upgrade'."
        << endl;
@@ -375,25 +369,25 @@ Status PrintLegacyTables(const vector<hive::Table>& tables,
   return table.PrintTo(out);
 }
 
-Status RetrieveUnsyncedTables(const unique_ptr<HmsCatalog>& hms_catalog,
-                              const shared_ptr<KuduClient>& kudu_client,
-                              const string& master_addresses_str,
+Status RetrieveUnsyncedTables(HmsCatalog* hms_catalog,
+                              KuduClient* kudu_client,
+                              const string& master_addresses,
                               TablesMap* unsynced_tables_map,
                               vector<hive::Table>* legacy_tables) {
   // 1. Identify all Kudu table in the HMS entries.
   vector<hive::Table> hms_tables;
-  RETURN_NOT_OK(hms_catalog->RetrieveTables(&hms_tables));
+  RETURN_NOT_OK(hms_catalog->GetKuduTables(&hms_tables));
 
   // 2. Walk through all the Kudu tables in the HMS and identify any
   //    out of sync tables.
   for (auto& hms_table : hms_tables) {
-    string hms_table_id = hms_table.parameters[HmsClient::kKuduTableIdKey];
-    if (hms_table.parameters[HmsClient::kStorageHandlerKey] ==
-        HmsClient::kKuduStorageHandler) {
+    const string& storage_handler = hms_table.parameters[HmsClient::kStorageHandlerKey];
+    if (storage_handler == HmsClient::kKuduStorageHandler) {
+      const string& hms_table_id = hms_table.parameters[HmsClient::kKuduTableIdKey];
       string table_name = Substitute("$0.$1", hms_table.dbName, hms_table.tableName);
       shared_ptr<KuduTable> kudu_table;
       Status s = kudu_client->OpenTable(table_name, &kudu_table);
-      if (s.ok() && !isSynced(master_addresses_str, kudu_table, hms_table)) {
+      if (s.ok() && !IsSynced(master_addresses, *kudu_table.get(), hms_table)) {
         (*unsynced_tables_map)[kudu_table->id()].first = kudu_table;
         (*unsynced_tables_map)[hms_table_id].second.emplace_back(hms_table);
       } else if (s.IsNotFound()) {
@@ -404,8 +398,7 @@ Status RetrieveUnsyncedTables(const unique_ptr<HmsCatalog>&
hms_catalog,
       } else {
         RETURN_NOT_OK(s);
       }
-    } else if (hms_table.parameters[HmsClient::kStorageHandlerKey] ==
-               HmsClient::kLegacyKuduStorageHandler) {
+    } else if (storage_handler == HmsClient::kLegacyKuduStorageHandler) {
       legacy_tables->push_back(hms_table);
     }
   }
@@ -430,16 +423,17 @@ Status RetrieveUnsyncedTables(const unique_ptr<HmsCatalog>&
hms_catalog,
 }
 
 Status CheckHmsMetadata(const RunnerContext& context) {
-  const string& master_addresses_str = FindOrDie(context.required_args,
-                                                 kMasterAddressesArg);
+  const string& master_addresses = FindOrDie(context.required_args, kMasterAddressesArg);
   shared_ptr<KuduClient> kudu_client;
   unique_ptr<HmsCatalog> hms_catalog;
   Init(context, &kudu_client, &hms_catalog);
 
   TablesMap unsynced_tables_map;
   std::vector<hive::Table> legacy_tables;
-  RETURN_NOT_OK_PREPEND(RetrieveUnsyncedTables(hms_catalog, kudu_client,
-                                               master_addresses_str, &unsynced_tables_map,
+  RETURN_NOT_OK_PREPEND(RetrieveUnsyncedTables(hms_catalog.get(),
+                                               kudu_client.get(),
+                                               master_addresses,
+                                               &unsynced_tables_map,
                                                &legacy_tables),
                         "error fetching unsynchronized tables");
 
@@ -452,8 +446,7 @@ Status CheckHmsMetadata(const RunnerContext& context) {
   // Something went wrong.
   cout << "FAILED" << endl;
   if (!unsynced_tables_map.empty()) {
-    RETURN_NOT_OK_PREPEND(PrintUnsyncedTables(master_addresses_str,
-                                              unsynced_tables_map, cout),
+    RETURN_NOT_OK_PREPEND(PrintUnsyncedTables(master_addresses, unsynced_tables_map, cout),
                           "error printing inconsistent data");
     cout << endl;
   }
@@ -522,16 +515,16 @@ Status FixUnsyncedTables(KuduClient* kudu_client,
 }
 
 Status FixHmsMetadata(const RunnerContext& context) {
-  const string& master_addresses_str = FindOrDie(context.required_args,
-                                                 kMasterAddressesArg);
+  const string& master_addresses = FindOrDie(context.required_args, kMasterAddressesArg);
   shared_ptr<KuduClient> kudu_client;
   unique_ptr<HmsCatalog> hms_catalog;
   Init(context, &kudu_client, &hms_catalog);
 
   TablesMap unsynced_tables_map;
   std::vector<hive::Table> legacy_tables;
-  RETURN_NOT_OK_PREPEND(RetrieveUnsyncedTables(hms_catalog, kudu_client,
-                                               master_addresses_str,
+  RETURN_NOT_OK_PREPEND(RetrieveUnsyncedTables(hms_catalog.get(),
+                                               kudu_client.get(),
+                                               master_addresses,
                                                &unsynced_tables_map,
                                                &legacy_tables),
                         "error fetching unsynchronized tables");


Mime
View raw message