kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ha...@apache.org
Subject kudu git commit: KUDU-2191: Metadata Upgrade Tool
Date Sat, 19 May 2018 00:55:03 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 8f525829d -> 10a7f2a9b


KUDU-2191: Metadata Upgrade Tool

This commit introduces an upgrade tool to allows backward compatibility.
It provides support to upgrade the metadata format of existing Impala
managed/external tables in HMS entries, as well as rename the existing
tables in Kudu to adapt to the new naming rules. To run this tool, it
requires to turn off the HMS integration feature. It adds test case
using external mini cluster to ensure the upgrade tool works as
expected.

Change-Id: I1c8488e1c7b04b8bedc31c83b1496b53bae49cb3
Reviewed-on: http://gerrit.cloudera.org:8080/10075
Reviewed-by: Dan Burkert <danburkert@apache.org>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/10a7f2a9
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/10a7f2a9
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/10a7f2a9

Branch: refs/heads/master
Commit: 10a7f2a9b32c7ed00c55322b27b61e6d46ef17bd
Parents: 8f52582
Author: hahao <hao.hao@cloudera.com>
Authored: Fri Apr 20 17:49:47 2018 -0700
Committer: Hao Hao <hao.hao@cloudera.com>
Committed: Sat May 19 00:43:16 2018 +0000

----------------------------------------------------------------------
 .../hive/metastore/KuduMetastorePlugin.java     |  41 ++-
 .../hive/metastore/TestKuduMetastorePlugin.java |  61 ++--
 src/kudu/client/client-test-util.cc             |   5 +
 src/kudu/client/client-test-util.h              |   3 +
 src/kudu/client/schema.h                        |   2 +-
 src/kudu/common/common.proto                    |   6 +
 src/kudu/hms/hms_catalog-test.cc                |  66 +++++
 src/kudu/hms/hms_catalog.cc                     |  49 +++-
 src/kudu/hms/hms_catalog.h                      |  16 ++
 src/kudu/hms/hms_client.cc                      |   6 +
 src/kudu/hms/hms_client.h                       |   5 +
 .../alter_table-randomized-test.cc              |   3 +-
 .../integration-tests/master_failover-itest.cc  |   3 +-
 src/kudu/integration-tests/master_hms-itest.cc  |   3 +-
 .../mini-cluster/external_mini_cluster-test.cc  |  10 +-
 src/kudu/mini-cluster/external_mini_cluster.cc  |  21 +-
 src/kudu/mini-cluster/external_mini_cluster.h   |  18 +-
 src/kudu/tools/CMakeLists.txt                   |   2 +
 src/kudu/tools/kudu-tool-test.cc                | 212 +++++++++++++-
 src/kudu/tools/tool.proto                       |   5 +-
 src/kudu/tools/tool_action.h                    |   1 +
 src/kudu/tools/tool_action_hms.cc               | 285 +++++++++++++++++++
 src/kudu/tools/tool_action_test.cc              |   2 +-
 src/kudu/tools/tool_main.cc                     |   1 +
 src/kudu/tools/tool_test_util.cc                |   5 +-
 src/kudu/tools/tool_test_util.h                 |   3 +-
 26 files changed, 775 insertions(+), 59 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/java/kudu-hive/src/main/java/org/apache/kudu/hive/metastore/KuduMetastorePlugin.java
----------------------------------------------------------------------
diff --git a/java/kudu-hive/src/main/java/org/apache/kudu/hive/metastore/KuduMetastorePlugin.java b/java/kudu-hive/src/main/java/org/apache/kudu/hive/metastore/KuduMetastorePlugin.java
index 7a0a8f2..709b687 100644
--- a/java/kudu-hive/src/main/java/org/apache/kudu/hive/metastore/KuduMetastorePlugin.java
+++ b/java/kudu-hive/src/main/java/org/apache/kudu/hive/metastore/KuduMetastorePlugin.java
@@ -61,6 +61,8 @@ public class KuduMetastorePlugin extends MetaStoreEventListener {
   @VisibleForTesting
   static final String KUDU_STORAGE_HANDLER = "org.apache.kudu.hive.KuduStorageHandler";
   @VisibleForTesting
+  static final String LEGACY_KUDU_STORAGE_HANDLER = "com.cloudera.kudu.hive.KuduStorageHandler";
+  @VisibleForTesting
   static final String KUDU_TABLE_ID_KEY = "kudu.table_id";
   @VisibleForTesting
   static final String KUDU_MASTER_ADDRS_KEY = "kudu.master_addresses";
@@ -126,21 +128,24 @@ public class KuduMetastorePlugin extends MetaStoreEventListener {
     Table oldTable = tableEvent.getOldTable();
     Table newTable = tableEvent.getNewTable();
 
-    // Allow non-Kudu tables to be altered.
-    if (!isKuduTable(oldTable)) {
+    // Allow non-Kudu tables (even the legacy ones) to be altered.
+    if (!isKuduTable(oldTable) && !isLegacyKuduTable(oldTable)) {
       // But ensure that the alteration isn't introducing Kudu-specific properties.
       checkNoKuduProperties(newTable);
       return;
     }
 
-    // Check the altered table's properties.
+    // Check the altered table's properties. This implies legacy Kudu table
+    // can only be upgraded to the new format.
     checkKuduProperties(newTable);
 
-    // Check that the table ID isn't changing.
-    String oldTableId = oldTable.getParameters().get(KUDU_TABLE_ID_KEY);
-    String newTableId = newTable.getParameters().get(KUDU_TABLE_ID_KEY);
-    if (!newTableId.equals(oldTableId)) {
-      throw new MetaException("Kudu table ID does not match the existing HMS entry");
+    // Check that the non legacy Kudu table ID isn't changing.
+    if (!isLegacyKuduTable(oldTable)) {
+      String oldTableId = oldTable.getParameters().get(KUDU_TABLE_ID_KEY);
+      String newTableId = newTable.getParameters().get(KUDU_TABLE_ID_KEY);
+      if (!newTableId.equals(oldTableId)) {
+        throw new MetaException("Kudu table ID does not match the existing HMS entry");
+      }
     }
 
     if (!isKuduMasterAction(tableEvent) &&
@@ -156,7 +161,20 @@ public class KuduMetastorePlugin extends MetaStoreEventListener {
    */
   private boolean isKuduTable(Table table) {
     return KUDU_STORAGE_HANDLER.equals(table.getParameters()
-                                            .get(hive_metastoreConstants.META_TABLE_STORAGE));
+        .get(hive_metastoreConstants.META_TABLE_STORAGE));
+  }
+
+  /**
+   * Checks whether the table is a Kudu table with legacy Kudu
+   * storage handler.
+   *
+   * @param table the table to check
+   * @return {@code true} if the table is a legacy Kudu table,
+   *         otherwise {@code false}
+   */
+  private boolean isLegacyKuduTable(Table table) {
+    return LEGACY_KUDU_STORAGE_HANDLER.equals(table.getParameters()
+        .get(hive_metastoreConstants.META_TABLE_STORAGE));
   }
 
   /**
@@ -198,11 +216,6 @@ public class KuduMetastorePlugin extends MetaStoreEventListener {
           "non-Kudu table entry must not contain a table ID property (%s)",
           KUDU_TABLE_ID_KEY));
     }
-    if (table.getParameters().containsKey(KUDU_MASTER_ADDRS_KEY)) {
-      throw new MetaException(String.format(
-          "non-Kudu table entry must not contain a Master addresses property (%s)",
-          KUDU_MASTER_ADDRS_KEY));
-    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/java/kudu-hive/src/test/java/org/apache/kudu/hive/metastore/TestKuduMetastorePlugin.java
----------------------------------------------------------------------
diff --git a/java/kudu-hive/src/test/java/org/apache/kudu/hive/metastore/TestKuduMetastorePlugin.java b/java/kudu-hive/src/test/java/org/apache/kudu/hive/metastore/TestKuduMetastorePlugin.java
index 4d56acb..b45dcce 100644
--- a/java/kudu-hive/src/test/java/org/apache/kudu/hive/metastore/TestKuduMetastorePlugin.java
+++ b/java/kudu-hive/src/test/java/org/apache/kudu/hive/metastore/TestKuduMetastorePlugin.java
@@ -77,19 +77,21 @@ public class TestKuduMetastorePlugin {
   }
 
   /**
-   * @return a valid Kudu table descriptor.
+   * @return a Kudu table descriptor given the storage handler type.
    */
-  private static Table newTable(String name) {
+  private static Table newKuduTable(String name, String storageHandler) {
     Table table = new Table();
     table.setDbName("default");
     table.setTableName(name);
     table.setTableType(TableType.MANAGED_TABLE.toString());
     table.putToParameters(hive_metastoreConstants.META_TABLE_STORAGE,
-                          KuduMetastorePlugin.KUDU_STORAGE_HANDLER);
-    table.putToParameters(KuduMetastorePlugin.KUDU_TABLE_ID_KEY,
-                          UUID.randomUUID().toString());
-    table.putToParameters(KuduMetastorePlugin.KUDU_MASTER_ADDRS_KEY,
-                          "localhost");
+                          storageHandler);
+    if (!storageHandler.equals(KuduMetastorePlugin.LEGACY_KUDU_STORAGE_HANDLER)) {
+      table.putToParameters(KuduMetastorePlugin.KUDU_TABLE_ID_KEY,
+                            UUID.randomUUID().toString());
+      table.putToParameters(KuduMetastorePlugin.KUDU_MASTER_ADDRS_KEY,
+                           "localhost");
+    }
 
     // The HMS will NPE if the storage descriptor and partition keys aren't set...
     StorageDescriptor sd = new StorageDescriptor();
@@ -104,6 +106,20 @@ public class TestKuduMetastorePlugin {
     return table;
   }
 
+  /**
+   * @return a legacy Kudu table descriptor.
+   */
+  private static Table newLegacyTable(String name) {
+    return newKuduTable(name, KuduMetastorePlugin.LEGACY_KUDU_STORAGE_HANDLER);
+  }
+
+  /**
+   * @return a valid Kudu table descriptor.
+   */
+  private static Table newTable(String name) {
+    return newKuduTable(name, KuduMetastorePlugin.KUDU_STORAGE_HANDLER);
+  }
+
   @Test
   public void testCreateTableHandler() throws Exception {
     // A non-Kudu table with a Kudu table ID should be rejected.
@@ -118,18 +134,6 @@ public class TestKuduMetastorePlugin {
           "non-Kudu table entry must not contain a table ID property"));
     }
 
-    // A non-Kudu table with a Kudu master address should be rejected.
-    try {
-      Table table = newTable("table");
-      table.getParameters().remove(hive_metastoreConstants.META_TABLE_STORAGE);
-      table.getParameters().remove(KuduMetastorePlugin.KUDU_TABLE_ID_KEY);
-      client.createTable(table);
-      fail();
-    } catch (TException e) {
-      assertTrue(e.getMessage().contains(
-          "non-Kudu table entry must not contain a Master addresses property"));
-    }
-
     // A Kudu table without a Kudu table ID.
     try {
       Table table = newTable("table");
@@ -168,9 +172,11 @@ public class TestKuduMetastorePlugin {
 
   @Test
   public void testAlterTableHandler() throws Exception {
-    // Test altering a Kudu table.
+    // Test altering a Kudu (or a legacy) table.
     Table table = newTable("table");
     client.createTable(table, masterContext());
+    Table legacyTable = newLegacyTable("legacy_table");
+    client.createTable(legacyTable, masterContext());
     try {
 
       // Try to alter the Kudu table with a different table ID.
@@ -244,6 +250,21 @@ public class TestKuduMetastorePlugin {
 
       // Check that altering the table succeeds.
       client.alter_table(table.getDbName(), table.getTableName(), table);
+
+      // Check that altering the legacy table with Kudu storage handler
+      // succeeds.
+      {
+        Table alteredTable = legacyTable.deepCopy();
+        alteredTable.putToParameters(hive_metastoreConstants.META_TABLE_STORAGE,
+                                     KuduMetastorePlugin.KUDU_STORAGE_HANDLER);
+        alteredTable.putToParameters(KuduMetastorePlugin.KUDU_TABLE_ID_KEY,
+                                     UUID.randomUUID().toString());
+        alteredTable.putToParameters(KuduMetastorePlugin.KUDU_MASTER_ADDRS_KEY,
+                                    "localhost");
+        client.alter_table(legacyTable.getDbName(), legacyTable.getTableName(),
+                           alteredTable);
+      }
+
     } finally {
       client.dropTable(table.getDbName(), table.getTableName());
     }

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/client/client-test-util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/client-test-util.cc b/src/kudu/client/client-test-util.cc
index e1b0333..847a682 100644
--- a/src/kudu/client/client-test-util.cc
+++ b/src/kudu/client/client-test-util.cc
@@ -28,6 +28,7 @@
 #include "kudu/client/scan_batch.h"
 #include "kudu/client/schema.h"
 #include "kudu/client/write_op.h"
+#include "kudu/common/schema.h"
 #include "kudu/gutil/stl_util.h"
 #include "kudu/util/status.h"
 #include "kudu/util/test_macros.h"
@@ -136,5 +137,9 @@ KuduSchema KuduSchemaFromSchema(const Schema& schema) {
   return KuduSchema(schema);
 }
 
+Schema SchemaFromKuduSchema(const KuduSchema& schema) {
+  return Schema(*schema.schema_);
+}
+
 } // namespace client
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/client/client-test-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/client-test-util.h b/src/kudu/client/client-test-util.h
index 244235a..1939785 100644
--- a/src/kudu/client/client-test-util.h
+++ b/src/kudu/client/client-test-util.h
@@ -63,5 +63,8 @@ Status ScanToStrings(KuduScanner* scanner,
 // Convert a kudu::Schema to a kudu::client::KuduSchema.
 KuduSchema KuduSchemaFromSchema(const Schema& schema);
 
+// Convert a kudu::client::KuduSchema to a kudu::Schema.
+Schema SchemaFromKuduSchema(const KuduSchema& schema);
+
 } // namespace client
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/client/schema.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/schema.h b/src/kudu/client/schema.h
index 4e46414..c8d79e8 100644
--- a/src/kudu/client/schema.h
+++ b/src/kudu/client/schema.h
@@ -590,7 +590,7 @@ class KUDU_EXPORT KuduSchema {
   friend class tools::ReplicaDumper;
 
   friend KuduSchema KuduSchemaFromSchema(const Schema& schema);
-
+  friend Schema SchemaFromKuduSchema(const KuduSchema& schema);
 
   // For use by kudu tests.
   explicit KuduSchema(const Schema& schema);

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/common/common.proto
----------------------------------------------------------------------
diff --git a/src/kudu/common/common.proto b/src/kudu/common/common.proto
index 54016bc..0a44d57 100644
--- a/src/kudu/common/common.proto
+++ b/src/kudu/common/common.proto
@@ -68,6 +68,12 @@ enum EncodingType {
   BIT_SHUFFLE = 6;
 }
 
+enum HmsMode {
+  NONE = 0;
+  ENABLE_HIVE_METASTORE = 1;
+  ENABLE_METASTORE_INTEGRATION = 2;
+};
+
 // Holds detailed attributes for the column. Only certain fields will be set,
 // depending on the type of the column.
 message ColumnTypeAttributesPB {

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/hms/hms_catalog-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_catalog-test.cc b/src/kudu/hms/hms_catalog-test.cc
index 5a25d00..12066f6 100644
--- a/src/kudu/hms/hms_catalog-test.cc
+++ b/src/kudu/hms/hms_catalog-test.cc
@@ -20,6 +20,8 @@
 #include <map>
 #include <memory>
 #include <string>
+#include <type_traits>
+#include <utility>
 #include <vector>
 
 #include <gflags/gflags_declare.h>
@@ -27,6 +29,7 @@
 
 #include "kudu/common/common.pb.h"
 #include "kudu/common/schema.h"
+#include "kudu/gutil/map-util.h" // IWYU pragma: keep
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/hms/hive_metastore_types.h"
 #include "kudu/hms/hms_client.h"
@@ -42,6 +45,7 @@ DECLARE_string(hive_metastore_uris);
 DECLARE_bool(hive_metastore_sasl_enabled);
 
 using kudu::rpc::SaslProtection;
+using std::make_pair;
 using std::string;
 using std::unique_ptr;
 using std::vector;
@@ -215,6 +219,36 @@ class HmsCatalogTest : public KuduTest {
     }
   }
 
+  Status CreateLegacyTable(const string& database_name,
+                           const string& table_name,
+                           const string& table_type) {
+    hive::Table table;
+    string kudu_table_name(table_name);
+    table.dbName = database_name;
+    table.tableName = table_name;
+    table.tableType = table_type;
+    if (table_type == HmsClient::kManagedTable) {
+      kudu_table_name = Substitute("$0$1.$2", HmsClient::kLegacyTablePrefix,
+                                   database_name, table_name);
+    }
+
+    table.__set_parameters({
+        make_pair(HmsClient::kStorageHandlerKey,
+                  HmsClient::kLegacyKuduStorageHandler),
+        make_pair(HmsClient::kLegacyKuduTableNameKey,
+                  kudu_table_name),
+        make_pair(HmsClient::kKuduMasterAddrsKey,
+                  kMasterAddrs),
+    });
+
+    // TODO(Hao): Remove this once HIVE-19253 is fixed.
+    if (table_type == HmsClient::kExternalTable) {
+      table.parameters[HmsClient::kExternalTableKey] = "TRUE";
+    }
+
+    return hms_client_->CreateTable(table);
+  }
+
   void CheckTableDoesNotExist(const string& database_name, const string& table_name) {
     hive::Table table;
     Status s = hms_client_->GetTable(database_name, table_name, &table);
@@ -332,6 +366,38 @@ TEST_F(HmsCatalogTest, TestExternalTable) {
   NO_FATALS(CheckTableDoesNotExist("default", "bogus_table_name"));
 }
 
+TEST_F(HmsCatalogTest, TestRetrieveTables) {
+  const string kHmsDatabase = "db";
+  const string kManagedTableName = "managed_table";
+  const string kExternalTableName = "external_table";
+  const string kNonKuduTableName = "non_kudu_table";
+
+  // Create a Impala managed table, a external table and a non Kudu table.
+  hive::Database db;
+  db.name = kHmsDatabase;
+  ASSERT_OK(hms_client_->CreateDatabase(db));
+  ASSERT_OK(CreateLegacyTable(kHmsDatabase,
+                              kManagedTableName,
+                              HmsClient::kManagedTable));
+  hive::Table table;
+  ASSERT_OK(hms_client_->GetTable(kHmsDatabase, kManagedTableName, &table));
+  ASSERT_OK(CreateLegacyTable(kHmsDatabase,
+                              kExternalTableName,
+                              HmsClient::kExternalTable));
+  ASSERT_OK(hms_client_->GetTable(kHmsDatabase, kExternalTableName, &table));
+
+  hive::Table non_kudu_table;
+  non_kudu_table.dbName = kHmsDatabase;
+  non_kudu_table.tableName = kNonKuduTableName;
+  ASSERT_OK(hms_client_->CreateTable(non_kudu_table));
+  ASSERT_OK(hms_client_->GetTable(kHmsDatabase, kNonKuduTableName, &table));
+
+  // Retrieve all tables and ensure all entries are found.
+  vector<hive::Table> hms_tables;
+  ASSERT_OK(hms_catalog_->RetrieveTables(&hms_tables));
+  ASSERT_EQ(3, hms_tables.size());
+}
+
 // Checks that the HmsCatalog handles reconnecting to the metastore after a connection failure.
 TEST_F(HmsCatalogTest, TestReconnect) {
   // TODO(dan): Figure out a way to test failover among HA HMS instances. The

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/hms/hms_catalog.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_catalog.cc b/src/kudu/hms/hms_catalog.cc
index b32348c..05370dc 100644
--- a/src/kudu/hms/hms_catalog.cc
+++ b/src/kudu/hms/hms_catalog.cc
@@ -43,6 +43,7 @@
 #include "kudu/util/slice.h"
 #include "kudu/util/threadpool.h"
 
+using std::move;
 using std::string;
 using std::vector;
 using strings::Substitute;
@@ -95,6 +96,10 @@ TAG_FLAG(hive_metastore_conn_timeout, runtime);
 namespace kudu {
 namespace hms {
 
+const char* const HmsCatalog::kInvalidTableError = "when the Hive Metastore integration "
+    "is enabled, Kudu table names must be a period "
+    "('.') separated database and table name pair";
+
 HmsCatalog::HmsCatalog(string master_addresses)
     : master_addresses_(std::move(master_addresses)),
       hms_client_(HostPort("", 0), hms_client_options_),
@@ -154,6 +159,42 @@ Status HmsCatalog::DropTable(const string& id, const string& name) {
   });
 }
 
+Status HmsCatalog::UpgradeLegacyImpalaTable(const string& id,
+                                            const std::string& db_name,
+                                            const std::string& tb_name,
+                                            const Schema& schema) {
+  return Execute([&] (HmsClient* client) {
+    hive::Table table;
+    RETURN_NOT_OK(client->GetTable(db_name, tb_name, &table));
+    if (table.parameters[HmsClient::kStorageHandlerKey] !=
+        HmsClient::kLegacyKuduStorageHandler) {
+      return Status::IllegalState("non-legacy table cannot be upgraded");
+    }
+
+    RETURN_NOT_OK(PopulateTable(id, Substitute("$0.$1", db_name, tb_name),
+                                schema, master_addresses_, &table));
+    return client->AlterTable(db_name, tb_name, table, EnvironmentContext());
+  });
+}
+
+Status HmsCatalog::RetrieveTables(vector<hive::Table>* hms_tables) {
+  return Execute([&] (HmsClient* client) {
+    vector<string> database_names;
+    RETURN_NOT_OK(client->GetAllDatabases(&database_names));
+    for (const auto &database_name : database_names) {
+      vector<string> table_names;
+      RETURN_NOT_OK(client->GetAllTables(database_name, &table_names));
+      for (const auto &table_name : table_names) {
+        hive::Table hms_table;
+        RETURN_NOT_OK(client->GetTable(database_name, table_name, &hms_table));
+        hms_tables->emplace_back(move(hms_table));
+      }
+    }
+
+    return Status::OK();
+  });
+}
+
 Status HmsCatalog::AlterTable(const string& id,
                               const string& name,
                               const string& new_name,
@@ -388,6 +429,9 @@ Status HmsCatalog::PopulateTable(const string& id,
   table->parameters[HmsClient::kKuduMasterAddrsKey] = master_addresses;
   table->parameters[HmsClient::kStorageHandlerKey] = HmsClient::kKuduStorageHandler;
 
+  // Remove the deprecated Kudu-specific field 'kudu.table_name'.
+  EraseKeyReturnValuePtr(&table->parameters, HmsClient::kLegacyKuduTableNameKey);
+
   // Overwrite the entire set of columns.
   vector<hive::FieldSchema> fields;
   for (const auto& column : schema.columns()) {
@@ -411,10 +455,7 @@ Status HmsCatalog::ParseTableName(const string& table,
 
   vector<string> identifiers = strings::Split(table, ".");
   if (identifiers.size() != 2) {
-    return Status::InvalidArgument(
-        "when the Hive Metastore integration is enabled, Kudu table names must be a "
-        "period ('.') separated database and table name pair",
-        table);
+    return Status::InvalidArgument(kInvalidTableError, table);
   }
 
   *hms_database = std::move(identifiers[0]);

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/hms/hms_catalog.h
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_catalog.h b/src/kudu/hms/hms_catalog.h
index 330c29d..0f0c690 100644
--- a/src/kudu/hms/hms_catalog.h
+++ b/src/kudu/hms/hms_catalog.h
@@ -49,6 +49,8 @@ namespace hms {
 class HmsCatalog {
  public:
 
+  static const char* const kInvalidTableError;
+
   explicit HmsCatalog(std::string master_addresses);
   ~HmsCatalog();
 
@@ -83,6 +85,20 @@ class HmsCatalog {
                     const std::string& new_name,
                     const Schema& schema) WARN_UNUSED_RESULT;
 
+  // Upgrades a legacy Impala table entry in the HMS.
+  //
+  // This method will fail if the HMS is unreachable, if the table is not a
+  // legacy table, or if the table entry in not in the HMS.
+  Status UpgradeLegacyImpalaTable(const std::string& id,
+                                  const std::string& db_name,
+                                  const std::string& tb_name,
+                                  const Schema& schema) WARN_UNUSED_RESULT;
+
+  // Retrieves all tables in the HMS.
+  //
+  // This method will fail if the HMS is unreachable.
+  Status RetrieveTables(std::vector<hive::Table>* hms_tables) WARN_UNUSED_RESULT;
+
   // Validates the hive_metastore_uris gflag.
   static bool ValidateUris(const char* flag_name, const std::string& metastore_uris);
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/hms/hms_client.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_client.cc b/src/kudu/hms/hms_client.cc
index 3f1f948..3d398c5 100644
--- a/src/kudu/hms/hms_client.cc
+++ b/src/kudu/hms/hms_client.cc
@@ -112,6 +112,10 @@ namespace hms {
     return Status::RuntimeError((msg), e.what()); \
   }
 
+const char* const HmsClient::kLegacyKuduStorageHandler =
+  "com.cloudera.kudu.hive.KuduStorageHandler";
+const char* const HmsClient::kLegacyKuduTableNameKey = "kudu.table_name";
+const char* const HmsClient::kLegacyTablePrefix = "impala::";
 const char* const HmsClient::kKuduTableIdKey = "kudu.table_id";
 const char* const HmsClient::kKuduMasterAddrsKey = "kudu.master_addresses";
 const char* const HmsClient::kKuduMasterEventKey = "kudu.master_event";
@@ -123,11 +127,13 @@ const char* const HmsClient::kDisallowIncompatibleColTypeChanges =
   "hive.metastore.disallow.incompatible.col.type.changes";
 const char* const HmsClient::kDbNotificationListener =
   "org.apache.hive.hcatalog.listener.DbNotificationListener";
+const char* const HmsClient::kExternalTableKey = "EXTERNAL";
 const char* const HmsClient::kStorageHandlerKey = "storage_handler";
 const char* const HmsClient::kKuduMetastorePlugin =
   "org.apache.kudu.hive.metastore.KuduMetastorePlugin";
 
 const char* const HmsClient::kManagedTable = "MANAGED_TABLE";
+const char* const HmsClient::kExternalTable = "EXTERNAL_TABLE";
 
 const uint16_t HmsClient::kDefaultHmsPort = 9083;
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/hms/hms_client.h
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_client.h b/src/kudu/hms/hms_client.h
index baee762..157043a 100644
--- a/src/kudu/hms/hms_client.h
+++ b/src/kudu/hms/hms_client.h
@@ -87,6 +87,10 @@ struct HmsClientOptions {
 class HmsClient {
  public:
 
+  static const char* const kExternalTableKey;
+  static const char* const kLegacyKuduStorageHandler;
+  static const char* const kLegacyKuduTableNameKey;
+  static const char* const kLegacyTablePrefix;
   static const char* const kKuduTableIdKey;
   static const char* const kKuduMasterAddrsKey;
   static const char* const kKuduMasterEventKey;;
@@ -100,6 +104,7 @@ class HmsClient {
 
   // See org.apache.hadoop.hive.metastore.TableType.
   static const char* const kManagedTable;
+  static const char* const kExternalTable;
 
   static const uint16_t kDefaultHmsPort;
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/integration-tests/alter_table-randomized-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/alter_table-randomized-test.cc b/src/kudu/integration-tests/alter_table-randomized-test.cc
index ceda666..89d2371 100644
--- a/src/kudu/integration-tests/alter_table-randomized-test.cc
+++ b/src/kudu/integration-tests/alter_table-randomized-test.cc
@@ -36,6 +36,7 @@
 #include "kudu/client/shared_ptr.h"
 #include "kudu/client/value.h"
 #include "kudu/client/write_op.h"
+#include "kudu/common/common.pb.h"
 #include "kudu/common/partial_row.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/port.h"
@@ -99,7 +100,7 @@ class AlterTableRandomized : public KuduTest {
 
     ExternalMiniClusterOptions opts;
     opts.num_tablet_servers = 3;
-    opts.enable_hive_metastore = true;
+    opts.hms_mode = HmsMode::ENABLE_METASTORE_INTEGRATION;
     // This test produces tables with lots of columns. With container preallocation,
     // we end up using quite a bit of disk space. So, we disable it.
     opts.extra_tserver_flags.emplace_back("--log_container_preallocate_bytes=0");

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/integration-tests/master_failover-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_failover-itest.cc b/src/kudu/integration-tests/master_failover-itest.cc
index 06498a4..54248ef 100644
--- a/src/kudu/integration-tests/master_failover-itest.cc
+++ b/src/kudu/integration-tests/master_failover-itest.cc
@@ -29,6 +29,7 @@
 #include "kudu/client/client.h"
 #include "kudu/client/schema.h"
 #include "kudu/client/shared_ptr.h"
+#include "kudu/common/common.pb.h"
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/strip.h" // IWYU pragma: keep
@@ -80,7 +81,7 @@ class MasterFailoverTest : public KuduTest {
   MasterFailoverTest() {
     opts_.num_masters = 3;
     opts_.num_tablet_servers = kNumTabletServerReplicas;
-    opts_.enable_hive_metastore = true;
+    opts_.hms_mode = HmsMode::ENABLE_METASTORE_INTEGRATION;
 
     // Reduce various timeouts below as to make the detection of
     // leader master failures (specifically, failures as result of

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/integration-tests/master_hms-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_hms-itest.cc b/src/kudu/integration-tests/master_hms-itest.cc
index f30f89f..1648595 100644
--- a/src/kudu/integration-tests/master_hms-itest.cc
+++ b/src/kudu/integration-tests/master_hms-itest.cc
@@ -29,6 +29,7 @@
 #include "kudu/client/client.h"
 #include "kudu/client/schema.h"
 #include "kudu/client/shared_ptr.h"
+#include "kudu/common/common.pb.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/hms/hive_metastore_types.h"
 #include "kudu/hms/hms_client.h"
@@ -66,7 +67,7 @@ class MasterHmsTest : public ExternalMiniClusterITestBase {
     ExternalMiniClusterITestBase::SetUp();
 
     ExternalMiniClusterOptions opts;
-    opts.enable_hive_metastore = true;
+    opts.hms_mode = HmsMode::ENABLE_METASTORE_INTEGRATION;
     opts.num_masters = 1;
     opts.num_tablet_servers = 1;
     StartClusterWithOpts(std::move(opts));

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/mini-cluster/external_mini_cluster-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/external_mini_cluster-test.cc b/src/kudu/mini-cluster/external_mini_cluster-test.cc
index cb25156..7eb209e 100644
--- a/src/kudu/mini-cluster/external_mini_cluster-test.cc
+++ b/src/kudu/mini-cluster/external_mini_cluster-test.cc
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include "kudu/mini-cluster/external_mini_cluster.h"
+
 #include <iosfwd>
 #include <ostream>
 #include <string>
@@ -25,11 +27,11 @@
 #include <glog/stl_logging.h> // IWYU pragma: keep
 #include <gtest/gtest.h>
 
+#include "kudu/common/common.pb.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/strings/util.h" // IWYU pragma: keep
 #include "kudu/hms/hms_client.h"
 #include "kudu/hms/mini_hms.h"
-#include "kudu/mini-cluster/external_mini_cluster.h"
 #include "kudu/mini-cluster/mini_cluster.h"
 #include "kudu/security/test/mini_kdc.h"
 #include "kudu/util/monotime.h"
@@ -124,7 +126,9 @@ TEST_F(ExternalMiniClusterTest, TestKerberosReacquire) {
 TEST_P(ExternalMiniClusterTest, TestBasicOperation) {
   ExternalMiniClusterOptions opts;
   opts.enable_kerberos = GetParam().first == Kerberos::ENABLED;
-  opts.enable_hive_metastore = GetParam().second == HiveMetastore::ENABLED;
+  if (GetParam().second == HiveMetastore::ENABLED) {
+    opts.hms_mode = HmsMode::ENABLE_HIVE_METASTORE;
+  }
 
   opts.num_masters = 3;
   opts.num_tablet_servers = 3;
@@ -188,7 +192,7 @@ TEST_P(ExternalMiniClusterTest, TestBasicOperation) {
   ASSERT_EQ(ts_http.ToString(), ts->bound_http_hostport().ToString());
 
   // Verify that the HMS is reachable.
-  if (opts.enable_hive_metastore) {
+  if (opts.hms_mode == HmsMode::ENABLE_HIVE_METASTORE) {
     hms::HmsClientOptions hms_client_opts;
     hms_client_opts.enable_kerberos = opts.enable_kerberos;
     hms::HmsClient hms_client(cluster.hms()->address(), hms_client_opts);

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/mini-cluster/external_mini_cluster.cc
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/external_mini_cluster.cc b/src/kudu/mini-cluster/external_mini_cluster.cc
index 8b300e3..7db8edb 100644
--- a/src/kudu/mini-cluster/external_mini_cluster.cc
+++ b/src/kudu/mini-cluster/external_mini_cluster.cc
@@ -106,7 +106,7 @@ ExternalMiniClusterOptions::ExternalMiniClusterOptions()
       bind_mode(MiniCluster::kDefaultBindMode),
       num_data_dirs(1),
       enable_kerberos(false),
-      enable_hive_metastore(false),
+      hms_mode(HmsMode::NONE),
       logtostderr(true),
       start_process_timeout(MonoDelta::FromSeconds(30)) {
 }
@@ -186,7 +186,8 @@ Status ExternalMiniCluster::Start() {
                           "could not set krb5 client env");
   }
 
-  if (opts_.enable_hive_metastore) {
+  if (opts_.hms_mode == HmsMode::ENABLE_HIVE_METASTORE ||
+      opts_.hms_mode == HmsMode::ENABLE_METASTORE_INTEGRATION) {
     hms_.reset(new hms::MiniHms());
 
     if (opts_.enable_kerberos) {
@@ -233,6 +234,9 @@ void ExternalMiniCluster::ShutdownNodes(ClusterNodes nodes) {
 Status ExternalMiniCluster::Restart() {
   for (const scoped_refptr<ExternalMaster>& master : masters_) {
     if (master && master->IsShutdown()) {
+      if (opts_.hms_mode == HmsMode::ENABLE_METASTORE_INTEGRATION) {
+        master->SetMetastoreIntegration(hms_->uris(), opts_.enable_kerberos);
+      }
       RETURN_NOT_OK_PREPEND(master->Restart(), "Cannot restart master bound at: " +
                                                master->bound_rpc_hostport().ToString());
     }
@@ -252,6 +256,10 @@ Status ExternalMiniCluster::Restart() {
   return Status::OK();
 }
 
+void ExternalMiniCluster::EnableMetastoreIntegration() {
+  opts_.hms_mode = HmsMode::ENABLE_METASTORE_INTEGRATION;
+}
+
 void ExternalMiniCluster::SetDaemonBinPath(string daemon_bin_path) {
   opts_.daemon_bin_path = std::move(daemon_bin_path);
   for (auto& master : masters_) {
@@ -367,7 +375,7 @@ Status ExternalMiniCluster::StartMasters() {
     opts.extra_flags = SubstituteInFlags(flags, i);
     opts.start_process_timeout = opts_.start_process_timeout;
     opts.rpc_bind_address = master_rpc_addrs[i];
-    if (opts_.enable_hive_metastore) {
+    if (opts_.hms_mode == HmsMode::ENABLE_METASTORE_INTEGRATION) {
       opts.extra_flags.emplace_back(Substitute("--hive_metastore_uris=$0", hms_->uris()));
       opts.extra_flags.emplace_back(Substitute("--hive_metastore_sasl_enabled=$0",
                                                opts_.enable_kerberos));
@@ -858,6 +866,13 @@ void ExternalDaemon::SetExePath(string exe) {
   opts_.exe = std::move(exe);
 }
 
+void ExternalDaemon::SetMetastoreIntegration(const string& hms_uris,
+                                             bool enable_kerberos) {
+  opts_.extra_flags.emplace_back(Substitute("--hive_metastore_uris=$0", hms_uris));
+  opts_.extra_flags.emplace_back(Substitute("--hive_metastore_sasl_enabled=$0",
+                                            enable_kerberos));
+}
+
 Status ExternalDaemon::Pause() {
   if (!process_) {
     return Status::IllegalState(Substitute(

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/mini-cluster/external_mini_cluster.h
----------------------------------------------------------------------
diff --git a/src/kudu/mini-cluster/external_mini_cluster.h b/src/kudu/mini-cluster/external_mini_cluster.h
index 238f8f9..15c9b02 100644
--- a/src/kudu/mini-cluster/external_mini_cluster.h
+++ b/src/kudu/mini-cluster/external_mini_cluster.h
@@ -32,6 +32,7 @@
 #include <gtest/gtest_prod.h>
 
 #include "kudu/client/shared_ptr.h"
+#include "kudu/common/common.pb.h"
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/ref_counted.h"
@@ -137,10 +138,11 @@ struct ExternalMiniClusterOptions {
   // Default: false.
   bool enable_kerberos;
 
-  // If true, set up a Hive Metastore as part of this ExternalMiniCluster.
+  // Tri state mode flag that indicates whether to set up a Hive Metastore as
+  // part of this ExternalMiniCluster and enable Kudu Hive Metastore integration.
   //
-  // Default: false.
-  bool enable_hive_metastore;
+  // Default: HmsMode::NONE.
+  HmsMode hms_mode;
 
   // If true, sends logging output to stderr instead of a log file.
   //
@@ -317,6 +319,10 @@ class ExternalMiniCluster : public MiniCluster {
                  const std::string& flag,
                  const std::string& value) WARN_UNUSED_RESULT;
 
+  // Enable Hive Metastore integration.
+  // Overrides 'enable_metastore_integration' set by ExternalMiniClusterOptions.
+  void EnableMetastoreIntegration();
+
   // Set the path where daemon binaries can be found.
   // Overrides 'daemon_bin_path' set by ExternalMiniClusterOptions.
   // The cluster must be shut down before calling this method.
@@ -412,6 +418,12 @@ class ExternalDaemon : public RefCountedThreadSafe<ExternalDaemon> {
   // The daemon must be shut down before calling this method.
   void SetExePath(std::string exe);
 
+  // Set '--hive_metastore_uris' and '--hive_metastore_sasl_enabled'
+  // to enable Hive Metastore integration.
+  // Overrides the extra flags specified in the constructor.
+  void SetMetastoreIntegration(const std::string& hms_uris,
+                               bool enable_kerberos);
+
   // Enable Kerberos for this daemon. This creates a Kerberos principal
   // and keytab, and sets the appropriate environment variables in the
   // subprocess such that the server will use Kerberos authentication.

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/tools/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/tools/CMakeLists.txt b/src/kudu/tools/CMakeLists.txt
index 1a7a0fa..fd3ccb4 100644
--- a/src/kudu/tools/CMakeLists.txt
+++ b/src/kudu/tools/CMakeLists.txt
@@ -86,6 +86,7 @@ target_link_libraries(ksck
 add_executable(kudu
   tool_action_cluster.cc
   tool_action_fs.cc
+  tool_action_hms.cc
   tool_action_local_replica.cc
   tool_action_master.cc
   tool_action_pbc.cc
@@ -106,6 +107,7 @@ target_link_libraries(kudu
   krpc
   ksck
   kudu_client
+  kudu_client_test_util
   kudu_common
   kudu_fs
   kudu_util

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/tools/kudu-tool-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 6e8aeae..9f596a2 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -22,10 +22,12 @@
 #include <cstdio>
 #include <cstdlib>
 #include <iterator>
+#include <map>
 #include <memory>
 #include <sstream>
 #include <string>
 #include <tuple>  // IWYU pragma: keep
+#include <type_traits>
 #include <unordered_map>
 #include <utility>
 #include <vector>
@@ -63,6 +65,7 @@
 #include "kudu/fs/block_manager.h"
 #include "kudu/fs/fs_manager.h"
 #include "kudu/fs/fs_report.h"
+#include "kudu/gutil/map-util.h"
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/stl_util.h"
@@ -72,6 +75,10 @@
 #include "kudu/gutil/strings/strcat.h"
 #include "kudu/gutil/strings/strip.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/gutil/strings/util.h"
+#include "kudu/hms/hive_metastore_types.h"
+#include "kudu/hms/hms_client.h"
+#include "kudu/hms/mini_hms.h"
 #include "kudu/integration-tests/cluster_itest_util.h"
 #include "kudu/integration-tests/cluster_verifier.h"
 #include "kudu/integration-tests/mini_cluster_fs_inspector.h"
@@ -145,6 +152,8 @@ using consensus::ReplicateMsg;
 using fs::BlockDeletionTransaction;
 using fs::FsReport;
 using fs::WritableBlock;
+using hms::HmsClient;
+using hms::HmsClientOptions;
 using itest::MiniClusterFsInspector;
 using itest::TServerDetails;
 using log::Log;
@@ -152,6 +161,7 @@ using log::LogOptions;
 using rpc::RpcController;
 using std::back_inserter;
 using std::copy;
+using std::make_pair;
 using std::ostringstream;
 using std::pair;
 using std::string;
@@ -190,11 +200,12 @@ class ToolTest : public KuduTest {
                  string* stdout = nullptr,
                  string* stderr = nullptr,
                  vector<string>* stdout_lines = nullptr,
-                 vector<string>* stderr_lines = nullptr) const {
+                 vector<string>* stderr_lines = nullptr,
+                 const string& in = "") const {
     string out;
     string err;
     Status s = RunKuduTool(strings::Split(arg_str, " ", strings::SkipEmpty()),
-                           &out, &err);
+                           &out, &err, in);
     if (stdout) {
       *stdout = out;
       StripTrailingNewline(stdout);
@@ -236,6 +247,15 @@ class ToolTest : public KuduTest {
     ASSERT_OK(s);
   }
 
+  void RunActionStdinStdoutString(const string& arg_str, const string& stdin,
+                                  string* stdout) const {
+    string stderr;
+    Status s = RunTool(arg_str, stdout, &stderr, nullptr, nullptr, stdin);
+    SCOPED_TRACE(*stdout);
+    SCOPED_TRACE(stderr);
+    ASSERT_OK(s);
+  }
+
   Status RunActionStderrString(const string& arg_str, string* stderr) const {
     return RunTool(arg_str, nullptr, stderr, nullptr, nullptr);
   }
@@ -1896,6 +1916,194 @@ TEST_F(ToolTest, TestRenameColumn) {
   ASSERT_STR_CONTAINS(table->schema().ToString(), kNewColumnName);
 }
 
+Status CreateHmsTable(HmsClient* client,
+                      const string& database_name,
+                      const string& table_name,
+                      const string& table_type) {
+  hive::Table table;
+  string kudu_table_name(table_name);
+  table.dbName = database_name;
+  table.tableName = table_name;
+  table.tableType = table_type;
+  if (table_type == HmsClient::kManagedTable) {
+    kudu_table_name = Substitute("$0$1.$2", HmsClient::kLegacyTablePrefix,
+                                 database_name, table_name);
+  }
+
+  table.__set_parameters({
+      make_pair(HmsClient::kStorageHandlerKey,
+                HmsClient::kLegacyKuduStorageHandler),
+      make_pair(HmsClient::kLegacyKuduTableNameKey,
+                kudu_table_name),
+      make_pair(HmsClient::kKuduMasterAddrsKey,
+                "Master_Addrs"),
+  });
+
+  // TODO(Hao): Remove this once HIVE-19253 is fixed.
+  if (table_type == HmsClient::kExternalTable) {
+    table.parameters[HmsClient::kExternalTableKey] = "TRUE";
+  }
+
+  return client->CreateTable(table);
+}
+
+Status CreateKuduTable(const shared_ptr<KuduClient>& kudu_client,
+                       const string& table_name) {
+  KuduSchemaBuilder schema_builder;
+  schema_builder.AddColumn("foo")
+      ->Type(client::KuduColumnSchema::INT32)
+      ->NotNull()
+      ->PrimaryKey();
+  KuduSchema schema;
+  RETURN_NOT_OK(schema_builder.Build(&schema));
+  unique_ptr<client::KuduTableCreator> table_creator(kudu_client->NewTableCreator());
+  return table_creator->table_name(table_name)
+              .schema(&schema)
+              .num_replicas(1)
+              .set_range_partition_columns({ "foo" })
+              .Create();
+}
+
+bool IsValidTableName(const string& table_name) {
+  vector<string> identifiers = strings::Split(table_name, ".");
+  return identifiers.size() ==2 &&
+         !HasPrefixString(table_name, HmsClient::kLegacyTablePrefix);
+}
+
+void ValidateHmsEntries(HmsClient* hms_client,
+                        const shared_ptr<KuduClient>& kudu_client,
+                        const string& database_name,
+                        const string& table_name,
+                        const string& master_addr) {
+  hive::Table hms_table;
+  ASSERT_OK(hms_client->GetTable(database_name, table_name, &hms_table));
+  shared_ptr<KuduTable> kudu_table;
+  ASSERT_OK(kudu_client->OpenTable(Substitute("$0.$1", database_name, table_name),
+                                   &kudu_table));
+  ASSERT_TRUE(hms_table.parameters[HmsClient::kStorageHandlerKey] ==
+                  HmsClient::kKuduStorageHandler &&
+              hms_table.parameters[HmsClient::kKuduTableIdKey] ==
+                  kudu_table->id() &&
+              hms_table.parameters[HmsClient::kKuduMasterAddrsKey] == master_addr &&
+              !ContainsKey(hms_table.parameters, HmsClient::kLegacyKuduTableNameKey));
+}
+
+TEST_F(ToolTest, TestHmsUpgrade) {
+  ExternalMiniClusterOptions opts;
+  opts.hms_mode = HmsMode::ENABLE_HIVE_METASTORE;
+  NO_FATALS(StartExternalMiniCluster(std::move(opts)));
+
+  string master_addr = cluster_->master()->bound_rpc_addr().ToString();
+  HmsClientOptions hms_client_opts;
+  HmsClient hms_client(cluster_->hms()->address(), hms_client_opts);
+  ASSERT_OK(hms_client.Start());
+  ASSERT_TRUE(hms_client.IsConnected());
+
+  const string kDatabaseName = "my_db";
+  const string kDefaultDatabaseName = "default";
+  const string kManagedTableName = "managed_table";
+  const string kExternalTableName = "external_table";
+  const string kKuduTableName = "kudu_table";
+  shared_ptr<KuduClient> kudu_client;
+  ASSERT_OK(KuduClientBuilder()
+      .add_master_server_addr(master_addr)
+      .Build(&kudu_client));
+
+  // 1. Create a managed impala table in HMS and the corresponding table in Kudu.
+  {
+    string legacy_managed_table_name = Substitute("$0$1.$2", HmsClient::kLegacyTablePrefix,
+                                                  kDatabaseName, kManagedTableName);
+    ASSERT_OK(CreateKuduTable(kudu_client, legacy_managed_table_name));
+    shared_ptr<KuduTable> table;
+    ASSERT_OK(kudu_client->OpenTable(legacy_managed_table_name, &table));
+    hive::Database db;
+    db.name = kDatabaseName;
+    ASSERT_OK(hms_client.CreateDatabase(db));
+    ASSERT_OK(CreateHmsTable(&hms_client, kDatabaseName, kManagedTableName,
+                             HmsClient::kManagedTable));
+    hive::Table hms_table;
+    ASSERT_OK(hms_client.GetTable(kDatabaseName, kManagedTableName, &hms_table));
+    ASSERT_EQ(HmsClient::kManagedTable, hms_table.tableType);
+  }
+
+  // 2. Create an external impala table in HMS and the corresponding table in Kudu.
+  {
+    ASSERT_OK(CreateKuduTable(kudu_client, kExternalTableName));
+    shared_ptr<KuduTable> table;
+    ASSERT_OK(kudu_client->OpenTable(kExternalTableName, &table));
+    ASSERT_OK(CreateHmsTable(&hms_client, kDatabaseName, kExternalTableName,
+                             HmsClient::kExternalTable));
+    hive::Table hms_table;
+    ASSERT_OK(hms_client.GetTable(kDatabaseName, kExternalTableName, &hms_table));
+    ASSERT_EQ(HmsClient::kExternalTable, hms_table.tableType);
+  }
+
+  // 3. Create several non-impala Kudu tables. One with hive compatible name, and the
+  //    other ones with hive incompatible names.
+  {
+    ASSERT_OK(CreateKuduTable(kudu_client, kKuduTableName));
+    shared_ptr<KuduTable> table;
+    ASSERT_OK(kudu_client->OpenTable(kKuduTableName, &table));
+
+    ASSERT_OK(CreateKuduTable(kudu_client, "invalid#table"));
+    ASSERT_OK(kudu_client->OpenTable("invalid#table", &table));
+
+    ASSERT_OK(CreateKuduTable(kudu_client, "invalid.@"));
+    ASSERT_OK(kudu_client->OpenTable("invalid.@", &table));
+
+    ASSERT_OK(CreateKuduTable(kudu_client, "@.invalid"));
+    ASSERT_OK(kudu_client->OpenTable("@.invalid", &table));
+
+    ASSERT_OK(CreateKuduTable(kudu_client, "invalid"));
+    ASSERT_OK(kudu_client->OpenTable("invalid", &table));
+
+    ASSERT_OK(CreateKuduTable(kudu_client, "in.val.id"));
+    ASSERT_OK(kudu_client->OpenTable("in.val.id", &table));
+  }
+
+  {
+    vector<string> table_names;
+    ASSERT_OK(hms_client.GetAllTables(kDatabaseName, &table_names));
+    ASSERT_EQ(2, table_names.size());
+  }
+
+  // Upgrade the historical metadata in both Hive Metastore and Kudu.
+  string out;
+  NO_FATALS(RunActionStdinStdoutString(
+      Substitute("hms upgrade $0 $1 --unlock_experimental_flags=true "
+                 "--hive_metastore_uris=$2", master_addr,
+                 kDefaultDatabaseName, cluster_->hms()->uris()),
+      "valid_1\nvalid_2\nvalid_3\nvalid_4\nvalid_5\n", &out));
+
+  // Validate the Kudu table names and metadata format of hms entries.
+  {
+    vector<string> table_names;
+    ASSERT_OK(kudu_client->ListTables(&table_names));
+    ASSERT_EQ(8, table_names.size());
+    for (const auto& n : table_names) {
+      ASSERT_TRUE(IsValidTableName(n));
+    }
+
+    NO_FATALS(ValidateHmsEntries(&hms_client, kudu_client, kDatabaseName,
+                                 kManagedTableName, master_addr));
+    NO_FATALS(ValidateHmsEntries(&hms_client, kudu_client, kDatabaseName,
+                                 kExternalTableName, master_addr));
+    NO_FATALS(ValidateHmsEntries(&hms_client, kudu_client, kDefaultDatabaseName,
+                                 kKuduTableName, master_addr));
+    table_names.clear();
+    vector<string> db_names;
+    ASSERT_OK(hms_client.GetAllDatabases(&db_names));
+    ASSERT_EQ(2, db_names.size());
+    ASSERT_OK(hms_client.GetAllTables(kDatabaseName, &table_names));
+    ASSERT_EQ(2, table_names.size());
+    table_names.clear();
+    ASSERT_OK(hms_client.GetAllTables(kDefaultDatabaseName, &table_names));
+    ASSERT_EQ(6, table_names.size());
+  }
+
+  ASSERT_OK(hms_client.Stop());
+}
+
 // This test is parameterized on the serialization mode and Kerberos.
 class ControlShellToolTest :
     public ToolTest,

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/tools/tool.proto
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool.proto b/src/kudu/tools/tool.proto
index e6acb0e..63e3961 100644
--- a/src/kudu/tools/tool.proto
+++ b/src/kudu/tools/tool.proto
@@ -42,8 +42,9 @@ message CreateClusterRequestPB {
   // Whether or not the cluster should be Kerberized.
   optional bool enable_kerberos = 3;
 
-  // Whether or not to create a Hive Metastore and enable the HMS integration.
-  optional bool enable_hive_metastore = 7;
+  // Whether or not to create a Hive Metastore, and/or enable Kudu Hive
+  // Metastore integration.
+  optional HmsMode hms_mode = 7;
 
   // The directory where the cluster's data and logs should be placed.
   optional string cluster_root = 4;

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/tools/tool_action.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action.h b/src/kudu/tools/tool_action.h
index 8a86e36..d69d13e 100644
--- a/src/kudu/tools/tool_action.h
+++ b/src/kudu/tools/tool_action.h
@@ -309,6 +309,7 @@ class Action {
 // Returns new nodes for each major mode.
 std::unique_ptr<Mode> BuildClusterMode();
 std::unique_ptr<Mode> BuildFsMode();
+std::unique_ptr<Mode> BuildHmsMode();
 std::unique_ptr<Mode> BuildLocalReplicaMode();
 std::unique_ptr<Mode> BuildMasterMode();
 std::unique_ptr<Mode> BuildPbcMode();

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/tools/tool_action_hms.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_hms.cc b/src/kudu/tools/tool_action_hms.cc
new file mode 100644
index 0000000..1f141c0
--- /dev/null
+++ b/src/kudu/tools/tool_action_hms.cc
@@ -0,0 +1,285 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <iostream>
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <gflags/gflags.h>
+#include <gflags/gflags_declare.h>
+#include <glog/logging.h>
+
+#include "kudu/client/client-test-util.h"
+#include "kudu/client/client.h"
+#include "kudu/client/shared_ptr.h"
+#include "kudu/common/schema.h"
+#include "kudu/gutil/map-util.h"
+#include "kudu/gutil/strings/split.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/gutil/strings/util.h"
+#include "kudu/hms/hive_metastore_types.h"
+#include "kudu/hms/hms_catalog.h"
+#include "kudu/hms/hms_client.h"
+#include "kudu/tools/tool_action.h"
+#include "kudu/tools/tool_action_common.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/status.h"
+
+DECLARE_int64(timeout_ms); // defined in tool_action_common
+DECLARE_string(hive_metastore_uris);
+
+namespace kudu {
+namespace tools {
+
+using client::KuduClient;
+using client::KuduClientBuilder;
+using client::KuduTable;
+using client::KuduTableAlterer;
+using hms::HmsClient;
+using hms::HmsCatalog;
+using std::cin;
+using std::cout;
+using std::endl;
+using std::pair;
+using std::string;
+using std::unique_ptr;
+using std::unordered_map;
+using std::vector;
+using strings::Split;
+using strings::Substitute;
+
+DEFINE_bool(enable_input, true,
+            "Whether to enable user input for renaming tables that have hive"
+            "incompatible names.");
+
+const char* const kDefaultDatabaseArg = "default_database";
+const char* const kDefaultDatabaseArgDesc = "The database that non-Impala Kudu "
+                                            "table should reside in";
+const char* const kInvalidNameError = "is not a valid object name";
+
+// Returns a map that contains all Kudu tables in the HMS. Its key is
+// the Kudu table name and its value is the corresponding HMS table.
+unordered_map<string, hive::Table> RetrieveTablesMap(vector<hive::Table> hms_tables) {
+  unordered_map<string, hive::Table> hms_tables_map;
+  for (auto& hms_table : hms_tables) {
+    if (hms_table.parameters[HmsClient::kStorageHandlerKey] ==
+        HmsClient::kLegacyKuduStorageHandler) {
+      hms_tables_map.emplace(hms_table.parameters[HmsClient::kLegacyKuduTableNameKey],
+                              hms_table);
+    } else if (hms_table.parameters[HmsClient::kStorageHandlerKey] ==
+        HmsClient::kKuduStorageHandler) {
+      hms_tables_map.emplace(Substitute("$0.$1", hms_table.dbName, hms_table.tableName),
+                              hms_table);
+    }
+  }
+  return hms_tables_map;
+}
+
+string RenameHiveIncompatibleTable(const string& name) {
+  string table_name(name);
+  cout << Substitute("Table $0 is not hive compatible.", table_name) << endl;
+  cout << "Please input a new table name: ";
+  getline(cin, table_name);
+
+  return table_name;
+}
+
+// Alter legacy tables (which includes non-Impala tables, Impala managed/external
+// tables) to follow the format 'database_name.table_name' in table naming in Kudu.
+// Also, create HMS entries for non-Impala tables.
+//
+// Note that non-Impala tables name should conform to Hive naming standard.
+// Otherwise, the upgrade process will fail.
+Status AlterLegacyKuduTables(const client::sp::shared_ptr<KuduClient>& kudu_client,
+                             const unique_ptr<HmsCatalog>& hms_catalog,
+                             const string& default_database,
+                             const vector<hive::Table>& hms_tables) {
+  vector<string> table_names;
+  RETURN_NOT_OK(kudu_client->ListTables(&table_names));
+  unordered_map<string, hive::Table> hms_tables_map = RetrieveTablesMap(hms_tables);
+
+  // Take care of the orphan tables in the HMS.
+  for (const auto& hms_table : hms_tables_map) {
+    bool exist;
+    RETURN_NOT_OK(kudu_client->TableExists(hms_table.first, &exist));
+    if (!exist) {
+      LOG(WARNING) << Substitute("Found orphan table $0.$1 in the Hive Metastore",
+                                 hms_table.second.dbName, hms_table.second.tableName);
+    }
+  }
+
+  auto alter_kudu_table = [&](const string& name,
+                              const string& new_name) -> Status {
+    unique_ptr<KuduTableAlterer> alterer(kudu_client->NewTableAlterer(name));
+    return alterer->RenameTo(new_name)
+                  ->Alter();
+  };
+
+  unordered_map<string, Status> failures;
+  for (const auto& table_name : table_names) {
+    hive::Table* hms_table = FindOrNull(hms_tables_map, table_name);
+    client::sp::shared_ptr<KuduTable> kudu_table;
+    RETURN_NOT_OK(kudu_client->OpenTable(table_name, &kudu_table));
+    Status s;
+    if (hms_table) {
+      // Rename legacy Impala managed tables and external tables to follow the
+      // format 'database_name.table_name'. This is a no-op for non-legacy tables
+      // stored in the HMS.
+      if (hms_table->parameters[HmsClient::kStorageHandlerKey] ==
+          HmsClient::kLegacyKuduStorageHandler) {
+        string new_table_name = Substitute("$0.$1", hms_table->dbName, hms_table->tableName);
+        bool exist;
+        RETURN_NOT_OK(kudu_client->TableExists(new_table_name, &exist));
+        if (!exist) {
+          // TODO(Hao): Use notification listener to avoid race conditions.
+          s = alter_kudu_table(table_name, new_table_name).AndThen([&] {
+            return hms_catalog->UpgradeLegacyImpalaTable(kudu_table->id(),
+                hms_table->dbName, hms_table->tableName,
+                client::SchemaFromKuduSchema(kudu_table->schema()));
+          });
+        } else {
+          LOG(WARNING) << Substitute("Failed to upgrade legacy Impala table '$0.$1' "
+                                     "(Kudu table name: $2), because a Kudu table with "
+                                     "name '$0.$1' already exists'.", hms_table->dbName,
+                                      hms_table->tableName, table_name);
+        }
+      }
+    } else {
+      // Create the table in the HMS.
+      string new_table_name = Substitute("$0.$1", default_database, table_name);
+      Schema schema = client::SchemaFromKuduSchema(kudu_table->schema());
+      s = hms_catalog->CreateTable(kudu_table->id(), new_table_name, schema);
+      while (!s.ok() && FLAGS_enable_input &&
+             (MatchPattern(s.ToString(), Substitute("*$0*", kInvalidNameError)) ||
+              MatchPattern(s.ToString(), Substitute("*$0*", HmsCatalog::kInvalidTableError)))) {
+        new_table_name = Substitute("$0.$1", default_database,
+                                    RenameHiveIncompatibleTable(table_name));
+        s = hms_catalog->CreateTable(kudu_table->id(), new_table_name, schema);
+      }
+      s = s.AndThen([&] {
+        return alter_kudu_table(table_name, new_table_name);
+      });
+    }
+
+    if (!s.ok()) {
+      failures.emplace(table_name, s);
+    }
+  }
+
+  // Returns the first failure that was seen, if any.
+  if (!failures.empty()) {
+    for (const auto& failure : failures) {
+      LOG(WARNING) << Substitute("Failed to upgrade Kudu table $0, because: ",
+                                 failure.first, failure.second.ToString());
+    }
+
+    return failures.begin()->second;
+  } else {
+    return Status::OK();
+  }
+}
+
+// Upgrade the metadata format of legacy impala managed or external tables
+// in HMS entries, as well as rename the existing tables in Kudu to adapt
+// to the new naming rules.
+//
+// Sample Legacy Hms Entries
+// Managed table
+//  Table(
+//      tableName=customer,
+//      dbName=tpch_1000_kudu,
+//      parameters={
+//          kudu.master_addresses: <master-addr>:7051,
+//          kudu.table_name: impala::tpch_1000_kudu.customer,
+//          storage_handler: com.cloudera.kudu.hive.KuduStorageHandler,
+//      },
+//      tableType=MANAGED_TABLE,
+//  )
+//
+//  External table
+//  Table(
+//      tableName=metrics,
+//      dbName=default,
+//      parameters={
+//          EXTERNAL: TRUE,
+//          kudu.master_addresses: <master-addr>,
+//          kudu.table_name: metrics,
+//          storage_handler: com.cloudera.kudu.hive.KuduStorageHandler,
+//      },
+//      tableType=EXTERNAL_TABLE,
+//  )
+Status HmsUpgrade(const RunnerContext& context) {
+  const string& master_addresses_str = FindOrDie(context.required_args,
+                                                 kMasterAddressesArg);
+  vector<string> master_addresses = Split(master_addresses_str, ",");
+  const string& default_database = FindOrDie(context.required_args,
+                                             kDefaultDatabaseArg);
+
+  if (!hms::HmsCatalog::IsEnabled()) {
+    return Status::IllegalState("HMS URIs cannot be empty!");
+  }
+
+  // Start Hms Catalog.
+  unique_ptr<HmsCatalog> hms_catalog(new hms::HmsCatalog(master_addresses_str));
+  RETURN_NOT_OK(hms_catalog->Start());
+
+  // Create a Kudu Client.
+  client::sp::shared_ptr<KuduClient> kudu_client;
+  RETURN_NOT_OK(KuduClientBuilder()
+      .default_rpc_timeout(MonoDelta::FromMilliseconds(FLAGS_timeout_ms))
+      .master_server_addrs(master_addresses)
+      .Build(&kudu_client));
+
+  // 1. Identify all Kudu tables in the HMS entries.
+  vector<hive::Table> hms_tables;
+  RETURN_NOT_OK(hms_catalog->RetrieveTables(&hms_tables));
+
+  // 2. Rename all existing Kudu tables to have Hive-compatible table names.
+  //    Also, correct all out of sync metadata in HMS entries.
+  RETURN_NOT_OK(AlterLegacyKuduTables(kudu_client, hms_catalog,
+                                      default_database, hms_tables));
+
+  hms_catalog->Stop();
+  return Status::OK();
+}
+
+unique_ptr<Mode> BuildHmsMode() {
+  unique_ptr<Action> hms_upgrade =
+      ActionBuilder("upgrade", &HmsUpgrade)
+          .Description("Upgrade the legacy metadata for Kudu and Hive Metastores")
+          .AddRequiredParameter({ kMasterAddressesArg, kMasterAddressesArgDesc })
+          .AddRequiredParameter({ kDefaultDatabaseArg, kDefaultDatabaseArgDesc })
+          .AddOptionalParameter("hive_metastore_uris")
+          .AddOptionalParameter("hive_metastore_sasl_enabled")
+          .AddOptionalParameter("hive_metastore_retry_count")
+          .AddOptionalParameter("hive_metastore_send_timeout")
+          .AddOptionalParameter("hive_metastore_recv_timeout")
+          .AddOptionalParameter("hive_metastore_conn_timeout")
+          .AddOptionalParameter("enable_input")
+          .Build();
+
+  return ModeBuilder("hms").Description("Operate on remote Hive Metastores")
+                           .AddAction(std::move(hms_upgrade))
+                           .Build();
+}
+
+} // namespace tools
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/tools/tool_action_test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_test.cc b/src/kudu/tools/tool_action_test.cc
index a458b84..356350b 100644
--- a/src/kudu/tools/tool_action_test.cc
+++ b/src/kudu/tools/tool_action_test.cc
@@ -157,7 +157,7 @@ Status ProcessRequest(const ControlShellRequestPB& req,
         opts.num_tablet_servers = cc.num_tservers();
       }
       opts.enable_kerberos = cc.enable_kerberos();
-      opts.enable_hive_metastore = cc.enable_hive_metastore();
+      opts.hms_mode = cc.hms_mode();
       if (cc.has_cluster_root()) {
         opts.cluster_root = cc.cluster_root();
       } else {

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/tools/tool_main.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_main.cc b/src/kudu/tools/tool_main.cc
index 4622074..e4066ec 100644
--- a/src/kudu/tools/tool_main.cc
+++ b/src/kudu/tools/tool_main.cc
@@ -63,6 +63,7 @@ unique_ptr<Mode> RootMode(const string& name) {
       .Description("Kudu Command Line Tools") // root mode description isn't printed
       .AddMode(BuildClusterMode())
       .AddMode(BuildFsMode())
+      .AddMode(BuildHmsMode())
       .AddMode(BuildLocalReplicaMode())
       .AddMode(BuildMasterMode())
       .AddMode(BuildPbcMode())

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/tools/tool_test_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_test_util.cc b/src/kudu/tools/tool_test_util.cc
index 7f80732..60da0bb 100644
--- a/src/kudu/tools/tool_test_util.cc
+++ b/src/kudu/tools/tool_test_util.cc
@@ -46,7 +46,8 @@ string GetKuduToolAbsolutePath() {
   return tool_abs_path;
 }
 
-Status RunKuduTool(const vector<string>& args, string* out, string* err) {
+Status RunKuduTool(const vector<string>& args, string* out, string* err,
+                   const std::string& in) {
   vector<string> total_args = { GetKuduToolAbsolutePath() };
 
   // Speed up filesystem-based operations.
@@ -54,7 +55,7 @@ Status RunKuduTool(const vector<string>& args, string* out, string* err) {
   total_args.emplace_back("--never_fsync");
 
   total_args.insert(total_args.end(), args.begin(), args.end());
-  return Subprocess::Call(total_args, "", out, err);
+  return Subprocess::Call(total_args, in, out, err);
 }
 
 } // namespace tools

http://git-wip-us.apache.org/repos/asf/kudu/blob/10a7f2a9/src/kudu/tools/tool_test_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_test_util.h b/src/kudu/tools/tool_test_util.h
index f5d6012..6925468 100644
--- a/src/kudu/tools/tool_test_util.h
+++ b/src/kudu/tools/tool_test_util.h
@@ -36,7 +36,8 @@ std::string GetKuduToolAbsolutePath();
 // written to each respectively.
 Status RunKuduTool(const std::vector<std::string>& args,
                    std::string* out = nullptr,
-                   std::string* err = nullptr);
+                   std::string* err = nullptr,
+                   const std::string& in = "");
 
 } // namespace tools
 } // namespace kudu


Mime
View raw message