kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From granthe...@apache.org
Subject [kudu] branch master updated: [hms] Sync external HMS tables with purge property
Date Tue, 10 Sep 2019 23:09:09 GMT
This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new ab69837  [hms] Sync external HMS tables with purge property
ab69837 is described below

commit ab69837059136dff64df33d5c1eedbc6d93b2357
Author: Grant Henke <granthenke@apache.org>
AuthorDate: Thu Sep 5 08:31:44 2019 -0500

    [hms] Sync external HMS tables with purge property
    
    This patch is another patch in preparation for Hive 3 and 4.
    In Hive 3 the meaning of managed tables is a bit more strict.
    To replace the old behavior of a managed table, external tables
    now have the `external.table.purge` property. Further, in
    Hive 4, non-transactional Hive tables will get auto-migrated
    to external tables with `external.table.purge=true`.
    
    With this change, the HMS sync now treats managed tables and
    external tables with `external.table.purge=true` as synchronized tables.
    External tables with `external.table.purge=false` will continue to be
    ingnored like all external tables before.
    
    To avoid orphaned tables, the HMS plugin was changed to restrict changing
    the `external.table.purge` property. This is similar to the existing
    functionality which prevented changing the table type. The HMS plugin
    tests were also adjusted to use external purged tables by default to avoid
    testing issues one the auto-migration code is enabled in Hive.
    
    Last, the HMS tooling was updated to differentiate between synchronized
    and unsynchronized tables instead of explicitly managed and external tables.
    
    Change-Id: I942515c015b1a4a63cf08383c331c4e19350c312
    Reviewed-on: http://gerrit.cloudera.org:8080/14179
    Reviewed-by: Hao Hao <hao.hao@cloudera.com>
    Tested-by: Grant Henke <granthenke@apache.org>
---
 .../kudu/hive/metastore/KuduMetastorePlugin.java   | 61 +++++++++++++++------
 .../hive/metastore/TestKuduMetastorePlugin.java    | 63 ++++++++++++++++++----
 src/kudu/hms/hms_catalog.cc                        | 28 +++++-----
 src/kudu/hms/hms_client.cc                         |  9 ++++
 src/kudu/hms/hms_client.h                          |  6 +++
 src/kudu/integration-tests/hms_itest-base.cc       | 21 +++++++-
 src/kudu/integration-tests/hms_itest-base.h        |  7 ++-
 src/kudu/integration-tests/master_hms-itest.cc     | 51 +++++++++++++++++-
 src/kudu/master/hms_notification_log_listener.cc   |  8 +--
 src/kudu/tools/kudu-tool-test.cc                   | 62 ++++++++++++++++++---
 src/kudu/tools/tool_action_hms.cc                  | 36 ++++++-------
 11 files changed, 281 insertions(+), 71 deletions(-)

diff --git a/java/kudu-hive/src/main/java/org/apache/kudu/hive/metastore/KuduMetastorePlugin.java
b/java/kudu-hive/src/main/java/org/apache/kudu/hive/metastore/KuduMetastorePlugin.java
index bf72cef..2acc8e1 100644
--- a/java/kudu-hive/src/main/java/org/apache/kudu/hive/metastore/KuduMetastorePlugin.java
+++ b/java/kudu-hive/src/main/java/org/apache/kudu/hive/metastore/KuduMetastorePlugin.java
@@ -18,6 +18,7 @@
 package org.apache.kudu.hive.metastore;
 
 import java.util.Map;
+import java.util.Objects;
 
 import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.conf.Configuration;
@@ -83,7 +84,9 @@ public class KuduMetastorePlugin extends MetaStoreEventListener {
   // The key should keep in sync with the one used in
   // org.apache.hadoop.hive.metastore.MetaStoreUtils.isExternalTable().
   @VisibleForTesting
-  static final String EXTERNAL_TABLE_KEY= "EXTERNAL";
+  static final String EXTERNAL_TABLE_KEY = "EXTERNAL";
+
+  static final String EXTERNAL_PURGE_KEY = "external.table.purge";
 
   // System env to track if the HMS plugin validation should be skipped.
   static final String SKIP_VALIDATION_ENV = "KUDU_SKIP_HMS_PLUGIN_VALIDATION";
@@ -102,8 +105,8 @@ public class KuduMetastorePlugin extends MetaStoreEventListener {
 
     Table table = tableEvent.getTable();
 
-    // Only validate managed tables. Kudu only synchronizes managed tables.
-    if (!TableType.MANAGED_TABLE.name().equals(table.getTableType())) {
+    // Only validate synchronized tables.
+    if (!isSynchronizedTable(table)) {
       return;
     }
 
@@ -130,8 +133,8 @@ public class KuduMetastorePlugin extends MetaStoreEventListener {
       return;
     }
 
-    // Only validate managed tables. Kudu only synchronizes managed tables.
-    if (!TableType.MANAGED_TABLE.name().equals(table.getTableType())) {
+    // Only validate synchronized tables.
+    if (!isSynchronizedTable(table)) {
       return;
     }
 
@@ -170,23 +173,24 @@ public class KuduMetastorePlugin extends MetaStoreEventListener {
     Table newTable = tableEvent.getNewTable();
 
     // Prevent altering the table type (managed/external) of Kudu tables (or via
-    // altering table property 'EXTERNAL'). This can cause orphaned tables and
-    // the Sentry integration depends on having a managed table for each Kudu
-    // table to prevent security issues due to overlapping names with Kudu tables
-    // and tables in the HMS.
+    // altering table properties 'EXTERNAL' or `external.table.purge`).
+    // This can cause orphaned tables and the Sentry integration depends on having a
+    // synchronized table for each Kudu table to prevent security issues due to overlapping
+    // names with Kudu tables and tables in the HMS.
     // Note: This doesn't prevent altering the table type for legacy tables
     // because they should continue to work as they always have primarily for
     // migration purposes.
-    String oldTableType = oldTable.getTableType();
+    // The Kudu master is allowed to make these changes if necessary as it is a trusted user.
     if (isKuduTable(oldTable) &&
-        ((oldTableType != null && !oldTableType.equals(newTable.getTableType()))
||
-         (isExternalTable(oldTable) != isExternalTable(newTable)))) {
+        !isKuduMasterAction(tableEvent) &&
+        (!Objects.equals(oldTable.getTableType(), newTable.getTableType()) ||
+            isExternalTable(oldTable) != isExternalTable(newTable) ||
+            isPurgeTable(oldTable) != isPurgeTable(newTable))) {
       throw new MetaException("Kudu table type may not be altered");
     }
 
-    // Only validate managed tables.
-    // Kudu only synchronizes managed tables.
-    if (!TableType.MANAGED_TABLE.name().equals(oldTableType)) {
+    // Only validate synchronized tables.
+    if (!isSynchronizedTable(oldTable)) {
       return;
     }
 
@@ -270,6 +274,33 @@ public class KuduMetastorePlugin extends MetaStoreEventListener {
   }
 
   /**
+   * Checks whether the table should be purged when deleted, i.e. the
+   * underlying Kudu table should be deleted when the HMS table entry is
+   * deleted.
+   *
+   * @param table the table to check
+   * @return {@code true} if the table is a managed table or has external.table.purge = true,
+   *         otherwise {@code false}
+   */
+  private boolean isPurgeTable(Table table) {
+    boolean externalPurge =
+        Boolean.parseBoolean(table.getParameters().getOrDefault(EXTERNAL_PURGE_KEY, "false"));
+    return TableType.MANAGED_TABLE.name().equals(table.getTableType()) || externalPurge;
+  }
+
+  /**
+   * Checks whether the table is considered a synchronized Kudu table.
+   *
+   * @param table the table to check
+   * @return {@code true} if the table is a managed table or an external table with
+   *         `external.table.purge = true`, otherwise {@code false}
+   */
+  private boolean isSynchronizedTable(Table table) {
+    return TableType.MANAGED_TABLE.name().equals(table.getTableType()) ||
+        (isExternalTable(table) && isPurgeTable(table));
+  }
+
+  /**
    * Checks that the Kudu table entry contains the required Kudu table properties.
    * @param table the table to check
    */
diff --git a/java/kudu-hive/src/test/java/org/apache/kudu/hive/metastore/TestKuduMetastorePlugin.java
b/java/kudu-hive/src/test/java/org/apache/kudu/hive/metastore/TestKuduMetastorePlugin.java
index 7148c1b..0ea471e 100644
--- a/java/kudu-hive/src/test/java/org/apache/kudu/hive/metastore/TestKuduMetastorePlugin.java
+++ b/java/kudu-hive/src/test/java/org/apache/kudu/hive/metastore/TestKuduMetastorePlugin.java
@@ -122,7 +122,9 @@ public class TestKuduMetastorePlugin {
     Table table = new Table();
     table.setDbName("default");
     table.setTableName(name);
-    table.setTableType(TableType.MANAGED_TABLE.toString());
+    table.setTableType(TableType.EXTERNAL_TABLE.toString());
+    table.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE");
+    table.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "TRUE");
     table.putToParameters(hive_metastoreConstants.META_TABLE_STORAGE,
                           storageHandler);
     if (!storageHandler.equals(KuduMetastorePlugin.LEGACY_KUDU_STORAGE_HANDLER)) {
@@ -210,10 +212,12 @@ public class TestKuduMetastorePlugin {
       client.dropTable(table.getDbName(), table.getTableName());
     }
 
-    // Check that creating a non-managed table is accepted.
+    // Check that creating an unsynchronized table is accepted.
     {
       Table table = newTable("table");
-      table.setTableType(TableType.EXTERNAL_TABLE.name());
+      table.setTableType(TableType.EXTERNAL_TABLE.toString());
+      table.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE");
+      table.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "FALSE");
       client.createTable(table);
       client.dropTable(table.getDbName(), table.getTableName());
     }
@@ -227,6 +231,8 @@ public class TestKuduMetastorePlugin {
     Table legacyTable = newLegacyTable("legacy_table");
     client.createTable(legacyTable, masterContext());
     try {
+      // Check that altering the table succeeds.
+      client.alter_table(table.getDbName(), table.getTableName(), table);
 
       // Try to alter the Kudu table with a different table ID.
       try {
@@ -260,7 +266,7 @@ public class TestKuduMetastorePlugin {
       // Try to alter the Kudu table to a different type.
       try {
         Table alteredTable = table.deepCopy();
-        alteredTable.setTableType(TableType.EXTERNAL_TABLE.toString());
+        alteredTable.setTableType(TableType.MANAGED_TABLE.toString());
         client.alter_table(table.getDbName(), table.getTableName(), alteredTable);
         fail();
       } catch (TException e) {
@@ -268,10 +274,10 @@ public class TestKuduMetastorePlugin {
             "Kudu table type may not be altered"));
       }
 
-      // Alter the Kudu table to a different type by setting the table property fails.
+      // Alter the Kudu table to a different type by setting the external property fails.
       try {
         Table alteredTable = table.deepCopy();
-        alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "true");
+        alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "FALSE");
         client.alter_table(table.getDbName(), table.getTableName(), alteredTable);
         fail();
       } catch (TException e) {
@@ -282,12 +288,40 @@ public class TestKuduMetastorePlugin {
       // Alter the Kudu table to the same type by setting the table property works.
       {
         Table alteredTable = table.deepCopy();
-        alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "false");
+        alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE");
         client.alter_table(table.getDbName(), table.getTableName(), alteredTable);
       }
 
-      // Check that altering the table succeeds.
-      client.alter_table(table.getDbName(), table.getTableName(), table);
+      // Alter the Kudu table to a managed type with the master context succeeds.
+      {
+        Table alteredTable = table.deepCopy();
+        alteredTable.setTableType(TableType.MANAGED_TABLE.toString());
+        alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "FALSE");
+        alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "FALSE");
+        client.alter_table_with_environmentContext(table.getDbName(), table.getTableName(),
+            alteredTable, masterContext());
+      }
+
+      // Alter the Kudu table to a different type by setting the purge property fails.
+      try {
+        Table alteredTable = table.deepCopy();
+        alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "TRUE");
+        client.alter_table(table.getDbName(), table.getTableName(), alteredTable);
+        fail();
+      } catch (TException e) {
+        assertTrue(e.getMessage().contains(
+            "Kudu table type may not be altered"));
+      }
+
+      // Alter the Kudu table to an external type with the master context succeeds.
+      {
+        Table alteredTable = table.deepCopy();
+        alteredTable.setTableType(TableType.EXTERNAL_TABLE.toString());
+        alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE");
+        alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "TRUE");
+        client.alter_table_with_environmentContext(table.getDbName(), table.getTableName(),
+            alteredTable, masterContext());
+      }
 
       // Check that adding a column fails.
       table.getSd().addToCols(new FieldSchema("b", "int", ""));
@@ -309,6 +343,9 @@ public class TestKuduMetastorePlugin {
       {
         Table alteredTable = table.deepCopy();
         alteredTable.getParameters().clear();
+        alteredTable.setTableType(TableType.EXTERNAL_TABLE.toString());
+        alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE");
+        alteredTable.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "TRUE");
         alteredTable.putToParameters(hive_metastoreConstants.META_TABLE_STORAGE,
             KuduMetastorePlugin.LEGACY_KUDU_STORAGE_HANDLER);
         alteredTable.putToParameters(KuduMetastorePlugin.KUDU_TABLE_NAME,
@@ -372,10 +409,12 @@ public class TestKuduMetastorePlugin {
       }
     }
 
-    // Test altering a non-managed table is accepted.
+    // Test altering an unsynchronized table is accepted.
     {
       table.getParameters().clear();
       table.setTableType(TableType.EXTERNAL_TABLE.name());
+      table.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE");
+      table.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "FALSE");
       client.createTable(table);
       try {
         client.alter_table(table.getDbName(), table.getTableName(), table);
@@ -478,10 +517,12 @@ public class TestKuduMetastorePlugin {
       }
     }
 
-    // Test dropping a non-managed table is accepted.
+    // Test dropping an unsynchronized table is accepted.
     {
       table.getParameters().clear();
       table.setTableType(TableType.EXTERNAL_TABLE.name());
+      table.putToParameters(KuduMetastorePlugin.EXTERNAL_TABLE_KEY, "TRUE");
+      table.putToParameters(KuduMetastorePlugin.EXTERNAL_PURGE_KEY, "FALSE");
       client.createTable(table);
       client.dropTable(table.getDbName(), table.getTableName());
     }
diff --git a/src/kudu/hms/hms_catalog.cc b/src/kudu/hms/hms_catalog.cc
index 197bf99..6009e47 100644
--- a/src/kudu/hms/hms_catalog.cc
+++ b/src/kudu/hms/hms_catalog.cc
@@ -186,14 +186,17 @@ Status HmsCatalog::UpgradeLegacyImpalaTable(const string& id,
       return Status::IllegalState("non-legacy table cannot be upgraded");
     }
 
-    // If this is an external table, only upgrade the storage handler.
-    if (table.tableType == HmsClient::kExternalTable) {
+    if (table.tableType != HmsClient::kManagedTable &&
+        table.tableType != HmsClient::kExternalTable) {
+      return Status::IllegalState(Substitute("Unsupported table type $0", table.tableType));
+    }
+
+    // If this is an unsynchronized table, only upgrade the storage handler.
+    if (!HmsClient::IsSynchronized(table)) {
       table.parameters[HmsClient::kStorageHandlerKey] = HmsClient::kKuduStorageHandler;
-    } else if (table.tableType == HmsClient::kManagedTable) {
+    } else {
       RETURN_NOT_OK(PopulateTable(id, Substitute("$0.$1", db_name, tb_name),
                                   table.owner, schema, master_addresses_, table.tableType,
&table));
-    } else {
-      return Status::IllegalState(Substitute("Unsupported table type $0", table.tableType));
     }
     return client->AlterTable(db_name, tb_name, table, EnvironmentContext());
   });
@@ -212,10 +215,8 @@ Status HmsCatalog::DowngradeToLegacyImpalaTable(const string& name)
{
     }
     // Downgrade the storage handler.
     table.parameters[HmsClient::kStorageHandlerKey] = HmsClient::kLegacyKuduStorageHandler;
-    if (table.tableType == HmsClient::kManagedTable) {
-      // Remove the Kudu-specific field 'kudu.table_id'.
-      EraseKeyReturnValuePtr(&table.parameters, HmsClient::kKuduTableIdKey);
-    }
+    // Remove the Kudu-specific field 'kudu.table_id'.
+    EraseKeyReturnValuePtr(&table.parameters, HmsClient::kKuduTableIdKey);
     return client->AlterTable(table.dbName, table.tableName, table, EnvironmentContext());
   });
 }
@@ -377,13 +378,16 @@ Status HmsCatalog::PopulateTable(const string& id,
   // TODO(HIVE-19253): Used along with table type to indicate an external table.
   if (table_type == HmsClient::kExternalTable) {
     table->parameters[HmsClient::kExternalTableKey] = "TRUE";
-  // Only set the table id on managed tables and ensure the
-  // kExternalTableKey property is unset.
+  // Ensure the kExternalTableKey property is unset on a managed table.
   } else if (table_type == HmsClient::kManagedTable) {
-    table->parameters[HmsClient::kKuduTableIdKey] = id;
     EraseKeyReturnValuePtr(&table->parameters, HmsClient::kExternalTableKey);
   }
 
+  // Only set the table id on synchronized tables.
+  if (HmsClient::IsSynchronized(*table)) {
+    table->parameters[HmsClient::kKuduTableIdKey] = id;
+  }
+
   // Add the Kudu-specific parameters. This intentionally avoids overwriting
   // other parameters.
   table->parameters[HmsClient::kKuduTableNameKey] = name;
diff --git a/src/kudu/hms/hms_client.cc b/src/kudu/hms/hms_client.cc
index 0286fcf..9aa9127 100644
--- a/src/kudu/hms/hms_client.cc
+++ b/src/kudu/hms/hms_client.cc
@@ -120,6 +120,7 @@ const char* const HmsClient::kDisallowIncompatibleColTypeChanges =
 const char* const HmsClient::kDbNotificationListener =
   "org.apache.hive.hcatalog.listener.DbNotificationListener";
 const char* const HmsClient::kExternalTableKey = "EXTERNAL";
+const char* const HmsClient::kExternalPurgeKey = "external.table.purge";
 const char* const HmsClient::kStorageHandlerKey = "storage_handler";
 const char* const HmsClient::kKuduMetastorePlugin =
   "org.apache.kudu.hive.metastore.KuduMetastorePlugin";
@@ -405,5 +406,13 @@ bool HmsClient::IsKuduTable(const hive::Table& table) {
   return *storage_handler == hms::HmsClient::kKuduStorageHandler;
 }
 
+bool HmsClient::IsSynchronized(const hive::Table& table) {
+  const string externalPurge =
+      FindWithDefault(table.parameters, hms::HmsClient::kExternalPurgeKey, "false");
+  return table.tableType == hms::HmsClient::kManagedTable ||
+         (table.tableType == hms::HmsClient::kExternalTable &&
+          boost::iequals(externalPurge, "true"));
+}
+
 } // namespace hms
 } // namespace kudu
diff --git a/src/kudu/hms/hms_client.h b/src/kudu/hms/hms_client.h
index 4012dae..e87082f 100644
--- a/src/kudu/hms/hms_client.h
+++ b/src/kudu/hms/hms_client.h
@@ -57,6 +57,7 @@ class HmsClient {
  public:
 
   static const char* const kExternalTableKey;
+  static const char* const kExternalPurgeKey;
   static const char* const kLegacyKuduStorageHandler;
   static const char* const kLegacyTablePrefix;
   static const char* const kKuduTableIdKey;
@@ -186,6 +187,11 @@ class HmsClient {
   // Returns true if the HMS table is a Kudu table.
   static bool IsKuduTable(const hive::Table& table) WARN_UNUSED_RESULT;
 
+  // Returns if the HMS table should be synchronized.
+  // Returns true if the HMS table is a managed table or
+  // if the HMS is an external table with `external.table.purge = true`.
+  static bool IsSynchronized(const hive::Table& table) WARN_UNUSED_RESULT;
+
   // Deserializes a JSON encoded table.
   //
   // Notification event log messages often include table objects serialized as
diff --git a/src/kudu/integration-tests/hms_itest-base.cc b/src/kudu/integration-tests/hms_itest-base.cc
index 3078ede..46c265b 100644
--- a/src/kudu/integration-tests/hms_itest-base.cc
+++ b/src/kudu/integration-tests/hms_itest-base.cc
@@ -162,9 +162,26 @@ Status HmsITestBase::AlterHmsTableDropColumns(const string& database_name,
     return Status::OK();
 }
 
+Status HmsITestBase::AlterHmsTableExternalPurge(const string& database_name,
+                                                const string& table_name) {
+  hive::Table table;
+  RETURN_NOT_OK(hms_client_->GetTable(database_name, table_name, &table));
+  table.tableType = HmsClient::kExternalTable;
+  table.parameters[HmsClient::kExternalTableKey] = "TRUE";
+  table.parameters[HmsClient::kExternalPurgeKey] = "TRUE";
+
+  // The KuduMetastorePlugin only allows the master to alter the table type
+  // and purge property, so we pretend to be the master.
+  hive::EnvironmentContext env_ctx;
+  env_ctx.__set_properties({ std::make_pair(hms::HmsClient::kKuduMasterEventKey, "true")
});
+  RETURN_NOT_OK(hms_client_->AlterTable(database_name, table_name, table, env_ctx));
+  return Status::OK();
+}
+
 void HmsITestBase::CheckTable(const string& database_name,
                               const string& table_name,
-                              boost::optional<const string&> user) {
+                              boost::optional<const string&> user,
+                              const string& table_type) {
   SCOPED_TRACE(Substitute("Checking table $0.$1", database_name, table_name));
   shared_ptr<KuduTable> table;
   ASSERT_OK(client_->OpenTable(Substitute("$0.$1", database_name, table_name), &table));
@@ -172,7 +189,7 @@ void HmsITestBase::CheckTable(const string& database_name,
   hive::Table hms_table;
   ASSERT_OK(hms_client_->GetTable(database_name, table_name, &hms_table));
 
-  ASSERT_EQ(hms::HmsClient::kManagedTable, hms_table.tableType);
+  ASSERT_EQ(table_type, hms_table.tableType);
 
   string username;
   if (user) {
diff --git a/src/kudu/integration-tests/hms_itest-base.h b/src/kudu/integration-tests/hms_itest-base.h
index ca30c9a..58e5cd5 100644
--- a/src/kudu/integration-tests/hms_itest-base.h
+++ b/src/kudu/integration-tests/hms_itest-base.h
@@ -61,13 +61,18 @@ class HmsITestBase : public ExternalMiniClusterITestBase {
   Status AlterHmsTableDropColumns(const std::string& database_name,
                                   const std::string& table_name) WARN_UNUSED_RESULT;
 
+  // Alter the HMS entry to be an external table with `external.table.purge = true`.
+  Status AlterHmsTableExternalPurge(const std::string& database_name,
+                                    const std::string& table_name) WARN_UNUSED_RESULT;
+
   // Checks that the Kudu table schema and the HMS table entry in their
   // respective catalogs are synchronized for a particular table. It also
   // verifies that the table owner is the given user (if not provided,
   // checks against the logged in user).
   void CheckTable(const std::string& database_name,
                   const std::string& table_name,
-                  boost::optional<const std::string&> user);
+                  boost::optional<const std::string&> user,
+                  const std::string& table_type = hms::HmsClient::kManagedTable);
 
   // Checks that a table does not exist in the Kudu and HMS catalogs.
   void CheckTableDoesNotExist(const std::string& database_name,
diff --git a/src/kudu/integration-tests/master_hms-itest.cc b/src/kudu/integration-tests/master_hms-itest.cc
index 4c68fce..9680836 100644
--- a/src/kudu/integration-tests/master_hms-itest.cc
+++ b/src/kudu/integration-tests/master_hms-itest.cc
@@ -253,6 +253,17 @@ TEST_F(MasterHmsTest, TestAlterTable) {
   ASSERT_OK(comment_alterer->Alter());
   NO_FATALS(CheckTable("default", "a", /*user=*/ none));
 
+  // Create the Kudu table and alter the HMS entry to be an external table
+  // with `external.table.purge = true`, then alter it in the HMS and ensure
+  // the Kudu table is actually altered.
+  ASSERT_OK(CreateKuduTable("default", "b"));
+  ASSERT_OK(AlterHmsTableExternalPurge("default", "b"));
+  NO_FATALS(CheckTable("default", "b", /*user=*/ none, hms::HmsClient::kExternalTable));
+  unique_ptr<KuduTableAlterer> comment_alterer_b(client_->NewTableAlterer("default.b"));
+  comment_alterer_b->AlterColumn("int16_val")->Comment("Another comment");
+  ASSERT_OK(comment_alterer_b->Alter());
+  NO_FATALS(CheckTable("default", "b", /*user=*/ none, hms::HmsClient::kExternalTable));
+
   // Shutdown the HMS and try to alter the table.
   ASSERT_OK(StopHms());
   table_alterer.reset(client_->NewTableAlterer("default.a")->DropColumn("int16_val"));
@@ -268,11 +279,11 @@ TEST_F(MasterHmsTest, TestAlterTable) {
   NO_FATALS(CheckTable("default", "a", /*user=*/ none));
 
   // Only alter the table in Kudu, the corresponding table in the HMS will not be altered.
-  table_alterer.reset(client_->NewTableAlterer("default.a")->RenameTo("default.b")
+  table_alterer.reset(client_->NewTableAlterer("default.a")->RenameTo("default.c")
                              ->modify_external_catalogs(false));
   ASSERT_OK(table_alterer->Alter());
   bool exists;
-  ASSERT_OK(client_->TableExists("default.b", &exists));
+  ASSERT_OK(client_->TableExists("default.c", &exists));
   ASSERT_TRUE(exists);
   hive::Table hms_table;
   ASSERT_OK(hms_client_->GetTable("default", "a", &hms_table));
@@ -300,6 +311,19 @@ TEST_F(MasterHmsTest, TestDeleteTable) {
       NO_FATALS(CheckTableDoesNotExist("default", "b"));
   });
 
+  // Create the Kudu table and alter the HMS entry to be an external table
+  // with `external.table.purge = true`, then drop it from the HMS, and ensure
+  // the Kudu table is deleted.
+  ASSERT_OK(CreateKuduTable("default", "b"));
+  ASSERT_OK(AlterHmsTableExternalPurge("default", "b"));
+  NO_FATALS(CheckTable("default", "b", /*user=*/ none, hms::HmsClient::kExternalTable));
+  shared_ptr<KuduTable> table2;
+  ASSERT_OK(client_->OpenTable("default.b", &table2));
+  ASSERT_OK(hms_client_->DropTable("default", "b"));
+  ASSERT_EVENTUALLY([&] {
+    NO_FATALS(CheckTableDoesNotExist("default", "b"));
+  });
+
   // Ensure that dropping a table while the HMS is unreachable fails.
   ASSERT_OK(CreateKuduTable("default", "c"));
   NO_FATALS(CheckTable("default", "c", /*user=*/ none));
@@ -388,6 +412,29 @@ TEST_F(MasterHmsTest, TestNotificationLogListener) {
   s = hms_client_->DropTable("default", "a");
   ASSERT_TRUE(s.IsNotFound()) << s.ToString();
   NO_FATALS(CheckTableDoesNotExist("default", "a"));
+
+  // Create the Kudu table and alter the HMS entry to be an external table
+  // with `external.table.purge = true`.
+  ASSERT_OK(CreateKuduTable("default", "d"));
+  ASSERT_OK(AlterHmsTableExternalPurge("default", "d"));
+  NO_FATALS(CheckTable("default", "d", /*user=*/ none, hms::HmsClient::kExternalTable));
+
+  // Rename the table in the HMS, and ensure that the notification log listener
+  // detects the rename and updates the Kudu catalog accordingly.
+  ASSERT_OK(RenameHmsTable("default", "d", "e"));
+  ASSERT_EVENTUALLY([&] {
+  NO_FATALS({
+      CheckTable("default", "e", /*user=*/ none, hms::HmsClient::kExternalTable);
+      CheckTableDoesNotExist("default", "d");
+    });
+  });
+
+  // Drop the table in the HMS, and ensure that the notification log listener
+  // detects the drop and updates the Kudu catalog accordingly.
+  ASSERT_OK(hms_client_->DropTable("default", "e"));
+  ASSERT_EVENTUALLY([&] {
+    NO_FATALS(CheckTableDoesNotExist("default", "e"));
+  });
 }
 
 TEST_F(MasterHmsTest, TestIgnoreExternalTables) {
diff --git a/src/kudu/master/hms_notification_log_listener.cc b/src/kudu/master/hms_notification_log_listener.cc
index 5b19815..c94b5f8 100644
--- a/src/kudu/master/hms_notification_log_listener.cc
+++ b/src/kudu/master/hms_notification_log_listener.cc
@@ -333,8 +333,8 @@ Status HmsNotificationLogListenerTask::HandleAlterTableEvent(const hive::Notific
   hive::Table before_table;
   RETURN_NOT_OK(DeserializeTable(event, message, "tableObjBeforeJson", &before_table));
 
-  if (before_table.tableType != hms::HmsClient::kManagedTable) {
-    // Not a managed table; skip it.
+  if (!hms::HmsClient::IsSynchronized(before_table)) {
+    // Not a synchronized table; skip it.
     VLOG(2) << Substitute("Ignoring alter event for table $0 of type $1",
                           before_table.tableName, before_table.tableType);
     return Status::OK();
@@ -394,8 +394,8 @@ Status HmsNotificationLogListenerTask::HandleDropTableEvent(const hive::Notifica
   hive::Table table;
   RETURN_NOT_OK(DeserializeTable(event, message, "tableObjJson", &table));
 
-  if (table.tableType != hms::HmsClient::kManagedTable) {
-    // Not a managed table; skip it.
+  if (!hms::HmsClient::IsSynchronized(table)) {
+    // Not a synchronized table; skip it.
     VLOG(2) << Substitute("Ignoring drop event for table $0 of type $1",
                           table.tableName, table.tableType);
     return Status::OK();
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 17ccf4c..1d59fef 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -3595,15 +3595,13 @@ void ValidateHmsEntries(HmsClient* hms_client,
   ASSERT_EQ(hms_table.parameters[HmsClient::kStorageHandlerKey], HmsClient::kKuduStorageHandler);
   ASSERT_EQ(hms_table.parameters[HmsClient::kKuduMasterAddrsKey], master_addr);
 
-  if (hms_table.tableType == HmsClient::kManagedTable) {
+  if (HmsClient::IsSynchronized(hms_table)) {
     shared_ptr<KuduTable> kudu_table;
     ASSERT_OK(kudu_client->OpenTable(Substitute("$0.$1", database_name, table_name), &kudu_table));
     ASSERT_TRUE(boost::iequals(kudu_table->name(),
                                hms_table.parameters[hms::HmsClient::kKuduTableNameKey]));
     ASSERT_EQ(kudu_table->id(), hms_table.parameters[HmsClient::kKuduTableIdKey]);
-  }
-
-  if (hms_table.tableType == HmsClient::kExternalTable) {
+  } else {
     ASSERT_TRUE(ContainsKey(hms_table.parameters, HmsClient::kKuduTableNameKey));
     ASSERT_FALSE(ContainsKey(hms_table.parameters, HmsClient::kKuduTableIdKey));
   }
@@ -3678,10 +3676,14 @@ TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata)
{
   shared_ptr<KuduClient> kudu_client;
   ASSERT_OK(cluster_->CreateClient(nullptr, &kudu_client));
 
+  // Need to pretend to be the master to allow the purge property change.
+  hive::EnvironmentContext master_ctx;
+  master_ctx.__set_properties({ std::make_pair(hms::HmsClient::kKuduMasterEventKey, "true")
});
+
   // While the metastore integration is disabled create tables in Kudu and the
   // HMS with inconsistent metadata.
 
-  // Control case: the check tool should not flag this table.
+  // Control case: the check tool should not flag this managed table.
   shared_ptr<KuduTable> control;
   ASSERT_OK(CreateKuduTable(kudu_client, "default.control"));
   ASSERT_OK(kudu_client->OpenTable("default.control", &control));
@@ -3689,6 +3691,20 @@ TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata)
{
         control->id(), control->name(), kUsername,
         KuduSchema::ToSchema(control->schema())));
 
+  // Control case: the check tool should not flag this external synchronized table.
+  shared_ptr<KuduTable> control_external;
+  ASSERT_OK(CreateKuduTable(kudu_client, "default.control_external"));
+  ASSERT_OK(kudu_client->OpenTable("default.control_external", &control_external));
+  ASSERT_OK(hms_catalog.CreateTable(
+      control_external->id(), control_external->name(), kUsername,
+      KuduSchema::ToSchema(control_external->schema()), HmsClient::kExternalTable));
+  hive::Table hms_control_external;
+  ASSERT_OK(hms_client.GetTable("default", "control_external", &hms_control_external));
+  hms_control_external.parameters[HmsClient::kKuduTableIdKey] = control_external->id();
+  hms_control_external.parameters[HmsClient::kExternalPurgeKey] = "true";
+  ASSERT_OK(hms_client.AlterTable("default", "control_external",
+      hms_control_external, master_ctx));
+
   // Test case: Upper-case names are handled specially in a few places.
   shared_ptr<KuduTable> test_uppercase;
   ASSERT_OK(CreateKuduTable(kudu_client, "default.UPPERCASE"));
@@ -3733,10 +3749,22 @@ TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata)
{
       "not_a_table_id", "default.bad_id", kUsername,
       KuduSchema::ToSchema(bad_id->schema())));
 
-  // Test cases: orphan tables in the HMS.
+  // Test case: orphan table in the HMS.
   ASSERT_OK(hms_catalog.CreateTable(
         "orphan-hms-table-id", "default.orphan_hms_table", kUsername,
         SchemaBuilder().Build()));
+
+  // Test case: orphan external synchronized table in the HMS.
+  ASSERT_OK(hms_catalog.CreateTable(
+      "orphan-hms-table-id-external", "default.orphan_hms_table_external", kUsername,
+      SchemaBuilder().Build(), HmsClient::kExternalTable));
+  hive::Table hms_orphan_external;
+  ASSERT_OK(hms_client.GetTable("default", "orphan_hms_table_external", &hms_orphan_external));
+  hms_orphan_external.parameters[HmsClient::kExternalPurgeKey] = "true";
+  ASSERT_OK(hms_client.AlterTable("default", "orphan_hms_table_external",
+      hms_orphan_external, master_ctx));
+
+  // Test case: orphan legacy table in the HMS.
   ASSERT_OK(CreateLegacyHmsTable(&hms_client, "default", "orphan_hms_table_legacy_managed",
         "impala::default.orphan_hms_table_legacy_managed",
         master_addr, HmsClient::kManagedTable, kUsername));
@@ -3751,6 +3779,18 @@ TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata)
{
   ASSERT_OK(CreateLegacyHmsTable(&hms_client, "default", "legacy_managed",
       "impala::default.legacy_managed", master_addr, HmsClient::kManagedTable, kUsername));
 
+  // Test case: Legacy external purge table.
+  shared_ptr<KuduTable> legacy_purge;
+  ASSERT_OK(CreateKuduTable(kudu_client, "impala::default.legacy_purge"));
+  ASSERT_OK(kudu_client->OpenTable("impala::default.legacy_purge", &legacy_purge));
+  ASSERT_OK(CreateLegacyHmsTable(&hms_client, "default", "legacy_purge",
+      "impala::default.legacy_purge", master_addr, HmsClient::kExternalTable, kUsername));
+  hive::Table hms_legacy_purge;
+  ASSERT_OK(hms_client.GetTable("default", "legacy_purge", &hms_legacy_purge));
+  hms_legacy_purge.parameters[HmsClient::kExternalPurgeKey] = "true";
+  ASSERT_OK(hms_client.AlterTable("default", "legacy_purge",
+                                  hms_legacy_purge, master_ctx));
+
   // Test case: legacy external table (pointed at the legacy managed table).
   ASSERT_OK(CreateLegacyHmsTable(&hms_client, "default", "legacy_external",
       "impala::default.legacy_managed", master_addr, HmsClient::kExternalTable, kUsername));
@@ -3779,6 +3819,7 @@ TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata)
{
 
   unordered_set<string> consistent_tables = {
     "default.control",
+    "default.control_external",
   };
 
   unordered_set<string> inconsistent_tables = {
@@ -3788,9 +3829,11 @@ TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata)
{
     "default.inconsistent_master_addrs",
     "default.bad_id",
     "default.orphan_hms_table",
+    "default.orphan_hms_table_external",
     "default.orphan_hms_table_legacy_managed",
     "default.kudu_orphan",
     "default.legacy_managed",
+    "default.legacy_purge",
     "default.legacy_no_owner",
     "legacy_external",
     "legacy_hive_incompatible_name",
@@ -3846,6 +3889,7 @@ TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata)
{
                    master_addr, hms_flags)));
   make_consistent({
     "default.orphan_hms_table",
+    "default.orphan_hms_table_external",
     "default.orphan_hms_table_legacy_managed",
   });
   NO_FATALS(check());
@@ -3865,6 +3909,7 @@ TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata)
{
         Substitute("hms fix $0 --nofix_inconsistent_tables $1", master_addr, hms_flags)));
   make_consistent({
     "default.legacy_managed",
+    "default.legacy_purge",
     "legacy_external",
     "default.legacy_no_owner",
     "legacy_hive_incompatible_name",
@@ -3886,6 +3931,7 @@ TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata)
{
 
   for (const string& table : {
     "control",
+    "control_external",
     "uppercase",
     "inconsistent_schema",
     "inconsistent_name_hms",
@@ -3893,6 +3939,7 @@ TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata)
{
     "bad_id",
     "kudu_orphan",
     "legacy_managed",
+    "legacy_purge",
     "legacy_external",
     "legacy_hive_incompatible_name",
   }) {
@@ -3908,6 +3955,7 @@ TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata)
{
   ASSERT_EQ(vector<string>({
     "default.bad_id",
     "default.control",
+    "default.control_external",
     "default.inconsistent_master_addrs",
     "default.inconsistent_name_hms",
     "default.inconsistent_schema",
@@ -3915,6 +3963,7 @@ TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata)
{
     "default.legacy_hive_incompatible_name",
     "default.legacy_managed",
     "default.legacy_no_owner",
+    "default.legacy_purge",
     "default.uppercase",
     "my_db.table",
   }), kudu_tables);
@@ -3922,6 +3971,7 @@ TEST_P(ToolTestKerberosParameterized, TestCheckAndAutomaticFixHmsMetadata)
{
   // Check that table ownership is preserved in upgraded legacy tables.
   for (auto p : vector<pair<string, string>>({
         make_pair("legacy_managed", kUsername),
+        make_pair("legacy_purge", kUsername),
         make_pair("legacy_no_owner", ""),
   })) {
     hive::Table table;
diff --git a/src/kudu/tools/tool_action_hms.cc b/src/kudu/tools/tool_action_hms.cc
index 201d031..d659282 100644
--- a/src/kudu/tools/tool_action_hms.cc
+++ b/src/kudu/tools/tool_action_hms.cc
@@ -365,7 +365,7 @@ Status AnalyzeCatalogs(const string& master_addrs,
   // table type and table ID.
   const set<string> kudu_master_addrs = Split(master_addrs, ",");
   vector<hive::Table> orphan_hms_tables;
-  unordered_map<string, vector<hive::Table>> managed_hms_tables_by_id;
+  unordered_map<string, vector<hive::Table>> synchronized_hms_tables_by_id;
   unordered_map<string, vector<hive::Table>> external_hms_tables_by_id;
   {
     vector<hive::Table> hms_tables;
@@ -392,7 +392,7 @@ Status AnalyzeCatalogs(const string& master_addrs,
         }
       }
 
-      // If this is a non-legacy, managed table, we expect a table ID to exist
+      // If this is a non-legacy, synchronized table, we expect a table ID to exist
       // in the HMS entry; look up the Kudu table by ID. Otherwise, look it up
       // by table name.
       shared_ptr<KuduTable>* kudu_table;
@@ -400,10 +400,10 @@ Status AnalyzeCatalogs(const string& master_addrs,
       const string& hms_table_id = hms_table.parameters[HmsClient::kKuduTableIdKey];
       const string& hms_table_name = hms_table.parameters[HmsClient::kKuduTableNameKey];
       if (storage_handler == HmsClient::kKuduStorageHandler &&
-          hms_table.tableType == HmsClient::kManagedTable) {
+          HmsClient::IsSynchronized(hms_table)) {
         kudu_table = FindOrNull(kudu_tables_by_id, hms_table_id);
         // If there is no kudu table that matches the id, or the id doesn't exist,
-        // lookup the table by name. This handles manged tables created when HMS
+        // lookup the table by name. This handles synchronized tables created when HMS
         // sync was off or tables with IDs out of sync.
         if (!kudu_table) {
           kudu_table = FindOrNull(kudu_tables_by_name, hms_table_name);
@@ -413,16 +413,17 @@ Status AnalyzeCatalogs(const string& master_addrs,
       }
 
       if (kudu_table) {
-        if (hms_table.tableType == HmsClient::kExternalTable) {
-          external_hms_tables_by_id[(*kudu_table)->id()].emplace_back(
+        if (HmsClient::IsSynchronized(hms_table)) {
+          synchronized_hms_tables_by_id[(*kudu_table)->id()].emplace_back(
               std::move(hms_table));
-        } else if (hms_table.tableType == HmsClient::kManagedTable) {
-          managed_hms_tables_by_id[(*kudu_table)->id()].emplace_back(
+        } else if (hms_table.tableType == HmsClient::kExternalTable) {
+          external_hms_tables_by_id[(*kudu_table)->id()].emplace_back(
               std::move(hms_table));
         }
-      } else if (hms_table.tableType == HmsClient::kManagedTable) {
-        // Note: we only consider managed HMS table entries "orphans" because
-        // external tables don't always point at valid Kudu tables.
+      } else if (HmsClient::IsSynchronized(hms_table)) {
+        // Note: we only consider synchronized HMS table entries "orphans"
+        // because unsynchronized external tables don't always point at valid
+        // Kudu tables.
         orphan_hms_tables.emplace_back(std::move(hms_table));
       }
     }
@@ -437,10 +438,10 @@ Status AnalyzeCatalogs(const string& master_addrs,
   vector<shared_ptr<KuduTable>> invalid_name_tables;
   for (auto& kudu_table_pair : kudu_tables_by_id) {
     shared_ptr<KuduTable> kudu_table = kudu_table_pair.second;
-    // Check all of the managed HMS tables.
-    vector<hive::Table>* hms_tables = FindOrNull(managed_hms_tables_by_id,
+    // Check all of the synchronized HMS tables.
+    vector<hive::Table>* hms_tables = FindOrNull(synchronized_hms_tables_by_id,
                                                  kudu_table_pair.first);
-    // If the there are no managed HMS table entries, this table is missing
+    // If the there are no synchronized HMS table entries, this table is missing
     // HMS tables and might have an invalid table name.
     if (!hms_tables) {
       const string& table_name = kudu_table->name();
@@ -451,7 +452,7 @@ Status AnalyzeCatalogs(const string& master_addrs,
       } else {
         missing_tables.emplace_back(std::move(kudu_table));
       }
-    // If there is a single managed HMS table, this table could be unsynced or
+    // If there is a single synchronized HMS table, this table could be unsynced or
     // using the legacy handler.
     } else if (hms_tables->size() == 1) {
       hive::Table& hms_table = (*hms_tables)[0];
@@ -462,7 +463,7 @@ Status AnalyzeCatalogs(const string& master_addrs,
       } else if (storage_handler == HmsClient::kLegacyKuduStorageHandler) {
         legacy_tables.emplace_back(make_pair(std::move(kudu_table), std::move(hms_table)));
       }
-    // Otherwise, there are multiple managed HMS tables for a single Kudu table.
+    // Otherwise, there are multiple synchronized HMS tables for a single Kudu table.
     } else {
       for (hive::Table& hms_table : *hms_tables) {
         duplicate_tables.emplace_back(make_pair(kudu_table, std::move(hms_table)));
@@ -694,8 +695,7 @@ Status FixHmsMetadata(const RunnerContext& context) {
               hms_table_name));
       }
 
-      if (hms_table.tableType == HmsClient::kManagedTable &&
-          kudu_table.name() != hms_table_name) {
+      if (HmsClient::IsSynchronized(hms_table) && kudu_table.name() != hms_table_name)
{
         if (FLAGS_dryrun) {
           LOG(INFO) << "[dryrun] Renaming Kudu table " << TableIdent(kudu_table)
                     << " to " << hms_table_name;


Mime
View raw message