kudu-commits mailing list archives

From t...@apache.org
Subject [3/5] kudu git commit: KUDU-2191 (2/n): Hive Metastore client
Date Fri, 03 Nov 2017 00:27:33 GMT
http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/src/kudu/hms/hms_client-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_client-test.cc b/src/kudu/hms/hms_client-test.cc
new file mode 100644
index 0000000..72214a9
--- /dev/null
+++ b/src/kudu/hms/hms_client-test.cc
@@ -0,0 +1,208 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/hms/hms_client.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <glog/stl_logging.h> // IWYU pragma: keep
+#include <gtest/gtest.h>
+
+#include "kudu/hms/hive_metastore_constants.h"
+#include "kudu/hms/hive_metastore_types.h"
+#include "kudu/hms/mini_hms.h"
+#include "kudu/util/status.h"
+#include "kudu/util/test_macros.h"
+#include "kudu/util/test_util.h"
+
+using std::make_pair;
+using std::string;
+using std::vector;
+
+namespace kudu {
+namespace hms {
+
+class HmsClientTest : public KuduTest {
+ public:
+
+  Status CreateTable(HmsClient* client,
+                     const string& database_name,
+                     const string& table_name,
+                     const string& table_id) {
+    hive::Table table;
+    table.dbName = database_name;
+    table.tableName = table_name;
+    table.tableType = "MANAGED_TABLE";
+
+    table.__set_parameters({
+        make_pair(HmsClient::kKuduTableIdKey, table_id),
+        make_pair(HmsClient::kKuduMasterAddrsKey, string("TODO")),
+        make_pair(hive::g_hive_metastore_constants.META_TABLE_STORAGE,
+                  HmsClient::kKuduStorageHandler),
+    });
+
+    return client->CreateTable(table);
+  }
+
+  Status DropTable(HmsClient* client,
+                   const string& database_name,
+                   const string& table_name,
+                   const string& table_id) {
+    hive::EnvironmentContext env_ctx;
+    env_ctx.__set_properties({ make_pair(HmsClient::kKuduTableIdKey, table_id) });
+    return client->DropTableWithContext(database_name, table_name, env_ctx);
+  }
+};
+
+TEST_F(HmsClientTest, TestHmsOperations) {
+  MiniHms hms;
+  ASSERT_OK(hms.Start());
+
+  HmsClient client(hms.address());
+  ASSERT_OK(client.Start());
+
+  // Create a database.
+  string database_name = "my_db";
+  hive::Database db;
+  db.name = database_name;
+  ASSERT_OK(client.CreateDatabase(db));
+  ASSERT_TRUE(client.CreateDatabase(db).IsAlreadyPresent());
+
+  // Get all databases.
+  vector<string> databases;
+  ASSERT_OK(client.GetAllDatabases(&databases));
+  std::sort(databases.begin(), databases.end());
+  EXPECT_EQ(vector<string>({ "default", database_name }), databases)
+      << "Databases: " << databases;
+
+  // Get a specific database.
+  hive::Database my_db;
+  ASSERT_OK(client.GetDatabase(database_name, &my_db));
+  EXPECT_EQ(database_name, my_db.name) << "my_db: " << my_db;
+
+  string table_name = "my_table";
+  string table_id = "table-id";
+
+  // Check that the HMS rejects Kudu tables without a table ID.
+  ASSERT_STR_CONTAINS(CreateTable(&client, database_name, table_name, "").ToString(),
+                      "Kudu table entry must contain a table ID");
+
+  // Create a table.
+  ASSERT_OK(CreateTable(&client, database_name, table_name, table_id));
+  ASSERT_TRUE(CreateTable(&client, database_name, table_name, table_id).IsAlreadyPresent());
+
+  // Retrieve a table.
+  hive::Table my_table;
+  ASSERT_OK(client.GetTable(database_name, table_name, &my_table));
+  EXPECT_EQ(database_name, my_table.dbName) << "my_table: " << my_table;
+  EXPECT_EQ(table_name, my_table.tableName);
+  EXPECT_EQ(table_id, my_table.parameters[HmsClient::kKuduTableIdKey]);
+  EXPECT_EQ(HmsClient::kKuduStorageHandler,
+            my_table.parameters[hive::g_hive_metastore_constants.META_TABLE_STORAGE]);
+  EXPECT_EQ("MANAGED_TABLE", my_table.tableType);
+
+  string new_table_name = "my_altered_table";
+
+  // Renaming the table with an incorrect table ID should fail.
+  hive::Table altered_table(my_table);
+  altered_table.tableName = new_table_name;
+  altered_table.parameters[HmsClient::kKuduTableIdKey] = "bogus-table-id";
+  ASSERT_TRUE(client.AlterTable(database_name, table_name, altered_table).IsRuntimeError());
+
+  // Rename the table.
+  altered_table.parameters[HmsClient::kKuduTableIdKey] = table_id;
+  ASSERT_OK(client.AlterTable(database_name, table_name, altered_table));
+
+  // Original table is gone.
+  ASSERT_TRUE(client.AlterTable(database_name, table_name, altered_table).IsIllegalState());
+
+  // Check that the altered table's properties are intact.
+  hive::Table renamed_table;
+  ASSERT_OK(client.GetTable(database_name, new_table_name, &renamed_table));
+  EXPECT_EQ(database_name, renamed_table.dbName);
+  EXPECT_EQ(new_table_name, renamed_table.tableName);
+  EXPECT_EQ(table_id, renamed_table.parameters[HmsClient::kKuduTableIdKey]);
+  EXPECT_EQ(HmsClient::kKuduStorageHandler,
+            renamed_table.parameters[hive::g_hive_metastore_constants.META_TABLE_STORAGE]);
+  EXPECT_EQ("MANAGED_TABLE", renamed_table.tableType);
+
+  // Create a table with an uppercase name.
+  string uppercase_table_name = "my_UPPERCASE_Table";
+  ASSERT_OK(CreateTable(&client, database_name, uppercase_table_name, "uppercase-table-id"));
+
+  // Check that the HMS rejects table names containing non-ASCII characters.
+  ASSERT_TRUE(CreateTable(&client, database_name, "☃ sculptures 😉", table_id).IsInvalidArgument());
+
+  // Get all tables.
+  vector<string> tables;
+  ASSERT_OK(client.GetAllTables(database_name, &tables));
+  std::sort(tables.begin(), tables.end());
+  EXPECT_EQ(vector<string>({ new_table_name, "my_uppercase_table" }), tables)
+      << "Tables: " << tables;
+
+  // Check that the HMS rejects Kudu table drops with a bogus table ID.
+  ASSERT_TRUE(DropTable(&client, database_name, new_table_name, "bogus-table-id").IsRuntimeError());
+  // Check that the HMS rejects non-existent table drops.
+  ASSERT_TRUE(DropTable(&client, database_name, "foo-bar", "bogus-table-id").IsNotFound());
+
+  // Drop a table.
+  ASSERT_OK(DropTable(&client, database_name, new_table_name, table_id));
+
+  // Drop the database.
+  ASSERT_TRUE(client.DropDatabase(database_name).IsIllegalState());
+  // TODO(HIVE-17008)
+  // ASSERT_OK(client.DropDatabase(database_name, Cascade::kTrue));
+  // TODO(HIVE-17008)
+  // ASSERT_TRUE(client.DropDatabase(database_name).IsNotFound());
+
+  int64_t event_id;
+  ASSERT_OK(client.GetCurrentNotificationEventId(&event_id));
+
+  // Retrieve the notification log and spot-check that the results look sensible.
+  vector<hive::NotificationEvent> events;
+  ASSERT_OK(client.GetNotificationEvents(-1, 100, &events));
+
+  ASSERT_EQ(5, events.size());
+  EXPECT_EQ("CREATE_DATABASE", events[0].eventType);
+  EXPECT_EQ("CREATE_TABLE", events[1].eventType);
+  EXPECT_EQ("ALTER_TABLE", events[2].eventType);
+  EXPECT_EQ("CREATE_TABLE", events[3].eventType);
+  EXPECT_EQ("DROP_TABLE", events[4].eventType);
+  // TODO(HIVE-17008)
+  //EXPECT_EQ("DROP_TABLE", events[5].eventType);
+  //EXPECT_EQ("DROP_DATABASE", events[6].eventType);
+
+  // Retrieve a specific notification log.
+  events.clear();
+  ASSERT_OK(client.GetNotificationEvents(2, 1, &events));
+  ASSERT_EQ(1, events.size()) << "events: " << events;
+  EXPECT_EQ("ALTER_TABLE", events[0].eventType);
+  ASSERT_OK(client.Stop());
+}
+
+TEST_F(HmsClientTest, TestDeserializeJsonTable) {
+  string json = R"#({"1":{"str":"table_name"},"2":{"str":"database_name"}})#";
+  hive::Table table;
+  ASSERT_OK(HmsClient::DeserializeJsonTable(json, &table));
+  ASSERT_EQ("table_name", table.tableName);
+  ASSERT_EQ("database_name", table.dbName);
+}
+
+} // namespace hms
+} // namespace kudu
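
The JSON string in TestDeserializeJsonTable above is Thrift's TJSONProtocol
encoding, which keys each value by its numeric field ID from
hive_metastore.thrift rather than by name: field 1 is tableName and field 2 is
dbName. A minimal sketch of the inverse operation (SerializeJsonTable is a
hypothetical helper, not part of this patch), using only the Thrift headers
already pulled in by hms_client.cc:

    // Hypothetical helper: serialize a hive::Table into the same
    // field-ID-keyed JSON that DeserializeJsonTable consumes.
    std::string SerializeJsonTable(const hive::Table& table) {
      auto buffer = std::make_shared<apache::thrift::transport::TMemoryBuffer>();
      apache::thrift::protocol::TJSONProtocol protocol(buffer);
      table.write(&protocol);              // Encodes each field under its numeric ID.
      return buffer->getBufferAsString();  // e.g. {"1":{"str":"table_name"},...}
    }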

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/src/kudu/hms/hms_client.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_client.cc b/src/kudu/hms/hms_client.cc
new file mode 100644
index 0000000..2ee87a8
--- /dev/null
+++ b/src/kudu/hms/hms_client.cc
@@ -0,0 +1,250 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/hms/hms_client.h"
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <glog/logging.h>
+#include <thrift/Thrift.h>
+#include <thrift/protocol/TBinaryProtocol.h>
+#include <thrift/protocol/TJSONProtocol.h>
+#include <thrift/transport/TBufferTransports.h>
+#include <thrift/transport/TSocket.h>
+
+#include "kudu/gutil/strings/split.h"
+#include "kudu/gutil/strings/strip.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/hms/ThriftHiveMetastore.h"
+#include "kudu/hms/hive_metastore_constants.h"
+#include "kudu/util/net/net_util.h"
+#include "kudu/util/status.h"
+#include "kudu/util/stopwatch.h"
+
+using apache::thrift::TException;
+using apache::thrift::protocol::TBinaryProtocol;
+using apache::thrift::protocol::TJSONProtocol;
+using apache::thrift::transport::TBufferedTransport;
+using apache::thrift::transport::TMemoryBuffer;
+using apache::thrift::transport::TSocket;
+using std::make_shared;
+using std::shared_ptr;
+using std::string;
+using std::vector;
+
+namespace kudu {
+namespace hms {
+
+// The entire set of Hive-specific exceptions is defined in
+// hive_metastore.thrift. We do not try to handle all of them individually:
+// TException acts as a catch-all for the rest, as well as for network errors.
+#define HMS_RET_NOT_OK(call, msg) \
+  try { \
+    (call); \
+  } catch (const hive::AlreadyExistsException& e) { \
+    return Status::AlreadyPresent((msg), e.what()); \
+  } catch (const hive::UnknownDBException& e) { \
+    return Status::NotFound((msg), e.what()); \
+  } catch (const hive::UnknownTableException& e) { \
+    return Status::NotFound((msg), e.what()); \
+  } catch (const hive::NoSuchObjectException& e) { \
+    return Status::NotFound((msg), e.what()); \
+  } catch (const hive::InvalidObjectException& e) { \
+    return Status::InvalidArgument((msg), e.what()); \
+  } catch (const hive::InvalidOperationException& e) { \
+    return Status::IllegalState((msg), e.what()); \
+  } catch (const hive::MetaException& e) { \
+    return Status::RuntimeError((msg), e.what()); \
+  } catch (const TException& e) { \
+    return Status::IOError((msg), e.what()); \
+  }
+
+const char* const HmsClient::kKuduTableIdKey = "kudu.table_id";
+const char* const HmsClient::kKuduMasterAddrsKey = "kudu.master_addresses";
+const char* const HmsClient::kKuduStorageHandler = "org.apache.kudu.hive.KuduStorageHandler";
+
+const char* const HmsClient::kTransactionalEventListeners =
+  "hive.metastore.transactional.event.listeners";
+const char* const HmsClient::kDbNotificationListener =
+  "org.apache.hive.hcatalog.listener.DbNotificationListener";
+const char* const HmsClient::kKuduMetastorePlugin =
+  "org.apache.kudu.hive.metastore.KuduMetastorePlugin";
+
+const int kSlowExecutionWarningThresholdMs = 500;
+
+HmsClient::HmsClient(const HostPort& hms_address)
+    : client_(nullptr) {
+  auto socket = make_shared<TSocket>(hms_address.host(), hms_address.port());
+  auto transport = make_shared<TBufferedTransport>(std::move(socket));
+  auto protocol = make_shared<TBinaryProtocol>(std::move(transport));
+  client_ = hive::ThriftHiveMetastoreClient(std::move(protocol));
+}
+
+HmsClient::~HmsClient() {
+  WARN_NOT_OK(Stop(), "failed to shut down HMS client");
+}
+
+Status HmsClient::Start() {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, 1000 /* ms */, "starting HMS client");
+  HMS_RET_NOT_OK(client_.getOutputProtocol()->getTransport()->open(),
+                 "failed to open Hive MetaStore connection");
+
+  // Immediately after connecting to the HMS, check that it is configured with
+  // the required event listeners.
+  string event_listener_config;
+  HMS_RET_NOT_OK(client_.get_config_value(event_listener_config,
+                                          kTransactionalEventListeners, ""),
+                 "failed to get Hive MetaStore transactional event listener configuration");
+
+  // Parse the set of listeners from the configuration string.
+  vector<string> listeners = strings::Split(event_listener_config, ",", strings::SkipWhitespace());
+  for (auto& listener : listeners) {
+    StripWhiteSpace(&listener);
+  }
+
+  for (const auto& required_listener : { kDbNotificationListener, kKuduMetastorePlugin }) {
+    if (std::find(listeners.begin(), listeners.end(), required_listener) == listeners.end()) {
+      return Status::IllegalState(
+          strings::Substitute("Hive Metastore configuration is missing required "
+                              "transactional event listener ($0): $1",
+                              kTransactionalEventListeners, required_listener));
+    }
+  }
+
+  return Status::OK();
+}
+
+Status HmsClient::Stop() {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "stopping HMS client");
+  HMS_RET_NOT_OK(client_.getInputProtocol()->getTransport()->close(),
+                 "failed to close Hive MetaStore connection");
+  return Status::OK();
+}
+
+Status HmsClient::CreateDatabase(const hive::Database& database) {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "create HMS database");
+  HMS_RET_NOT_OK(client_.create_database(database), "failed to create Hive MetaStore database");
+  return Status::OK();
+}
+
+Status HmsClient::DropDatabase(const string& database_name, Cascade cascade) {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "drop HMS database");
+  HMS_RET_NOT_OK(client_.drop_database(database_name, true, cascade == Cascade::kTrue),
+                 "failed to drop Hive MetaStore database");
+  return Status::OK();
+}
+
+Status HmsClient::GetAllDatabases(vector<string>* databases) {
+  DCHECK(databases);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "get all HMS databases");
+  HMS_RET_NOT_OK(client_.get_all_databases(*databases),
+                 "failed to get Hive MetaStore databases");
+  return Status::OK();
+}
+
+Status HmsClient::GetDatabase(const string& pattern, hive::Database* database) {
+  DCHECK(database);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "get HMS database");
+  HMS_RET_NOT_OK(client_.get_database(*database, pattern),
+                 "failed to get Hive MetaStore database");
+  return Status::OK();
+}
+
+Status HmsClient::CreateTable(const hive::Table& table) {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "create HMS table");
+  HMS_RET_NOT_OK(client_.create_table(table), "failed to create Hive MetaStore table");
+  return Status::OK();
+}
+
+Status HmsClient::AlterTable(const std::string& database_name,
+                             const std::string& table_name,
+                             const hive::Table& table) {
+  HMS_RET_NOT_OK(client_.alter_table(database_name, table_name, table),
+                 "failed to alter Hive MetaStore table");
+  return Status::OK();
+}
+
+Status HmsClient::DropTableWithContext(const string& database_name,
+                                       const string& table_name,
+                                       const hive::EnvironmentContext& env_ctx) {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "drop HMS table");
+  HMS_RET_NOT_OK(client_.drop_table_with_environment_context(database_name, table_name,
+                                                             true, env_ctx),
+                 "failed to drop Hive MetaStore table");
+  return Status::OK();
+}
+
+Status HmsClient::GetAllTables(const string& database_name,
+                               vector<string>* tables) {
+  DCHECK(tables);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "get all HMS tables");
+  HMS_RET_NOT_OK(client_.get_all_tables(*tables, database_name),
+                 "failed to get Hive MetaStore tables");
+  return Status::OK();
+}
+
+Status HmsClient::GetTable(const string& database_name,
+                           const string& table_name,
+                           hive::Table* table) {
+  DCHECK(table);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs, "get HMS table");
+  HMS_RET_NOT_OK(client_.get_table(*table, database_name, table_name),
+                 "failed to get Hive MetaStore table");
+  return Status::OK();
+}
+
+Status HmsClient::GetCurrentNotificationEventId(int64_t* event_id) {
+  DCHECK(event_id);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs,
+                            "get HMS current notification event ID");
+  hive::CurrentNotificationEventId response;
+  HMS_RET_NOT_OK(client_.get_current_notificationEventId(response),
+                 "failed to get Hive MetaStore current event ID");
+  *event_id = response.eventId;
+  return Status::OK();
+}
+
+Status HmsClient::GetNotificationEvents(int64_t last_event_id,
+                                        int32_t max_events,
+                                        vector<hive::NotificationEvent>* events) {
+  DCHECK(events);
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, kSlowExecutionWarningThresholdMs,
+                            "get HMS notification events");
+  hive::NotificationEventRequest request;
+  request.lastEvent = last_event_id;
+  request.__set_maxEvents(max_events);
+  hive::NotificationEventResponse response;
+  HMS_RET_NOT_OK(client_.get_next_notification(response, request),
+                 "failed to get Hive MetaStore next notification");
+  events->swap(response.events);
+  return Status::OK();
+}
+
+Status HmsClient::DeserializeJsonTable(Slice json, hive::Table* table) {
+  shared_ptr<TMemoryBuffer> membuffer(new TMemoryBuffer(json.size()));
+  membuffer->write(json.data(), json.size());
+  TJSONProtocol protocol(membuffer);
+  HMS_RET_NOT_OK(table->read(&protocol), "failed to deserialize JSON table");
+  return Status::OK();
+}
+
+} // namespace hms
+} // namespace kudu
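
To make the macro's control flow concrete, the HMS_RET_NOT_OK call in
CreateTable above expands to roughly the following (abbreviated sketch; the
remaining Hive exception clauses from the macro follow the same pattern):

    try {
      (client_.create_table(table));
    } catch (const hive::AlreadyExistsException& e) {
      return Status::AlreadyPresent("failed to create Hive MetaStore table", e.what());
    } catch (const hive::MetaException& e) {
      return Status::RuntimeError("failed to create Hive MetaStore table", e.what());
    } catch (const TException& e) {
      // Catch-all for network errors and any Hive exception not matched above.
      return Status::IOError("failed to create Hive MetaStore table", e.what());
    }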

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/src/kudu/hms/hms_client.h
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_client.h b/src/kudu/hms/hms_client.h
new file mode 100644
index 0000000..a5ed09f
--- /dev/null
+++ b/src/kudu/hms/hms_client.h
@@ -0,0 +1,149 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector> // IWYU pragma: keep
+
+#include "kudu/gutil/port.h"
+#include "kudu/hms/ThriftHiveMetastore.h"
+#include "kudu/hms/hive_metastore_types.h"
+#include "kudu/util/slice.h"
+#include "kudu/util/status.h"
+
+namespace hive = Apache::Hadoop::Hive;
+
+namespace kudu {
+
+class HostPort;
+
+namespace hms {
+
+// Whether to drop child objects when dropping an HMS object.
+enum class Cascade {
+  kTrue,
+  kFalse,
+};
+
+// A client for the Hive MetaStore.
+//
+// All operations are synchronous, and may block.
+//
+// HmsClient is not thread safe.
+//
+// TODO(dan): this client is lacking adequate failure handling, including:
+//  - Documentation of specific Status codes returned in error scenarios
+//  - Connection timeouts
+//  - Handling and/or documentation of retry and reconnection behavior
+//
+// TODO(dan): this client does not handle HA (multi) HMS deployments.
+//
+// TODO(dan): this client does not handle Kerberized HMS deployments.
+class HmsClient {
+ public:
+
+  static const char* const kKuduTableIdKey;
+  static const char* const kKuduMasterAddrsKey;
+  static const char* const kKuduStorageHandler;
+
+  static const char* const kTransactionalEventListeners;
+  static const char* const kDbNotificationListener;
+  static const char* const kKuduMetastorePlugin;
+
+  explicit HmsClient(const HostPort& hms_address);
+  ~HmsClient();
+
+  // Starts the HMS client.
+  //
+  // This method will open a synchronous TCP connection to the HMS. If the HMS
+  // cannot be reached, an error is returned.
+  //
+  // Must be called before any subsequent operations using the client.
+  Status Start() WARN_UNUSED_RESULT;
+
+  // Stops the HMS client.
+  //
+  // This is optional; if not called the destructor will stop the client.
+  Status Stop() WARN_UNUSED_RESULT;
+
+  // Creates a new database in the HMS.
+  //
+  // If a database with the same name already exists, an AlreadyPresent status
+  // is returned.
+  Status CreateDatabase(const hive::Database& database) WARN_UNUSED_RESULT;
+
+  // Drops a database in the HMS.
+  //
+  // If 'cascade' is Cascade::kTrue, tables in the database will automatically
+  // be dropped (this is the default in HiveQL). Otherwise, the operation will
+  // return IllegalState if the database contains tables.
+  Status DropDatabase(const std::string& database_name,
+                      Cascade cascade = Cascade::kFalse) WARN_UNUSED_RESULT;
+
+  // Returns all HMS databases.
+  Status GetAllDatabases(std::vector<std::string>* databases) WARN_UNUSED_RESULT;
+
+  // Retrieves a database from the HMS.
+  Status GetDatabase(const std::string& pattern, hive::Database* database) WARN_UNUSED_RESULT;
+
+  // Creates a table in the HMS.
+  Status CreateTable(const hive::Table& table) WARN_UNUSED_RESULT;
+
+  // Alters a table in the HMS.
+  Status AlterTable(const std::string& database_name,
+                    const std::string& table_name,
+                    const hive::Table& table) WARN_UNUSED_RESULT;
+
+  // Drops a Kudu table in the HMS.
+  Status DropTableWithContext(const std::string& database_name,
+                              const std::string& table_name,
+                              const hive::EnvironmentContext& env_ctx) WARN_UNUSED_RESULT;
+
+  // Retrieves a table's metadata from the HMS.
+  Status GetTable(const std::string& database_name,
+                  const std::string& table_name,
+                  hive::Table* table) WARN_UNUSED_RESULT;
+
+  // Retrieves all tables in an HMS database.
+  Status GetAllTables(const std::string& database_name,
+                      std::vector<std::string>* tables) WARN_UNUSED_RESULT;
+
+  // Retrieves the current HMS notification event ID.
+  Status GetCurrentNotificationEventId(int64_t* event_id) WARN_UNUSED_RESULT;
+
+  // Retrieves HMS notification log events, beginning after 'last_event_id'.
+  Status GetNotificationEvents(int64_t last_event_id,
+                               int32_t max_events,
+                               std::vector<hive::NotificationEvent>* events) WARN_UNUSED_RESULT;
+
+  // Deserializes a JSON encoded table.
+  //
+  // Notification event log messages often include table objects serialized as
+  // JSON.
+  //
+  // See org.apache.hadoop.hive.metastore.messaging.json.JSONMessageFactory for
+  // the Java equivalent.
+  static Status DeserializeJsonTable(Slice json, hive::Table* table) WARN_UNUSED_RESULT;
+
+ private:
+  hive::ThriftHiveMetastoreClient client_;
+};
+
+} // namespace hms
+} // namespace kudu
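
Taken together, the client API follows a start/operate/stop pattern. A minimal
hedged sketch (ListKuduDatabases is a hypothetical caller; RETURN_NOT_OK is
the standard macro from kudu/util/status.h):

    kudu::Status ListKuduDatabases(const kudu::HostPort& hms_address,
                                   std::vector<std::string>* databases) {
      kudu::hms::HmsClient client(hms_address);
      // Opens the TCP connection and verifies the required event listeners.
      RETURN_NOT_OK(client.Start());
      RETURN_NOT_OK(client.GetAllDatabases(databases));
      // Stop() is optional; the destructor also closes the connection.
      return client.Stop();
    }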

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/src/kudu/hms/mini_hms.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/mini_hms.cc b/src/kudu/hms/mini_hms.cc
new file mode 100644
index 0000000..392033b
--- /dev/null
+++ b/src/kudu/hms/mini_hms.cc
@@ -0,0 +1,177 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/hms/mini_hms.h"
+
+#include <algorithm>
+#include <csignal>
+#include <cstdlib>
+#include <map>
+#include <memory>
+#include <ostream>
+#include <string>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/env.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/path_util.h"
+#include "kudu/util/slice.h"
+#include "kudu/util/status.h"
+#include "kudu/util/stopwatch.h"
+#include "kudu/util/string_case.h"
+#include "kudu/util/subprocess.h"
+#include "kudu/util/test_util.h"
+
+using std::map;
+using std::string;
+using std::unique_ptr;
+using strings::Substitute;
+
+namespace kudu {
+namespace hms {
+
+MiniHms::~MiniHms() {
+  if (hms_process_) {
+    VLOG(1) << "Stopping HMS";
+    unique_ptr<Subprocess> proc = std::move(hms_process_);
+    WARN_NOT_OK(proc->KillAndWait(SIGTERM), "failed to stop the Hive MetaStore process");
+  }
+}
+
+namespace {
+
+Status FindHomeDir(const char* name, const string& bin_dir, string* home_dir) {
+  string name_upper;
+  ToUpperCase(name, &name_upper);
+
+  string env_var = Substitute("$0_HOME", name_upper);
+  const char* env = std::getenv(env_var.c_str());
+  *home_dir = env == nullptr ? JoinPathSegments(bin_dir, Substitute("$0-home", name)) : env;
+
+  if (!Env::Default()->FileExists(*home_dir)) {
+    return Status::NotFound(Substitute("$0 directory does not exist", env_var), *home_dir);
+  }
+  return Status::OK();
+}
+
+} // anonymous namespace
+
+Status MiniHms::Start() {
+  SCOPED_LOG_SLOW_EXECUTION(WARNING, 20000, "Starting HMS");
+  CHECK(!hms_process_);
+
+  VLOG(1) << "Starting HMS";
+
+  Env* env = Env::Default();
+
+  string exe;
+  RETURN_NOT_OK(env->GetExecutablePath(&exe));
+  const string bin_dir = DirName(exe);
+
+  string hadoop_home;
+  string hive_home;
+  string java_home;
+  RETURN_NOT_OK(FindHomeDir("hadoop", bin_dir, &hadoop_home));
+  RETURN_NOT_OK(FindHomeDir("hive", bin_dir, &hive_home));
+  RETURN_NOT_OK(FindHomeDir("java", bin_dir, &java_home));
+
+  auto tmp_dir = GetTestDataDirectory();
+
+  RETURN_NOT_OK(CreateHiveSite(tmp_dir));
+
+  // Comma-separated list of additional jars to add to the HMS classpath.
+  string aux_jars = Substitute("$0/hcatalog/share/hcatalog,$1/hms-plugin.jar",
+                               hive_home, bin_dir);
+  map<string, string> env_vars {
+      { "JAVA_HOME", java_home },
+      { "HADOOP_HOME", hadoop_home },
+      { "HIVE_AUX_JARS_PATH", aux_jars },
+      { "HIVE_CONF_DIR", tmp_dir },
+  };
+
+  // Start the HMS.
+  hms_process_.reset(new Subprocess({
+        Substitute("$0/bin/hive", hive_home),
+        "--service", "metastore",
+        "-v",
+        "-p", "0", // Use an ephemeral port.
+  }));
+
+  hms_process_->SetEnvVars(env_vars);
+  RETURN_NOT_OK(hms_process_->Start());
+
+  // Wait for the HMS to start listening on its port and begin operation.
+  VLOG(1) << "Waiting for HMS ports";
+  return WaitForTcpBind(hms_process_->pid(), &port_, MonoDelta::FromSeconds(20));
+}
+
+Status MiniHms::CreateHiveSite(const string& tmp_dir) const {
+  // 'datanucleus.schema.autoCreateAll' and 'hive.metastore.schema.verification'
+  // allow Hive to startup and run without first running the schemaTool.
+  // 'hive.metastore.event.db.listener.timetolive' configures how long the
+  // Metastore will store notification log events before GCing them.
+  static const string kFileTemplate = R"(
+<configuration>
+  <property>
+    <name>hive.metastore.transactional.event.listeners</name>
+    <value>
+      org.apache.hive.hcatalog.listener.DbNotificationListener,
+      org.apache.kudu.hive.metastore.KuduMetastorePlugin
+    </value>
+  </property>
+
+  <property>
+    <name>datanucleus.schema.autoCreateAll</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>hive.metastore.schema.verification</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <name>hive.metastore.warehouse.dir</name>
+    <value>file://$1/warehouse/</value>
+  </property>
+
+  <property>
+    <name>javax.jdo.option.ConnectionURL</name>
+    <value>jdbc:derby:memory:$1/metadb;create=true</value>
+  </property>
+
+  <property>
+    <name>hive.metastore.event.db.listener.timetolive</name>
+    <value>$0s</value>
+  </property>
+
+</configuration>
+  )";
+
+  string file_contents = strings::Substitute(kFileTemplate,
+                                             notification_log_ttl_.ToSeconds(),
+                                             tmp_dir);
+
+  return WriteStringToFile(Env::Default(),
+                           file_contents,
+                           JoinPathSegments(tmp_dir, "hive-site.xml"));
+}
+
+} // namespace hms
+} // namespace kudu
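
For clarity, the Substitute call binds $0 to the notification log TTL in
seconds and $1 to the temporary directory. With the default TTL of 86400
seconds (see mini_hms.h) and an assumed test directory of /tmp/kudu-test, the
substituted values in the generated hive-site.xml would read:

    <value>file:///tmp/kudu-test/warehouse/</value>
    <value>jdbc:derby:memory:/tmp/kudu-test/metadb;create=true</value>
    <value>86400s</value>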

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/src/kudu/hms/mini_hms.h
----------------------------------------------------------------------
diff --git a/src/kudu/hms/mini_hms.h b/src/kudu/hms/mini_hms.h
new file mode 100644
index 0000000..2e5ac30
--- /dev/null
+++ b/src/kudu/hms/mini_hms.h
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/port.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/net/net_util.h"
+#include "kudu/util/status.h"
+#include "kudu/util/subprocess.h" // IWYU pragma: keep
+
+namespace kudu {
+namespace hms {
+
+class MiniHms {
+ public:
+
+  MiniHms() = default;
+
+  ~MiniHms();
+
+  // Configures the notification log TTL. Must be called before Start().
+  void SetNotificationLogTtl(MonoDelta ttl) {
+    CHECK(!hms_process_);
+    notification_log_ttl_ = ttl;
+  }
+
+  // Starts the mini Hive metastore.
+  Status Start() WARN_UNUSED_RESULT;
+
+  // Returns the address of the Hive metastore. Should only be called after the
+  // metastore is started.
+  HostPort address() const {
+    return HostPort("127.0.0.1", port_);
+  }
+
+ private:
+
+  // Creates a hive-site.xml for the mini HMS.
+  Status CreateHiveSite(const std::string& tmp_dir) const WARN_UNUSED_RESULT;
+
+  std::unique_ptr<Subprocess> hms_process_;
+  MonoDelta notification_log_ttl_ = MonoDelta::FromSeconds(86400);
+  uint16_t port_ = 0;
+};
+
+} // namespace hms
+} // namespace kudu
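
A short sketch of the intended call ordering in a test (names unqualified as
in hms_client-test.cc; the one-second TTL is illustrative, and any override
must precede Start()):

    MiniHms hms;
    hms.SetNotificationLogTtl(MonoDelta::FromSeconds(1));
    ASSERT_OK(hms.Start());

    HmsClient client(hms.address());
    ASSERT_OK(client.Start());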

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/thirdparty/LICENSE.txt
----------------------------------------------------------------------
diff --git a/thirdparty/LICENSE.txt b/thirdparty/LICENSE.txt
index 623fd46..b229691 100644
--- a/thirdparty/LICENSE.txt
+++ b/thirdparty/LICENSE.txt
@@ -619,7 +619,9 @@ Source: http://www.boost.org/
   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   DEALINGS IN THE SOFTWARE.
 
-
+--------------------------------------------------------------------------------
+thirdparty/thrift-*/: Apache License v2.0
+Source: https://thrift.apache.org
 
 ================================================================================
 BUILD-ONLY DEPENDENCIES
@@ -655,3 +657,17 @@ thirdparty/python-*: Python 2.7 license (https://www.python.org/download/release
 Source: http://www.python.org/
 NOTE: optional build-time dependency, not linked or bundled.
 
+--------------------------------------------------------------------------------
+thirdparty/bison: GPLv3 license (https://www.gnu.org/licenses/gpl.html)
+Source: https://www.gnu.org/software/bison
+NOTE: build-time dependency
+
+--------------------------------------------------------------------------------
+thirdparty/hadoop-*/: Apache License v2.0
+Source: https://hadoop.apache.org
+NOTE: build-time dependency
+
+--------------------------------------------------------------------------------
+thirdparty/hive-*/: Apache License v2.0
+Source: https://hive.apache.org
+NOTE: build-time dependency

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/thirdparty/build-definitions.sh
----------------------------------------------------------------------
diff --git a/thirdparty/build-definitions.sh b/thirdparty/build-definitions.sh
index e5b8045..e38d54a 100644
--- a/thirdparty/build-definitions.sh
+++ b/thirdparty/build-definitions.sh
@@ -728,3 +728,59 @@ build_sparsepp() {
   rsync -av --delete sparsepp/ $PREFIX/include/sparsepp/
   popd
 }
+
+build_thrift() {
+  THRIFT_BDIR=$TP_BUILD_DIR/$THRIFT_NAME$MODE_SUFFIX
+  mkdir -p $THRIFT_BDIR
+  pushd $THRIFT_BDIR
+  rm -Rf CMakeCache.txt CMakeFiles/
+
+  # Thrift depends on bison.
+  #
+  # Configure for a very minimal install - only the C++ client libraries are needed.
+  # Thrift requires C++11 when compiled on Linux against libc++ (see cxxfunctional.h).
+  CFLAGS="$EXTRA_CFLAGS" \
+    CXXFLAGS="$EXTRA_CXXFLAGS -std=c++11" \
+    LDFLAGS="$EXTRA_LDFLAGS" \
+    LIBS="$EXTRA_LIBS" \
+    cmake \
+    -DBOOST_ROOT=$PREFIX \
+    -DBUILD_C_GLIB=OFF \
+    -DBUILD_COMPILER=ON \
+    -DBUILD_CPP=ON \
+    -DBUILD_EXAMPLES=OFF \
+    -DBUILD_HASKELL=OFF \
+    -DBUILD_JAVA=OFF \
+    -DBUILD_PYTHON=OFF \
+    -DBUILD_TESTING=OFF \
+    -DBUILD_TUTORIALS=OFF \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_INSTALL_PREFIX=$PREFIX \
+    -DWITH_BOOSTTHREADS=OFF \
+    -DWITH_LIBEVENT=OFF \
+    -DWITH_OPENSSL=OFF \
+    -DWITH_PLUGIN=OFF \
+    $EXTRA_CMAKE_FLAGS \
+    $THRIFT_SOURCE
+
+  ${NINJA:-make} -j$PARALLEL $EXTRA_MAKEFLAGS install
+  popd
+
+  # Install fb303.thrift into the share directory.
+  mkdir -p $PREFIX/share/fb303/if
+  cp $THRIFT_SOURCE/contrib/fb303/if/fb303.thrift $PREFIX/share/fb303/if
+}
+
+build_bison() {
+  BISON_BDIR=$TP_BUILD_DIR/$BISON_NAME$MODE_SUFFIX
+  mkdir -p $BISON_BDIR
+  pushd $BISON_BDIR
+  CFLAGS="$EXTRA_CFLAGS" \
+    CXXFLAGS="$EXTRA_CXXFLAGS" \
+    LDFLAGS="$EXTRA_LDFLAGS" \
+    LIBS="$EXTRA_LIBS" \
+    $BISON_SOURCE/configure \
+    --prefix=$PREFIX
+  make -j$PARALLEL $EXTRA_MAKEFLAGS install
+  popd
+}

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/thirdparty/build-thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index 962981b..e239984 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -95,6 +95,10 @@ else
       "breakpad")     F_BREAKPAD=1 ;;
       "sparsehash")   F_SPARSEHASH=1 ;;
       "sparsepp")     F_SPARSEPP=1 ;;
+      "thrift")       F_THRIFT=1 ;;
+      "bison")        F_BISON=1 ;;
+      "hadoop")       F_HADOOP=1 ;;
+      "hive")         F_HIVE=1 ;;
       *)              echo "Unknown module: $arg"; exit 1 ;;
     esac
   done
@@ -237,6 +241,22 @@ if [ -n "$F_COMMON" -o -n "$F_SPARSEPP" ]; then
   build_sparsepp
 fi
 
+if [ -n "$F_COMMON" -o -n "$F_BISON" ]; then
+  build_bison
+fi
+
+# Install Hadoop and Hive by symlinking their source directories (which are
+# pre-built) into $PREFIX/opt.
+if [ -n "$F_COMMON" -o -n "$F_HADOOP" ]; then
+  mkdir -p $PREFIX/opt
+  ln -nsf $HADOOP_SOURCE $PREFIX/opt/hadoop
+fi
+
+if [ -n "$F_COMMON" -o -n "$F_HIVE" ]; then
+  mkdir -p $PREFIX/opt
+  ln -nsf $HIVE_SOURCE $PREFIX/opt/hive
+fi
+
 ### Build C dependencies without instrumentation
 
 PREFIX=$PREFIX_DEPS
@@ -338,6 +358,10 @@ if [ -n "$F_UNINSTRUMENTED" -o -n "$F_BREAKPAD" ]; then
   build_breakpad
 fi
 
+if [ -n "$F_UNINSTRUMENTED" -o -n "$F_THRIFT" ]; then
+  build_thrift
+fi
+
 restore_env
 
 # If we're on macOS best to exit here, otherwise single dependency builds will try to
@@ -509,6 +533,10 @@ if [ -n "$F_TSAN" -o -n "$F_BREAKPAD" ]; then
   build_breakpad
 fi
 
+if [ -n "$F_TSAN" -o -n "$F_THRIFT" ]; then
+  build_thrift
+fi
+
 restore_env
 
 finish

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/thirdparty/download-thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh
index fc94910..d7f85bf 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -340,6 +340,39 @@ if [ ! -d "$SPARSEPP_SOURCE" ]; then
   popd
 fi
 
+THRIFT_PATCHLEVEL=0
+if [ ! -d "$THRIFT_SOURCE" ]; then
+  fetch_and_expand $THRIFT_NAME.tar.gz
+  pushd $THRIFT_SOURCE
+  touch patchlevel-$THRIFT_PATCHLEVEL
+  popd
+fi
+
+BISON_PATCHLEVEL=0
+if [ ! -d "$BISON_SOURCE" ]; then
+  fetch_and_expand $BISON_NAME.tar.gz
+  # This would normally call autoreconf, but it does not succeed with
+  # autoreconf 2.69 (RHEL 7): "autoreconf: 'configure.ac' or 'configure.in' is required".
+  pushd $BISON_SOURCE
+  touch patchlevel-$BISON_PATCHLEVEL
+  popd
+fi
+
+HIVE_PATCHLEVEL=0
+if [ ! -d "$HIVE_SOURCE" ]; then
+  fetch_and_expand $HIVE_NAME.tar.gz
+  pushd $HIVE_SOURCE
+  touch patchlevel-$HIVE_PATCHLEVEL
+  popd
+fi
+
+HADOOP_PATCHLEVEL=0
+if [ ! -d "$HADOOP_SOURCE" ]; then
+  fetch_and_expand $HADOOP_NAME.tar.gz
+  pushd $HADOOP_SOURCE
+  touch patchlevel-$HADOOP_PATCHLEVEL
+  popd
+fi
 
 echo "---------------"
 echo "Thirdparty dependencies downloaded successfully"

http://git-wip-us.apache.org/repos/asf/kudu/blob/31d16f74/thirdparty/vars.sh
----------------------------------------------------------------------
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 7fe0e0e..43f3d84 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -199,3 +199,27 @@ SPARSEHASH_SOURCE=$TP_SOURCE_DIR/$SPARSEHASH_NAME
 SPARSEPP_VERSION=824860bb76893d163efbcff330734b9f62eecb17
 SPARSEPP_NAME=sparsepp-$SPARSEPP_VERSION
 SPARSEPP_SOURCE=$TP_SOURCE_DIR/$SPARSEPP_NAME
+
+# TODO(dan): bump to 0.11 when it's released. We chose to use a bleeding edge
+# version instead of 0.10 in order to get the API and header inclusion
+# simplifications introduced in THRIFT-2221 (Thrift's previous use of tr1
+# conflicted with gtest's use of tuples).
+THRIFT_VERSION=8b8a8efea13d1c97f856053af0a5c0e6a8a76354
+THRIFT_NAME=thrift-$THRIFT_VERSION
+THRIFT_SOURCE=$TP_SOURCE_DIR/$THRIFT_NAME
+
+BISON_VERSION=3.0.4
+BISON_NAME=bison-$BISON_VERSION
+BISON_SOURCE=$TP_SOURCE_DIR/$BISON_NAME
+
+# TODO(dan): bump to a release version once HIVE-17747 is published.
+HIVE_VERSION=3fb4649fa847cfec33f701f6c884f12087680cf0
+HIVE_NAME=apache-hive-$HIVE_VERSION-bin
+HIVE_SOURCE=$TP_SOURCE_DIR/$HIVE_NAME
+
+# The Hadoop tarball is the binary release tarball from hadoop.apache.org, with
+# the share/doc folder removed. The share/doc folder is about 2GiB of HTML
+# Javadoc files.
+HADOOP_VERSION=2.8.1
+HADOOP_NAME=hadoop-$HADOOP_VERSION
+HADOOP_SOURCE=$TP_SOURCE_DIR/$HADOOP_NAME

