kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject incubator-kudu git commit: master: don't expose intermediate CreateTable() state to consumers
Date Wed, 23 Mar 2016 17:46:52 GMT
Repository: incubator-kudu
Updated Branches:
  refs/heads/master e88b989c7 -> f97197185


master: don't expose intermediate CreateTable() state to consumers

CreateTable() publishes new tables to the catalog manager maps before
committing the in-memory mutations that populate table metadata. This order
makes sense; if it were inverted, there'd be no safe way for CreateTable()
to atomically check for a table by name and claim that name for itself. As
such, consumers of the maps must ensure that the tables they're retrieving
are fully formed before using them.

Read-write consumers (who lock tables with TableMetadataLock::WRITE) are
already protected by the semantics of the table's underlying
CowLock. Read-only consumers, however, must explicitly check the state of
the table before using it. I audited the various read-only consumers and
added checks where needed.

Not every client call yields enough information to detect this intermediate
state. As far as I can tell, the only testable call is GetTableSchema(), so
that's what I used in the new test. Before the fix, the test failed every
time. Afterwards, it doesn't fail in 1000 repetitions.

Change-Id: I05ab48d5a9dd328b4a74f5dc35c710cd1c01f3cd
Reviewed-on: http://gerrit.cloudera.org:8080/2577
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <todd@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/f9719718
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/f9719718
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/f9719718

Branch: refs/heads/master
Commit: f97197185d358d3c251def1740d97573323e1611
Parents: e88b989
Author: Adar Dembo <adar@cloudera.com>
Authored: Thu Mar 17 17:50:35 2016 -0700
Committer: Todd Lipcon <todd@apache.org>
Committed: Wed Mar 23 17:46:41 2016 +0000

----------------------------------------------------------------------
 src/kudu/master/catalog_manager.cc | 49 +++++++++++--------------
 src/kudu/master/master-test.cc     | 63 +++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/f9719718/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 9e02a8a..6165ded 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -421,6 +421,21 @@ void CheckIfNoLongerLeaderAndSetupError(Status s, RespClass* resp) {
   }
 }
 
+template<class RespClass>
+Status CheckIfTableDeletedOrNotRunning(TableMetadataLock* lock, RespClass* resp) {
+  if (lock->data().is_deleted()) {
+    Status s = Status::NotFound("The table was deleted", lock->data().pb.state_msg());
+    SetupError(resp->mutable_error(), MasterErrorPB::TABLE_NOT_FOUND, s);
+    return s;
+  }
+  if (!lock->data().is_running()) {
+    Status s = Status::ServiceUnavailable("The table is not running");
+    SetupError(resp->mutable_error(), MasterErrorPB::TABLE_NOT_FOUND, s);
+    return s;
+  }
+  return Status::OK();
+}
+
 } // anonymous namespace
 
 CatalogManager::CatalogManager(Master *master)
@@ -868,11 +883,7 @@ Status CatalogManager::IsCreateTableDone(const IsCreateTableDoneRequestPB*
req,
 
   TRACE("Locking table");
   TableMetadataLock l(table.get(), TableMetadataLock::READ);
-  if (l.data().is_deleted()) {
-    Status s = Status::NotFound("The table was deleted", l.data().pb.state_msg());
-    SetupError(resp->mutable_error(), MasterErrorPB::TABLE_NOT_FOUND, s);
-    return s;
-  }
+  RETURN_NOT_OK(CheckIfTableDeletedOrNotRunning(&l, resp));
 
   // 2. Verify if the create is in-progress
   TRACE("Verify if the table creation is in progress for $0", table->ToString());
@@ -1222,11 +1233,7 @@ Status CatalogManager::IsAlterTableDone(const IsAlterTableDoneRequestPB*
req,
 
   TRACE("Locking table");
   TableMetadataLock l(table.get(), TableMetadataLock::READ);
-  if (l.data().is_deleted()) {
-    Status s = Status::NotFound("The table was deleted", l.data().pb.state_msg());
-    SetupError(resp->mutable_error(), MasterErrorPB::TABLE_NOT_FOUND, s);
-    return s;
-  }
+  RETURN_NOT_OK(CheckIfTableDeletedOrNotRunning(&l, resp));
 
   // 2. Verify if the alter is in-progress
   TRACE("Verify if there is an alter operation in progress for $0", table->ToString());
@@ -1253,11 +1260,7 @@ Status CatalogManager::GetTableSchema(const GetTableSchemaRequestPB*
req,
 
   TRACE("Locking table");
   TableMetadataLock l(table.get(), TableMetadataLock::READ);
-  if (l.data().is_deleted()) {
-    Status s = Status::NotFound("The table was deleted", l.data().pb.state_msg());
-    SetupError(resp->mutable_error(), MasterErrorPB::TABLE_NOT_FOUND, s);
-    return s;
-  }
+  RETURN_NOT_OK(CheckIfTableDeletedOrNotRunning(&l, resp));
 
   if (l.data().pb.has_fully_applied_schema()) {
     // An AlterTable is in progress; fully_applied_schema is the last
@@ -1284,7 +1287,7 @@ Status CatalogManager::ListTables(const ListTablesRequestPB* req,
 
   for (const TableInfoMap::value_type& entry : table_names_map_) {
     TableMetadataLock ltm(entry.second.get(), TableMetadataLock::READ);
-    if (!ltm.data().is_running()) continue;
+    if (!ltm.data().is_running()) continue; // implies !is_deleted() too
 
     if (req->has_name_filter()) {
       size_t found = ltm.data().name().find(req->name_filter());
@@ -3030,7 +3033,6 @@ Status CatalogManager::GetTableLocations(const GetTableLocationsRequestPB*
req,
 
   scoped_refptr<TableInfo> table;
   RETURN_NOT_OK(FindTable(req->table(), &table));
-
   if (table == nullptr) {
     Status s = Status::NotFound("The table does not exist");
     SetupError(resp->mutable_error(), MasterErrorPB::TABLE_NOT_FOUND, s);
@@ -3038,18 +3040,7 @@ Status CatalogManager::GetTableLocations(const GetTableLocationsRequestPB*
req,
   }
 
   TableMetadataLock l(table.get(), TableMetadataLock::READ);
-  if (l.data().is_deleted()) {
-    Status s = Status::NotFound("The table was deleted",
-                                l.data().pb.state_msg());
-    SetupError(resp->mutable_error(), MasterErrorPB::TABLE_NOT_FOUND, s);
-    return s;
-  }
-
-  if (!l.data().is_running()) {
-    Status s = Status::ServiceUnavailable("The table is not running");
-    SetupError(resp->mutable_error(), MasterErrorPB::TABLE_NOT_FOUND, s);
-    return s;
-  }
+  RETURN_NOT_OK(CheckIfTableDeletedOrNotRunning(&l, resp));
 
   vector<scoped_refptr<TabletInfo> > tablets_in_range;
   table->GetTabletsInRange(req, &tablets_in_range);

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/f9719718/src/kudu/master/master-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index e1c8dc9..01684d4 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -476,5 +476,68 @@ TEST_F(MasterTest, TestShutdownDuringTableVisit) {
   // CatalogManager::VisitTablesAndTabletsTask.
 }
 
+static void GetTableSchema(const char* kTableName,
+                           const Schema* kSchema,
+                           MasterServiceProxy* proxy,
+                           CountDownLatch* started,
+                           AtomicBool* done) {
+  GetTableSchemaRequestPB req;
+  GetTableSchemaResponsePB resp;
+  req.mutable_table()->set_table_name(kTableName);
+
+  started->CountDown();
+  while (!done->Load()) {
+    RpcController controller;
+
+    CHECK_OK(proxy->GetTableSchema(req, &resp, &controller));
+    SCOPED_TRACE(resp.DebugString());
+
+    // There are two possible outcomes:
+    //
+    // 1. GetTableSchema() happened before CreateTable(): we expect to see a
+    //    TABLE_NOT_FOUND error.
+    // 2. GetTableSchema() happened after CreateTable(): we expect to see the
+    //    full table schema.
+    //
+    // Any other outcome is an error.
+    if (resp.has_error()) {
+      CHECK_EQ(MasterErrorPB::TABLE_NOT_FOUND, resp.error().code());
+    } else {
+      Schema receivedSchema;
+      CHECK_OK(SchemaFromPB(resp.schema(), &receivedSchema));
+      CHECK(kSchema->Equals(receivedSchema)) <<
+          strings::Substitute("$0 not equal to $1",
+                              kSchema->ToString(), receivedSchema.ToString());
+    }
+  }
+}
+
+// The catalog manager had a bug wherein GetTableSchema() interleaved with
+// CreateTable() could expose intermediate uncommitted state to clients. This
+// test ensures that bug does not regress.
+TEST_F(MasterTest, TestGetTableSchemaIsAtomicWithCreateTable) {
+  const char* kTableName = "testtb";
+  const Schema kTableSchema({ ColumnSchema("key", INT32),
+                              ColumnSchema("v1", UINT64),
+                              ColumnSchema("v2", STRING) },
+                            1);
+
+  CountDownLatch started(1);
+  AtomicBool done(false);
+
+  // Kick off a thread that calls GetTableSchema() in a loop.
+  scoped_refptr<Thread> t;
+  ASSERT_OK(Thread::Create("test", "test",
+                           &GetTableSchema, kTableName, &kTableSchema,
+                           proxy_.get(), &started, &done, &t));
+
+  // Only create the table after the thread has started.
+  started.Wait();
+  EXPECT_OK(CreateTable(kTableName, kTableSchema));
+
+  done.Store(true);
+  t->Join();
+}
+
 } // namespace master
 } // namespace kudu


Mime
View raw message