kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From danburk...@apache.org
Subject kudu git commit: KUDU-2509 fix use-after-free in case of WAL replay error
Date Wed, 01 Aug 2018 19:47:52 GMT
Repository: kudu
Updated Branches:
  refs/heads/branch-1.7.x cf21f3673 -> 7fba099d5


KUDU-2509 fix use-after-free in case of WAL replay error

Fixed use-after-free mistake in case of a failure to apply a pending
commit message from the WAL while bootstrapping a tablet.

Also, a repro scenario to expose the use-after-free condition is added.
Prior to the fix, the repro scenario would crash with SIGSEGV on Linux
or with SIGBUS on OS X (at least for DEBUG builds).

Change-Id: I11373b1cc34d9e2e0181bee2d3841b49022218ed
Reviewed-on: http://gerrit.cloudera.org:8080/10997
Tested-by: Alexey Serbin <aserbin@cloudera.com>
Reviewed-by: Adar Dembo <adar@cloudera.com>
(cherry picked from commit 6b429e8a42ad9fb12a97cc26e33ca19ac2626533)
Reviewed-on: http://gerrit.cloudera.org:8080/11063
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mpercy@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/7fba099d
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/7fba099d
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/7fba099d

Branch: refs/heads/branch-1.7.x
Commit: 7fba099d553ed7b3e56c1d8e89dcc0bae4397616
Parents: cf21f36
Author: Alexey Serbin <aserbin@cloudera.com>
Authored: Thu Jul 19 21:05:54 2018 -0700
Committer: Alexey Serbin <aserbin@cloudera.com>
Committed: Tue Jul 31 23:44:10 2018 +0000

----------------------------------------------------------------------
 src/kudu/tablet/tablet_bootstrap-test.cc | 75 ++++++++++++++++++++++++++-
 src/kudu/tablet/tablet_bootstrap.cc      |  2 +-
 2 files changed, 74 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/7fba099d/src/kudu/tablet/tablet_bootstrap-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_bootstrap-test.cc b/src/kudu/tablet/tablet_bootstrap-test.cc
index e8131e0..ed32517 100644
--- a/src/kudu/tablet/tablet_bootstrap-test.cc
+++ b/src/kudu/tablet/tablet_bootstrap-test.cc
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "kudu/consensus/log-test-base.h"
+#include "kudu/tablet/tablet_bootstrap.h"
 
 #include <cstddef>
 #include <cstdint>
@@ -35,7 +35,9 @@
 #include "kudu/clock/logical_clock.h"
 #include "kudu/common/common.pb.h"
 #include "kudu/common/iterator.h"
+#include "kudu/common/partial_row.h"
 #include "kudu/common/partition.h"
+#include "kudu/common/row_operations.h"
 #include "kudu/common/schema.h"
 #include "kudu/common/timestamp.h"
 #include "kudu/common/wire_protocol-test-util.h"
@@ -45,6 +47,7 @@
 #include "kudu/consensus/consensus.pb.h"
 #include "kudu/consensus/consensus_meta.h"
 #include "kudu/consensus/consensus_meta_manager.h"
+#include "kudu/consensus/log-test-base.h"
 #include "kudu/consensus/log.h"
 #include "kudu/consensus/log_anchor_registry.h"
 #include "kudu/consensus/log_reader.h"
@@ -68,7 +71,6 @@
 #include "kudu/tablet/tablet-test-util.h"
 #include "kudu/tablet/tablet.h"
 #include "kudu/tablet/tablet.pb.h"
-#include "kudu/tablet/tablet_bootstrap.h"
 #include "kudu/tablet/tablet_metadata.h"
 #include "kudu/tablet/tablet_replica.h"
 #include "kudu/tserver/tserver.pb.h"
@@ -676,5 +678,74 @@ TEST_F(BootstrapTest, TestConsensusOnlyOperationOutOfOrderTimestamp)
{
   ASSERT_EQ(1, results.size());
 }
 
+// Regression test for KUDU-2509. There was a use-after-free bug that sometimes
+// lead to SIGSEGV while replaying the WAL. This scenario would crash or
+// at least UB sanitizer would report a warning if such condition exists.
+TEST_F(BootstrapTest, TestKudu2509) {
+  ASSERT_OK(BuildLog());
+
+  consensus::ReplicateRefPtr replicate = consensus::make_scoped_refptr_replicate(
+      new consensus::ReplicateMsg());
+  replicate->get()->set_op_type(consensus::WRITE_OP);
+  tserver::WriteRequestPB* batch_request = replicate->get()->mutable_write_request();
+  ASSERT_OK(SchemaToPB(schema_, batch_request->mutable_schema()));
+  batch_request->set_tablet_id(log::kTestTablet);
+
+  // This appends Insert(1) with op 10.10
+  const OpId insert_opid = MakeOpId(10, 10);
+  replicate->get()->mutable_id()->CopyFrom(insert_opid);
+  replicate->get()->set_timestamp(clock_->Now().ToUint64());
+  AddTestRowToPB(RowOperationsPB::INSERT, schema_, 10, 1,
+                 "this is a test insert", batch_request->mutable_row_operations());
+  NO_FATALS(AppendReplicateBatch(replicate));
+
+  // This appends Mutate(1) with op 10.11. The operation would try to update
+  // a row having an extra column. This should fail since there hasn't been
+  // corresponding DDL operation committed yet.
+  const OpId mutate_opid = MakeOpId(10, 11);
+  batch_request->mutable_row_operations()->Clear();
+  replicate->get()->mutable_id()->CopyFrom(mutate_opid);
+  replicate->get()->set_timestamp(clock_->Now().ToUint64());
+  {
+    // Modify the existing schema to add an extra row.
+    SchemaBuilder builder(schema_);
+    ASSERT_OK(builder.AddNullableColumn("string_val_extra", STRING));
+    const auto schema = builder.BuildWithoutIds();
+    ASSERT_OK(SchemaToPB(schema, batch_request->mutable_schema()));
+
+    KuduPartialRow row(&schema);
+    ASSERT_OK(row.SetInt32("key", 100));
+    ASSERT_OK(row.SetInt32("int_val", 200));
+    ASSERT_OK(row.SetStringCopy("string_val", "300"));
+    ASSERT_OK(row.SetStringCopy("string_val_extra", "100500"));
+    RowOperationsPBEncoder enc(batch_request->mutable_row_operations());
+    enc.Add(RowOperationsPB::UPDATE, row);
+  }
+  NO_FATALS(AppendReplicateBatch(replicate));
+
+  // Now commit the mutate before the insert (in the log).
+  gscoped_ptr<consensus::CommitMsg> mutate_commit(new consensus::CommitMsg);
+  mutate_commit->set_op_type(consensus::WRITE_OP);
+  mutate_commit->mutable_commited_op_id()->CopyFrom(mutate_opid);
+  mutate_commit->mutable_result()->add_ops()->add_mutated_stores()->set_mrs_id(1);
+  NO_FATALS(AppendCommit(std::move(mutate_commit)));
+
+  gscoped_ptr<consensus::CommitMsg> insert_commit(new consensus::CommitMsg);
+  insert_commit->set_op_type(consensus::WRITE_OP);
+  insert_commit->mutable_commited_op_id()->CopyFrom(insert_opid);
+  insert_commit->mutable_result()->add_ops()->add_mutated_stores()->set_mrs_id(1);
+  NO_FATALS(AppendCommit(std::move(insert_commit)));
+
+  ConsensusBootstrapInfo boot_info;
+  shared_ptr<Tablet> tablet;
+  const auto s = BootstrapTestTablet(-1, -1, &tablet, &boot_info);
+  const auto& status_msg = s.ToString();
+  ASSERT_TRUE(s.IsInvalidArgument()) << status_msg;
+  ASSERT_STR_CONTAINS(status_msg,
+      "Unable to bootstrap test tablet: Failed log replay.");
+  ASSERT_STR_CONTAINS(status_msg,
+      "column string_val_extra[string NULLABLE] not present in tablet");
+}
+
 } // namespace tablet
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/7fba099d/src/kudu/tablet/tablet_bootstrap.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_bootstrap.cc b/src/kudu/tablet/tablet_bootstrap.cc
index 04516b4..3e4e43a 100644
--- a/src/kudu/tablet/tablet_bootstrap.cc
+++ b/src/kudu/tablet/tablet_bootstrap.cc
@@ -975,7 +975,6 @@ Status TabletBootstrap::HandleCommitMessage(ReplayState* state, LogEntryPB*
comm
   // ... if it does, we apply it and all the commits that immediately follow in the sequence.
   OpId last_applied = commit_entry->commit().commited_op_id();
   RETURN_NOT_OK(ApplyCommitMessage(state, commit_entry));
-  delete commit_entry;
 
   auto iter = state->pending_commits.begin();
   while (iter != state->pending_commits.end()) {
@@ -989,6 +988,7 @@ Status TabletBootstrap::HandleCommitMessage(ReplayState* state, LogEntryPB*
comm
     break;
   }
 
+  delete commit_entry;
   return Status::OK();
 }
 


Mime
View raw message