kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ale...@apache.org
Subject [1/2] kudu git commit: KUDU-2509 fix use-after-free in case of WAL replay error
Date Tue, 24 Jul 2018 04:51:31 GMT
Repository: kudu
Updated Branches:
  refs/heads/master ec1e47f41 -> 5b09a693d


KUDU-2509 fix use-after-free in case of WAL replay error

Fixed use-after-free mistake in case of a failure to apply a pending
commit message from the WAL while bootstrapping a tablet.

Also, a repro scenario to expose the use-after-free condition is added.
Prior to the fix, the repro scenario would crash with SIGSEGV on Linux
or with SIGBUS on OS X (at least for DEBUG builds).

Change-Id: I11373b1cc34d9e2e0181bee2d3841b49022218ed
Reviewed-on: http://gerrit.cloudera.org:8080/10997
Tested-by: Alexey Serbin <aserbin@cloudera.com>
Reviewed-by: Adar Dembo <adar@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/6b429e8a
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/6b429e8a
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/6b429e8a

Branch: refs/heads/master
Commit: 6b429e8a42ad9fb12a97cc26e33ca19ac2626533
Parents: ec1e47f
Author: Alexey Serbin <aserbin@cloudera.com>
Authored: Thu Jul 19 21:05:54 2018 -0700
Committer: Alexey Serbin <aserbin@cloudera.com>
Committed: Tue Jul 24 04:40:20 2018 +0000

----------------------------------------------------------------------
 src/kudu/tablet/tablet_bootstrap-test.cc | 75 ++++++++++++++++++++++++++-
 src/kudu/tablet/tablet_bootstrap.cc      |  2 +-
 2 files changed, 74 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/6b429e8a/src/kudu/tablet/tablet_bootstrap-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_bootstrap-test.cc b/src/kudu/tablet/tablet_bootstrap-test.cc
index 0b80c20..f5b2668 100644
--- a/src/kudu/tablet/tablet_bootstrap-test.cc
+++ b/src/kudu/tablet/tablet_bootstrap-test.cc
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "kudu/consensus/log-test-base.h"
+#include "kudu/tablet/tablet_bootstrap.h"
 
 #include <cstddef>
 #include <cstdint>
@@ -35,7 +35,9 @@
 #include "kudu/clock/logical_clock.h"
 #include "kudu/common/common.pb.h"
 #include "kudu/common/iterator.h"
+#include "kudu/common/partial_row.h"
 #include "kudu/common/partition.h"
+#include "kudu/common/row_operations.h"
 #include "kudu/common/schema.h"
 #include "kudu/common/timestamp.h"
 #include "kudu/common/wire_protocol-test-util.h"
@@ -45,6 +47,7 @@
 #include "kudu/consensus/consensus.pb.h"
 #include "kudu/consensus/consensus_meta.h"
 #include "kudu/consensus/consensus_meta_manager.h"
+#include "kudu/consensus/log-test-base.h"
 #include "kudu/consensus/log.h"
 #include "kudu/consensus/log_anchor_registry.h"
 #include "kudu/consensus/log_reader.h"
@@ -67,7 +70,6 @@
 #include "kudu/tablet/tablet-test-util.h"
 #include "kudu/tablet/tablet.h"
 #include "kudu/tablet/tablet.pb.h"
-#include "kudu/tablet/tablet_bootstrap.h"
 #include "kudu/tablet/tablet_metadata.h"
 #include "kudu/tablet/tablet_replica.h"
 #include "kudu/tserver/tserver.pb.h"
@@ -675,5 +677,74 @@ TEST_F(BootstrapTest, TestConsensusOnlyOperationOutOfOrderTimestamp)
{
   ASSERT_EQ(1, results.size());
 }
 
+// Regression test for KUDU-2509. There was a use-after-free bug that sometimes
+// lead to SIGSEGV while replaying the WAL. This scenario would crash or
+// at least UB sanitizer would report a warning if such condition exists.
+TEST_F(BootstrapTest, TestKudu2509) {
+  ASSERT_OK(BuildLog());
+
+  consensus::ReplicateRefPtr replicate = consensus::make_scoped_refptr_replicate(
+      new consensus::ReplicateMsg());
+  replicate->get()->set_op_type(consensus::WRITE_OP);
+  tserver::WriteRequestPB* batch_request = replicate->get()->mutable_write_request();
+  ASSERT_OK(SchemaToPB(schema_, batch_request->mutable_schema()));
+  batch_request->set_tablet_id(log::kTestTablet);
+
+  // This appends Insert(1) with op 10.10
+  const OpId insert_opid = MakeOpId(10, 10);
+  replicate->get()->mutable_id()->CopyFrom(insert_opid);
+  replicate->get()->set_timestamp(clock_->Now().ToUint64());
+  AddTestRowToPB(RowOperationsPB::INSERT, schema_, 10, 1,
+                 "this is a test insert", batch_request->mutable_row_operations());
+  NO_FATALS(AppendReplicateBatch(replicate));
+
+  // This appends Mutate(1) with op 10.11. The operation would try to update
+  // a row having an extra column. This should fail since there hasn't been
+  // corresponding DDL operation committed yet.
+  const OpId mutate_opid = MakeOpId(10, 11);
+  batch_request->mutable_row_operations()->Clear();
+  replicate->get()->mutable_id()->CopyFrom(mutate_opid);
+  replicate->get()->set_timestamp(clock_->Now().ToUint64());
+  {
+    // Modify the existing schema to add an extra row.
+    SchemaBuilder builder(schema_);
+    ASSERT_OK(builder.AddNullableColumn("string_val_extra", STRING));
+    const auto schema = builder.BuildWithoutIds();
+    ASSERT_OK(SchemaToPB(schema, batch_request->mutable_schema()));
+
+    KuduPartialRow row(&schema);
+    ASSERT_OK(row.SetInt32("key", 100));
+    ASSERT_OK(row.SetInt32("int_val", 200));
+    ASSERT_OK(row.SetStringCopy("string_val", "300"));
+    ASSERT_OK(row.SetStringCopy("string_val_extra", "100500"));
+    RowOperationsPBEncoder enc(batch_request->mutable_row_operations());
+    enc.Add(RowOperationsPB::UPDATE, row);
+  }
+  NO_FATALS(AppendReplicateBatch(replicate));
+
+  // Now commit the mutate before the insert (in the log).
+  gscoped_ptr<consensus::CommitMsg> mutate_commit(new consensus::CommitMsg);
+  mutate_commit->set_op_type(consensus::WRITE_OP);
+  mutate_commit->mutable_commited_op_id()->CopyFrom(mutate_opid);
+  mutate_commit->mutable_result()->add_ops()->add_mutated_stores()->set_mrs_id(1);
+  NO_FATALS(AppendCommit(std::move(mutate_commit)));
+
+  gscoped_ptr<consensus::CommitMsg> insert_commit(new consensus::CommitMsg);
+  insert_commit->set_op_type(consensus::WRITE_OP);
+  insert_commit->mutable_commited_op_id()->CopyFrom(insert_opid);
+  insert_commit->mutable_result()->add_ops()->add_mutated_stores()->set_mrs_id(1);
+  NO_FATALS(AppendCommit(std::move(insert_commit)));
+
+  ConsensusBootstrapInfo boot_info;
+  shared_ptr<Tablet> tablet;
+  const auto s = BootstrapTestTablet(-1, -1, &tablet, &boot_info);
+  const auto& status_msg = s.ToString();
+  ASSERT_TRUE(s.IsInvalidArgument()) << status_msg;
+  ASSERT_STR_CONTAINS(status_msg,
+      "Unable to bootstrap test tablet: Failed log replay.");
+  ASSERT_STR_CONTAINS(status_msg,
+      "column string_val_extra[string NULLABLE] not present in tablet");
+}
+
 } // namespace tablet
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/6b429e8a/src/kudu/tablet/tablet_bootstrap.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_bootstrap.cc b/src/kudu/tablet/tablet_bootstrap.cc
index 58f9f54..fcc472a 100644
--- a/src/kudu/tablet/tablet_bootstrap.cc
+++ b/src/kudu/tablet/tablet_bootstrap.cc
@@ -946,7 +946,6 @@ Status TabletBootstrap::HandleCommitMessage(ReplayState* state, LogEntryPB*
comm
   // ... if it does, we apply it and all the commits that immediately follow in the sequence.
   OpId last_applied = commit_entry->commit().commited_op_id();
   RETURN_NOT_OK(ApplyCommitMessage(state, commit_entry));
-  delete commit_entry;
 
   auto iter = state->pending_commits.begin();
   while (iter != state->pending_commits.end()) {
@@ -960,6 +959,7 @@ Status TabletBootstrap::HandleCommitMessage(ReplayState* state, LogEntryPB*
comm
     break;
   }
 
+  delete commit_entry;
   return Status::OK();
 }
 


Mime
View raw message