Repository: kudu
Updated Branches:
refs/heads/master 09fb50224 -> 88e39bad1
tablet: mark delta tracker read-only on error
When DeltaTracker::Flush() fails, it leaves a DeltaMemStore in the redo
store list. This is fine for reads because the readpath expects any
DeltaStore to be in that slot, but this is a problem for updates, which
expect only DeltaFileReaders in the list while the compact_flush_lock_
is held.
Before, we would get around this by CHECKing so Flush() could never
return in such a state. This patch introduces a flag to the
DeltaTracker that indicates whether it has experienced such an error.
In order to ensure type-correctness, this flag must be checked
immediately upon entering the critical section of the
compact_flush_lock_.
Change-Id: Ib950048e9cd0929a10714ab1cc2bd829835afced
Reviewed-on: http://gerrit.cloudera.org:8080/8605
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mpercy@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/22987c73
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/22987c73
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/22987c73
Branch: refs/heads/master
Commit: 22987c739ed131861f156c3014eb4e429a53dd2a
Parents: 09fb502
Author: Andrew Wong <awong@cloudera.com>
Authored: Sat Nov 18 08:57:18 2017 -0800
Committer: Andrew Wong <awong@cloudera.com>
Committed: Thu Nov 23 00:27:52 2017 +0000
----------------------------------------------------------------------
src/kudu/tablet/delta_tracker.cc | 27 ++++++++++++++++++++++-----
src/kudu/tablet/delta_tracker.h | 13 +++++++++++++
src/kudu/tablet/diskrowset.cc | 1 +
3 files changed, 36 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/22987c73/src/kudu/tablet/delta_tracker.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/delta_tracker.cc b/src/kudu/tablet/delta_tracker.cc
index 86cea57..ada9c88 100644
--- a/src/kudu/tablet/delta_tracker.cc
+++ b/src/kudu/tablet/delta_tracker.cc
@@ -37,6 +37,7 @@
#include "kudu/fs/fs_manager.h"
#include "kudu/gutil/basictypes.h"
#include "kudu/gutil/casts.h"
+#include "kudu/gutil/port.h"
#include "kudu/gutil/strings/join.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/tablet/delta_applier.h"
@@ -93,6 +94,7 @@ DeltaTracker::DeltaTracker(shared_ptr<RowSetMetadata> rowset_metadata,
: rowset_metadata_(std::move(rowset_metadata)),
num_rows_(num_rows),
open_(false),
+ read_only_(false),
log_anchor_registry_(log_anchor_registry),
mem_trackers_(std::move(mem_trackers)),
dms_empty_(true) {}
@@ -350,11 +352,20 @@ Status DeltaTracker::CommitDeltaStoreMetadataUpdate(const RowSetMetadataUpdate&
return Status::OK();
}
+Status DeltaTracker::CheckWritableUnlocked() const {
+ compact_flush_lock_.AssertAcquired();
+ if (PREDICT_FALSE(read_only_)) {
+ return Status::IllegalState("delta tracker has been marked read-only");
+ }
+ return Status::OK();
+}
+
Status DeltaTracker::CompactStores(int start_idx, int end_idx) {
// Prevent concurrent compactions or a compaction concurrent with a flush
//
// TODO(perf): this could be more fine grained
std::lock_guard<Mutex> l(compact_flush_lock_);
+ RETURN_NOT_OK(CheckWritableUnlocked());
// At the time of writing, minor delta compaction only compacts REDO delta
// files, so we need at least 2 REDO delta stores to proceed.
@@ -459,6 +470,7 @@ Status DeltaTracker::DeleteAncientUndoDeltas(Timestamp ancient_history_mark,
// we need to be the only thread doing a flush or a compaction on this RowSet
// while we do our work.
std::lock_guard<Mutex> l(compact_flush_lock_);
+ RETURN_NOT_OK(CheckWritableUnlocked());
// Get the list of undo deltas.
SharedDeltaStoreVector undos_newest_first;
@@ -679,6 +691,7 @@ Status DeltaTracker::FlushDMS(DeltaMemStore* dms,
Status DeltaTracker::Flush(MetadataFlushType flush_type) {
std::lock_guard<Mutex> l(compact_flush_lock_);
+ RETURN_NOT_OK(CheckWritableUnlocked());
// First, swap out the old DeltaMemStore a new one,
// and add it to the list of delta stores to be reflected
@@ -714,15 +727,19 @@ Status DeltaTracker::Flush(MetadataFlushType flush_type) {
LOG_WITH_PREFIX(INFO) << "Flushing " << count << " deltas from DMS "
<< old_dms->id() << "...";
// Now, actually flush the contents of the old DMS.
- // TODO(awong): failures here leaves a DeltaMemStore permanently in the store
- // list. For now, handle this by CHECKing for success, but we're going to
- // want a more concrete solution if, say, we want to handle arbitrary file
- // errors.
//
// TODO(todd): need another lock to prevent concurrent flushers
// at some point.
shared_ptr<DeltaFileReader> dfr;
- CHECK_OK(FlushDMS(old_dms.get(), &dfr, flush_type));
+ Status s = FlushDMS(old_dms.get(), &dfr, flush_type);
+ if (PREDICT_FALSE(!s.ok())) {
+ // A failure here leaves a DeltaMemStore permanently in the store list.
+ // This isn't allowed, and rolling back the store is difficult, so we leave
+ // the delta tracker in an safe, read-only state.
+ CHECK(s.IsDiskFailure()) << LogPrefix() << s.ToString();
+ read_only_ = true;
+ return s;
+ }
// Now, re-take the lock and swap in the DeltaFileReader in place of
// of the DeltaMemStore
http://git-wip-us.apache.org/repos/asf/kudu/blob/22987c73/src/kudu/tablet/delta_tracker.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/delta_tracker.h b/src/kudu/tablet/delta_tracker.h
index bb9bcec..f7388c7 100644
--- a/src/kudu/tablet/delta_tracker.h
+++ b/src/kudu/tablet/delta_tracker.h
@@ -259,6 +259,12 @@ class DeltaTracker {
return &compact_flush_lock_;
}
+ // Returns an error if the delta tracker has been marked read-only.
+ // Else, returns OK.
+ //
+ // 'compact_flush_lock_' must be held when this is called.
+ Status CheckWritableUnlocked() const;
+
// Init() all of the specified delta stores. For tests only.
Status InitAllDeltaStoresForTests(WhichStores stores);
@@ -321,6 +327,13 @@ class DeltaTracker {
bool open_;
+ // Certain errors (e.g. failed delta tracker flushes) may leave the delta
+ // store lists in a state such that it would be unsafe to run further
+ // maintenance ops.
+ //
+ // Must be checked immediately after locking compact_flush_lock_.
+ bool read_only_;
+
log::LogAnchorRegistry* log_anchor_registry_;
TabletMemTrackers mem_trackers_;
http://git-wip-us.apache.org/repos/asf/kudu/blob/22987c73/src/kudu/tablet/diskrowset.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/diskrowset.cc b/src/kudu/tablet/diskrowset.cc
index dfa5801..adabb79 100644
--- a/src/kudu/tablet/diskrowset.cc
+++ b/src/kudu/tablet/diskrowset.cc
@@ -564,6 +564,7 @@ Status DiskRowSet::MajorCompactDeltaStoresWithColumnIds(const vector<ColumnId>&
LOG_WITH_PREFIX(INFO) << "Major compacting REDO delta stores (cols: " << col_ids
<< ")";
TRACE_EVENT0("tablet", "DiskRowSet::MajorCompactDeltaStoresWithColumnIds");
std::lock_guard<Mutex> l(*delta_tracker()->compact_flush_lock());
+ RETURN_NOT_OK(delta_tracker()->CheckWritableUnlocked());
// TODO(todd): do we need to lock schema or anything here?
gscoped_ptr<MajorDeltaCompaction> compaction;
|