maintenance_manager-test: fix flaky test
TestCompletedOps failed relatively often because of the following racy
interleaving:
- Multiple MM threads called Prepare() while remaining_runs_ was positive
- These threads then called Perform(), which decremented remaining_runs_
once per thread. This resulted in the op being called more times than
'remaining_runs_', causing an assertion failure.
The fix is to separately count the number of 'prepared' ops.
Change-Id: I003e72339f69228195130c89572a20be3009f22c
Reviewed-on: http://gerrit.cloudera.org:8080/4634
Reviewed-by: Jean-Daniel Cryans <jdcryans@apache.org>
Tested-by: Kudu Jenkins
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/07ce9fbc
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/07ce9fbc
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/07ce9fbc
Branch: refs/heads/master
Commit: 07ce9fbce4fcdcfafe01db0347b69c0283df5e2c
Parents: 4db8851
Author: Todd Lipcon <todd@apache.org>
Authored: Wed Oct 5 14:37:50 2016 -0700
Committer: Todd Lipcon <todd@apache.org>
Committed: Wed Oct 5 22:26:33 2016 +0000
----------------------------------------------------------------------
src/kudu/util/maintenance_manager-test.cc | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/07ce9fbc/src/kudu/util/maintenance_manager-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/maintenance_manager-test.cc b/src/kudu/util/maintenance_manager-test.cc
index 2448766..cb124af 100644
--- a/src/kudu/util/maintenance_manager-test.cc
+++ b/src/kudu/util/maintenance_manager-test.cc
@@ -86,6 +86,7 @@ class TestMaintenanceOp : public MaintenanceOp {
maintenance_op_duration_(METRIC_maintenance_op_duration.Instantiate(metric_entity_)),
maintenance_ops_running_(METRIC_maintenance_ops_running.Instantiate(metric_entity_,
0)),
remaining_runs_(1),
+ prepared_runs_(0),
sleep_time_(MonoDelta::FromSeconds(0)) {
}
@@ -96,6 +97,8 @@ class TestMaintenanceOp : public MaintenanceOp {
if (remaining_runs_ == 0) {
return false;
}
+ remaining_runs_--;
+ prepared_runs_++;
DLOG(INFO) << "Prepared op " << name();
return true;
}
@@ -104,8 +107,11 @@ class TestMaintenanceOp : public MaintenanceOp {
{
std::lock_guard<Mutex> guard(lock_);
DLOG(INFO) << "Performing op " << name();
- CHECK_GE(remaining_runs_, 1);
- remaining_runs_--;
+
+ // Ensure that we don't call Perform() more times than we returned
+ // success from Prepare().
+ CHECK_GE(prepared_runs_, 1);
+ prepared_runs_--;
}
SleepFor(sleep_time_);
@@ -166,6 +172,8 @@ class TestMaintenanceOp : public MaintenanceOp {
// The number of remaining times this operation will run before disabling
// itself.
int remaining_runs_;
+ // The number of Prepared() operations which have not yet been Perform()ed.
+ int prepared_runs_;
// The amount of time each op invocation will sleep.
MonoDelta sleep_time_;
|