couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vatam...@apache.org
Subject [1/2] fabric commit: updated refs/heads/master to ec22351
Date Fri, 20 Jan 2017 15:56:39 GMT
Repository: couchdb-fabric
Updated Branches:
  refs/heads/master 998cf2d66 -> ec2235196


In open_revs, do not count errors in quorum threshold calculation

Previously quorum check looked just at the number of replies and decided quorum
was met even if it only received errors. For example, if 2 nodes are in
maintance mode it might receive this sequence of replies:

  `rexi_EXIT, rexi_EXIT, ok`

In that case after the first two it would decide quorum (r=2) was met, return
what it had so far ([]) and kill the remaining worker, who was about to return
a valid revision.

The fix is to keep track of error replies and subtract them when deciding if
quorum was met.

COUCHDB-3271


Project: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/commit/ec223519
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/tree/ec223519
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/diff/ec223519

Branch: refs/heads/master
Commit: ec2235196d7195afab59cedc2d61a02b11596ab4
Parents: 6fa8d3c
Author: Nick Vatamaniuc <vatamane@apache.org>
Authored: Thu Jan 12 13:50:20 2017 -0500
Committer: Nick Vatamaniuc <vatamane@apache.org>
Committed: Fri Jan 20 10:56:14 2017 -0500

----------------------------------------------------------------------
 src/fabric_doc_open_revs.erl | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-fabric/blob/ec223519/src/fabric_doc_open_revs.erl
----------------------------------------------------------------------
diff --git a/src/fabric_doc_open_revs.erl b/src/fabric_doc_open_revs.erl
index 573cda7..5393f92 100644
--- a/src/fabric_doc_open_revs.erl
+++ b/src/fabric_doc_open_revs.erl
@@ -24,6 +24,7 @@
     worker_count,
     workers,
     reply_count = 0,
+    reply_error_count = 0,
     r,
     revs,
     latest,
@@ -60,13 +61,15 @@ go(DbName, Id, Revs, Options) ->
 
 handle_message({rexi_DOWN, _, {_,NodeRef},_}, _Worker, #state{workers=Workers}=State) ->
     NewState = State#state{
-        workers = lists:keydelete(NodeRef, #shard.node, Workers)
+        workers = lists:keydelete(NodeRef, #shard.node, Workers),
+        reply_error_count = State#state.reply_error_count + 1
     },
     handle_message({ok, []}, nil, NewState);
 
 handle_message({rexi_EXIT, _}, Worker, #state{workers=Workers}=State) ->
     NewState = State#state{
-        workers = lists:delete(Worker, Workers)
+        workers = lists:delete(Worker, Workers),
+        reply_error_count = State#state.reply_error_count + 1
     },
     handle_message({ok, []}, nil, NewState);
 
@@ -80,18 +83,22 @@ handle_message({ok, RawReplies}, Worker, State) ->
         r = R,
         revs = Revs,
         latest = Latest,
-        repair = InRepair
+        repair = InRepair,
+        reply_error_count = ReplyErrorCount
     } = State,
 
     IsTree = Revs == all orelse Latest,
 
+    % Do not count error replies when checking quorum
+    QuorumReplies = ReplyCount + 1 - ReplyErrorCount >= R,
+
     {NewReplies, QuorumMet, Repair} = case IsTree of
         true ->
             {NewReplies0, AllInternal, Repair0} =
                     tree_replies(PrevReplies, tree_sort(RawReplies)),
             NumLeafs = couch_key_tree:count_leafs(PrevReplies),
             SameNumRevs = length(RawReplies) == NumLeafs,
-            QMet = AllInternal andalso SameNumRevs andalso ReplyCount + 1 >= R,
+            QMet = AllInternal andalso SameNumRevs andalso QuorumReplies,
             {NewReplies0, QMet, Repair0};
         false ->
             {NewReplies0, MinCount} = dict_replies(PrevReplies, RawReplies),
@@ -306,6 +313,7 @@ open_doc_revs_test_() ->
             check_ancestor_counted_in_quorum(),
             check_not_found_counts_for_descendant(),
             check_worker_error_skipped(),
+            check_quorum_only_counts_valid_responses(),
             check_not_found_replies_are_removed_when_doc_found(),
             check_not_found_returned_when_one_of_docs_not_found(),
             check_not_found_returned_when_doc_not_found()
@@ -469,6 +477,20 @@ check_worker_error_skipped() ->
     end).
 
 
+check_quorum_only_counts_valid_responses() ->
+    ?_test(begin
+        S0 = state0(revs(), true),
+        Msg1 = {rexi_EXIT, reason},
+        Msg2 = {rexi_EXIT, reason},
+        Msg3 = {ok, [foo1(), bar1(), baz1()]},
+        Expect = {stop, [bar1(), baz1(), foo1()]},
+
+        {ok, S1} = handle_message(Msg1, w1, S0),
+        {ok, S2} = handle_message(Msg2, w2, S1),
+        ?assertEqual(Expect, handle_message(Msg3, w3, S2))
+    end).
+
+
 check_not_found_replies_are_removed_when_doc_found() ->
     ?_test(begin
         Replies = replies_to_dict([foo1(), bar1(), fooNF()]),


Mime
View raw message