couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rnew...@apache.org
Subject couch-replicator commit: updated refs/heads/master to 13326bb
Date Fri, 06 May 2016 16:28:11 GMT
Repository: couchdb-couch-replicator
Updated Branches:
  refs/heads/master ab0afce2d -> 13326bb4b


Add jittered delay during replication error handling

For one-to-many replications, a failure of the source can create a stampede
effect. A jittered delay is used to avoid that. The delay is random, in a range
proportional to the current number of replications, with a maximum of 1 minute.

Seed the random number generator within each replication process with a
non-deterministic value; otherwise the same sequence of delays is generated
for all replications.

Jira: COUCHDB-3006


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/commit/13326bb4
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/tree/13326bb4
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/diff/13326bb4

Branch: refs/heads/master
Commit: 13326bb4b265c09c37f8088c6432f80f495ef0cd
Parents: ab0afce
Author: Nick Vatamaniuc <vatamane@gmail.com>
Authored: Wed Apr 27 15:21:14 2016 -0400
Committer: Nick Vatamaniuc <vatamane@gmail.com>
Committed: Fri May 6 10:20:41 2016 -0400

----------------------------------------------------------------------
 src/couch_replicator.erl         |  2 ++
 src/couch_replicator_manager.erl | 12 ++++++++++++
 2 files changed, 14 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/blob/13326bb4/src/couch_replicator.erl
----------------------------------------------------------------------
diff --git a/src/couch_replicator.erl b/src/couch_replicator.erl
index 4e25e14..7f0c7ee 100644
--- a/src/couch_replicator.erl
+++ b/src/couch_replicator.erl
@@ -256,6 +256,8 @@ init(InitArgs) ->
 do_init(#rep{options = Options, id = {BaseId, Ext}, user_ctx=UserCtx} = Rep) ->
     process_flag(trap_exit, true),
 
+    random:seed(os:timestamp()),
+
     #rep_state{
         source = Source,
         target = Target,

http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/blob/13326bb4/src/couch_replicator_manager.erl
----------------------------------------------------------------------
diff --git a/src/couch_replicator_manager.erl b/src/couch_replicator_manager.erl
index 0811796..342dffb 100644
--- a/src/couch_replicator_manager.erl
+++ b/src/couch_replicator_manager.erl
@@ -45,6 +45,8 @@
 -define(REP_TO_STATE, couch_rep_id_to_rep_state).
 -define(INITIAL_WAIT, 2.5). % seconds
 -define(MAX_WAIT, 600).     % seconds
+-define(AVG_ERROR_DELAY_MSEC, 100).
+-define(MAX_ERROR_DELAY_MSEC, 60000).
 -define(OWNER, <<"owner">>).
 
 -define(DB_TO_SEQ, db_to_seq).
@@ -124,6 +126,7 @@ replication_error(#rep{id = {BaseId, _} = RepId}, Error) ->
     nil ->
         ok;
     #rep_state{rep = #rep{db_name = DbName, doc_id = DocId}} ->
+        ok = add_error_jitter(),
         update_rep_doc(DbName, DocId, [
             {<<"_replication_state">>, <<"error">>},
             {<<"_replication_state_reason">>, to_binary(error_reason(Error))},
@@ -131,6 +134,15 @@ replication_error(#rep{id = {BaseId, _} = RepId}, Error) ->
         ok = gen_server:call(?MODULE, {rep_error, RepId, Error}, infinity)
     end.
 
+% Add random delay proportional to the number of replications
+% on current node, in order to prevent a stampede when a source
+% with multiple replication targets fails
+add_error_jitter() ->
+    RepCount = ets:info(?REP_TO_STATE, size),
+    Range = min(2 * RepCount * ?AVG_ERROR_DELAY_MSEC, ?MAX_ERROR_DELAY_MSEC),
+    timer:sleep(random:uniform(Range)).
+
+
 continue(#rep{doc_id = null}) ->
     {true, no_owner};
 continue(#rep{id = RepId}) ->


Mime
View raw message