couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From iil...@apache.org
Subject [1/2] couch-replicator commit: updated refs/heads/master to c233bea
Date Mon, 01 Aug 2016 18:04:51 GMT
Repository: couchdb-couch-replicator
Updated Branches:
  refs/heads/master 05247a260 -> c233beaec


Inject random delays in scan_all_dbs

couch_replicator_manager scans the filesystem to find all _replicator
databases. For every database found it does

    gen_server:cast(Server, {resume_scan, DbName})

Spawn a separate process per database that does the gen_server:cast after a
random delay. This effectively removes the stampede and randomizes the order
in which we process _replicator databases.

COUCHDB-3088


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/commit/884cf3e5
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/tree/884cf3e5
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/diff/884cf3e5

Branch: refs/heads/master
Commit: 884cf3e55f77ab1a5f26dc7202ce21771062eae6
Parents: 05247a2
Author: ILYA Khlopotov <iilyak@ca.ibm.com>
Authored: Fri Jul 29 14:32:02 2016 -0700
Committer: ILYA Khlopotov <iilyak@ca.ibm.com>
Committed: Mon Aug 1 10:50:28 2016 -0700

----------------------------------------------------------------------
 src/couch_replicator_manager.erl | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/blob/884cf3e5/src/couch_replicator_manager.erl
----------------------------------------------------------------------
diff --git a/src/couch_replicator_manager.erl b/src/couch_replicator_manager.erl
index 0b7eeca..c36526d 100644
--- a/src/couch_replicator_manager.erl
+++ b/src/couch_replicator_manager.erl
@@ -45,8 +45,8 @@
 -define(REP_TO_STATE, couch_rep_id_to_rep_state).
 -define(INITIAL_WAIT, 2.5). % seconds
 -define(MAX_WAIT, 600).     % seconds
--define(AVG_ERROR_DELAY_MSEC, 100).
--define(MAX_ERROR_DELAY_MSEC, 60000).
+-define(AVG_DELAY_MSEC, 100).
+-define(MAX_DELAY_MSEC, 60000).
 -define(OWNER, <<"owner">>).
 -define(REPLICATOR_DB, <<"_replicator">>).
 
@@ -127,7 +127,7 @@ replication_error(#rep{id = {BaseId, _} = RepId}, Error) ->
     nil ->
         ok;
     #rep_state{rep = #rep{db_name = DbName, doc_id = DocId}} ->
-        ok = add_error_jitter(),
+        ok = timer:sleep(jitter(ets:info(?REP_TO_STATE, size))),
         update_rep_doc(DbName, DocId, [
             {<<"_replication_state">>, <<"error">>},
             {<<"_replication_state_reason">>, to_binary(error_reason(Error))},
@@ -135,15 +135,6 @@ replication_error(#rep{id = {BaseId, _} = RepId}, Error) ->
         ok = gen_server:call(?MODULE, {rep_error, RepId, Error}, infinity)
     end.
 
-% Add random delay proportional to the number of replications
-% on current node, in order to prevent a stampede when a source
-% with multiple replication targets fails
-add_error_jitter() ->
-    RepCount = ets:info(?REP_TO_STATE, size),
-    Range = min(2 * RepCount * ?AVG_ERROR_DELAY_MSEC, ?MAX_ERROR_DELAY_MSEC),
-    timer:sleep(random:uniform(Range)).
-
-
 continue(#rep{doc_id = null}) ->
     {true, no_owner};
 continue(#rep{id = RepId}) ->
@@ -939,7 +930,7 @@ scan_all_dbs(Server) when is_pid(Server) ->
     Root = config:get("couchdb", "database_dir", "."),
     NormRoot = couch_util:normpath(Root),
     filelib:fold_files(Root, "_replicator(\\.[0-9]{10,})?.couch$", true,
-        fun(Filename, _) ->
+        fun(Filename, Acc) ->
 	    % shamelessly stolen from couch_server.erl
             NormFilename = couch_util:normpath(Filename),
             case NormFilename -- NormRoot of
@@ -947,9 +938,21 @@ scan_all_dbs(Server) when is_pid(Server) ->
                 RelativeFilename -> ok
             end,
             DbName = ?l2b(filename:rootname(RelativeFilename, ".couch")),
-	    gen_server:cast(Server, {resume_scan, DbName}),
-	    ok
-	end, ok).
+            Jitter = jitter(Acc),
+            spawn_link(fun() ->
+                timer:sleep(Jitter),
+                gen_server:cast(Server, {resume_scan, DbName})
+            end),
+	    Acc + 1
+	end, 1).
+
+% calculate random delay proportional to the number of replications
+% on current node, in order to prevent a stampede:
+%   - when a source with multiple replication targets fails
+%   - when we restart couch_replication_manager
+jitter(N) ->
+    Range = min(2 * N * ?AVG_DELAY_MSEC, ?MAX_DELAY_MSEC),
+    random:uniform(Range).
 
 is_replicator_db(DbName) ->
     ?REPLICATOR_DB =:= couch_db:dbname_suffix(DbName).


Mime
View raw message