couchdb-commits mailing list archives

From tonysu...@apache.org
Subject couch-replicator commit: updated refs/heads/3010-handle-429 to c6e891d
Date Fri, 27 May 2016 19:56:12 GMT
Repository: couchdb-couch-replicator
Updated Branches:
  refs/heads/3010-handle-429 [created] c6e891d26


Add exponential backoff for 429 errors.

When we encounter a 429 response, we retry with a separate set of
retries and wait times. This should reduce the load the replicator
puts on an already overloaded server. Once the 429s stop, a different
error (such as a 500) may still occur; in that case the retry
mechanism falls back to the original behavior for backwards
compatibility.

COUCHDB-3010
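
For reference, below is a minimal, hypothetical sketch (not part of the
commit) of the worst-case wait schedule the new backoff clause produces. It
assumes the replicator's default #httpdb wait of 250 ms (an assumption) and
the ?MAX_BACKOFF_WAIT cap of 250 * 32768 ms added by this patch.

    %% Hypothetical helper, not from couch_replicator_httpc: prints the
    %% worst-case 429 backoff schedule.
    -module(backoff_schedule).
    -export([show/0]).

    -define(MAX_BACKOFF_WAIT, 250 * 32768).

    show() ->
        show(250, 0).   % assumed starting wait of 250 ms

    %% Once the doubled wait exceeds the cap, the replication gives up.
    show(Wait, Total) when Wait > ?MAX_BACKOFF_WAIT ->
        io:format("give up after ~p ms (~.2f hours) of cumulative waiting~n",
            [Total, Total / 3600000]);
    show(Wait, Total) ->
        io:format("sleep up to ~p ms~n", [Wait]),
        show(Wait * 2, Total + Wait).

Running backoff_schedule:show() walks through 16 doubling steps, from 250 ms
up to 8,192,000 ms (about 2.3 hours for the final wait), for an upper bound
of roughly 4.5 hours of cumulative sleeping; since the real clause sleeps
random:uniform(Wait), the actual waits will usually be shorter.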


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/commit/c6e891d2
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/tree/c6e891d2
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/diff/c6e891d2

Branch: refs/heads/3010-handle-429
Commit: c6e891d26879bdaef9408196f436769e69e5e58f
Parents: 8697441
Author: Tony Sun <tony.sun@cloudant.com>
Authored: Fri May 27 12:47:08 2016 -0700
Committer: Tony Sun <tony.sun@cloudant.com>
Committed: Fri May 27 12:47:08 2016 -0700

----------------------------------------------------------------------
 src/couch_replicator_httpc.erl | 40 ++++++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/blob/c6e891d2/src/couch_replicator_httpc.erl
----------------------------------------------------------------------
diff --git a/src/couch_replicator_httpc.erl b/src/couch_replicator_httpc.erl
index 668b218..601de06 100644
--- a/src/couch_replicator_httpc.erl
+++ b/src/couch_replicator_httpc.erl
@@ -27,6 +27,7 @@
 
 -define(replace(L, K, V), lists:keystore(K, 1, L, {K, V})).
 -define(MAX_WAIT, 5 * 60 * 1000).
+-define(MAX_BACKOFF_WAIT, 250 * 32768).
 -define(STREAM_STATUS, ibrowse_stream_status).
 
 
@@ -138,6 +139,8 @@ process_response({ibrowse_req_id, ReqId}, Worker, HttpDb, Params, Callback) ->
 
 process_response({ok, Code, Headers, Body}, Worker, HttpDb, Params, Callback) ->
     case list_to_integer(Code) of
+    429 ->
+        maybe_retry(backoff, Worker, HttpDb, Params);
     Ok when (Ok >= 200 andalso Ok < 300) ; (Ok >= 400 andalso Ok < 500) ->
         couch_stats:increment_counter([couch_replicator, responses, success]),
         EJson = case Body of
@@ -162,6 +165,9 @@ process_stream_response(ReqId, Worker, HttpDb, Params, Callback) ->
     receive
     {ibrowse_async_headers, ReqId, Code, Headers} ->
         case list_to_integer(Code) of
+        429 ->
+            maybe_retry(backoff, Worker,
+                HttpDb#httpdb{timeout = ?MAX_BACKOFF_WAIT}, Params);
         Ok when (Ok >= 200 andalso Ok < 300) ; (Ok >= 400 andalso Ok < 500) ->
             StreamDataFun = fun() ->
                 stream_data_self(HttpDb, Params, Worker, ReqId, Callback)
@@ -251,18 +257,42 @@ clean_mailbox(_, Count) when Count > 0 ->
 maybe_retry(Error, Worker, #httpdb{retries = 0} = HttpDb, Params) ->
     report_error(Worker, HttpDb, Params, {error, Error});
 
+%% For 429 errors, we perform an exponential backoff with waits of up to
+%% 250 * 2^15 ms, or roughly 2.3 hours. Since #httpdb.retries is initialized
+%% to 10 and we need 15 doublings, the wait time acts as the failure cutoff.
+maybe_retry(backoff, Worker, #httpdb{wait = Wait} = HttpDb, Params) ->
+    ok = timer:sleep(random:uniform(Wait)),
+    Wait2 = Wait * 2,
+    case Wait2 of
+        W when W > ?MAX_BACKOFF_WAIT ->
+            report_error(Worker, HttpDb, Params, {error,
+                "429 Retry Timeout"});
+        W when W >= 512000 -> % Past 8 min, log each retry
+            log_retry_error(Params, HttpDb, Wait2, "429 Retry"),
+            throw({retry, HttpDb#httpdb{wait = Wait2}, Params});
+        _ ->
+            NewHttpDb = HttpDb#httpdb{wait = Wait2},
+            throw({retry, NewHttpDb, Params})
+    end;
+
 maybe_retry(Error, _Worker, #httpdb{retries = Retries, wait = Wait} = HttpDb,
     Params) ->
-    Method = string:to_upper(atom_to_list(get_value(method, Params, get))),
-    Url = couch_util:url_strip_password(full_url(HttpDb, Params)),
-    couch_log:notice("Retrying ~s request to ~s in ~p seconds due to error ~s",
-        [Method, Url, Wait / 1000, error_cause(Error)]),
-    ok = timer:sleep(Wait),
+    log_retry_error(Params, HttpDb, Wait, Error),
+    % Cap the sleep at ?MAX_WAIT so a wait inflated by the 429 backoff
+    % path cannot delay the normal retry path (backwards compatibility).
+    ok = timer:sleep(erlang:min(Wait, ?MAX_WAIT)),
     Wait2 = erlang:min(Wait * 2, ?MAX_WAIT),
     NewHttpDb = HttpDb#httpdb{retries = Retries - 1, wait = Wait2},
     throw({retry, NewHttpDb, Params}).
 
 
+log_retry_error(Params, HttpDb, Wait, Error) ->
+    Method = string:to_upper(atom_to_list(get_value(method, Params, get))),
+    Url = couch_util:url_strip_password(full_url(HttpDb, Params)),
+    couch_log:notice("Retrying ~s request to ~s in ~p seconds due to error ~s",
+        [Method, Url, Wait / 1000, error_cause(Error)]).
+
+
 report_error(_Worker, HttpDb, Params, Error) ->
     Method = string:to_upper(atom_to_list(get_value(method, Params, get))),
     Url = couch_util:url_strip_password(full_url(HttpDb, Params)),

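For context on how the thrown {retry, NewHttpDb, Params} tuples above are
consumed, here is a simplified, hypothetical sketch of the retry control
flow; the record defaults and request function are stand-ins, not the real
send_req/3 internals in couch_replicator_httpc.

    %% Hypothetical, self-contained sketch of the retry loop; not the real
    %% couch_replicator_httpc code.
    -module(retry_loop_sketch).
    -export([request/1]).

    -record(httpdb, {url, retries = 10, wait = 250}).

    request(#httpdb{} = HttpDb) ->
        try
            %% Stand-in for the real HTTP call; on a 429 the patched
            %% maybe_retry(backoff, ...) throws {retry, NewHttpDb, Params}
            %% with a doubled wait, which restarts the request here.
            do_request(HttpDb)
        catch
            throw:{retry, NewHttpDb, _Params} ->
                request(NewHttpDb)
        end.

    %% Placeholder request function for the sketch.
    do_request(#httpdb{url = Url}) ->
        {ok, Url}.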
