couchdb-commits mailing list archives

From vatam...@apache.org
Subject [1/3] couch-replicator commit: updated refs/heads/master to 93c4cea
Date Tue, 04 Oct 2016 14:29:51 GMT
Repository: couchdb-couch-replicator
Updated Branches:
  refs/heads/master b9232c8d4 -> 93c4ceaf9


Fix replicator handling of max_document_size when posting to _bulk_docs

Currently the `max_document_size` setting is a misnomer: it actually
configures the maximum request body size. For single-document requests
that is a good enough approximation. However, a `_bulk_docs` update can
fail the total request size check even when every individual document
stays below the limit.
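
As a concrete illustration in the Erlang shell (the 1 MiB limit and the
document sizes here are hypothetical): every document in a batch can be
under the limit while the combined `_bulk_docs` request body exceeds it.

    %% Hypothetical sizes, for illustration only.
    MaxSize  = 1048576,                     % max_document_size: 1 MiB
    DocSizes = lists:duplicate(20, 102400), % twenty ~100 KiB documents
    true = lists:all(fun(S) -> S =< MaxSize end, DocSizes),
    true = lists:sum(DocSizes) > MaxSize.   % 2048000 > 1048576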

Before this fix, a `_bulk_docs` request made during replication would
crash, eventually leading to an infinite cycle of crashes and restarts
(with a potentially large state being dumped to the logs), without the
replication job making any progress.

The fix is to do a binary split on the batch until either all documents
fit under the `max_document_size` limit, or some individual documents
fail to replicate.

Documents that fail to replicate bump the `doc_write_failures` count.
Effectively, `max_document_size` acts as an implicit replication filter
in this case.
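
A rough standalone sketch of this recursion follows (hypothetical names:
`post_bulk_docs/2` stands in for the real
`couch_replicator_api_wrap:update_docs/4` call; the actual
implementation is in `flush_docs/2` in the diff below):

    %% Sketch only: recursively halve the batch when the target
    %% rejects the request body as too large (HTTP 413).
    flush(Target, [_SingleDoc] = DocList) ->
        case post_bulk_docs(Target, DocList) of
            {error, request_body_too_large} ->
                %% A lone document is still too large; count it as a
                %% doc_write_failure and move on.
                failure;
            {ok, _} ->
                ok
        end;
    flush(Target, DocList) ->
        case post_bulk_docs(Target, DocList) of
            {error, request_body_too_large} ->
                %% Split the batch in half and retry each half.
                {Left, Right} = lists:split(length(DocList) div 2, DocList),
                flush(Target, Left),
                flush(Target, Right);
            {ok, _} ->
                ok
        end.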

Jira: COUCHDB-3168


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/commit/2f23b57c
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/tree/2f23b57c
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/diff/2f23b57c

Branch: refs/heads/master
Commit: 2f23b57cd705c87570d98340a4aad1bc611cd4f0
Parents: b9232c8
Author: Nick Vatamaniuc <vatamane@apache.org>
Authored: Mon Oct 3 15:30:23 2016 -0400
Committer: Nick Vatamaniuc <vatamane@apache.org>
Committed: Mon Oct 3 16:10:04 2016 -0400

----------------------------------------------------------------------
 src/couch_replicator_api_wrap.erl |  2 ++
 src/couch_replicator_worker.erl   | 30 +++++++++++++++++++++++-------
 2 files changed, 25 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/blob/2f23b57c/src/couch_replicator_api_wrap.erl
----------------------------------------------------------------------
diff --git a/src/couch_replicator_api_wrap.erl b/src/couch_replicator_api_wrap.erl
index ff6b00c..9043314 100644
--- a/src/couch_replicator_api_wrap.erl
+++ b/src/couch_replicator_api_wrap.erl
@@ -448,6 +448,8 @@ update_docs(#httpdb{} = HttpDb, DocList, Options, UpdateType) ->
             {body, {BodyFun, [prefix | Docs]}}, {headers, Headers}],
         fun(201, _, Results) when is_list(Results) ->
                 {ok, bulk_results_to_errors(DocList, Results, remote)};
+           (413, _, _) ->
+                {error, request_body_too_large};
            (417, _, Results) when is_list(Results) ->
                 {ok, bulk_results_to_errors(DocList, Results, remote)}
         end);

http://git-wip-us.apache.org/repos/asf/couchdb-couch-replicator/blob/2f23b57c/src/couch_replicator_worker.erl
----------------------------------------------------------------------
diff --git a/src/couch_replicator_worker.erl b/src/couch_replicator_worker.erl
index 155e11d..ee0c455 100644
--- a/src/couch_replicator_worker.erl
+++ b/src/couch_replicator_worker.erl
@@ -447,23 +447,39 @@ batch_doc(#doc{atts = Atts}) ->
 
 flush_docs(_Target, []) ->
     couch_replicator_stats:new();
-
 flush_docs(Target, DocList) ->
-    {ok, Errors} = couch_replicator_api_wrap:update_docs(
-        Target, DocList, [delay_commit], replicated_changes),
+    FlushResult = couch_replicator_api_wrap:update_docs(Target, DocList,
+        [delay_commit], replicated_changes),
+    handle_flush_docs_result(FlushResult, Target, DocList).
+
+
+handle_flush_docs_result({error, request_body_too_large}, _Target, [Doc]) ->
+    couch_log:error("Replicator: failed to write doc ~p. Too large", [Doc]),
+    couch_replicator_stats:new([{doc_write_failures, 1}]);
+handle_flush_docs_result({error, request_body_too_large}, Target, DocList) ->
+    Len = length(DocList),
+    {DocList1, DocList2} = lists:split(Len div 2, DocList),
+    couch_log:notice("Replicator: couldn't write batch of size ~p to ~p because"
+        " request body is too large. Splitting batch into 2 separate batches of"
+        " sizes ~p and ~p", [Len, couch_replicator_api_wrap:db_uri(Target),
+        length(DocList1), length(DocList2)]),
+    flush_docs(Target, DocList1),
+    flush_docs(Target, DocList2);
+handle_flush_docs_result({ok, Errors}, Target, DocList) ->
     DbUri = couch_replicator_api_wrap:db_uri(Target),
     lists:foreach(
         fun({Props}) ->
-            couch_log:error("Replicator: couldn't write document `~s`, revision `~s`,"
-                " to target database `~s`. Error: `~s`, reason: `~s`.",
-                [get_value(id, Props, ""), get_value(rev, Props, ""), DbUri,
-                    get_value(error, Props, ""), get_value(reason, Props, "")])
+            couch_log:error("Replicator: couldn't write document `~s`, revision"
+                " `~s`, to target database `~s`. Error: `~s`, reason: `~s`.", [
+                get_value(id, Props, ""), get_value(rev, Props, ""), DbUri,
+                get_value(error, Props, ""), get_value(reason, Props, "")])
         end, Errors),
     couch_replicator_stats:new([
         {docs_written, length(DocList) - length(Errors)},
         {doc_write_failures, length(Errors)}
     ]).
 
+
 flush_doc(Target, #doc{id = Id, revs = {Pos, [RevId | _]}} = Doc) ->
     try couch_replicator_api_wrap:update_doc(Target, Doc, [], replicated_changes) of
     {ok, _} ->

