couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kocol...@apache.org
Subject couch commit: updated refs/heads/2735-duplicate-docs to 80503e2
Date Sat, 18 Jul 2015 15:44:38 GMT
Repository: couchdb-couch
Updated Branches:
  refs/heads/2735-duplicate-docs cb4f1aa07 -> 80503e255 (forced update)


Ensure doc groups are sorted before merging them

We had been implicitly assuming that clients send us sorted groups, but
unsurprisingly that's not always the case. The additional sorting here
should be redundant, but the consequences of merging unsorted groups are
severe -- we can end up with uniqueness violations on the primary key in
the database -- and so we add an additional sort here.

COUCHDB-2735


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/80503e25
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/80503e25
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/80503e25

Branch: refs/heads/2735-duplicate-docs
Commit: 80503e255e4ffad8a3c055f4332383f34c8dab2a
Parents: 331f811
Author: Adam Kocoloski <adam@cloudant.com>
Authored: Sat Jul 18 07:49:00 2015 -0400
Committer: Adam Kocoloski <adam@cloudant.com>
Committed: Sat Jul 18 11:44:22 2015 -0400

----------------------------------------------------------------------
 src/couch_db_updater.erl | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/80503e25/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index bd42b35..92139a9 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -276,7 +276,7 @@ handle_cast(Msg, #db{name = Name} = Db) ->
 
 handle_info({update_docs, Client, GroupedDocs, NonRepDocs, MergeConflicts,
         FullCommit}, Db) ->
-    GroupedDocs2 = maybe_tag_grouped_docs(Client, GroupedDocs),
+    GroupedDocs2 = sort_and_tag_grouped_docs(Client, GroupedDocs),
     if NonRepDocs == [] ->
         {GroupedDocs3, Clients, FullCommit2} = collect_updates(GroupedDocs2,
                 [Client], MergeConflicts, FullCommit);
@@ -338,10 +338,15 @@ handle_info({'DOWN', Ref, _, _, Reason}, #db{fd_monitor=Ref, name=Name} = Db) ->
 code_change(_OldVsn, State, _Extra) ->
     {ok, State}.
 
-maybe_tag_grouped_docs(Client, GroupedDocs) ->
+sort_and_tag_grouped_docs(Client, GroupedDocs) ->
+    % These groups should already be sorted but sometimes clients misbehave.
+    % The merge_updates function will fail and the database can end up with
+    % duplicate documents if the incoming groups are not sorted, so as a sanity
+    % check we sort them again here. See COUCHDB-2735.
+    Cmp = fun([#doc{id=A}|_], [#doc{id=B}|_]) -> A < B end,
     lists:map(fun(DocGroup) ->
         [{Client, maybe_tag_doc(D)} || D <- DocGroup]
-    end, GroupedDocs).
+    end, lists:sort(Cmp, GroupedDocs)).
 
 maybe_tag_doc(#doc{id=Id, revs={Pos,[_Rev|PrevRevs]}, meta=Meta0}=Doc) ->
     case lists:keymember(ref, 1, Meta0) of
@@ -370,7 +375,7 @@ collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) ->
         % updaters than deal with their possible conflicts, and local docs
         % writes are relatively rare. Can be optmized later if really needed.
         {update_docs, Client, GroupedDocs, [], MergeConflicts, FullCommit2} ->
-            GroupedDocs2 = maybe_tag_grouped_docs(Client, GroupedDocs),
+            GroupedDocs2 = sort_and_tag_grouped_docs(Client, GroupedDocs),
             GroupedDocsAcc2 =
                 merge_updates(GroupedDocsAcc, GroupedDocs2),
             collect_updates(GroupedDocsAcc2, [Client | ClientsAcc],


Mime
View raw message