couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kocol...@apache.org
Subject couchdb commit: updated refs/heads/1.6.x to 9f3e93d
Date Sat, 18 Jul 2015 11:38:36 GMT
Repository: couchdb
Updated Branches:
  refs/heads/1.6.x fb696d71b -> 9f3e93da9


Ensure doc groups are sorted before merging them

We had been implicitly assuming that clients send us sorted groups, but
unsurprisingly that's not always the case. The additional sorting here
should be redundant, but the consequences of merging unsorted groups are
severe -- we can end up with uniqueness violations on the primary key in
the database -- and so we add an additional sort here.

COUCHDB-2735


Project: http://git-wip-us.apache.org/repos/asf/couchdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb/commit/9f3e93da
Tree: http://git-wip-us.apache.org/repos/asf/couchdb/tree/9f3e93da
Diff: http://git-wip-us.apache.org/repos/asf/couchdb/diff/9f3e93da

Branch: refs/heads/1.6.x
Commit: 9f3e93da931db4069ca84a6b94c2b60c1b4c8440
Parents: fb696d7
Author: Adam Kocoloski <adam@cloudant.com>
Authored: Fri Jul 17 19:20:36 2015 -0400
Committer: Adam Kocoloski <adam@cloudant.com>
Committed: Sat Jul 18 07:38:19 2015 -0400

----------------------------------------------------------------------
 src/couchdb/couch_db_updater.erl | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb/blob/9f3e93da/src/couchdb/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couchdb/couch_db_updater.erl b/src/couchdb/couch_db_updater.erl
index 947669c..c92097f 100644
--- a/src/couchdb/couch_db_updater.erl
+++ b/src/couchdb/couch_db_updater.erl
@@ -222,7 +222,7 @@ handle_cast(Msg, #db{name = Name} = Db) ->
 
 handle_info({update_docs, Client, GroupedDocs, NonRepDocs, MergeConflicts,
         FullCommit}, Db) ->
-    GroupedDocs2 = [[{Client, D} || D <- DocGroup] || DocGroup <- GroupedDocs],
+    GroupedDocs2 = sort_and_tag_groups(Client, GroupedDocs),
     if NonRepDocs == [] ->
         {GroupedDocs3, Clients, FullCommit2} = collect_updates(GroupedDocs2,
                 [Client], MergeConflicts, FullCommit);
@@ -291,8 +291,7 @@ collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) ->
         % updaters than deal with their possible conflicts, and local docs
         % writes are relatively rare. Can be optmized later if really needed.
         {update_docs, Client, GroupedDocs, [], MergeConflicts, FullCommit2} ->
-            GroupedDocs2 = [[{Client, Doc} || Doc <- DocGroup]
-                    || DocGroup <- GroupedDocs],
+            GroupedDocs2 = sort_and_tag_groups(Client, GroupedDocs),
             GroupedDocsAcc2 =
                 merge_updates(GroupedDocsAcc, GroupedDocs2, []),
             collect_updates(GroupedDocsAcc2, [Client | ClientsAcc],
@@ -302,6 +301,15 @@ collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) ->
     end.
 
 
+sort_and_tag_groups(Client, GroupedDocs) ->
+    % These groups should already be sorted but sometimes clients misbehave.
+    % The merge_updates function will fail and the database can end up with
+    % duplicate documents if the incoming groups are not sorted, so as a sanity
+    % check we sort them again here. See COUCHDB-2735.
+    Cmp = fun([{#doc{id=A}, _}|_], [{#doc{id=B}, _}|_]) -> A < B end,
+    SortedGroups = lists:sort(Cmp, GroupedDocs),
+    [[{Client, D} || D <- DocGroup] || DocGroup <- SortedGroups].
+
 btree_by_seq_split(#doc_info{id=Id, high_seq=KeySeq, revs=Revs}) ->
     {RevInfos, DeletedRevInfos} = lists:foldl(
         fun(#rev_info{deleted = false, seq = Seq} = Ri, {Acc, AccDel}) ->


Mime
View raw message