Return-Path: X-Original-To: apmail-couchdb-commits-archive@www.apache.org Delivered-To: apmail-couchdb-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 4EF1518D58 for ; Sat, 18 Jul 2015 15:44:51 +0000 (UTC) Received: (qmail 92165 invoked by uid 500); 18 Jul 2015 15:44:38 -0000 Delivered-To: apmail-couchdb-commits-archive@couchdb.apache.org Received: (qmail 92116 invoked by uid 500); 18 Jul 2015 15:44:38 -0000 Mailing-List: contact commits-help@couchdb.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@couchdb.apache.org Delivered-To: mailing list commits@couchdb.apache.org Received: (qmail 92107 invoked by uid 99); 18 Jul 2015 15:44:38 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 18 Jul 2015 15:44:38 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 64F84E0F79; Sat, 18 Jul 2015 15:44:38 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: kocolosk@apache.org To: commits@couchdb.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: couch commit: updated refs/heads/2735-duplicate-docs to 80503e2 Date: Sat, 18 Jul 2015 15:44:38 +0000 (UTC) Repository: couchdb-couch Updated Branches: refs/heads/2735-duplicate-docs cb4f1aa07 -> 80503e255 (forced update) Ensure doc groups are sorted before merging them We had been implicitly assuming that clients send us sorted groups, but unsurprisingly that's not always the case. The additional sorting here should be redundant, but the consequences of merging unsorted groups are severe -- we can end up with uniqueness violations on the primary key in the database -- and so we add an additional sort here. COUCHDB-2735 Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/80503e25 Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/80503e25 Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/80503e25 Branch: refs/heads/2735-duplicate-docs Commit: 80503e255e4ffad8a3c055f4332383f34c8dab2a Parents: 331f811 Author: Adam Kocoloski Authored: Sat Jul 18 07:49:00 2015 -0400 Committer: Adam Kocoloski Committed: Sat Jul 18 11:44:22 2015 -0400 ---------------------------------------------------------------------- src/couch_db_updater.erl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/80503e25/src/couch_db_updater.erl ---------------------------------------------------------------------- diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl index bd42b35..92139a9 100644 --- a/src/couch_db_updater.erl +++ b/src/couch_db_updater.erl @@ -276,7 +276,7 @@ handle_cast(Msg, #db{name = Name} = Db) -> handle_info({update_docs, Client, GroupedDocs, NonRepDocs, MergeConflicts, FullCommit}, Db) -> - GroupedDocs2 = maybe_tag_grouped_docs(Client, GroupedDocs), + GroupedDocs2 = sort_and_tag_grouped_docs(Client, GroupedDocs), if NonRepDocs == [] -> {GroupedDocs3, Clients, FullCommit2} = collect_updates(GroupedDocs2, [Client], MergeConflicts, FullCommit); @@ -338,10 +338,15 @@ handle_info({'DOWN', Ref, _, _, Reason}, #db{fd_monitor=Ref, name=Name} = Db) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -maybe_tag_grouped_docs(Client, GroupedDocs) -> +sort_and_tag_grouped_docs(Client, GroupedDocs) -> + % These groups should already be sorted but sometimes clients misbehave. + % The merge_updates function will fail and the database can end up with + % duplicate documents if the incoming groups are not sorted, so as a sanity + % check we sort them again here. See COUCHDB-2735. + Cmp = fun([#doc{id=A}|_], [#doc{id=B}|_]) -> A < B end, lists:map(fun(DocGroup) -> [{Client, maybe_tag_doc(D)} || D <- DocGroup] - end, GroupedDocs). + end, lists:sort(Cmp, GroupedDocs)). maybe_tag_doc(#doc{id=Id, revs={Pos,[_Rev|PrevRevs]}, meta=Meta0}=Doc) -> case lists:keymember(ref, 1, Meta0) of @@ -370,7 +375,7 @@ collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) -> % updaters than deal with their possible conflicts, and local docs % writes are relatively rare. Can be optmized later if really needed. {update_docs, Client, GroupedDocs, [], MergeConflicts, FullCommit2} -> - GroupedDocs2 = maybe_tag_grouped_docs(Client, GroupedDocs), + GroupedDocs2 = sort_and_tag_grouped_docs(Client, GroupedDocs), GroupedDocsAcc2 = merge_updates(GroupedDocsAcc, GroupedDocs2), collect_updates(GroupedDocsAcc2, [Client | ClientsAcc],