From commits-return-33041-archive-asf-public=cust-asf.ponee.io@couchdb.apache.org Fri Apr 27 21:13:37 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 22BEF180679 for ; Fri, 27 Apr 2018 21:13:36 +0200 (CEST) Received: (qmail 5970 invoked by uid 500); 27 Apr 2018 19:13:36 -0000 Mailing-List: contact commits-help@couchdb.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@couchdb.apache.org Delivered-To: mailing list commits@couchdb.apache.org Received: (qmail 5910 invoked by uid 99); 27 Apr 2018 19:13:36 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 27 Apr 2018 19:13:36 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 6D82D85314; Fri, 27 Apr 2018 19:13:35 +0000 (UTC) Date: Fri, 27 Apr 2018 19:13:36 +0000 To: "commits@couchdb.apache.org" Subject: [couchdb] 01/02: [SQUERGE] Fix handling of duplicated doc ids MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit From: davisp@apache.org In-Reply-To: <152485641527.3589.6319397055220738547@gitbox.apache.org> References: <152485641527.3589.6319397055220738547@gitbox.apache.org> X-Git-Host: gitbox.apache.org X-Git-Repo: couchdb X-Git-Refname: refs/heads/COUCHDB-3326-clustered-purge-davisp-refactor-2 X-Git-Reftype: branch X-Git-Rev: deda03e91600d956a7f97c7017aa79e0ba8e0ff3 X-Git-NotificationType: diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated Message-Id: <20180427191335.6D82D85314@gitbox.apache.org> This is an automated email from the ASF dual-hosted git repository. davisp pushed a commit to branch COUCHDB-3326-clustered-purge-davisp-refactor-2 in repository https://gitbox.apache.org/repos/asf/couchdb.git commit deda03e91600d956a7f97c7017aa79e0ba8e0ff3 Author: Paul J. Davis AuthorDate: Fri Apr 27 14:12:06 2018 -0500 [SQUERGE] Fix handling of duplicated doc ids Turns out we weren't properly handling when a document id was being repeated in a single purge batch. This fixes that. Squerge to implementing the APIs bit --- src/couch/src/couch_db_updater.erl | 86 ++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl index 368228a..31a36ad 100644 --- a/src/couch/src/couch_db_updater.erl +++ b/src/couch/src/couch_db_updater.erl @@ -97,7 +97,10 @@ handle_call({set_revs_limit, Limit}, _From, Db) -> handle_call({set_purge_infos_limit, Limit}, _From, Db) -> {ok, Db2} = couch_db_engine:set_purge_infos_limit(Db, Limit), ok = gen_server:call(couch_server, {db_updated, Db2}, infinity), - {reply, ok, Db2}; + {reply, ok, Db2, idle_limit()}; + +handle_call({purge_docs, [], _}, _From, Db) -> + {reply, {ok, []}, Db, idle_limit()}; handle_call({purge_docs, PurgeReqs0, Options}, _From, Db) -> % Filter out any previously applied updates during @@ -112,23 +115,40 @@ handle_call({purge_docs, PurgeReqs0, Options}, _From, Db) -> end, lists:zip(PurgeInfos, PurgeReqs0)) end, - Ids = lists:map(fun({_UUID, Id, _Revs}) -> Id end, PurgeReqs), - DocInfos = couch_db_engine:open_docs(Db, Ids), - UpdateSeq = couch_db_engine:get_update_seq(Db), - PurgeSeq = couch_db_engine:get_purge_seq(Db), + Ids = lists:usort(lists:map(fun({_UUID, Id, _Revs}) -> Id end, PurgeReqs)), + FDIs = couch_db_engine:open_docs(Db, Ids), + USeq = couch_db_engine:get_update_seq(Db), + + IdFDIs = lists:zip(Ids, FDIs), + {NewIdFDIs, Replies} = purge_docs(PurgeReqs, IdFDIs, USeq, []), + + Pairs = lists:flatmap(fun({DocId, OldFDI}) -> + {DocId, NewFDI} = lists:keyfind(DocId, 1, NewIdFDIs), + %io:format(standard_error, "~nPAIR: ~p~n", [{OldFDI, NewFDI}]), + case {OldFDI, NewFDI} of + {not_found, not_found} -> + []; + {#full_doc_info{} = A, #full_doc_info{} = A} -> + []; + {#full_doc_info{}, _} -> + [{OldFDI, NewFDI}] + end + end, IdFDIs), - InitAcc = {[], [], []}, - {Pairs, PInfos, Replies} = purge_docs( - PurgeReqs, DocInfos, UpdateSeq, PurgeSeq, InitAcc), + PSeq = couch_db_engine:get_purge_seq(Db), + {RevPInfos, _} = lists:foldl(fun({UUID, DocId, Revs}, {PIAcc, PSeqAcc}) -> + Info = {PSeqAcc + 1, UUID, DocId, Revs}, + {[Info | PIAcc], PSeqAcc + 1} + end, {[], PSeq}, PurgeReqs), + PInfos = lists:reverse(RevPInfos), - Db3 = if Pairs == [] andalso PInfos == [] -> Db; true -> - {ok, Db1} = couch_db_engine:purge_docs(Db, Pairs, PInfos), - Db2 = commit_data(Db1), - ok = gen_server:call(couch_server, {db_updated, Db2}, infinity), - couch_event:notify(Db2#db.name, updated), - Db2 - end, - {reply, {ok, Replies}, Db3, idle_limit()}; + %io:format(standard_error, "~n~nPAIRS: ~p~n~n", [Pairs]), + + {ok, Db1} = couch_db_engine:purge_docs(Db, Pairs, PInfos), + Db2 = commit_data(Db1), + ok = gen_server:call(couch_server, {db_updated, Db2}, infinity), + couch_event:notify(Db2#db.name, updated), + {reply, {ok, Replies}, Db2, idle_limit()}; handle_call(Msg, From, Db) -> case couch_db_engine:handle_db_updater_call(Msg, From, Db) of @@ -654,20 +674,21 @@ update_local_doc_revs(Docs) -> end, Docs). -purge_docs([], [], _USeq, _PSeq, {Pairs, PInfos, Replies}) -> - {lists:reverse(Pairs), lists:reverse(PInfos), lists:reverse(Replies)}; +purge_docs([], IdFDIs, _USeq, Replies) -> + {IdFDIs, lists:reverse(Replies)}; -purge_docs([Req | RestReqs], [FDI | RestInfos], USeq, PSeq, Acc) -> - {UUID, DocId, Revs} = Req, - {Pair, RemovedRevs, NewUSeq} = case FDI of +purge_docs([Req | RestReqs], IdFDIs, USeq, Replies) -> + {_UUID, DocId, Revs} = Req, + {value, {_, FDI0}, RestIdFDIs} = lists:keytake(DocId, 1, IdFDIs), + {NewFDI, RemovedRevs, NewUSeq} = case FDI0 of #full_doc_info{rev_tree = Tree} -> case couch_key_tree:remove_leafs(Tree, Revs) of {_, []} -> % No change - {no_change, [], USeq}; + {FDI0, [], USeq}; {[], Removed} -> % Completely purged - {{FDI, not_found}, Removed, USeq}; + {not_found, Removed, USeq}; {NewTree, Removed} -> % Its possible to purge the #leaf{} that contains % the update_seq where this doc sits in the @@ -682,27 +703,18 @@ purge_docs([Req | RestReqs], [FDI | RestInfos], USeq, PSeq, Acc) -> {Value, SeqAcc} end, USeq, NewTree), - NewFDI = FDI#full_doc_info{ + FDI1 = FDI0#full_doc_info{ update_seq = NewUpdateSeq, rev_tree = NewTree2 }, - {{FDI, NewFDI}, Removed, NewUpdateSeq} + {FDI1, Removed, NewUpdateSeq} end; not_found -> % Not found means nothing to change - {no_change, [], USeq} + {not_found, [], USeq} end, - {Pairs, PInfos, Replies} = Acc, - NewPairs = case Pair of - no_change -> Pairs; - _ -> [Pair | Pairs] - end, - NewAcc = { - NewPairs, - [{PSeq + 1, UUID, DocId, Revs} | PInfos], - [{ok, RemovedRevs} | Replies] - }, - purge_docs(RestReqs, RestInfos, NewUSeq, PSeq + 1, NewAcc). + NewReplies = [{ok, RemovedRevs} | Replies], + purge_docs(RestReqs, [{DocId, NewFDI} | RestIdFDIs], NewUSeq, NewReplies). commit_data(Db) -> -- To stop receiving notification emails like this one, please contact davisp@apache.org.