Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 16D73200D2B for ; Thu, 2 Nov 2017 20:32:20 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 155931609EB; Thu, 2 Nov 2017 19:32:20 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 43293160BE6 for ; Thu, 2 Nov 2017 20:32:19 +0100 (CET) Received: (qmail 88152 invoked by uid 500); 2 Nov 2017 19:32:18 -0000 Mailing-List: contact commits-help@couchdb.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@couchdb.apache.org Delivered-To: mailing list commits@couchdb.apache.org Received: (qmail 87979 invoked by uid 99); 2 Nov 2017 19:32:18 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 02 Nov 2017 19:32:18 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 9E1B783A17; Thu, 2 Nov 2017 19:32:14 +0000 (UTC) Date: Thu, 02 Nov 2017 19:32:16 +0000 To: "commits@couchdb.apache.org" Subject: [couchdb] 02/04: Optimize document updates MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit From: davisp@apache.org In-Reply-To: <150965113467.13658.1019656453679493846@gitbox.apache.org> References: <150965113467.13658.1019656453679493846@gitbox.apache.org> X-Git-Host: gitbox.apache.org X-Git-Repo: couchdb X-Git-Refname: refs/heads/optimize-doc-updates X-Git-Reftype: branch X-Git-Rev: 97d7122def7718a02377ac08db8bd186bd92fc43 X-Git-NotificationType: diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated Message-Id: <20171102193215.9E1B783A17@gitbox.apache.org> archived-at: 
Thu, 02 Nov 2017 19:32:20 -0000 This is an automated email from the ASF dual-hosted git repository. davisp pushed a commit to branch optimize-doc-updates in repository https://gitbox.apache.org/repos/asf/couchdb.git commit 97d7122def7718a02377ac08db8bd186bd92fc43 Author: Paul J. Davis AuthorDate: Thu Nov 2 12:26:01 2017 -0500 Optimize document updates This works by delaying the stemming step until all updates to a given document have been processed. --- src/couch/src/couch_db_updater.erl | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl index ca61e04..0daed90 100644 --- a/src/couch/src/couch_db_updater.erl +++ b/src/couch/src/couch_db_updater.erl @@ -756,23 +756,24 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList], [OldDocInfo|RestOldInfo], AccNewInfos, AccRemoveSeqs, AccSeq) -> erlang:put(last_id_merged, OldDocInfo#full_doc_info.id), % for debugging NewDocInfo0 = lists:foldl(fun({Client, NewDoc}, OldInfoAcc) -> - merge_rev_tree(OldInfoAcc, NewDoc, Client, Limit, MergeConflicts) + merge_rev_tree(OldInfoAcc, NewDoc, Client, MergeConflicts) end, OldDocInfo, NewDocs), + NewDocInfo1 = stem_full_doc_info(NewDocInfo0, Limit), % When MergeConflicts is false, we updated #full_doc_info.deleted on every % iteration of merge_rev_tree. However, merge_rev_tree does not update % #full_doc_info.deleted when MergeConflicts is true, since we don't need % to know whether the doc is deleted between iterations. Since we still % need to know if the doc is deleted after the merge happens, we have to % set it here. 
- NewDocInfo1 = case MergeConflicts of + NewDocInfo2 = case MergeConflicts of true -> - NewDocInfo0#full_doc_info{ - deleted = couch_doc:is_deleted(NewDocInfo0) + NewDocInfo1#full_doc_info{ + deleted = couch_doc:is_deleted(NewDocInfo1) }; false -> - NewDocInfo0 + NewDocInfo1 end, - if NewDocInfo1 == OldDocInfo -> + if NewDocInfo2 == OldDocInfo -> % nothing changed merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo, AccNewInfos, AccRemoveSeqs, AccSeq); @@ -781,7 +782,7 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList], % important to note that the update_seq on OldDocInfo should % be identical to the value on NewDocInfo1. OldSeq = OldDocInfo#full_doc_info.update_seq, - NewDocInfo2 = NewDocInfo1#full_doc_info{ + NewDocInfo3 = NewDocInfo2#full_doc_info{ update_seq = AccSeq + 1 }, RemoveSeqs = case OldSeq of @@ -789,10 +790,10 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList], _ -> [OldSeq | AccRemoveSeqs] end, merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo, - [NewDocInfo2|AccNewInfos], RemoveSeqs, AccSeq+1) + [NewDocInfo3|AccNewInfos], RemoveSeqs, AccSeq+1) end. -merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) +merge_rev_tree(OldInfo, NewDoc, Client, false) when OldInfo#full_doc_info.deleted -> % We're recreating a document that was previously % deleted. 
To check that this is a recreation from @@ -816,7 +817,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) % Merge our modified new doc into the tree #full_doc_info{rev_tree=OldTree} = OldInfo, NewTree0 = couch_doc:to_path(NewDoc2), - case couch_key_tree:merge(OldTree, NewTree0, Limit) of + case couch_key_tree:merge(OldTree, NewTree0) of {NewTree1, new_leaf} -> % We changed the revision id so inform the caller send_result(Client, NewDoc, {ok, {OldPos+1, NewRevId}}), @@ -831,7 +832,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) send_result(Client, NewDoc, conflict), OldInfo end; -merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) -> +merge_rev_tree(OldInfo, NewDoc, Client, false) -> % We're attempting to merge a new revision into an % undeleted document. To not be a conflict we require % that the merge results in extending a branch. @@ -839,7 +840,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) -> OldTree = OldInfo#full_doc_info.rev_tree, NewTree0 = couch_doc:to_path(NewDoc), NewDeleted = NewDoc#doc.deleted, - case couch_key_tree:merge(OldTree, NewTree0, Limit) of + case couch_key_tree:merge(OldTree, NewTree0) of {NewTree, new_leaf} when not NewDeleted -> OldInfo#full_doc_info{ rev_tree = NewTree, @@ -857,17 +858,20 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) -> send_result(Client, NewDoc, conflict), OldInfo end; -merge_rev_tree(OldInfo, NewDoc, _Client, Limit, true) -> +merge_rev_tree(OldInfo, NewDoc, _Client, true) -> % We're merging in revisions without caring about % conflicts. Most likely this is a replication update. OldTree = OldInfo#full_doc_info.rev_tree, NewTree0 = couch_doc:to_path(NewDoc), - {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0, Limit), + {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0), OldInfo#full_doc_info{rev_tree = NewTree}. +stem_full_doc_info(#full_doc_info{rev_tree = Tree} = Info, Limit) -> + Stemmed = couch_key_tree:stem(Tree, Limit), + Info#full_doc_info{rev_tree = Stemmed}. 
+ stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) -> - [Info#full_doc_info{rev_tree=couch_key_tree:stem(Tree, Limit)} || - #full_doc_info{rev_tree=Tree}=Info <- DocInfos]. + lists:map(fun(FDI) -> stem_full_doc_info(FDI, Limit) end, DocInfos). update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) -> #db{ -- To stop receiving notification emails like this one, please contact "commits@couchdb.apache.org".