couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject [couchdb] 02/04: Optimize document updates
Date Thu, 02 Nov 2017 19:32:16 GMT
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch optimize-doc-updates
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 97d7122def7718a02377ac08db8bd186bd92fc43
Author: Paul J. Davis <paul.joseph.davis@gmail.com>
AuthorDate: Thu Nov 2 12:26:01 2017 -0500

    Optimize document updates
    
    This works by delaying the stemming step until all updates to a given
    document have been processed, so each document's revision tree is
    stemmed once per batch instead of once per merged revision.
---
 src/couch/src/couch_db_updater.erl | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index ca61e04..0daed90 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -756,23 +756,24 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList],
         [OldDocInfo|RestOldInfo], AccNewInfos, AccRemoveSeqs, AccSeq) ->
     erlang:put(last_id_merged, OldDocInfo#full_doc_info.id), % for debugging
     NewDocInfo0 = lists:foldl(fun({Client, NewDoc}, OldInfoAcc) ->
-        merge_rev_tree(OldInfoAcc, NewDoc, Client, Limit, MergeConflicts)
+        merge_rev_tree(OldInfoAcc, NewDoc, Client, MergeConflicts)
     end, OldDocInfo, NewDocs),
+    NewDocInfo1 = stem_full_doc_info(NewDocInfo0, Limit),
     % When MergeConflicts is false, we updated #full_doc_info.deleted on every
     % iteration of merge_rev_tree. However, merge_rev_tree does not update
     % #full_doc_info.deleted when MergeConflicts is true, since we don't need
     % to know whether the doc is deleted between iterations. Since we still
     % need to know if the doc is deleted after the merge happens, we have to
     % set it here.
-    NewDocInfo1 = case MergeConflicts of
+    NewDocInfo2 = case MergeConflicts of
         true ->
-            NewDocInfo0#full_doc_info{
-                deleted = couch_doc:is_deleted(NewDocInfo0)
+            NewDocInfo1#full_doc_info{
+                deleted = couch_doc:is_deleted(NewDocInfo1)
             };
         false ->
-            NewDocInfo0
+            NewDocInfo1
     end,
-    if NewDocInfo1 == OldDocInfo ->
+    if NewDocInfo2 == OldDocInfo ->
         % nothing changed
         merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo,
             AccNewInfos, AccRemoveSeqs, AccSeq);
@@ -781,7 +782,7 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList],
         % important to note that the update_seq on OldDocInfo should
         % be identical to the value on NewDocInfo1.
         OldSeq = OldDocInfo#full_doc_info.update_seq,
-        NewDocInfo2 = NewDocInfo1#full_doc_info{
+        NewDocInfo3 = NewDocInfo2#full_doc_info{
             update_seq = AccSeq + 1
         },
         RemoveSeqs = case OldSeq of
@@ -789,10 +790,10 @@ merge_rev_trees(Limit, MergeConflicts, [NewDocs|RestDocsList],
             _ -> [OldSeq | AccRemoveSeqs]
         end,
         merge_rev_trees(Limit, MergeConflicts, RestDocsList, RestOldInfo,
-            [NewDocInfo2|AccNewInfos], RemoveSeqs, AccSeq+1)
+            [NewDocInfo3|AccNewInfos], RemoveSeqs, AccSeq+1)
     end.
 
-merge_rev_tree(OldInfo, NewDoc, Client, Limit, false)
+merge_rev_tree(OldInfo, NewDoc, Client, false)
         when OldInfo#full_doc_info.deleted ->
     % We're recreating a document that was previously
     % deleted. To check that this is a recreation from
@@ -816,7 +817,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false)
             % Merge our modified new doc into the tree
             #full_doc_info{rev_tree=OldTree} = OldInfo,
             NewTree0 = couch_doc:to_path(NewDoc2),
-            case couch_key_tree:merge(OldTree, NewTree0, Limit) of
+            case couch_key_tree:merge(OldTree, NewTree0) of
                 {NewTree1, new_leaf} ->
                     % We changed the revision id so inform the caller
                     send_result(Client, NewDoc, {ok, {OldPos+1, NewRevId}}),
@@ -831,7 +832,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false)
             send_result(Client, NewDoc, conflict),
             OldInfo
     end;
-merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) ->
+merge_rev_tree(OldInfo, NewDoc, Client, false) ->
     % We're attempting to merge a new revision into an
     % undeleted document. To not be a conflict we require
     % that the merge results in extending a branch.
@@ -839,7 +840,7 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) ->
     OldTree = OldInfo#full_doc_info.rev_tree,
     NewTree0 = couch_doc:to_path(NewDoc),
     NewDeleted = NewDoc#doc.deleted,
-    case couch_key_tree:merge(OldTree, NewTree0, Limit) of
+    case couch_key_tree:merge(OldTree, NewTree0) of
         {NewTree, new_leaf} when not NewDeleted ->
             OldInfo#full_doc_info{
                 rev_tree = NewTree,
@@ -857,17 +858,20 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) ->
             send_result(Client, NewDoc, conflict),
             OldInfo
     end;
-merge_rev_tree(OldInfo, NewDoc, _Client, Limit, true) ->
+merge_rev_tree(OldInfo, NewDoc, _Client, true) ->
     % We're merging in revisions without caring about
     % conflicts. Most likely this is a replication update.
     OldTree = OldInfo#full_doc_info.rev_tree,
     NewTree0 = couch_doc:to_path(NewDoc),
-    {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0, Limit),
+    {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0),
     OldInfo#full_doc_info{rev_tree = NewTree}.
 
+stem_full_doc_info(#full_doc_info{rev_tree = Tree} = Info, Limit) ->
+    Stemmed = couch_key_tree:stem(Tree, Limit),
+    Info#full_doc_info{rev_tree = Stemmed}.
+
 stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) ->
-    [Info#full_doc_info{rev_tree=couch_key_tree:stem(Tree, Limit)} ||
-            #full_doc_info{rev_tree=Tree}=Info <- DocInfos].
+    lists:map(fun(FDI) -> stem_full_doc_info(FDI, Limit) end, DocInfos).
 
 update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
     #db{

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <commits@couchdb.apache.org>.

Mime
View raw message