Return-Path: X-Original-To: apmail-couchdb-commits-archive@www.apache.org Delivered-To: apmail-couchdb-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id E109E11B44 for ; Thu, 28 Aug 2014 12:11:38 +0000 (UTC) Received: (qmail 4048 invoked by uid 500); 28 Aug 2014 12:11:38 -0000 Delivered-To: apmail-couchdb-commits-archive@couchdb.apache.org Received: (qmail 3768 invoked by uid 500); 28 Aug 2014 12:11:38 -0000 Mailing-List: contact commits-help@couchdb.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@couchdb.apache.org Delivered-To: mailing list commits@couchdb.apache.org Received: (qmail 3455 invoked by uid 99); 28 Aug 2014 12:11:38 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 28 Aug 2014 12:11:38 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id EB1BFA02BAC; Thu, 28 Aug 2014 12:11:37 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: rnewson@apache.org To: commits@couchdb.apache.org Date: Thu, 28 Aug 2014 12:11:43 -0000 Message-Id: In-Reply-To: <149709301f134b60b82ddbd5d3fa6544@git.apache.org> References: <149709301f134b60b82ddbd5d3fa6544@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [07/50] couch commit: updated refs/heads/master to 9d0ac7d Be more specific on the merge result We have three results that can happen when merging a path into a revision tree: * We can extend a branch which replaces a leaf * We can create a new branch which results in a new leaf * We can land on an existing internal node The first result (new_leaf) means that there is no conflict in the update operation. The second (new_branch) means we branched a revision path which means the operation introduces a conflict. The third (internal_node) means that the merge was the result of an edit that has already been applied to this document. For the third case we have a subtle special case in that if we have deleted the document and want to recreate it into the same initial state we need to give the new state a different revision. The current code follows the edit path and extends the first deleted leaf it finds. BugzID: 25150 Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/72318e57 Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/72318e57 Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/72318e57 Branch: refs/heads/master Commit: 72318e57db587ca315aa49cb917581432252d04e Parents: e9dc0fb Author: Paul J. Davis Authored: Fri Nov 15 12:13:17 2013 -0600 Committer: Robert Newson Committed: Thu Aug 28 13:00:00 2014 +0100 ---------------------------------------------------------------------- include/couch_db.hrl | 2 - src/couch_key_tree.erl | 218 +++++++++++++++++++++++++------------------- 2 files changed, 126 insertions(+), 94 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/72318e57/include/couch_db.hrl ---------------------------------------------------------------------- diff --git a/include/couch_db.hrl b/include/couch_db.hrl index c57a583..570d3db 100644 --- a/include/couch_db.hrl +++ b/include/couch_db.hrl @@ -42,10 +42,8 @@ -define(LOG_WARN(Format, Args), couch_log:warning(Format, Args)). -define(LOG_ERROR(Format, Args), couch_log:error(Format, Args)). -% Tree::term() is really a tree(), but we don't want to require R13B04 yet -type branch() :: {Key::term(), Value::term(), Tree::term()}. -type path() :: {Start::pos_integer(), branch()}. --type tree() :: [branch()]. % sorted by key -record(rev_info, { rev, http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/72318e57/src/couch_key_tree.erl ---------------------------------------------------------------------- diff --git a/src/couch_key_tree.erl b/src/couch_key_tree.erl index bf7e61c..1659547 100644 --- a/src/couch_key_tree.erl +++ b/src/couch_key_tree.erl @@ -65,104 +65,138 @@ stem/2 ]). -include_lib("couch/include/couch_db.hrl"). - -%% @doc Merge a path with a list of paths and stem to the given length. --spec merge([path()], path(), pos_integer()) -> {[path()], - conflicts | no_conflicts}. -merge(Paths, Path, Depth) -> - {Merged, Conflicts} = merge(Paths, Path), - {stem(Merged, Depth), Conflicts}. - -%% @doc Merge a path with an existing list of paths, returning a new list of -%% paths. A return of conflicts indicates a new conflict was discovered in this -%% merge. Conflicts may already exist in the original list of paths. --spec merge([path()], path()) -> {[path()], conflicts | no_conflicts}. -merge(Paths, Path) -> - {ok, Merged, HasConflicts} = merge_one(Paths, Path, [], false), - if HasConflicts -> - Conflicts = conflicts; - (length(Merged) =/= length(Paths)) and (length(Merged) =/= 1) -> - Conflicts = conflicts; - true -> - Conflicts = no_conflicts - end, - {lists:sort(Merged), Conflicts}. - --spec merge_one(Original::[path()], Inserted::path(), [path()], boolean()) -> - {ok, Merged::[path()], NewConflicts::boolean()}. -merge_one([], Insert, OutAcc, ConflictsAcc) -> - {ok, [Insert | OutAcc], ConflictsAcc}; -merge_one([{Start, Tree}|Rest], {StartInsert, TreeInsert}, Acc, HasConflicts) -> - case merge_at([Tree], StartInsert - Start, [TreeInsert]) of - {ok, [Merged], Conflicts} -> - MergedStart = lists:min([Start, StartInsert]), - {ok, Rest ++ [{MergedStart, Merged} | Acc], Conflicts or HasConflicts}; - no -> - AccOut = [{Start, Tree} | Acc], - merge_one(Rest, {StartInsert, TreeInsert}, AccOut, HasConflicts) +-type treenode() :: {Key::term(), Value::term(), [Node::treenode()]}. +-type tree() :: {Depth::pos_integer(), [treenode()]}. +-type revtree() :: [tree()]. + + +%% @doc Merge a path into the given tree and then stem the result. +%% Although Tree is of type tree(), it must not contain any branches. +-spec merge(revtree(), tree(), pos_integer()) -> + {revtree(), new_leaf | new_branch | internal_node}. +merge(RevTree, Tree, StemDepth) -> + {Merged, Result} = merge(RevTree, Tree), + {stem(Merged, StemDepth), Result}. + + +%% @doc Merge a path into a tree. +-spec merge(revtree(), tree()) -> + {revtree(), new_leaf | new_branch | internal_node}. +merge(RevTree, Tree) -> + {Merged, Result} = merge_tree(RevTree, Tree, []), + {lists:sort(Merged), Result}. + +%% @private +%% @doc Attempt to merge Tree into each branch of the RevTree. +%% If it can't find a branch that the new tree merges into, add it as a +%% new branch in the RevTree. +-spec merge_tree(revtree(), tree(), revtree()) -> {revtree(), boolean()}. +merge_tree([], Tree, []) -> + {[Tree], new_leaf}; +merge_tree([], Tree, MergeAcc) -> + {[Tree|MergeAcc], new_branch}; +merge_tree([{Depth, Nodes} | Rest], {IDepth, INodes}=Tree, MergeAcc) -> + % For the intrepid observer following along at home, notice what we're + % doing here with (Depth - IDepth). This tells us which of the two + % branches (Nodes or INodes) we need to seek into. If Depth > IDepth + % that means we need go into INodes to find where we line up with + % Nodes. If Depth < IDepth, its obviously the other way. If it turns + % out that (Depth - IDepth) == 0, then we know that this is where + % we begin our actual merge operation (ie, looking for key matches). + % Its helpful to note that this whole moving into sub-branches is due + % to how we store trees that have been stemmed. When a path is + % stemmed so that the root node is lost, we wrap it in a tuple with + % the number keys that have been droped. This number is the depth + % value that's used throughout this module. + case merge_at([Nodes], Depth - IDepth, [INodes]) of + {[Merged], Result} -> + NewDepth = erlang:min(Depth, IDepth), + {Rest ++ [{NewDepth, Merged} | MergeAcc], Result}; + fail -> + merge_tree(Rest, Tree, [{Depth, Nodes} | MergeAcc]) end. --spec merge_at(tree(), Place::integer(), tree()) -> - {ok, Merged::tree(), HasConflicts::boolean()} | no. -merge_at(_Ours, _Place, []) -> - no; -merge_at([], _Place, _Insert) -> - no; -merge_at([{Key, Value, SubTree}|Sibs], Place, InsertTree) when Place > 0 -> - % inserted starts later than committed, need to drill into committed subtree - case merge_at(SubTree, Place - 1, InsertTree) of - {ok, Merged, Conflicts} -> - {ok, [{Key, Value, Merged} | Sibs], Conflicts}; - no -> - % first branch didn't merge, move to next branch - case merge_at(Sibs, Place, InsertTree) of - {ok, Merged, Conflicts} -> - {ok, [{Key, Value, SubTree} | Merged], Conflicts}; - no -> - no - end +%% @private +%% @doc Locate the point at which merging can start. +%% Because of stemming we may need to seek into one of the branches +%% before we can start comparing node keys. If one of the branches +%% ends up running out of nodes we know that these two branches can +%% not be merged. +-spec merge_at([node()], integer(), [node()]) -> {revtree(), boolean()} | fail. +merge_at(_Nodes, _Pos, []) -> + fail; +merge_at([], _Pos, _INodes) -> + fail; +merge_at(Nodes, Pos, [{IK, IV, [NextINode]}]) when Pos > 0 -> + % Depth was bigger than IDepth, so we need to discard from the + % insert path to find where it might start matching. + case merge_at(Nodes, Pos - 1, [NextINode]) of + {Merged, Result} -> {[{IK, IV, Merged}], Result}; + fail -> fail end; -merge_at(OurTree, Place, [{Key, Value, SubTree}]) when Place < 0 -> - % inserted starts earlier than committed, need to drill into insert subtree - case merge_at(OurTree, Place + 1, SubTree) of - {ok, Merged, Conflicts} -> - {ok, [{Key, Value, Merged}], Conflicts}; - no -> - no +merge_at([{K, V, SubTree} | Sibs], Pos, INodes) when Pos < 0 -> + % When Pos is negative, Depth was less than IDepth, so we + % need to discard from the revision tree path + case merge_at(SubTree, Pos + 1, INodes) of + {Merged, Result} -> + {[{K, V, Merged} | Sibs], Result}; + fail -> + % Merging along the subtree failed. We need to also try + % merging the insert branch against the siblings of this + % node. + case merge_at(Sibs, Pos, INodes) of + {Merged, Result} -> {[{K, V, SubTree} | Merged], Result}; + fail -> fail + end end; -merge_at([{Key, V1, SubTree}|Sibs], 0, [{Key, V2, InsertSubTree}]) -> - {Merged, Conflicts} = merge_simple(SubTree, InsertSubTree), - {ok, [{Key, value_pref(V1, V2), Merged} | Sibs], Conflicts}; -merge_at([{OurKey, _, _} | _], 0, [{Key, _, _}]) when OurKey > Key -> - % siblings keys are ordered, no point in continuing - no; -merge_at([Tree | Sibs], 0, InsertTree) -> - case merge_at(Sibs, 0, InsertTree) of - {ok, Merged, Conflicts} -> - {ok, [Tree | Merged], Conflicts}; - no -> - no +merge_at([{K, V1, Nodes} | Sibs], 0, [{K, V2, INodes}]) -> + % Keys are equal. At this point we have found a possible starting + % position for our merge to take place. + {Merged, Result} = merge_extend(Nodes, INodes), + {[{K, value_pref(V1, V2), Merged} | Sibs], Result}; +merge_at([{K1, _, _} | _], 0, [{K2, _, _}]) when K1 > K2 -> + % Siblings keys are ordered, no point in continuing + fail; +merge_at([Tree | Sibs], 0, INodes) -> + % INodes key comes after this key, so move on to the next sibling. + case merge_at(Sibs, 0, INodes) of + {Merged, Result} -> {[Tree | Merged], Result}; + fail -> fail end. -% key tree functions - --spec merge_simple(tree(), tree()) -> {Merged::tree(), NewConflicts::boolean()}. -merge_simple([], B) -> - {B, false}; -merge_simple(A, []) -> - {A, false}; -merge_simple([{Key, V1, SubA} | NextA], [{Key, V2, SubB} | NextB]) -> - {MergedSubTree, Conflict1} = merge_simple(SubA, SubB), - {MergedNextTree, Conflict2} = merge_simple(NextA, NextB), - Value = value_pref(V1, V2), - {[{Key, Value, MergedSubTree} | MergedNextTree], Conflict1 or Conflict2}; -merge_simple([{A, _, _} = Tree | Next], [{B, _, _} | _] = Insert) when A < B -> - {Merged, Conflict} = merge_simple(Next, Insert), - % if Merged has more branches than the input we added a new conflict - {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))}; -merge_simple(Ours, [Tree | Next]) -> - {Merged, Conflict} = merge_simple(Ours, Next), - {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))}. +-spec merge_extend(tree(), tree()) -> {tree(), atom()}. +merge_extend([], B) when B =/= [] -> + % Most likely the insert branch simply extends this one, so the new + % branch is exactly B. Its also possible that B is a branch because + % its key sorts greater than all siblings of an internal node. This + % condition is checked in the last clause of this function and the + % new_leaf result is fixed to be new_branch. + {B, new_leaf}; +merge_extend(A, []) -> + % Insert branch ends an internal node in our original revtree() + % so the end result is exactly our original revtree. + {A, internal_node}; +merge_extend([{K, V1, SubA} | NextA], [{K, V2, SubB}]) -> + % Here we're simply extending the path to the next deeper + % level in the two branches. + {Merged, Result} = merge_extend(SubA, SubB), + {[{K, value_pref(V1, V2), Merged} | NextA], Result}; +merge_extend([{K1, _, _}=NodeA | Rest], [{K2, _, _}=NodeB]) when K1 > K2 -> + % Keys are ordered so we know this is where the insert branch needs + % to be inserted into the tree. We also know that this creates a new + % branch so we have a new leaf to report. + {[NodeB, NodeA | Rest], new_branch}; +merge_extend([Tree | RestA], NextB) -> + % Here we're moving on to the next sibling to try and extend our + % merge even deeper. The length check is due to the fact that the + % key in NextB might be larger than the largest key in RestA which + % means we've created a new branch. + {Merged, Result0} = merge_extend(RestA, NextB), + Result = case length(Merged) == length(RestA) of + true -> Result0; + false -> new_branch + end, + {[Tree | Merged], Result}. find_missing(_Tree, []) -> [];