Return-Path: Delivered-To: apmail-couchdb-commits-archive@www.apache.org Received: (qmail 82776 invoked from network); 31 Jan 2011 02:47:08 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 31 Jan 2011 02:47:08 -0000 Received: (qmail 78262 invoked by uid 500); 31 Jan 2011 02:47:08 -0000 Delivered-To: apmail-couchdb-commits-archive@couchdb.apache.org Received: (qmail 78139 invoked by uid 500); 31 Jan 2011 02:47:06 -0000 Mailing-List: contact commits-help@couchdb.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@couchdb.apache.org Delivered-To: mailing list commits@couchdb.apache.org Received: (qmail 78132 invoked by uid 99); 31 Jan 2011 02:47:06 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 31 Jan 2011 02:47:06 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 31 Jan 2011 02:47:05 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 0968423889EB; Mon, 31 Jan 2011 02:46:45 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1065471 - in /couchdb/trunk: src/couchdb/couch_key_tree.erl test/etap/060-kt-merging.t Date: Mon, 31 Jan 2011 02:46:44 -0000 To: commits@couchdb.apache.org From: kocolosk@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110131024645.0968423889EB@eris.apache.org> Author: kocolosk Date: Mon Jan 31 02:46:44 2011 New Revision: 1065471 URL: http://svn.apache.org/viewvc?rev=1065471&view=rev Log: Fix spurious declarations of new merge conflicts This patch also adds extra tests of the key tree merging logic as well as edoc-formatted documentation for the module and a few of the merge functions. Closes COUCHDB-902. Thanks Paul Davis, Bob Dionne, Klaus Trainer. Modified: couchdb/trunk/src/couchdb/couch_key_tree.erl couchdb/trunk/test/etap/060-kt-merging.t Modified: couchdb/trunk/src/couchdb/couch_key_tree.erl URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_key_tree.erl?rev=1065471&r1=1065470&r2=1065471&view=diff ============================================================================== --- couchdb/trunk/src/couchdb/couch_key_tree.erl (original) +++ couchdb/trunk/src/couchdb/couch_key_tree.erl Mon Jan 31 02:46:44 2011 @@ -10,6 +10,41 @@ % License for the specific language governing permissions and limitations under % the License. +%% @doc Data structure used to represent document edit histories. + +%% A key tree is used to represent the edit history of a document. Each node of +%% the tree represents a particular version. Relations between nodes represent +%% the order that these edits were applied. For instance, a set of three edits +%% would produce a tree of versions A->B->C indicating that edit C was based on +%% version B which was in turn based on A. In a world without replication (and +%% no ability to disable MVCC checks), all histories would be forced to be +%% linear lists of edits due to constraints imposed by MVCC (ie, new edits must +%% be based on the current version). However, we have replication, so we must +%% deal with not so easy cases, which lead to trees. +%% +%% Consider a document in state A. This doc is replicated to a second node. We +%% then edit the document on each node leaving it in two different states, B +%% and C. We now have two key trees, A->B and A->C. When we go to replicate a +%% second time, the key tree must combine these two trees which gives us +%% A->(B|C). This is how conflicts are introduced. In terms of the key tree, we +%% say that we have two leaves (B and C) that are not deleted. The presense of +%% the multiple leaves indicate conflict. To remove a conflict, one of the +%% edits (B or C) can be deleted, which results in, A->(B|C->D) where D is an +%% edit that is specially marked with the a deleted=true flag. +%% +%% What makes this a bit more complicated is that there is a limit to the +%% number of revisions kept, specified in couch_db.hrl (default is 1000). When +%% this limit is exceeded only the last 1000 are kept. This comes in to play +%% when branches are merged. The comparison has to begin at the same place in +%% the branches. A revision id is of the form N-XXXXXXX where N is the current +%% revision. So each path will have a start number, calculated in +%% couch_doc:to_path using the formula N - length(RevIds) + 1 So, .eg. if a doc +%% was edit 1003 times this start number would be 4, indicating that 3 +%% revisions were truncated. +%% +%% This comes into play in @see merge_at/3 which recursively walks down one +%% tree or the other until they begin at the same revision. + -module(couch_key_tree). -export([merge/3, find_missing/2, get_key_leafs/2, get_full_key_paths/2, get/2]). @@ -18,12 +53,16 @@ -include("couch_db.hrl"). +%% @doc Merge a path with a list of paths and stem to the given length. -spec merge([path()], path(), pos_integer()) -> {[path()], conflicts | no_conflicts}. merge(Paths, Path, Depth) -> {Merged, Conflicts} = merge(Paths, Path), {stem(Merged, Depth), Conflicts}. +%% @doc Merge a path with an existing list of paths, returning a new list of +%% paths. A return of conflicts indicates a new conflict was discovered in this +%% merge. Conflicts may already exist in the original list of paths. -spec merge([path()], path()) -> {[path()], conflicts | no_conflicts}. merge(Paths, Path) -> {ok, Merged, HasConflicts} = merge_one(Paths, Path, [], false), @@ -62,6 +101,7 @@ merge_at([{Key, Value, SubTree}|Sibs], P {ok, Merged, Conflicts} -> {ok, [{Key, Value, Merged} | Sibs], Conflicts}; no -> + % first branch didn't merge, move to next branch case merge_at(Sibs, Place, InsertTree) of {ok, Merged, Conflicts} -> {ok, [{Key, Value, SubTree} | Merged], Conflicts}; @@ -103,11 +143,12 @@ merge_simple([{Key, Value, SubA} | NextA {MergedNextTree, Conflict2} = merge_simple(NextA, NextB), {[{Key, Value, MergedSubTree} | MergedNextTree], Conflict1 or Conflict2}; merge_simple([{A, _, _} = Tree | Next], [{B, _, _} | _] = Insert) when A < B -> - {Merged, _} = merge_simple(Next, Insert), - {[Tree | Merged], true}; + {Merged, Conflict} = merge_simple(Next, Insert), + % if Merged has more branches than the input we added a new conflict + {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))}; merge_simple(Ours, [Tree | Next]) -> - {Merged, _} = merge_simple(Ours, Next), - {[Tree | Merged], true}. + {Merged, Conflict} = merge_simple(Ours, Next), + {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))}. find_missing(_Tree, []) -> []; Modified: couchdb/trunk/test/etap/060-kt-merging.t URL: http://svn.apache.org/viewvc/couchdb/trunk/test/etap/060-kt-merging.t?rev=1065471&r1=1065470&r2=1065471&view=diff ============================================================================== --- couchdb/trunk/test/etap/060-kt-merging.t (original) +++ couchdb/trunk/test/etap/060-kt-merging.t Mon Jan 31 02:46:44 2011 @@ -15,7 +15,7 @@ main(_) -> test_util:init_code_path(), - etap:plan(12), + etap:plan(16), case (catch test()) of ok -> etap:end_tests(); @@ -26,25 +26,21 @@ main(_) -> ok. test() -> - One = {0, {"1","foo",[]}}, - TwoSibs = [{0, {"1","foo",[]}}, - {0, {"2","foo",[]}}], - OneChild = {0, {"1","foo",[{"1a", "bar", []}]}}, - TwoChild = {0, {"1","foo", [{"1a", "bar", [{"1aa", "bar", []}]}]}}, - TwoChildSibs = {0, {"1","foo", [{"1a", "bar", []}, - {"1b", "bar", []}]}}, - TwoChildSibs2 = {0, {"1","foo", [{"1a", "bar", []}, - {"1b", "bar", [{"1bb", "boo", []}]}]}}, - Stemmed1b = {1, {"1a", "bar", []}}, - Stemmed1a = {1, {"1a", "bar", [{"1aa", "bar", []}]}}, - Stemmed1aa = {2, {"1aa", "bar", []}}, - Stemmed1bb = {2, {"1bb", "boo", []}}, + One = {1, {"1","foo",[]}}, etap:is( {[One], no_conflicts}, couch_key_tree:merge([], One, 10), "The empty tree is the identity for merge." ), + etap:is( + {[One], no_conflicts}, + couch_key_tree:merge([One], One, 10), + "Merging is reflexive." + ), + + TwoSibs = [{1, {"1","foo",[]}}, + {1, {"2","foo",[]}}], etap:is( {TwoSibs, no_conflicts}, @@ -52,41 +48,75 @@ test() -> "Merging a prefix of a tree with the tree yields the tree." ), + Three = {1, {"3","foo",[]}}, + ThreeSibs = [{1, {"1","foo",[]}}, + {1, {"2","foo",[]}}, + {1, {"3","foo",[]}}], + etap:is( - {[One], no_conflicts}, - couch_key_tree:merge([One], One, 10), - "Merging is reflexive." + {ThreeSibs, conflicts}, + couch_key_tree:merge(TwoSibs, Three, 10), + "Merging a third unrelated branch leads to a conflict." ), + + TwoChild = {1, {"1","foo", [{"1a", "bar", [{"1aa", "bar", []}]}]}}, + etap:is( {[TwoChild], no_conflicts}, couch_key_tree:merge([TwoChild], TwoChild, 10), "Merging two children is still reflexive." ), + TwoChildSibs = {1, {"1","foo", [{"1a", "bar", []}, + {"1b", "bar", []}]}}, etap:is( {[TwoChildSibs], no_conflicts}, couch_key_tree:merge([TwoChildSibs], TwoChildSibs, 10), "Merging a tree to itself is itself."), + TwoChildPlusSibs = + {1, {"1","foo", [{"1a", "bar", [{"1aa", "bar", []}]}, + {"1b", "bar", []}]}}, + + etap:is( + {[TwoChildPlusSibs], no_conflicts}, + couch_key_tree:merge([TwoChild], TwoChildSibs, 10), + "Merging tree of uneven length at node 2."), + + Stemmed1b = {2, {"1a", "bar", []}}, etap:is( {[TwoChildSibs], no_conflicts}, couch_key_tree:merge([TwoChildSibs], Stemmed1b, 10), "Merging a tree with a stem." ), + TwoChildSibs2 = {1, {"1","foo", [{"1a", "bar", []}, + {"1b", "bar", [{"1bb", "boo", []}]}]}}, + Stemmed1bb = {3, {"1bb", "boo", []}}, etap:is( {[TwoChildSibs2], no_conflicts}, couch_key_tree:merge([TwoChildSibs2], Stemmed1bb, 10), "Merging a stem at a deeper level." ), + StemmedTwoChildSibs2 = [{2,{"1a", "bar", []}}, + {2,{"1b", "bar", [{"1bb", "boo", []}]}}], + + etap:is( + {StemmedTwoChildSibs2, no_conflicts}, + couch_key_tree:merge(StemmedTwoChildSibs2, Stemmed1bb, 10), + "Merging a stem at a deeper level against paths at deeper levels." + ), + + Stemmed1aa = {3, {"1aa", "bar", []}}, etap:is( {[TwoChild], no_conflicts}, couch_key_tree:merge([TwoChild], Stemmed1aa, 10), "Merging a single tree with a deeper stem." ), + Stemmed1a = {2, {"1a", "bar", [{"1aa", "bar", []}]}}, etap:is( {[TwoChild], no_conflicts}, couch_key_tree:merge([TwoChild], Stemmed1a, 10), @@ -99,6 +129,7 @@ test() -> "More merging." ), + OneChild = {1, {"1","foo",[{"1a", "bar", []}]}}, Expect1 = [OneChild, Stemmed1aa], etap:is( {Expect1, conflicts}, @@ -112,4 +143,34 @@ test() -> "Merge should have no conflicts." ), + %% this test is based on couch-902-test-case2.py + %% foo has conflicts from replication at depth two + %% foo3 is the current value + Foo = {1, {"foo", + "val1", + [{"foo2","val2",[]}, + {"foo3", "val3", []} + ]}}, + %% foo now has an attachment added, which leads to foo4 and val4 + %% off foo3 + Bar = {1, {"foo", + [], + [{"foo3", + [], + [{"foo4","val4",[]} + ]}]}}, + %% this is what the merge returns + %% note that it ignore the conflicting branch as there's no match + FooBar = {1, {"foo", + "val1", + [{"foo2","val2",[]}, + {"foo3", "val3", [{"foo4","val4",[]}]} + ]}}, + + etap:is( + {[FooBar], no_conflicts}, + couch_key_tree:merge([Foo],Bar,10), + "Merging trees with conflicts ought to behave." + ), + ok.