couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kocol...@apache.org
Subject svn commit: r1065471 - in /couchdb/trunk: src/couchdb/couch_key_tree.erl test/etap/060-kt-merging.t
Date Mon, 31 Jan 2011 02:46:44 GMT
Author: kocolosk
Date: Mon Jan 31 02:46:44 2011
New Revision: 1065471

URL: http://svn.apache.org/viewvc?rev=1065471&view=rev
Log:
Fix spurious declarations of new merge conflicts

This patch also adds extra tests of the key tree merging logic as well
as edoc-formatted documentation for the module and a few of the merge
functions.  Closes COUCHDB-902.

Thanks Paul Davis, Bob Dionne, Klaus Trainer.

Modified:
    couchdb/trunk/src/couchdb/couch_key_tree.erl
    couchdb/trunk/test/etap/060-kt-merging.t

Modified: couchdb/trunk/src/couchdb/couch_key_tree.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_key_tree.erl?rev=1065471&r1=1065470&r2=1065471&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_key_tree.erl (original)
+++ couchdb/trunk/src/couchdb/couch_key_tree.erl Mon Jan 31 02:46:44 2011
@@ -10,6 +10,41 @@
 % License for the specific language governing permissions and limitations under
 % the License.
 
+%% @doc Data structure used to represent document edit histories.
+
+%% A key tree is used to represent the edit history of a document. Each node of
+%% the tree represents a particular version. Relations between nodes represent
+%% the order that these edits were applied. For instance, a set of three edits
+%% would produce a tree of versions A->B->C indicating that edit C was based on
+%% version B which was in turn based on A. In a world without replication (and
+%% no ability to disable MVCC checks), all histories would be forced to be
+%% linear lists of edits due to constraints imposed by MVCC (ie, new edits must
+%% be based on the current version). However, we have replication, so we must
+%% deal with not so easy cases, which lead to trees.
+%%
+%% Consider a document in state A. This doc is replicated to a second node. We
+%% then edit the document on each node leaving it in two different states, B
+%% and C. We now have two key trees, A->B and A->C. When we go to replicate a
+%% second time, the key tree must combine these two trees which gives us
+%% A->(B|C). This is how conflicts are introduced. In terms of the key tree, we
+%% say that we have two leaves (B and C) that are not deleted. The presense of
+%% the multiple leaves indicate conflict. To remove a conflict, one of the
+%% edits (B or C) can be deleted, which results in, A->(B|C->D) where D is an
+%% edit that is specially marked with the a deleted=true flag.
+%%
+%% What makes this a bit more complicated is that there is a limit to the
+%% number of revisions kept, specified in couch_db.hrl (default is 1000). When
+%% this limit is exceeded only the last 1000 are kept. This comes in to play
+%% when branches are merged. The comparison has to begin at the same place in
+%% the branches. A revision id is of the form N-XXXXXXX where N is the current
+%% revision. So each path will have a start number, calculated in
+%% couch_doc:to_path using the formula N - length(RevIds) + 1 So, .eg. if a doc
+%% was edit 1003 times this start number would be 4, indicating that 3
+%% revisions were truncated.
+%%
+%% This comes into play in @see merge_at/3 which recursively walks down one
+%% tree or the other until they begin at the same revision.
+
 -module(couch_key_tree).
 
 -export([merge/3, find_missing/2, get_key_leafs/2, get_full_key_paths/2, get/2]).
@@ -18,12 +53,16 @@
 
 -include("couch_db.hrl").
 
+%% @doc Merge a path with a list of paths and stem to the given length.
 -spec merge([path()], path(), pos_integer()) -> {[path()],
     conflicts | no_conflicts}.
 merge(Paths, Path, Depth) ->
     {Merged, Conflicts} = merge(Paths, Path),
     {stem(Merged, Depth), Conflicts}.
 
+%% @doc Merge a path with an existing list of paths, returning a new list of
+%% paths. A return of conflicts indicates a new conflict was discovered in this
+%% merge. Conflicts may already exist in the original list of paths.
 -spec merge([path()], path()) -> {[path()], conflicts | no_conflicts}.
 merge(Paths, Path) ->
     {ok, Merged, HasConflicts} = merge_one(Paths, Path, [], false),
@@ -62,6 +101,7 @@ merge_at([{Key, Value, SubTree}|Sibs], P
     {ok, Merged, Conflicts} ->
         {ok, [{Key, Value, Merged} | Sibs], Conflicts};
     no ->
+        % first branch didn't merge, move to next branch
         case merge_at(Sibs, Place, InsertTree) of
         {ok, Merged, Conflicts} ->
             {ok, [{Key, Value, SubTree} | Merged], Conflicts};
@@ -103,11 +143,12 @@ merge_simple([{Key, Value, SubA} | NextA
     {MergedNextTree, Conflict2} = merge_simple(NextA, NextB),
     {[{Key, Value, MergedSubTree} | MergedNextTree], Conflict1 or Conflict2};
 merge_simple([{A, _, _} = Tree | Next], [{B, _, _} | _] = Insert) when A < B ->
-    {Merged, _} = merge_simple(Next, Insert),
-    {[Tree | Merged], true};
+    {Merged, Conflict} = merge_simple(Next, Insert),
+    % if Merged has more branches than the input we added a new conflict
+    {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))};
 merge_simple(Ours, [Tree | Next]) ->
-    {Merged, _} = merge_simple(Ours, Next),
-    {[Tree | Merged], true}.
+    {Merged, Conflict} = merge_simple(Ours, Next),
+    {[Tree | Merged], Conflict orelse (length(Merged) > length(Next))}.
 
 find_missing(_Tree, []) ->
     [];

Modified: couchdb/trunk/test/etap/060-kt-merging.t
URL: http://svn.apache.org/viewvc/couchdb/trunk/test/etap/060-kt-merging.t?rev=1065471&r1=1065470&r2=1065471&view=diff
==============================================================================
--- couchdb/trunk/test/etap/060-kt-merging.t (original)
+++ couchdb/trunk/test/etap/060-kt-merging.t Mon Jan 31 02:46:44 2011
@@ -15,7 +15,7 @@
 
 main(_) ->
     test_util:init_code_path(),
-    etap:plan(12),
+    etap:plan(16),
     case (catch test()) of
         ok ->
             etap:end_tests();
@@ -26,25 +26,21 @@ main(_) ->
     ok.
 
 test() ->
-    One = {0, {"1","foo",[]}},
-    TwoSibs = [{0, {"1","foo",[]}},
-               {0, {"2","foo",[]}}],
-    OneChild = {0, {"1","foo",[{"1a", "bar", []}]}},
-    TwoChild = {0, {"1","foo", [{"1a", "bar", [{"1aa", "bar", []}]}]}},
-    TwoChildSibs = {0, {"1","foo", [{"1a", "bar", []},
-                                     {"1b", "bar", []}]}},
-    TwoChildSibs2 = {0, {"1","foo", [{"1a", "bar", []},
-                                     {"1b", "bar", [{"1bb", "boo", []}]}]}},
-    Stemmed1b = {1, {"1a", "bar", []}},
-    Stemmed1a = {1, {"1a", "bar", [{"1aa", "bar", []}]}},
-    Stemmed1aa = {2, {"1aa", "bar", []}},
-    Stemmed1bb = {2, {"1bb", "boo", []}},
+    One = {1, {"1","foo",[]}},
 
     etap:is(
         {[One], no_conflicts},
         couch_key_tree:merge([], One, 10),
         "The empty tree is the identity for merge."
     ),
+    etap:is(
+        {[One], no_conflicts},
+        couch_key_tree:merge([One], One, 10),
+        "Merging is reflexive."
+    ),
+
+    TwoSibs = [{1, {"1","foo",[]}},
+               {1, {"2","foo",[]}}],
 
     etap:is(
         {TwoSibs, no_conflicts},
@@ -52,41 +48,75 @@ test() ->
         "Merging a prefix of a tree with the tree yields the tree."
     ),
 
+    Three = {1, {"3","foo",[]}},
+    ThreeSibs = [{1, {"1","foo",[]}},
+                 {1, {"2","foo",[]}},
+                 {1, {"3","foo",[]}}],
+
     etap:is(
-        {[One], no_conflicts},
-        couch_key_tree:merge([One], One, 10),
-        "Merging is reflexive."
+        {ThreeSibs, conflicts},
+        couch_key_tree:merge(TwoSibs, Three, 10),
+        "Merging a third unrelated branch leads to a conflict."
     ),
 
+
+    TwoChild = {1, {"1","foo", [{"1a", "bar", [{"1aa", "bar", []}]}]}},
+
     etap:is(
         {[TwoChild], no_conflicts},
         couch_key_tree:merge([TwoChild], TwoChild, 10),
         "Merging two children is still reflexive."
     ),
 
+    TwoChildSibs = {1, {"1","foo", [{"1a", "bar", []},
+                                     {"1b", "bar", []}]}},
     etap:is(
         {[TwoChildSibs], no_conflicts},
         couch_key_tree:merge([TwoChildSibs], TwoChildSibs, 10),
         "Merging a tree to itself is itself."),
 
+    TwoChildPlusSibs =
+        {1, {"1","foo", [{"1a", "bar", [{"1aa", "bar", []}]},
+                         {"1b", "bar", []}]}},
+
+    etap:is(
+        {[TwoChildPlusSibs], no_conflicts},
+        couch_key_tree:merge([TwoChild], TwoChildSibs, 10),
+        "Merging tree of uneven length at node 2."),
+
+    Stemmed1b = {2, {"1a", "bar", []}},
     etap:is(
         {[TwoChildSibs], no_conflicts},
         couch_key_tree:merge([TwoChildSibs], Stemmed1b, 10),
         "Merging a tree with a stem."
     ),
 
+    TwoChildSibs2 = {1, {"1","foo", [{"1a", "bar", []},
+                                     {"1b", "bar", [{"1bb", "boo", []}]}]}},
+    Stemmed1bb = {3, {"1bb", "boo", []}},
     etap:is(
         {[TwoChildSibs2], no_conflicts},
         couch_key_tree:merge([TwoChildSibs2], Stemmed1bb, 10),
         "Merging a stem at a deeper level."
     ),
 
+    StemmedTwoChildSibs2 = [{2,{"1a", "bar", []}},
+                            {2,{"1b", "bar", [{"1bb", "boo", []}]}}],
+
+    etap:is(
+        {StemmedTwoChildSibs2, no_conflicts},
+        couch_key_tree:merge(StemmedTwoChildSibs2, Stemmed1bb, 10),
+        "Merging a stem at a deeper level against paths at deeper levels."
+    ),
+
+    Stemmed1aa = {3, {"1aa", "bar", []}},
     etap:is(
         {[TwoChild], no_conflicts},
         couch_key_tree:merge([TwoChild], Stemmed1aa, 10),
         "Merging a single tree with a deeper stem."
     ),
 
+    Stemmed1a = {2, {"1a", "bar", [{"1aa", "bar", []}]}},
     etap:is(
         {[TwoChild], no_conflicts},
         couch_key_tree:merge([TwoChild], Stemmed1a, 10),
@@ -99,6 +129,7 @@ test() ->
         "More merging."
     ),
 
+    OneChild = {1, {"1","foo",[{"1a", "bar", []}]}},
     Expect1 = [OneChild, Stemmed1aa],
     etap:is(
         {Expect1, conflicts},
@@ -112,4 +143,34 @@ test() ->
         "Merge should have no conflicts."
     ),
 
+    %% this test is based on couch-902-test-case2.py
+    %% foo has conflicts from replication at depth two
+    %% foo3 is the current value
+    Foo = {1, {"foo",
+               "val1",
+               [{"foo2","val2",[]},
+                {"foo3", "val3", []}
+               ]}},
+    %% foo now has an attachment added, which leads to foo4 and val4
+    %% off foo3
+    Bar = {1, {"foo",
+               [],
+               [{"foo3",
+                 [],
+                 [{"foo4","val4",[]}
+                  ]}]}},
+    %% this is what the merge returns
+    %% note that it ignore the conflicting branch as there's no match
+    FooBar = {1, {"foo",
+               "val1",
+               [{"foo2","val2",[]},
+                {"foo3", "val3", [{"foo4","val4",[]}]}
+               ]}},
+
+    etap:is(
+      {[FooBar], no_conflicts},
+      couch_key_tree:merge([Foo],Bar,10),
+      "Merging trees with conflicts ought to behave."
+    ),
+
     ok.



Mime
View raw message