couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject [couchdb] 02/02: Automatically repair revision trees
Date Wed, 15 Nov 2017 18:04:01 GMT
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch optimize-doc-updates
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 047ebe2a1a5b82d32667741db3865ef3c8052e3d
Author: Paul J. Davis <paul.joseph.davis@gmail.com>
AuthorDate: Wed Nov 15 12:03:28 2017 -0600

    Automatically repair revision trees
---
 src/couch/src/couch_db_updater.erl |  9 +++------
 src/couch/src/couch_key_tree.erl   | 30 ++++++++++++++++++++----------
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index f437426..bcddbe0 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -876,11 +876,8 @@ stem_full_doc_info(#full_doc_info{rev_tree = Tree} = Info, Limit) ->
     Stemmed = couch_key_tree:stem(Tree, Limit),
     Info#full_doc_info{rev_tree = Stemmed}.
 
-full_stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) ->
-    lists:map(fun(#full_doc_info{rev_tree=Tree}=FDI) ->
-        Stemmed = couch_key_tree:full_stem(Tree, Limit),
-        FDI#full_doc_info{rev_tree=Stemmed}
-    end, DocInfos).
+stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) ->
+    lists:map(fun(FDI) -> stem_full_doc_info(FDI, Limit) end, DocInfos).
 
 update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
     #db{
@@ -1128,7 +1125,7 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
         }
     end, NewInfos0),
 
-    NewInfos = full_stem_full_doc_infos(Db, NewInfos1),
+    NewInfos = stem_full_doc_infos(Db, NewInfos1),
     RemoveSeqs =
     case Retry of
     nil ->
diff --git a/src/couch/src/couch_key_tree.erl b/src/couch/src/couch_key_tree.erl
index d18ecbc..6208cd0 100644
--- a/src/couch/src/couch_key_tree.erl
+++ b/src/couch/src/couch_key_tree.erl
@@ -63,8 +63,7 @@ multi_merge/2,
 merge/3,
 merge/2,
 remove_leafs/2,
-stem/2,
-full_stem/2
+stem/2
 ]).
 
 -include_lib("couch/include/couch_db.hrl").
@@ -480,13 +479,18 @@ map_leafs_simple(Fun, Pos, [{Key, Value, SubTree} | RestTree]) ->
 
 
 stem(Trees, Limit) ->
-    lists:sort(lists:flatmap(fun(Tree) ->
-        stem_tree(Tree, Limit)
-    end, Trees)).
+    Seen = khash:new(),
+    try
+        lists:sort(lists:flatmap(fun(Tree) ->
+            stem_tree(Tree, Limit, Seen)
+        end, Trees))
+    catch throw:dupe_keys ->
+        repair_tree(Trees, Limit)
+    end.
 
 
-stem_tree({Depth, Child}, Limit) ->
-    case stem_tree(Depth, Child, Limit) of
+stem_tree({Depth, Child}, Limit, Seen) ->
+    case stem_tree(Depth, Child, Limit, Seen) of
         {_, NewChild, NewBranches} ->
             [{Depth, NewChild} | NewBranches];
         {_, NewBranches} ->
@@ -494,10 +498,16 @@ stem_tree({Depth, Child}, Limit) ->
     end.
 
 
-stem_tree(_Depth, {_Key, _Val, []} = Leaf, Limit) ->
+stem_tree(_Depth, {_Key, _Val, []} = Leaf, Limit, _Seen) ->
     {Limit - 1, Leaf, []};
 
-stem_tree(Depth, {Key, Val, Children}, Limit) ->
+stem_tree(Depth, {Key, Val, Children}, Limit, Seen) ->
+    case khash:lookup(Seen, Key) of
+        not_found ->
+            khash:put(Key, seen);
+        _ ->
+            throw(dupe_keys)
+    end,
     FinalAcc = lists:foldl(fun(Child, {LimitPosAcc, ChildAcc, BranchAcc}) ->
         case stem_tree(Depth + 1, Child, Limit) of
             {LimitPos, NewChild, NewBranches} ->
@@ -526,7 +536,7 @@ stem_tree(Depth, {Key, Val, Children}, Limit) ->
     end.
 
 
-full_stem(Trees, Limit) ->
+repair_tree(Trees, Limit) ->
     % flatten each branch in a tree into a tree path, sort by starting rev #
     Paths = lists:sort(lists:map(fun({Pos, Path}) ->
         StemmedPath = lists:sublist(Path, Limit),

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <commits@couchdb.apache.org>.

Mime
View raw message