couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject [25/50] git commit: Remove dups and fix holes in unpacked since sequences
Date Fri, 17 Jan 2014 21:50:25 GMT
Remove dups and fix holes in unpacked since sequences

It's important that sequences include all the shard ranges. This patch checks
that and also removes any duplicate shards, keeping the maximum sequence
number recorded for each shard.

BugzID: 13533
BugzID: 17240
BugzID: 16415


Project: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/commit/9828f8aa
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/tree/9828f8aa
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-fabric/diff/9828f8aa

Branch: refs/heads/import
Commit: 9828f8aa574224b4924a767b72dc6f0e74019a48
Parents: 8e72cf7
Author: Paul J. Davis <paul@cloudant.com>
Authored: Wed Feb 20 16:28:06 2013 -0500
Committer: Bob Dionne <bob@cloudant.com>
Committed: Thu Feb 21 06:13:58 2013 -0500

----------------------------------------------------------------------
 src/fabric_view_changes.erl | 43 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-fabric/blob/9828f8aa/src/fabric_view_changes.erl
----------------------------------------------------------------------
diff --git a/src/fabric_view_changes.erl b/src/fabric_view_changes.erl
index 817bbf7..8a7f7c7 100644
--- a/src/fabric_view_changes.erl
+++ b/src/fabric_view_changes.erl
@@ -287,16 +287,49 @@ unpack_seqs(Packed, DbName) ->
     do_unpack_seqs(Opaque, DbName).
 
 do_unpack_seqs(Opaque, DbName) ->
+    % A preventative fix for FB 13533 to remove duplicate shards.
+    % This just picks each unique shard and keeps the largest seq
+    % value recorded.
+    Decoded = binary_to_term(couch_util:decodeBase64Url(Opaque)),
+    DedupDict = lists:foldl(fun({Node, [A, B], Seq}, Acc) ->
+        dict:append({Node, [A, B]}, Seq, Acc)
+    end, dict:new(), Decoded),
+    Deduped = lists:map(fun({{Node, [A, B]}, SeqList}) ->
+        {Node, [A, B], lists:max(SeqList)}
+    end, dict:to_list(DedupDict)),
+
+    % Create a fabric_dict of {Shard, Seq} entries
     % TODO relies on internal structure of fabric_dict as keylist
-    lists:map(fun({Node, [A,B], Seq}) ->
+    Unpacked = lists:flatmap(fun({Node, [A,B], Seq}) ->
         case mem3:get_shard(DbName, Node, [A,B]) of
         {ok, Shard} ->
-            {Shard, Seq};
+            [{Shard, Seq}];
         {error, not_found} ->
-            PlaceHolder = #shard{node=Node, range=[A,B], dbname=DbName, _='_'},
-            {PlaceHolder, Seq} % will be replaced in find_replacement_shards
+            []
         end
-    end, binary_to_term(couch_util:decodeBase64Url(Opaque))).
+    end, Deduped),
+
+    % Fill holes in the since sequence. If/when we ever start
+    % using overlapping shard ranges this will need to be updated
+    % to not include shard ranges that overlap entries in Unpacked.
+    % A quick and dirty approach would be like such:
+    %
+    %   lists:foldl(fun(S, Acc) ->
+    %       fabric_view:remove_overlapping_shards(S, Acc)
+    %   end, mem3:shards(DbName), Unpacked)
+    %
+    % Unfortunately remove_overlapping_shards isn't reusable because
+    % of its calls to rexi:kill/2. When we get to overlapping
+    % shard ranges and have to rewrite shard range management
+    % we can revisit this simpler algorithm.
+    case fabric_view:is_progress_possible(Unpacked) of
+        true ->
+            Unpacked;
+        false ->
+            Ranges = lists:usort([R || #shard{range=R} <- Unpacked]),
+            Filter = fun(S) -> not lists:member(S#shard.range, Ranges) end,
+            Unpacked ++ lists:filter(Filter, mem3:shards(DbName))
+    end.
 
 changes_row(#change{key=Seq, id=Id, value=Value, deleted=true, doc=Doc}, true) ->
     {change, {[{seq,Seq}, {id,Id}, {changes,Value}, {deleted, true}, {doc, Doc}]}};


Mime
View raw message