couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject [27/50] git commit: Introduce a #leaf{} record in revision trees
Date Sat, 18 Jan 2014 00:47:52 GMT
Introduce a #leaf{} record in revision trees

This is substantially based on work by Bob Dionne (a452a4a) to introduce
the data size calculations at Cloudant. There's quite a bit of conflict
in code and actual behavior between this work and what Filipe wrote for
CouchDB. This new record should ease the transition of merging both
behaviors.

An important thing to note is that this record only ever lives in RAM and
is never written to disk, so we don't have to worry about record upgrades.
We will, however, have to maintain the upgrade info that Filipe and Bob
both introduced (which is fairly straightforward).


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/85cf2b26
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/85cf2b26
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/85cf2b26

Branch: refs/heads/import
Commit: 85cf2b26e52135b75135587cc553abb635c2a2c2
Parents: dc5a6de
Author: Robert Newson <rnewson@apache.org>
Authored: Sun Mar 10 16:12:28 2013 -0500
Committer: Paul J. Davis <paul.joseph.davis@gmail.com>
Committed: Fri Jan 17 16:44:31 2014 -0800

----------------------------------------------------------------------
 include/couch_db.hrl     |  9 +++++++++
 src/couch_db.erl         | 22 +++++++---------------
 src/couch_db_updater.erl | 40 +++++++++++++++++-----------------------
 src/couch_doc.erl        | 13 ++++++++-----
 src/couch_util.erl       | 11 +++++++++++
 5 files changed, 52 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/include/couch_db.hrl
----------------------------------------------------------------------
diff --git a/include/couch_db.hrl b/include/couch_db.hrl
index 77006e4..61a59f7 100644
--- a/include/couch_db.hrl
+++ b/include/couch_db.hrl
@@ -23,6 +23,8 @@
 
 -define(b2l(V), binary_to_list(V)).
 -define(l2b(V), list_to_binary(V)).
+-define(i2b(V), couch_util:integer_to_boolean(V)).
+-define(b2i(V), couch_util:boolean_to_integer(V)).
 -define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])).
 -define(term_size(T),
     try
@@ -271,3 +273,10 @@
     stop_fun
 }).
 
+-record(leaf,  {
+    deleted,
+    ptr,
+    seq,
+    size = nil
+}).
+

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/src/couch_db.erl
----------------------------------------------------------------------
diff --git a/src/couch_db.erl b/src/couch_db.erl
index 7734c7c..e4e8cca 100644
--- a/src/couch_db.erl
+++ b/src/couch_db.erl
@@ -580,7 +580,7 @@ prep_and_validate_update(Db, #doc{id=Id,revs={RevStart, Revs}}=Doc,
     case Revs of
     [PrevRev|_] ->
         case dict:find({RevStart, PrevRev}, LeafRevsDict) of
-        {ok, {Deleted, DiskSp, DiskRevs}} ->
+        {ok, {#leaf{deleted=Deleted, ptr=DiskSp}, DiskRevs}} ->
             case couch_doc:has_stubs(Doc) of
             true ->
                 DiskDoc = make_doc(Db, Id, Deleted, DiskSp, DiskRevs),
@@ -643,12 +643,8 @@ prep_and_validate_updates(Db, [DocBucket|RestBuckets],
         AllowConflict, AccPrepped, AccErrors) ->
     Leafs = couch_key_tree:get_all_leafs(OldRevTree),
     LeafRevsDict = dict:from_list([
-        begin
-            Deleted = element(1, LeafVal),
-            Sp = element(2, LeafVal),
-            {{Start, RevId}, {Deleted, Sp, Revs}}
-        end ||
-        {LeafVal, {Start, [RevId | _]} = Revs} <- Leafs
+        {{Start, RevId}, {Leaf, Revs}} ||
+        {Leaf, {Start, [RevId | _]} = Revs} <- Leafs
     ]),
     {PreppedBucket, AccErrors3} = lists:foldl(
         fun({Doc, Ref}, {Docs2Acc, AccErrors2}) ->
@@ -895,9 +891,7 @@ make_first_doc_on_disk(Db, Id, Pos, [{_Rev, #doc{}} | RestPath]) ->
     make_first_doc_on_disk(Db, Id, Pos-1, RestPath);
 make_first_doc_on_disk(Db, Id, Pos, [{_Rev, ?REV_MISSING}|RestPath]) ->
     make_first_doc_on_disk(Db, Id, Pos - 1, RestPath);
-make_first_doc_on_disk(Db, Id, Pos, [{_Rev, RevValue} |_]=DocPath) ->
-    IsDel = element(1, RevValue),
-    Sp = element(2, RevValue),
+make_first_doc_on_disk(Db, Id, Pos, [{_Rev, #leaf{deleted=IsDel, ptr=Sp}} |_]=DocPath) ->
     Revs = [Rev || {Rev, _} <- DocPath],
     make_doc(Db, Id, IsDel, Sp, {Pos, Revs}).
 
@@ -1243,9 +1237,7 @@ open_doc_revs_int(Db, IdRevs, Options) ->
                     ?REV_MISSING ->
                         % we have the rev in our list but know nothing about it
                         {{not_found, missing}, {Pos, Rev}};
-                    RevValue ->
-                        IsDeleted = element(1, RevValue),
-                        SummaryPtr = element(2, RevValue),
+                    #leaf{deleted=IsDeleted, ptr=SummaryPtr} ->
                         {ok, make_doc(Db, Id, IsDeleted, SummaryPtr, FoundRevPath)}
                     end
                 end, FoundRevs),
@@ -1297,8 +1289,8 @@ doc_meta_info(#doc_info{high_seq=Seq,revs=[#rev_info{rev=Rev}|RestInfo]}, RevTre
         [{revs_info, Pos, lists:map(
             fun({Rev1, ?REV_MISSING}) ->
                 {Rev1, missing};
-            ({Rev1, RevValue}) ->
-                case element(1, RevValue) of
+            ({Rev1, Leaf}) ->
+                case Leaf#leaf.deleted of
                 true ->
                     {Rev1, deleted};
                 false ->

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index c64911e..cc48ef8 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -158,10 +158,8 @@ handle_call({purge_docs, IdRevs}, _From, Db) ->
     {DocInfoToUpdate, NewSeq} = lists:mapfoldl(
         fun(#full_doc_info{rev_tree=Tree}=FullInfo, SeqAcc) ->
             Tree2 = couch_key_tree:map_leafs(
-                fun(_RevId, LeafVal) ->
-                    IsDeleted = element(1, LeafVal),
-                    BodyPointer = element(2, LeafVal),
-                    {IsDeleted, BodyPointer, SeqAcc + 1}
+                fun(_RevId, Leaf) ->
+                    Leaf#leaf{seq=SeqAcc+1}
                 end, Tree),
             {FullInfo#full_doc_info{rev_tree=Tree2}, SeqAcc + 1}
         end, LastSeq, FullDocInfoToUpdate),
@@ -340,37 +338,35 @@ rev_tree(DiskTree) ->
     couch_key_tree:mapfold(fun
         (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, leaf, _Acc) ->
             % pre 1.2 format, will be upgraded on compaction
-            {{IsDeleted == 1, BodyPointer, UpdateSeq, nil}, nil};
+            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq}, nil};
         (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, branch, Acc) ->
-            {{IsDeleted == 1, BodyPointer, UpdateSeq, nil}, Acc};
+            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq}, Acc};
         (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, leaf, Acc) ->
             Acc2 = sum_leaf_sizes(Acc, Size),
-            {{IsDeleted == 1, BodyPointer, UpdateSeq, Size}, Acc2};
+            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq, size=Size}, Acc2};
         (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, branch, Acc) ->
-            {{IsDeleted == 1, BodyPointer, UpdateSeq, Size}, Acc};
+            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq, size=Size}, Acc};
         (_RevId, ?REV_MISSING, _Type, Acc) ->
             {?REV_MISSING, Acc}
-    end, DiskTree).
+    end, 0, DiskTree).
 
 disk_tree(RevTree) ->
     couch_key_tree:map(fun
         (_RevId, ?REV_MISSING) ->
             ?REV_MISSING;
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq}) ->
-            {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq, nil};
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}) ->
-            {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq, Size}
+        (_RevId, #leaf{deleted=IsDeleted, ptr=BodyPointer, seq=UpdateSeq, size=Size}) ->
+            {?b2i(IsDeleted), BodyPointer, UpdateSeq, Size}
     end, RevTree).
 
 btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del, rev_tree=T}) ->
-    {Seq, {Id, if Del -> 1; true -> 0 end, disk_tree(T)}}.
+    {Seq, {Id, ?b2i(Del), disk_tree(T)}}.
 
 btree_by_seq_join(Seq, {Id, Del, DiskTree}) when is_integer(Del) ->
     {RevTree, LeafsSize} = rev_tree(DiskTree),
     #full_doc_info{
         id = Id,
         update_seq = Seq,
-        deleted = (Del == 1),
+        deleted = ?i2b(Del),
         rev_tree = RevTree,
         leafs_size = LeafsSize
     };
@@ -388,14 +384,14 @@ btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
 
 btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq,
         deleted=Deleted, rev_tree=Tree}) ->
-    {Id, {Seq, if Deleted -> 1; true -> 0 end, disk_tree(Tree)}}.
+    {Id, {Seq, ?b2i(Deleted), disk_tree(Tree)}}.
 
 btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) ->
     {Tree, LeafsSize} = rev_tree(DiskTree),
     #full_doc_info{
         id = Id,
         update_seq = HighSeq,
-        deleted = (Deleted == 1),
+        deleted = ?i2b(Deleted),
         rev_tree = Tree,
         leafs_size = LeafsSize
     }.
@@ -573,7 +569,8 @@ flush_trees(#db{fd = Fd} = Db,
                 TotalSize = lists:foldl(
                     fun(#att{att_len = L}, A) -> A + L end,
                     SummarySize, Value#doc.atts),
-                NewValue = {IsDeleted, NewSummaryPointer, UpdateSeq, TotalSize},
+                NewValue = #leaf{deleted=IsDeleted, ptr=NewSummaryPointer,
+                                 seq=UpdateSeq, size=TotalSize},
                 case Type of
                 leaf ->
                     {NewValue, Acc + TotalSize};
@@ -899,10 +896,7 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
             Info#full_doc_info{rev_tree=couch_key_tree:map(
                 fun(_, _, branch) ->
                     ?REV_MISSING;
-                (_Rev, LeafVal, leaf) ->
-                    IsDel = element(1, LeafVal),
-                    Sp = element(2, LeafVal),
-                    Seq = element(3, LeafVal),
+                (_Rev, #leaf{ptr=Sp}=Leaf, leaf) ->
                     {_Body, AttsInfo} = Summary = copy_doc_attachments(
                         Db, Sp, DestFd),
                     SummaryChunk = make_doc_summary(NewDb, Summary),
@@ -911,7 +905,7 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
                     TotalLeafSize = lists:foldl(
                         fun({_, _, _, AttLen, _, _, _, _}, S) -> S + AttLen end,
                         SummarySize, AttsInfo),
-                    {IsDel, Pos, Seq, TotalLeafSize}
+                    Leaf#leaf{ptr=Pos, size=TotalLeafSize}
                 end, RevTree)}
         end, NewInfos0),
 

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/src/couch_doc.erl
----------------------------------------------------------------------
diff --git a/src/couch_doc.erl b/src/couch_doc.erl
index bcf79d2..6f2ca9b 100644
--- a/src/couch_doc.erl
+++ b/src/couch_doc.erl
@@ -330,7 +330,10 @@ max_seq(Tree, UpdateSeq) ->
             {_Deleted, _DiskPos, OldTreeSeq} ->
                 % Older versions didn't track data sizes.
                 erlang:max(MaxOldSeq, OldTreeSeq);
-            {_Deleted, _DiskPos, OldTreeSeq, _Size} ->
+            {_Deleted, _DiskPos, OldTreeSeq, _Size} -> % necessary clause?
+                % Older versions didn't store #leaf records.
+                erlang:max(MaxOldSeq, OldTreeSeq);
+            #leaf{seq=OldTreeSeq} ->
                 erlang:max(MaxOldSeq, OldTreeSeq);
             _ ->
                 MaxOldSeq
@@ -341,11 +344,11 @@ max_seq(Tree, UpdateSeq) ->
 to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree,update_seq=FDISeq}) ->
     RevInfosAndPath = [
         {#rev_info{
-            deleted = element(1, LeafVal),
-            body_sp = element(2, LeafVal),
-            seq = element(3, LeafVal),
+            deleted = Leaf#leaf.deleted,
+            body_sp = Leaf#leaf.ptr,
+            seq = Leaf#leaf.seq,
             rev = {Pos, RevId}
-        }, Path} || {LeafVal, {Pos, [RevId | _]} = Path} <-
+        }, Path} || {Leaf, {Pos, [RevId | _]} = Path} <-
             couch_key_tree:get_all_leafs(Tree)
     ],
     SortedRevInfosAndPath = lists:sort(

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/src/couch_util.erl
----------------------------------------------------------------------
diff --git a/src/couch_util.erl b/src/couch_util.erl
index 3556d36..d09211a 100644
--- a/src/couch_util.erl
+++ b/src/couch_util.erl
@@ -29,6 +29,7 @@
 -export([encode_doc_id/1]).
 -export([with_db/2]).
 -export([rfc1123_date/0, rfc1123_date/1]).
+-export([integer_to_boolean/1, boolean_to_integer/1]).
 
 -include_lib("couch/include/couch_db.hrl").
 
@@ -487,3 +488,13 @@ month(9) -> "Sep";
 month(10) -> "Oct";
 month(11) -> "Nov";
 month(12) -> "Dec".
+
+integer_to_boolean(1) ->
+    true;
+integer_to_boolean(0) ->
+    false.
+
+boolean_to_integer(true) ->
+    1;
+boolean_to_integer(false) ->
+    0.


Mime
View raw message