couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject couch commit: updated refs/heads/windsor-merge to e1422e2
Date Sat, 23 Aug 2014 15:59:18 GMT
Repository: couchdb-couch
Updated Branches:
  refs/heads/windsor-merge 37f2218b8 -> e1422e236


Squash to data size merge


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/e1422e23
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/e1422e23
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/e1422e23

Branch: refs/heads/windsor-merge
Commit: e1422e23605d071243bc9e116c6afa3244a74696
Parents: 37f2218
Author: Paul J. Davis <paul.joseph.davis@gmail.com>
Authored: Sat Aug 23 10:58:43 2014 -0500
Committer: Paul J. Davis <paul.joseph.davis@gmail.com>
Committed: Sat Aug 23 10:58:43 2014 -0500

----------------------------------------------------------------------
 include/couch_db.hrl     |  10 +-
 src/couch_att.erl        |  26 ++---
 src/couch_db.erl         |  15 +--
 src/couch_db_updater.erl | 241 +++++++++++++++++++++++++++---------------
 4 files changed, 180 insertions(+), 112 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/e1422e23/include/couch_db.hrl
----------------------------------------------------------------------
diff --git a/include/couch_db.hrl b/include/couch_db.hrl
index 698c96e..bbdfda9 100644
--- a/include/couch_db.hrl
+++ b/include/couch_db.hrl
@@ -58,12 +58,17 @@
     revs = [] % rev_info
 }).
 
+-record(size_info, {
+    active = 0,
+    external = 0
+}).
+
 -record(full_doc_info, {
     id = <<"">>,
     update_seq = 0,
     deleted = false,
     rev_tree = [],
-    sizes = {0, 0}
+    sizes = #size_info{}
 }).
 
 -record(httpd, {
@@ -204,6 +209,7 @@
     deleted,
     ptr,
     seq,
-    sizes = nil
+    sizes = #size_info{},
+    atts = []
 }).
 

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/e1422e23/src/couch_att.erl
----------------------------------------------------------------------
diff --git a/src/couch_att.erl b/src/couch_att.erl
index 128b9d2..07ee4da 100644
--- a/src/couch_att.erl
+++ b/src/couch_att.erl
@@ -27,7 +27,7 @@
 ]).
 
 -export([
-    disk_info/2,
+    size_info/1,
     to_disk_term/1,
     from_disk_term/2
 ]).
@@ -276,22 +276,14 @@ merge_stubs([], _, Merged) ->
     {ok, lists:reverse(Merged)}.
 
 
-disk_info(_, []) ->
-    {ok, [], []};
-disk_info(ActiveFd, Atts) ->
-    {AttFd, _} = fetch(data, hd(Atts)),
-    if
-        AttFd == ActiveFd ->
-            Tuples = [to_disk_term(Att) || Att <- Atts],
-            Info = lists:map(fun(Att) ->
-                [{_, Pos}, AttLen] = fetch([data, att_len], Att),
-                {Pos, AttLen}
-            end, Atts),
-            {ok, Tuples, lists:usort(Info)};
-        true ->
-            ?LOG_ERROR("MISMATCH: ~p ; ~p~n", [ActiveFd, Atts]),
-            file_mismatch
-    end.
+size_info([]) ->
+    {ok, []};
+size_info(Atts) ->
+    Info = lists:map(fun(Att) ->
+        [{_, Pos}, AttLen] = fetch([data, att_len], Att),
+        {Pos, AttLen}
+    end, Atts),
+    {ok, lists:usort(Info)}.
 
 
 %% When converting an attachment to disk term format, attempt to stay with the

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/e1422e23/src/couch_db.erl
----------------------------------------------------------------------
diff --git a/src/couch_db.erl b/src/couch_db.erl
index 4ed85ac..63387d4 100644
--- a/src/couch_db.erl
+++ b/src/couch_db.erl
@@ -325,8 +325,8 @@ get_db_info(Db) ->
     } = Db,
     {ok, FileSize} = couch_file:bytes(Fd),
     {ok, DbReduction} = couch_btree:full_reduce(IdBtree),
-    {ActiveSize0, ExternalSize} = element(3, DbReduction),
-    ActiveSize = active_size(Db, ActiveSize0),
+    SizeInfo = element(3, DbReduction),
+    ActiveSize = active_size(Db, SizeInfo),
     DiskVersion = couch_db_header:disk_version(Header),
     Uuid = case get_uuid(Db) of
         undefined -> null;
@@ -344,12 +344,12 @@ get_db_info(Db) ->
         {purge_seq, couch_db:get_purge_seq(Db)},
         {compact_running, Compactor/=nil},
         {disk_size, FileSize}, % legacy
-        {other, {[{data_size, ActiveSize}]}}, % legacy
+        {other, {[{data_size, SizeInfo#size_info.external}]}}, % legacy
         {data_size, ActiveSize}, % legacy
         {sizes, {[
             {file, FileSize},
             {active, ActiveSize},
-            {external, ExternalSize}
+            {external, SizeInfo#size_info.external}
         ]}},
         {instance_start_time, StartTime},
         {disk_format_version, DiskVersion},
@@ -359,7 +359,7 @@ get_db_info(Db) ->
         ],
     {ok, InfoList}.
 
-active_size(#db{}=Db, DocActiveSize) ->
+active_size(#db{}=Db, #size_info{}=SI) ->
     Trees = [
         Db#db.id_tree,
         Db#db.seq_tree,
@@ -374,7 +374,7 @@ active_size(#db{}=Db, DocActiveSize) ->
             Size ->
                 Acc + Size
         end
-    end, DocActiveSize, Trees).
+    end, SI#size_info.active, Trees).
 
 get_design_docs(#db{name = <<"shards/", _:18/binary, DbName/binary>>}) ->
     {_, Ref} = spawn_monitor(fun() -> exit(fabric:design_docs(DbName)) end),
@@ -1027,6 +1027,7 @@ prepare_doc_summaries(Db, BucketList) ->
     [lists:map(
         fun(#doc{body = Body, atts = Atts} = Doc) ->
             DiskAtts = [couch_att:to_disk_term(Att) || Att <- Atts],
+            SizeInfo = couch_att:size_info(Atts),
             AttsFd = case Atts of
             [Att | _] ->
                 {Fd, _} = couch_att:fetch(data, Att),
@@ -1035,7 +1036,7 @@ prepare_doc_summaries(Db, BucketList) ->
                 nil
             end,
             SummaryChunk = couch_db_updater:make_doc_summary(Db, {Body, DiskAtts}),
-            Doc#doc{body = {summary, SummaryChunk, AttsFd}}
+            Doc#doc{body = {summary, SummaryChunk, SizeInfo, AttsFd}}
         end,
         Bucket) || Bucket <- BucketList].
 

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/e1422e23/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index f304769..f9fb291 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -373,48 +373,79 @@ collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) ->
     end.
 
 rev_tree(DiskTree) ->
-    couch_key_tree:mapfold(fun
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, leaf, _Acc) ->
-            % pre 1.2 format, will be upgraded on compaction
-            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq}, nil};
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, branch, Acc) ->
-            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq}, Acc};
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Sizes0}, leaf, Acc) ->
-            Sizes = upgrade_sizes(Sizes0),
-            Acc2 = reduce_sizes(Acc, Sizes),
-            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq,
-                   sizes=Sizes}, Acc2};
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Sizes}, branch, Acc) ->
-            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq,
-                   sizes=upgrade_sizes(Sizes)}, Acc};
-        (_RevId, ?REV_MISSING, _Type, Acc) ->
-            {?REV_MISSING, Acc}
-    end, {0, 0}, DiskTree).
+    couch_key_tree:map(fun
+        (_RevId, {Del, Ptr, Seq}) ->
+            #leaf{
+                deleted = ?i2b(Del),
+                ptr = Ptr,
+                seq = Seq
+            };
+        (_RevId, {Del, Ptr, Seq, Size}) ->
+            #leaf{
+                deleted = ?i2b(Del),
+                ptr = Ptr,
+                seq = Seq,
+                sizes = upgrade_sizes(Size)
+            };
+        (_RevId, {Del, Ptr, Seq, Sizes, Atts}) ->
+            #leaf{
+                deleted = ?i2b(Del),
+                ptr = Ptr,
+                seq = Seq,
+                sizes = upgrade_sizes(Sizes),
+                atts = Atts
+            };
+        (_RevId, ?REV_MISSING) ->
+            ?REV_MISSING
+    end, DiskTree).
 
 disk_tree(RevTree) ->
     couch_key_tree:map(fun
         (_RevId, ?REV_MISSING) ->
             ?REV_MISSING;
-        (_RevId, #leaf{deleted=IsDeleted, ptr=BodyPointer, seq=UpdateSeq, sizes=Sizes}) ->
-            {?b2i(IsDeleted), BodyPointer, UpdateSeq, upgrade_sizes(Sizes)}
+        (_RevId, #leaf{} = Leaf) ->
+            #leaf{
+                deleted = Del,
+                ptr = Ptr,
+                seq = Seq,
+                sizes = Sizes,
+                atts = Atts
+            } = Leaf,
+            {?b2i(Del), Ptr, Seq, split_sizes(Sizes), Atts}
     end, RevTree).
 
-upgrade_sizes({_, _}=Sizes) ->
-    Sizes;
+upgrade_sizes(#size_info{}=SI) ->
+    SI;
+upgrade_sizes({D, E}) ->
+    #size_info{active=D, external=E};
 upgrade_sizes(S) when is_integer(S) ->
-    {0, S}.
+    #size_info{active=S, external=0}.
+
+split_sizes(#size_info{}=SI) ->
+    {SI#size_info.active, SI#size_info.external}.
 
-btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del, rev_tree=T}) ->
-    {Seq, {Id, ?b2i(Del), disk_tree(T)}}.
+join_sizes({Active, External}) when is_integer(Active), is_integer(External) ->
+    #size_info{active=Active, external=External}.
+
+btree_by_seq_split(#full_doc_info{}=Info) ->
+    #full_doc_info{
+        id = Id,
+        update_seq = Seq,
+        deleted = Del,
+        sizes = SizeInfo,
+        rev_tree = Tree
+    } = Info,
+    {Seq, {Id, ?b2i(Del), split_sizes(SizeInfo), disk_tree(Tree)}}.
 
 btree_by_seq_join(Seq, {Id, Del, DiskTree}) when is_integer(Del) ->
-    {RevTree, Sizes} = rev_tree(DiskTree),
+    btree_by_seq_join(Seq, {Id, Del, #size_info{}, DiskTree});
+btree_by_seq_join(Seq, {Id, Del, Sizes, DiskTree}) when is_integer(Del) ->
     #full_doc_info{
         id = Id,
         update_seq = Seq,
         deleted = ?i2b(Del),
-        rev_tree = RevTree,
-        sizes = upgrade_sizes(Sizes)
+        sizes = join_sizes(Sizes),
+        rev_tree = rev_tree(DiskTree)
     };
 btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
     % Older versions stored #doc_info records in the seq_tree.
@@ -428,18 +459,27 @@ btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
             [#rev_info{rev=Rev,seq=Seq,deleted=true,body_sp = Bp} ||
                 {Rev, Seq, Bp} <- DeletedRevInfos]}.
 
-btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq,
-        deleted=Deleted, rev_tree=Tree}) ->
-    {Id, {Seq, ?b2i(Deleted), disk_tree(Tree)}}.
+btree_by_id_split(#full_doc_info{}=Info) ->
+    #full_doc_info{
+        id = Id,
+        update_seq = Seq,
+        deleted = Deleted,
+        sizes = SizeInfo,
+        rev_tree = Tree
+    } = Info,
+    {Id, {Seq, ?b2i(Deleted), split_sizes(SizeInfo), disk_tree(Tree)}}.
 
+% Handle old formats before data_size was added
 btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) ->
-    {Tree, Sizes} = rev_tree(DiskTree),
+    btree_by_id_join(Id, {HighSeq, Deleted, #size_info{}, DiskTree});
+
+btree_by_id_join(Id, {HighSeq, Deleted, Sizes, DiskTree}) ->
     #full_doc_info{
         id = Id,
         update_seq = HighSeq,
         deleted = ?i2b(Deleted),
-        rev_tree = Tree,
-        sizes = upgrade_sizes(Sizes)
+        sizes = upgrade_sizes(Sizes),
+        rev_tree = rev_tree(DiskTree)
     }.
 
 btree_by_id_reduce(reduce, FullDocInfos) ->
@@ -453,27 +493,29 @@ btree_by_id_reduce(reduce, FullDocInfos) ->
                 {NotDeleted + 1, Deleted, Sizes2}
             end
         end,
-        {0, 0, {0, 0}}, FullDocInfos);
+        {0, 0, #size_info{}}, FullDocInfos);
 btree_by_id_reduce(rereduce, Reds) ->
     lists:foldl(
-        fun({NotDeleted, Deleted}, {AccNotDeleted, AccDeleted, _AccSize}) ->
+        fun({NotDeleted, Deleted}, {AccNotDeleted, AccDeleted, _AccSizes}) ->
             % pre 1.2 format, will be upgraded on compaction
             {AccNotDeleted + NotDeleted, AccDeleted + Deleted, nil};
         ({NotDeleted, Deleted, Sizes}, {AccNotDeleted, AccDeleted, AccSizes}) ->
             AccSizes2 = reduce_sizes(AccSizes, Sizes),
             {AccNotDeleted + NotDeleted, AccDeleted + Deleted, AccSizes2}
         end,
-        {0, 0, {0, 0}}, Reds).
+        {0, 0, #size_info{}}, Reds).
 
 reduce_sizes(nil, _) ->
     nil;
 reduce_sizes(_, nil) ->
     nil;
-reduce_sizes({A1, E1}, {A2, E2}) ->
-    {A1 + A2, E1 + E2};
-reduce_sizes(S, {_, _} = Acc) when is_integer(Acc) ->
-    reduce_sizes({0, S}, Acc).
-
+reduce_sizes(S1, S2) when is_integer(S1); is_integer(S2) ->
+    reduce_sizes(upgrade_sizes(S1), upgrade_sizes(S2));
+reduce_sizes(#size_info{}=S1, #size_info{}=S2) ->
+    #size_info{
+        active = S1#size_info.active + S2#size_info.active,
+        external = S1#size_info.external + S2#size_info.external
+    }.
 
 btree_by_seq_reduce(reduce, DocInfos) ->
     % count the number of documents
@@ -585,10 +627,11 @@ flush_trees(_Db, [], AccFlushedTrees) ->
 flush_trees(#db{fd = Fd} = Db,
         [InfoUnflushed | RestUnflushed], AccFlushed) ->
     #full_doc_info{update_seq=UpdateSeq, rev_tree=Unflushed} = InfoUnflushed,
-    {Flushed, Sizes} = couch_key_tree:mapfold(
-        fun(_Rev, Value, Type, Acc) ->
+    {Flushed, FinalAcc} = couch_key_tree:mapfold(
+        fun(_Rev, Value, Type, SizesAcc) ->
             case Value of
-            #doc{deleted = IsDeleted, body = {summary, Summary, AttsFd}} ->
+            #doc{deleted = IsDeleted, body = {summary, _, _, _} = DocSummary} ->
+                {summary, Summary, AttSizeInfo, AttsFd} = DocSummary,
                 % this node value is actually an unwritten document summary,
                 % write to disk.
                 % make sure the Fd in the written bins is the same Fd we are
@@ -610,33 +653,45 @@ flush_trees(#db{fd = Fd} = Db,
                 ExternalSize = ?term_size(Summary),
                 {ok, NewSummaryPointer, SummarySize} =
                     couch_file:append_raw_chunk(Fd, Summary),
-                AttsSize = lists:foldl(
-                    fun(Att, A) -> A + couch_att:fetch(att_len, Att) end,
-                    0, Value#doc.atts),
-                NewValue = #leaf{deleted=IsDeleted,
-                                 ptr=NewSummaryPointer,
-                                 seq=UpdateSeq,
-                                 sizes={SummarySize + AttsSize,
-                                        ExternalSize + AttsSize}},
-                case Type of
-                leaf ->
-                    {NewValue, reduce_sizes(Acc, {SummarySize + AttsSize,
-                                                  ExternalSize + AttsSize})};
-                branch ->
-                    {NewValue, Acc}
-                end;
-            {_, _, _, Sizes1} when Type =:= leaf, Sizes1 =/= nil ->
-                {Value, reduce_sizes(Acc, Sizes1)};
+                Leaf = #leaf{
+                    deleted = IsDeleted,
+                    ptr = NewSummaryPointer,
+                    seq = UpdateSeq,
+                    sizes = #size_info{
+                        active = SummarySize,
+                        external = ExternalSize
+                    },
+                    atts = AttSizeInfo
+                },
+                {Leaf, add_sizes(Type, Leaf, SizesAcc)};
+            #leaf{} ->
+                {Value, add_sizes(Type, Value, SizesAcc)};
             _ ->
-                {Value, Acc}
+                {Value, SizesAcc}
             end
-        end, {0, 0}, Unflushed),
-    InfoFlushed = InfoUnflushed#full_doc_info{
+        end, {0, 0, []}, Unflushed),
+    {FinalAS, FinalES, FinalAtts} = FinalAcc,
+    TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0, FinalAtts),
+    NewInfo = InfoUnflushed#full_doc_info{
         rev_tree = Flushed,
-        sizes = Sizes
+        sizes = #size_info{
+            active = FinalAS + TotalAttSize,
+            external = FinalES + TotalAttSize
+        }
     },
-    flush_trees(Db, RestUnflushed, [InfoFlushed | AccFlushed]).
-
+    flush_trees(Db, RestUnflushed, [NewInfo | AccFlushed]).
+
+add_sizes(Type, #leaf{sizes=Sizes, atts=AttSizes}, Acc) ->
+    % Maybe upgrade from disk_size only
+    #size_info{
+        active = ActiveSize,
+        external = ExternalSize
+    } = upgrade_sizes(Sizes),
+    {ASAcc, ESAcc, AttsAcc} = Acc,
+    NewASAcc = ActiveSize + ASAcc,
+    NewESAcc = ESAcc + if Type == leaf -> ExternalSize; true -> 0 end,
+    NewAttsAcc = lists:umerge(AttSizes, AttsAcc),
+    {NewASAcc, NewESAcc, NewAttsAcc}.
 
 send_result(Client, Doc, NewResult) ->
     % used to send a result to the client
@@ -982,25 +1037,39 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
         A =< B
     end, merge_lookups(MixedInfos, LookupResults)),
 
-    NewInfos1 = lists:map(
-        fun(#full_doc_info{rev_tree=RevTree}=Info) ->
-            Info#full_doc_info{rev_tree=couch_key_tree:map(
-                fun(_, _, branch) ->
-                    ?REV_MISSING;
-                (_Rev, #leaf{ptr=Sp}=Leaf, leaf) ->
-                    {_Body, AttsInfo} = Summary = copy_doc_attachments(
-                        Db, Sp, DestFd),
-                    SummaryChunk = make_doc_summary(NewDb, Summary),
-                    ExternalSize = ?term_size(SummaryChunk),
-                    {ok, Pos, SummarySize} = couch_file:append_raw_chunk(
-                        DestFd, SummaryChunk),
-                    AttsSize = lists:foldl(
-                        fun({_, _, _, AttLen, _, _, _, _}, S) -> S + AttLen end,
-                        0, AttsInfo),
-                    Leaf#leaf{ptr=Pos, sizes={SummarySize + AttsSize,
-                                              ExternalSize + AttsSize}}
-                end, RevTree)}
-        end, NewInfos0),
+    NewInfos1 = lists:map(fun(Info) ->
+        {NewRevTree, FinalAcc} = couch_key_tree:mapfold(fun
+            (_Rev, #leaf{ptr=Sp}=Leaf, leaf, SizesAcc) ->
+                {Body, AttInfos} = copy_doc_attachments(Db, Sp, DestFd),
+                SummaryChunk = make_doc_summary(NewDb, {Body, AttInfos}),
+                ExternalSize = ?term_size(SummaryChunk),
+                {ok, Pos, SummarySize} = couch_file:append_raw_chunk(
+                    DestFd, SummaryChunk),
+                AttSizes = [{element(3,A), element(4,A)} || A <- AttInfos],
+                NewLeaf = Leaf#leaf{
+                    ptr = Pos,
+                    sizes = #size_info{
+                        active = SummarySize,
+                        external = ExternalSize
+                    },
+                    atts = AttSizes
+                },
+                {NewLeaf, add_sizes(leaf, NewLeaf, SizesAcc)};
+            (_Rev, _Leaf, branch, SizesAcc) ->
+                {?REV_MISSING, SizesAcc}
+        end, {0, 0, []}, Info#full_doc_info.rev_tree),
+        {FinalAS, FinalES, FinalAtts} = FinalAcc,
+        TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0, FinalAtts),
+        NewActiveSize = FinalAS + TotalAttSize,
+        NewExternalSize = FinalES + TotalAttSize,
+        Info#full_doc_info{
+            rev_tree = NewRevTree,
+            sizes = #size_info{
+                active = NewActiveSize,
+                external = NewExternalSize
+            }
+        }
+    end, NewInfos0),
 
     NewInfos = stem_full_doc_infos(Db, NewInfos1),
     RemoveSeqs =


Mime
View raw message