Return-Path: X-Original-To: apmail-couchdb-commits-archive@www.apache.org Delivered-To: apmail-couchdb-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 8F152106D3 for ; Tue, 4 Feb 2014 23:08:10 +0000 (UTC) Received: (qmail 65251 invoked by uid 500); 4 Feb 2014 23:07:24 -0000 Delivered-To: apmail-couchdb-commits-archive@couchdb.apache.org Received: (qmail 63785 invoked by uid 500); 4 Feb 2014 23:06:32 -0000 Mailing-List: contact commits-help@couchdb.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@couchdb.apache.org Delivered-To: mailing list commits@couchdb.apache.org Received: (qmail 63088 invoked by uid 99); 4 Feb 2014 23:06:16 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 04 Feb 2014 23:06:16 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 40176824EED; Tue, 4 Feb 2014 23:06:15 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: davisp@apache.org To: commits@couchdb.apache.org Date: Tue, 04 Feb 2014 23:06:40 -0000 Message-Id: In-Reply-To: <1094a8bf4296470e9b76821c8c074ad1@git.apache.org> References: <1094a8bf4296470e9b76821c8c074ad1@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [27/50] couch commit: updated refs/heads/import to c3116d7 Introduce a #leaf{} record in revision trees This is substantially based on work by Bob Dionne (a452a4a) to introduce the data size calculations at Cloudant. There's quite a bit of conflict in code and actual behavior between this work and what Filipe wrote for CouchDB. This new record should ease the transition of merging both behaviors. 
An important thing to note is that this record only ever lives in RAM and is never written to disk, so we don't have to worry about record upgrades. We will, however, still have to maintain the upgrade info that Filipe and Bob both introduced (which is fairly straightforward). Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/15b84c05 Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/15b84c05 Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/15b84c05 Branch: refs/heads/import Commit: 15b84c051eca732be0644e6af7a3a134bbf329b5 Parents: 22fdbe2 Author: Robert Newson Authored: Sun Mar 10 16:12:28 2013 -0500 Committer: Paul J. Davis Committed: Tue Feb 4 17:03:24 2014 -0600 ---------------------------------------------------------------------- include/couch_db.hrl | 9 +++++++++ src/couch_db.erl | 22 +++++++--------------- src/couch_db_updater.erl | 40 +++++++++++++++++----------------------- src/couch_doc.erl | 13 ++++++++----- src/couch_util.erl | 11 +++++++++++ 5 files changed, 52 insertions(+), 43 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/15b84c05/include/couch_db.hrl ---------------------------------------------------------------------- diff --git a/include/couch_db.hrl b/include/couch_db.hrl index 77006e4..61a59f7 100644 --- a/include/couch_db.hrl +++ b/include/couch_db.hrl @@ -23,6 +23,8 @@ -define(b2l(V), binary_to_list(V)). -define(l2b(V), list_to_binary(V)). +-define(i2b(V), couch_util:integer_to_boolean(V)). +-define(b2i(V), couch_util:boolean_to_integer(V)). -define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])). -define(term_size(T), try @@ -271,3 +273,10 @@ stop_fun }). +-record(leaf, { + deleted, + ptr, + seq, + size = nil +}). 
+ http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/15b84c05/src/couch_db.erl ---------------------------------------------------------------------- diff --git a/src/couch_db.erl b/src/couch_db.erl index 7734c7c..e4e8cca 100644 --- a/src/couch_db.erl +++ b/src/couch_db.erl @@ -580,7 +580,7 @@ prep_and_validate_update(Db, #doc{id=Id,revs={RevStart, Revs}}=Doc, case Revs of [PrevRev|_] -> case dict:find({RevStart, PrevRev}, LeafRevsDict) of - {ok, {Deleted, DiskSp, DiskRevs}} -> + {ok, {#leaf{deleted=Deleted, ptr=DiskSp}, DiskRevs}} -> case couch_doc:has_stubs(Doc) of true -> DiskDoc = make_doc(Db, Id, Deleted, DiskSp, DiskRevs), @@ -643,12 +643,8 @@ prep_and_validate_updates(Db, [DocBucket|RestBuckets], AllowConflict, AccPrepped, AccErrors) -> Leafs = couch_key_tree:get_all_leafs(OldRevTree), LeafRevsDict = dict:from_list([ - begin - Deleted = element(1, LeafVal), - Sp = element(2, LeafVal), - {{Start, RevId}, {Deleted, Sp, Revs}} - end || - {LeafVal, {Start, [RevId | _]} = Revs} <- Leafs + {{Start, RevId}, {Leaf, Revs}} || + {Leaf, {Start, [RevId | _]} = Revs} <- Leafs ]), {PreppedBucket, AccErrors3} = lists:foldl( fun({Doc, Ref}, {Docs2Acc, AccErrors2}) -> @@ -895,9 +891,7 @@ make_first_doc_on_disk(Db, Id, Pos, [{_Rev, #doc{}} | RestPath]) -> make_first_doc_on_disk(Db, Id, Pos-1, RestPath); make_first_doc_on_disk(Db, Id, Pos, [{_Rev, ?REV_MISSING}|RestPath]) -> make_first_doc_on_disk(Db, Id, Pos - 1, RestPath); -make_first_doc_on_disk(Db, Id, Pos, [{_Rev, RevValue} |_]=DocPath) -> - IsDel = element(1, RevValue), - Sp = element(2, RevValue), +make_first_doc_on_disk(Db, Id, Pos, [{_Rev, #leaf{deleted=IsDel, ptr=Sp}} |_]=DocPath) -> Revs = [Rev || {Rev, _} <- DocPath], make_doc(Db, Id, IsDel, Sp, {Pos, Revs}). 
@@ -1243,9 +1237,7 @@ open_doc_revs_int(Db, IdRevs, Options) -> ?REV_MISSING -> % we have the rev in our list but know nothing about it {{not_found, missing}, {Pos, Rev}}; - RevValue -> - IsDeleted = element(1, RevValue), - SummaryPtr = element(2, RevValue), + #leaf{deleted=IsDeleted, ptr=SummaryPtr} -> {ok, make_doc(Db, Id, IsDeleted, SummaryPtr, FoundRevPath)} end end, FoundRevs), @@ -1297,8 +1289,8 @@ doc_meta_info(#doc_info{high_seq=Seq,revs=[#rev_info{rev=Rev}|RestInfo]}, RevTre [{revs_info, Pos, lists:map( fun({Rev1, ?REV_MISSING}) -> {Rev1, missing}; - ({Rev1, RevValue}) -> - case element(1, RevValue) of + ({Rev1, Leaf}) -> + case Leaf#leaf.deleted of true -> {Rev1, deleted}; false -> http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/15b84c05/src/couch_db_updater.erl ---------------------------------------------------------------------- diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl index c64911e..cc48ef8 100644 --- a/src/couch_db_updater.erl +++ b/src/couch_db_updater.erl @@ -158,10 +158,8 @@ handle_call({purge_docs, IdRevs}, _From, Db) -> {DocInfoToUpdate, NewSeq} = lists:mapfoldl( fun(#full_doc_info{rev_tree=Tree}=FullInfo, SeqAcc) -> Tree2 = couch_key_tree:map_leafs( - fun(_RevId, LeafVal) -> - IsDeleted = element(1, LeafVal), - BodyPointer = element(2, LeafVal), - {IsDeleted, BodyPointer, SeqAcc + 1} + fun(_RevId, Leaf) -> + Leaf#leaf{seq=SeqAcc+1} end, Tree), {FullInfo#full_doc_info{rev_tree=Tree2}, SeqAcc + 1} end, LastSeq, FullDocInfoToUpdate), @@ -340,37 +338,35 @@ rev_tree(DiskTree) -> couch_key_tree:mapfold(fun (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, leaf, _Acc) -> % pre 1.2 format, will be upgraded on compaction - {{IsDeleted == 1, BodyPointer, UpdateSeq, nil}, nil}; + {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq}, nil}; (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, branch, Acc) -> - {{IsDeleted == 1, BodyPointer, UpdateSeq, nil}, Acc}; + {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, 
seq=UpdateSeq}, Acc}; (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, leaf, Acc) -> Acc2 = sum_leaf_sizes(Acc, Size), - {{IsDeleted == 1, BodyPointer, UpdateSeq, Size}, Acc2}; + {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq, size=Size}, Acc2}; (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, branch, Acc) -> - {{IsDeleted == 1, BodyPointer, UpdateSeq, Size}, Acc}; + {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq, size=Size}, Acc}; (_RevId, ?REV_MISSING, _Type, Acc) -> {?REV_MISSING, Acc} - end, DiskTree). + end, 0, DiskTree). disk_tree(RevTree) -> couch_key_tree:map(fun (_RevId, ?REV_MISSING) -> ?REV_MISSING; - (_RevId, {IsDeleted, BodyPointer, UpdateSeq}) -> - {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq, nil}; - (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}) -> - {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq, Size} + (_RevId, #leaf{deleted=IsDeleted, ptr=BodyPointer, seq=UpdateSeq, size=Size}) -> + {?b2i(IsDeleted), BodyPointer, UpdateSeq, Size} end, RevTree). btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del, rev_tree=T}) -> - {Seq, {Id, if Del -> 1; true -> 0 end, disk_tree(T)}}. + {Seq, {Id, ?b2i(Del), disk_tree(T)}}. btree_by_seq_join(Seq, {Id, Del, DiskTree}) when is_integer(Del) -> {RevTree, LeafsSize} = rev_tree(DiskTree), #full_doc_info{ id = Id, update_seq = Seq, - deleted = (Del == 1), + deleted = ?i2b(Del), rev_tree = RevTree, leafs_size = LeafsSize }; @@ -388,14 +384,14 @@ btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) -> btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Deleted, rev_tree=Tree}) -> - {Id, {Seq, if Deleted -> 1; true -> 0 end, disk_tree(Tree)}}. + {Id, {Seq, ?b2i(Deleted), disk_tree(Tree)}}. 
btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) -> {Tree, LeafsSize} = rev_tree(DiskTree), #full_doc_info{ id = Id, update_seq = HighSeq, - deleted = (Deleted == 1), + deleted = ?i2b(Deleted), rev_tree = Tree, leafs_size = LeafsSize }. @@ -573,7 +569,8 @@ flush_trees(#db{fd = Fd} = Db, TotalSize = lists:foldl( fun(#att{att_len = L}, A) -> A + L end, SummarySize, Value#doc.atts), - NewValue = {IsDeleted, NewSummaryPointer, UpdateSeq, TotalSize}, + NewValue = #leaf{deleted=IsDeleted, ptr=NewSummaryPointer, + seq=UpdateSeq, size=TotalSize}, case Type of leaf -> {NewValue, Acc + TotalSize}; @@ -899,10 +896,7 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) -> Info#full_doc_info{rev_tree=couch_key_tree:map( fun(_, _, branch) -> ?REV_MISSING; - (_Rev, LeafVal, leaf) -> - IsDel = element(1, LeafVal), - Sp = element(2, LeafVal), - Seq = element(3, LeafVal), + (_Rev, #leaf{ptr=Sp}=Leaf, leaf) -> {_Body, AttsInfo} = Summary = copy_doc_attachments( Db, Sp, DestFd), SummaryChunk = make_doc_summary(NewDb, Summary), @@ -911,7 +905,7 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) -> TotalLeafSize = lists:foldl( fun({_, _, _, AttLen, _, _, _, _}, S) -> S + AttLen end, SummarySize, AttsInfo), - {IsDel, Pos, Seq, TotalLeafSize} + Leaf#leaf{ptr=Pos, size=TotalLeafSize} end, RevTree)} end, NewInfos0), http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/15b84c05/src/couch_doc.erl ---------------------------------------------------------------------- diff --git a/src/couch_doc.erl b/src/couch_doc.erl index bcf79d2..6f2ca9b 100644 --- a/src/couch_doc.erl +++ b/src/couch_doc.erl @@ -330,7 +330,10 @@ max_seq(Tree, UpdateSeq) -> {_Deleted, _DiskPos, OldTreeSeq} -> % Older versions didn't track data sizes. erlang:max(MaxOldSeq, OldTreeSeq); - {_Deleted, _DiskPos, OldTreeSeq, _Size} -> + {_Deleted, _DiskPos, OldTreeSeq, _Size} -> % necessary clause? + % Older versions didn't store #leaf records. 
+ erlang:max(MaxOldSeq, OldTreeSeq); + #leaf{seq=OldTreeSeq} -> erlang:max(MaxOldSeq, OldTreeSeq); _ -> MaxOldSeq @@ -341,11 +344,11 @@ max_seq(Tree, UpdateSeq) -> to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree,update_seq=FDISeq}) -> RevInfosAndPath = [ {#rev_info{ - deleted = element(1, LeafVal), - body_sp = element(2, LeafVal), - seq = element(3, LeafVal), + deleted = Leaf#leaf.deleted, + body_sp = Leaf#leaf.ptr, + seq = Leaf#leaf.seq, rev = {Pos, RevId} - }, Path} || {LeafVal, {Pos, [RevId | _]} = Path} <- + }, Path} || {Leaf, {Pos, [RevId | _]} = Path} <- couch_key_tree:get_all_leafs(Tree) ], SortedRevInfosAndPath = lists:sort( http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/15b84c05/src/couch_util.erl ---------------------------------------------------------------------- diff --git a/src/couch_util.erl b/src/couch_util.erl index 3556d36..d09211a 100644 --- a/src/couch_util.erl +++ b/src/couch_util.erl @@ -29,6 +29,7 @@ -export([encode_doc_id/1]). -export([with_db/2]). -export([rfc1123_date/0, rfc1123_date/1]). +-export([integer_to_boolean/1, boolean_to_integer/1]). -include_lib("couch/include/couch_db.hrl"). @@ -487,3 +488,13 @@ month(9) -> "Sep"; month(10) -> "Oct"; month(11) -> "Nov"; month(12) -> "Dec". + +integer_to_boolean(1) -> + true; +integer_to_boolean(0) -> + false. + +boolean_to_integer(true) -> + 1; +boolean_to_integer(false) -> + 0.