Return-Path: X-Original-To: apmail-couchdb-commits-archive@www.apache.org Delivered-To: apmail-couchdb-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id C04104A03 for ; Tue, 31 May 2011 10:04:43 +0000 (UTC) Received: (qmail 65722 invoked by uid 500); 31 May 2011 10:04:43 -0000 Delivered-To: apmail-couchdb-commits-archive@couchdb.apache.org Received: (qmail 65668 invoked by uid 500); 31 May 2011 10:04:42 -0000 Mailing-List: contact commits-help@couchdb.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@couchdb.apache.org Delivered-To: mailing list commits@couchdb.apache.org Received: (qmail 65659 invoked by uid 99); 31 May 2011 10:04:42 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 31 May 2011 10:04:42 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 31 May 2011 10:04:41 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 12D1923889F7; Tue, 31 May 2011 10:04:21 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1129597 - in /couchdb/trunk/src/couchdb: couch_btree.erl couch_db.hrl couch_db_updater.erl couch_view_compactor.erl couch_work_queue.erl Date: Tue, 31 May 2011 10:04:20 -0000 To: commits@couchdb.apache.org From: fdmanana@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110531100421.12D1923889F7@eris.apache.org> Author: fdmanana Date: Tue May 31 10:04:20 2011 New Revision: 1129597 URL: http://svn.apache.org/viewvc?rev=1129597&view=rev Log: More efficient term size calculation Unlike byte_size(term_to_binary(Term)), the BIF erlang:external_size/1 doesn't do the serialization step, it 
only calculates the maximum external size for any term, which is more efficient (it is faster and avoids generating garbage). With the test couch_http_bulk_writes.sh at [1], using 20 writers and batches of 100 1 KB documents, it's possible to write about 1 400 000 documents with this patch instead of about 1 300 000. [1] https://github.com/fdmanana/basho_bench_couch Modified: couchdb/trunk/src/couchdb/couch_btree.erl couchdb/trunk/src/couchdb/couch_db.hrl couchdb/trunk/src/couchdb/couch_db_updater.erl couchdb/trunk/src/couchdb/couch_view_compactor.erl couchdb/trunk/src/couchdb/couch_work_queue.erl Modified: couchdb/trunk/src/couchdb/couch_btree.erl URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_btree.erl?rev=1129597&r1=1129596&r2=1129597&view=diff ============================================================================== --- couchdb/trunk/src/couchdb/couch_btree.erl (original) +++ couchdb/trunk/src/couchdb/couch_btree.erl Tue May 31 10:04:20 2011 @@ -276,26 +276,26 @@ complete_root(Bt, KPs) -> % written. Plus with the "case byte_size(term_to_binary(InList)) of" code % it's probably really inefficient. -chunkify(#btree{compression = Comp} = Bt, InList) -> - case byte_size(couch_compress:compress(InList, Comp)) of +chunkify(InList) -> + case ?term_size(InList) of Size when Size > ?CHUNK_THRESHOLD -> NumberOfChunksLikely = ((Size div ?CHUNK_THRESHOLD) + 1), ChunkThreshold = Size div NumberOfChunksLikely, - chunkify(Bt, InList, ChunkThreshold, [], 0, []); + chunkify(InList, ChunkThreshold, [], 0, []); _Else -> [InList] end. 
-chunkify(_Bt, [], _ChunkThreshold, [], 0, OutputChunks) -> +chunkify([], _ChunkThreshold, [], 0, OutputChunks) -> lists:reverse(OutputChunks); -chunkify(_Bt, [], _ChunkThreshold, OutList, _OutListSize, OutputChunks) -> +chunkify([], _ChunkThreshold, OutList, _OutListSize, OutputChunks) -> lists:reverse([lists:reverse(OutList) | OutputChunks]); -chunkify(Bt, [InElement | RestInList], ChunkThreshold, OutList, OutListSize, OutputChunks) -> - case byte_size(couch_compress:compress(InElement, Bt#btree.compression)) of +chunkify([InElement | RestInList], ChunkThreshold, OutList, OutListSize, OutputChunks) -> + case ?term_size(InElement) of Size when (Size + OutListSize) > ChunkThreshold andalso OutList /= [] -> - chunkify(Bt, RestInList, ChunkThreshold, [], 0, [lists:reverse([InElement | OutList]) | OutputChunks]); + chunkify(RestInList, ChunkThreshold, [], 0, [lists:reverse([InElement | OutList]) | OutputChunks]); Size -> - chunkify(Bt, RestInList, ChunkThreshold, [InElement | OutList], OutListSize + Size, OutputChunks) + chunkify(RestInList, ChunkThreshold, [InElement | OutList], OutListSize + Size, OutputChunks) end. modify_node(Bt, RootPointerInfo, Actions, QueryOutput) -> @@ -350,7 +350,7 @@ get_node(#btree{fd = Fd}, NodePos) -> write_node(#btree{fd = Fd, compression = Comp} = Bt, NodeType, NodeList) -> % split up nodes into smaller sizes - NodeListList = chunkify(Bt, NodeList), + NodeListList = chunkify(NodeList), % now write out each chunk and return the KeyPointer pairs for those nodes ResultList = [ begin Modified: couchdb/trunk/src/couchdb/couch_db.hrl URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_db.hrl?rev=1129597&r1=1129596&r2=1129597&view=diff ============================================================================== --- couchdb/trunk/src/couchdb/couch_db.hrl (original) +++ couchdb/trunk/src/couchdb/couch_db.hrl Tue May 31 10:04:20 2011 @@ -27,6 +27,12 @@ -define(b2l(V), binary_to_list(V)). -define(l2b(V), list_to_binary(V)). 
-define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])). +-define(term_size(T), + try + erlang:external_size(T) + catch _:_ -> + byte_size(?term_to_bin(T)) + end). -define(DEFAULT_ATTACHMENT_CONTENT_TYPE, <<"application/octet-stream">>). Modified: couchdb/trunk/src/couchdb/couch_db_updater.erl URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_db_updater.erl?rev=1129597&r1=1129596&r2=1129597&view=diff ============================================================================== --- couchdb/trunk/src/couchdb/couch_db_updater.erl (original) +++ couchdb/trunk/src/couchdb/couch_db_updater.erl Tue May 31 10:04:20 2011 @@ -888,7 +888,7 @@ copy_compact(Db, NewDb0, Retry) -> fun(#doc_info{high_seq=Seq}=DocInfo, _Offset, {AccNewDb, AccUncopied, AccUncopiedSize, AccCopiedSize, TotalCopied}) -> - AccUncopiedSize2 = AccUncopiedSize + byte_size(?term_to_bin(DocInfo)), + AccUncopiedSize2 = AccUncopiedSize + ?term_size(DocInfo), if AccUncopiedSize2 >= BufferSize -> NewDb2 = copy_docs( Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry), Modified: couchdb/trunk/src/couchdb/couch_view_compactor.erl URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_view_compactor.erl?rev=1129597&r1=1129596&r2=1129597&view=diff ============================================================================== --- couchdb/trunk/src/couchdb/couch_view_compactor.erl (original) +++ couchdb/trunk/src/couchdb/couch_view_compactor.erl Tue May 31 10:04:20 2011 @@ -57,7 +57,7 @@ compact_group(Group, EmptyGroup) -> Msg = "Duplicates of ~s detected in ~s ~s - rebuild required", exit(io_lib:format(Msg, [DocId, DbName, GroupId])); true -> ok end, - AccSize2 = AccSize + byte_size(?term_to_bin(KV)), + AccSize2 = AccSize + ?term_size(KV), if AccSize2 >= BufferSize -> {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])), couch_task_status:update("Copied ~p of ~p Ids (~p%)", @@ -90,7 +90,7 @@ compact_view(View, EmptyView, BufferSize %% Key is {Key,DocId} Fun = fun(KV, 
{Bt, Acc, AccSize, TotalCopied}) -> - AccSize2 = AccSize + byte_size(?term_to_bin(KV)), + AccSize2 = AccSize + ?term_size(KV), if AccSize2 >= BufferSize -> {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])), couch_task_status:update("View #~p: copied ~p of ~p KVs (~p%)", Modified: couchdb/trunk/src/couchdb/couch_work_queue.erl URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_work_queue.erl?rev=1129597&r1=1129596&r2=1129597&view=diff ============================================================================== --- couchdb/trunk/src/couchdb/couch_work_queue.erl (original) +++ couchdb/trunk/src/couchdb/couch_work_queue.erl Tue May 31 10:04:20 2011 @@ -42,7 +42,7 @@ new(Options) -> queue(Wq, Item) when is_binary(Item) -> gen_server:call(Wq, {queue, Item, byte_size(Item)}, infinity); queue(Wq, Item) -> - gen_server:call(Wq, {queue, Item, byte_size(?term_to_bin(Item))}, infinity). + gen_server:call(Wq, {queue, Item, ?term_size(Item)}, infinity). dequeue(Wq) ->