incubator-couchdb-dev mailing list archives

From Adam Kocoloski <kocol...@apache.org>
Subject Re: svn commit: r1129597 - in /couchdb/trunk/src/couchdb: couch_btree.erl couch_db.hrl couch_db_updater.erl couch_view_compactor.erl couch_work_queue.erl
Date Thu, 02 Jun 2011 02:25:11 GMT
Nice. So many useful things one can find digging around the undocumented internals of Erlang/OTP...

On May 31, 2011, at 6:04 AM, fdmanana@apache.org wrote:

> Author: fdmanana
> Date: Tue May 31 10:04:20 2011
> New Revision: 1129597
> 
> URL: http://svn.apache.org/viewvc?rev=1129597&view=rev
> Log:
> More efficient term size calculation
> 
> Unlike byte_size(term_to_binary(Term)), the BIF erlang:external_size/1 doesn't
> do the serialization step; it only calculates the maximum external size of a
> term, which is more efficient (faster, and it avoids generating garbage).
> 
> With the couch_http_bulk_writes.sh test at [1], using 20 writers and batches
> of 100 documents of 1 KB each, this patch allows about 1,400,000 documents to
> be written, instead of about 1,300,000.
> 
> [1] https://github.com/fdmanana/basho_bench_couch
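
A quick shell-level illustration of the point above (not from the patch; the
exact byte counts depend on the OTP release, so only the expressions are shown):

    %% Both measure the external representation of a term, but only the
    %% first one actually builds the binary (and the garbage with it).
    Term = lists:seq(1, 1000),
    Exact = byte_size(term_to_binary(Term)),   % serializes, then measures
    Bound = erlang:external_size(Term),        % size calculation only
    true = (Exact =< Bound).                   % the estimate is an upper bound

As far as I can tell, external_size/1 never reports less than the real
serialized size, so the buffer and chunk accounting below can only become more
conservative: thresholds are reached no later than they were with the old code.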
> 
> 
> 
> Modified:
>    couchdb/trunk/src/couchdb/couch_btree.erl
>    couchdb/trunk/src/couchdb/couch_db.hrl
>    couchdb/trunk/src/couchdb/couch_db_updater.erl
>    couchdb/trunk/src/couchdb/couch_view_compactor.erl
>    couchdb/trunk/src/couchdb/couch_work_queue.erl
> 
> Modified: couchdb/trunk/src/couchdb/couch_btree.erl
> URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_btree.erl?rev=1129597&r1=1129596&r2=1129597&view=diff
> ==============================================================================
> --- couchdb/trunk/src/couchdb/couch_btree.erl (original)
> +++ couchdb/trunk/src/couchdb/couch_btree.erl Tue May 31 10:04:20 2011
> @@ -276,26 +276,26 @@ complete_root(Bt, KPs) ->
> % written. Plus with the "case byte_size(term_to_binary(InList)) of" code
> % it's probably really inefficient.
> 
> -chunkify(#btree{compression = Comp} = Bt, InList) ->
> -    case byte_size(couch_compress:compress(InList, Comp)) of
> +chunkify(InList) ->
> +    case ?term_size(InList) of
>     Size when Size > ?CHUNK_THRESHOLD ->
>         NumberOfChunksLikely = ((Size div ?CHUNK_THRESHOLD) + 1),
>         ChunkThreshold = Size div NumberOfChunksLikely,
> -        chunkify(Bt, InList, ChunkThreshold, [], 0, []);
> +        chunkify(InList, ChunkThreshold, [], 0, []);
>     _Else ->
>         [InList]
>     end.
> 
> -chunkify(_Bt, [], _ChunkThreshold, [], 0, OutputChunks) ->
> +chunkify([], _ChunkThreshold, [], 0, OutputChunks) ->
>     lists:reverse(OutputChunks);
> -chunkify(_Bt, [], _ChunkThreshold, OutList, _OutListSize, OutputChunks) ->
> +chunkify([], _ChunkThreshold, OutList, _OutListSize, OutputChunks) ->
>     lists:reverse([lists:reverse(OutList) | OutputChunks]);
> -chunkify(Bt, [InElement | RestInList], ChunkThreshold, OutList, OutListSize, OutputChunks) ->
> -    case byte_size(couch_compress:compress(InElement, Bt#btree.compression)) of
> +chunkify([InElement | RestInList], ChunkThreshold, OutList, OutListSize, OutputChunks) ->
> +    case ?term_size(InElement) of
>     Size when (Size + OutListSize) > ChunkThreshold andalso OutList /= [] ->
> -        chunkify(Bt, RestInList, ChunkThreshold, [], 0, [lists:reverse([InElement | OutList]) | OutputChunks]);
> +        chunkify(RestInList, ChunkThreshold, [], 0, [lists:reverse([InElement | OutList]) | OutputChunks]);
>     Size ->
> -        chunkify(Bt, RestInList, ChunkThreshold, [InElement | OutList], OutListSize + Size, OutputChunks)
> +        chunkify(RestInList, ChunkThreshold, [InElement | OutList], OutListSize + Size, OutputChunks)
>     end.
> 
> modify_node(Bt, RootPointerInfo, Actions, QueryOutput) ->
> @@ -350,7 +350,7 @@ get_node(#btree{fd = Fd}, NodePos) ->
> 
> write_node(#btree{fd = Fd, compression = Comp} = Bt, NodeType, NodeList) ->
>     % split up nodes into smaller sizes
> -    NodeListList = chunkify(Bt, NodeList),
> +    NodeListList = chunkify(NodeList),
>     % now write out each chunk and return the KeyPointer pairs for those nodes
>     ResultList = [
>         begin
> 
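
For readers skimming the diff: the threshold arithmetic above is untouched,
only the Size that feeds it now comes from ?term_size. A rough worked example
with made-up numbers (the real limit is the ?CHUNK_THRESHOLD define):

    %% Hypothetical sizes: a 5000-byte node list against a 1228-byte threshold.
    NumberOfChunksLikely = (5000 div 1228) + 1,        % = 5
    ChunkThreshold = 5000 div NumberOfChunksLikely.    % = 1000
    %% i.e. aim for 5 chunks of roughly 1000 bytes each instead of one big node.
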
> Modified: couchdb/trunk/src/couchdb/couch_db.hrl
> URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_db.hrl?rev=1129597&r1=1129596&r2=1129597&view=diff
> ==============================================================================
> --- couchdb/trunk/src/couchdb/couch_db.hrl (original)
> +++ couchdb/trunk/src/couchdb/couch_db.hrl Tue May 31 10:04:20 2011
> @@ -27,6 +27,12 @@
> -define(b2l(V), binary_to_list(V)).
> -define(l2b(V), list_to_binary(V)).
> -define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])).
> +-define(term_size(T),
> +    try
> +        erlang:external_size(T)
> +    catch _:_ ->
> +        byte_size(?term_to_bin(T))
> +    end).
> 
> -define(DEFAULT_ATTACHMENT_CONTENT_TYPE, <<"application/octet-stream">>).
> 
> 
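
The try/catch in ?term_size reads like a guard for emulators where the
erlang:external_size/1 BIF is missing (the call would fail with undef there),
falling back to the old serialize-and-measure approach. Written out as a plain
function instead of a macro, it is simply:

    %% Sketch of what ?term_size/1 expands to, shown here only for readability:
    %% prefer the BIF, fall back to full serialization if the call fails.
    term_size(T) ->
        try
            erlang:external_size(T)
        catch _:_ ->
            byte_size(term_to_binary(T, [{minor_version, 1}]))
        end.
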
> Modified: couchdb/trunk/src/couchdb/couch_db_updater.erl
> URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_db_updater.erl?rev=1129597&r1=1129596&r2=1129597&view=diff
> ==============================================================================
> --- couchdb/trunk/src/couchdb/couch_db_updater.erl (original)
> +++ couchdb/trunk/src/couchdb/couch_db_updater.erl Tue May 31 10:04:20 2011
> @@ -888,7 +888,7 @@ copy_compact(Db, NewDb0, Retry) ->
>     fun(#doc_info{high_seq=Seq}=DocInfo, _Offset,
>         {AccNewDb, AccUncopied, AccUncopiedSize, AccCopiedSize, TotalCopied}) ->
> 
> -        AccUncopiedSize2 = AccUncopiedSize + byte_size(?term_to_bin(DocInfo)),
> +        AccUncopiedSize2 = AccUncopiedSize + ?term_size(DocInfo),
>         if AccUncopiedSize2 >= BufferSize ->
>             NewDb2 = copy_docs(
>                 Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry),
> 
> Modified: couchdb/trunk/src/couchdb/couch_view_compactor.erl
> URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_view_compactor.erl?rev=1129597&r1=1129596&r2=1129597&view=diff
> ==============================================================================
> --- couchdb/trunk/src/couchdb/couch_view_compactor.erl (original)
> +++ couchdb/trunk/src/couchdb/couch_view_compactor.erl Tue May 31 10:04:20 2011
> @@ -57,7 +57,7 @@ compact_group(Group, EmptyGroup) ->
>             Msg = "Duplicates of ~s detected in ~s ~s - rebuild required",
>             exit(io_lib:format(Msg, [DocId, DbName, GroupId]));
>         true -> ok end,
> -        AccSize2 = AccSize + byte_size(?term_to_bin(KV)),
> +        AccSize2 = AccSize + ?term_size(KV),
>         if AccSize2 >= BufferSize ->
>             {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])),
>             couch_task_status:update("Copied ~p of ~p Ids (~p%)",
> @@ -90,7 +90,7 @@ compact_view(View, EmptyView, BufferSize
> 
>     %% Key is {Key,DocId}
>     Fun = fun(KV, {Bt, Acc, AccSize, TotalCopied}) ->
> -        AccSize2 = AccSize + byte_size(?term_to_bin(KV)),
> +        AccSize2 = AccSize + ?term_size(KV),
>         if AccSize2 >= BufferSize ->
>             {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])),
>             couch_task_status:update("View #~p: copied ~p of ~p KVs (~p%)",
> 
> Modified: couchdb/trunk/src/couchdb/couch_work_queue.erl
> URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_work_queue.erl?rev=1129597&r1=1129596&r2=1129597&view=diff
> ==============================================================================
> --- couchdb/trunk/src/couchdb/couch_work_queue.erl (original)
> +++ couchdb/trunk/src/couchdb/couch_work_queue.erl Tue May 31 10:04:20 2011
> @@ -42,7 +42,7 @@ new(Options) ->
> queue(Wq, Item) when is_binary(Item) ->
>     gen_server:call(Wq, {queue, Item, byte_size(Item)}, infinity);
> queue(Wq, Item) ->
> -    gen_server:call(Wq, {queue, Item, byte_size(?term_to_bin(Item))}, infinity).
> +    gen_server:call(Wq, {queue, Item, ?term_size(Item)}, infinity).
> 
> 
> dequeue(Wq) ->
> 
> 

