couchdb-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Chris Anderson <jch...@apache.org>
Subject Re: Patch to couch_btree:chunkify
Date Mon, 11 May 2009 20:52:15 GMT
On Mon, May 11, 2009 at 12:57 PM, Adam Kocoloski
<adam.kocoloski@gmail.com> wrote:
> I'd like to see some more concrete numbers on the performance difference the
> two versions.  I wasn't able to reproduce Chris' 10%+ speedup using
> hovercraft:lightning; in fact, the two versions seem to be comparable within
> measurement variance.
>

All I did was run hovercraft:lightning() with and without the patch.
Without the patch I was seeing 6.9k docs/sec, with the patch I think it
was 7.7k. The fastest I got was with the patch *and* with compression
turned off in couch_btree:append_term, when I saw 10.1k docs/sec.

I'm running on an old white MacBook, and I seem to be IO bound (with
compression turned off). If I turn compression back on, I become CPU
bound and end up writing to the disk at a slower rate. These effects
would be wildly different on other hardware, I'm guessing.

> I tried messing around with fprof for a while today, and if anything it
> indicates that the original version might actually be faster (though I find
> that hard to believe).  Anyway, I think we should get in the habit of having
> some quantitative, reproducible way of evaluating performance-related
> patches.
>
> +1 for Bob's suggestion of stripping out Bt from the arguments, though.
>
> Adam
>
> On May 11, 2009, at 3:28 PM, Damien Katz wrote:
>
>> +1 for committing.
>>
>> -Damien
>>
>>
>> On May 10, 2009, at 9:49 PM, Paul Davis wrote:
>>
>>> Chris reminded me that I had an optimization patch laying around for
>>> couch_btree:chunkify and his tests show that it gets a bit of a speed
>>> increase when running some tests with hovercraft. The basic outline of
>>> what I did was to swap a call like size(term_to_binary(ListOfTuples)) for
>>> a sequence of SizeList = lists:map(fun(T) -> size(term_to_binary(T)) end,
>>> ListOfTuples), TotalSize = lists:sum(SizeList), and then when we go
>>> through the list of tuples to split them into chunks I use the
>>> pre-calculated sizes.
>>>
>>> Anyway, I just wanted to run it across the list before I commit it in
>>> case anyone sees anything subtle I might be missing.
>>>
>>> chunkify(_Bt, []) ->
>>>  [];
>>> chunkify(Bt, InList) ->
>>>  ToSize = fun(X) -> size(term_to_binary(X)) end,
>>>  SizeList = lists:map(ToSize, InList),
>>>  TotalSize = lists:sum(SizeList),
>>>  case TotalSize of
>>>  Size when Size > ?CHUNK_THRESHOLD ->
>>>      NumberOfChunksLikely = ((Size div ?CHUNK_THRESHOLD) + 1),
>>>      ChunkThreshold = Size div NumberOfChunksLikely,
>>>      chunkify(Bt, InList, SizeList, ChunkThreshold, [], 0, []);
>>>  _Else ->
>>>      [InList]
>>>  end.
>>>
>>> chunkify(_Bt, [], [], _Threshold, [], 0, Chunks) ->
>>>  lists:reverse(Chunks);
>>> chunkify(_Bt, [], [], _Threshold, OutAcc, _OutAccSize, Chunks) ->
>>>  lists:reverse([lists:reverse(OutAcc) | Chunks]);
>>> chunkify(Bt, [InElement | RestInList], [InSize | RestSizes], Threshold,
>>> OutAcc,
>>>      OutAccSize, Chunks) ->
>>>  case InSize of
>>>  InSize when (InSize + OutAccSize) > Threshold andalso OutAcc /= [] ->
>>>      chunkify(Bt, RestInList, RestSizes, Threshold, [], 0,
>>>          [lists:reverse([InElement | OutAcc]) | Chunks]);
>>>  InSize ->
>>>      chunkify(Bt, RestInList, RestSizes, Threshold, [InElement | OutAcc],
>>>          OutAccSize + InSize, Chunks)
>>>  end.
>>
>
>



-- 
Chris Anderson
http://jchrisa.net
http://couch.io

Mime
View raw message