couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From fdman...@apache.org
Subject git commit: Make attachment stream buffer size configurable
Date Sun, 30 Oct 2011 14:20:31 GMT
Updated Branches:
  refs/heads/master 6621c5317 -> f4bdd95ed


Make attachment stream buffer size configurable

Instead of being hardcoded to 4Kb, it is now configurable.
A quick test using couch_doc:att_foldl/3 against a 128Kb
attachment written using a buffer of 4Kb and 32Kb:

Buffer size of 4Kb:

4> Att = hd(Doc#doc.atts).
     type = <<"application/x-www-form-urlencoded">>,
     att_len = 131072,disk_len = 131072,
     md5 = <<76,223,24,223,251,252,182,43,98,104,11,89,222,122,
             185,12>>,
     revpos = 1,
     data = {<0.159.0>,
             [{79,8192},
              {8277,8192},
              {16475,8192},
              {24673,8192},
              {32871,8192},
              {41069,8192},
              {49267,8192},
              {57465,8192},
              {65663,8192},
              {73861,8192},
              {82059,8192},
              {90257,8192},
              {98455,8192},
              {106653,8192},
              {114851,8192},
              {123049,8192}]},
     encoding = identity}
5> element(1, timer:tc(couch_doc, att_foldl, [Att, fun(B, A) -> [B | A] end, []])).
1353

Buffer size of 32Kb:

4> Att = hd(Doc#doc.atts).
     type = <<"application/x-www-form-urlencoded">>,
     att_len = 131072,disk_len = 131072,
     md5 = <<76,223,24,223,251,252,182,43,98,104,11,89,222,122,
             185,12>>,
     revpos = 1,
     data = {<0.157.0>,
             [{79,40960},{41053,40960},{82027,40960},{123001,8192}]},
     encoding = identity}
5> element(1, timer:tc(couch_doc, att_foldl, [Att, fun(B, A) -> [B | A] end, []])).
782

Closes COUCHDB-1009.


Project: http://git-wip-us.apache.org/repos/asf/couchdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb/commit/f4bdd95e
Tree: http://git-wip-us.apache.org/repos/asf/couchdb/tree/f4bdd95e
Diff: http://git-wip-us.apache.org/repos/asf/couchdb/diff/f4bdd95e

Branch: refs/heads/master
Commit: f4bdd95edcab4f8e49575aa3d7f040c134644947
Parents: 6621c53
Author: Filipe David Manana <fdmanana@apache.org>
Authored: Sun Oct 30 14:13:14 2011 +0000
Committer: Filipe David Manana <fdmanana@apache.org>
Committed: Sun Oct 30 14:19:20 2011 +0000

----------------------------------------------------------------------
 etc/couchdb/default.ini.tpl.in |    4 ++++
 src/couchdb/couch_db.erl       |   11 ++++++++---
 src/couchdb/couch_stream.erl   |   21 +++++++++++++--------
 test/etap/050-stream.t         |    2 +-
 4 files changed, 26 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb/blob/f4bdd95e/etc/couchdb/default.ini.tpl.in
----------------------------------------------------------------------
diff --git a/etc/couchdb/default.ini.tpl.in b/etc/couchdb/default.ini.tpl.in
index 7fd3cb4..021732e 100644
--- a/etc/couchdb/default.ini.tpl.in
+++ b/etc/couchdb/default.ini.tpl.in
@@ -22,6 +22,10 @@ uri_file = %localstaterundir%/couch.uri
 ; deflate_[N]  - use zlib's deflate, N is the compression level which ranges from 1 (fastest,
 ;                lowest compression ratio) to 9 (slowest, highest compression ratio)
 file_compression = snappy
+; Higher values may give better read performance due to less read operations
+; and/or more OS page cache hits, but they can also increase overall response
+; time for writes when there are many attachment write requests in parallel.
+attachment_stream_buffer_size = 4096
 
 [database_compaction]
 ; larger buffer sizes can originate smaller files

http://git-wip-us.apache.org/repos/asf/couchdb/blob/f4bdd95e/src/couchdb/couch_db.erl
----------------------------------------------------------------------
diff --git a/src/couchdb/couch_db.erl b/src/couchdb/couch_db.erl
index acedd53..325689f 100644
--- a/src/couchdb/couch_db.erl
+++ b/src/couchdb/couch_db.erl
@@ -900,10 +900,12 @@ flush_att(Fd, #att{data=Data}=Att) when is_binary(Data) ->
     end);
 
 flush_att(Fd, #att{data=Fun,att_len=undefined}=Att) when is_function(Fun) ->
+    MaxChunkSize = list_to_integer(
+        couch_config:get("couchdb", "attachment_stream_buffer_size", "4096")),
     with_stream(Fd, Att, fun(OutputStream) ->
         % Fun(MaxChunkSize, WriterFun) must call WriterFun
         % once for each chunk of the attachment,
-        Fun(4096,
+        Fun(MaxChunkSize,
             % WriterFun({Length, Binary}, State)
             % WriterFun({0, _Footers}, State)
             % Called with Length == 0 on the last time.
@@ -955,15 +957,18 @@ compressible_att_type(MimeType) ->
 % trailer, we're free to ignore this inconsistency and
 % pretend that no Content-MD5 exists.
 with_stream(Fd, #att{md5=InMd5,type=Type,encoding=Enc}=Att, Fun) ->
+    BufferSize = list_to_integer(
+        couch_config:get("couchdb", "attachment_stream_buffer_size", "4096")),
     {ok, OutputStream} = case (Enc =:= identity) andalso
         compressible_att_type(Type) of
     true ->
         CompLevel = list_to_integer(
             couch_config:get("attachments", "compression_level", "0")
         ),
-        couch_stream:open(Fd, gzip, [{compression_level, CompLevel}]);
+        couch_stream:open(Fd, [{buffer_size, BufferSize},
+            {encoding, gzip}, {compression_level, CompLevel}]);
     _ ->
-        couch_stream:open(Fd)
+        couch_stream:open(Fd, [{buffer_size, BufferSize}])
     end,
     ReqMd5 = case Fun(OutputStream) of
         {md5, FooterMd5} ->

http://git-wip-us.apache.org/repos/asf/couchdb/blob/f4bdd95e/src/couchdb/couch_stream.erl
----------------------------------------------------------------------
diff --git a/src/couchdb/couch_stream.erl b/src/couchdb/couch_stream.erl
index 2781121..959feef 100644
--- a/src/couchdb/couch_stream.erl
+++ b/src/couchdb/couch_stream.erl
@@ -14,7 +14,7 @@
 -behaviour(gen_server).
 
 % public API
--export([open/1, open/3, close/1]).
+-export([open/1, open/2, close/1]).
 -export([foldl/4, foldl/5, foldl_decode/6, range_foldl/6]).
 -export([copy_to_new_stream/3, write/2]).
 
@@ -24,12 +24,14 @@
 
 -include("couch_db.hrl").
 
+-define(DEFAULT_BUFFER_SIZE, 4096).
+
 -record(stream,
     {fd = 0,
     written_pointers=[],
     buffer_list = [],
     buffer_len = 0,
-    max_buffer = 4096,
+    max_buffer,
     written_len = 0,
     md5,
     % md5 of the content without any transformation applied (e.g. compression)
@@ -44,10 +46,10 @@
 %%% Interface functions %%%
 
 open(Fd) ->
-    open(Fd, identity, []).
+    open(Fd, []).
 
-open(Fd, Encoding, Options) ->
-    gen_server:start_link(couch_stream, {Fd, Encoding, Options}, []).
+open(Fd, Options) ->
+    gen_server:start_link(couch_stream, {Fd, Options}, []).
 
 close(Pid) ->
     gen_server:call(Pid, close, infinity).
@@ -194,8 +196,9 @@ write(Pid, Bin) ->
     gen_server:call(Pid, {write, Bin}, infinity).
 
 
-init({Fd, Encoding, Options}) ->
-    {EncodingFun, EndEncodingFun} = case Encoding of
+init({Fd, Options}) ->
+    {EncodingFun, EndEncodingFun} =
+    case couch_util:get_value(encoding, Options, identity) of
     identity ->
         identity_enc_dec_funs();
     gzip ->
@@ -206,7 +209,9 @@ init({Fd, Encoding, Options}) ->
             md5=couch_util:md5_init(),
             identity_md5=couch_util:md5_init(),
             encoding_fun=EncodingFun,
-            end_encoding_fun=EndEncodingFun
+            end_encoding_fun=EndEncodingFun,
+            max_buffer=couch_util:get_value(
+                buffer_size, Options, ?DEFAULT_BUFFER_SIZE)
         }
     }.
 

http://git-wip-us.apache.org/repos/asf/couchdb/blob/f4bdd95e/test/etap/050-stream.t
----------------------------------------------------------------------
diff --git a/test/etap/050-stream.t b/test/etap/050-stream.t
index d30b524..de0dfad 100755
--- a/test/etap/050-stream.t
+++ b/test/etap/050-stream.t
@@ -67,7 +67,7 @@ test() ->
 
     % Stream more the 4K chunk size.
     {ok, ExpPtr2} = couch_file:bytes(Fd),
-    {ok, Stream3} = couch_stream:open(Fd),
+    {ok, Stream3} = couch_stream:open(Fd, [{buffer_size, 4096}]),
     Acc2 = lists:foldl(fun(_, Acc) ->
         Data = <<"a1b2c">>,
         couch_stream:write(Stream3, Data),


Mime
View raw message