couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From fdman...@apache.org
Subject svn commit: r1102684 - in /couchdb/trunk: etc/couchdb/default.ini.tpl.in src/couchdb/couch_db_updater.erl src/couchdb/couch_view_compactor.erl
Date Fri, 13 May 2011 11:44:23 GMT
Author: fdmanana
Date: Fri May 13 11:44:22 2011
New Revision: 1102684

URL: http://svn.apache.org/viewvc?rev=1102684&view=rev
Log:
Configurable database and view compaction parameters

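The database compaction document buffer size and checkpoint interval, and
the view compaction key/value buffer size, are now configurable as byte
quantities. Larger buffer sizes can significantly decrease final file sizes
and, depending on the case, can make the compaction process faster or slower.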

Closes COUCHDB-1142.


Modified:
    couchdb/trunk/etc/couchdb/default.ini.tpl.in
    couchdb/trunk/src/couchdb/couch_db_updater.erl
    couchdb/trunk/src/couchdb/couch_view_compactor.erl

Modified: couchdb/trunk/etc/couchdb/default.ini.tpl.in
URL: http://svn.apache.org/viewvc/couchdb/trunk/etc/couchdb/default.ini.tpl.in?rev=1102684&r1=1102683&r2=1102684&view=diff
==============================================================================
--- couchdb/trunk/etc/couchdb/default.ini.tpl.in (original)
+++ couchdb/trunk/etc/couchdb/default.ini.tpl.in Fri May 13 11:44:22 2011
@@ -20,6 +20,15 @@ uri_file = %localstaterundir%/couch.uri
 ;                lowest compression ratio) to 9 (slowest, highest compression ratio)
 file_compression = snappy
 
+[database_compaction]
+; larger buffer sizes can produce smaller files
+doc_buffer_size = 524288 ; value in bytes
+checkpoint_after = 5242880 ; checkpoint after every N bytes have been written
+
+[view_compaction]
+; larger buffer sizes can produce smaller files
+keyvalue_buffer_size = 2097152 ; value in bytes
+
 [httpd]
 port = 5984
 bind_address = 127.0.0.1

Modified: couchdb/trunk/src/couchdb/couch_db_updater.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_db_updater.erl?rev=1102684&r1=1102683&r2=1102684&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_db_updater.erl (original)
+++ couchdb/trunk/src/couchdb/couch_db_updater.erl Fri May 13 11:44:22 2011
@@ -878,32 +878,48 @@ copy_compact(Db, NewDb0, Retry) ->
     FsyncOptions = [Op || Op <- NewDb0#db.fsync_options, Op == before_header],
     NewDb = NewDb0#db{fsync_options=FsyncOptions},
     TotalChanges = couch_db:count_changes_since(Db, NewDb#db.update_seq),
+    BufferSize = list_to_integer(
+        couch_config:get("database_compaction", "doc_buffer_size", "524288")),
+    CheckpointAfter = couch_util:to_integer(
+        couch_config:get("database_compaction", "checkpoint_after",
+            BufferSize * 10)),
+
     EnumBySeqFun =
-    fun(#doc_info{high_seq=Seq}=DocInfo, _Offset, {AccNewDb, AccUncopied, TotalCopied}) ->
-        couch_task_status:update("Copied ~p of ~p changes (~p%)",
-                [TotalCopied, TotalChanges, (TotalCopied*100) div TotalChanges]),
-        if TotalCopied rem 1000 =:= 0 ->
-            NewDb2 = copy_docs(Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry),
-            if TotalCopied rem 10000 =:= 0 ->
-                {ok, {commit_data(NewDb2#db{update_seq=Seq}), [], TotalCopied + 1}};
+    fun(#doc_info{high_seq=Seq}=DocInfo, _Offset,
+        {AccNewDb, AccUncopied, AccUncopiedSize, AccCopiedSize, TotalCopied}) ->
+
+        AccUncopiedSize2 = AccUncopiedSize + byte_size(?term_to_bin(DocInfo)),
+        if AccUncopiedSize2 >= BufferSize ->
+            NewDb2 = copy_docs(
+                Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry),
+            TotalCopied2 = TotalCopied + 1 + length(AccUncopied),
+            couch_task_status:update("Copied ~p of ~p changes (~p%)",
+                [TotalCopied2, TotalChanges, (TotalCopied2 * 100) div TotalChanges]),
+            AccCopiedSize2 = AccCopiedSize + AccUncopiedSize2,
+            if AccCopiedSize2 >= CheckpointAfter ->
+                {ok, {commit_data(NewDb2#db{update_seq = Seq}), [],
+                    0, 0, TotalCopied2}};
             true ->
-                {ok, {NewDb2#db{update_seq=Seq}, [], TotalCopied + 1}}
+                {ok, {NewDb2#db{update_seq = Seq}, [],
+                    0, AccCopiedSize2, TotalCopied2}}
             end;
         true ->
-            {ok, {AccNewDb, [DocInfo | AccUncopied], TotalCopied + 1}}
+            {ok, {AccNewDb, [DocInfo | AccUncopied], AccUncopiedSize2,
+                AccCopiedSize, TotalCopied}}
         end
     end,
 
     couch_task_status:set_update_frequency(500),
 
-    {ok, _, {NewDb2, Uncopied, TotalChanges}} =
+    {ok, _, {NewDb2, Uncopied, _, _, ChangesDone}} =
         couch_btree:foldl(Db#db.docinfo_by_seq_btree, EnumBySeqFun,
-            {NewDb, [], 0},
+            {NewDb, [], 0, 0, 0},
             [{start_key, NewDb#db.update_seq + 1}]),
 
     couch_task_status:update("Flushing"),
 
     NewDb3 = copy_docs(Db, NewDb2, lists:reverse(Uncopied), Retry),
+    TotalChanges = ChangesDone + length(Uncopied),
 
     % copy misc header values
     if NewDb3#db.security /= Db#db.security ->

Modified: couchdb/trunk/src/couchdb/couch_view_compactor.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_view_compactor.erl?rev=1102684&r1=1102683&r2=1102684&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_view_compactor.erl (original)
+++ couchdb/trunk/src/couchdb/couch_view_compactor.erl Fri May 13 11:44:22 2011
@@ -48,27 +48,31 @@ compact_group(Group, EmptyGroup) ->
     DbName = couch_db:name(Db),
     TaskName = <<DbName/binary, ShortName/binary>>,
     couch_task_status:add_task(<<"View Group Compaction">>, TaskName, <<"">>),
+    BufferSize = list_to_integer(
+        couch_config:get("view_compaction", "keyvalue_buffer_size", "2097152")),
 
-    Fun = fun({DocId, _ViewIdKeys} = KV, {Bt, Acc, TotalCopied, LastId}) ->
+    Fun = fun({DocId, _ViewIdKeys} = KV,
+            {Bt, Acc, AccSize, TotalCopied, LastId}) ->
         if DocId =:= LastId -> % COUCHDB-999
             Msg = "Duplicates of ~s detected in ~s ~s - rebuild required",
             exit(io_lib:format(Msg, [DocId, DbName, GroupId]));
         true -> ok end,
-        if TotalCopied rem 10000 =:= 0 ->
+        AccSize2 = AccSize + byte_size(?term_to_bin(KV)),
+        if AccSize2 >= BufferSize ->
+            {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])),
             couch_task_status:update("Copied ~p of ~p Ids (~p%)",
                 [TotalCopied, Count, (TotalCopied*100) div Count]),
-            {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])),
-            {ok, {Bt2, [], TotalCopied+1, DocId}};
+            {ok, {Bt2, [], 0, TotalCopied + 1 + length(Acc), DocId}};
         true ->
-            {ok, {Bt, [KV|Acc], TotalCopied+1, DocId}}
+            {ok, {Bt, [KV|Acc], AccSize2, TotalCopied, DocId}}
         end
     end,
-    {ok, _, {Bt3, Uncopied, _Total, _LastId}} = couch_btree:foldl(IdBtree, Fun,
-        {EmptyIdBtree, [], 0, nil}),
+    {ok, _, {Bt3, Uncopied, _, _Total, _LastId}} = couch_btree:foldl(
+        IdBtree, Fun, {EmptyIdBtree, [], 0, 0, nil}),
     {ok, NewIdBtree} = couch_btree:add(Bt3, lists:reverse(Uncopied)),
 
     NewViews = lists:map(fun({View, EmptyView}) ->
-        compact_view(View, EmptyView)
+        compact_view(View, EmptyView, BufferSize)
     end, lists:zip(Views, EmptyViews)),
 
     NewGroup = EmptyGroup#group{
@@ -81,23 +85,25 @@ compact_group(Group, EmptyGroup) ->
     gen_server:cast(Pid, {compact_done, NewGroup}).
 
 %% @spec compact_view(View, EmptyView, Retry) -> CompactView
-compact_view(View, EmptyView) ->
+compact_view(View, EmptyView, BufferSize) ->
     {ok, Count} = couch_view:get_row_count(View),
 
     %% Key is {Key,DocId}
-    Fun = fun(KV, {Bt, Acc, TotalCopied}) ->
-        if TotalCopied rem 10000 =:= 0 ->
-            couch_task_status:update("View #~p: copied ~p of ~p KVs (~p%)",
-                [View#view.id_num, TotalCopied, Count, (TotalCopied*100) div Count]),
+    Fun = fun(KV, {Bt, Acc, AccSize, TotalCopied}) ->
+        AccSize2 = AccSize + byte_size(?term_to_bin(KV)),
+        if AccSize2 >= BufferSize ->
             {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])),
-            {ok, {Bt2, [], TotalCopied + 1}};
+            couch_task_status:update("View #~p: copied ~p of ~p KVs (~p%)",
+                [View#view.id_num, TotalCopied, Count,
+                    (TotalCopied*100) div Count]),
+            {ok, {Bt2, [], 0, TotalCopied + 1 + length(Acc)}};
         true ->
-            {ok, {Bt, [KV|Acc], TotalCopied + 1}}
+            {ok, {Bt, [KV|Acc], AccSize2, TotalCopied}}
         end
     end,
 
-    {ok, _, {Bt3, Uncopied, _Total}} = couch_btree:foldl(View#view.btree, Fun,
-        {EmptyView#view.btree, [], 0}),
+    {ok, _, {Bt3, Uncopied, _, _Total}} = couch_btree:foldl(
+        View#view.btree, Fun, {EmptyView#view.btree, [], 0, 0}),
     {ok, NewBt} = couch_btree:add(Bt3, lists:reverse(Uncopied)),
     EmptyView#view{btree = NewBt}.
 



Mime
View raw message