couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject [couchdb] 02/02: Enforce partition size limits
Date Wed, 02 Jan 2019 21:18:59 GMT
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch feature/database-partition-limits
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit f0efadb031f0211d192627344848a88a2acdf053
Author: Paul J. Davis <paul.joseph.davis@gmail.com>
AuthorDate: Fri Dec 14 11:06:03 2018 -0600

    Enforce partition size limits
    
    This limit helps prevent users from inadvertently misusing partitions by
    refusing to add documents when the size of a partition exceeds 10GiB.
    
    Co-authored-by: Robert Newson <rnewson@apache.org>
---
 rel/overlay/etc/default.ini        |  5 +++
 src/chttpd/src/chttpd.erl          |  3 ++
 src/couch/src/couch_db_updater.erl | 81 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index a77add4..ae9d313 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -64,6 +64,11 @@ default_engine = couch
 ; move deleted databases/shards there instead. You can then manually delete
 ; these files later, as desired.
 ;enable_database_recovery = false
+;
+; Set the maximum size allowed for a partition. This helps users avoid
+; inadvertently abusing partitions resulting in hot shards. The default
+; is 10GiB. A value of 0 or less will disable partition size checks.
+;max_partition_size = 10737418240
 
 [couchdb_engines]
 ; The keys in this section are the filename extension that
diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl
index 2f241cd..6558b1e 100644
--- a/src/chttpd/src/chttpd.erl
+++ b/src/chttpd/src/chttpd.erl
@@ -873,6 +873,9 @@ error_info(conflict) ->
     {409, <<"conflict">>, <<"Document update conflict.">>};
 error_info({conflict, _}) ->
     {409, <<"conflict">>, <<"Document update conflict.">>};
+error_info({partition_overflow, DocId}) ->
+    Descr = <<"'", DocId/binary, "' exceeds partition limit">>,
+    {403, <<"partition_overflow">>, Descr};
 error_info({{not_found, missing}, {_, _}}) ->
     {409, <<"not_found">>, <<"missing_rev">>};
 error_info({forbidden, Error, Msg}) ->
diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index 95508e2..00fee90 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -21,6 +21,7 @@
 -include("couch_db_int.hrl").
 
 -define(IDLE_LIMIT_DEFAULT, 61000).
+-define(DEFAULT_MAX_PARTITION_SIZE, 16#280000000). % 10 GiB
 
 
 -record(merge_acc, {
@@ -28,7 +29,8 @@
     merge_conflicts,
     add_infos = [],
     rem_seqs = [],
-    cur_seq
+    cur_seq,
+    full_partitions = []
 }).
 
 
@@ -466,13 +468,22 @@ merge_rev_trees([], [], Acc) ->
 merge_rev_trees([NewDocs | RestDocsList], [OldDocInfo | RestOldInfo], Acc) ->
     #merge_acc{
         revs_limit = Limit,
-        merge_conflicts = MergeConflicts
+        merge_conflicts = MergeConflicts,
+        full_partitions = FullPartitions
     } = Acc,

     % Track doc ids so we can debug large revision trees
     erlang:put(last_id_merged, OldDocInfo#full_doc_info.id),
     NewDocInfo0 = lists:foldl(fun({Client, NewDoc}, OldInfoAcc) ->
-        merge_rev_tree(OldInfoAcc, NewDoc, Client, MergeConflicts)
+        NewInfo = merge_rev_tree(OldInfoAcc, NewDoc, Client, MergeConflicts),
+        case is_overflowed(NewInfo, OldInfoAcc, FullPartitions) of
+            true when not MergeConflicts ->
+                DocId = NewInfo#full_doc_info.id,
+                send_result(Client, NewDoc, {partition_overflow, DocId}),
+                OldInfoAcc; % refuse this edit and keep the previous tree
+            _ -> % not overflowed, or MergeConflicts is set: keep the merge
+                NewInfo
+        end
     end, OldDocInfo, NewDocs),
     NewDocInfo1 = maybe_stem_full_doc_info(NewDocInfo0, Limit),
     % When MergeConflicts is false, we updated #full_doc_info.deleted on every
@@ -595,6 +606,16 @@ merge_rev_tree(OldInfo, NewDoc, _Client, true) ->
     {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0),
     OldInfo#full_doc_info{rev_tree = NewTree}.
 
+% True when New is larger than Old and its partition is already full.
+is_overflowed(_New, _Old, []) -> false;
+is_overflowed(Old, Old, _FullPartitions) -> false;
+is_overflowed(New, Old, FullPartitions) ->
+    Partition = couch_partition:from_docid(New#full_doc_info.id),
+    % Test membership first so andalso skips both rev-tree size walks
+    % for docs whose partition still has room.
+    lists:member(Partition, FullPartitions)
+        andalso estimate_size(New) > estimate_size(Old).
+
 maybe_stem_full_doc_info(#full_doc_info{rev_tree = Tree} = Info, Limit) ->
     case config:get_boolean("couchdb", "stem_interactive_updates", true) of
         true ->
@@ -617,13 +638,31 @@ update_docs_int(Db, DocsList, LocalDocs, MergeConflicts, FullCommit) ->
         (Id, not_found) ->
             #full_doc_info{id=Id}
     end, Ids, OldDocLookups),
+
+    %% Get the list of full partitions
+    FullPartitions = case couch_db:is_partitioned(Db) of
+        true ->
+            case max_partition_size() of
+                N when N =< 0 ->
+                    [];
+                Max ->
+                    Partitions = lists:usort(lists:map(fun(Id) ->
+                        couch_partition:from_docid(Id)
+                    end, Ids)),
+                    [P || P <- Partitions, partition_size(Db, P) >= Max]
+            end;
+        false ->
+            []
+    end,
+
     % Merge the new docs into the revision trees.
     AccIn = #merge_acc{
         revs_limit = RevsLimit,
         merge_conflicts = MergeConflicts,
         add_infos = [],
         rem_seqs = [],
-        cur_seq = UpdateSeq
+        cur_seq = UpdateSeq,
+        full_partitions = FullPartitions
     },
     {ok, AccOut} = merge_rev_trees(DocsList, OldDocInfos, AccIn),
     #merge_acc{
@@ -685,6 +724,40 @@ increment_local_doc_revs(#doc{revs = {0, [RevStr | _]}} = Doc) ->
 increment_local_doc_revs(#doc{}) ->
     {error, <<"Invalid rev format">>}.
 
+max_partition_size() -> % limit read from the [couchdb] config section
+    Default = ?DEFAULT_MAX_PARTITION_SIZE, % 10 GiB
+    config:get_integer("couchdb", "max_partition_size", Default).
+
+% External size of Partition in Db, as reported by get_partition_info.
+partition_size(Db, Partition) ->
+    {ok, PartInfo} = couch_db:get_partition_info(Db, Partition),
+    couch_util:get_value(external, couch_util:get_value(sizes, PartInfo)).
+
+% Estimate a doc's size: fold add_sizes over the leaves of its rev tree,
+% then add the attachment sizes to the second accumulator element
+% (presumably the summed external body size -- confirm in add_sizes).
+estimate_size(#full_doc_info{rev_tree = RevTree}) ->
+    LeafSizes = fun
+        (_Rev, _Value, branch, Acc) ->
+            Acc;
+        (_Rev, Value, leaf, Acc) ->
+            case Value of
+                #leaf{} ->
+                    add_sizes(leaf, Value, Acc);
+                #doc{meta = Meta} ->
+                    % A #doc{} keeps its sizes in meta; wrap them in a
+                    % #leaf{} so add_sizes takes it like any other leaf.
+                    BodySize = get_meta_body_size(Meta),
+                    {size_info, AttSizes} =
+                        lists:keyfind(size_info, 1, Meta),
+                    Sizes = #size_info{external = BodySize},
+                    Leaf = #leaf{sizes = Sizes, atts = AttSizes},
+                    add_sizes(leaf, Leaf, Acc)
+            end
+    end,
+    {_, ExtSize, Atts} = couch_key_tree:fold(LeafSizes, {0, 0, []}, RevTree),
+    AttSize = lists:sum(lists:map(fun({_, Size}) -> Size end, Atts)),
+    ExtSize + AttSize.
 
 purge_docs(Db, []) ->
     {ok, Db, []};


Mime
View raw message