couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tonysu...@apache.org
Subject [couchdb] branch master updated: Use Ejson Body Instead of Compressed Body for External size (#606)
Date Sat, 15 Jul 2017 17:26:56 GMT
This is an automated email from the ASF dual-hosted git repository.

tonysun83 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/master by this push:
     new dce6e34  Use Ejson Body Instead of Compressed Body for External size (#606)
dce6e34 is described below

commit dce6e34686329e711e1a6c50aae00761ecb3262e
Author: Tony Sun <tony.sun427@gmail.com>
AuthorDate: Sat Jul 15 10:26:53 2017 -0700

    Use Ejson Body Instead of Compressed Body for External size (#606)
    
    Use ejson body instead of compressed body for external size
    
    In two places where we calculate the ExternalSize of the document body,
    we use the Summary, which is a compressed version of the doc body. We
    change this to use the actual ejson body. In copy_docs, we don't have
    access to the #doc record, so we can't access the meta where we store
    the ejson body size. Unfortunately, this means we have to decompress
    the document body after reading it from disk.
    
    COUCHDB-3429
---
 src/couch/src/couch_db.erl                        |  6 +++++-
 src/couch/src/couch_db_updater.erl                | 22 ++++++++++++++++++++--
 src/couch/test/couchdb_file_compression_tests.erl | 18 +++++++++++++++++-
 3 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl
index d01a3e0..e4e3a8b 100644
--- a/src/couch/src/couch_db.erl
+++ b/src/couch/src/couch_db.erl
@@ -1111,7 +1111,11 @@ prepare_doc_summaries(Db, BucketList) ->
                 nil
             end,
             SummaryChunk = couch_db_updater:make_doc_summary(Db, {Body, DiskAtts}),
-            Doc#doc{body = {summary, SummaryChunk, SizeInfo, AttsFd}}
+            Meta = Doc#doc.meta,
+            Doc#doc{
+                body = {summary, SummaryChunk, SizeInfo, AttsFd},
+                meta = [{ejson_size, ?term_size(Body)} | Meta]
+            }
         end,
         Bucket) || Bucket <- BucketList].
 
diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index 49061b2..277f2b5 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -677,6 +677,7 @@ flush_trees(#db{fd = Fd} = Db,
             case Value of
             #doc{deleted = IsDeleted, body = {summary, _, _, _} = DocSummary} ->
                 {summary, Summary, AttSizeInfo, AttsFd} = DocSummary,
+                ExternalSize = get_meta_body_size(Value#doc.meta, Summary),
                 % this node value is actually an unwritten document summary,
                 % write to disk.
                 % make sure the Fd in the written bins is the same Fd we are
@@ -695,7 +696,6 @@ flush_trees(#db{fd = Fd} = Db,
                                     " changed. Possibly retrying.", []),
                     throw(retry)
                 end,
-                ExternalSize = ?term_size(Summary),
                 {ok, NewSummaryPointer, SummarySize} =
                     couch_file:append_raw_chunk(Fd, Summary),
                 Leaf = #leaf{
@@ -1086,8 +1086,16 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
         {NewRevTree, FinalAcc} = couch_key_tree:mapfold(fun
             (_Rev, #leaf{ptr=Sp}=Leaf, leaf, SizesAcc) ->
                 {Body, AttInfos} = copy_doc_attachments(Db, Sp, DestFd),
+                % In the future, we should figure out how to do this for
+                % upgrade purposes.
+                EJsonBody = case is_binary(Body) of
+                    true ->
+                        couch_compress:decompress(Body);
+                    false ->
+                        Body
+                end,
                 SummaryChunk = make_doc_summary(NewDb, {Body, AttInfos}),
-                ExternalSize = ?term_size(SummaryChunk),
+                ExternalSize = ?term_size(EJsonBody),
                 {ok, Pos, SummarySize} = couch_file:append_raw_chunk(
                     DestFd, SummaryChunk),
                 AttSizes = [{element(3,A), element(4,A)} || A <- AttInfos],
@@ -1467,6 +1475,16 @@ make_doc_summary(#db{compression = Comp}, {Body0, Atts0}) ->
     SummaryBin = ?term_to_bin({Body, Atts}),
     couch_file:assemble_file_chunk(SummaryBin, couch_crypto:hash(md5, SummaryBin)).
 
+
+get_meta_body_size(Meta, Summary) ->
+    case lists:keyfind(ejson_size, 1, Meta) of
+        {ejson_size, ExternalSize} ->
+            ExternalSize;
+        false ->
+            ?term_size(couch_compress:decompress(Summary))
+    end.
+
+
 default_security_object(<<"shards/", _/binary>>) ->
     case config:get("couchdb", "default_security", "everyone") of
         "admin_only" ->
diff --git a/src/couch/test/couchdb_file_compression_tests.erl b/src/couch/test/couchdb_file_compression_tests.erl
index ccfa244..41d0556 100644
--- a/src/couch/test/couchdb_file_compression_tests.erl
+++ b/src/couch/test/couchdb_file_compression_tests.erl
@@ -119,16 +119,19 @@ should_compare_compression_methods(DbName) ->
 
 compare_compression_methods(DbName) ->
     config:set("couchdb", "file_compression", "none", false),
+    ExternalSizePreCompact = db_external_size(DbName),
     compact_db(DbName),
     compact_view(DbName),
     DbSizeNone = db_disk_size(DbName),
     ViewSizeNone = view_disk_size(DbName),
+    ExternalSizeNone = db_external_size(DbName),
 
     config:set("couchdb", "file_compression", "snappy", false),
     compact_db(DbName),
     compact_view(DbName),
     DbSizeSnappy = db_disk_size(DbName),
     ViewSizeSnappy = view_disk_size(DbName),
+    ExternalSizeSnappy = db_external_size(DbName),
 
     ?assert(DbSizeNone > DbSizeSnappy),
     ?assert(ViewSizeNone > ViewSizeSnappy),
@@ -147,9 +150,13 @@ compare_compression_methods(DbName) ->
     compact_view(DbName),
     DbSizeDeflate9 = db_disk_size(DbName),
     ViewSizeDeflate9 = view_disk_size(DbName),
+    ExternalSizeDeflate9 = db_external_size(DbName),
 
     ?assert(DbSizeDeflate1 > DbSizeDeflate9),
-    ?assert(ViewSizeDeflate1 > ViewSizeDeflate9).
+    ?assert(ViewSizeDeflate1 > ViewSizeDeflate9),
+    ?assert(ExternalSizePreCompact =:= ExternalSizeNone),
+    ?assert(ExternalSizeNone =:= ExternalSizeSnappy),
+    ?assert(ExternalSizeNone =:= ExternalSizeDeflate9).
 
 
 populate_db(_Db, NumDocs) when NumDocs =< 0 ->
@@ -194,6 +201,12 @@ db_disk_size(DbName) ->
     ok = couch_db:close(Db),
     active_size(Info).
 
+db_external_size(DbName) ->
+    {ok, Db} = couch_db:open_int(DbName, []),
+    {ok, Info} = couch_db:get_db_info(Db),
+    ok = couch_db:close(Db),
+    external_size(Info).
+
 view_disk_size(DbName) ->
     {ok, Db} = couch_db:open_int(DbName, []),
     {ok, DDoc} = couch_db:open_doc(Db, ?DDOC_ID, [ejson_body]),
@@ -204,6 +217,9 @@ view_disk_size(DbName) ->
 active_size(Info) ->
     couch_util:get_nested_json_value({Info}, [sizes, active]).
 
+external_size(Info) ->
+    couch_util:get_nested_json_value({Info}, [sizes, external]).
+
 wait_compaction(DbName, Kind, Line) ->
     WaitFun = fun() ->
        case is_compaction_running(DbName) of

-- 
To stop receiving notification emails like this one, please contact
commits@couchdb.apache.org.

Mime
View raw message