couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject [couchdb] 06/06: Ensure deterministic revisions for attachments
Date Wed, 28 Feb 2018 16:32:53 GMT
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 49d41949f59d39e3f1d24f76e8bf281dd17745f5
Author: Paul J. Davis <paul.joseph.davis@gmail.com>
AuthorDate: Wed Feb 8 07:25:37 2017 -0600

    Ensure deterministic revisions for attachments
    
    This re-fixes a corner case when recreating a document with an
    attachment in a single multipart request. Since we don't detect that we
    need a new revision until after the document has been serialized we need
    to be able to deserialize the body so that we can generate the same
    revisions regardless of the contents of the database. If we don't do
    this then we end up including information from the position of the
    attachment on disk in the revision calculation which can introduce
    branches in the revision tree.
    
    I've left this as a separate commit from the pluggable storage engine
    work so that it's called out clearly for us to revisit.
    
    COUCHDB-3255
---
 src/couch/src/couch_bt_engine.erl  | 10 +++++++++-
 src/couch/src/couch_db.erl         | 12 +-----------
 src/couch/src/couch_db_updater.erl | 12 +++++++++++-
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/src/couch/src/couch_bt_engine.erl b/src/couch/src/couch_bt_engine.erl
index c5dda77..347c931 100644
--- a/src/couch/src/couch_bt_engine.erl
+++ b/src/couch/src/couch_bt_engine.erl
@@ -331,7 +331,15 @@ serialize_doc(#st{} = St, #doc{} = Doc) ->
     SummaryBin = ?term_to_bin({Body, Atts}),
     Md5 = crypto:hash(md5, SummaryBin),
     Data = couch_file:assemble_file_chunk(SummaryBin, Md5),
-    Doc#doc{body = Data}.
+    % TODO: This is a terrible hack to get around the issues
+    %       in COUCHDB-3255. We'll need to come back and figure
+    %       out a better approach to handling the case when we
+    %       need to generate a new revision id after the doc
+    %       has been serialized.
+    Doc#doc{
+        body = Data,
+        meta = [{comp_body, Body} | Doc#doc.meta]
+    }.
 
 
 write_doc_body(St, #doc{} = Doc) ->
diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl
index e5bb949..2ea94b9 100644
--- a/src/couch/src/couch_db.erl
+++ b/src/couch/src/couch_db.erl
@@ -941,7 +941,7 @@ prep_and_validate_replicated_updates(Db, [Bucket|RestBuckets], [OldInfo|RestOldI
 
 
 
-new_revid(#doc{body=Body0, revs={OldStart,OldRevs}, atts=Atts, deleted=Deleted}) ->
+new_revid(#doc{body=Body, revs={OldStart,OldRevs}, atts=Atts, deleted=Deleted}) ->
     DigestedAtts = lists:foldl(fun(Att, Acc) ->
         [N, T, M] = couch_att:fetch([name, type, md5], Att),
         case M == <<>> of
@@ -949,16 +949,6 @@ new_revid(#doc{body=Body0, revs={OldStart,OldRevs}, atts=Atts, deleted=Deleted})
             false -> [{N, T, M} | Acc]
         end
     end, [], Atts),
-    Body = case Body0 of
-        {summary, [_Len, _Md5, BodyAtts], _SizeInfo, _AttsFd} ->
-            {CompBody, _CompAtts} = binary_to_term(BodyAtts),
-            couch_compress:decompress(CompBody);
-        {summary, [_Len, BodyAtts], _SizeInfo, _AttsFd} ->
-            {CompBody, _CompAtts} = binary_to_term(BodyAtts),
-            couch_compress:decompress(CompBody);
-        Else ->
-            Else
-    end,
     case DigestedAtts of
         Atts2 when length(Atts) =/= length(Atts2) ->
             % We must have old style non-md5 attachments
diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index 67126be..79567e9 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -557,7 +557,17 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false)
             % Update the new doc based on revisions in OldInfo
             #doc_info{revs=[WinningRev | _]} = couch_doc:to_doc_info(OldInfo),
             #rev_info{rev={OldPos, OldRev}} = WinningRev,
-            NewRevId = couch_db:new_revid(NewDoc#doc{revs={OldPos, [OldRev]}}),
+            Body = case couch_util:get_value(comp_body, NewDoc#doc.meta) of
+                CompBody when is_binary(CompBody) ->
+                    couch_compress:decompress(CompBody);
+                _ ->
+                    NewDoc#doc.body
+            end,
+            RevIdDoc = NewDoc#doc{
+                revs = {OldPos, [OldRev]},
+                body = Body
+            },
+            NewRevId = couch_db:new_revid(RevIdDoc),
             NewDoc2 = NewDoc#doc{revs={OldPos + 1, [NewRevId, OldRev]}},
 
             % Merge our modified new doc into the tree

-- 
To stop receiving notification emails like this one, please contact
davisp@apache.org.

Mime
View raw message