couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject svn commit: r891077 - in /couchdb/trunk: THANKS src/couchdb/couch_db.erl src/couchdb/couch_httpd.erl src/couchdb/couch_httpd_db.erl src/couchdb/couch_util.erl src/mochiweb/mochiweb_headers.erl test/etap/130-attachments-md5.t
Date Wed, 16 Dec 2009 00:05:36 GMT
Author: davisp
Date: Wed Dec 16 00:05:35 2009
New Revision: 891077

URL: http://svn.apache.org/viewvc?rev=891077&view=rev
Log:
Provide Content-MD5 header support for attachments.

Fixes COUCHDB-558.

Thanks to Filipe Manana we now have checks for attachment transfer integrity
using the Content-MD5 header (or trailer). Use of this integrity check is
triggered by specifying a Content-MD5 header in your request with a value that
is a base64 encoded md5. For requests that are using a chunked Transfer-Encoding
it is also possible to use a trailer so that the Content-MD5 doesn't need to be
known before transfer. This works by specifying a header "Trailer:
Content-MD5" and then in the final chunk (the one with a size of zero) you can
specify a Content-MD5 with exactly the same format as in the request headers.

See the ETap test 130-attachments-md5.t for explicit examples of the request
messages.


Added:
    couchdb/trunk/test/etap/130-attachments-md5.t   (with props)
Modified:
    couchdb/trunk/THANKS
    couchdb/trunk/src/couchdb/couch_db.erl
    couchdb/trunk/src/couchdb/couch_httpd.erl
    couchdb/trunk/src/couchdb/couch_httpd_db.erl
    couchdb/trunk/src/couchdb/couch_util.erl
    couchdb/trunk/src/mochiweb/mochiweb_headers.erl

Modified: couchdb/trunk/THANKS
URL: http://svn.apache.org/viewvc/couchdb/trunk/THANKS?rev=891077&r1=891076&r2=891077&view=diff
==============================================================================
--- couchdb/trunk/THANKS (original)
+++ couchdb/trunk/THANKS Wed Dec 16 00:05:35 2009
@@ -40,5 +40,6 @@
  * Joshua Bronson <jabronson@gmail.com>
  * Kostis Sagonas <kostis@cs.ntua.gr>
  * Matthew Hooker <mwhooker@gmail.com>
+ * Filipe Manana <fdmanana@gmail.com>
 
 For a list of authors see the `AUTHORS` file.

Modified: couchdb/trunk/src/couchdb/couch_db.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_db.erl?rev=891077&r1=891076&r2=891077&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_db.erl (original)
+++ couchdb/trunk/src/couchdb/couch_db.erl Wed Dec 16 00:05:35 2009
@@ -687,7 +687,7 @@
 
 check_md5(_NewSig, <<>>) -> ok;
 check_md5(Sig1, Sig2) when Sig1 == Sig2 -> ok;
-check_md5(_, _) -> throw(data_corruption).
+check_md5(_, _) -> throw(md5_mismatch).
 
 flush_att(Fd, #att{data={Fd0, _}}=Att) when Fd0 == Fd ->
     % already written to our file, nothing to write
@@ -713,8 +713,14 @@
             % WriterFun({0, _Footers}, State)
             % Called with Length == 0 on the last time.
             % WriterFun returns NewState.
-            fun({0, _Footers}, _) ->
-                ok;
+            fun({0, Footers}, _) ->
+                F = mochiweb_headers:from_binary(Footers),
+                case mochiweb_headers:get_value("Content-MD5", F) of
+                undefined ->
+                    ok;
+                Md5 ->
+                    {md5, base64:decode(Md5)}
+                end;
             ({_Length, Chunk}, _) ->
                 couch_stream:write(OutputStream, Chunk)
             end, ok)
@@ -725,11 +731,29 @@
         write_streamed_attachment(OutputStream, Fun, Len)
     end).
 
+% From RFC 2616 3.6.1 - Chunked Transfer Coding
+%
+%   In other words, the origin server is willing to accept
+%   the possibility that the trailer fields might be silently
+%   discarded along the path to the client.
+%
+% I take this to mean that if "Trailer: Content-MD5\r\n"
+% is present in the request, but there is no Content-MD5
+% trailer, we're free to ignore this inconsistency and
+% pretend that no Content-MD5 exists.
 with_stream(Fd, #att{md5=InMd5}=Att, Fun) ->
     {ok, OutputStream} = couch_stream:open(Fd),
-    Fun(OutputStream),
+    ReqMd5 = case Fun(OutputStream) of
+        {md5, FooterMd5} ->
+            case InMd5 of
+                md5_in_footer -> FooterMd5;
+                _ -> InMd5
+            end;
+        _ ->
+            InMd5
+    end,
     {StreamInfo, Len, Md5} = couch_stream:close(OutputStream),
-    check_md5(Md5, InMd5),
+    check_md5(Md5, ReqMd5),
     Att#att{data={Fd,StreamInfo},len=Len,md5=Md5}.
 
 

Modified: couchdb/trunk/src/couchdb/couch_httpd.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_httpd.erl?rev=891077&r1=891076&r2=891077&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_httpd.erl (original)
+++ couchdb/trunk/src/couchdb/couch_httpd.erl Wed Dec 16 00:05:35 2009
@@ -541,6 +541,9 @@
     {400, <<"bad_request">>, Reason};
 error_info({query_parse_error, Reason}) ->
     {400, <<"query_parse_error">>, Reason};
+% Prior art for md5 mismatch resulting in a 400 is from AWS S3
+error_info(md5_mismatch) ->
+    {400, <<"content_md5_mismatch">>, <<"Possible message corruption.">>};
 error_info(not_found) ->
     {404, <<"not_found">>, <<"missing">>};
 error_info({not_found, Reason}) ->

Modified: couchdb/trunk/src/couchdb/couch_httpd_db.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_httpd_db.erl?rev=891077&r1=891076&r2=891077&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_httpd_db.erl (original)
+++ couchdb/trunk/src/couchdb/couch_httpd_db.erl Wed Dec 16 00:05:35 2009
@@ -1043,8 +1043,9 @@
                         undefined;
                     Length ->
                         list_to_integer(Length)
-                    end
-                    }]
+                    end,
+                md5 = get_md5_header(Req)
+            }]
     end,
 
     Doc = case extract_header_rev(Req, couch_httpd:qs_value(Req, "rev")) of
@@ -1084,6 +1085,27 @@
 db_attachment_req(Req, _Db, _DocId, _FileNameParts) ->
     send_method_not_allowed(Req, "DELETE,GET,HEAD,PUT").
 
+
+get_md5_header(Req) ->
+    ContentMD5 = couch_httpd:header_value(Req, "Content-MD5"),
+    Length = couch_httpd:body_length(Req),
+    Trailer = couch_httpd:header_value(Req, "Trailer"),
+    case {ContentMD5, Length, Trailer} of
+        _ when is_list(ContentMD5) orelse is_binary(ContentMD5) ->
+            base64:decode(ContentMD5);
+        {_, chunked, undefined} ->
+            <<>>;
+        {_, chunked, _} ->
+            case re:run(Trailer, "\\bContent-MD5\\b", [caseless]) of
+                {match, _} ->
+                    md5_in_footer;
+                _ ->
+                    <<>>
+            end;
+        _ ->
+            <<>>
+    end.
+
 parse_doc_format(FormatStr) when is_binary(FormatStr) ->
     parse_doc_format(?b2l(FormatStr));
 parse_doc_format(FormatStr) when is_list(FormatStr) ->

Modified: couchdb/trunk/src/couchdb/couch_util.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_util.erl?rev=891077&r1=891076&r2=891077&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_util.erl (original)
+++ couchdb/trunk/src/couchdb/couch_util.erl Wed Dec 16 00:05:35 2009
@@ -419,4 +419,7 @@
 
 json_decode(V) ->
     try (mochijson2:decoder([{object_hook, fun({struct,L}) -> {L} end}]))(V)
-    catch _:_ -> throw({invalid_json,V}) end.
+    catch
+        _Type:_Error ->
+            throw({invalid_json,V})
+    end.

Modified: couchdb/trunk/src/mochiweb/mochiweb_headers.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochiweb_headers.erl?rev=891077&r1=891076&r2=891077&view=diff
==============================================================================
--- couchdb/trunk/src/mochiweb/mochiweb_headers.erl (original)
+++ couchdb/trunk/src/mochiweb/mochiweb_headers.erl Wed Dec 16 00:05:35 2009
@@ -9,6 +9,7 @@
 -export([delete_any/2, get_primary_value/2]).
 -export([default/3, enter_from_list/2, default_from_list/2]).
 -export([to_list/1, make/1]).
+-export([from_binary/1]).
 -export([test/0]).
 
 %% @type headers().
@@ -37,6 +38,36 @@
                                              "content-type", H4),
     H4 = ?MODULE:delete_any("nonexistent-header", H4),
     H3 = ?MODULE:delete_any("content-type", H4),
+    HB = <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>,
+    H_HB = ?MODULE:from_binary(HB),
+    H_HB = ?MODULE:from_binary(binary_to_list(HB)),
+    "47" = ?MODULE:get_value("Content-Length", H_HB),
+    "text/plain" = ?MODULE:get_value("Content-Type", H_HB),
+    L_H_HB = ?MODULE:to_list(H_HB),
+    2 = length(L_H_HB),
+    true = lists:member({'Content-Length', "47"}, L_H_HB),
+    true = lists:member({'Content-Type', "text/plain"}, L_H_HB),
+    HL = [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ],
+    HL2 = [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ],
+    HL3 = [ <<"Content-Length: 47\r\n">>, "Content-Type: text/plain\r\n" ],
+    H_HL = ?MODULE:from_binary(HL),
+    H_HL = ?MODULE:from_binary(HL2),
+    H_HL = ?MODULE:from_binary(HL3),
+    "47" = ?MODULE:get_value("Content-Length", H_HL),
+    "text/plain" = ?MODULE:get_value("Content-Type", H_HL),
+    L_H_HL = ?MODULE:to_list(H_HL),
+    2 = length(L_H_HL),
+    true = lists:member({'Content-Length', "47"}, L_H_HL),
+    true = lists:member({'Content-Type', "text/plain"}, L_H_HL),
+    [] = ?MODULE:to_list(?MODULE:from_binary(<<>>)),
+    [] = ?MODULE:to_list(?MODULE:from_binary(<<"">>)),
+    [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n">>)),
+    [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n\r\n">>)),
+    [] = ?MODULE:to_list(?MODULE:from_binary("")),
+    [] = ?MODULE:to_list(?MODULE:from_binary([<<>>])),
+    [] = ?MODULE:to_list(?MODULE:from_binary([<<"">>])),
+    [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n">>])),
+    [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n\r\n">>])),
     ok.
 
 %% @spec empty() -> headers()
@@ -52,6 +83,40 @@
 make(T) when is_tuple(T) ->
     T.
 
+%% @spec from_binary(RawHttpHeader()) -> headers() 
+%% @type RawHttpHeader() -> string() | binary() | [ string() | binary() ]
+%%
+%% @doc Transforms a raw HTTP header into a mochiweb headers structure.
+%%
+%%      The given raw HTTP header can be one of the following:
+%%
+%%      1) A string or a binary representing a full HTTP header ending with 
+%%         double CRLF.
+%%         Examples:
+%%         "Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n"
+%%         <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>
+%%
+%%      2) A list of binaries or strings where each element represents a raw 
+%%         HTTP header line ending with a single CRLF.
+%%         Examples:
+%%         [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ]
+%%         [ "Content-Length: 47\r\n", "Content-Type: text/plain\r\n" ]
+%%         [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ]
+%%
+from_binary(RawHttpHeader) when is_binary(RawHttpHeader) ->
+    from_binary(RawHttpHeader, []);
+
+from_binary(RawHttpHeaderList) ->
+    from_binary(list_to_binary([RawHttpHeaderList, "\r\n"])).
+
+from_binary(RawHttpHeader, Acc) ->
+    case erlang:decode_packet(httph, RawHttpHeader, []) of
+        { ok, {http_header, _, H, _, V}, Rest } ->
+            from_binary(Rest, [{H, V} | Acc]);
+        _ ->
+            make(Acc)
+    end.
+
 %% @spec from_list([{key(), value()}]) -> headers()
 %% @doc Construct a headers() from the given list.
 from_list(List) ->

Added: couchdb/trunk/test/etap/130-attachments-md5.t
URL: http://svn.apache.org/viewvc/couchdb/trunk/test/etap/130-attachments-md5.t?rev=891077&view=auto
==============================================================================
--- couchdb/trunk/test/etap/130-attachments-md5.t (added)
+++ couchdb/trunk/test/etap/130-attachments-md5.t Wed Dec 16 00:05:35 2009
@@ -0,0 +1,252 @@
+#!/usr/bin/env escript
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+default_config() ->
+    test_util:build_file("etc/couchdb/default_dev.ini").
+
+test_db_name() ->
+    <<"etap-test-db">>.
+
+docid() ->
+    case get(docid) of
+        undefined ->
+            put(docid, 1),
+            "1";
+        Count ->
+            put(docid, Count+1),
+            integer_to_list(Count+1)
+    end.
+
+main(_) ->
+    test_util:init_code_path(),
+    
+    etap:plan(16),
+    case (catch test()) of
+        ok ->
+            etap:end_tests();
+        Other ->
+            etap:diag(io_lib:format("Test died abnormally: ~p", [Other])),
+            etap:bail(Other)
+    end,
+    ok.
+
+test() ->
+    couch_server_sup:start_link([default_config()]),
+    Addr = couch_config:get("httpd", "bind_address", any),
+    Port = list_to_integer(couch_config:get("httpd", "port", "5984")),
+    put(addr, Addr),
+    put(port, Port),
+    timer:sleep(1000),
+
+    couch_server:delete(test_db_name(), []),
+    couch_db:create(test_db_name(), []),
+
+    test_identity_without_md5(),
+    test_chunked_without_md5(),
+
+    test_identity_with_valid_md5(),
+    test_chunked_with_valid_md5_header(),
+    test_chunked_with_valid_md5_trailer(),
+
+    test_identity_with_invalid_md5(),
+    test_chunked_with_invalid_md5_header(),
+    test_chunked_with_invalid_md5_trailer(),
+
+    couch_server:delete(test_db_name(), []),
+    couch_server_sup:stop(),
+    ok.
+
+test_identity_without_md5() ->
+    Data = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Content-Length: 34\r\n",
+        "\r\n",
+        "We all live in a yellow submarine!"],
+
+    {Code, Json} = do_request(Data),
+    etap:is(Code, 201, "Stored with identity encoding and no MD5"),
+    etap:is(get_json(Json, [<<"ok">>]), true, "Body indicates success.").
+
+test_chunked_without_md5() ->
+    AttData = <<"We all live in a yellow submarine!">>,
+    <<Part1:21/binary, Part2:13/binary>> = AttData,
+    Data = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Transfer-Encoding: chunked\r\n",
+        "\r\n",
+        to_hex(size(Part1)), "\r\n",
+        Part1, "\r\n",
+        to_hex(size(Part2)), "\r\n",
+        Part2, "\r\n"
+        "0\r\n"
+        "\r\n"],
+
+    {Code, Json} = do_request(Data),
+    etap:is(Code, 201, "Stored with chunked encoding and no MD5"),
+    etap:is(get_json(Json, [<<"ok">>]), true, "Body indicates success.").
+
+test_identity_with_valid_md5() ->
+    AttData = "We all live in a yellow submarine!",
+    Data = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Content-Length: 34\r\n",
+        "Content-MD5: ", base64:encode(erlang:md5(AttData)), "\r\n",
+        "\r\n",
+        AttData],
+
+    {Code, Json} = do_request(Data),
+    etap:is(Code, 201, "Stored with identity encoding and valid MD5"),
+    etap:is(get_json(Json, [<<"ok">>]), true, "Body indicates success.").
+
+test_chunked_with_valid_md5_header() ->
+    AttData = <<"We all live in a yellow submarine!">>,
+    <<Part1:21/binary, Part2:13/binary>> = AttData,
+    Data = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Transfer-Encoding: chunked\r\n",
+        "Content-MD5: ", base64:encode(erlang:md5(AttData)), "\r\n",
+        "\r\n",
+        to_hex(size(Part1)), "\r\n",
+        Part1, "\r\n",
+        to_hex(size(Part2)), "\r\n",
+        Part2, "\r\n",
+        "0\r\n",
+        "\r\n"],
+
+    {Code, Json} = do_request(Data),
+    etap:is(Code, 201, "Stored with chunked encoding and valid MD5 header."),
+    etap:is(get_json(Json, [<<"ok">>]), true, "Body indicates success.").
+
+test_chunked_with_valid_md5_trailer() ->
+    AttData = <<"We all live in a yellow submarine!">>,
+    <<Part1:21/binary, Part2:13/binary>> = AttData,
+    Data = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Transfer-Encoding: chunked\r\n",
+        "Trailer: Content-MD5\r\n",
+        "\r\n",
+        to_hex(size(Part1)), "\r\n",
+        Part1, "\r\n",
+        to_hex(size(Part2)), "\r\n",
+        Part2, "\r\n",
+        "0\r\n",
+        "Content-MD5: ", base64:encode(erlang:md5(AttData)), "\r\n",
+        "\r\n"],
+
+    {Code, Json} = do_request(Data),
+    etap:is(Code, 201, "Stored with chunked encoding and valid MD5 trailer."),
+    etap:is(get_json(Json, [<<"ok">>]), true, "Body indicates success.").
+
+test_identity_with_invalid_md5() ->
+    Data = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Content-Length: 34\r\n",
+        "Content-MD5: ", base64:encode(<<"foobar!">>), "\r\n",
+        "\r\n",
+        "We all live in a yellow submarine!"],
+
+    {Code, Json} = do_request(Data),
+    etap:is(Code, 400, "Invalid MD5 header causes an error: identity"),
+    etap:is(
+        get_json(Json, [<<"error">>]),
+        <<"content_md5_mismatch">>,
+        "Body indicates reason for failure."
+    ).
+
+test_chunked_with_invalid_md5_header() ->
+    AttData = <<"We all live in a yellow submarine!">>,
+    <<Part1:21/binary, Part2:13/binary>> = AttData,
+    Data = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Transfer-Encoding: chunked\r\n",
+        "Content-MD5: ", base64:encode(<<"so sneaky...">>), "\r\n",
+        "\r\n",
+        to_hex(size(Part1)), "\r\n",
+        Part1, "\r\n",
+        to_hex(size(Part2)), "\r\n",
+        Part2, "\r\n",
+        "0\r\n",
+        "\r\n"],
+
+    {Code, Json} = do_request(Data),
+    etap:is(Code, 400, "Invalid MD5 header causes an error: chunked"),
+    etap:is(
+        get_json(Json, [<<"error">>]),
+        <<"content_md5_mismatch">>,
+        "Body indicates reason for failure."
+    ).
+
+test_chunked_with_invalid_md5_trailer() ->
+    AttData = <<"We all live in a yellow submarine!">>,
+    <<Part1:21/binary, Part2:13/binary>> = AttData,
+    Data = [
+        "PUT /", test_db_name(), "/", docid(), "/readme.txt HTTP/1.1\r\n",
+        "Content-Type: text/plain\r\n",
+        "Transfer-Encoding: chunked\r\n",
+        "Trailer: Content-MD5\r\n",
+        "\r\n",
+        to_hex(size(Part1)), "\r\n",
+        Part1, "\r\n",
+        to_hex(size(Part2)), "\r\n",
+        Part2, "\r\n",
+        "0\r\n",
+        "Content-MD5: ", base64:encode(<<"Kool-Aid Fountain!">>), "\r\n",
+        "\r\n"],
+
+    {Code, Json} = do_request(Data),
+    etap:is(Code, 400, "Invalid MD5 Trailer causes an error"),
+    etap:is(
+        get_json(Json, [<<"error">>]),
+        <<"content_md5_mismatch">>,
+        "Body indicates reason for failure."
+    ).
+
+
+get_socket() ->
+    Options = [binary, {packet, 0}, {active, false}],
+    {ok, Sock} = gen_tcp:connect(get(addr), get(port), Options),
+    Sock.
+
+do_request(Request) ->
+    Sock = get_socket(),
+    gen_tcp:send(Sock, list_to_binary(lists:flatten(Request))),
+    timer:sleep(100),
+    {ok, R} = gen_tcp:recv(Sock, 0),
+    gen_tcp:close(Sock),
+    [Header, Body] = re:split(R, "\r\n\r\n", [{return, binary}]),
+    {ok, {http_response, _, Code, _}, _} =
+        erlang:decode_packet(http, Header, []),
+    Json = couch_util:json_decode(Body),
+    {Code, Json}.
+
+get_json(Json, Path) ->
+    couch_util:get_nested_json_value(Json, Path).
+
+to_hex(Val) ->
+    to_hex(Val, []).
+
+to_hex(0, Acc) ->
+    Acc;
+to_hex(Val, Acc) ->
+    to_hex(Val div 16, [hex_char(Val rem 16) | Acc]).
+
+hex_char(V) when V < 10 -> $0 + V;
+hex_char(V) -> $A + V - 10.
+

Propchange: couchdb/trunk/test/etap/130-attachments-md5.t
------------------------------------------------------------------------------
    svn:executable = *



Mime
View raw message