couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject svn commit: r1064417 - in /couchdb/trunk/src/couchdb: couch_doc.erl couch_util.erl
Date Fri, 28 Jan 2011 01:48:51 GMT
Author: davisp
Date: Fri Jan 28 01:48:51 2011
New Revision: 1064417

URL: http://svn.apache.org/viewvc?rev=1064417&view=rev
Log:
Validate UTF-8 in doc ids taken from the URL.

We weren't validating URL's for UTF-8 correctness when creating
a document and using the URL for the docid. Uses the same logic
from mochijson2.erl to do the check.

Thanks to Thiago Arrais for the report.
Fixes COUCHDB-1039


Modified:
    couchdb/trunk/src/couchdb/couch_doc.erl
    couchdb/trunk/src/couchdb/couch_util.erl

Modified: couchdb/trunk/src/couchdb/couch_doc.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_doc.erl?rev=1064417&r1=1064416&r2=1064417&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_doc.erl (original)
+++ couchdb/trunk/src/couchdb/couch_doc.erl Fri Jan 28 01:48:51 2011
@@ -177,6 +177,10 @@ parse_revs([Rev | Rest]) ->
 
 
 validate_docid(Id) when is_binary(Id) ->
+    case couch_util:validate_utf8(Id) of
+        false -> throw({bad_request, <<"Document id must be valid UTF-8">>});
+        true -> ok
+    end,
     case Id of
     <<"_design/", _/binary>> -> ok;
     <<"_local/", _/binary>> -> ok;

Modified: couchdb/trunk/src/couchdb/couch_util.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_util.erl?rev=1064417&r1=1064416&r2=1064417&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_util.erl (original)
+++ couchdb/trunk/src/couchdb/couch_util.erl Fri Jan 28 01:48:51 2011
@@ -17,7 +17,7 @@
 -export([rand32/0, implode/2, collate/2, collate/3]).
 -export([abs_pathname/1,abs_pathname/2, trim/1]).
 -export([encodeBase64Url/1, decodeBase64Url/1]).
--export([to_hex/1, parse_term/1, dict_find/3]).
+-export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]).
 -export([get_nested_json_value/2, json_user_ctx/1]).
 -export([proplist_apply_field/2, json_apply_field/2]).
 -export([to_binary/1, to_integer/1, to_list/1, url_encode/1]).
@@ -96,6 +96,37 @@ simple_call(Pid, Message) ->
         erlang:demonitor(MRef, [flush])
     end.
 
+validate_utf8(Data) when is_list(Data) ->
+    validate_utf8(?l2b(Data));
+validate_utf8(Bin) when is_binary(Bin) ->
+    validate_utf8_fast(Bin, 0).
+
+validate_utf8_fast(B, O) ->
+    case B of
+        <<_:O/binary>> ->
+            true;
+        <<_:O/binary, C1, _/binary>> when
+                C1 < 128 ->
+            validate_utf8_fast(B, 1 + O);
+        <<_:O/binary, C1, C2, _/binary>> when
+                C1 >= 194, C1 =< 223,
+                C2 >= 128, C2 =< 191 ->
+            validate_utf8_fast(B, 2 + O);
+        <<_:O/binary, C1, C2, C3, _/binary>> when
+                C1 >= 224, C1 =< 239,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191 ->
+            validate_utf8_fast(B, 3 + O);
+        <<_:O/binary, C1, C2, C3, C4, _/binary>> when
+                C1 >= 240, C1 =< 244,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191,
+                C4 >= 128, C4 =< 191 ->
+            validate_utf8_fast(B, 4 + O);
+        _ ->
+            false
+    end.
+
 to_hex([]) ->
     [];
 to_hex(Bin) when is_binary(Bin) ->



Mime
View raw message