couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject svn commit: r1135346 - in /couchdb/branches/1.0.x: share/www/script/test/attachment_names.js src/couchdb/couch_httpd_db.erl src/couchdb/couch_util.erl
Date Tue, 14 Jun 2011 00:19:15 GMT
Author: davisp
Date: Tue Jun 14 00:19:15 2011
New Revision: 1135346

URL: http://svn.apache.org/viewvc?rev=1135346&view=rev
Log:
COUCHDB-760 - Allow UTF-8 in attachment names.

This is a backport of 1100254 from trunk except I had to
manually add the couch_util:validate_utf8 function.


Modified:
    couchdb/branches/1.0.x/share/www/script/test/attachment_names.js
    couchdb/branches/1.0.x/src/couchdb/couch_httpd_db.erl
    couchdb/branches/1.0.x/src/couchdb/couch_util.erl

Modified: couchdb/branches/1.0.x/share/www/script/test/attachment_names.js
URL: http://svn.apache.org/viewvc/couchdb/branches/1.0.x/share/www/script/test/attachment_names.js?rev=1135346&r1=1135345&r2=1135346&view=diff
==============================================================================
--- couchdb/branches/1.0.x/share/www/script/test/attachment_names.js (original)
+++ couchdb/branches/1.0.x/share/www/script/test/attachment_names.js Tue Jun 14 00:19:15 2011
@@ -16,6 +16,24 @@ couchTests.attachment_names = function(d
   db.createDb();
   if (debug) debugger;
 
+  var goodDoc = {
+    _id: "good_doc",
+    _attachments: {
+      "Колян.txt": {
+       content_type:"text/plain",
+       data: "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ="
+      }
+    }
+  };
+
+  var save_response = db.save(goodDoc);
+  T(save_response.ok);
+
+  var xhr = CouchDB.request("GET", "/test_suite_db/good_doc/Колян.txt");
+  T(xhr.responseText == "This is a base64 encoded text");
+  T(xhr.getResponseHeader("Content-Type") == "text/plain");
+  T(xhr.getResponseHeader("Etag") == '"' + save_response.rev + '"');
+
   var binAttDoc = {
     _id: "bin_doc",
     _attachments:{
@@ -26,14 +44,8 @@ couchTests.attachment_names = function(d
     }
   };
 
-  // inline attachments
-  try {
-    db.save(binAttDoc);
-    TEquals(1, 2, "Attachment name with non UTF-8 encoding saved. Should never show!");
-  } catch (e) {
-    TEquals("bad_request", e.error, "attachment_name: inline attachments");
-    TEquals("Attachment name is not UTF-8 encoded", e.reason, "attachment_name: inline attachments");
-  }
+  resp = db.save(binAttDoc);
+  TEquals(true, resp.ok, "attachment_name: inline attachment");
 
 
   // standalone docs
@@ -45,10 +57,9 @@ couchTests.attachment_names = function(d
   }));
 
   var resp = JSON.parse(xhr.responseText);
-  TEquals(400, xhr.status, "attachment_name: standalone API");
-  TEquals("bad_request", resp.error, "attachment_name: standalone API");
-  TEquals("Attachment name is not UTF-8 encoded", resp.reason, "attachment_name: standalone API");
-
+  TEquals(201, xhr.status, "attachment_name: standalone API");
+  TEquals("Created",  xhr.statusText, "attachment_name: standalone API");
+  TEquals(true, resp.ok, "attachment_name: standalone API");
 
   // bulk docs
   var docs = { docs: [binAttDoc] };
@@ -58,10 +69,8 @@ couchTests.attachment_names = function(d
   });
 
   var resp = JSON.parse(xhr.responseText);
-  TEquals(400, xhr.status, "attachment_name: bulk docs");
-  TEquals("bad_request", resp.error, "attachment_name: bulk docs");
-  TEquals("Attachment name is not UTF-8 encoded", resp.reason, "attachment_name: bulk docs");
-
+  TEquals(201, xhr.status, "attachment_name: bulk docs");
+  TEquals("Created", xhr.statusText, "attachment_name: bulk docs");
 
   // leading underscores
   var binAttDoc = {

Modified: couchdb/branches/1.0.x/src/couchdb/couch_httpd_db.erl
URL: http://svn.apache.org/viewvc/couchdb/branches/1.0.x/src/couchdb/couch_httpd_db.erl?rev=1135346&r1=1135345&r2=1135346&view=diff
==============================================================================
--- couchdb/branches/1.0.x/src/couchdb/couch_httpd_db.erl (original)
+++ couchdb/branches/1.0.x/src/couchdb/couch_httpd_db.erl Tue Jun 14 00:19:15 2011
@@ -1228,34 +1228,7 @@ validate_attachment_name(Name) when is_l
 validate_attachment_name(<<"_",_/binary>>) ->
     throw({bad_request, <<"Attachment name can't start with '_'">>});
 validate_attachment_name(Name) ->
-    case is_valid_utf8(Name) of
+    case couch_util:validate_utf8(Name) of
         true -> Name;
         false -> throw({bad_request, <<"Attachment name is not UTF-8 encoded">>})
     end.
-
-%% borrowed from mochijson2:json_bin_is_safe()
-is_valid_utf8(<<>>) ->
-    true;
-is_valid_utf8(<<C, Rest/binary>>) ->
-    case C of
-        $\" ->
-            false;
-        $\\ ->
-            false;
-        $\b ->
-            false;
-        $\f ->
-            false;
-        $\n ->
-            false;
-        $\r ->
-            false;
-        $\t ->
-            false;
-        C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
-            false;
-        C when C < 16#7f ->
-            is_valid_utf8(Rest);
-        _ ->
-            false
-    end.

Modified: couchdb/branches/1.0.x/src/couchdb/couch_util.erl
URL: http://svn.apache.org/viewvc/couchdb/branches/1.0.x/src/couchdb/couch_util.erl?rev=1135346&r1=1135345&r2=1135346&view=diff
==============================================================================
--- couchdb/branches/1.0.x/src/couchdb/couch_util.erl (original)
+++ couchdb/branches/1.0.x/src/couchdb/couch_util.erl Tue Jun 14 00:19:15 2011
@@ -17,7 +17,7 @@
 -export([rand32/0, implode/2, collate/2, collate/3]).
 -export([abs_pathname/1,abs_pathname/2, trim/1, ascii_lower/1]).
 -export([encodeBase64Url/1, decodeBase64Url/1]).
--export([to_hex/1, parse_term/1, dict_find/3]).
+-export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]).
 -export([file_read_size/1, get_nested_json_value/2, json_user_ctx/1]).
 -export([proplist_apply_field/2, json_apply_field/2]).
 -export([to_binary/1, to_integer/1, to_list/1, url_encode/1]).
@@ -107,6 +107,37 @@ simple_call(Pid, Message) ->
         erlang:demonitor(MRef, [flush])
     end.
 
+validate_utf8(Data) when is_list(Data) ->
+    validate_utf8(?l2b(Data));
+validate_utf8(Bin) when is_binary(Bin) ->
+    validate_utf8_fast(Bin, 0).
+
+validate_utf8_fast(B, O) ->
+    case B of
+        <<_:O/binary>> ->
+            true;
+        <<_:O/binary, C1, _/binary>> when
+                C1 < 128 ->
+            validate_utf8_fast(B, 1 + O);
+        <<_:O/binary, C1, C2, _/binary>> when
+                C1 >= 194, C1 =< 223,
+                C2 >= 128, C2 =< 191 ->
+            validate_utf8_fast(B, 2 + O);
+        <<_:O/binary, C1, C2, C3, _/binary>> when
+                C1 >= 224, C1 =< 239,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191 ->
+            validate_utf8_fast(B, 3 + O);
+        <<_:O/binary, C1, C2, C3, C4, _/binary>> when
+                C1 >= 240, C1 =< 244,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191,
+                C4 >= 128, C4 =< 191 ->
+            validate_utf8_fast(B, 4 + O);
+        _ ->
+            false
+    end.
+
 to_hex([]) ->
     [];
 to_hex(Bin) when is_binary(Bin) ->



Mime
View raw message