couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chewbra...@apache.org
Subject [couchdb] 01/01: Expose ICU ucol_getSortKey
Date Wed, 10 Jul 2019 23:20:33 GMT
This is an automated email from the ASF dual-hosted git repository.

chewbranca pushed a commit to branch 2067-add-get-sort-key
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 05d4fb4983d6146f82fa2e475d816dbc164053bb
Author: Russell Branca <chewbranca@apache.org>
AuthorDate: Tue Jul 2 13:31:33 2019 -0700

    Expose ICU ucol_getSortKey
---
 src/couch/priv/icu_driver/couch_icu_driver.c | 57 ++++++++++++++++++++++
 src/couch/src/couch_util.erl                 | 13 ++++-
 src/couch/test/couch_util_tests.erl          | 72 ++++++++++++++++++++++++++++
 3 files changed, 141 insertions(+), 1 deletion(-)

diff --git a/src/couch/priv/icu_driver/couch_icu_driver.c b/src/couch/priv/icu_driver/couch_icu_driver.c
index 4d9bb98..cfa7682 100644
--- a/src/couch/priv/icu_driver/couch_icu_driver.c
+++ b/src/couch/priv/icu_driver/couch_icu_driver.c
@@ -30,6 +30,8 @@ specific language governing permissions and limitations under the License.
 #include <string.h> /* for memcpy */
 #endif
 
+#define BUFFER_SIZE 1000
+
 
 typedef struct {
     ErlDrvPort port;
@@ -147,6 +149,61 @@ couch_drv_control(ErlDrvData drv_data, unsigned int command,
 
         return return_control_result(&response, sizeof(response), rbuf, rlen);
         }
+    case 2: /* GET_SORT_KEY: */
+        {
+
+        UChar source[BUFFER_SIZE];
+        UChar* sourcePtr = source;
+        int32_t sourceLen = BUFFER_SIZE;
+
+        uint8_t sortKey[BUFFER_SIZE];
+        uint8_t* sortKeyPtr = sortKey;
+        int32_t sortKeyLen = BUFFER_SIZE;
+
+        int32_t inputLen;
+
+        UErrorCode status = U_ZERO_ERROR;
+        ErlDrvSSizeT res;
+
+        /* first 32bits are the length */
+        memcpy(&inputLen, pBuf, sizeof(inputLen));
+        pBuf += sizeof(inputLen);
+
+        u_strFromUTF8(sourcePtr, BUFFER_SIZE, &sourceLen, pBuf, inputLen, &status);
+
+        if (sourceLen >= BUFFER_SIZE) {
+            /* reset status or next u_strFromUTF8 call will auto-fail */
+            status = U_ZERO_ERROR;
+            sourcePtr = (UChar*) malloc(sourceLen * sizeof(UChar));
+            u_strFromUTF8(sourcePtr, sourceLen, NULL, pBuf, inputLen, &status);
+            if (U_FAILURE(status)) {
+                rbuf = NULL;
+                return 0;
+            }
+        } else if (U_FAILURE(status)) {
+            rbuf = NULL;
+            return 0;
+        }
+
+        sortKeyLen = ucol_getSortKey(pData->coll, sourcePtr, sourceLen, sortKeyPtr, BUFFER_SIZE);
+
+        if (sortKeyLen > BUFFER_SIZE) {
+            sortKeyPtr = (uint8_t*) malloc(sortKeyLen);
+            ucol_getSortKey(pData->coll, sourcePtr, sourceLen, sortKeyPtr, sortKeyLen);
+        }
+
+        res = return_control_result(sortKeyPtr, sortKeyLen, rbuf, rlen);
+
+        if (sourcePtr != source) {
+            free(sourcePtr);
+        }
+
+        if (sortKeyPtr != sortKey) {
+            free(sortKeyPtr);
+        }
+
+        return res;
+    }
 
     default:
         return -1;
diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl
index 62e17ce..adcc3e8 100644
--- a/src/couch/src/couch_util.erl
+++ b/src/couch/src/couch_util.erl
@@ -14,7 +14,7 @@
 
 -export([priv_dir/0, normpath/1, fold_files/5]).
 -export([should_flush/0, should_flush/1, to_existing_atom/1]).
--export([rand32/0, implode/2, collate/2, collate/3]).
+-export([rand32/0, implode/2, collate/2, collate/3, get_sort_key/1]).
 -export([abs_pathname/1,abs_pathname/2, trim/1, drop_dot_couch_ext/1]).
 -export([encodeBase64Url/1, decodeBase64Url/1]).
 -export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]).
@@ -411,6 +411,17 @@ collate(A, B, Options) when is_binary(A), is_binary(B) ->
     % expected typical -1, 0, 1
     Result - 1.
 
+get_sort_key(<<>>) ->
+    error;
+get_sort_key(Str) when is_binary(Str) ->
+    Operation = 2, % get_sort_key
+    Size = byte_size(Str),
+    Bin = <<Size:32/native, Str/binary>>,
+    case erlang:port_control(drv_port(), Operation, Bin) of
+        [] -> error;
+        Res -> Res
+    end.
+
 should_flush() ->
     should_flush(?FLUSH_MAX_MEM).
 
diff --git a/src/couch/test/couch_util_tests.erl b/src/couch/test/couch_util_tests.erl
index 3e145c4..9476360 100644
--- a/src/couch/test/couch_util_tests.erl
+++ b/src/couch/test/couch_util_tests.erl
@@ -168,3 +168,75 @@ to_hex_test_() ->
         ?_assertEqual("", couch_util:to_hex(<<>>)),
         ?_assertEqual("010203faff", couch_util:to_hex(<<1, 2, 3, 250, 255>>))
     ].
+
+sort_key_test_() ->
+    {
+        "Sort Key tests",
+        [
+            {
+                foreach,
+                fun setup/0, fun teardown/1,
+                [
+                    fun test_get_sort_key/1,
+                    fun test_get_sort_key_jiffy_string/1,
+                    fun test_get_sort_key_fails_on_bad_input/1,
+                    fun test_get_sort_key_longer_than_buffer/1
+                ]
+            }
+        ]
+    }.
+
+test_get_sort_key(_) ->
+    Strs = [<<"foo">>, <<"bar">>, <<"Bar">>, <<"baz">>, <<"BAZ">>, <<"quaz">>,
+        <<"1234fdsa">>, <<"1234">>, <<"pizza">>],
+    Pairs = [{S1, S2} || S1 <- Strs, S2 <- Strs],
+    lists:map(fun({S1, S2}) ->
+        S1K = couch_util:get_sort_key(S1),
+        S2K = couch_util:get_sort_key(S2),
+        SortRes = sort_keys(S1K, S2K),
+        Comment = list_to_binary(io_lib:format("strcmp(~p, ~p)", [S1, S2])),
+        CollRes = couch_util:collate(S1, S2),
+        {Comment, ?_assertEqual(SortRes, CollRes)}
+    end, Pairs).
+
+test_get_sort_key_jiffy_string(_) ->
+    %% jiffy:decode does not null terminate strings
+    %% so we use it here to test unterminated strings
+    {[{S1,S2}]} = jiffy:decode(<<"{\"foo\": \"bar\"}">>),
+    S1K = couch_util:get_sort_key(S1),
+    S2K = couch_util:get_sort_key(S2),
+    SortRes = sort_keys(S1K, S2K),
+    CollRes = couch_util:collate(S1, S2),
+    ?_assertEqual(SortRes, CollRes).
+
+test_get_sort_key_fails_on_bad_input(_) ->
+    %% generated with crypto:strong_rand_bytes
+    %% contains invalid character, should error
+    S = <<209,98,222,144,60,163,72,134,206,157>>,
+    Res = couch_util:get_sort_key(S),
+    ?_assertEqual(error, Res).
+
+test_get_sort_key_longer_than_buffer(_) ->
+    %% stack allocated buffer is 1000 units, test resize logic
+    %% "asdf" * 300 = 1200 extra characters
+    Extra = list_to_binary(["asdf" || _ <- lists:seq(1, 300)]),
+    S1 = <<"foo", Extra/binary>>,
+    S2 = <<"bar", Extra/binary>>,
+    S1K = couch_util:get_sort_key(S1),
+    S2K = couch_util:get_sort_key(S2),
+    SortRes = sort_keys(S1K, S2K),
+    CollRes = couch_util:collate(S1, S2),
+    ?_assertEqual(SortRes, CollRes).
+
+sort_keys(S1, S2) ->
+    case S1 < S2 of
+        true ->
+            -1;
+        false -> case S1 =:= S2 of
+            true ->
+                0;
+            false ->
+                1
+        end
+    end.
+


Mime
View raw message