Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 5B314200D3C for ; Mon, 30 Oct 2017 22:42:41 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 59BDC1609D5; Mon, 30 Oct 2017 21:42:41 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 7AF18160BE4 for ; Mon, 30 Oct 2017 22:42:40 +0100 (CET) Received: (qmail 93839 invoked by uid 500); 30 Oct 2017 21:42:39 -0000 Mailing-List: contact commits-help@couchdb.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@couchdb.apache.org Delivered-To: mailing list commits@couchdb.apache.org Received: (qmail 93826 invoked by uid 99); 30 Oct 2017 21:42:39 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 30 Oct 2017 21:42:39 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 9A086DF9AE; Mon, 30 Oct 2017 21:42:36 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: davisp@apache.org To: commits@couchdb.apache.org Date: Mon, 30 Oct 2017 21:42:38 -0000 Message-Id: In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [3/4] jiffy commit: updated refs/heads/master to f85e2fc archived-at: Mon, 30 Oct 2017 21:42:41 -0000 Add `dedupe_keys` option You can now optionally request that keys are deduplicated inside of Jiffy instead of having to perform that operation in Erlang. 
Project: http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/repo Commit: http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/commit/128811a7 Tree: http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/tree/128811a7 Diff: http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/diff/128811a7 Branch: refs/heads/master Commit: 128811a7cf2dc79de05872dabb3b95f356eb4194 Parents: df791ef Author: Paul J. Davis Authored: Mon Oct 30 14:01:53 2017 -0500 Committer: Paul J. Davis Committed: Mon Oct 30 14:02:46 2017 -0500 ---------------------------------------------------------------------- c_src/decoder.c | 43 +++---------------- c_src/jiffy.c | 1 + c_src/jiffy.h | 4 ++ c_src/objects.cc | 71 ++++++++++++++++++++++++++++++++ test/jiffy_16_dedupe_keys_tests.erl | 50 ++++++++++++++++++++++ 5 files changed, 132 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/blob/128811a7/c_src/decoder.c ---------------------------------------------------------------------- diff --git a/c_src/decoder.c b/c_src/decoder.c index 34cf467..7f19809 100644 --- a/c_src/decoder.c +++ b/c_src/decoder.c @@ -53,6 +53,7 @@ typedef struct { int is_partial; int return_maps; int return_trailer; + int dedupe_keys; ERL_NIF_TERM null_term; char* p; @@ -82,6 +83,7 @@ dec_new(ErlNifEnv* env) d->is_partial = 0; d->return_maps = 0; d->return_trailer = 0; + d->dedupe_keys = 0; d->null_term = d->atoms->atom_null; d->p = NULL; @@ -623,42 +625,6 @@ make_empty_object(ErlNifEnv* env, int ret_map) return enif_make_tuple1(env, enif_make_list(env, 0)); } -int -make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, int ret_map) -{ - ERL_NIF_TERM ret; - ERL_NIF_TERM key; - ERL_NIF_TERM val; - -#if MAP_TYPE_PRESENT - if(ret_map) { - ret = enif_make_new_map(env); - while(enif_get_list_cell(env, pairs, &val, &pairs)) { - if(!enif_get_list_cell(env, pairs, &key, &pairs)) { - assert(0 == 1 && "Unbalanced object pairs."); 
- } - if(!enif_make_map_put(env, ret, key, val, &ret)) { - return 0; - } - } - *out = ret; - return 1; - } -#endif - - ret = enif_make_list(env, 0); - while(enif_get_list_cell(env, pairs, &val, &pairs)) { - if(!enif_get_list_cell(env, pairs, &key, &pairs)) { - assert(0 == 1 && "Unbalanced object pairs."); - } - val = enif_make_tuple2(env, key, val); - ret = enif_make_list_cell(env, val, ret); - } - *out = enif_make_tuple1(env, ret); - - return 1; -} - ERL_NIF_TERM make_array(ErlNifEnv* env, ERL_NIF_TERM list) { @@ -716,6 +682,8 @@ decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) #endif } else if(enif_compare(val, d->atoms->atom_return_trailer) == 0) { d->return_trailer = 1; + } else if(enif_compare(val, d->atoms->atom_dedupe_keys) == 0) { + d->dedupe_keys = 1; } else if(enif_compare(val, d->atoms->atom_use_nil) == 0) { d->null_term = d->atoms->atom_nil; } else if(get_null_term(env, val, &(d->null_term))) { @@ -984,7 +952,8 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) } dec_pop(d, st_object); dec_pop(d, st_value); - if(!make_object(env, curr, &val, d->return_maps)) { + if(!make_object(env, curr, &val, + d->return_maps, d->dedupe_keys)) { ret = dec_error(d, "internal_object_error"); goto done; } http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/blob/128811a7/c_src/jiffy.c ---------------------------------------------------------------------- diff --git a/c_src/jiffy.c b/c_src/jiffy.c index 1ea60a3..a3c1ad9 100644 --- a/c_src/jiffy.c +++ b/c_src/jiffy.c @@ -32,6 +32,7 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) st->atom_use_nil = make_atom(env, "use_nil"); st->atom_null_term = make_atom(env, "null_term"); st->atom_escape_forward_slashes = make_atom(env, "escape_forward_slashes"); + st->atom_dedupe_keys = make_atom(env, "dedupe_keys"); // Markers used in encoding st->ref_object = make_atom(env, "$object_ref$"); http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/blob/128811a7/c_src/jiffy.h 
---------------------------------------------------------------------- diff --git a/c_src/jiffy.h b/c_src/jiffy.h index abe5154..b9d5d43 100644 --- a/c_src/jiffy.h +++ b/c_src/jiffy.h @@ -35,6 +35,7 @@ typedef struct { ERL_NIF_TERM atom_use_nil; ERL_NIF_TERM atom_null_term; ERL_NIF_TERM atom_escape_forward_slashes; + ERL_NIF_TERM atom_dedupe_keys; ERL_NIF_TERM ref_object; ERL_NIF_TERM ref_array; @@ -61,6 +62,9 @@ ERL_NIF_TERM encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); void dec_destroy(ErlNifEnv* env, void* obj); void enc_destroy(ErlNifEnv* env, void* obj); +int make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, + int ret_map, int dedupe_keys); + int int_from_hex(const unsigned char* p); int int_to_hex(int val, char* p); int utf8_len(int c); http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/blob/128811a7/c_src/objects.cc ---------------------------------------------------------------------- diff --git a/c_src/objects.cc b/c_src/objects.cc new file mode 100644 index 0000000..4ddb049 --- /dev/null +++ b/c_src/objects.cc @@ -0,0 +1,71 @@ +// This file is part of Jiffy released under the MIT license. +// See the LICENSE file for more information. 
+ +#include <set> +#include <string> + +#include <assert.h> + +#include "erl_nif.h" + +#define MAP_TYPE_PRESENT \ + ((ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION >= 6) \ + || (ERL_NIF_MAJOR_VERSION > 2)) + +#define BEGIN_C extern "C" { +#define END_C } + +BEGIN_C + +int +make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, + int ret_map, int dedupe_keys) +{ + ERL_NIF_TERM ret; + ERL_NIF_TERM key; + ERL_NIF_TERM val; + +#if MAP_TYPE_PRESENT + if(ret_map) { + ret = enif_make_new_map(env); + while(enif_get_list_cell(env, pairs, &val, &pairs)) { + if(!enif_get_list_cell(env, pairs, &key, &pairs)) { + assert(0 == 1 && "Unbalanced object pairs."); + } + if(!enif_make_map_put(env, ret, key, val, &ret)) { + return 0; + } + } + *out = ret; + return 1; + } +#endif + + std::set<std::string> seen; + ret = enif_make_list(env, 0); + while(enif_get_list_cell(env, pairs, &val, &pairs)) { + if(!enif_get_list_cell(env, pairs, &key, &pairs)) { + assert(0 == 1 && "Unbalanced object pairs."); + } + if(dedupe_keys) { + ErlNifBinary bin; + if(!enif_inspect_binary(env, key, &bin)) { + return 0; + } + std::string skey((char*) bin.data, bin.size); + if(seen.count(skey) == 0) { + seen.insert(skey); + val = enif_make_tuple2(env, key, val); + ret = enif_make_list_cell(env, val, ret); + } + } else { + val = enif_make_tuple2(env, key, val); + ret = enif_make_list_cell(env, val, ret); + } + } + *out = enif_make_tuple1(env, ret); + + return 1; +} + +END_C http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/blob/128811a7/test/jiffy_16_dedupe_keys_tests.erl ---------------------------------------------------------------------- diff --git a/test/jiffy_16_dedupe_keys_tests.erl b/test/jiffy_16_dedupe_keys_tests.erl new file mode 100644 index 0000000..a17c474 --- /dev/null +++ b/test/jiffy_16_dedupe_keys_tests.erl @@ -0,0 +1,50 @@ +% This file is part of Jiffy released under the MIT license. +% See the LICENSE file for more information. + +-module(jiffy_16_dedupe_keys_tests). + +-include_lib("eunit/include/eunit.hrl"). 
+ +dedupe_keys_test_() -> + Opts = [dedupe_keys], + Cases = [ + % Simple sanity check + { + {[{<<"foo">>, 1}]}, + {[{<<"foo">>, 1}]} + }, + % Basic test + { + {[{<<"foo">>, 1}, {<<"foo">>, 2}]}, + {[{<<"foo">>, 2}]} + }, + % Non-repeated keys are fine + { + {[{<<"foo">>, 1}, {<<"bar">>, 2}]}, + {[{<<"foo">>, 1}, {<<"bar">>, 2}]} + }, + % Key order stays the same other than deduped keys + { + {[{<<"bar">>, 1}, {<<"foo">>, 2}, {<<"baz">>, 3}, {<<"foo">>, 4}]}, + {[{<<"bar">>, 1}, {<<"baz">>, 3}, {<<"foo">>, 4}]} + }, + % Multiple repeats are handled + { + {[{<<"foo">>, 1}, {<<"foo">>, 2}, {<<"foo">>, 3}]}, + {[{<<"foo">>, 3}]} + }, + % Embedded NULL bytes are handled + { + {[{<<"foo\\u0000bar">>, 1}, {<<"foo\\u0000baz">>, 2}]}, + {[{<<"foo\\u0000bar">>, 1}, {<<"foo\\u0000baz">>, 2}]} + }, + % Can dedupe with embedded NULL bytes + { + {[{<<"foo\\u0000bar">>, 1}, {<<"foo\\u0000bar">>, 2}]}, + {[{<<"foo\\u0000bar">>, 2}]} + } + ], + {"Test dedupe_keys", lists:map(fun({Data, Result}) -> + Json = jiffy:encode(Data), + ?_assertEqual(Result, jiffy:decode(Json, Opts)) + end, Cases)}.