couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject [3/4] jiffy commit: updated refs/heads/master to f85e2fc
Date Mon, 30 Oct 2017 21:42:38 GMT
Add `dedupe_keys` option

You can now optionally request that keys are deduplicated inside of Jiffy
instead of having to perform that operation in Erlang.


Project: http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/commit/128811a7
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/tree/128811a7
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/diff/128811a7

Branch: refs/heads/master
Commit: 128811a7cf2dc79de05872dabb3b95f356eb4194
Parents: df791ef
Author: Paul J. Davis <paul.joseph.davis@gmail.com>
Authored: Mon Oct 30 14:01:53 2017 -0500
Committer: Paul J. Davis <paul.joseph.davis@gmail.com>
Committed: Mon Oct 30 14:02:46 2017 -0500

----------------------------------------------------------------------
 c_src/decoder.c                     | 43 +++----------------
 c_src/jiffy.c                       |  1 +
 c_src/jiffy.h                       |  4 ++
 c_src/objects.cc                    | 71 ++++++++++++++++++++++++++++++++
 test/jiffy_16_dedupe_keys_tests.erl | 50 ++++++++++++++++++++++
 5 files changed, 132 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/blob/128811a7/c_src/decoder.c
----------------------------------------------------------------------
diff --git a/c_src/decoder.c b/c_src/decoder.c
index 34cf467..7f19809 100644
--- a/c_src/decoder.c
+++ b/c_src/decoder.c
@@ -53,6 +53,7 @@ typedef struct {
     int             is_partial;
     int             return_maps;
     int             return_trailer;
+    int             dedupe_keys;
     ERL_NIF_TERM    null_term;
 
     char*           p;
@@ -82,6 +83,7 @@ dec_new(ErlNifEnv* env)
     d->is_partial = 0;
     d->return_maps = 0;
     d->return_trailer = 0;
+    d->dedupe_keys = 0;
     d->null_term = d->atoms->atom_null;
 
     d->p = NULL;
@@ -623,42 +625,6 @@ make_empty_object(ErlNifEnv* env, int ret_map)
     return enif_make_tuple1(env, enif_make_list(env, 0));
 }
 
-int
-make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, int ret_map)
-{
-    ERL_NIF_TERM ret;
-    ERL_NIF_TERM key;
-    ERL_NIF_TERM val;
-
-#if MAP_TYPE_PRESENT
-    if(ret_map) {
-        ret = enif_make_new_map(env);
-        while(enif_get_list_cell(env, pairs, &val, &pairs)) {
-            if(!enif_get_list_cell(env, pairs, &key, &pairs)) {
-                assert(0 == 1 && "Unbalanced object pairs.");
-            }
-            if(!enif_make_map_put(env, ret, key, val, &ret)) {
-                return 0;
-            }
-        }
-        *out = ret;
-        return 1;
-    }
-#endif
-
-    ret = enif_make_list(env, 0);
-    while(enif_get_list_cell(env, pairs, &val, &pairs)) {
-        if(!enif_get_list_cell(env, pairs, &key, &pairs)) {
-            assert(0 == 1 && "Unbalanced object pairs.");
-        }
-        val = enif_make_tuple2(env, key, val);
-        ret = enif_make_list_cell(env, val, ret);
-    }
-    *out = enif_make_tuple1(env, ret);
-
-    return 1;
-}
-
 ERL_NIF_TERM
 make_array(ErlNifEnv* env, ERL_NIF_TERM list)
 {
@@ -716,6 +682,8 @@ decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
 #endif
         } else if(enif_compare(val, d->atoms->atom_return_trailer) == 0) {
             d->return_trailer = 1;
+        } else if(enif_compare(val, d->atoms->atom_dedupe_keys) == 0) {
+            d->dedupe_keys = 1;
         } else if(enif_compare(val, d->atoms->atom_use_nil) == 0) {
             d->null_term = d->atoms->atom_nil;
         } else if(get_null_term(env, val, &(d->null_term))) {
@@ -984,7 +952,8 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
                         }
                         dec_pop(d, st_object);
                         dec_pop(d, st_value);
-                        if(!make_object(env, curr, &val, d->return_maps)) {
+                        if(!make_object(env, curr, &val,
+                                d->return_maps, d->dedupe_keys)) {
                             ret = dec_error(d, "internal_object_error");
                             goto done;
                         }

http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/blob/128811a7/c_src/jiffy.c
----------------------------------------------------------------------
diff --git a/c_src/jiffy.c b/c_src/jiffy.c
index 1ea60a3..a3c1ad9 100644
--- a/c_src/jiffy.c
+++ b/c_src/jiffy.c
@@ -32,6 +32,7 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
     st->atom_use_nil = make_atom(env, "use_nil");
     st->atom_null_term = make_atom(env, "null_term");
     st->atom_escape_forward_slashes = make_atom(env, "escape_forward_slashes");
+    st->atom_dedupe_keys = make_atom(env, "dedupe_keys");
 
     // Markers used in encoding
     st->ref_object = make_atom(env, "$object_ref$");

http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/blob/128811a7/c_src/jiffy.h
----------------------------------------------------------------------
diff --git a/c_src/jiffy.h b/c_src/jiffy.h
index abe5154..b9d5d43 100644
--- a/c_src/jiffy.h
+++ b/c_src/jiffy.h
@@ -35,6 +35,7 @@ typedef struct {
     ERL_NIF_TERM    atom_use_nil;
     ERL_NIF_TERM    atom_null_term;
     ERL_NIF_TERM    atom_escape_forward_slashes;
+    ERL_NIF_TERM    atom_dedupe_keys;
 
     ERL_NIF_TERM    ref_object;
     ERL_NIF_TERM    ref_array;
@@ -61,6 +62,9 @@ ERL_NIF_TERM encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
 void dec_destroy(ErlNifEnv* env, void* obj);
 void enc_destroy(ErlNifEnv* env, void* obj);
 
+int make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out,
+        int ret_map, int dedupe_keys);
+
 int int_from_hex(const unsigned char* p);
 int int_to_hex(int val, char* p);
 int utf8_len(int c);

http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/blob/128811a7/c_src/objects.cc
----------------------------------------------------------------------
diff --git a/c_src/objects.cc b/c_src/objects.cc
new file mode 100644
index 0000000..4ddb049
--- /dev/null
+++ b/c_src/objects.cc
@@ -0,0 +1,71 @@
+// This file is part of Jiffy released under the MIT license.
+// See the LICENSE file for more information.
+
+#include <set>
+#include <string>
+
+#include <assert.h>
+
+#include "erl_nif.h"
+
+#define MAP_TYPE_PRESENT \
+    ((ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION >= 6) \
+    || (ERL_NIF_MAJOR_VERSION > 2))
+
+#define BEGIN_C extern "C" {
+#define END_C }
+
+BEGIN_C
+
+int
+make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out,
+        int ret_map, int dedupe_keys)
+{
+    ERL_NIF_TERM ret;
+    ERL_NIF_TERM key;
+    ERL_NIF_TERM val;
+
+#if MAP_TYPE_PRESENT
+    if(ret_map) {
+        ret = enif_make_new_map(env);
+        while(enif_get_list_cell(env, pairs, &val, &pairs)) {
+            if(!enif_get_list_cell(env, pairs, &key, &pairs)) {
+                assert(0 == 1 && "Unbalanced object pairs.");
+            }
+            if(!enif_make_map_put(env, ret, key, val, &ret)) {
+                return 0;
+            }
+        }
+        *out = ret;
+        return 1;
+    }
+#endif
+
+    std::set<std::string> seen;
+    ret = enif_make_list(env, 0);
+    while(enif_get_list_cell(env, pairs, &val, &pairs)) {
+        if(!enif_get_list_cell(env, pairs, &key, &pairs)) {
+            assert(0 == 1 && "Unbalanced object pairs.");
+        }
+        if(dedupe_keys) {
+            ErlNifBinary bin;
+            if(!enif_inspect_binary(env, key, &bin)) {
+                return 0;
+            }
+            std::string skey((char*) bin.data, bin.size);
+            if(seen.count(skey) == 0) {
+                seen.insert(skey);
+                val = enif_make_tuple2(env, key, val);
+                ret = enif_make_list_cell(env, val, ret);
+            }
+        } else {
+            val = enif_make_tuple2(env, key, val);
+            ret = enif_make_list_cell(env, val, ret);
+        }
+    }
+    *out = enif_make_tuple1(env, ret);
+
+    return 1;
+}
+
+END_C

http://git-wip-us.apache.org/repos/asf/couchdb-jiffy/blob/128811a7/test/jiffy_16_dedupe_keys_tests.erl
----------------------------------------------------------------------
diff --git a/test/jiffy_16_dedupe_keys_tests.erl b/test/jiffy_16_dedupe_keys_tests.erl
new file mode 100644
index 0000000..a17c474
--- /dev/null
+++ b/test/jiffy_16_dedupe_keys_tests.erl
@@ -0,0 +1,50 @@
+% This file is part of Jiffy released under the MIT license.
+% See the LICENSE file for more information.
+
+-module(jiffy_16_dedupe_keys_tests).
+
+-include_lib("eunit/include/eunit.hrl").
+
+dedupe_keys_test_() ->
+    Opts = [dedupe_keys],
+    Cases = [
+        % Simple sanity check
+        {
+            {[{<<"foo">>, 1}]},
+            {[{<<"foo">>, 1}]}
+        },
+        % Basic test
+        {
+            {[{<<"foo">>, 1}, {<<"foo">>, 2}]},
+            {[{<<"foo">>, 2}]}
+        },
+        % Non-repeated keys are fine
+        {
+            {[{<<"foo">>, 1}, {<<"bar">>, 2}]},
+            {[{<<"foo">>, 1}, {<<"bar">>, 2}]}
+        },
+        % Key order stays the same other than deduped keys
+        {
+            {[{<<"bar">>, 1}, {<<"foo">>, 2}, {<<"baz">>, 3}, {<<"foo">>, 4}]},
+            {[{<<"bar">>, 1}, {<<"baz">>, 3}, {<<"foo">>, 4}]}
+        },
+        % Multiple repeats are handled
+        {
+            {[{<<"foo">>, 1}, {<<"foo">>, 2}, {<<"foo">>, 3}]},
+            {[{<<"foo">>, 3}]}
+        },
+        % Embedded NULL bytes are handled
+        {
+            {[{<<"foo\\u0000bar">>, 1}, {<<"foo\\u0000baz">>, 2}]},
+            {[{<<"foo\\u0000bar">>, 1}, {<<"foo\\u0000baz">>, 2}]}
+        },
+        % Can dedupe with embedded NULL bytes
+        {
+            {[{<<"foo\\u0000bar">>, 1}, {<<"foo\\u0000bar">>, 2}]},
+            {[{<<"foo\\u0000bar">>, 2}]}
+        }
+    ],
+    {"Test dedupe_keys", lists:map(fun({Data, Result}) ->
+        Json = jiffy:encode(Data),
+        ?_assertEqual(Result, jiffy:decode(Json, Opts))
+    end, Cases)}.


Mime
View raw message