avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bru...@apache.org
Subject svn commit: r1057736 - in /avro/trunk: CHANGES.txt lang/c/src/CMakeLists.txt lang/c/src/Makefile.am lang/c/src/avro.h lang/c/src/datum_json.c lang/c/tests/test_avro_data.c
Date Tue, 11 Jan 2011 17:16:12 GMT
Author: brucem
Date: Tue Jan 11 17:16:12 2011
New Revision: 1057736

URL: http://svn.apache.org/viewvc?rev=1057736&view=rev
Log:
AVRO-729. JSON encoded Avro values.

You can now produce the JSON-encoded version of an Avro datum.  It
correctly handles Avro values that produce a top-level JSON value that
isn't an array or object, even though that technically violates the JSON
spec.  You have to free the resulting string using the standard free()
function, and *not* using the custom Avro allocator, since the string
will be produced by the Jansson library, which doesn't know about our
custom allocator.

Added:
    avro/trunk/lang/c/src/datum_json.c
Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/c/src/CMakeLists.txt
    avro/trunk/lang/c/src/Makefile.am
    avro/trunk/lang/c/src/avro.h
    avro/trunk/lang/c/tests/test_avro_data.c

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1057736&r1=1057735&r2=1057736&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Tue Jan 11 17:16:12 2011
@@ -59,6 +59,8 @@ Avro 1.5.0 (unreleased)
     AVRO-549. C: Route all memory allocations through an interface. (Douglas
     Creager via brucem)
 
+    AVRO-729. C: JSON encoded Avro values. (Douglas Creager via brucem)
+
   IMPROVEMENTS
 
     AVRO-682. Java: Add method DataFileStream.getMetaKeys().

Modified: avro/trunk/lang/c/src/CMakeLists.txt
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/src/CMakeLists.txt?rev=1057736&r1=1057735&r2=1057736&view=diff
==============================================================================
--- avro/trunk/lang/c/src/CMakeLists.txt (original)
+++ avro/trunk/lang/c/src/CMakeLists.txt Tue Jan 11 17:16:12 2011
@@ -26,6 +26,7 @@ set(AVRO_SRC
     datum.c
     datum.h
     datum_equal.c
+    datum_json.c
     datum_read.c
     datum_size.c
     datum_skip.c

Modified: avro/trunk/lang/c/src/Makefile.am
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/src/Makefile.am?rev=1057736&r1=1057735&r2=1057736&view=diff
==============================================================================
--- avro/trunk/lang/c/src/Makefile.am (original)
+++ avro/trunk/lang/c/src/Makefile.am Tue Jan 11 17:16:12 2011
@@ -8,6 +8,7 @@ include_HEADERS = avro.h
 lib_LTLIBRARIES = libavro.la
 libavro_la_SOURCES = st.c st.h schema.c schema.h schema_equal.c \
 datum.c datum_equal.c datum_validate.c datum_read.c datum_skip.c datum_write.c datum_size.c datum.h \
+datum_json.c \
 io.c dump.c dump.h encoding_binary.c \
 allocation.h allocation.c \
 avro_private.h encoding.h datafile.c

Modified: avro/trunk/lang/c/src/avro.h
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/src/avro.h?rev=1057736&r1=1057735&r2=1057736&view=diff
==============================================================================
--- avro/trunk/lang/c/src/avro.h (original)
+++ avro/trunk/lang/c/src/avro.h Tue Jan 11 17:16:12 2011
@@ -338,6 +338,15 @@ void avro_datum_print(avro_datum_t value
 
 int avro_datum_equal(avro_datum_t a, avro_datum_t b);
 
+/*
+ * Stores the JSON encoding of an Avro value in *json_str; returns 0 or
+ * an errno value.  Free the string with the standard free() function
+ * when you're done with it.  (*Not* using the custom Avro allocator.)
+ */
+
+int avro_datum_to_json(const avro_datum_t datum, const avro_schema_t schema,
+		       int one_line, char **json_str);
+
 int avro_schema_match(avro_schema_t writers_schema,
 		      avro_schema_t readers_schema);
 

Added: avro/trunk/lang/c/src/datum_json.c
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/src/datum_json.c?rev=1057736&view=auto
==============================================================================
--- avro/trunk/lang/c/src/datum_json.c (added)
+++ avro/trunk/lang/c/src/datum_json.c Tue Jan 11 17:16:12 2011
@@ -0,0 +1,351 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0 
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License. 
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "avro.h"
+#include "allocation.h"
+#include "datum.h"
+#include "jansson.h"
+
+/*
+ * Converts a binary buffer into a NUL-terminated JSON UTF-8 string.
+ * Avro bytes and fixed values are encoded in JSON as a string, and JSON
+ * strings must be in UTF-8.  For these Avro types, the JSON string is
+ * restricted to the characters U+0000..U+00FF, which corresponds to the
+ * ISO-8859-1 character set.  This function performs this conversion:
+ * bytes 0x00..0x7f are copied unchanged, and bytes 0x80..0xff become
+ * the two-byte UTF-8 sequence for the code point of the same value.
+ *
+ * On success, returns 0, stores the new buffer in *dest, and stores the
+ * size of that allocation (the encoded length *plus* the NUL
+ * terminator) in *dest_len.  The buffer must be released with
+ * avro_free(*dest, *dest_len) when you're done with it.  Returns EINVAL
+ * for bad arguments and ENOMEM if the buffer can't be allocated; the
+ * output parameters are left untouched in both cases.
+ *
+ * A NULL src is accepted when src_len is 0, and yields an empty string.
+ *
+ * NOTE(review): a 0x00 byte in the input is copied through as a raw
+ * NUL, so any consumer that treats the result as a C string will stop
+ * at that byte.
+ */
+
+static int
+encode_utf8_bytes(const void *src, size_t src_len,
+		  void **dest, size_t *dest_len)
+{
+	// An empty buffer has nothing to read, so it doesn't need a
+	// valid source pointer.
+	if ((!src && src_len > 0) || !dest || !dest_len) {
+		return EINVAL;
+	}
+
+	// First, determine the size of the resulting UTF-8 buffer.
+	// Bytes in the range 0x00..0x7f will take up one byte; bytes in
+	// the range 0x80..0xff will take up two.
+	const uint8_t  *src8 = src;
+
+	size_t  utf8_len = src_len + 1;  // +1 for NUL terminator
+	size_t  i;
+	for (i = 0; i < src_len; i++) {
+		if (src8[i] & 0x80) {
+			utf8_len++;
+		}
+	}
+
+	// Allocate a new buffer for the UTF-8 string and fill it in.
+	uint8_t  *dest8 = avro_malloc(utf8_len);
+	if (dest8 == NULL) {
+		return ENOMEM;
+	}
+
+	uint8_t  *curr = dest8;
+	for (i = 0; i < src_len; i++) {
+		if (src8[i] & 0x80) {
+			// Two-byte form: 110000xx 10xxxxxx, where the
+			// lead byte carries the top two bits of the
+			// value and the continuation byte the low six.
+			*curr++ = (0xc0 | (src8[i] >> 6));
+			*curr++ = (0x80 | (src8[i] & 0x3f));
+		} else {
+			*curr++ = src8[i];
+		}
+	}
+
+	*curr = '\0';
+
+	// And we're good.  Note that the reported length is the
+	// allocation size, since that's what avro_free needs.
+	*dest = dest8;
+	*dest_len = utf8_len;
+	return 0;
+}
+
+/*
+ * Builds the JSON string for the raw contents of an Avro bytes or fixed
+ * value.  Returns a new reference, or NULL if the buffer can't be
+ * re-encoded or the JSON string can't be created.
+ */
+
+static json_t *
+binary_to_json_string(const void *buf, size_t size)
+{
+	void  *encoded = NULL;
+	size_t  encoded_size = 0;
+
+	if (encode_utf8_bytes(buf, size, &encoded, &encoded_size)) {
+		return NULL;
+	}
+
+	// The temporary buffer is released right away, which relies on
+	// json_string_nocheck making its own copy of the text.
+	json_t  *result = json_string_nocheck(encoded);
+	avro_free(encoded, encoded_size);
+	return result;
+}
+
+/*
+ * Converts an Avro datum into a Jansson value, using the schema to find
+ * element, field, and branch types.  Returns a new reference that the
+ * caller must release with json_decref, or NULL if any part of the
+ * conversion fails (including a datum type that isn't handled below).
+ * Any partially built container is released before returning NULL.
+ */
+
+static json_t *
+avro_datum_to_json_t(const avro_datum_t datum, const avro_schema_t schema)
+{
+	switch (avro_typeof(datum)) {
+		case AVRO_BOOLEAN:
+			return avro_datum_to_boolean(datum)->i?
+			    json_true():
+			    json_false();
+
+		case AVRO_BYTES:
+			{
+				struct avro_bytes_datum_t  *bytes =
+				    avro_datum_to_bytes(datum);
+				return binary_to_json_string(bytes->bytes, bytes->size);
+			}
+
+		case AVRO_DOUBLE:
+			return json_real(avro_datum_to_double(datum)->d);
+
+		case AVRO_FLOAT:
+			return json_real(avro_datum_to_float(datum)->f);
+
+		case AVRO_INT32:
+			return json_integer(avro_datum_to_int32(datum)->i32);
+
+		case AVRO_INT64:
+			return json_integer(avro_datum_to_int64(datum)->i64);
+
+		case AVRO_NULL:
+			return json_null();
+
+		case AVRO_STRING:
+			return json_string(avro_datum_to_string(datum)->s);
+
+		case AVRO_ARRAY:
+			{
+				// Each element is converted with the
+				// array's item schema and appended in
+				// index order.
+				json_t  *result = json_array();
+				if (!result) {
+					return NULL;
+				}
+
+				avro_schema_t  element_schema = avro_schema_array_items(schema);
+				int  num_elements = avro_array_size(datum);
+				int  i;
+				for (i = 0; i < num_elements; i++) {
+					avro_datum_t  element = NULL;
+					if (avro_array_get(datum, i, &element)) {
+						json_decref(result);
+						return NULL;
+					}
+
+					json_t  *element_json =
+					    avro_datum_to_json_t(element, element_schema);
+					if (!element_json) {
+						json_decref(result);
+						return NULL;
+					}
+
+					if (json_array_append_new(result, element_json)) {
+						json_decref(result);
+						return NULL;
+					}
+				}
+
+				return result;
+			}
+
+		case AVRO_ENUM:
+			return json_string(avro_enum_get_name(datum, schema));
+
+		case AVRO_FIXED:
+			{
+				struct avro_fixed_datum_t  *fixed =
+				    avro_datum_to_fixed(datum);
+				return binary_to_json_string(fixed->bytes, fixed->size);
+			}
+
+		case AVRO_MAP:
+			{
+				// Entries are visited by index; the key
+				// at each index is then used to look up
+				// its value, which is converted with the
+				// map's value schema.
+				json_t  *result = json_object();
+				if (!result) {
+					return NULL;
+				}
+
+				avro_schema_t  element_schema = avro_schema_map_values(schema);
+				int  num_elements = avro_map_size(datum);
+				int  i;
+				for (i = 0; i < num_elements; i++) {
+					const char  *key = NULL;
+					if (avro_map_get_key(datum, i, &key)) {
+						json_decref(result);
+						return NULL;
+					}
+
+					avro_datum_t  element = NULL;
+					if (avro_map_get(datum, key, &element)) {
+						json_decref(result);
+						return NULL;
+					}
+
+					json_t  *element_json =
+					    avro_datum_to_json_t(element, element_schema);
+					if (!element_json) {
+						json_decref(result);
+						return NULL;
+					}
+
+					if (json_object_set_new(result, key, element_json)) {
+						json_decref(result);
+						return NULL;
+					}
+				}
+
+				return result;
+			}
+
+		case AVRO_RECORD:
+			{
+				// The schema drives the iteration, so
+				// the fields are emitted in the order the
+				// schema lists them.
+				json_t  *result = json_object();
+				if (!result) {
+					return NULL;
+				}
+
+				int  num_fields = avro_schema_record_size(schema);
+				int  i;
+				for (i = 0; i < num_fields; i++) {
+					const char  *field_name =
+					    avro_schema_record_field_name(schema, i);
+
+					avro_schema_t  field_schema =
+					    avro_schema_record_field_get(schema, field_name);
+
+					avro_datum_t  field = NULL;
+					if (avro_record_get(datum, field_name, &field)) {
+						json_decref(result);
+						return NULL;
+					}
+
+					json_t  *field_json =
+					    avro_datum_to_json_t(field, field_schema);
+					if (!field_json) {
+						json_decref(result);
+						return NULL;
+					}
+
+					if (json_object_set_new(result, field_name, field_json)) {
+						json_decref(result);
+						return NULL;
+					}
+				}
+
+				return result;
+			}
+
+		case AVRO_UNION:
+			{
+				int64_t  discriminant = avro_union_discriminant(datum);
+				avro_datum_t  branch = avro_union_current_branch(datum);
+				avro_schema_t  branch_schema =
+				    avro_schema_union_branch(schema, discriminant);
+
+				// A null branch is written as a bare JSON
+				// null.  Every other branch is wrapped in
+				// a single-member object whose key is the
+				// branch schema's type name.
+				if (is_avro_null(branch_schema)) {
+					return json_null();
+				}
+
+				json_t  *result = json_object();
+				if (!result) {
+					return NULL;
+				}
+
+				json_t  *branch_json = avro_datum_to_json_t(branch, branch_schema);
+				if (!branch_json) {
+					json_decref(result);
+					return NULL;
+				}
+
+				const char  *branch_name = avro_schema_type_name(branch_schema);
+				if (json_object_set_new(result, branch_name, branch_json)) {
+					json_decref(result);
+					return NULL;
+				}
+
+				return result;
+			}
+
+		default:
+			return NULL;
+	}
+}
+
+/*
+ * Produces the JSON encoding of an Avro datum as a NUL-terminated
+ * string.
+ *
+ * datum and schema are the value to encode and the schema describing
+ * it.  If one_line is nonzero the output is written without
+ * indentation; otherwise it is indented by two spaces.  On success the
+ * string is stored in *json_str and 0 is returned.  The string comes
+ * from malloc (directly, or via Jansson), so it must be released with
+ * the standard free() function, not the custom Avro allocator.
+ *
+ * Returns EINVAL for invalid arguments, and ENOMEM if the datum can't
+ * be converted or the string can't be produced.  *json_str is not
+ * modified when an error is returned.
+ */
+
+int avro_datum_to_json(const avro_datum_t datum, const avro_schema_t schema,
+		       int one_line, char **json_str)
+{
+	if (!is_avro_datum(datum) || !is_avro_schema(schema) || !json_str) {
+		return EINVAL;
+	}
+
+	json_t  *json = avro_datum_to_json_t(datum, schema);
+	if (!json) {
+		return ENOMEM;
+	}
+
+	// Jansson will only encode an object or array as the root
+	// element.
+
+	if (json_is_array(json) || json_is_object(json)) {
+		char  *dumped = json_dumps
+		    (json,
+		     JSON_INDENT(one_line? 0: 2) |
+		     JSON_ENSURE_ASCII |
+		     JSON_PRESERVE_ORDER);
+		json_decref(json);
+		if (!dumped) {
+			return ENOMEM;
+		}
+
+		*json_str = dumped;
+		return 0;
+	}
+
+	// Otherwise we have to play some games.  We'll wrap the JSON
+	// value in an array, and then strip off the leading and
+	// trailing square brackets.
+
+	json_t  *array = json_array();
+	if (!array) {
+		json_decref(json);
+		return ENOMEM;
+	}
+
+	// json_array_append_new takes over our reference to json, so
+	// from here on only the array needs to be released.
+	// NOTE(review): this assumes the reference is also consumed
+	// when the append fails -- confirm against the Jansson version
+	// in use.
+	if (json_array_append_new(array, json)) {
+		json_decref(array);
+		return ENOMEM;
+	}
+
+	char  *array_str = json_dumps
+	    (array,
+	     JSON_INDENT(one_line? 0: 2) |
+	     JSON_ENSURE_ASCII |
+	     JSON_PRESERVE_ORDER);
+	json_decref(array);
+	if (!array_str) {
+		return ENOMEM;
+	}
+
+	// If the caller requested a one-line string, then we strip off
+	// "[" from the front and "]" from the back.  Otherwise, we
+	// strip off "[\n  " from the front and "\n]" from the back.
+
+	size_t  length = strlen(array_str);
+	size_t  front_chop = one_line? 1: 4;
+	size_t  back_chop = one_line? 1: 2;
+	if (length < front_chop + back_chop) {
+		// The dump doesn't have the shape described above;
+		// bail out rather than let the subtraction wrap around.
+		free(array_str);
+		return EINVAL;
+	}
+	length -= (front_chop + back_chop);
+
+	// We don't use the custom allocator, because we need to mimic
+	// the string that Jansson would have returned.
+
+	char  *result = malloc(length + 1);
+	if (!result) {
+		free(array_str);
+		return ENOMEM;
+	}
+
+	memcpy(result, array_str + front_chop, length);
+	result[length] = '\0';
+	free(array_str);
+
+	*json_str = result;
+	return 0;
+}

Modified: avro/trunk/lang/c/tests/test_avro_data.c
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/tests/test_avro_data.c?rev=1057736&r1=1057735&r2=1057736&view=diff
==============================================================================
--- avro/trunk/lang/c/tests/test_avro_data.c (original)
+++ avro/trunk/lang/c/tests/test_avro_data.c Tue Jan 11 17:16:12 2011
@@ -130,6 +130,18 @@ write_read_check(avro_schema_t writers_s
 	}
 }
 
+/*
+ * Encodes datum as one-line JSON using schema, and exits the test
+ * program with a failure status if the encoding fails or the result
+ * differs from expected.
+ */
+static void test_json(avro_datum_t datum, avro_schema_t schema,
+		      const char *expected)
+{
+	char  *json = NULL;
+	int  rval = avro_datum_to_json(datum, schema, 1, &json);
+	// Check the result before comparing; strcmp on a NULL string
+	// would crash instead of reporting the failure.
+	if (rval != 0 || json == NULL) {
+		fprintf(stderr, "Unable to encode JSON (error %d), expected: %s\n",
+			rval, expected);
+		exit(EXIT_FAILURE);
+	}
+	if (strcmp(json, expected) != 0) {
+		fprintf(stderr, "Unexpected JSON encoding: %s\n", json);
+		exit(EXIT_FAILURE);
+	}
+	// The string must be released with plain free(), not the Avro
+	// allocator.
+	free(json);
+}
+
 static int test_string(void)
 {
 	unsigned int i;
@@ -144,6 +156,12 @@ static int test_string(void)
 		write_read_check(writer_schema, NULL, datum, "string");
 		avro_datum_decref(datum);
 	}
+
+	avro_datum_t  datum = avro_wrapstring(strings[0]);
+	test_json(datum, writer_schema,
+		  "\"Four score and seven years ago\"");
+	avro_datum_decref(datum);
+
 	avro_schema_decref(writer_schema);
 	return 0;
 }
@@ -157,6 +175,8 @@ static int test_bytes(void)
 
 	datum = avro_wrapbytes(bytes, sizeof(bytes));
 	write_read_check(writer_schema, NULL, datum, "bytes");
+	test_json(datum, writer_schema,
+		  "\"\\u00de\\u00ad\\u00be\\u00ef\"");
 	avro_datum_decref(datum);
 	avro_schema_decref(writer_schema);
 
@@ -184,6 +204,11 @@ static int test_int32(void)
 		write_read_check(writer_schema, NULL, datum, "int");
 		avro_datum_decref(datum);
 	}
+
+	avro_datum_t  datum = avro_int32(10000);
+	test_json(datum, writer_schema, "10000");
+	avro_datum_decref(datum);
+
 	avro_schema_decref(writer_schema);
 	return 0;
 }
@@ -197,6 +222,11 @@ static int test_int64(void)
 		write_read_check(writer_schema, NULL, datum, "long");
 		avro_datum_decref(datum);
 	}
+
+	avro_datum_t  datum = avro_int64(10000);
+	test_json(datum, writer_schema, "10000");
+	avro_datum_decref(datum);
+
 	avro_schema_decref(writer_schema);
 	return 0;
 }
@@ -210,6 +240,11 @@ static int test_double(void)
 		write_read_check(schema, NULL, datum, "double");
 		avro_datum_decref(datum);
 	}
+
+	avro_datum_t  datum = avro_double(2000.0);
+	test_json(datum, schema, "2000.0");
+	avro_datum_decref(datum);
+
 	avro_schema_decref(schema);
 	return 0;
 }
@@ -223,6 +258,11 @@ static int test_float(void)
 		write_read_check(schema, NULL, datum, "float");
 		avro_datum_decref(datum);
 	}
+
+	avro_datum_t  datum = avro_float(2000.0);
+	test_json(datum, schema, "2000.0");
+	avro_datum_decref(datum);
+
 	avro_schema_decref(schema);
 	return 0;
 }
@@ -230,10 +270,12 @@ static int test_float(void)
 static int test_boolean(void)
 {
 	int i;
+	const char  *expected_json[] = { "false", "true" };
 	avro_schema_t schema = avro_schema_boolean();
 	for (i = 0; i <= 1; i++) {
 		avro_datum_t datum = avro_boolean(i);
 		write_read_check(schema, NULL, datum, "boolean");
+		test_json(datum, schema, expected_json[i]);
 		avro_datum_decref(datum);
 	}
 	avro_schema_decref(schema);
@@ -245,6 +287,7 @@ static int test_null(void)
 	avro_schema_t schema = avro_schema_null();
 	avro_datum_t datum = avro_null();
 	write_read_check(schema, NULL, datum, "null");
+	test_json(datum, schema, "null");
 	avro_datum_decref(datum);
 	return 0;
 }
@@ -265,6 +308,8 @@ static int test_record(void)
 	avro_record_set(datum, "age", age_datum);
 
 	write_read_check(schema, NULL, datum, "record");
+	test_json(datum, schema,
+		  "{\"name\": \"Joseph Campbell\", \"age\": 83}");
 
 	avro_datum_decref(name_datum);
 	avro_datum_decref(age_datum);
@@ -302,6 +347,7 @@ static int test_enum(void)
 	}
 
 	write_read_check(schema, NULL, datum, "enum");
+	test_json(datum, schema, "\"C\"");
 
 	avro_enum_set(datum, AVRO_CPP);
 	if (strcmp(avro_enum_get_name(datum, schema), "C++") != 0) {
@@ -310,6 +356,7 @@ static int test_enum(void)
 	}
 
 	write_read_check(schema, NULL, datum, "enum");
+	test_json(datum, schema, "\"C++\"");
 
 	avro_enum_set_name(datum, schema, "Python");
 	if (avro_enum_get(datum) != AVRO_PYTHON) {
@@ -318,6 +365,7 @@ static int test_enum(void)
 	}
 
 	write_read_check(schema, NULL, datum, "enum");
+	test_json(datum, schema, "\"Python\"");
 
 	avro_datum_decref(datum);
 	avro_schema_decref(schema);
@@ -345,6 +393,7 @@ static int test_array(void)
 	}
 
 	write_read_check(schema, NULL, datum, "array");
+	test_json(datum, schema, "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]");
 	avro_datum_decref(datum);
 	avro_schema_decref(schema);
 	return 0;
@@ -382,6 +431,9 @@ static int test_map(void)
 	}
 
 	write_read_check(schema, NULL, datum, "map");
+	test_json(datum, schema,
+		  "{\"zero\": 0, \"one\": 1, \"two\": 2, \"three\": 3, "
+		  "\"four\": 4, \"five\": 5, \"six\": 6}");
 	avro_datum_decref(datum);
 	avro_schema_decref(schema);
 	return 0;
@@ -422,6 +474,13 @@ static int test_union(void)
 	}
 
 	write_read_check(schema, NULL, union_datum, "union");
+	test_json(union_datum, schema,
+		  "{\"string\": \"Follow your bliss.\"}");
+
+	avro_datum_decref(datum);
+	avro_union_set_discriminant(union_datum, schema, 2, &datum);
+	test_json(union_datum, schema, "null");
+
 	avro_datum_decref(union_datum);
 	avro_datum_decref(datum);
 	avro_datum_decref(union_datum1);
@@ -438,6 +497,7 @@ static int test_fixed(void)
 
 	datum = avro_wrapfixed("msg", bytes, sizeof(bytes));
 	write_read_check(schema, NULL, datum, "fixed");
+	test_json(datum, schema, "\"\\r\\n\\r\\n\\u000b\\n\\u000b\\n\"");
 	avro_datum_decref(datum);
 	avro_schema_decref(schema);
 



Mime
View raw message