avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mas...@apache.org
Subject svn commit: r908209 - in /hadoop/avro/trunk: ./ lang/c/ lang/c/docs/ lang/c/examples/ lang/c/src/ lang/c/tests/
Date Tue, 09 Feb 2010 20:42:03 GMT
Author: massie
Date: Tue Feb  9 20:42:02 2010
New Revision: 908209

URL: http://svn.apache.org/viewvc?rev=908209&view=rev
Log:
AVRO-412. Allow schema validation to be optional

Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/lang/c/docs/index.txt
    hadoop/avro/trunk/lang/c/examples/Makefile.am
    hadoop/avro/trunk/lang/c/src/avro.h
    hadoop/avro/trunk/lang/c/src/datafile.c
    hadoop/avro/trunk/lang/c/src/datum.c
    hadoop/avro/trunk/lang/c/src/datum.h
    hadoop/avro/trunk/lang/c/src/datum_equal.c
    hadoop/avro/trunk/lang/c/src/datum_read.c
    hadoop/avro/trunk/lang/c/src/datum_validate.c
    hadoop/avro/trunk/lang/c/src/datum_write.c
    hadoop/avro/trunk/lang/c/tests/generate_interop_data.c
    hadoop/avro/trunk/lang/c/tests/test_avro_data.c
    hadoop/avro/trunk/lang/c/version.sh

Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Tue Feb  9 20:42:02 2010
@@ -320,6 +320,8 @@
 
     AVRO-261. Allow Schemas to be immutable (thiru)
 
+    AVRO-412. Allow schema validation to be optional (massie)
+
   OPTIMIZATIONS
 
     AVRO-172. More efficient schema processing (massie)

Modified: hadoop/avro/trunk/lang/c/docs/index.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/docs/index.txt?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/docs/index.txt (original)
+++ hadoop/avro/trunk/lang/c/docs/index.txt Tue Feb  9 20:42:02 2010
@@ -34,9 +34,27 @@
 A C program is like a fast dance on a newly waxed dance floor by people carrying razors.
 ____
 
-The C implementation is still not quite ready for production use.
-The current code is being tested on +MacOS X+ and +Linux+.  We're
-always looking for contributions so, if you're a C hacker, please
+The C implementation has been tested on +MacOSX+ and +Linux+ but, over
+time, the number of supported OSes should grow.  Please let us know if
+you're using +Avro C+ on other systems. There are no dependencies on 
+external libraries.  We embedded http://www.digip.org/jansson/[Jansson] into
++Avro C+ for parsing JSON into schema structures.  
+
+The C implementation supports:
+
+* binary encoding/decoding of all primitive and complex data types
+* storage to an Avro Object Container File
+* schema resolution, promotion and projection
+* validating and non-validating mode for writing Avro data
+
+The C implementation is lacking:
+
+* RPC
+
+To learn about the API, take a look at the examples and reference files
+later in this document.
+
+We're always looking for contributions so, if you're a C hacker, please
 feel free to http://hadoop.apache.org/avro/[submit patches to the
 project].
 
@@ -107,7 +125,7 @@
 
 [WARNING] 
 ===============================
-Don't "give" +Avro C+ a string that you haven't allocated from the heap with +malloc+.
+Don't "give" +Avro C+ a string that you haven't allocated from the heap with e.g. +malloc+ or +strdup+.
 
 For example, *don't* do this:
 ----
@@ -115,8 +133,31 @@
 ----
 ===============================
 
+== Schema Validation
+
+If you want to write a datum, you would use the following function
+
+[source,c]
+----
+int avro_write_data(avro_writer_t writer,
+                    avro_schema_t writers_schema, avro_datum_t datum);
+----
+
+If you pass in a +writers_schema+, then your +datum+ will be validated *before*
+it is sent to the +writer+.  This check ensures that your data has the 
+correct format.  If you are certain your datum is correct, you can pass
+a +NULL+ value for +writers_schema+ and +Avro C+ will not validate before
+writing.
+
+NOTE: Data written to an Avro Object Container File is always validated.
+
 == Examples
 
+[quote,Dante Hicks]
+____
+I'm not even supposed to be here today!
+____
+
 Imagine you're a free-lance hacker in Leonardo, New Jersey and you've 
 been approached by the owner of the local *Quick Stop Convenience* store.
 He wants you to create a contact database in case he needs to call employees

Modified: hadoop/avro/trunk/lang/c/examples/Makefile.am
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/examples/Makefile.am?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/examples/Makefile.am (original)
+++ hadoop/avro/trunk/lang/c/examples/Makefile.am Tue Feb  9 20:42:02 2010
@@ -10,3 +10,5 @@
 
 quickstop_SOURCES=quickstop.c
 quickstop_LDADD=$(examples_LDADD)
+
+CLEANFILES=quickstop.db

Modified: hadoop/avro/trunk/lang/c/src/avro.h
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/avro.h?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/avro.h (original)
+++ hadoop/avro/trunk/lang/c/src/avro.h Tue Feb  9 20:42:02 2010
@@ -86,7 +86,7 @@
                               ||is_avro_fixed(obj))
 #define is_avro_map(obj)      (obj && avro_typeof(obj) == AVRO_MAP)
 #define is_avro_array(obj)    (obj && avro_typeof(obj) == AVRO_ARRAY)
-#define is_avro_union(obj)    (obj && avro_classof(obj) == AVRO_SCHEMA &&
avro_typeof(obj) == AVRO_UNION)
+#define is_avro_union(obj)    (obj && avro_typeof(obj) == AVRO_UNION)
 #define is_avro_complex_type(obj) (!(is_avro_primitive(obj))
 #define is_avro_link(obj)     (obj && avro_typeof(obj) == AVRO_LINK)
 
@@ -181,7 +181,7 @@
 avro_datum_t avro_boolean(int8_t i);
 avro_datum_t avro_null(void);
 avro_datum_t avro_record(const char *name, const char *space);
-avro_datum_t avro_enum(const char *name, const char *symbol);
+avro_datum_t avro_enum(const char *name, int i);
 avro_datum_t avro_fixed(const char *name, const char *bytes,
 			const int64_t size);
 avro_datum_t avro_wrapfixed(const char *name, const char *bytes,
@@ -190,7 +190,7 @@
 			    const int64_t size);
 avro_datum_t avro_map(void);
 avro_datum_t avro_array(void);
-avro_datum_t avro_union(const avro_schema_t schema, const avro_datum_t datum);
+avro_datum_t avro_union(int64_t discriminant, const avro_datum_t datum);
 
 /* getters */
 int avro_string_get(avro_datum_t datum, char **p);

Modified: hadoop/avro/trunk/lang/c/src/datafile.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datafile.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datafile.c (original)
+++ hadoop/avro/trunk/lang/c/src/datafile.c Tue Feb  9 20:42:02 2010
@@ -283,7 +283,7 @@
 		/* Write the sync marker */
 		check(rval, write_sync(w));
 		/* Reset the datum writer */
-		avro_writer_reset(w->writer);
+		avro_writer_reset(w->datum_writer);
 		w->block_count = 0;
 	}
 	return 0;

Modified: hadoop/avro/trunk/lang/c/src/datum.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum.c Tue Feb  9 20:42:02 2010
@@ -380,6 +380,20 @@
 	return &obj;
 }
 
+avro_datum_t avro_union(int64_t discriminant, avro_datum_t value)
+{
+	struct avro_union_datum_t *datum =
+	    malloc(sizeof(struct avro_union_datum_t));
+	if (!datum) {
+		return NULL;
+	}
+	datum->discriminant = discriminant;
+	datum->value = avro_datum_incref(value);
+
+	avro_datum_init(&datum->obj, AVRO_UNION);
+	return &datum->obj;
+}
+
 avro_datum_t avro_record(const char *name, const char *space)
 {
 	struct avro_record_datum_t *datum =
@@ -398,11 +412,23 @@
 		free((void *)datum);
 		return NULL;
 	}
-	datum->fields = st_init_strtable_with_size(DEFAULT_TABLE_SIZE);
-	if (!datum->fields) {
-		free((void *)datum->space);
-		free((void *)datum->name);
-		free((void *)datum);
+	datum->field_order = st_init_numtable_with_size(DEFAULT_TABLE_SIZE);
+	if (!datum->field_order) {
+		if (space) {
+			free((void *)datum->space);
+		}
+		free((char *)datum->name);
+		free(datum);
+		return NULL;
+	}
+	datum->fields_byname = st_init_strtable_with_size(DEFAULT_TABLE_SIZE);
+	if (!datum->fields_byname) {
+		st_free_table(datum->field_order);
+		if (space) {
+			free((void *)datum->space);
+		}
+		free((char *)datum->name);
+		free(datum);
 		return NULL;
 	}
 
@@ -420,7 +446,7 @@
 	} val;
 	if (is_avro_datum(datum) && is_avro_record(datum) && field_name) {
 		if (st_lookup
-		    (avro_datum_to_record(datum)->fields,
+		    (avro_datum_to_record(datum)->fields_byname,
 		     (st_data_t) field_name, &(val.data))) {
 			*field = val.field;
 			return 0;
@@ -442,20 +468,25 @@
 			avro_datum_decref(old_field);
 		} else {
 			/* Inserting new value */
+			struct avro_record_datum_t *record =
+			    avro_datum_to_record(datum);
 			key = strdup(field_name);
 			if (!key) {
 				return ENOMEM;
 			}
+			st_insert(record->field_order,
+				  record->field_order->num_entries,
+				  (st_data_t) key);
 		}
 		avro_datum_incref(field_value);
-		st_insert(avro_datum_to_record(datum)->fields, (st_data_t) key,
-			  (st_data_t) field_value);
+		st_insert(avro_datum_to_record(datum)->fields_byname,
+			  (st_data_t) key, (st_data_t) field_value);
 		return 0;
 	}
 	return EINVAL;
 }
 
-avro_datum_t avro_enum(const char *name, const char *symbol)
+avro_datum_t avro_enum(const char *name, int i)
 {
 	struct avro_enum_datum_t *datum =
 	    malloc(sizeof(struct avro_enum_datum_t));
@@ -463,7 +494,7 @@
 		return NULL;
 	}
 	datum->name = strdup(name);
-	datum->symbol = strdup(symbol);
+	datum->value = i;
 
 	avro_datum_init(&datum->obj, AVRO_ENUM);
 	return &datum->obj;
@@ -740,9 +771,10 @@
 				if (record->space) {
 					free((void *)record->space);
 				}
-				st_foreach(record->fields,
+				st_foreach(record->fields_byname,
 					   char_datum_free_foreach, 0);
-				st_free_table(record->fields);
+				st_free_table(record->field_order);
+				st_free_table(record->fields_byname);
 				free(record);
 			}
 			break;
@@ -750,7 +782,6 @@
 				struct avro_enum_datum_t *enump;
 				enump = avro_datum_to_enum(datum);
 				free((void *)enump->name);
-				free((void *)enump->symbol);
 				free(enump);
 			}
 			break;
@@ -781,7 +812,12 @@
 				free(array);
 			}
 			break;
-		case AVRO_UNION:
+		case AVRO_UNION:{
+				struct avro_union_datum_t *unionp;
+				unionp = avro_datum_to_union(datum);
+				avro_datum_decref(unionp->value);
+				free(unionp);
+			}
 			break;
 		case AVRO_LINK:{
 				/* TODO */

Modified: hadoop/avro/trunk/lang/c/src/datum.h
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum.h?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum.h (original)
+++ hadoop/avro/trunk/lang/c/src/datum.h Tue Feb  9 20:42:02 2010
@@ -76,13 +76,14 @@
 	struct avro_obj_t obj;
 	const char *name;
 	const char *space;
-	st_table *fields;
+	st_table *field_order;
+	st_table *fields_byname;
 };
 
 struct avro_enum_datum_t {
 	struct avro_obj_t obj;
 	const char *name;
-	const char *symbol;
+	int value;
 };
 
 struct avro_array_datum_t {
@@ -90,6 +91,12 @@
 	st_table *els;
 };
 
+struct avro_union_datum_t {
+	struct avro_obj_t obj;
+	int64_t discriminant;
+	avro_datum_t value;
+};
+
 #define avro_datum_to_string(datum_)    (container_of(datum_, struct avro_string_datum_t,
obj))
 #define avro_datum_to_bytes(datum_)     (container_of(datum_, struct avro_bytes_datum_t,
obj))
 #define avro_datum_to_int32(datum_)     (container_of(datum_, struct avro_int32_datum_t,
obj))
@@ -102,5 +109,6 @@
 #define avro_datum_to_record(datum_)    (container_of(datum_, struct avro_record_datum_t,
obj))
 #define avro_datum_to_enum(datum_)      (container_of(datum_, struct avro_enum_datum_t, obj))
 #define avro_datum_to_array(datum_)     (container_of(datum_, struct avro_array_datum_t,
obj))
+#define avro_datum_to_union(datum_)	(container_of(datum_, struct avro_union_datum_t, obj))
 
 #endif

Modified: hadoop/avro/trunk/lang/c/src/datum_equal.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_equal.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_equal.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum_equal.c Tue Feb  9 20:42:02 2010
@@ -76,7 +76,7 @@
 static int record_equal(struct avro_record_datum_t *a,
 			struct avro_record_datum_t *b)
 {
-	struct st_equal_args args = { 1, b->fields };
+	struct st_equal_args args = { 1, b->fields_byname };
 	if (strcmp(a->name, b->name)) {
 		/* This have different names */
 		return 0;
@@ -90,18 +90,16 @@
 		/* One has a namespace, one doesn't */
 		return 0;
 	}
-
-	if (a->fields->num_entries != b->fields->num_entries) {
+	if (a->fields_byname->num_entries != b->fields_byname->num_entries) {
 		return 0;
 	}
-	st_foreach(a->fields, st_equal_foreach, (st_data_t) & args);
+	st_foreach(a->fields_byname, st_equal_foreach, (st_data_t) & args);
 	return args.rval;
 }
 
 static int enum_equal(struct avro_enum_datum_t *a, struct avro_enum_datum_t *b)
 {
-	return strcmp(a->name, b->name) == 0
-	    && strcmp(a->symbol, b->symbol) == 0;
+	return strcmp(a->name, b->name) == 0 && a->value == b->value;
 }
 
 static int fixed_equal(struct avro_fixed_datum_t *a,
@@ -110,7 +108,14 @@
 	return a->size == b->size && memcmp(a->bytes, b->bytes, a->size)
== 0;
 }
 
-int avro_datum_equal(avro_datum_t a, avro_datum_t b)
+static int union_equal(struct avro_union_datum_t *a,
+		       struct avro_union_datum_t *b)
+{
+	/* XXX: not sure. a->discriminant == b->discriminant important? */
+	return avro_datum_equal(a->value, b->value);
+}
+
+int avro_datum_equal(const avro_datum_t a, const avro_datum_t b)
 {
 	if (!(is_avro_datum(a) && is_avro_datum(b))) {
 		return 0;
@@ -161,7 +166,9 @@
 				   avro_datum_to_fixed(b));
 
 	case AVRO_UNION:
-		break;
+		return union_equal(avro_datum_to_union(a),
+				   avro_datum_to_union(b));
+
 	case AVRO_LINK:
 		/*
 		 * TODO 

Modified: hadoop/avro/trunk/lang/c/src/datum_read.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_read.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_read.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum_read.c Tue Feb  9 20:42:02 2010
@@ -20,6 +20,7 @@
 #include "encoding.h"
 #include "schema.h"
 #include "datum.h"
+#include "avro_private.h"
 
 int
 avro_schema_match(avro_schema_t writers_schema, avro_schema_t readers_schema)
@@ -99,20 +100,9 @@
 {
 	int rval;
 	int64_t index;
-	union {
-		st_data_t data;
-		char *sym;
-	} val;
 
-	rval = enc->read_long(reader, &index);
-	if (rval) {
-		return rval;
-	}
-
-	if (!st_lookup(writers_schema->symbols, index, &val.data)) {
-		return EINVAL;
-	}
-	*datum = avro_enum(writers_schema->name, val.sym);
+	check(rval, enc->read_long(reader, &index));
+	*datum = avro_enum(writers_schema->name, index);
 	return 0;
 }
 
@@ -230,21 +220,20 @@
 	   struct avro_union_schema_t *readers_schema, avro_datum_t * datum)
 {
 	int rval;
-	int64_t index;
+	int64_t discriminant;
+	avro_datum_t value;
 	union {
 		st_data_t data;
 		avro_schema_t schema;
 	} val;
-
-	rval = enc->read_long(reader, &index);
-	if (rval) {
-		return rval;
-	}
-
-	if (!st_lookup(writers_schema->branches, index, &val.data)) {
+	check(rval, enc->read_long(reader, &discriminant));
+	if (!st_lookup(writers_schema->branches, discriminant, &val.data)) {
 		return EILSEQ;
 	}
-	return avro_read_data(reader, val.schema, NULL, datum);
+	check(rval, avro_read_data(reader, val.schema, NULL, &value));
+	*datum = avro_union(discriminant, value);
+	avro_datum_decref(value);
+	return 0;
 }
 
 /* TODO: handle default values in fields */
@@ -310,27 +299,6 @@
 		return EINVAL;
 	}
 
-	/*
-	 * schema resolution 
-	 */
-	if (!is_avro_union(writers_schema) && is_avro_union(readers_schema)) {
-		struct avro_union_schema_t *union_schema =
-		    avro_schema_to_union(readers_schema);
-
-		for (i = 0; i < union_schema->branches->num_entries; i++) {
-			union {
-				st_data_t data;
-				avro_schema_t schema;
-			} val;
-			st_lookup(union_schema->branches, i, &val.data);
-			if (avro_schema_match(writers_schema, val.schema)) {
-				return avro_read_data(reader, writers_schema,
-						      val.schema, datum);
-			}
-		}
-		return EINVAL;
-	}
-
 	switch (avro_typeof(writers_schema)) {
 	case AVRO_NULL:
 		rval = enc->read_null(reader);

Modified: hadoop/avro/trunk/lang/c/src/datum_validate.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_validate.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_validate.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum_validate.c Tue Feb  9 20:42:02 2010
@@ -84,15 +84,11 @@
 
 	case AVRO_ENUM:
 		if (is_avro_enum(datum)) {
-			struct avro_enum_schema_t *enump =
-			    avro_schema_to_enum(expected_schema);
-			struct avro_enum_datum_t *d = avro_datum_to_enum(datum);
-			union {
-				st_data_t data;
-				long idx;
-			} val;
-			return st_lookup(enump->symbols_byname,
-					 (st_data_t) d->symbol, &val.data);
+			long value = avro_datum_to_enum(datum)->value;
+			long max_value =
+			    avro_schema_to_enum(expected_schema)->symbols->
+			    num_entries;
+			return 0 <= value && value <= max_value;
 		}
 		return 0;
 
@@ -130,24 +126,25 @@
 		break;
 
 	case AVRO_UNION:
-		{
+		if (is_avro_union(datum)) {
 			struct avro_union_schema_t *union_schema =
 			    avro_schema_to_union(expected_schema);
+			struct avro_union_datum_t *union_datum =
+			    avro_datum_to_union(datum);
+			union {
+				st_data_t data;
+				avro_schema_t schema;
+			} val;
 
-			for (i = 0; i < union_schema->branches->num_entries;
-			     i++) {
-				union {
-					st_data_t data;
-					avro_schema_t schema;
-				} val;
-				st_lookup(union_schema->branches, i, &val.data);
-				if (avro_schema_datum_validate
-				    (val.schema, datum)) {
-					return 1;
-				}
+			if (!st_lookup
+			    (union_schema->branches, union_datum->discriminant,
+			     &val.data)) {
+				return 0;
 			}
+			return avro_schema_datum_validate(val.schema,
+							  union_datum->value);
 		}
-		return 0;
+		break;
 
 	case AVRO_RECORD:
 		if (is_avro_record(datum)) {

Modified: hadoop/avro/trunk/lang/c/src/datum_write.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_write.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_write.c (original)
+++ hadoop/avro/trunk/lang/c/src/datum_write.c Tue Feb  9 20:42:02 2010
@@ -21,27 +21,45 @@
 #include "datum.h"
 #include "encoding.h"
 
+static int write_datum(avro_writer_t writer, const avro_encoding_t * enc,
+		       avro_schema_t writers_schema, avro_datum_t datum);
+
 static int
 write_record(avro_writer_t writer, const avro_encoding_t * enc,
-	     struct avro_record_schema_t *record, avro_datum_t datum)
+	     struct avro_record_schema_t *schema, avro_datum_t datum)
 {
 	int rval;
 	long i;
+	avro_datum_t field_datum;
 
-	for (i = 0; i < record->fields->num_entries; i++) {
-		avro_datum_t field_datum;
-		union {
-			st_data_t data;
-			struct avro_record_field_t *field;
-		} val;
-		st_lookup(record->fields, i, &val.data);
-		rval = avro_record_get(datum, val.field->name, &field_datum);
-		if (rval) {
-			return rval;
+	if (schema) {
+		for (i = 0; i < schema->fields->num_entries; i++) {
+			union {
+				st_data_t data;
+				struct avro_record_field_t *field;
+			} val;
+			st_lookup(schema->fields, i, &val.data);
+			check(rval,
+			      avro_record_get(datum, val.field->name,
+					      &field_datum));
+			check(rval,
+			      write_datum(writer, enc, val.field->type,
+					  field_datum));
 		}
-		rval = avro_write_data(writer, val.field->type, field_datum);
-		if (rval) {
-			return rval;
+	} else {
+		/* No schema.  Just write the record datum */
+		struct avro_record_datum_t *record =
+		    avro_datum_to_record(datum);
+		for (i = 0; i < record->field_order->num_entries; i++) {
+			union {
+				st_data_t data;
+				char *name;
+			} val;
+			st_lookup(record->field_order, i, &val.data);
+			check(rval,
+			      avro_record_get(datum, val.name, &field_datum));
+			check(rval,
+			      write_datum(writer, enc, NULL, field_datum));
 		}
 	}
 	return 0;
@@ -51,15 +69,7 @@
 write_enum(avro_writer_t writer, const avro_encoding_t * enc,
 	   struct avro_enum_schema_t *enump, struct avro_enum_datum_t *datum)
 {
-	union {
-		st_data_t data;
-		long idx;
-	} val;
-	if (!st_lookup
-	    (enump->symbols_byname, (st_data_t) datum->symbol, &val.data)) {
-		return EINVAL;
-	}
-	return enc->write_long(writer, val.idx);
+	return enc->write_long(writer, datum->value);
 }
 
 struct write_map_args {
@@ -77,7 +87,7 @@
 		args->rval = rval;
 		return ST_STOP;
 	}
-	rval = avro_write_data(args->writer, args->values_schema, datum);
+	rval = write_datum(args->writer, args->enc, args->values_schema, datum);
 	if (rval) {
 		args->rval = rval;
 		return ST_STOP;
@@ -87,11 +97,12 @@
 
 static int
 write_map(avro_writer_t writer, const avro_encoding_t * enc,
-	  struct avro_map_schema_t *writer_schema,
+	  struct avro_map_schema_t *writers_schema,
 	  struct avro_map_datum_t *datum)
 {
 	int rval;
-	struct write_map_args args = { 0, writer, enc, writer_schema->values };
+	struct write_map_args args =
+	    { 0, writer, enc, writers_schema ? writers_schema->values : NULL };
 
 	if (datum->map->num_entries) {
 		rval = enc->write_long(writer, datum->map->num_entries);
@@ -129,11 +140,10 @@
 				avro_datum_t datum;
 			} val;
 			st_lookup(array->els, i, &val.data);
-			rval =
-			    avro_write_data(writer, schema->items, val.datum);
-			if (rval) {
-				return rval;
-			}
+			check(rval,
+			      write_datum(writer, enc,
+					  schema ? schema->items : NULL,
+					  val.datum));
 		}
 	}
 	return enc->write_long(writer, 0);
@@ -141,153 +151,138 @@
 
 static int
 write_union(avro_writer_t writer, const avro_encoding_t * enc,
-	    struct avro_union_schema_t *schema, avro_datum_t datum)
+	    struct avro_union_schema_t *schema,
+	    struct avro_union_datum_t *unionp)
 {
 	int rval;
-	long i;
+	avro_schema_t write_schema = NULL;
 
-	for (i = 0; i < schema->branches->num_entries; i++) {
+	check(rval, enc->write_long(writer, unionp->discriminant));
+	if (schema) {
 		union {
 			st_data_t data;
 			avro_schema_t schema;
 		} val;
-		st_lookup(schema->branches, i, &val.data);
-		if (avro_schema_datum_validate(val.schema, datum)) {
-			rval = enc->write_long(writer, i);
-			if (rval) {
-				return rval;
-			}
-			return avro_write_data(writer, val.schema, datum);
+		if (!st_lookup
+		    (schema->branches, unionp->discriminant, &val.data)) {
+			return EINVAL;
 		}
+		write_schema = val.schema;
 	}
-	return EINVAL;
+	return write_datum(writer, enc, write_schema, unionp->value);
 }
 
-int
-avro_write_data(avro_writer_t writer, avro_schema_t writer_schema,
-		avro_datum_t datum)
+static int write_datum(avro_writer_t writer, const avro_encoding_t * enc,
+		       avro_schema_t writers_schema, avro_datum_t datum)
 {
-	const avro_encoding_t *enc = &avro_binary_encoding;
-	int rval = -1;
+	int rval;
 
-	if (!writer || !(is_avro_schema(writer_schema) && is_avro_datum(datum))) {
-		return EINVAL;
+	if (is_avro_schema(writers_schema) && is_avro_link(writers_schema)) {
+		return write_datum(writer, enc,
+				   (avro_schema_to_link(writers_schema))->to,
+				   datum);
 	}
-	if (!avro_schema_datum_validate(writer_schema, datum)) {
-		return EINVAL;
-	}
-	switch (avro_typeof(writer_schema)) {
+
+	switch (avro_typeof(datum)) {
 	case AVRO_NULL:
-		rval = enc->write_null(writer);
-		break;
+		return enc->write_null(writer);
+
 	case AVRO_BOOLEAN:
-		rval =
-		    enc->write_boolean(writer, avro_datum_to_boolean(datum)->i);
-		break;
+		return enc->write_boolean(writer,
+					  avro_datum_to_boolean(datum)->i);
+
 	case AVRO_STRING:
-		rval =
-		    enc->write_string(writer, avro_datum_to_string(datum)->s);
-		break;
+		return enc->write_string(writer,
+					 avro_datum_to_string(datum)->s);
+
 	case AVRO_BYTES:
-		rval =
-		    enc->write_bytes(writer, avro_datum_to_bytes(datum)->bytes,
-				     avro_datum_to_bytes(datum)->size);
-		break;
+		return enc->write_bytes(writer,
+					avro_datum_to_bytes(datum)->bytes,
+					avro_datum_to_bytes(datum)->size);
+
 	case AVRO_INT32:
-		{
-			int32_t i;
-			if (is_avro_int32(datum)) {
-				i = avro_datum_to_int32(datum)->i32;
-			} else if (is_avro_int64(datum)) {
-				i = (int32_t) avro_datum_to_int64(datum)->i64;
-			} else {
-				assert(0
-				       &&
-				       "Serious bug in schema validation code");
+	case AVRO_INT64:{
+			int64_t val = avro_typeof(datum) == AVRO_INT32 ?
+			    avro_datum_to_int32(datum)->i32 :
+			    avro_datum_to_int64(datum)->i64;
+			if (is_avro_schema(writers_schema)) {
+				/* handle promotion */
+				if (is_avro_float(writers_schema)) {
+					return enc->write_float(writer,
+								(float)val);
+				} else if (is_avro_double(writers_schema)) {
+					return enc->write_double(writer,
+								 (double)val);
+				}
 			}
-			rval = enc->write_int(writer, i);
+			return enc->write_long(writer, val);
 		}
-		break;
-	case AVRO_INT64:
-		rval = enc->write_long(writer, avro_datum_to_int64(datum)->i64);
-		break;
-	case AVRO_FLOAT:
-		{
-			float f;
-			if (is_avro_int32(datum)) {
-				f = (float)(avro_datum_to_int32(datum)->i32);
-			} else if (is_avro_int64(datum)) {
-				f = (float)(avro_datum_to_int64(datum)->i64);
-			} else if (is_avro_float(datum)) {
-				f = avro_datum_to_float(datum)->f;
-			} else if (is_avro_double(datum)) {
-				f = (float)(avro_datum_to_double(datum)->d);
-			} else {
-				assert(0
-				       &&
-				       "Serious bug in schema validation code");
+
+	case AVRO_FLOAT:{
+			float val = avro_datum_to_float(datum)->f;
+			if (is_avro_schema(writers_schema)
+			    && is_avro_double(writers_schema)) {
+				/* handle promotion */
+				return enc->write_double(writer, (double)val);
 			}
-			rval = enc->write_float(writer, f);
+			return enc->write_float(writer, val);
 		}
-		break;
+
 	case AVRO_DOUBLE:
-		{
-			double d;
-			if (is_avro_int32(datum)) {
-				d = (double)(avro_datum_to_int32(datum)->i32);
-			} else if (is_avro_int64(datum)) {
-				d = (double)(avro_datum_to_int64(datum)->i64);
-			} else if (is_avro_float(datum)) {
-				d = (double)(avro_datum_to_float(datum)->f);
-			} else if (is_avro_double(datum)) {
-				d = avro_datum_to_double(datum)->d;
-			} else {
-				assert(0 && "Bug in schema validation code");
-			}
-			rval = enc->write_double(writer, d);
-		}
-		break;
+		return enc->write_double(writer,
+					 avro_datum_to_double(datum)->d);
 
 	case AVRO_RECORD:
-		rval =
-		    write_record(writer, enc,
-				 avro_schema_to_record(writer_schema), datum);
-		break;
+		return write_record(writer, enc,
+				    avro_schema_to_record(writers_schema),
+				    datum);
 
 	case AVRO_ENUM:
-		rval =
-		    write_enum(writer, enc, avro_schema_to_enum(writer_schema),
-			       avro_datum_to_enum(datum));
-		break;
+		return write_enum(writer, enc,
+				  avro_schema_to_enum(writers_schema),
+				  avro_datum_to_enum(datum));
 
 	case AVRO_FIXED:
-		return avro_write(writer, avro_datum_to_fixed(datum)->bytes,
+		return avro_write(writer,
+				  avro_datum_to_fixed(datum)->bytes,
 				  avro_datum_to_fixed(datum)->size);
 
 	case AVRO_MAP:
-		rval =
-		    write_map(writer, enc, avro_schema_to_map(writer_schema),
-			      avro_datum_to_map(datum));
-		break;
+		return write_map(writer, enc,
+				 avro_schema_to_map(writers_schema),
+				 avro_datum_to_map(datum));
+
 	case AVRO_ARRAY:
-		rval =
-		    write_array(writer, enc,
-				avro_schema_to_array(writer_schema),
-				avro_datum_to_array(datum));
-		break;
+		return write_array(writer, enc,
+				   avro_schema_to_array(writers_schema),
+				   avro_datum_to_array(datum));
 
 	case AVRO_UNION:
-		rval =
-		    write_union(writer, enc,
-				avro_schema_to_union(writer_schema), datum);
-		break;
+		return write_union(writer, enc,
+				   avro_schema_to_union(writers_schema),
+				   avro_datum_to_union(datum));
 
 	case AVRO_LINK:
-		rval =
-		    avro_write_data(writer,
-				    (avro_schema_to_link(writer_schema))->to,
-				    datum);
 		break;
 	}
-	return rval;
+
+	return 0;
+}
+
+int avro_write_data(avro_writer_t writer, avro_schema_t writers_schema,
+		    avro_datum_t datum)
+{
+	const avro_encoding_t *enc = &avro_binary_encoding;
+	int rval = -1;
+
+	if (!writer || !is_avro_datum(datum)) {
+		return EINVAL;
+	}
+	/* Only validate datum if a writer's schema is provided */
+	if (is_avro_schema(writers_schema)
+	    && !avro_schema_datum_validate(writers_schema, datum)) {
+		return EINVAL;
+	}
+	return write_datum(writer, &avro_binary_encoding,
+			   writers_schema, datum);
 }

Modified: hadoop/avro/trunk/lang/c/tests/generate_interop_data.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/tests/generate_interop_data.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/tests/generate_interop_data.c (original)
+++ hadoop/avro/trunk/lang/c/tests/generate_interop_data.c Tue Feb  9 20:42:02 2010
@@ -17,13 +17,19 @@
 	avro_datum_t interop;
 	avro_datum_t array_datum;
 	avro_datum_t node_datum;
+	avro_datum_t union_datum;
 	avro_datum_t out_datum;
+	enum Kind {
+		KIND_A,
+		KIND_B,
+		KIND_C
+	};
 
 	if (argc != 3) {
 		exit(EXIT_FAILURE);
 	}
 	snprintf(outpath, sizeof(outpath), "%s/c.avro", argv[2]);
-	fprintf(stderr, "Writing %s\n", outpath);
+	fprintf(stderr, "Writing to %s\n", outpath);
 
 	fp = fopen(argv[1], "r");
 	rval = fread(jsontext, 1, sizeof(jsontext) - 1, fp);
@@ -34,7 +40,7 @@
 	check(rval, avro_file_writer_create(outpath, schema, &file_writer));
 
 	/* TODO: create a method for generating random data from schema */
-	interop = avro_record("interop", "org.apache.avro");
+	interop = avro_record("Interop", "org.apache.avro");
 	avro_record_set(interop, "intField", avro_int32(42));
 	avro_record_set(interop, "longField", avro_int64(4242));
 	avro_record_set(interop, "stringField",
@@ -52,8 +58,9 @@
 	avro_record_set(interop, "arrayField", array_datum);
 
 	avro_record_set(interop, "mapField", avro_map());
-	avro_record_set(interop, "unionField", avro_double(1.61803399));
-	avro_record_set(interop, "enumField", avro_enum("Kind", "B"));
+	union_datum = avro_union(1, avro_double(1.61803399));
+	avro_record_set(interop, "unionField", union_datum);
+	avro_record_set(interop, "enumField", avro_enum("Kind", KIND_A));
 	avro_record_set(interop, "fixedField",
 			avro_fixed("MD5", "1234567890123456", 16));
 
@@ -66,14 +73,24 @@
 	rval = avro_file_writer_append(file_writer, interop);
 	if (rval) {
 		fprintf(stderr, "Unable to append data to interop file!\n");
+		exit(EXIT_FAILURE);
+	} else {
+		fprintf(stderr, "Successfully appended datum to file\n");
 	}
+
 	check(rval, avro_file_writer_close(file_writer));
+	fprintf(stderr, "Closed writer.\n");
 
 	check(rval, avro_file_reader(outpath, &file_reader));
+	fprintf(stderr, "Re-reading datum to verify\n");
 	check(rval, avro_file_reader_read(file_reader, NULL, &out_datum));
+	fprintf(stderr, "Verifying datum...");
 	if (!avro_datum_equal(interop, out_datum)) {
+		fprintf(stderr, "fail!\n");
 		exit(EXIT_FAILURE);
 	}
+	fprintf(stderr, "ok\n");
 	check(rval, avro_file_reader_close(file_reader));
+	fprintf(stderr, "Closed reader.\n");
 	return 0;
 }

Modified: hadoop/avro/trunk/lang/c/tests/test_avro_data.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/tests/test_avro_data.c?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/tests/test_avro_data.c (original)
+++ hadoop/avro/trunk/lang/c/tests/test_avro_data.c Tue Feb  9 20:42:02 2010
@@ -53,25 +53,38 @@
 		 avro_schema_t readers_schema, avro_datum_t datum, char *type)
 {
 	avro_datum_t datum_out;
-	reader = avro_reader_memory(buf, sizeof(buf));
-	writer = avro_writer_memory(buf, sizeof(buf));
+	int validate;
 
-	if (avro_write_data(writer, writers_schema, datum)) {
-		fprintf(stderr, "Unable to write %s\n", type);
-		exit(EXIT_FAILURE);
-	}
-	if (avro_read_data(reader, writers_schema, readers_schema, &datum_out)) {
-		fprintf(stderr, "Unable to read %s\n", type);
-		exit(EXIT_FAILURE);
-	}
-	if (!avro_datum_equal(datum, datum_out)) {
-		fprintf(stderr, "Unable to encode/decode %s\n", type);
-		exit(EXIT_FAILURE);
+	for (validate = 0; validate <= 1; validate++) {
+
+		reader = avro_reader_memory(buf, sizeof(buf));
+		writer = avro_writer_memory(buf, sizeof(buf));
+
+		/* Validating read/write */
+		if (avro_write_data
+		    (writer, validate ? writers_schema : NULL, datum)) {
+			fprintf(stderr, "Unable to write %s validate=%d\n",
+				type, validate);
+			exit(EXIT_FAILURE);
+		}
+		if (avro_read_data
+		    (reader, writers_schema, readers_schema, &datum_out)) {
+			fprintf(stderr, "Unable to read %s validate=%d\n", type,
+				validate);
+			exit(EXIT_FAILURE);
+		}
+		if (!avro_datum_equal(datum, datum_out)) {
+			fprintf(stderr,
+				"Unable to encode/decode %s validate=%d\n",
+				type, validate);
+			exit(EXIT_FAILURE);
+		}
+
+		avro_reader_dump(reader, stderr);
+		avro_datum_decref(datum_out);
+		avro_reader_free(reader);
+		avro_writer_free(writer);
 	}
-	avro_reader_dump(reader, stderr);
-	avro_datum_decref(datum_out);
-	avro_reader_free(reader);
-	avro_writer_free(writer);
 }
 
 static int test_string(void)
@@ -204,8 +217,15 @@
 
 static int test_enum(void)
 {
+	enum avro_languages {
+		AVRO_C,
+		AVRO_CPP,
+		AVRO_PYTHON,
+		AVRO_RUBY,
+		AVRO_JAVA
+	};
 	avro_schema_t schema = avro_schema_enum("language");
-	avro_datum_t datum = avro_enum("language", "C");
+	avro_datum_t datum = avro_enum("language", AVRO_C);
 
 	avro_schema_enum_symbol_append(schema, "C");
 	avro_schema_enum_symbol_append(schema, "C++");
@@ -262,6 +282,7 @@
 static int test_union(void)
 {
 	avro_schema_t schema = avro_schema_union();
+	avro_datum_t union_datum;
 	avro_datum_t datum;
 
 	avro_schema_union_append(schema, avro_schema_string());
@@ -269,8 +290,10 @@
 	avro_schema_union_append(schema, avro_schema_null());
 
 	datum = avro_wrapstring("Follow your bliss.");
+	union_datum = avro_union(0, datum);
 
-	write_read_check(schema, NULL, datum, "union");
+	write_read_check(schema, NULL, union_datum, "union");
+	avro_datum_decref(union_datum);
 	avro_datum_decref(datum);
 	avro_schema_decref(schema);
 	return 0;

Modified: hadoop/avro/trunk/lang/c/version.sh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/version.sh?rev=908209&r1=908208&r2=908209&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/version.sh (original)
+++ hadoop/avro/trunk/lang/c/version.sh Tue Feb  9 20:42:02 2010
@@ -18,7 +18,7 @@
 #         libavro_binary_age = 0
 #         libavro_interface_age = 0
 #
-libavro_micro_version=17
+libavro_micro_version=18
 libavro_interface_age=0
 libavro_binary_age=0
 



Mime
View raw message