avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r781214 [2/3] - in /hadoop/avro/trunk/src/c++: ./ api/ impl/ jsonschemas/ parser/ scripts/ test/
Date Wed, 03 Jun 2009 00:00:50 GMT
Added: hadoop/avro/trunk/src/c++/api/Parser.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Parser.hh?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Parser.hh (added)
+++ hadoop/avro/trunk/src/c++/api/Parser.hh Wed Jun  3 00:00:48 2009
@@ -0,0 +1,148 @@
+#ifndef avro_Parser_hh__
+#define avro_Parser_hh__
+
+#include <stdint.h>
+#include <vector>
+#include <boost/noncopyable.hpp>
+
+#include "InputStreamer.hh"
+#include "Zigzag.hh"
+
+namespace avro {
+
+///
+/// Parses from an avro encoding to the requested type.  Assumes the next item
+/// in the avro binary data is the expected type.
+///
+
+class Parser : private boost::noncopyable
+{
+
+  public:
+
+    /// Binds the parser to \p in.  The stream is held by reference, so it
+    /// must stay alive for as long as the parser is used.
+    explicit Parser(InputStreamer &in) :
+        in_(in)
+    {}
+
+    /// A null value reads nothing from the stream.
+    void getNull() {}
+
+    /// Reads one byte and returns true when it is non-zero.
+    bool getBool() {
+        uint8_t ival = 0;
+        in_.getByte(ival);
+        return ival != 0;
+    }
+
+    /// Reads a variable-length integer and zigzag-decodes its low 32 bits.
+    int32_t getInt() {
+        uint32_t encoded = static_cast<uint32_t>(getVarInt());
+        return decodeZigzag32(encoded);
+    }
+
+    /// Reads a variable-length integer and zigzag-decodes it as 64 bits.
+    int64_t getLong() {
+        uint64_t encoded = getVarInt();
+        return decodeZigzag64(encoded);
+    }
+
+    /// Reads a 32-bit word from the stream and reinterprets its bits as a float.
+    float getFloat() {
+        union {
+            float f;
+            uint32_t i;
+        } v;
+        in_.getWord(v.i);
+        return v.f;
+    }
+
+    /// Reads a 64-bit word from the stream and reinterprets its bits as a double.
+    double getDouble() {
+        union {
+            double d;
+            uint64_t i;
+        } v;
+        in_.getLongWord(v.i);
+        return v.d;
+    }
+
+    /// Reads a length (as a long) followed by that many bytes, appending
+    /// them to \p val.  Existing contents of \p val are kept.
+    void getBytes(std::vector<uint8_t> &val) {
+        int64_t size = getLong();
+        appendBytes(val, static_cast<size_t>(size));
+    }
+
+    /// Reads a length (as a long) followed by that many bytes, appending
+    /// them to \p val.  Existing contents of \p val are kept.
+    void getString(std::string &val) {
+        int64_t size = getLong();
+        appendBytes(val, static_cast<size_t>(size));
+    }
+
+    /// Reads exactly \p size bytes (no length prefix) and appends them to \p val.
+    void getFixed(std::vector<uint8_t> &val, size_t size) {
+        appendBytes(val, size);
+    }
+
+    /// Reads exactly \p size bytes (no length prefix) into the buffer at
+    /// \p val, which the caller must have sized to hold them.
+    void getFixed(uint8_t *val, size_t size) {
+        uint8_t bval = 0;
+        for(size_t i = 0; i < size; ++i) {
+            in_.getByte(bval);
+            *val++ = bval;
+        }
+    }
+
+    /// A record has no encoding of its own, so nothing is read.
+    void getRecord() { }
+
+    /// Reads the long that precedes a block of array items.
+    int64_t getArrayBlockSize() {
+        return getLong();
+    }
+
+    /// Reads the long that selects a union branch.
+    int64_t getUnion() {
+        return getLong();
+    }
+
+    /// Reads the long that selects an enum symbol.
+    int64_t getEnum() {
+        return getLong();
+    }
+
+    /// Reads the long that precedes a block of map entries.
+    int64_t getMapBlockSize() {
+        return getLong();
+    }
+
+  private:
+
+    /// Reads \p count bytes one at a time and appends each to \p val.
+    /// Shared by getBytes(), getString() and the vector form of getFixed().
+    template <typename Container>
+    void appendBytes(Container &val, size_t count) {
+        val.reserve(count);
+        uint8_t bval = 0;
+        for(size_t i = 0; i < count; ++i) {
+            in_.getByte(bval);
+            val.push_back(bval);
+        }
+    }
+
+    /// Decodes a base-128 variable-length integer.  Each byte carries seven
+    /// payload bits, least-significant group first, and the high bit of a
+    /// byte says another byte follows.
+    uint64_t getVarInt() {
+        uint64_t encoded = 0;
+        uint8_t val = 0;
+        int shift = 0;
+        do {
+            in_.getByte(val);
+            // Groups beyond 64 bits cannot be represented; skipping them
+            // also avoids an out-of-range shift on malformed input.
+            if(shift < 64) {
+                encoded |= static_cast<uint64_t>(val & 0x7F) << shift;
+            }
+            shift += 7;
+        } while (val & 0x80);
+
+        return encoded;
+    }
+
+    InputStreamer &in_;
+
+};
+
+
+} // namespace avro
+
+#endif

Added: hadoop/avro/trunk/src/c++/api/Schema.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Schema.hh?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Schema.hh (added)
+++ hadoop/avro/trunk/src/c++/api/Schema.hh Wed Jun  3 00:00:48 2009
@@ -0,0 +1,144 @@
+#ifndef avro_Schema_hh__ 
+#define avro_Schema_hh__ 
+
+#include "NodeImpl.hh"
+
+/// \file
+///
+/// Schemas for representing all the avro types.  The compound schema objects
+/// allow composition from other schemas.
+///
+
+namespace avro {
+
+class ValidSchema;
+
+
+/// The root Schema object is a base class.  Nobody constructs this class directly.
+
+class Schema 
+{
+  public:
+
+    virtual ~Schema();
+
+    /// Returns the avro type reported by the root node.
+    Type type() const {
+        return node_->type();
+    }
+
+    /// Read-only access to the root node of this schema.
+    const NodePtr &root() const {
+        return node_;
+    }
+
+    /// Mutable access to the root node of this schema.
+    NodePtr &root() {
+        return node_;
+    }
+
+  protected:
+
+    // Friend so that it can reach the protected constructors below.
+    friend int compileJsonSchema(std::istream &is, ValidSchema &schema);
+
+    // Constructors are protected: a Schema is only built by derived classes
+    // or by the friend above.
+    Schema();
+    explicit Schema(const NodePtr &node);
+    explicit Schema(Node *node);
+
+    /// Root node of the schema tree.
+    NodePtr node_;
+};
+
+/// Schema whose root is a primitive node of type AVRO_NULL.
+class NullSchema : public Schema
+{
+  public:
+    NullSchema(): Schema(new NodePrimitive(AVRO_NULL)) {}
+};
+
+/// Schema whose root is a primitive node of type AVRO_BOOL.
+class BoolSchema : public Schema
+{
+  public:
+    BoolSchema(): Schema(new NodePrimitive(AVRO_BOOL)) {}
+};
+
+/// Schema whose root is a primitive node of type AVRO_INT.
+class IntSchema : public Schema
+{
+  public:
+    IntSchema(): Schema(new NodePrimitive(AVRO_INT)) {}
+};
+
+/// Schema whose root is a primitive node of type AVRO_LONG.
+class LongSchema : public Schema
+{
+  public:
+    LongSchema(): Schema(new NodePrimitive(AVRO_LONG)) {}
+};
+
+/// Schema whose root is a primitive node of type AVRO_FLOAT.
+class FloatSchema : public Schema
+{
+  public:
+    FloatSchema(): Schema(new NodePrimitive(AVRO_FLOAT)) {}
+};
+
+/// Schema whose root is a primitive node of type AVRO_DOUBLE.
+class DoubleSchema : public Schema
+{
+  public:
+    DoubleSchema(): Schema(new NodePrimitive(AVRO_DOUBLE)) {}
+};
+
+/// Schema whose root is a primitive node of type AVRO_STRING.
+class StringSchema : public Schema
+{
+  public:
+    StringSchema(): Schema(new NodePrimitive(AVRO_STRING)) {}
+};
+
+/// Schema whose root is a primitive node of type AVRO_BYTES.
+class BytesSchema : public Schema
+{
+  public:
+    BytesSchema(): Schema(new NodePrimitive(AVRO_BYTES)) {}
+};
+
+/// Schema for a named record that is built up one field at a time.
+class RecordSchema : public Schema
+{
+  public:
+
+    /// Creates an empty record named \p name.
+    RecordSchema(const std::string &name);
+    /// Appends a field called \p name whose type is \p fieldSchema.
+    void addField(const std::string &name, const Schema &fieldSchema);
+};
+
+/// Schema for a named enum that is built up one symbol at a time.
+class EnumSchema : public Schema
+{
+  public:
+
+    /// Creates an enum named \p name with no symbols yet.
+    EnumSchema(const std::string &name);
+    /// Appends \p symbol to the enum's list of names.
+    void addSymbol(const std::string &symbol);
+};
+
+/// Schema for an array; the item type is fixed at construction.
+class ArraySchema : public Schema
+{
+  public:
+
+    /// Creates an array whose items are described by \p itemsSchema.
+    ArraySchema(const Schema &itemsSchema);
+};
+
+/// Schema for a map; the value type is fixed at construction.
+class MapSchema : public Schema
+{
+  public:
+
+    /// Creates a map whose values are described by \p valuesSchema.
+    MapSchema(const Schema &valuesSchema);
+};
+
+
+/// Schema for a union that is built up one branch type at a time.
+class UnionSchema : public Schema
+{
+  public:
+
+    /// Creates a union with no branches yet.
+    UnionSchema();
+    /// Appends \p typeSchema as a branch of the union.
+    void addType(const Schema &typeSchema);
+};
+
+/// Schema for a named fixed-size type.
+class FixedSchema : public Schema
+{
+  public:
+
+    /// Creates a fixed type called \p name with the given \p size.
+    FixedSchema(int size, const std::string &name);
+};
+
+} // namespace avro
+
+#endif

Added: hadoop/avro/trunk/src/c++/api/Serializer.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Serializer.hh?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Serializer.hh (added)
+++ hadoop/avro/trunk/src/c++/api/Serializer.hh Wed Jun  3 00:00:48 2009
@@ -0,0 +1,110 @@
+#ifndef avro_Serializer_hh__
+#define avro_Serializer_hh__
+
+#include <boost/noncopyable.hpp>
+
+#include "OutputStreamer.hh"
+#include "Zigzag.hh"
+
+namespace avro {
+
+/// Class for writing avro data to a stream.
+
+class Serializer : private boost::noncopyable
+{
+
+  public:
+
+    /// Keeps a reference to \p out; every put/begin/end call writes to it.
+    explicit Serializer(OutputStreamer &out) :
+        out_(out)
+    {}
+
+    /// Null has no encoded form, so nothing is written.
+    void putNull() {}
+
+    /// Writes a single byte: 1 for true, 0 for false.
+    void putBool(bool val) {
+        int8_t flag = val ? 1 : 0;
+        out_.putByte(flag);
+    }
+
+    /// Encodes \p val with encodeInt32() and writes the bytes it produced.
+    void putInt(int32_t val) {
+        boost::array<uint8_t, 5> buffer;
+        size_t used = encodeInt32(val, buffer);
+        out_.putBytes(buffer.data(), used);
+    }
+
+    /// Encodes \p val with encodeInt64() and writes the bytes it produced.
+    // NOTE(review): a 64-bit value split into 7-bit groups can need 10
+    // bytes; confirm encodeInt64() never writes past this 9-byte buffer.
+    void putLong(int64_t val) {
+        boost::array<uint8_t, 9> buffer;
+        size_t used = encodeInt64(val, buffer);
+        out_.putBytes(buffer.data(), used);
+    }
+
+    /// Writes the bit pattern of \p val as a 32-bit word.
+    void putFloat(float val) {
+        union {
+            float f;
+            int32_t i;
+        } bits;
+
+        bits.f = val;
+        out_.putWord(bits.i);
+    }
+
+    /// Writes the bit pattern of \p val as a 64-bit word.
+    void putDouble(double val) {
+        union {
+            double d;
+            int64_t i;
+        } bits;
+
+        bits.d = val;
+        out_.putLongWord(bits.i);
+    }
+
+    /// Writes \p size as a long, then the \p size bytes at \p val.
+    void putBytes(const uint8_t *val, size_t size) {
+        putLong(size);
+        out_.putBytes(val, size);
+    }
+
+    /// Writes the \p size bytes at \p val with no length prefix.
+    void putFixed(const uint8_t *val, size_t size) {
+        out_.putBytes(val, size);
+    }
+
+    /// Writes a string exactly like putBytes(): length first, then the characters.
+    void putString(const std::string &val) {
+        const uint8_t *chars = reinterpret_cast<const uint8_t *>(val.c_str());
+        putBytes(chars, val.size());
+    }
+
+    /* The calls below exist so templates can use this class and
+       ValidatingSerializer interchangeably. */
+
+    /// A record has no encoding of its own, so nothing is written.
+    void beginRecord() {}
+
+    /// Writes the item count that starts an array block.
+    void beginArrayBlock(int64_t size) {
+        putLong(size);
+    }
+
+    /// Writes a zero count to end the array.
+    void endArray() {
+        putLong(0);
+    }
+
+    /// Writes the entry count that starts a map block.
+    void beginMapBlock(int64_t size) {
+        putLong(size);
+    }
+
+    /// Writes a zero count to end the map.
+    void endMap() {
+        putLong(0);
+    }
+
+    /// Writes the index of the chosen union branch.
+    void beginUnion(int64_t choice) {
+        putLong(choice);
+    }
+
+    /// Writes the index of the chosen enum symbol.
+    void beginEnum(int64_t choice) {
+        putLong(choice);
+    }
+
+  private:
+
+    OutputStreamer &out_;
+
+};
+
+} // namespace avro
+
+#endif

Added: hadoop/avro/trunk/src/c++/api/SymbolMap.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/SymbolMap.hh?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/api/SymbolMap.hh (added)
+++ hadoop/avro/trunk/src/c++/api/SymbolMap.hh Wed Jun  3 00:00:48 2009
@@ -0,0 +1,60 @@
+#ifndef avro_SymbolMap_hh__
+#define avro_SymbolMap_hh__
+
+#include <map>
+
+#include "Node.hh"
+#include "Schema.hh"
+
+namespace avro {
+
+/// Avro schemas can include types that were previously defined with names in
+/// the same avro schema.  In order to identify new types, they are stored in a
+/// map so that the actual type may be identified by name.  This class
+/// implements the symbolic name to node mapping.
+///
+
+class SymbolMap 
+{
+
+  public:
+
+    SymbolMap()
+    {}
+
+    /// Stores \p node under its own name.
+    /// \return true when the name was new and the node was stored, false
+    ///         when an entry with that name already existed (it is kept).
+    /// \throws Exception when the node has an empty name.
+    bool registerSymbol(const NodePtr &node) {
+
+        const std::string symbol = node->name();
+        if(symbol.empty()) {
+            throw Exception("Node must have a name to be registered");
+        }
+        // insert() leaves an existing entry untouched and reports through
+        // the bool of its result whether a new entry was created.
+        return map_.insert(std::make_pair(symbol, node)).second;
+    }
+
+    /// Reports whether a node has been registered under \p name.
+    bool hasSymbol(const std::string &name) const {
+        return map_.count(name) != 0;
+    }
+
+    /// Returns the node registered under \p name, or a default-constructed
+    /// NodePtr when there is none.
+    NodePtr locateSymbol(const std::string &name) const {
+        MapImpl::const_iterator found = map_.find(name);
+        if(found == map_.end()) {
+            return NodePtr();
+        }
+        return found->second;
+    }
+
+  private:
+
+    typedef std::map<std::string, NodePtr> MapImpl;
+
+    MapImpl map_;
+};
+
+
+} // namespace avro
+
+#endif

Added: hadoop/avro/trunk/src/c++/api/Types.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Types.hh?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Types.hh (added)
+++ hadoop/avro/trunk/src/c++/api/Types.hh Wed Jun  3 00:00:48 2009
@@ -0,0 +1,48 @@
+#ifndef avro_Types_hh__
+#define avro_Types_hh__
+
+#include <iostream>
+
+namespace avro {
+
+/// Identifies the kind of a schema node.  The enumerators are grouped and
+/// ordered on purpose: isPrimitive() and isCompound() test ranges of values.
+enum Type {
+
+    // primitive types (everything before AVRO_RECORD)
+    AVRO_STRING,
+    AVRO_BYTES,
+    AVRO_INT,
+    AVRO_LONG,
+    AVRO_FLOAT,
+    AVRO_DOUBLE,
+    AVRO_BOOL,
+    AVRO_NULL,
+
+    // compound types (AVRO_RECORD up to, but not including, AVRO_NUM_TYPES)
+    AVRO_RECORD,
+    AVRO_ENUM,
+    AVRO_ARRAY,
+    AVRO_MAP,
+    AVRO_UNION,
+    AVRO_FIXED,
+
+    // a reference by name to another type; also falls in the compound range
+    AVRO_SYMBOLIC,
+
+    // count of the enumerators above, not a type itself
+    AVRO_NUM_TYPES,
+};
+
+/// True when \p t lies in the range AVRO_STRING up to, but excluding, AVRO_RECORD.
+inline bool isPrimitive(Type t) {
+    if(t < AVRO_STRING) {
+        return false;
+    }
+    return t < AVRO_RECORD;
+}
+
+/// True when \p t lies in the range AVRO_RECORD up to, but excluding, AVRO_NUM_TYPES.
+inline bool isCompound(Type t) {
+    if(t < AVRO_RECORD) {
+        return false;
+    }
+    return t < AVRO_NUM_TYPES;
+}
+
+/// True when \p t is one of the enumerators before AVRO_NUM_TYPES.
+inline bool isAvroType(Type t) {
+    if(t < AVRO_STRING) {
+        return false;
+    }
+    return t < AVRO_NUM_TYPES;
+}
+
+std::ostream &operator<< (std::ostream &os, const avro::Type type);
+
+} // namespace avro
+
+
+#endif

Added: hadoop/avro/trunk/src/c++/api/ValidSchema.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/ValidSchema.hh?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/api/ValidSchema.hh (added)
+++ hadoop/avro/trunk/src/c++/api/ValidSchema.hh Wed Jun  3 00:00:48 2009
@@ -0,0 +1,55 @@
+#ifndef avro_ValidSchema_hh__ 
+#define avro_ValidSchema_hh__ 
+
+#include <boost/noncopyable.hpp>
+
+#include "Node.hh"
+#include "SymbolMap.hh"
+
+namespace avro {
+
+class Schema;
+
+/// A ValidSchema is basically a non-mutable Schema that has passed some
+/// minimum of sanity checks.  Once validated, any Schema that is part of
+/// this ValidSchema is considered locked, and cannot be modified (an attempt
+/// to modify a locked Schema will throw).  Also, as it is validated, any
+/// recursive duplications of schemas are replaced with symbolic links to the
+/// original.
+///
+/// Once a Schema is converted to a valid schema it can be used in validating
+/// parsers/serializers, converted to a json schema, etc.
+///
+
+class ValidSchema : private boost::noncopyable
+{
+  public:
+
+    /// Takes the root node of \p schema and validates it.
+    explicit ValidSchema(const Schema &schema);
+    /// Starts out holding the root node of a NullSchema.
+    ValidSchema();
+
+    /// Validates \p schema and, if that does not throw, adopts its root node.
+    void setSchema(const Schema &schema);
+
+    /// Root node of the validated schema tree.
+    const NodePtr &root() const {
+        return node_;
+    }
+
+    // Writes the schema to os as json (implementation not in this header).
+    void toJson(std::ostream &os);
+
+    // Writes the schema to os as a flat list (implementation not in this header).
+    void toFlatList(std::ostream &os);
+
+    /// Looks \p name up in the symbol map; the result is a default-constructed
+    /// NodePtr when the name is unknown.
+    NodePtr followSymbol(const std::string &name) const {
+        return symbolMap_.locateSymbol(name);
+    }
+
+  protected:
+
+    bool validate(const NodePtr &node);
+
+    // named nodes, keyed by name
+    SymbolMap symbolMap_;
+    // root of the schema tree
+    NodePtr node_;
+};
+
+} // namespace avro
+
+#endif

Added: hadoop/avro/trunk/src/c++/api/ValidatingParser.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/ValidatingParser.hh?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/api/ValidatingParser.hh (added)
+++ hadoop/avro/trunk/src/c++/api/ValidatingParser.hh Wed Jun  3 00:00:48 2009
@@ -0,0 +1,108 @@
+#ifndef avro_ValidatingParser_hh__
+#define avro_ValidatingParser_hh__
+
+#include <stdint.h>
+#include <vector>
+#include <boost/noncopyable.hpp>
+
+#include "Parser.hh"
+#include "Validator.hh"
+
+namespace avro {
+
+class ValidSchema;
+class InputStreamer;
+
+/// As an avro object is being parsed from binary data to its C++
+/// representation, this parser will walk the parse tree and ensure that the
+/// correct type is being asked for.  If the user attempts to parse a type that
+/// does not match what the schema says, an exception will be thrown.  
+///
+/// The ValidatingParser object can also be used to tell what the next type is,
+/// so that callers can dynamically discover the contents.  It also tells
+/// the attribute names of the objects or their fields, if they exist.
+///
+
+class ValidatingParser : private boost::noncopyable
+{
+
+  public:
+
+    explicit ValidatingParser(const ValidSchema &schema, InputStreamer &in);
+
+    void getNull();
+
+    bool getBool();
+
+    int32_t getInt();
+
+    int64_t getLong();
+
+    float getFloat();
+
+    double getDouble();
+
+    void getBytes(std::vector<uint8_t> &val);
+
+    /// Reads a fixed of \p size bytes into the buffer at \p val.
+    /// Throws Exception unless the validator expects AVRO_FIXED next with
+    /// exactly this size.  The validator is advanced before the bytes are read.
+    void getFixed(uint8_t *val, size_t size) {
+        checkSafeToGet(AVRO_FIXED);
+        checkSizeExpected(size);
+        validator_.advance();
+        parser_.getFixed(val, size);
+    }
+
+    /// Same checks as the pointer overload; the bytes go to \p val through
+    /// Parser::getFixed(std::vector<uint8_t> &, size_t).
+    void getFixed(std::vector<uint8_t> &val, size_t size) {
+        checkSafeToGet(AVRO_FIXED);
+        checkSizeExpected(size);
+        validator_.advance();
+        parser_.getFixed(val, size);
+    }
+
+    void getString(std::string &val);
+
+    void getRecord();
+
+    int64_t getArrayBlockSize();
+
+    int64_t getUnion();
+
+    int64_t getEnum();
+
+    int64_t getMapBlockSize();
+
+    /// The type the validator expects to be read next.
+    Type nextType() const{
+        return validator_.nextTypeExpected();
+    }
+
+    /// Forwards to Validator::getCurrentRecordName().
+    bool getCurrentRecordName(std::string &name) const {
+        return validator_.getCurrentRecordName(name);
+    }
+
+    /// Forwards to Validator::getNextFieldName().
+    bool getNextFieldName(std::string &name) const {
+        return validator_.getNextFieldName(name);
+    }
+
+  private:
+
+    int64_t getCount();
+
+    /// Throws Exception when \p type is not the next type the validator expects.
+    void checkSafeToGet(Type type) const {
+        if(validator_.nextTypeExpected() != type) {
+            throw Exception("Type does not match");
+        }
+    }
+
+    /// Throws Exception when \p size differs from the size the validator expects.
+    // NOTE(review): callers pass a size_t, which is narrowed to int here.
+    void checkSizeExpected(int size) const {
+        if(validator_.nextSizeExpected() != size) {
+            throw Exception("Wrong size of for fixed");
+        }
+    }
+
+    // tracks the position in the schema
+    Validator validator_;
+    // does the actual reading
+    Parser parser_;
+};
+
+
+} // namespace avro
+
+#endif

Added: hadoop/avro/trunk/src/c++/api/ValidatingSerializer.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/ValidatingSerializer.hh?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/api/ValidatingSerializer.hh (added)
+++ hadoop/avro/trunk/src/c++/api/ValidatingSerializer.hh Wed Jun  3 00:00:48 2009
@@ -0,0 +1,83 @@
+#ifndef avro_ValidatingSerializer_hh__
+#define avro_ValidatingSerializer_hh__
+
+#include <boost/noncopyable.hpp>
+
+#include "Serializer.hh"
+#include "Validator.hh"
+
+namespace avro {
+
+class ValidSchema;
+class OutputStreamer;
+
+/// This class walks the parse tree as data is being serialized, and throws if
+/// attempt to serialize a data type does not match the type expected in the
+/// schema.
+
+class ValidatingSerializer : private boost::noncopyable
+{
+
+  public:
+
+    ValidatingSerializer(const ValidSchema &schema, OutputStreamer &out);
+
+    void putNull();
+
+    void putBool(bool val);
+
+    void putInt(int32_t val);
+
+    void putLong(int64_t val);
+
+    void putFloat(float val);
+
+    void putDouble(double val);
+
+    void putString(const std::string &val);
+
+    void putBytes(const uint8_t *val, size_t size);
+
+    /// Writes the \p size bytes at \p val as a fixed.
+    /// Throws Exception unless the validator accepts AVRO_FIXED next with
+    /// exactly this size.  The validator is advanced after the bytes are written.
+    void putFixed(const uint8_t *val, size_t size) {
+        checkSafeToPut(AVRO_FIXED);
+        checkSizeExpected(size);
+        serializer_.putFixed(val, size);
+        validator_.advance();
+    }
+
+    void beginRecord();
+
+    void beginArrayBlock(int64_t size);
+    void endArray();
+
+    void beginMapBlock(int64_t size);
+    void endMap();
+
+    void beginUnion(int64_t choice);
+
+    void beginEnum(int64_t choice);
+
+  private:
+
+    void putCount(int64_t count);
+
+    /// Throws Exception when the validator does not accept \p type next.
+    void checkSafeToPut(Type type) const {
+        if(! validator_.typeIsExpected(type)) {
+            throw Exception("Type does not match schema");
+        }
+    }
+
+    /// Throws Exception when \p size differs from the size the validator expects.
+    // NOTE(review): callers pass a size_t, which is narrowed to int here.
+    void checkSizeExpected(int size) const {
+        if(validator_.nextSizeExpected() != size) {
+            throw Exception("Wrong size of for fixed");
+        }
+    }
+
+    // tracks the position in the schema
+    Validator  validator_;
+    // does the actual writing
+    Serializer serializer_;
+
+};
+
+} // namespace avro
+
+#endif

Added: hadoop/avro/trunk/src/c++/api/Validator.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Validator.hh?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Validator.hh (added)
+++ hadoop/avro/trunk/src/c++/api/Validator.hh Wed Jun  3 00:00:48 2009
@@ -0,0 +1,88 @@
+#ifndef avro_Validating_hh__
+#define avro_Validating_hh__
+
+#include <boost/noncopyable.hpp>
+#include <vector>
+
+#include "Types.hh"
+#include "Node.hh"
+
+namespace avro {
+
+class ValidSchema;
+class OutputStreamer;
+
+/// This class is used by both the ValidatingSerializer and ValidatingParser
+/// objects.  It advances the parse tree (containing logic how to advance
+/// through the various compound types, for example a record must advance
+/// through all leaf nodes but a union only skips to one), and reports which
+/// type is next.
+
+class Validator : private boost::noncopyable
+{
+    /// Bit set with one bit per Type value.
+    typedef uint64_t flag_t;
+
+  public:
+
+    Validator(const ValidSchema &schema);
+
+    void advance();
+    void advanceWithCount(int64_t val);
+
+    /// True when the bit for \p type is set in the expected-types flag.
+    bool typeIsExpected(Type type) const {
+        return (expectedTypesFlag_ & typeToFlag(type)) != 0;
+    }
+
+    /// The single type recorded as coming next.
+    Type nextTypeExpected() const {
+        return nextType_;
+    }
+
+    int nextSizeExpected() const;
+
+    bool getCurrentRecordName(std::string &name) const;
+    bool getNextFieldName(std::string &name) const;
+
+  private:
+
+    /// Maps \p type to its bit in a flag_t.  The shift is done on a flag_t
+    /// so that all 64 bit positions are usable even where long is 32 bits.
+    flag_t typeToFlag(Type type) const {
+        return static_cast<flag_t>(1) << type;
+    }
+
+    void setupOperation(const NodePtr &node);
+
+    void setWaitingForCount();
+
+    void recordAdvance();
+    void enumAdvance();
+    void countingAdvance();
+    void unionAdvance();
+    void fixedAdvance();
+
+    void setupFlag(Type type);
+
+    const ValidSchema &schema_;
+    NodePtr parseTree_;
+
+    Type nextType_;
+    flag_t expectedTypesFlag_;
+    bool compoundStarted_;
+    bool waitingForCount_;
+    int64_t count_;
+
+    /// One entry of the compound stack: a node plus the next leaf to visit.
+    struct CompoundType {
+        explicit CompoundType(const NodePtr &n) :
+            node(n), pos(0)
+        {}
+        NodePtr node;  ///< save the node
+        size_t  pos; ///< track the leaf position to visit
+    };
+
+    std::vector<CompoundType> compoundStack_;
+    std::vector<size_t> counters_;
+
+};
+
+} // namespace avro
+
+#endif

Added: hadoop/avro/trunk/src/c++/api/Zigzag.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Zigzag.hh?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Zigzag.hh (added)
+++ hadoop/avro/trunk/src/c++/api/Zigzag.hh Wed Jun  3 00:00:48 2009
@@ -0,0 +1,23 @@
+#ifndef avro_Encoding_hh__
+#define avro_Encoding_hh__
+
+#include <stdint.h>
+#include <boost/array.hpp>
+
+/// \file
+/// Functions for encoding and decoding integers with zigzag compression
+
+namespace avro {
+
+uint64_t encodeZigzag64(int64_t input);
+int64_t decodeZigzag64(uint64_t input);
+
+uint32_t encodeZigzag32(int32_t input);
+int32_t decodeZigzag32(uint32_t input);
+
+size_t encodeInt32(int32_t input, boost::array<uint8_t, 5> &output);
+size_t encodeInt64(int64_t input, boost::array<uint8_t, 9> &output);
+
+} // namespace avro
+
+#endif

Added: hadoop/avro/trunk/src/c++/impl/Compiler.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/Compiler.cc?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/Compiler.cc (added)
+++ hadoop/avro/trunk/src/c++/impl/Compiler.cc Wed Jun  3 00:00:48 2009
@@ -0,0 +1,176 @@
+#include "Compiler.hh"
+#include "InputStreamer.hh"
+#include "Types.hh"
+#include "Schema.hh"
+#include "ValidSchema.hh"
+
+extern void yyparse(void *ctx);
+
+namespace avro {
+
+//#define DEBUG_VERBOSE
+
+/// Parses a json schema from \p is and installs the result in \p schema.
+/// Always returns 1.
+int
+compileJsonSchema(std::istream &is, ValidSchema &schema)
+{
+     // yyparse() fills the context through the CompilerContext callbacks.
+     CompilerContext context(is);
+     yyparse(&context);
+
+     Schema parsed(context.getRoot());
+     schema.setSchema(parsed);
+
+     return 1;
+}
+
+/// Attaches \p node to the tree: as the root when no compound is open,
+/// otherwise as a leaf of the innermost open compound.
+void 
+CompilerContext::add(const NodePtr &node)
+{
+    if(stack_.empty()) {
+        root_ = node;
+        return;
+    }
+
+    NodePtr &parent = stack_.back();
+    parent->addLeaf(node);
+
+    // A record leaf is a field, so it also gets the pending field name.
+    if(parent->type() == AVRO_RECORD) {
+        parent->addName(fieldName_);
+    }
+}
+
+/// Adds \p node to the tree and then makes it the innermost open compound,
+/// so that later nodes become its leaves until endCompound() is called.
+void 
+CompilerContext::addCompound(const NodePtr &node)
+{
+    add(node);
+    stack_.push_back(node);
+}
+
+/// Closes the innermost open compound and leaves enum mode.
+/// \p type is only used for the debug trace.
+void
+CompilerContext::endCompound(Type type)
+{
+#ifdef DEBUG_VERBOSE
+    std::cout << "Got end of " << type << '\n';
+#endif
+    assert(!stack_.empty());
+    inEnum_ = false;
+    stack_.pop_back();
+}
+
+/// Opens a new record named after the current token text.
+void 
+CompilerContext::addRecord()
+{
+#ifdef DEBUG_VERBOSE
+    std::cout << "Adding record " << text_ << '\n';
+#endif
+    NodePtr recordNode(new NodeRecord());
+    recordNode->setName(text_);
+    addCompound(recordNode);
+}
+
+/// Opens a new enum named after the current token text and enters enum
+/// mode, in which addName() records symbols.
+void 
+CompilerContext::addEnum()
+{
+#ifdef DEBUG_VERBOSE
+    std::cout << "Adding enum " << text_ << '\n';
+#endif
+    NodePtr enumNode(new NodeEnum());
+    enumNode->setName(text_);
+    addCompound(enumNode);
+    inEnum_ = true;
+}
+
+/// Opens a new union.
+void 
+CompilerContext::addUnion()
+{
+#ifdef DEBUG_VERBOSE
+    std::cout << "Adding union\n";
+#endif
+    addCompound(NodePtr(new NodeUnion()));
+}
+
+/// Opens a new map.
+void 
+CompilerContext::addMap()
+{
+#ifdef DEBUG_VERBOSE
+    std::cout << "Adding map\n";
+#endif
+    addCompound(NodePtr(new NodeMap()));
+}
+
+/// Opens a new array.
+void 
+CompilerContext::addArray()
+{
+#ifdef DEBUG_VERBOSE
+    std::cout << "Adding array\n";
+#endif
+    addCompound(NodePtr(new NodeArray()));
+}
+
+/// Adds a fixed node named after the current token text, using the size
+/// recorded by addSize().  A fixed is added as a leaf; it is not opened
+/// as a compound.
+void 
+CompilerContext::addFixed()
+{
+#ifdef DEBUG_VERBOSE
+    std::cout << "Adding fixed " << text_ << '\n';
+#endif
+    NodePtr fixedNode(new NodeFixed());
+    fixedNode->setName(text_);
+    fixedNode->setFixedSize(size_);
+    add(fixedNode);
+} 
+
+/// Adds a primitive node of the given \p type.
+void 
+CompilerContext::addPrimitive(Type type)
+{
+#ifdef DEBUG_VERBOSE
+    std::cout << "Adding " << type << '\n';
+#endif
+    add(NodePtr(new NodePrimitive(type)));
+}
+
+/// Converts the current token text to a number with atol() and keeps it as
+/// the size for the next fixed node.
+// NOTE(review): atol() reports no errors, so non-numeric text is not detected here.
+void 
+CompilerContext::addSize()
+{
+    size_ = atol(text_.c_str()); 
+#ifdef DEBUG_VERBOSE
+    std::cout << "Got size " << size_ << '\n';
+#endif
+}
+
+/// Adds a symbolic node that refers, by the current token text, to a named type.
+void 
+CompilerContext::addSymbol()
+{
+#ifdef DEBUG_VERBOSE
+    std::cout << "Adding symbol " << text_ << '\n';
+#endif
+    NodePtr symbolNode(new NodeSymbolic());
+    symbolNode->setName(text_);
+    add(symbolNode);
+}
+
+/// In enum mode, records the current token text as a symbol of the
+/// innermost open compound.  Outside enum mode this does nothing.
+void 
+CompilerContext::addName()
+{
+    if(!inEnum_) {
+        return;
+    }
+#ifdef DEBUG_VERBOSE
+    std::cout << "Got enum symbol " << text_ << '\n';
+#endif
+    stack_.back()->addName(text_);
+}
+
+/// Remembers the current token text as the name for the next record field;
+/// add() applies it when the field's node is attached to a record.
+void 
+CompilerContext::addFieldName()
+{
+#ifdef DEBUG_VERBOSE
+    std::cout << "Got field name " << text_ << '\n';
+#endif
+    fieldName_ = text_;
+}
+
+} // namespace avro

Added: hadoop/avro/trunk/src/c++/impl/Node.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/Node.cc?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/Node.cc (added)
+++ hadoop/avro/trunk/src/c++/impl/Node.cc Wed Jun  3 00:00:48 2009
@@ -0,0 +1,18 @@
+#include <boost/regex.hpp>
+#include "Node.hh"
+
+namespace avro {
+
+// Out-of-line destructor with nothing to do.
+Node::~Node()
+{ }
+
+/// Accepts an empty name or one that matches [A-Za-z_][A-Za-z0-9_]*.
+/// \throws Exception for any other name.
+void 
+Node::checkName(const std::string &name) const
+{
+    static const boost::regex exp("[A-Za-z_][A-Za-z0-9_]*");
+    if(name.empty()) {
+        return;
+    }
+    if(boost::regex_match(name, exp)) {
+        return;
+    }
+    throw Exception("Names must match [A-Za-z_][A-Za-z0-9_]*");
+}
+
+} // namespace avro

Added: hadoop/avro/trunk/src/c++/impl/NodeImpl.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/NodeImpl.cc?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/NodeImpl.cc (added)
+++ hadoop/avro/trunk/src/c++/impl/NodeImpl.cc Wed Jun  3 00:00:48 2009
@@ -0,0 +1,147 @@
+
+#include "NodeImpl.hh"
+
+namespace avro {
+
+/// Wrap an indentation in a struct for ostream operator<< 
+struct indent { 
+    // depth is the number of indentation levels to emit
+    indent(int depth) :
+        d(depth)
+    { }
+    int d; 
+};
+
+/// ostream operator for indent
+std::ostream& operator <<(std::ostream &os, indent x)
+{
+    // one level of indentation is four spaces
+    static const std::string spaces("    ");
+    for(int remaining = x.d; remaining != 0; --remaining) {
+        os << spaces;
+    }
+    return os;
+}
+
+/// Prints the type name in double quotes (the short json form).  The long
+/// form, an object with a "type" member, is optional and is not emitted.
+/// \p depth is not used.
+void 
+NodePrimitive::printJson(std::ostream &os, int depth) const
+{
+    os << '"';
+    os << type();
+    os << '"';
+}
+
+/// Prints the referenced name in double quotes.  \p depth is not used.
+void 
+NodeSymbolic::printJson(std::ostream &os, int depth) const
+{
+    os << '\"';
+    os << nameAttribute_.get();
+    os << '\"';
+}
+
+/// Prints the record as a json object with "type", an optional "name", and
+/// a "fields" array holding one {name, type} object per leaf.
+/// \param depth indentation level of the line the record starts on.
+void 
+NodeRecord::printJson(std::ostream &os, int depth) const
+{
+    os << "{\n";
+    os << indent(++depth) << "\"type\": \"record\",\n";
+    if(!nameAttribute_.get().empty()) {
+        os << indent(depth) << "\"name\": \"" << nameAttribute_.get() << "\",\n";
+    }
+    os << indent(depth) << "\"fields\": [\n";
+
+    const int fields = leafAttributes_.size();
+    const int fieldDepth = depth + 1;
+    for(int i = 0; i < fields; ++i) {
+        os << indent(fieldDepth) << "{\n";
+        os << indent(fieldDepth + 1) << "\"name\": \"" << leafNamesAttributes_.at(i) << "\",\n";
+        os << indent(fieldDepth + 1) << "\"type\": ";
+        leafAttributes_.at(i)->printJson(os, fieldDepth + 1);
+        os << '\n';
+        // Each field object is closed inside the loop, so a record with no
+        // fields prints an empty array instead of a stray closing brace.
+        os << indent(fieldDepth) << ((i + 1 < fields) ? "},\n" : "}\n");
+    }
+    os << indent(depth) << "]\n";
+    os << indent(depth - 1) << '}';
+}
+
+/// Prints the enum as a json object with "type", an optional "name", and a
+/// "symbols" array with one quoted name per line.
+/// \param depth indentation level of the line the enum starts on.
+void 
+NodeEnum::printJson(std::ostream &os, int depth) const
+{
+    const int memberDepth = depth + 1;
+    const int symbolDepth = depth + 2;
+
+    os << "{\n";
+    os << indent(memberDepth) << "\"type\": \"enum\",\n";
+    if(!nameAttribute_.get().empty()) {
+        os << indent(memberDepth) << "\"name\": \"" << nameAttribute_.get() << "\",\n";
+    }
+    os << indent(memberDepth) << "\"symbols\": [\n";
+
+    const int names = leafNamesAttributes_.size();
+    for(int i = 0; i != names; ++i) {
+        // the separator goes before every symbol except the first
+        if(i != 0) {
+            os << ",\n";
+        }
+        os << indent(symbolDepth) << '\"' << leafNamesAttributes_.at(i) << '\"';
+    }
+    os << '\n';
+    os << indent(memberDepth) << "]\n";
+    os << indent(depth) << '}';
+}
+
+/// Prints the array as a json object with "type" and "items" members.
+/// \param depth indentation level of the line the array starts on.
+void 
+NodeArray::printJson(std::ostream &os, int depth) const
+{
+    os << "{\n";
+    os << indent(depth+1) << "\"type\": \"array\",\n";
+    os << indent(depth+1) <<  "\"items\": ";
+    // The item schema starts on a line indented at depth+1, so it is printed
+    // at that depth to line its closing bracket up with the "items" key.
+    leafAttributes_.at(0)->printJson(os, depth+1);
+    os << '\n';
+    os << indent(depth) << '}';
+}
+
+/// Prints the map as a json object with "type" and "values" members.
+/// \param depth indentation level of the line the map starts on.
+void 
+NodeMap::printJson(std::ostream &os, int depth) const
+{
+    os << "{\n";
+    os << indent(depth+1) <<"\"type\": \"map\",\n";
+    os << indent(depth+1) << "\"values\": ";
+    // NOTE(review): the values schema is read from leaf 1, which presumes
+    // leaf 0 is occupied by something else (a key node?) - confirm in NodeMap.
+    // It starts on a line indented at depth+1, so it is printed at that depth.
+    leafAttributes_.at(1)->printJson(os, depth+1);
+    os << '\n';
+    os << indent(depth) << '}';
+}
+
+/// Prints the union as a json array with one branch schema per line.
+/// \param depth indentation level of the line the union starts on.
+void 
+NodeUnion::printJson(std::ostream &os, int depth) const
+{
+    const int branchDepth = depth + 1;
+    const int branches = leafAttributes_.size();
+
+    os << "[\n";
+    for(int i = 0; i != branches; ++i) {
+        // the separator goes before every branch except the first
+        if(i != 0) {
+            os << ",\n";
+        }
+        os << indent(branchDepth);
+        leafAttributes_.at(i)->printJson(os, branchDepth);
+    }
+    os << '\n';
+    os << indent(depth) << ']';
+}
+
+/// Prints the fixed as a json object with "type", "size" and "name" members.
+/// \param depth indentation level of the line the fixed starts on.
+void 
+NodeFixed::printJson(std::ostream &os, int depth) const
+{
+    os << "{\n";
+    os << indent(++depth) << "\"type\": \"fixed\",\n";
+    os << indent(depth) << "\"size\": " << sizeAttribute_.get() << ",\n";
+    // the name is a json string, so it needs a quote on both sides
+    os << indent(depth) << "\"name\": \"" << nameAttribute_.get() << "\"\n";
+    os << indent(--depth) << '}';
+}
+
+} // namespace avro

Added: hadoop/avro/trunk/src/c++/impl/Schema.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/Schema.cc?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/Schema.cc (added)
+++ hadoop/avro/trunk/src/c++/impl/Schema.cc Wed Jun  3 00:00:48 2009
@@ -0,0 +1,90 @@
+
+#include "Schema.hh"
+
+namespace avro {
+
+// Leaves node_ default-constructed.
+Schema::Schema() 
+{ }
+
+// Out-of-line destructor with nothing to do.
+Schema::~Schema() 
+{ }
+
+// Uses an existing node as the root of this schema.
+Schema::Schema(const NodePtr &node) :
+    node_(node)
+{ }
+
+// Wraps a raw node pointer in node_; the derived schema classes pass a
+// freshly allocated node here.
+Schema::Schema(Node *node) :
+    node_(node)
+{ }
+
+// Creates a new NodeRecord as the root and gives it the record's name.
+RecordSchema::RecordSchema(const std::string &name) :
+    Schema(new NodeRecord)
+{
+    node_->setName(name);
+}
+
+// Appends a field: the field's root node becomes a leaf of the record and
+// its name is added to the record's list of names.
+void
+RecordSchema::addField(const std::string &name, const Schema &fieldSchema) 
+{
+    node_->addLeaf(fieldSchema.root());
+    node_->addName(name);
+}
+
+// Creates a new NodeEnum as the root and gives it the enum's name.
+EnumSchema::EnumSchema(const std::string &name) :
+    Schema(new NodeEnum)
+{
+    node_->setName(name);
+}
+
+// Appends a symbol to the enum node's list of names.
+void
+EnumSchema::addSymbol(const std::string &symbol)
+{
+    node_->addName(symbol);
+}
+
+// Creates a new NodeArray as the root and adds the items schema as a leaf.
+ArraySchema::ArraySchema(const Schema &itemsSchema) :
+    Schema(new NodeArray)
+{
+    node_->addLeaf(itemsSchema.root());
+}
+
+// Creates a new NodeMap as the root and adds the values schema as a leaf.
+MapSchema::MapSchema(const Schema &valuesSchema) :
+    Schema(new NodeMap)
+{
+    node_->addLeaf(valuesSchema.root());
+}
+
+// Creates a new NodeUnion as the root; branches are added with addType().
+UnionSchema::UnionSchema() :
+    Schema(new NodeUnion)
+{ }
+
+/// Appends \p typeSchema as a branch of the union.
+/// \throws Exception when the branch is itself a union, or when it is a
+///         record whose name matches a record already in the union.
+void
+UnionSchema::addType(const Schema &typeSchema) 
+{
+    const Type incoming = typeSchema.type();
+
+    if(incoming == AVRO_UNION) {
+        throw Exception("Cannot add unions to unions");
+    }
+
+    if(incoming == AVRO_RECORD) {
+        // compare against every record branch already present
+        const size_t existing = node_->leaves();
+        for(size_t idx = 0; idx != existing; ++idx) {
+            const NodePtr &branch = node_->leafAt(idx);
+            // TODO, more checks?
+            if(branch->type() != AVRO_RECORD) {
+                continue;
+            }
+            if(branch->name() == typeSchema.root()->name()) {
+                throw Exception("Records in unions cannot have duplicate names");
+            }
+        }
+    }
+
+    node_->addLeaf(typeSchema.root());
+}
+
+// Creates a new NodeFixed as the root and sets its size and name.
+FixedSchema::FixedSchema(int size, const std::string &name) :
+    Schema(new NodeFixed)
+{
+    node_->setFixedSize(size);
+    node_->setName(name);
+}
+
+} // namespace avro

Added: hadoop/avro/trunk/src/c++/impl/Types.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/Types.cc?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/Types.cc (added)
+++ hadoop/avro/trunk/src/c++/impl/Types.cc Wed Jun  3 00:00:48 2009
@@ -0,0 +1,44 @@
+#include <iostream>
+#include <boost/static_assert.hpp>
+#include "Types.hh"
+
+namespace avro {
+
+namespace strings {
+// Printable name for each Type value.  operator<< indexes this table directly
+// with a Type, so entry i is the name printed for the enumerator whose value
+// is i.
+const std::string typeToString[] = {
+    "string",
+    "bytes",
+    "int",
+    "long",
+    "float",
+    "double",
+    "boolean",
+    "null",
+    "record",
+    "enum",
+    "array",
+    "map",
+    "union",
+    "fixed",
+    "symbolic"
+};
+
+// the table must have exactly one entry per type
+BOOST_STATIC_ASSERT( (sizeof(typeToString)/sizeof(std::string)) == (AVRO_NUM_TYPES) );
+
+} // namespace strings
+
+BOOST_STATIC_ASSERT( AVRO_NUM_TYPES < 64 );
+
+/// Streams a Type: its name from strings::typeToString when isAvroType(type)
+/// holds, otherwise its numeric value.
+std::ostream &operator<< (std::ostream &os, const Type type)
+{
+    if(!isAvroType(type)) {
+        // not a known type, fall back to the raw number
+        os << static_cast<int>(type);
+        return os;
+    }
+    os << strings::typeToString[type];
+    return os;
+}
+
+} // namespace avro
+

Added: hadoop/avro/trunk/src/c++/impl/ValidSchema.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/ValidSchema.cc?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/ValidSchema.cc (added)
+++ hadoop/avro/trunk/src/c++/impl/ValidSchema.cc Wed Jun  3 00:00:48 2009
@@ -0,0 +1,74 @@
+#include "ValidSchema.hh"
+#include "Schema.hh"
+#include "Node.hh"
+
+namespace avro {
+
+    /// Takes the root node of schema and validates it immediately; validate()
+    /// throws Exception when the tree is not valid.
+    ValidSchema::ValidSchema(const Schema &schema) :
+    node_(schema.root())
+{
+    validate(node_);
+}
+
+/// Default construction uses the root node of a NullSchema; validate() is not
+/// called on this path.
+ValidSchema::ValidSchema() :
+   node_(NullSchema().root()) 
+{ }
+
+/// Replaces the held schema: the new root is validated first and is stored
+/// only after validate() returns (it throws on an invalid tree).
+void
+ValidSchema::setSchema(const Schema &schema)
+{
+    const NodePtr &candidate(schema.root());
+    validate(candidate);
+    node_ = candidate;
+}
+
+/// Recursively validates the tree rooted at node.
+///
+/// Named nodes are registered in symbolMap_; a symbolic node must refer to a
+/// name that is already registered.  Each visited node is locked, and every
+/// leaf whose validation returns false is turned into a symbolic reference.
+///
+/// @return false when a named node could not be registered (registerSymbol
+///         returned false), true otherwise.
+/// @throws Exception if a node reports !isValid(), if a symbolic name is
+///         unknown, or if a null node other than node_ itself is passed in.
+bool
+ValidSchema::validate(const NodePtr &node) 
+{
+    if(!node) {
+        // A null node can only be replaced by a null primitive when the
+        // caller passed node_ itself (as the constructor does): assigning
+        // node_ then also makes this aliasing reference non-null.  For any
+        // other null node the assignment would leave `node` null and the
+        // dereferences below would be invalid, so reject it instead.
+        if(&node != &node_) {
+            throw Exception("Schema is invalid");
+        }
+        node_ = new NodePrimitive(AVRO_NULL);
+    }
+
+    if(!node->isValid()) {
+        throw Exception("Schema is invalid");
+    }
+    if(node->hasName()) {
+        if(node->type() == AVRO_SYMBOLIC) {
+            // a reference by name: the target must have been seen already
+            if(!symbolMap_.hasSymbol(node->name())) {
+                throw Exception("Symbolic name not found");
+            }
+            return true;
+        }
+        bool registered = symbolMap_.registerSymbol(node);
+        if(!registered) {
+            // tell the caller so it can replace this leaf by a symbolic node
+            return false;
+        }
+    }
+    node->lock();
+    size_t leaves = node->leaves();
+    for(size_t i = 0; i < leaves; ++i) {
+        const NodePtr &leaf(node->leafAt(i));
+
+        if(! validate(leaf)) {
+            node->setLeafToSymbolic(i);
+        }
+    }
+
+    return true;
+}
+
+/// Writes the schema to os via the root node's printJson (starting at depth
+/// 0), followed by a newline.
+void 
+ValidSchema::toJson(std::ostream &os)
+{ 
+    node_->printJson(os, 0);
+    os << '\n';
+}
+
+/// Writes the root node's printBasicInfo output to os.
+void 
+ValidSchema::toFlatList(std::ostream &os)
+{ 
+    node_->printBasicInfo(os);
+}
+
+} // namespace avro
+

Added: hadoop/avro/trunk/src/c++/impl/ValidatingParser.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/ValidatingParser.cc?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/ValidatingParser.cc (added)
+++ hadoop/avro/trunk/src/c++/impl/ValidatingParser.cc Wed Jun  3 00:00:48 2009
@@ -0,0 +1,128 @@
+#include <boost/static_assert.hpp>
+
+#include "ValidatingParser.hh"
+#include "ValidSchema.hh"
+#include "OutputStreamer.hh"
+
+namespace avro {
+
+/// Builds a validator for schema and a parser reading from in.
+ValidatingParser::ValidatingParser(const ValidSchema &schema, InputStreamer &in) :
+    validator_(schema),
+    parser_(in)
+{ }
+
+/// Consumes a null: calls checkSafeToGet(AVRO_NULL), lets the parser read
+/// the (empty) null value and advances the validator.
+void
+ValidatingParser::getNull()
+{ 
+    checkSafeToGet(AVRO_NULL);
+    parser_.getNull();
+    validator_.advance();
+}
+
+/// Calls checkSafeToGet(AVRO_INT), reads an int from the parser, then
+/// advances the validator and returns the value.
+int32_t
+ValidatingParser::getInt()
+{
+    checkSafeToGet(AVRO_INT);
+    int32_t val = parser_.getInt();
+    validator_.advance();
+    return val;
+}
+
+/// Calls checkSafeToGet(AVRO_LONG), reads a long from the parser, then
+/// advances the validator and returns the value.
+int64_t
+ValidatingParser::getLong()
+{
+    checkSafeToGet(AVRO_LONG);
+    int64_t val = parser_.getLong();
+    validator_.advance();
+    return val;
+}
+
+/// Calls checkSafeToGet(AVRO_FLOAT), reads a float from the parser, then
+/// advances the validator and returns the value.
+///
+/// The value is read before the validator advances (as in getInt/getLong),
+/// so a read that throws does not leave the validator one step ahead.
+float
+ValidatingParser::getFloat()
+{
+    checkSafeToGet(AVRO_FLOAT);
+    float val = parser_.getFloat();
+    validator_.advance();
+    return val;
+}
+
+/// Calls checkSafeToGet(AVRO_DOUBLE), reads a double from the parser, then
+/// advances the validator and returns the value.
+///
+/// The value is read before the validator advances (as in getInt/getLong),
+/// so a read that throws does not leave the validator one step ahead.
+double
+ValidatingParser::getDouble()
+{
+    checkSafeToGet(AVRO_DOUBLE);
+    double val = parser_.getDouble();
+    validator_.advance();
+    return val;
+}
+
+/// Calls checkSafeToGet(AVRO_BOOL), reads a boolean from the parser, then
+/// advances the validator and returns the value.
+///
+/// The value is read before the validator advances (as in getInt/getLong),
+/// so a read that throws does not leave the validator one step ahead.
+bool
+ValidatingParser::getBool()
+{
+    checkSafeToGet(AVRO_BOOL);
+    bool val = parser_.getBool();
+    validator_.advance();
+    return val;
+}
+
+/// Calls checkSafeToGet(AVRO_STRING), reads a string from the parser into
+/// val, then advances the validator.
+///
+/// The value is read before the validator advances (as in getInt/getLong),
+/// so a read that throws does not leave the validator one step ahead.
+void
+ValidatingParser::getString(std::string &val)
+{
+    checkSafeToGet(AVRO_STRING);
+    parser_.getString(val);
+    validator_.advance();
+}
+
+/// Calls checkSafeToGet(AVRO_BYTES), reads a byte sequence from the parser
+/// into val, then advances the validator.
+///
+/// The value is read before the validator advances (as in getInt/getLong),
+/// so a read that throws does not leave the validator one step ahead.
+void
+ValidatingParser::getBytes(std::vector<uint8_t> &val)
+{
+    checkSafeToGet(AVRO_BYTES);
+    parser_.getBytes(val);
+    validator_.advance();
+}
+
+/// Reads a long that the validator is waiting for as a count (block size,
+/// union branch or enum value) and hands it to advanceWithCount().
+int64_t
+ValidatingParser::getCount()
+{
+    checkSafeToGet(AVRO_LONG);
+    int64_t val = parser_.getLong();
+    validator_.advanceWithCount(val);
+    return val;
+}
+
+/// Enters a record: calls checkSafeToGet(AVRO_RECORD) and advances the
+/// validator.  Nothing is read from the parser here.
+void 
+ValidatingParser::getRecord()
+{
+    checkSafeToGet(AVRO_RECORD);
+    validator_.advance();
+}
+
+/// Enters a union and returns the value read by getCount() (the validator
+/// uses it as the index of the selected branch).
+int64_t 
+ValidatingParser::getUnion()
+{
+    checkSafeToGet(AVRO_UNION);
+    validator_.advance();
+    return getCount();
+}
+
+/// Enters an enum and returns the value read by getCount().
+int64_t 
+ValidatingParser::getEnum()
+{
+    checkSafeToGet(AVRO_ENUM);
+    validator_.advance();
+    return getCount();
+}
+
+/// Enters a map (or its next block) and returns the block size read by
+/// getCount().
+int64_t 
+ValidatingParser::getMapBlockSize()
+{
+    checkSafeToGet(AVRO_MAP);
+    validator_.advance();
+    return getCount();
+}
+
+/// Enters an array (or its next block) and returns the block size read by
+/// getCount().
+int64_t 
+ValidatingParser::getArrayBlockSize()
+{
+    checkSafeToGet(AVRO_ARRAY);
+    validator_.advance();
+    return getCount();
+}
+
+} // namespace avro

Added: hadoop/avro/trunk/src/c++/impl/ValidatingSerializer.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/ValidatingSerializer.cc?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/ValidatingSerializer.cc (added)
+++ hadoop/avro/trunk/src/c++/impl/ValidatingSerializer.cc Wed Jun  3 00:00:48 2009
@@ -0,0 +1,139 @@
+#include <boost/static_assert.hpp>
+
+#include "ValidatingSerializer.hh"
+#include "ValidSchema.hh"
+#include "OutputStreamer.hh"
+
+namespace avro {
+
+/// Builds a validator for schema and a serializer writing to out.
+ValidatingSerializer::ValidatingSerializer(const ValidSchema &schema, OutputStreamer &out) :
+    validator_(schema),
+    serializer_(out)
+{ }
+
+/// Calls checkSafeToPut(AVRO_NULL), writes a null, then advances the validator.
+void
+ValidatingSerializer::putNull()
+{ 
+    checkSafeToPut(AVRO_NULL);
+    serializer_.putNull();
+    validator_.advance();
+}
+
+/// Calls checkSafeToPut(AVRO_INT), writes val, then advances the validator.
+void
+ValidatingSerializer::putInt(int32_t val)
+{
+    checkSafeToPut(AVRO_INT);
+    serializer_.putInt(val);
+    validator_.advance();
+}
+
+/// Calls checkSafeToPut(AVRO_LONG), writes val, then advances the validator.
+void
+ValidatingSerializer::putLong(int64_t val)
+{
+    checkSafeToPut(AVRO_LONG);
+    serializer_.putLong(val);
+    validator_.advance();
+}
+
+
+/// Calls checkSafeToPut(AVRO_FLOAT), writes val, then advances the validator.
+void
+ValidatingSerializer::putFloat(float val)
+{
+    checkSafeToPut(AVRO_FLOAT);
+    serializer_.putFloat(val);
+    validator_.advance();
+}
+
+/// Calls checkSafeToPut(AVRO_DOUBLE), writes val, then advances the validator.
+void
+ValidatingSerializer::putDouble(double val)
+{
+    checkSafeToPut(AVRO_DOUBLE);
+    serializer_.putDouble(val);
+    validator_.advance();
+}
+
+/// Calls checkSafeToPut(AVRO_BOOL), writes val, then advances the validator.
+void
+ValidatingSerializer::putBool(bool val)
+{
+    checkSafeToPut(AVRO_BOOL);
+    serializer_.putBool(val);
+    validator_.advance();
+}
+
+/// Calls checkSafeToPut(AVRO_STRING), writes val, then advances the validator.
+void
+ValidatingSerializer::putString(const std::string &val)
+{
+    checkSafeToPut(AVRO_STRING);
+    serializer_.putString(val);
+    validator_.advance();
+}
+
+/// Calls checkSafeToPut(AVRO_BYTES), writes size bytes starting at val, then
+/// advances the validator.
+void
+ValidatingSerializer::putBytes(const uint8_t *val, size_t size)
+{
+    checkSafeToPut(AVRO_BYTES);
+    serializer_.putBytes(val, size);
+    validator_.advance();
+}
+
+/// Writes a long that the validator is waiting for as a count (block size,
+/// union branch or enum value) and hands it to advanceWithCount().
+void 
+ValidatingSerializer::putCount(int64_t count)
+{
+    checkSafeToPut(AVRO_LONG);
+    serializer_.putLong(count);
+    validator_.advanceWithCount(count);
+}
+
+/// Enters a record: calls checkSafeToPut(AVRO_RECORD) and advances the
+/// validator.  Nothing is written to the serializer here.
+void 
+ValidatingSerializer::beginRecord()
+{
+    checkSafeToPut(AVRO_RECORD);
+    validator_.advance();
+}
+
+/// Starts an array block: advances the validator into the array, then writes
+/// the block size with putCount().
+void 
+ValidatingSerializer::beginArrayBlock(int64_t size)
+{
+    checkSafeToPut(AVRO_ARRAY);
+    validator_.advance();
+    putCount(size);
+}
+
+/// Ends an array by writing a block of size 0.
+void 
+ValidatingSerializer::endArray()
+{
+    beginArrayBlock(0);
+}
+
+/// Starts a map block: advances the validator into the map, then writes the
+/// block size with putCount().
+void 
+ValidatingSerializer::beginMapBlock(int64_t size)
+{
+    checkSafeToPut(AVRO_MAP);
+    validator_.advance();
+    putCount(size);
+}
+
+/// Ends a map by writing a block of size 0.
+void 
+ValidatingSerializer::endMap()
+{
+    beginMapBlock(0);
+}
+
+/// Starts a union: advances the validator into the union, then writes the
+/// chosen branch index with putCount().
+void 
+ValidatingSerializer::beginUnion(int64_t choice)
+{
+    checkSafeToPut(AVRO_UNION);
+    validator_.advance();
+    putCount(choice);
+}
+
+/// Starts an enum: advances the validator into the enum, then writes the
+/// chosen value with putCount().
+void 
+ValidatingSerializer::beginEnum(int64_t choice)
+{
+    checkSafeToPut(AVRO_ENUM);
+    validator_.advance();
+    putCount(choice);
+}
+
+
+} // namespace avro

Added: hadoop/avro/trunk/src/c++/impl/Validator.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/Validator.cc?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/Validator.cc (added)
+++ hadoop/avro/trunk/src/c++/impl/Validator.cc Wed Jun  3 00:00:48 2009
@@ -0,0 +1,280 @@
+#include <boost/static_assert.hpp>
+
+#include "Validator.hh"
+#include "ValidSchema.hh"
+
+namespace avro {
+
+/// Starts validation at the root of schema: all state is reset and
+/// setupOperation() is called on the root so that the root's type is the
+/// first thing expected.
+Validator::Validator(const ValidSchema &schema) :
+    schema_(schema),
+    parseTree_(schema.root()),
+    nextType_(AVRO_NULL),
+    expectedTypesFlag_(0),
+    compoundStarted_(false),
+    waitingForCount_(false),
+    count_(0)
+{
+    setupOperation(parseTree_);
+}
+
+/// Puts the validator into the "waiting for a count" state: the stored count
+/// is reset and the next item expected is a long (an int is accepted too).
+void 
+Validator::setWaitingForCount()
+{
+    nextType_ = AVRO_LONG;
+    expectedTypesFlag_ = typeToFlag(AVRO_INT) | typeToFlag(AVRO_LONG);
+    count_ = 0;
+    waitingForCount_ = true;
+}
+
+/// Advance step for a record on top of the compound stack: sets up the next
+/// field, or pops the record once every field has been visited.
+void
+Validator::recordAdvance()
+{
+    // record doesn't use this flag because it doesn't need to set
+    // up anything at the start, but just clear it
+    compoundStarted_ = false;
+
+    // determine the next record entry to process
+    // (post-increment: pos ends up one past the field being set up)
+    size_t index = (compoundStack_.back().pos)++;
+
+    const NodePtr &node = compoundStack_.back().node;
+    if(index < node->leaves() ) {
+        setupOperation(node->leafAt(index));
+    }
+    else {
+        // done with this record, remove it from the processing stack
+        compoundStack_.pop_back();
+    }
+}
+
+/// Advance step for an enum on top of the compound stack.  On entry the
+/// validator starts waiting for the enum's value; once that value has been
+/// supplied the enum is popped.
+void
+Validator::enumAdvance()
+{
+    if(!compoundStarted_) {
+        // the value has been consumed, the enum is finished
+        waitingForCount_ = false;
+        compoundStack_.pop_back();
+        return;
+    }
+
+    setWaitingForCount();
+    compoundStarted_ = false;
+}
+
+/// Advance step shared by arrays and maps (block-counted compounds).
+///
+/// Three phases, selected by the validator state:
+///  - just entered the compound: start waiting for a block count;
+///  - a count was supplied: zero pops the compound, otherwise the count is
+///    pushed on counters_ and the first leaf is set up;
+///  - an item was consumed: move to the next leaf of the item, or, after the
+///    last leaf, decrement the block counter and either start the next item
+///    or expect the compound type again (i.e. the next block).
+void
+Validator::countingAdvance()
+{
+    const NodePtr &node = compoundStack_.back().node;
+
+    if(compoundStarted_) {
+        setWaitingForCount();
+        compoundStarted_ = false;
+    }
+    else if(waitingForCount_) {
+        waitingForCount_ = false;
+        if(count_ == 0) {
+            // an empty block terminates the compound
+            compoundStack_.pop_back();
+        }
+        else {
+            counters_.push_back(count_);
+            setupOperation(node->leafAt(0));
+        }
+    }
+    else {
+
+        size_t index = ++(compoundStack_.back().pos);
+
+        if(index < node->leaves() ) {
+            setupOperation(node->leafAt(index));
+        }
+        else {
+            compoundStack_.back().pos = 0;
+            // Test the decremented counter in its own type: copying it into
+            // an int first could truncate a large 64-bit count to zero and
+            // end the block early.
+            if(--counters_.back() == 0) {
+                counters_.pop_back();
+                // block finished: the next thing expected is another block
+                // of the same compound
+                compoundStarted_ = true;
+                nextType_ = node->type();
+                expectedTypesFlag_ = typeToFlag(nextType_);
+            }
+            else {
+                setupOperation(node->leafAt(0));
+            }
+        }
+    }
+}
+
+/// Advance step for a union on top of the compound stack.  On entry the
+/// validator starts waiting for the branch index; once it has been supplied
+/// the union is popped and the selected branch is set up.
+///
+/// @throws Exception("Union out of range") if the supplied index is negative
+///         or not smaller than the number of branches.
+void
+Validator::unionAdvance()
+{
+    if(compoundStarted_) {
+        setWaitingForCount();
+        compoundStarted_ = false;
+    }
+    else {
+        waitingForCount_ = false;
+        // copy the pointer: the stack entry is popped before the branch is used
+        NodePtr node = compoundStack_.back().node;
+
+        // a negative index must be rejected as well, it would otherwise be
+        // passed straight to leafAt()
+        if(count_ >= 0 && count_ < static_cast<int64_t>(node->leaves())) {
+            compoundStack_.pop_back();
+            setupOperation(node->leafAt(count_));
+        }
+        else {
+            throw Exception("Union out of range");
+        }
+    }
+}
+
+/// Advance step for a fixed on top of the compound stack: a fixed is
+/// consumed in one step, so it is simply popped.
+void
+Validator::fixedAdvance()
+{
+    compoundStarted_ = false;
+    compoundStack_.pop_back();
+}
+
+/// Returns fixedSize() of the node on top of the compound stack.
+/// NOTE(review): calls back() unconditionally, so the stack must not be
+/// empty; presumably only meant to be called while a fixed is expected.
+int 
+Validator::nextSizeExpected() const
+{
+    return compoundStack_.back().node->fixedSize();
+}
+
+/// Moves the validator to the next expected item.
+///
+/// The expected-type flags are cleared, then the advance function of the
+/// compound type on top of the stack is called repeatedly until one of them
+/// sets an expected type or the stack has been emptied.
+void
+Validator::advance()
+{
+    typedef void (Validator::*AdvanceFunc)();
+
+    // only the compound types need advance functions here
+    // (the table is indexed by Type value)
+    static const AdvanceFunc funcs[] = {
+        0, // string
+        0, // bytes
+        0, // int
+        0, // long
+        0, // float
+        0, // double
+        0, // bool
+        0, // null
+        &Validator::recordAdvance,
+        &Validator::enumAdvance,
+        &Validator::countingAdvance,
+        &Validator::countingAdvance,
+        &Validator::unionAdvance,
+        &Validator::fixedAdvance,
+        0 // symbolic
+    };
+    BOOST_STATIC_ASSERT( (sizeof(funcs)/sizeof(AdvanceFunc)) == (AVRO_NUM_TYPES) );
+
+    expectedTypesFlag_ = 0;
+    // loop until we encounter a next expected type, or we've exited all compound types 
+    while(!expectedTypesFlag_ && !compoundStack_.empty() ) {
+    
+        Type type = compoundStack_.back().node->type();
+
+        AdvanceFunc func = funcs[type];
+
+        // only compound functions are put on the status stack so it is ok to
+        // assume that func is not null
+        assert(func);
+
+        ((this)->*(func))();
+    }
+}
+
+/// Supplies the count the validator is waiting for (block size, union branch
+/// or enum value) and advances.
+///
+/// @throws Exception if no count is expected at this point, or if count is
+///         negative.
+void
+Validator::advanceWithCount(int64_t count) 
+{
+    if(!waitingForCount_) {
+        throw Exception("Not expecting count");
+    }
+    // check the incoming value; count_ still holds the 0 stored by
+    // setWaitingForCount() at this point
+    else if(count < 0) {
+        throw Exception("Count cannot be negative");
+    }
+    count_ = count;
+
+    advance();
+}
+
+/// Sets expectedTypesFlag_ to the set of types accepted when `type` is
+/// expected, using a table indexed by Type value.  String and bytes accept
+/// each other, long also accepts int, and symbolic accepts nothing.
+void
+Validator::setupFlag(Type type)
+{
+    // use flags instead of strictly types, so that we can be more lax about the type
+    // (for example, a long should be able to accept an int type, but not vice versa)
+    static const flag_t flags[] = {
+        typeToFlag(AVRO_STRING) | typeToFlag(AVRO_BYTES),
+        typeToFlag(AVRO_STRING) | typeToFlag(AVRO_BYTES),
+        typeToFlag(AVRO_INT),
+        typeToFlag(AVRO_INT) | typeToFlag(AVRO_LONG),
+        typeToFlag(AVRO_FLOAT),
+        typeToFlag(AVRO_DOUBLE),
+        typeToFlag(AVRO_BOOL),
+        typeToFlag(AVRO_NULL),
+        typeToFlag(AVRO_RECORD),
+        typeToFlag(AVRO_ENUM),
+        typeToFlag(AVRO_ARRAY),
+        typeToFlag(AVRO_MAP),
+        typeToFlag(AVRO_UNION),
+        typeToFlag(AVRO_FIXED),
+        0
+    };
+    BOOST_STATIC_ASSERT( (sizeof(flags)/sizeof(flag_t)) == (AVRO_NUM_TYPES) );
+
+    expectedTypesFlag_ = flags[type];
+}
+
+/// Makes node the next thing expected.  A symbolic node is first resolved
+/// through the schema and the resolved node is used instead.  Non-primitive
+/// nodes are also pushed on the compound stack and marked as just started.
+void
+Validator::setupOperation(const NodePtr &node)
+{
+    nextType_ = node->type();
+
+    if(nextType_ == AVRO_SYMBOLIC) {
+        NodePtr symNode ( schema_.followSymbol(node->name()) );
+        assert(symNode);
+        return setupOperation(symNode);
+    }
+
+    assert(nextType_ < AVRO_NUM_TYPES);
+
+    setupFlag(nextType_);
+
+    if(!isPrimitive(nextType_)) {
+        compoundStack_.push_back(CompoundType(node));
+        compoundStarted_ = true;
+    }
+}
+
+/// Looks up the name of the record currently being processed.
+///
+/// name is always cleared first.  Returns true and fills name when the stack
+/// entry examined is a record, false otherwise.
+bool 
+Validator::getCurrentRecordName(std::string &name) const
+{
+    name.clear();
+
+    // if the top of the stack is a record I want this record name,
+    // otherwise look at the entry just below the top
+    const bool useTop = !compoundStack_.empty()
+        && (isPrimitive(nextType_) || nextType_ == AVRO_RECORD);
+    const int idx = useTop ? compoundStack_.size() -1 : compoundStack_.size() -2;
+
+    if(idx < 0 || compoundStack_[idx].node->type() != AVRO_RECORD) {
+        return false;
+    }
+
+    name = compoundStack_[idx].node->name();
+    return true;
+}
+
+/// Looks up the name of the record field that is expected next.
+///
+/// name is always cleared first.  Returns true and fills name when the
+/// enclosing stack entry is a record with a field in progress, false
+/// otherwise.
+bool 
+Validator::getNextFieldName(std::string &name) const
+{
+    name.clear();
+
+    // a compound next type sits on top of the stack itself, so the record
+    // that may contain it is one entry further down
+    const int idx = isCompound(nextType_) ? compoundStack_.size()-2 : compoundStack_.size()-1;
+    if(idx < 0 || compoundStack_[idx].node->type() != AVRO_RECORD) {
+        return false;
+    }
+
+    const NodePtr &record = compoundStack_[idx].node;
+    // pos is one past the field being processed; while it is still 0 the
+    // subtraction wraps around and the range test below fails
+    const size_t fieldIndex = compoundStack_[idx].pos-1;
+    if(fieldIndex >= record->leaves()) {
+        return false;
+    }
+
+    name = record->nameAt(fieldIndex);
+    return true;
+}
+
+} // namespace avro

Added: hadoop/avro/trunk/src/c++/impl/Zigzag.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/Zigzag.cc?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/Zigzag.cc (added)
+++ hadoop/avro/trunk/src/c++/impl/Zigzag.cc Wed Jun  3 00:00:48 2009
@@ -0,0 +1,78 @@
+
+#include "Zigzag.hh"
+
+namespace avro {
+
+/// Zigzag-encodes a signed 64-bit value: 0, -1, 1, -2, ... map to
+/// 0, 1, 2, 3, ... so that values of small magnitude get small codes.
+///
+/// The arithmetic is done on unsigned values: shifting a negative signed
+/// value left is undefined behaviour before C++20.
+uint64_t 
+encodeZigzag64(int64_t input)
+{
+    const uint64_t bits = static_cast<uint64_t>(input);
+    // all ones when input is negative, zero otherwise
+    const uint64_t signMask = static_cast<uint64_t>(0) - (bits >> 63);
+    return (bits << 1) ^ signMask;
+}
+
+/// Reverses the 64-bit zigzag encoding: the lowest bit carries the sign and
+/// the remaining bits the (possibly complemented) magnitude.
+int64_t 
+decodeZigzag64(uint64_t input)
+{
+    const uint64_t magnitude = input >> 1;
+    // all ones for odd codes (negative values), zero for even codes
+    const uint64_t signMask = (input & 1) ? ~static_cast<uint64_t>(0) : static_cast<uint64_t>(0);
+    return magnitude ^ signMask;
+}
+
+/// Zigzag-encodes a signed 32-bit value: 0, -1, 1, -2, ... map to
+/// 0, 1, 2, 3, ... so that values of small magnitude get small codes.
+///
+/// The arithmetic is done on unsigned values: shifting a negative signed
+/// value left is undefined behaviour before C++20.
+uint32_t 
+encodeZigzag32(int32_t input)
+{
+    const uint32_t bits = static_cast<uint32_t>(input);
+    // all ones when input is negative, zero otherwise
+    const uint32_t signMask = static_cast<uint32_t>(0) - (bits >> 31);
+    return (bits << 1) ^ signMask;
+}
+
+/// Reverses the 32-bit zigzag encoding: the lowest bit carries the sign and
+/// the remaining bits the (possibly complemented) magnitude.
+int32_t 
+decodeZigzag32(uint32_t input)
+{
+    const uint32_t magnitude = input >> 1;
+    // all ones for odd codes (negative values), zero for even codes
+    const uint32_t signMask = (input & 1) ? ~static_cast<uint32_t>(0) : static_cast<uint32_t>(0);
+    return magnitude ^ signMask;
+}
+
+/// Zigzag-encodes input and writes it to output as 7-bit groups, most
+/// significant group first.  Every byte except the last one written has its
+/// high bit (0x80) set.
+///
+/// @return the number of bytes written to output.
+///
+/// NOTE(review): a zigzag value with bit 63 set (e.g. any negative input of
+/// large magnitude) yields ten 7-bit groups, so the loop below writes
+/// output[9], one past the end of the 9-element array.  Fixing this needs a
+/// larger buffer, i.e. a change to the declared signature.
+size_t 
+encodeInt64(int64_t input, boost::array<uint8_t, 9> &output)
+{
+    // get the zigzag encoding 
+    uint64_t val = encodeZigzag64(input);
+
+    // put values in an array of bytes with variable length encoding
+    const int mask  = 0x7F;
+    output[0] = val & mask;
+    size_t bytesOut = 1;
+    while( val >>=7 ) {
+        output[bytesOut++] = (val & mask) | 0x80;
+    }
+
+    // arrange array so msb is first
+    int head = 0;
+    int tail = bytesOut - 1;
+    while(head < tail) {
+        std::swap(output[head++], output[tail--]);
+    }
+
+    return bytesOut;
+}
+
+/// Zigzag-encodes input and writes it to output as 7-bit groups, most
+/// significant group first.  Every byte except the last one written has its
+/// high bit (0x80) set.
+///
+/// @return the number of bytes written to output (1 to 5).
+size_t 
+encodeInt32(int32_t input, boost::array<uint8_t, 5> &output)
+{
+    const int lowSeven = 0x7F;
+    uint32_t remaining = encodeZigzag32(input);
+
+    // emit the groups least significant first; every group after the first
+    // is tagged with the high bit
+    size_t written = 0;
+    output[written++] = remaining & lowSeven;
+    for(remaining >>= 7; remaining != 0; remaining >>= 7) {
+        output[written++] = (remaining & lowSeven) | 0x80;
+    }
+
+    // reverse in place so that the most significant group comes first
+    for(int front = 0, back = written - 1; front < back; ++front, --back) {
+        std::swap(output[front], output[back]);
+    }
+
+    return written;
+}
+
+} // namespace avro

Added: hadoop/avro/trunk/src/c++/jsonschemas/array
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/array?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/array (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/array Wed Jun  3 00:00:48 2009
@@ -0,0 +1 @@
+{ "type" : "array", "items" : "int" }

Added: hadoop/avro/trunk/src/c++/jsonschemas/bigrecord
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/bigrecord?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/bigrecord (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/bigrecord Wed Jun  3 00:00:48 2009
@@ -0,0 +1,64 @@
+{
+    "type": "record",
+    "name": "RootRecord",
+    "fields": [
+        {
+            "name": "mylong",
+            "type": "long"
+        },
+        {
+            "name": "mymap",
+            "type": {
+                "type": "map",
+                "values": "int"
+            }
+        },
+        {
+            "name": "myarray",
+            "type": {
+                "type": "array",
+                "items": "double"
+            }
+        },
+        {
+            "name": "myenum",
+            "type": {
+                "type": "enum",
+                "name": "ExampleEnum",
+                "symbols": [
+                    "zero",
+                    "one",
+                    "two",
+                    "three"
+                ]
+            }
+        },
+        {
+            "name": "myunion",
+            "type": [
+                "null",
+                {
+                    "type": "map",
+                    "values": "int"
+                },
+                "float"
+            ]
+        },
+        {
+            "name": "mybool",
+            "type": "boolean"
+        },
+        {
+            "name": "myfixed",
+            "type": {
+                "type": "fixed",
+                "size": 16,
+                "name": "md5"
+            }
+        },
+        {
+            "name": "anotherint",
+            "type": "int"
+        }
+    ]
+}

Added: hadoop/avro/trunk/src/c++/jsonschemas/enum
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/enum?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/enum (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/enum Wed Jun  3 00:00:48 2009
@@ -0,0 +1,10 @@
+        {
+            "type": "enum",
+            "name": "myenum",
+            "symbols": [
+                "zero",
+                "int",
+                "two",
+                "three"
+            ]
+        }

Added: hadoop/avro/trunk/src/c++/jsonschemas/fixed
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/fixed?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/fixed (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/fixed Wed Jun  3 00:00:48 2009
@@ -0,0 +1 @@
+{"type": "fixed", "size" : 16, "name":"md5" }

Added: hadoop/avro/trunk/src/c++/jsonschemas/nested
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/nested?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/nested (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/nested Wed Jun  3 00:00:48 2009
@@ -0,0 +1,17 @@
+{
+    "type": "record",
+    "name": "LongList",
+    "fields": [
+        {
+            "name": "value",
+            "type": "long"
+        },
+        {
+            "name": "next",
+            "type": [
+                "LongList",
+                "null"
+            ]
+        }
+    ]
+}

Added: hadoop/avro/trunk/src/c++/jsonschemas/nested.error
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/nested.error?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/nested.error (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/nested.error Wed Jun  3 00:00:48 2009
@@ -0,0 +1,17 @@
+{
+    "type": "record",
+    "name": "LongList",
+    "fields": [
+        {
+            "name": "value",
+            "type": "long"
+        },
+        {
+            "name": "next",
+            "type": [
+                "LongLister",
+                "null"
+            ]
+        }
+    ]
+}

Added: hadoop/avro/trunk/src/c++/jsonschemas/recinrec
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/recinrec?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/recinrec (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/recinrec Wed Jun  3 00:00:48 2009
@@ -0,0 +1,18 @@
+{
+  "type": "record", 
+  "name": "Rec1",
+  "fields" : [
+    {"name": "val1", "type": "long"},           
+    {"name": "val2", "type": {
+          "type": "record", 
+          "name": "Rec2",
+          "fields" : [
+            {"name": "inval1", "type": "double"},           
+            {"name": "inval2", "type": "int" }
+          ]
+      }
+    },
+    {"name": "val3", "type": "float"}
+  ]
+}
+

Added: hadoop/avro/trunk/src/c++/jsonschemas/record
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/record?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/record (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/record Wed Jun  3 00:00:48 2009
@@ -0,0 +1,9 @@
+{
+  "type": "record", 
+  "name": "LongList",
+  "fields" : [
+    {"name": "value", "type": "long"},           
+    {"name": "next", "type": "int" }
+  ]
+}
+

Added: hadoop/avro/trunk/src/c++/jsonschemas/record2
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/record2?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/record2 (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/record2 Wed Jun  3 00:00:48 2009
@@ -0,0 +1,10 @@
+{
+  "type": "record", 
+  "name": "LongList",
+  "fields" : [
+    {"name": "value", "type": "long"},           
+    {"name": "next", "type": ["int"] },
+    {"name": "hello", "type": {"type" : "array" , "items" :"float"}}
+  ]
+}
+

Added: hadoop/avro/trunk/src/c++/jsonschemas/union
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/union?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/union (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/union Wed Jun  3 00:00:48 2009
@@ -0,0 +1 @@
+[ "int" , "long" , "float" ]

Added: hadoop/avro/trunk/src/c++/jsonschemas/unionwithmap
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/unionwithmap?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/unionwithmap (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/unionwithmap Wed Jun  3 00:00:48 2009
@@ -0,0 +1 @@
+[ "int" , "long" , {"type":"map", "values":[ "int", "long" ] } ]

Propchange: hadoop/avro/trunk/src/c++/parser/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Wed Jun  3 00:00:48 2009
@@ -0,0 +1,3 @@
+avro.tab.h
+avro.tab.c
+lex.yy.cc

Added: hadoop/avro/trunk/src/c++/parser/avro.l
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/parser/avro.l?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/parser/avro.l (added)
+++ hadoop/avro/trunk/src/c++/parser/avro.l Wed Jun  3 00:00:48 2009
@@ -0,0 +1,110 @@
+%{
+#include "avro.tab.h"
+
+// this undef is a hack for my mac implementation
+#undef yyFlexLexer
+#include "Compiler.hh"
+
+#define YY_STACK_USED 1
+
+using std::cin;
+using std::cout;
+using std::cerr;
+
+%}
+
+%option c++
+%option noyywrap
+
+%{
+
+// Entry point called by the generated parser.  ctx is the CompilerContext
+// that owns the C++ lexer; val is not used.  For name, symbol and size
+// tokens the matched text is stored in the context before the token is
+// returned.
+int yylex(int *val, void *ctx)
+{
+    avro::CompilerContext *c = static_cast<avro::CompilerContext *>(ctx);
+    int ret = c->lexer().yylex();
+    if(ret == AVRO_LEX_NAME || ret == AVRO_LEX_SYMBOL || ret == AVRO_LEX_SIZE) {
+        c->setText( c->lexer().YYText()) ;
+    }
+    return ret;
+}
+
+%}
+
+%x READTYPE
+%x STARTTYPE
+%x READNAME
+%x READFIELDS
+%x READSYMBOLS
+%s READSIZE
+%x INUNION
+%x INOBJECT
+
+ws [ \t\n]
+delim {ws}*:{ws}*
+avrotext [a-zA-Z_][a-zA-Z0-9_]*
+startunion \[
+startobject \{
+integer [0-9]+
+
+%%
+    /* READTYPE: inside a quoted type name; the closing quote leaves the state */
+<READTYPE>int                  return AVRO_LEX_INT;
+<READTYPE>long                 return AVRO_LEX_LONG;
+<READTYPE>null                 return AVRO_LEX_NULL;
+<READTYPE>boolean              return AVRO_LEX_BOOL;
+<READTYPE>float                return AVRO_LEX_FLOAT;
+<READTYPE>double               return AVRO_LEX_DOUBLE;
+<READTYPE>string               return AVRO_LEX_STRING;
+<READTYPE>bytes                return AVRO_LEX_BYTES;
+<READTYPE>record               return AVRO_LEX_RECORD;
+<READTYPE>enum                 return AVRO_LEX_ENUM;
+<READTYPE>map                  return AVRO_LEX_MAP;
+<READTYPE>array                return AVRO_LEX_ARRAY;
+<READTYPE>fixed                return AVRO_LEX_FIXED;
+<READTYPE>{avrotext}           return AVRO_LEX_SYMBOL;
+<READTYPE>\"                   yy_pop_state(); 
+
+
+    /* READNAME: inside a quoted name; the closing quote leaves the state */
+<READNAME>{avrotext}           return AVRO_LEX_NAME;
+<READNAME>\"                   yy_pop_state();
+
+    /* READFIELDS: inside the "fields" list of a record */
+<READFIELDS>\"type\"{delim}    yy_push_state(STARTTYPE); 
+<READFIELDS>\"name\"{delim}\"  yy_push_state( READNAME); 
+<READFIELDS>\]                 yy_pop_state(); 
+<READFIELDS>[,\{\}]            return yytext[0];
+<READFIELDS>{ws}               ;
+
+    /* READSYMBOLS: inside the "symbols" list of an enum */
+<READSYMBOLS>\"                 yy_push_state(READNAME); 
+<READSYMBOLS>,                  return yytext[0];
+<READSYMBOLS>\]                 yy_pop_state();
+<READSYMBOLS>{ws}               ;
+
+    /* READSIZE: the integer following "size" */
+<READSIZE>{integer}             yy_pop_state(); return AVRO_LEX_SIZE;
+
+    /* INUNION: inside the [ ... ] of a union */
+<INUNION>\"                 yy_push_state(READTYPE); 
+<INUNION>{startobject}      yy_push_state( INOBJECT); return yytext[0];
+<INUNION>\]                 yy_pop_state(); return yytext[0];
+<INUNION>,                  return yytext[0];
+<INUNION>{ws}               ;
+
+    /* INOBJECT: inside the { ... } of an object-form schema */
+<INOBJECT>\"type\"{delim}      yy_push_state(STARTTYPE); return AVRO_LEX_TYPE;
+<INOBJECT>\"name\"{delim}\"    yy_push_state(READNAME); 
+<INOBJECT>\"size\"{delim}      yy_push_state(READSIZE);
+<INOBJECT>\"items\"{delim}     yy_push_state(STARTTYPE); return AVRO_LEX_ITEMS;
+<INOBJECT>\"values\"{delim}    yy_push_state(STARTTYPE); return AVRO_LEX_VALUES;
+<INOBJECT>\"fields\"{delim}\[  yy_push_state( READFIELDS); return AVRO_LEX_FIELDS; 
+<INOBJECT>\"symbols\"{delim}\[ yy_push_state( READSYMBOLS); 
+<INOBJECT>,                    return yytext[0];
+<INOBJECT>\}                   yy_pop_state(); return yytext[0];
+<INOBJECT>{ws}                 ;
+
+    /* STARTTYPE: a type is about to start; replaced by the state matching its first character */
+<STARTTYPE>\"                  yy_pop_state(); yy_push_state(READTYPE); 
+<STARTTYPE>{startunion}        yy_pop_state(); yy_push_state(INUNION); return yytext[0];
+<STARTTYPE>{startobject}       yy_pop_state(); yy_push_state(INOBJECT); return yytext[0];
+
+    /* rules without a start condition: the start of a top-level schema */
+{startobject}                  yy_push_state( INOBJECT); return yytext[0];
+{startunion}                   yy_push_state( INUNION); return yytext[0];
+\"                             yy_push_state( READTYPE);
+{ws}                           ;
+
+%%
+

Added: hadoop/avro/trunk/src/c++/parser/avro.y
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/parser/avro.y?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/parser/avro.y (added)
+++ hadoop/avro/trunk/src/c++/parser/avro.y Wed Jun  3 00:00:48 2009
@@ -0,0 +1,145 @@
+%{
+#include <stdio.h>
+#include "Compiler.hh"
+#define YYLEX_PARAM ctx
+#define YYPARSE_PARAM ctx
+
+/* Error callback used by the generated parser: prints the message to stderr. */
+void yyerror(const char *str)
+{
+    // fixme, do something better than this
+    fprintf(stderr,"error: %s\n",str);
+}
+ 
+extern void *lexer; 
+extern int yylex(int *, void *);
+  
+/* Casts the opaque parser parameter back to the CompilerContext it points to. */
+avro::CompilerContext &context(void *ctx) { 
+    return *static_cast<avro::CompilerContext *>(ctx);
+};
+  
+%}
+
+%pure-parser
+
+%token AVRO_LEX_INT AVRO_LEX_LONG AVRO_LEX_FLOAT AVRO_LEX_DOUBLE
+%token AVRO_LEX_BOOL AVRO_LEX_NULL AVRO_LEX_BYTES AVRO_LEX_STRING
+%token AVRO_LEX_RECORD AVRO_LEX_ENUM AVRO_LEX_ARRAY AVRO_LEX_MAP AVRO_LEX_UNION AVRO_LEX_FIXED
+%token AVRO_LEX_SYMBOL AVRO_LEX_SIZE
+%token AVRO_LEX_TYPE AVRO_LEX_ITEMS AVRO_LEX_NAME AVRO_LEX_VALUES AVRO_LEX_FIELDS 
+
+%%
+
+/* a schema is a primitive (or symbolic name), an object-form schema, or a union */
+avroschema: 
+        primitive | avroobject | union_t
+        ;
+
+avroobject:
+        primitiveobject | record_t | array_t | map_t | enum_t | fixed_t
+        ;
+
+/* a primitive written in object form: { "type" : <primitive> } */
+primitiveobject:
+        '{' AVRO_LEX_TYPE primitive '}'
+        ;
+
+/* each primitive token adds the matching primitive type to the context;
+   any other identifier is added as a symbol */
+primitive:
+        AVRO_LEX_INT    { context(ctx).addPrimitive(avro::AVRO_INT); }
+        |
+        AVRO_LEX_LONG   { context(ctx).addPrimitive(avro::AVRO_LONG); }
+        |
+        AVRO_LEX_FLOAT  { context(ctx).addPrimitive(avro::AVRO_FLOAT); }
+        |
+        AVRO_LEX_DOUBLE { context(ctx).addPrimitive(avro::AVRO_DOUBLE); }
+        |
+        AVRO_LEX_BOOL   { context(ctx).addPrimitive(avro::AVRO_BOOL); }
+        |
+        AVRO_LEX_NULL   { context(ctx).addPrimitive(avro::AVRO_NULL); }
+        |
+        AVRO_LEX_BYTES  { context(ctx).addPrimitive(avro::AVRO_BYTES); }
+        |
+        AVRO_LEX_STRING { context(ctx).addPrimitive(avro::AVRO_STRING); }
+        |
+        AVRO_LEX_SYMBOL { context(ctx).addSymbol(); }
+        ;
+
+/* "type" tags of the compound schemas.  Array and map are added to the
+   context as soon as their tag is seen; record, enum and fixed are added
+   by their own rules once more of the object has been read. */
+recordtag: 
+        AVRO_LEX_TYPE AVRO_LEX_RECORD 
+        ;
+
+enumtag: 
+        AVRO_LEX_TYPE AVRO_LEX_ENUM 
+        ;
+
+arraytag:
+        AVRO_LEX_TYPE AVRO_LEX_ARRAY
+        { context(ctx).addArray(); }
+        ;
+
+maptag:
+        AVRO_LEX_TYPE AVRO_LEX_MAP
+        { context(ctx).addMap(); }
+        ;
+
+fixedtag:
+        AVRO_LEX_TYPE AVRO_LEX_FIXED
+        ;
+
+/* { "type":"record", "name":..., "fields":[...] } -- the record is added to
+   the context right after its name and closed at the final brace.  The
+   mid-rule action carries its own ';' so that it does not depend on the
+   parser generator appending one. */
+record_t:
+        '{' recordtag ',' name { context(ctx).addRecord(); } ',' AVRO_LEX_FIELDS fieldlist '}'
+        { context(ctx).endCompound(avro::AVRO_RECORD); }
+        ;
+
+/* { "type":"enum", "name":..., "symbols":[...] } -- the enum is added to the
+   context right after its name and closed at the final brace.  The mid-rule
+   action carries its own ';' so that it does not depend on the parser
+   generator appending one. */
+enum_t:
+       '{'  enumtag ',' name { context(ctx).addEnum(); } ',' namelist '}'
+        { context(ctx).endCompound(avro::AVRO_ENUM); }
+        ;
+
+/* { "type":"array", "items": <schema> } */
+array_t: 
+       '{'  arraytag ',' AVRO_LEX_ITEMS avroschema '}'
+        { context(ctx).endCompound(avro::AVRO_ARRAY); }
+        ;
+
+/* { "type":"map", "values": <schema> } */
+map_t: 
+        '{' maptag ',' AVRO_LEX_VALUES avroschema '}'
+        { context(ctx).endCompound(avro::AVRO_MAP); }
+        ;
+
+/* [ <schema>, ... ] -- the union is added at the opening bracket and closed
+   at the closing one */
+union_t:
+        '[' { context(ctx).addUnion(); } unionlist ']'
+        { context(ctx).endCompound(avro::AVRO_UNION); }
+        ;
+
+/* { "type":"fixed", "size":..., "name":... } -- size must come before name;
+   the fixed is added once the whole object has been read */
+fixed_t:
+        '{' fixedtag ',' size ',' name '}'
+        { context(ctx).addFixed(); }
+        ;
+
+/* a name token is passed on to the context via addName() */
+name:
+        AVRO_LEX_NAME 
+        { context(ctx).addName(); }
+        ;
+
+/* a size token is passed on to the context via addSize() */
+size:
+        AVRO_LEX_SIZE 
+        { context(ctx).addSize(); }
+        ;
+
+/* comma-separated names (the symbols of an enum) */
+namelist:
+        name | namelist ',' name
+        ;
+
+/* one record field: { <field name> , <schema> } */
+field:
+        '{' fieldname ',' avroschema '}'
+        ;   
+
+fieldname:
+        AVRO_LEX_NAME 
+        { context(ctx).addFieldName(); }
+        ;
+
+/* comma-separated record fields */
+fieldlist:
+        field | fieldlist ',' field
+        ;
+
+/* comma-separated branch schemas of a union */
+unionlist: 
+        avroschema | unionlist ',' avroschema
+        ;
+

Added: hadoop/avro/trunk/src/c++/scripts/gen.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/scripts/gen.py?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/scripts/gen.py (added)
+++ hadoop/avro/trunk/src/c++/scripts/gen.py Wed Jun  3 00:00:48 2009
@@ -0,0 +1,405 @@
+#!/usr/bin/python
+
+# Set to True by getNextLine() when raw_input() fails or yields an empty line.
+# Nothing in this script reads it back.
+done = False
+
+# Include block printed right after the include guard of the generated header.
+headers = '''
+#include <stdint.h>
+#include <string>
+#include <vector>
+#include <map>
+#include <boost/any.hpp>
+#include "Exception.hh"
+#include "AvroSerialize.hh"
+#include "AvroParse.hh"
+'''
+
+typeToC= { 'int' : 'int32_t', 'long' :'int64_t', 'float' : 'float', 'double' : 'double', 
+'boolean' : 'bool', 'null': 'avro::Null', 'string' : 'std::string', 'bytes' : 'std::vector<int8_t>'} 
+
+# Generated C++ definitions, in the order addStruct() first registered them.
+structList = []
+# Type names already registered through addStruct(); used as a set (every value is True).
+structNames = {} 
+# 'struct X;' lines collected by addForwardDeclare().
+forwardDeclareList = []
+
+def addStruct(name, declaration) :
+    if not structNames.has_key(name) :
+        structNames[name] = True
+        structList.append(declaration)
+
+def addForwardDeclare(declaration) :
+    code = 'struct ' + declaration + ';'
+    forwardDeclareList.append(code)
+
+def doPrimitive(type):
+    return (typeToC[type], type)
+
+def doSymbolic(args):
+    line = getNextLine()
+    if line[0] != 'end': print 'error'
+    addForwardDeclare(args[1])
+    return (args[1], args[1])
+
+# Single-field template.  Not referenced by any function in this script:
+# doRecord() builds its field lines by string concatenation instead.
+recordfieldTemplate = '$type$ $name$\n'
+# C++ emitted for a record: the struct plus its serialize()/parse() overloads.
+# doRecord() substitutes $name$, $recordfields$, $serializefields$ and
+# $parsefields$.
+recordTemplate = '''struct $name$ {
+$recordfields$};
+
+template <typename Serializer>
+void serialize(Serializer &s, const $name$ &val, const boost::true_type &) {
+    s.beginRecord();
+$serializefields$
+}
+
+template <typename Parser>
+void parse(Parser &p, $name$ &val, const boost::true_type &) {
+    p.getRecord();
+$parsefields$
+}
+'''
+
+def doRecord(args):
+    structDef = recordTemplate;
+    typename = args[1];
+    structDef = structDef.replace('$name$', typename);
+    fields = ''
+    serializefields = ''
+    parsefields = ''
+    end = False
+    while not end:
+        line = getNextLine()
+        if line[0] == 'end': end = True
+        elif line[0] == 'name':
+            fieldname = line[1]
+            fieldline = getNextLine()
+            fieldtypename, fieldtype = genCode(fieldline)
+            fields += '    ' +  fieldtypename + ' ' + fieldname + ';\n'
+            serializefields += '    serialize(s, val.' + fieldname + ');\n'
+            parsefields += '    parse(p, val.' + fieldname + ');\n'
+    structDef = structDef.replace('$recordfields$', fields)
+    structDef = structDef.replace('$serializefields$', serializefields)
+    structDef = structDef.replace('$parsefields$', parsefields)
+    addStruct(typename, structDef)
+    return (typename,typename)
+
+# Not referenced by any function in this script: doUnion() builds its typedef
+# lines by string concatenation instead.
+uniontypestemplate = 'typedef $type$ Choice$N$Type'
+# C++ emitted for a union: a struct holding the branch index (choice) and the
+# branch value in a boost::any, plus serialize()/parse() overloads that switch
+# on the branch index.  doUnion() substitutes $name$, $typedeflist$,
+# $setfuncs$, $switchserialize$ and $switchparse$.  The constructor relies on
+# the T0 typedef that doUnion() emits for the first branch.
+unionTemplate = '''struct $name$ {
+
+$typedeflist$
+
+    $name$() : choice(0), value(T0()) {}
+
+$setfuncs$
+    template<typename T>
+    const T &getValue() const {
+        return boost::any_cast<const T&>(value);
+    }
+
+    int64_t choice; 
+    boost::any value;
+};
+
+template <typename Serializer>
+void serialize(Serializer &s, const $name$ &val, const boost::true_type &) {
+    s.beginUnion(val.choice);
+    switch(val.choice) {
+$switchserialize$
+    default :
+        throw avro::Exception("Unrecognized union choice");
+    }
+}
+
+template <typename Parser>
+void parse(Parser &p, $name$ &val, const boost::true_type &) {
+    val.choice = p.getUnion();
+    switch(val.choice) {
+$switchparse$
+    default :
+        throw avro::Exception("Unrecognized union choice");
+    }
+}
+'''
+
+# One 'case' of the generated serialize() switch; $choice$ is the branch index.
+unionser = '    case $choice$:\n      serialize(s, val.getValue<$type$>());\n      break;\n'
+# One 'case' of the generated parse() switch: parses into a temporary of the
+# branch type and stores it in the union's value member.
+unionpar = '    case $choice$:\n      { $type$ chosenVal; parse(p, chosenVal); val.value = chosenVal; }\n      break;\n'
+
+# Per-branch setter: records the branch index ($N$) and stores the value.
+setfunc =  '''    void set_$name$(const $type$ &val) {
+        choice = $N$;
+        value =  val;
+    };\n'''
+
+
+def doUnion(args):
+    structDef = unionTemplate
+    uniontypes = ''
+    switchserialize= ''
+    switchparse= ''
+    typename = 'Union_of'
+    setters = ''
+    i = 0
+    end = False
+    while not end:
+        line = getNextLine()
+        if line[0] == 'end': end = True
+        else :
+            uniontype, name = genCode(line)
+            typename += '_' + name
+            uniontypes += '    ' + 'typedef ' + uniontype + ' T' + str(i) + ';\n'
+            switch = unionser
+            switch = switch.replace('$choice$', str(i))
+            switch = switch.replace('$type$', uniontype)
+            switchserialize += switch 
+            switch = unionpar
+            switch = switch.replace('$choice$', str(i))
+            switch = switch.replace('$type$', uniontype)
+            switchparse += switch 
+            setter = setfunc
+            setter = setter.replace('$name$', name)
+            setter = setter.replace('$type$', uniontype)
+            setter = setter.replace('$N$', str(i))
+            setters += setter
+        i+= 1
+    structDef = structDef.replace('$name$', typename)
+    structDef = structDef.replace('$typedeflist$', uniontypes)
+    structDef = structDef.replace('$switchserialize$', switchserialize)
+    structDef = structDef.replace('$switchparse$', switchparse)
+    structDef = structDef.replace('$setfuncs$', setters)
+    addStruct(typename, structDef)
+    return (typename,typename)
+
+# C++ emitted for an enum: a struct wrapping a nested EnumSymbols enum, plus
+# serialize()/parse() overloads.  doEnum() substitutes $name$ and
+# $enumsymbols$ (the comma-separated symbol list).
+enumTemplate = '''struct $name$ {
+    enum EnumSymbols {
+        $enumsymbols$
+    };
+    EnumSymbols value;
+};
+
+template <typename Serializer>
+void serialize(Serializer &s, const $name$ &val, const boost::true_type &) {
+    s.beginEnum(val.value);
+}
+
+template <typename Parser>
+void parse(Parser &p, $name$ &val, const boost::true_type &) {
+    val.value = static_cast<$name$::EnumSymbols>(p.getEnum());
+}
+'''
+
+def doEnum(args):
+    structDef = enumTemplate;
+    typename = args[1]
+    structDef = structDef.replace('$name$', typename)
+    end = False
+    symbols = '';
+    while not end:
+        line = getNextLine()
+        if line[0] == 'end': end = True
+        elif line[0] == 'name':
+            if not symbols=='' : symbols += ', '
+            symbols += line[1]
+        else: print "error"
+    structDef = structDef.replace('$enumsymbols$', symbols);
+    addStruct(typename, structDef)
+    return (typename,typename)
+
+# C++ emitted for an array: a struct wrapping a std::vector of the item type,
+# plus serialize()/parse() overloads.  doArray() substitutes $name$ and
+# $valuetype$.  The generated serialize() writes all items as a single block
+# (none when the vector is empty) followed by endArray(); the generated
+# parse() keeps reading blocks until getArrayBlockSize() returns a value that
+# is not positive.
+# NOTE(review): the block size is held in an int -- confirm this matches the
+# return type of getArrayBlockSize().
+arrayTemplate = '''struct $name$ {
+    typedef $valuetype$ ValueType;
+    typedef std::vector<ValueType> ArrayType;
+    
+    void addValue(const ValueType &val) {
+        value.push_back(val);
+    }
+
+    ArrayType value;
+};
+
+template <typename Serializer>
+void serialize(Serializer &s, const $name$ &val, const boost::true_type &) {
+    const size_t size = val.value.size();
+    if(size) {
+        s.beginArrayBlock(size);
+        for(size_t i = 0; i < size; ++i) {
+            serialize(s, val.value[i]);
+        }
+    }
+    s.endArray();
+}
+
+template <typename Parser>
+void parse(Parser &p, $name$ &val, const boost::true_type &) {
+    val.value.clear();
+    while(1) {
+        int size = p.getArrayBlockSize();
+        if(size > 0) {
+            val.value.reserve(val.value.size() + size);
+            while (size-- > 0) { 
+                val.value.push_back($name$::ValueType());
+                parse(p, val.value.back());
+            }
+        }
+        else {
+            break;
+        }
+    } 
+}
+'''
+
+def doArray(args):
+    structDef = arrayTemplate
+    line = getNextLine()
+    arraytype, typename = genCode(line);
+    typename = 'Array_of_' + typename
+
+    structDef = structDef.replace('$name$', typename)
+    structDef = structDef.replace('$valuetype$', arraytype);
+
+    line = getNextLine()
+    if line[0] != 'end': print 'error'
+
+    addStruct(typename, structDef)
+    return (typename,typename)
+
+# C++ emitted for a map: a struct wrapping a std::map keyed by std::string,
+# plus serialize()/parse() overloads.  doMap() substitutes $name$ and
+# $valuetype$.  The generated serialize() writes all entries as a single
+# block (none when the map is empty) followed by endMap(); the generated
+# parse() keeps reading blocks until getMapBlockSize() returns a value that
+# is not positive.
+# NOTE(review): the block size is held in an int -- confirm this matches the
+# return type of getMapBlockSize().
+mapTemplate = '''struct $name$ {
+    typedef $valuetype$ ValueType;
+    typedef std::map<std::string, ValueType> MapType;
+    
+    void addValue(const std::string &key, const ValueType &val) {
+        value.insert(MapType::value_type(key, val));
+    }
+
+    MapType value;
+};
+
+template <typename Serializer>
+void serialize(Serializer &s, const $name$ &val, const boost::true_type &) {
+    if(val.value.size()) {
+        s.beginMapBlock(val.value.size());
+        $name$::MapType::const_iterator iter = val.value.begin();
+        $name$::MapType::const_iterator end  = val.value.end();
+        while(iter!=end) {
+            serialize(s, iter->first);
+            serialize(s, iter->second);
+            ++iter;
+        }
+    }
+    s.endMap();
+}
+
+template <typename Parser>
+void parse(Parser &p, $name$ &val, const boost::true_type &) {
+    val.value.clear();
+    while(1) {
+        int size = p.getMapBlockSize();
+        if(size > 0) {
+            while (size-- > 0) { 
+                std::string key;
+                parse(p, key);
+                $name$::ValueType m;
+                parse(p, m);
+                val.value.insert($name$::MapType::value_type(key, m));
+            }
+        }
+        else {
+            break;
+        }
+    } 
+}
+'''
+
+def doMap(args):
+    structDef = mapTemplate
+    line = getNextLine() # must be string
+    line = getNextLine()
+    maptype, typename = genCode(line);
+    typename = 'Map_of_' + typename
+
+    structDef = structDef.replace('$name$', typename);
+    structDef = structDef.replace('$valuetype$', maptype);
+
+    line = getNextLine()
+    if line[0] != 'end': print 'error'
+    addStruct(typename, structDef)
+    return (typename,typename)
+    
+# C++ emitted for a fixed type: a struct holding a uint8_t array of fixedSize
+# bytes, plus serialize()/parse() overloads that call putFixed()/getFixed()
+# with that array and size.  doFixed() substitutes $name$ and $N$ (the size).
+fixedTemplate = '''struct $name$ {
+    enum {
+        fixedSize = $N$
+    };
+    uint8_t value[fixedSize];
+};
+
+template <typename Serializer>
+void serialize(Serializer &s, const $name$ &val, const boost::true_type &) {
+    s.putFixed(val.value, $name$::fixedSize);
+}
+
+template <typename Parser>
+void parse(Parser &p, $name$ &val, const boost::true_type &) {
+    p.getFixed(val.value, $name$::fixedSize);
+}
+'''
+
+def doFixed(args):
+    structDef = fixedTemplate
+    typename = args[1]
+    size = args[2]
+
+    line = getNextLine()
+    if line[0] != 'end': print 'error'
+
+    structDef = structDef.replace('$name$', typename);
+    structDef = structDef.replace('$N$', size);
+    addStruct(typename, structDef)
+    return (typename,typename)
+
+compoundBuilder= { 'record' : doRecord, 'union' : doUnion, 'enum' : doEnum, 
+'map' : doMap, 'array' : doArray, 'fixed' : doFixed, 'symbolic' : doSymbolic } 
+
+def genCode(inputs) :
+    type = inputs[0]
+    if typeToC.has_key(type) : 
+        result = doPrimitive(type)
+    else :
+        func = compoundBuilder[type]
+        result = func(inputs)
+    return result
+
+def getNextLine():
+    try:
+        line = raw_input()
+    except:
+        line = '';
+        globals()["done"] = True
+
+    if line == '':
+        globals()["done"] = True
+    return line.split(' ')
+    
+if __name__ == "__main__":
+    from sys import argv
+    if(len(argv) > 1): 
+        namespace = argv[1]
+    else:
+        namespace = 'avrouser'
+
+    inputs = getNextLine()
+    genCode(inputs)
+
+    print "#ifndef %s_AvroGenerated_hh__" % namespace
+    print "#define %s_AvroGenerated_hh__" % namespace
+    print headers
+    print "namespace %s {\n" % namespace
+
+    for x in forwardDeclareList:
+        print "%s\n" % x
+
+    for x in structList:
+        print "%s\n" % x
+
+    print "\n} // namespace %s\n" % namespace
+
+    print "namespace avro {\n"
+    for x in structNames:
+        print 'template <> struct is_serializable<%s::%s> : public boost::true_type{};' % (namespace, x)
+
+    print "\n} // namespace avro\n"
+
+    print "#endif // %s_AvroGenerated_hh__" % namespace
+

Propchange: hadoop/avro/trunk/src/c++/test/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Wed Jun  3 00:00:48 2009
@@ -0,0 +1 @@
+code.hh

Added: hadoop/avro/trunk/src/c++/test/precompile.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/test/precompile.cc?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/test/precompile.cc (added)
+++ hadoop/avro/trunk/src/c++/test/precompile.cc Wed Jun  3 00:00:48 2009
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "Compiler.hh"
+#include "ValidSchema.hh"
+
+// Reads a JSON schema from standard input, compiles it and writes its
+// flat-list form to standard output.
+//
+// Returns EXIT_SUCCESS when the schema was compiled and written.  If anything
+// derived from std::exception is thrown, the reason is reported on standard
+// error and EXIT_FAILURE is returned, so a calling build step can detect the
+// failure instead of consuming the error text as if it were a flat list.
+int main()
+{
+
+    try {
+        avro::ValidSchema schema;
+        avro::compileJsonSchema(std::cin, schema);
+
+        schema.toFlatList(std::cout);
+    }
+    catch (const std::exception &e) {
+        // Keep the diagnostic out of standard output, which carries the
+        // generated flat list.
+        std::cerr << "Failed to parse or compile schema: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}

Added: hadoop/avro/trunk/src/c++/test/testgen.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/test/testgen.cc?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/test/testgen.cc (added)
+++ hadoop/avro/trunk/src/c++/test/testgen.cc Wed Jun  3 00:00:48 2009
@@ -0,0 +1,151 @@
+
+#include <string.h>
+#include <fstream>
+#include <sstream>
+
+#include "code.hh"
+#include "OutputStreamer.hh"
+#include "InputStreamer.hh"
+#include "Serializer.hh"
+#include "ValidatingSerializer.hh"
+#include "Parser.hh"
+#include "ValidatingParser.hh"
+#include "ValidSchema.hh"
+#include "Compiler.hh"
+
+// Serializes rec with an avro::Serializer writing to an avro::ScreenStreamer.
+void serialize(const avrouser::RootRecord &rec) 
+{
+    avro::ScreenStreamer screen;
+    avro::Serializer writer(screen);
+    avro::serialize(writer, rec);
+}
+
+// Same as serialize(), but through an avro::ValidatingSerializer constructed
+// with the schema `valid`.
+void serializeValid(const avro::ValidSchema &valid, const avrouser::RootRecord &rec) 
+{
+    avro::ScreenStreamer screen;
+    avro::ValidatingSerializer writer(valid, screen);
+    avro::serialize(writer, rec);
+}
+
+// Asserts that both arrays have the same length and equal elements at every
+// index.
+void checkArray(const avrouser::Array_of_double &a1, const avrouser::Array_of_double &a2) 
+{
+    const size_t count = a1.value.size();
+    assert(count == a2.value.size());
+
+    size_t index = 0;
+    while(index < a1.value.size()) {
+        assert(a1.value[index] == a2.value[index]);
+        ++index;
+    }
+}
+
+// Asserts that both maps have the same number of entries and that, walking
+// them in iteration order, every key and value matches.
+void checkMap(const avrouser::Map_of_int &map1, const avrouser::Map_of_int &map2) 
+{
+    typedef avrouser::Map_of_int::MapType::const_iterator EntryIter;
+
+    assert(map1.value.size() == map2.value.size());
+
+    EntryIter rhs = map2.value.begin();
+    for(EntryIter lhs = map1.value.begin(); lhs != map1.value.end(); ++lhs, ++rhs) {
+        assert(lhs->first == rhs->first);
+        assert(lhs->second == rhs->second);
+    }
+}
+
+// Asserts that every field of rec2 (the record read back) equals the
+// corresponding field of rec1 (the record that was written).
+//
+// The union check assumes the test data selected branch 1 (Map_of_int).
+void checkOk(const avrouser::RootRecord &rec1, const avrouser::RootRecord &rec2)
+{
+    // Compare against rec2: comparing a field of rec1 with itself always
+    // succeeds and would hide a round-trip error.
+    assert(rec1.mylong == rec2.mylong);
+    checkMap(rec1.mymap, rec2.mymap);
+    checkArray(rec1.myarray, rec2.myarray);
+
+    assert(rec1.myenum.value == rec2.myenum.value);
+
+    assert(rec1.myunion.choice == rec2.myunion.choice);
+    // in this test I know choice was 1
+    {
+        assert(rec1.myunion.choice == 1);
+        checkMap(rec1.myunion.getValue<avrouser::Map_of_int>(), rec2.myunion.getValue<avrouser::Map_of_int>());
+    }
+
+    assert(rec1.mybool == rec2.mybool);
+    for(int i = 0; i < static_cast<int>(avrouser::md5::fixedSize); ++i) {
+        assert(rec1.myfixed.value[i] == rec2.myfixed.value[i]);
+    }
+    assert(rec1.anotherint == rec2.anotherint);
+
+}
+
+// Round trip without validation: serializes myRecord into a string, parses
+// that string back into a fresh record and checks both records field by field.
+void testParser(const avrouser::RootRecord &myRecord)
+{
+    // Write side.
+    std::ostringstream encoded;
+    avro::OStreamer sink(encoded);
+    avro::Serializer writer(sink);
+    avro::serialize(writer, myRecord); 
+
+    // Read side, fed with exactly the bytes produced above.
+    std::istringstream encodedInput(encoded.str());
+    avro::IStreamer source(encodedInput);
+    avro::Parser reader(source);
+    avrouser::RootRecord inRecord;
+    avro::parse(reader, inRecord);
+
+    checkOk(myRecord, inRecord);
+}
+
+// Round trip with validation: like testParser(), but uses
+// avro::ValidatingSerializer and avro::ValidatingParser, both constructed
+// with the schema `valid`.
+void testParserValid(avro::ValidSchema &valid, const avrouser::RootRecord &myRecord)
+{
+    // Write side.
+    std::ostringstream encoded;
+    avro::OStreamer sink(encoded);
+    avro::ValidatingSerializer writer(valid, sink);
+    avro::serialize(writer, myRecord);
+
+    // Read side, fed with exactly the bytes produced above.
+    std::istringstream encodedInput(encoded.str());
+    avro::IStreamer source(encodedInput);
+    avro::ValidatingParser reader(valid, source);
+    avrouser::RootRecord inRecord;
+    avro::parse(reader, inRecord);
+
+    checkOk(myRecord, inRecord);
+}
+
+// Runs every check on myRecord: plain serialization, validated serialization
+// against the schema compiled from jsonschemas/bigrecord, and both parser
+// round trips.
+//
+// The record is taken by const reference: it is only read, and copying it
+// would duplicate its contained array, maps and union value.
+void runTests(const avrouser::RootRecord &myRecord) 
+{
+    std::cout << "Serialize:\n";
+    serialize(myRecord);
+    std::cout << "end Serialize\n";
+
+    // The schema path is relative, so the test expects to be started from the
+    // directory that contains jsonschemas/.
+    avro::ValidSchema schema;
+    std::ifstream in("jsonschemas/bigrecord");
+    avro::compileJsonSchema(in, schema);
+    std::cout << "Serialize validated:\n";
+    serializeValid(schema, myRecord);
+    std::cout << "end Serialize validated\n";
+
+    testParser(myRecord);
+
+    testParserValid(schema, myRecord);
+}
+
+// Builds one RootRecord with every field populated and runs all tests on it.
+int main() 
+{
+    // Sixteen bytes, 0 through 15, used to fill the md5 fixed field.
+    uint8_t fixed[] =  {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+
+    // The memcpy below copies md5::fixedSize bytes out of `fixed`; make sure
+    // the source array really holds that many so the copy cannot read past it.
+    assert(sizeof(fixed) >= static_cast<size_t>(avrouser::md5::fixedSize));
+
+    avrouser::RootRecord myRecord;
+    myRecord.mylong = 212;
+    myRecord.mymap.value.clear();
+    myRecord.myarray.addValue(3434.9);
+    myRecord.myarray.addValue(7343.9);
+    myRecord.myenum.value = avrouser::ExampleEnum::one;
+    avrouser::Map_of_int map;
+    map.addValue("one", 1);
+    map.addValue("two", 2);
+    // Selects the Map_of_int branch of the union; checkOk() relies on this.
+    myRecord.myunion.set_Map_of_int(map);
+    myRecord.mybool = true;
+    memcpy(myRecord.myfixed.value, fixed, avrouser::md5::fixedSize);
+    myRecord.anotherint = 4534;
+
+    runTests(myRecord);
+
+    return 0;
+}

Added: hadoop/avro/trunk/src/c++/test/testparser.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/test/testparser.cc?rev=781214&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/test/testparser.cc (added)
+++ hadoop/avro/trunk/src/c++/test/testparser.cc Wed Jun  3 00:00:48 2009
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "Compiler.hh"
+#include "ValidSchema.hh"
+
+// Reads a JSON schema from standard input, compiles it and writes it back to
+// standard output as JSON.
+//
+// Returns EXIT_SUCCESS when the schema was compiled and written.  If anything
+// derived from std::exception is thrown, the reason is printed and
+// EXIT_FAILURE is returned so that a calling script can detect the failure.
+int main()
+{
+
+    try {
+        avro::ValidSchema schema;
+        avro::compileJsonSchema(std::cin, schema);
+
+        schema.toJson(std::cout);
+    }
+    catch (const std::exception &e) {
+        std::cout << "Failed to parse or compile schema: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}



Mime
View raw message