avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From th...@apache.org
Subject svn commit: r1089131 - in /avro/trunk: CHANGES.txt lang/c++/api/DataFile.hh lang/c++/impl/DataFile.cc lang/c++/test/DataFileTests.cc
Date Tue, 05 Apr 2011 16:54:09 GMT
Author: thiru
Date: Tue Apr  5 16:54:08 2011
New Revision: 1089131

URL: http://svn.apache.org/viewvc?rev=1089131&view=rev
Log:
AVRO-795. C++ Datafile reader makes it hard to build adaptive clients

Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/c++/api/DataFile.hh
    avro/trunk/lang/c++/impl/DataFile.cc
    avro/trunk/lang/c++/test/DataFileTests.cc

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1089131&r1=1089130&r2=1089131&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Tue Apr  5 16:54:08 2011
@@ -37,6 +37,8 @@ Avro 1.5.1 (unreleased)
 
     AVRO-794. Makefile.am is no longer required in C++. (thiru)
 
+    AVRO-795. C++ Datafile reader makes it hard to build adaptive clients. (thiru)
+
   BUG FIXES
 
     AVRO-786. Java: Fix equals() to work on objects containing maps. (cutting)

Modified: avro/trunk/lang/c++/api/DataFile.hh
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c%2B%2B/api/DataFile.hh?rev=1089131&r1=1089130&r2=1089131&view=diff
==============================================================================
--- avro/trunk/lang/c++/api/DataFile.hh (original)
+++ avro/trunk/lang/c++/api/DataFile.hh Tue Apr  5 16:54:08 2011
@@ -67,7 +67,7 @@ class DataFileWriterBase : boost::noncop
      */
     void sync();
 
-protected:
+public:
     Encoder& encoder() const { return *encoderPtr_; }
     
     void syncIfNeeded();
@@ -75,7 +75,6 @@ protected:
     void incr() {
         ++objectCount_;
     }
-public:
     /**
      * Constructs a data file writer with the given sync interval and name.
      */
@@ -104,23 +103,40 @@ public:
  *  An Avro datafile that can store objects of type T.
  */
 template <typename T>
-class DataFileWriter : public DataFileWriterBase {
+class DataFileWriter : boost::noncopyable {
+    std::auto_ptr<DataFileWriterBase> base_;
 public:
     /**
      * Constructs a new data file.
      */
     DataFileWriter(const char* filename, const ValidSchema& schema,
         size_t syncInterval = 16 * 1024) :
-        DataFileWriterBase(filename, schema, syncInterval) { }
+        base_(new DataFileWriterBase(filename, schema, syncInterval)) { }
 
     /**
      * Writes the given piece of data into the file.
      */
     void write(const T& datum) {
-        syncIfNeeded();
-        avro::encode(encoder(), datum);
-        incr();
+        base_->syncIfNeeded();
+        avro::encode(base_->encoder(), datum);
+        base_->incr();
     }
+
+    /**
+     * Closes the current file. Once closed this datafile object cannot be
+     * used for writing any more.
+     */
+    void close() { base_->close(); }
+
+    /**
+     * Returns the schema for this data file.
+     */
+    const ValidSchema& schema() const { return base_->schema(); }
+
+    /**
+     * Flushes any unwritten data into the file.
+     */
+    void flush() { base_->flush(); }
 };
 
 class DataFileReaderBase : boost::noncopyable {
@@ -140,7 +156,8 @@ class DataFileReaderBase : boost::noncop
 
     void readHeader();
 
-protected:
+    bool readDataBlock();
+public:
     Decoder& decoder() { return *dataDecoder_; }
 
     /**
@@ -149,20 +166,29 @@ protected:
     bool hasMore();
 
     void decr() { --objectCount_; }
-    bool readDataBlock();
 
-public:
     /**
      * Constructs the reader for the given file and the reader is
-     * expected to use the given schema.
+     * expected to use the schema that is used with data.
+     * This function should be called exactly once after constructing
+     * the DataFileReaderBase object.
      */
-    DataFileReaderBase(const char* filename, const ValidSchema& readerSchema);
+    DataFileReaderBase(const char* filename);
 
     /**
-     * Constructs the reader for the given file and the reader is
-     * expected to use the schema that is used with data.
+     * Initializes the reader so that the reader and writer schemas
+     * are the same.
      */
-    DataFileReaderBase(const char* filename);
+    void init();
+
+    /**
+     * Initializes the reader to read objects according to the given
+     * schema. This gives an opportunity for the reader to see the schema
+     * in the data file before deciding the right schema to use for reading.
+     * This must be called exactly once after constructing the
+     * DataFileReaderBase object.
+     */
+    void init(const ValidSchema& readerSchema);
 
     /**
      * Returns the schema for this object.
@@ -181,29 +207,78 @@ public:
 };
 
 template <typename T>
-class DataFileReader : public DataFileReaderBase {
+class DataFileReader : boost::noncopyable {
+    std::auto_ptr<DataFileReaderBase> base_;
 public:
     /**
      * Constructs the reader for the given file and the reader is
      * expected to use the given schema.
      */
     DataFileReader(const char* filename, const ValidSchema& readerSchema) :
-        DataFileReaderBase(filename, readerSchema) { }
+        base_(new DataFileReaderBase(filename)) {
+        base_->init(readerSchema);
+    }
 
     /**
      * Constructs the reader for the given file and the reader is
      * expected to use the schema that is used with data.
      */
-    DataFileReader(const char* filename) : DataFileReaderBase(filename) { }
+    DataFileReader(const char* filename) :
+        base_(new DataFileReaderBase(filename)) {
+        base_->init();
+    }
+
+
+    /**
+     * Constructs a reader using the reader base. This form of constructor
+     * allows the user to examine the schema of a given file and then
+     * decide to use the right type of data to be deserialized. Without this
+     * the user must know the type of data for the template _before_
+     * he knows the schema within the file.
+     * The schema present in the data file will be used for reading
+     * from this reader.
+     */
+    DataFileReader(std::auto_ptr<DataFileReaderBase> base) : base_(base) {
+        base_->init();
+    }
+
+    /**
+     * Constructs a reader using the reader base. This form of constructor
+     * allows the user to examine the schema of a given file and then
+     * decide to use the right type of data to be deserialized. Without this
+     * the user must know the type of data for the template _before_
+     * he knows the schema within the file.
+     * The argument readerSchema will be used for reading
+     * from this reader.
+     */
+    DataFileReader(std::auto_ptr<DataFileReaderBase> base,
+        const ValidSchema& readerSchema) : base_(base) {
+        base_->init(readerSchema);
+    }
 
     bool read(T& datum) {
-        if (hasMore()) {
-            decr();
-            avro::decode(decoder(), datum);
+        if (base_->hasMore()) {
+            base_->decr();
+            avro::decode(base_->decoder(), datum);
             return true;
         }
         return false;
     }
+
+    /**
+     * Returns the schema for this object.
+     */
+    const ValidSchema& readerSchema() { return base_->readerSchema(); }
+
+    /**
+     * Returns the schema stored with the data file.
+     */
+    const ValidSchema& dataSchema() { return base_->dataSchema(); }
+
+    /**
+     * Closes the reader. No further operation is possible on this reader.
+     */
+    void close() { return base_->close(); }
 };
 
 }   // namespace avro

Modified: avro/trunk/lang/c++/impl/DataFile.cc
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c%2B%2B/impl/DataFile.cc?rev=1089131&r1=1089130&r2=1089131&view=diff
==============================================================================
--- avro/trunk/lang/c++/impl/DataFile.cc (original)
+++ avro/trunk/lang/c++/impl/DataFile.cc Tue Apr  5 16:54:08 2011
@@ -149,19 +149,27 @@ void DataFileWriterBase::setMetadata(con
     metadata_[key] = v;
 }
 
-DataFileReaderBase::DataFileReaderBase(const char* filename,
-    const ValidSchema& schema) :
+DataFileReaderBase::DataFileReaderBase(const char* filename) :
     filename_(filename), stream_(fileInputStream(filename)),
-    decoder_(binaryDecoder()), objectCount_(0), readerSchema_(schema)
+    decoder_(binaryDecoder()), objectCount_(0)
 {
     readHeader();
 }
 
-DataFileReaderBase::DataFileReaderBase(const char* filename) :
-    filename_(filename), stream_(fileInputStream(filename)),
-    decoder_(binaryDecoder()), objectCount_(0)
+void DataFileReaderBase::init()
 {
-    readHeader();
+    readerSchema_ = dataSchema_;
+    dataDecoder_  = binaryDecoder();
+    readDataBlock();
+}
+
+void DataFileReaderBase::init(const ValidSchema& readerSchema)
+{
+    readerSchema_ = readerSchema;
+    dataDecoder_  = (toString(readerSchema_) != toString(dataSchema_)) ?
+        resolvingDecoder(dataSchema_, readerSchema_, binaryDecoder()) :
+        binaryDecoder();
+    readDataBlock();
 }
 
 static void drain(InputStream& in)
@@ -310,12 +318,7 @@ void DataFileReaderBase::readHeader()
         throw Exception("Unknown codec in data file: " + toString(it->second));
     }
 
-    dataDecoder_  = (toString(readerSchema_) != toString(dataSchema_)) ?
-        resolvingDecoder(dataSchema_, readerSchema_, binaryDecoder()) :
-        binaryDecoder();
-        
     avro::decode(*decoder_, sync_);
-    readDataBlock();
 }
 
 }   // namespace avro

Modified: avro/trunk/lang/c++/test/DataFileTests.cc
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c%2B%2B/test/DataFileTests.cc?rev=1089131&r1=1089130&r2=1089131&view=diff
==============================================================================
--- avro/trunk/lang/c++/test/DataFileTests.cc (original)
+++ avro/trunk/lang/c++/test/DataFileTests.cc Tue Apr  5 16:54:08 2011
@@ -33,6 +33,7 @@ using std::pair;
 using std::vector;
 using std::map;
 using std::istringstream;
+using std::ostringstream;
 
 using boost::array;
 using boost::shared_ptr;
@@ -55,15 +56,17 @@ struct Integer {
     int64_t re;
     Integer() : re(0) { }
     Integer(int64_t r) : re(r) { }
-
-    bool operator==(const Integer& oth) const {
-        return re == oth.re;
-    }
 };
 
 typedef Complex<int64_t> ComplexInteger;
 typedef Complex<double> ComplexDouble;
 
+struct Double {
+    double re;
+    Double() : re(0) { }
+    Double(double r) : re(r) { }
+};
+
 namespace avro {
 
 template <typename T> struct codec_traits<Complex<T> > {
@@ -84,6 +87,12 @@ template <> struct codec_traits<Integer>
     }
 };
 
+template <> struct codec_traits<Double> {
+    static void decode(Decoder& d, Double& c) {
+        avro::decode(d, c.re);
+    }
+};
+
 }
 
 static ValidSchema makeValidSchema(const char* schema)
@@ -108,6 +117,18 @@ static const char dsch[] = "{\"type\": \
         "{\"name\":\"re\", \"type\":\"double\"},"
         "{\"name\":\"im\", \"type\":\"double\"}"
     "]}";
+static const char dblsch[] = "{\"type\": \"record\","
+    "\"name\":\"ComplexDouble\", \"fields\": ["
+        "{\"name\":\"re\", \"type\":\"double\"}"
+    "]}";
+
+
+string toString(const ValidSchema& s)
+{
+    ostringstream oss;
+    s.toJson(oss);
+    return oss.str();
+}
 
 class DataFileTest {
     const char* filename;
@@ -263,6 +284,52 @@ public:
         BOOST_CHECK_EQUAL(i, 1000);
     }
 
+    /**
+     * Constructs the DataFileReader in two steps.
+     */
+    void testReadDoubleTwoStep() {
+        auto_ptr<avro::DataFileReaderBase>
+            base(new avro::DataFileReaderBase(filename));
+        avro::DataFileReader<ComplexDouble> df(base);
+        BOOST_CHECK_EQUAL(toString(writerSchema), toString(df.readerSchema()));
+        BOOST_CHECK_EQUAL(toString(writerSchema), toString(df.dataSchema()));
+        int i = 0;
+        ComplexDouble ci;
+        double re = 3.0;
+        double im = 5.0;
+        while (df.read(ci)) {
+            BOOST_CHECK_CLOSE(ci.re, re, 0.0001);
+            BOOST_CHECK_CLOSE(ci.im, im, 0.0001);
+            re += (im - 0.7);
+            im += 3.1;
+            ++i;
+        }
+        BOOST_CHECK_EQUAL(i, 1000);
+    }
+
+    /**
+     * Constructs the DataFileReader in two steps using a different
+     * reader schema.
+     */
+    void testReadDoubleTwoStepProject() {
+        auto_ptr<avro::DataFileReaderBase>
+            base(new avro::DataFileReaderBase(filename));
+        avro::DataFileReader<Double> df(base, readerSchema);
+
+        BOOST_CHECK_EQUAL(toString(readerSchema), toString(df.readerSchema()));
+        BOOST_CHECK_EQUAL(toString(writerSchema), toString(df.dataSchema()));
+        int i = 0;
+        Double ci;
+        double re = 3.0;
+        double im = 5.0;
+        while (df.read(ci)) {
+            BOOST_CHECK_CLOSE(ci.re, re, 0.0001);
+            re += (im - 0.7);
+            im += 3.1;
+            ++i;
+        }
+        BOOST_CHECK_EQUAL(i, 1000);
+    }
 };
 
 void addReaderTests(test_suite* ts, const shared_ptr<DataFileTest>& t)
@@ -288,9 +355,12 @@ init_unit_test_suite( int argc, char* ar
     ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testWriteGeneric, t2));
     addReaderTests(ts, t2);
 
-    shared_ptr<DataFileTest> t3(new DataFileTest("test3.df", dsch, dsch));
+    shared_ptr<DataFileTest> t3(new DataFileTest("test3.df", dsch, dblsch));
     ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testWriteDouble, t3));
     ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testReadDouble, t3));
+    ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testReadDoubleTwoStep, t3));
+    ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testReadDoubleTwoStepProject,
+        t3));
     ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testCleanup, t3));
     return ts;
 }



Mime
View raw message