parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From n...@apache.org
Subject parquet-cpp git commit: PARQUET-434: Add a ParquetFileReader class
Date Tue, 26 Jan 2016 23:55:23 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master 7555cff7d -> 9a1fd892f


PARQUET-434: Add a ParquetFileReader class

Starting point for build out.

Author: Wes McKinney <wes@cloudera.com>

Closes #20 from wesm/parquet-file-reader and squashes the following commits:

8a9e875 [Wes McKinney] Add PARQUET_TEST_DATA to setup_build_env.sh and add to README
f7cd165 [Wes McKinney] Create ParquetFileReader class with ParseMetaData method and a basic unit test. Run valgrind and cpplint as part of Travis CI build


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/9a1fd892
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/9a1fd892
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/9a1fd892

Branch: refs/heads/master
Commit: 9a1fd892fba00108ea79000e7fec06e2e895d988
Parents: 7555cff
Author: Wes McKinney <wes@cloudera.com>
Authored: Tue Jan 26 15:55:18 2016 -0800
Committer: Nong Li <nongli@gmail.com>
Committed: Tue Jan 26 15:55:18 2016 -0800

----------------------------------------------------------------------
 .travis.yml                       |   9 ++-
 CMakeLists.txt                    |   1 +
 README.md                         |  16 ++++-
 example/example_util.cc           |   2 +
 setup_build_env.sh                |   2 +
 src/parquet.cc                    |   6 +-
 src/parquet/CMakeLists.txt        |   2 +
 src/parquet/exception.h           |  49 ++++++++++++++
 src/parquet/parquet.h             |  82 +++++------------------
 src/parquet/reader-test.cc        |  41 ++++++++++--
 src/parquet/reader.cc             | 117 +++++++++++++++++++++++++++++++++
 src/parquet/reader.h              |  89 +++++++++++++++++++++++++
 src/parquet/thrift/CMakeLists.txt |   1 +
 src/parquet/thrift/util.h         |  46 +++++++++++++
 src/parquet/util/CMakeLists.txt   |   8 +--
 15 files changed, 393 insertions(+), 78 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 8e11840..0ac7c47 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -36,5 +36,12 @@ before_install:
 before_script:
     - source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh
     - cmake $TRAVIS_BUILD_DIR
+    - export PARQUET_TEST_DATA=$TRAVIS_BUILD_DIR/data
 
-script: make
+script:
+- make
+- >
+  if [ $TRAVIS_OS_NAME == linux ]; then
+    valgrind --tool=memcheck --leak-check=yes ctest;
+  fi
+- make lint

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4ac13de..2554e6c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -214,6 +214,7 @@ set(PARQUET_TEST_LINK_LIBS ${PARQUET_MIN_TEST_LIBS})
 
 set(LIBPARQUET_SRCS
   src/parquet.cc
+  src/parquet/reader.cc
 )
 
 set(LIBPARQUET_LINK_LIBS

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 811d41e..a809713 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,11 @@ ubuntu$ sudo apt-get install libboost-dev libsnappy-dev liblz4-dev
 mac$ brew install snappy lz4 thrift
 ```
 
-./setup_build_env.sh tries to automate setting up a build environment for you with third party dependencies.  You use it by running `./setup_build_env.sh`.  By default, it will create a build directory `build/`.  You can override the build directory by setting the BUILD_DIR env variable to another location.
+`setup_build_env.sh` tries to automate setting up a build environment for you
+with third party dependencies.  You use it by running `source
+setup_build_env.sh`.  By default, it will create a build directory `build/`.
+You can override the build directory by setting the BUILD_DIR env variable to
+another location.
 
 Also feel free to take a look at our [.travis.yml](.travis.yml) to see how that build env is set up.
 
@@ -42,6 +46,16 @@ with `make`, you can run the test suite by running
 ctest
 ```
 
+The test suite relies on an environment variable `PARQUET_TEST_DATA` pointing
+to the `data` directory in the source checkout, for example:
+
+```
+export PARQUET_TEST_DATA=`pwd`/data
+```
+
+If you run `source setup_build_env.sh` it will set this variable automatically,
+but you may also wish to put it in your `.bashrc` or somewhere else.
+
 See `ctest --help` for configuration details about ctest. On GNU/Linux systems,
 you can use valgrind with ctest to look for memory leaks:
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/example/example_util.cc
----------------------------------------------------------------------
diff --git a/example/example_util.cc b/example/example_util.cc
index 412e6f5..07d8129 100644
--- a/example/example_util.cc
+++ b/example/example_util.cc
@@ -18,6 +18,8 @@
 #include <stdio.h>
 #include <string.h>
 
+#include "parquet/thrift/util.h"
+
 using namespace parquet;
 using namespace parquet_cpp;
 using namespace std;

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/setup_build_env.sh
----------------------------------------------------------------------
diff --git a/setup_build_env.sh b/setup_build_env.sh
index 4b496d9..1cd7bb2 100755
--- a/setup_build_env.sh
+++ b/setup_build_env.sh
@@ -19,6 +19,8 @@ fi
 
 export GTEST_HOME=$BUILD_DIR/thirdparty/$GTEST_BASEDIR
 
+export PARQUET_TEST_DATA=$SOURCE_DIR/data
+
 cmake $SOURCE_DIR
 
 cd $SOURCE_DIR

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/src/parquet.cc
----------------------------------------------------------------------
diff --git a/src/parquet.cc b/src/parquet.cc
index f71d32b..5a0f8f4 100644
--- a/src/parquet.cc
+++ b/src/parquet.cc
@@ -13,8 +13,6 @@
 // limitations under the License.
 
 #include "parquet/parquet.h"
-#include "parquet/encodings/encodings.h"
-#include "parquet/compression/codec.h"
 
 #include <algorithm>
 #include <string>
@@ -22,6 +20,10 @@
 
 #include <thrift/protocol/TDebugProtocol.h>
 
+#include "parquet/encodings/encodings.h"
+#include "parquet/compression/codec.h"
+#include "parquet/thrift/util.h"
+
 const int DATA_PAGE_SIZE = 64 * 1024;
 
 namespace parquet_cpp {

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/src/parquet/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/CMakeLists.txt b/src/parquet/CMakeLists.txt
index f35af70..f08901e 100644
--- a/src/parquet/CMakeLists.txt
+++ b/src/parquet/CMakeLists.txt
@@ -18,6 +18,8 @@
 # Headers: top level
 install(FILES
   parquet.h
+  reader.h
+  exception.h
   DESTINATION include/parquet)
 
 ADD_PARQUET_TEST(reader-test)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/src/parquet/exception.h
----------------------------------------------------------------------
diff --git a/src/parquet/exception.h b/src/parquet/exception.h
new file mode 100644
index 0000000..7d94031
--- /dev/null
+++ b/src/parquet/exception.h
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PARQUET_EXCEPTION_H
+#define PARQUET_EXCEPTION_H
+
+#include <exception>
+#include <sstream>
+#include <string>
+
+namespace parquet_cpp {
+
+class ParquetException : public std::exception {
+ public:
+  static void EofException() { throw ParquetException("Unexpected end of stream."); }
+  static void NYI(const std::string& msg) {
+    std::stringstream ss;
+    ss << "Not yet implemented: " << msg << ".";
+    throw ParquetException(ss.str());
+  }
+
+  explicit ParquetException(const char* msg) : msg_(msg) {}
+  explicit ParquetException(const std::string& msg) : msg_(msg) {}
+  explicit ParquetException(const char* msg, exception& e) : msg_(msg) {}
+
+  virtual ~ParquetException() throw() {}
+  virtual const char* what() const throw() { return msg_.c_str(); }
+
+ private:
+  std::string msg_;
+};
+
+} // namespace parquet_cpp
+
+#endif // PARQUET_EXCEPTION_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/src/parquet/parquet.h
----------------------------------------------------------------------
diff --git a/src/parquet/parquet.h b/src/parquet/parquet.h
index 320f003..a1af6b7 100644
--- a/src/parquet/parquet.h
+++ b/src/parquet/parquet.h
@@ -1,45 +1,36 @@
-// Copyright 2012 Cloudera Inc.
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
 //
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
 //
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
 
 #ifndef PARQUET_PARQUET_H
 #define PARQUET_PARQUET_H
 
 #include <exception>
-#include <sstream>
 #include <cstdint>
+#include <cstring>
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
 
-// Needed for thrift
-#include <boost/shared_ptr.hpp>
-
+#include "parquet/exception.h"
 #include "parquet/thrift/parquet_constants.h"
 #include "parquet/thrift/parquet_types.h"
 #include "parquet/util/rle-encoding.h"
 
-// TCompactProtocol requires some #defines to work right.
-#define SIGNED_RIGHT_SHIFT_IS 1
-#define ARITHMETIC_RIGHT_SHIFT 1
-#include <thrift/protocol/TCompactProtocol.h>
-#include <thrift/protocol/TDebugProtocol.h>
-#include <thrift/TApplicationException.h>
-
-#include <thrift/protocol/TBinaryProtocol.h>
-#include <thrift/transport/TBufferTransports.h>
-
 namespace std {
 
 template <>
@@ -61,26 +52,6 @@ struct ByteArray {
   const uint8_t* ptr;
 };
 
-class ParquetException : public std::exception {
- public:
-  static void EofException() { throw ParquetException("Unexpected end of stream."); }
-  static void NYI(const std::string& msg) {
-    std::stringstream ss;
-    ss << "Not yet implemented: " << msg << ".";
-    throw ParquetException(ss.str());
-  }
-
-  explicit ParquetException(const char* msg) : msg_(msg) {}
-  explicit ParquetException(const std::string& msg) : msg_(msg) {}
-  explicit ParquetException(const char* msg, exception& e) : msg_(msg) {}
-
-  virtual ~ParquetException() throw() {}
-  virtual const char* what() const throw() { return msg_.c_str(); }
-
- private:
-  std::string msg_;
-};
-
 // Interface for the column reader to get the bytes. The interface is a stream
 // interface, meaning the bytes in order and once a byte is read, it does not
 // need to be read again.
@@ -235,27 +206,6 @@ inline bool ColumnReader::ReadDefinitionRepetitionLevels(int* def_level, int* re
   return *def_level == 0;
 }
 
-// Deserialize a thrift message from buf/len.  buf/len must at least contain
-// all the bytes needed to store the thrift message.  On return, len will be
-// set to the actual length of the header.
-template <class T>
-inline void DeserializeThriftMsg(const uint8_t* buf, uint32_t* len, T* deserialized_msg) {
-  // Deserialize msg bytes into c++ thrift msg using memory transport.
-  boost::shared_ptr<apache::thrift::transport::TMemoryBuffer> tmem_transport(
-      new apache::thrift::transport::TMemoryBuffer(const_cast<uint8_t*>(buf), *len));
-  apache::thrift::protocol::TCompactProtocolFactoryT<
-      apache::thrift::transport::TMemoryBuffer> tproto_factory;
-  boost::shared_ptr<apache::thrift::protocol::TProtocol> tproto =
-      tproto_factory.getProtocol(tmem_transport);
-  try {
-    deserialized_msg->read(tproto.get());
-  } catch (apache::thrift::protocol::TProtocolException& e) {
-    throw ParquetException("Couldn't deserialize thrift.", e);
-  }
-  uint32_t bytes_left = tmem_transport->available_read();
-  *len = *len - bytes_left;
-}
-
 } // namespace parquet_cpp
 
 #endif

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/src/parquet/reader-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/reader-test.cc b/src/parquet/reader-test.cc
index f6bf8b1..0f06f3f 100644
--- a/src/parquet/reader-test.cc
+++ b/src/parquet/reader-test.cc
@@ -15,12 +15,45 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <string>
+
 #include <gtest/gtest.h>
 
-namespace parquet {
+#include "parquet/reader.h"
+
+using std::string;
+
+namespace parquet_cpp {
+
+const char* data_dir = std::getenv("PARQUET_TEST_DATA");
+
+
+class TestAllTypesPlain : public ::testing::Test {
+ public:
+  void SetUp() {
+    std::string dir_string(data_dir);
+
+    std::stringstream ss;
+    ss << dir_string << "/" << "alltypes_plain.parquet";
+    file_.Open(ss.str());
+    reader_.Open(&file_);
+  }
+
+  void TearDown() {
+    reader_.Close();
+  }
+
+ protected:
+  LocalFile file_;
+  ParquetFileReader reader_;
+};
+
 
-TEST(TestReader, ItWorks) {
-  ASSERT_TRUE(true);
+TEST_F(TestAllTypesPlain, ParseMetaData) {
+  reader_.ParseMetaData();
 }
 
-} // namespace parquet
+} // namespace parquet_cpp

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/src/parquet/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/reader.cc b/src/parquet/reader.cc
new file mode 100644
index 0000000..7ccd98c
--- /dev/null
+++ b/src/parquet/reader.cc
@@ -0,0 +1,117 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/reader.h"
+
+#include <cstdio>
+#include <vector>
+
+#include "parquet/exception.h"
+#include "parquet/thrift/util.h"
+
+namespace parquet_cpp {
+
+// ----------------------------------------------------------------------
+// LocalFile methods
+
+LocalFile::~LocalFile() {
+  // You must explicitly call Close
+}
+
+void LocalFile::Open(const std::string& path) {
+  path_ = path;
+  file_ = fopen(path_.c_str(), "r");
+  is_open_ = true;
+}
+
+void LocalFile::Close() {
+  if (is_open_) {
+    fclose(file_);
+    is_open_ = false;
+  }
+}
+
+size_t LocalFile::Size() {
+  fseek(file_, 0L, SEEK_END);
+  return Tell();
+}
+
+void LocalFile::Seek(size_t pos) {
+  fseek(file_, pos, SEEK_SET);
+}
+
+size_t LocalFile::Tell() {
+  return ftell(file_);
+}
+
+void LocalFile::Read(size_t nbytes, uint8_t* buffer,
+    size_t* bytes_read) {
+  *bytes_read = fread(buffer, 1, nbytes, file_);
+}
+
+// ----------------------------------------------------------------------
+// ParquetFileReader
+
+// 4 byte constant + 4 byte metadata len
+static constexpr uint32_t FOOTER_SIZE = 8;
+static constexpr uint8_t PARQUET_MAGIC[4] = {'P', 'A', 'R', '1'};
+
+void ParquetFileReader::Open(FileLike* buffer) {
+  buffer_ = buffer;
+}
+
+void ParquetFileReader::Close() {
+  buffer_->Close();
+}
+
+void ParquetFileReader::ParseMetaData() {
+  size_t filesize = buffer_->Size();
+
+  if (filesize < FOOTER_SIZE) {
+    throw ParquetException("Corrupted file, smaller than file footer");
+  }
+
+  size_t bytes_read;
+  uint8_t footer_buffer[FOOTER_SIZE];
+
+  buffer_->Seek(filesize - FOOTER_SIZE);
+  buffer_->Read(FOOTER_SIZE, footer_buffer, &bytes_read);
+
+  if (bytes_read != FOOTER_SIZE) {
+    throw ParquetException("Invalid parquet file. Corrupt footer.");
+  }
+  if (memcmp(footer_buffer + 4, PARQUET_MAGIC, 4) != 0) {
+    throw ParquetException("Invalid parquet file. Corrupt footer.");
+  }
+
+  uint32_t metadata_len = *reinterpret_cast<uint32_t*>(footer_buffer);
+  size_t metadata_start = filesize - FOOTER_SIZE - metadata_len;
+  if (metadata_start < 0) {
+    throw ParquetException("Invalid parquet file. File is less than file metadata size.");
+  }
+
+  buffer_->Seek(metadata_start);
+
+  std::vector<uint8_t> metadata_buffer(metadata_len);
+  buffer_->Read(metadata_len, &metadata_buffer[0], &bytes_read);
+  if (bytes_read != metadata_len) {
+    throw ParquetException("Invalid parquet file. Could not read metadata bytes.");
+  }
+  DeserializeThriftMsg(&metadata_buffer[0], &metadata_len, &metadata_);
+}
+
+} // namespace parquet_cpp

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/src/parquet/reader.h
----------------------------------------------------------------------
diff --git a/src/parquet/reader.h b/src/parquet/reader.h
new file mode 100644
index 0000000..4a40e04
--- /dev/null
+++ b/src/parquet/reader.h
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PARQUET_FILE_READER_H
+#define PARQUET_FILE_READER_H
+
+#include <cstdint>
+#include <string>
+#include <stdio.h>
+
+#include "parquet/thrift/parquet_types.h"
+#include "parquet/parquet.h"
+
+namespace parquet_cpp {
+
+class FileLike {
+ public:
+  virtual ~FileLike() {}
+
+  virtual void Close() = 0;
+  virtual size_t Size() = 0;
+  virtual size_t Tell() = 0;
+  virtual void Seek(size_t pos) = 0;
+  virtual void Read(size_t nbytes, uint8_t* out, size_t* bytes_read) = 0;
+};
+
+
+class LocalFile : public FileLike {
+ public:
+  LocalFile() : file_(nullptr), is_open_(false) {}
+  virtual ~LocalFile();
+
+  void Open(const std::string& path);
+
+  virtual void Close();
+  virtual size_t Size();
+  virtual size_t Tell();
+  virtual void Seek(size_t pos);
+  virtual void Read(size_t nbytes, uint8_t* out, size_t* bytes_read);
+
+  bool is_open() const { return is_open_;}
+  const std::string& path() const { return path_;}
+
+ private:
+  std::string path_;
+  FILE* file_;
+  bool is_open_;
+};
+
+
+class ParquetFileReader {
+ public:
+  ParquetFileReader() : buffer_(nullptr) {}
+  ~ParquetFileReader() {}
+
+  // The class takes ownership of the passed file-like object
+  void Open(FileLike* buffer);
+
+  void Close();
+
+  void ParseMetaData();
+
+  const parquet::FileMetaData& metadata() const {
+    return metadata_;
+  }
+
+ private:
+  parquet::FileMetaData metadata_;
+  FileLike* buffer_;
+};
+
+
+} // namespace parquet_cpp
+
+#endif // PARQUET_FILE_READER_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/src/parquet/thrift/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/CMakeLists.txt b/src/parquet/thrift/CMakeLists.txt
index e2a00c9..01e685e 100644
--- a/src/parquet/thrift/CMakeLists.txt
+++ b/src/parquet/thrift/CMakeLists.txt
@@ -26,4 +26,5 @@ set_target_properties(parquet_thrift
 install(FILES
   parquet_types.h
   parquet_constants.h
+  util.h
   DESTINATION include/parquet/thrift)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/src/parquet/thrift/util.h
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/util.h b/src/parquet/thrift/util.h
new file mode 100644
index 0000000..ecf24c6
--- /dev/null
+++ b/src/parquet/thrift/util.h
@@ -0,0 +1,46 @@
+#ifndef PARQUET_THRIFT_UTIL_H
+#define PARQUET_THRIFT_UTIL_H
+
+#include <cstdint>
+
+// Needed for thrift
+#include <boost/shared_ptr.hpp>
+
+// TCompactProtocol requires some #defines to work right.
+#define SIGNED_RIGHT_SHIFT_IS 1
+#define ARITHMETIC_RIGHT_SHIFT 1
+#include <thrift/protocol/TCompactProtocol.h>
+#include <thrift/protocol/TDebugProtocol.h>
+#include <thrift/TApplicationException.h>
+
+#include <thrift/protocol/TBinaryProtocol.h>
+#include <thrift/transport/TBufferTransports.h>
+
+#include "parquet/exception.h"
+
+namespace parquet_cpp {
+
+// Deserialize a thrift message from buf/len.  buf/len must at least contain
+// all the bytes needed to store the thrift message.  On return, len will be
+// set to the actual length of the header.
+template <class T>
+inline void DeserializeThriftMsg(const uint8_t* buf, uint32_t* len, T* deserialized_msg) {
+  // Deserialize msg bytes into c++ thrift msg using memory transport.
+  boost::shared_ptr<apache::thrift::transport::TMemoryBuffer> tmem_transport(
+      new apache::thrift::transport::TMemoryBuffer(const_cast<uint8_t*>(buf), *len));
+  apache::thrift::protocol::TCompactProtocolFactoryT<
+      apache::thrift::transport::TMemoryBuffer> tproto_factory;
+  boost::shared_ptr<apache::thrift::protocol::TProtocol> tproto =
+      tproto_factory.getProtocol(tmem_transport);
+  try {
+    deserialized_msg->read(tproto.get());
+  } catch (apache::thrift::protocol::TProtocolException& e) {
+    throw ParquetException("Couldn't deserialize thrift.", e);
+  }
+  uint32_t bytes_left = tmem_transport->available_read();
+  *len = *len - bytes_left;
+}
+
+} // namespace parquet_cpp
+
+#endif // PARQUET_THRIFT_UTIL_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/9a1fd892/src/parquet/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/util/CMakeLists.txt b/src/parquet/util/CMakeLists.txt
index 1b712f7..766214b 100644
--- a/src/parquet/util/CMakeLists.txt
+++ b/src/parquet/util/CMakeLists.txt
@@ -28,14 +28,14 @@ add_library(parquet_test_main
 
 if (APPLE)
   target_link_libraries(parquet_test_main
-	gtest
-	dl)
+    gtest
+    dl)
   set_target_properties(parquet_test_main
-		PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+        PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
 else()
   target_link_libraries(parquet_test_main
     dl
-	gtest
+    gtest
     pthread
   )
 endif()


Mime
View raw message