arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From u..@apache.org
Subject arrow git commit: ARROW-1226: [C++] Docs cleaning in arrow/ipc. Doxyfile fixes, move ipc/metadata-internal.h symbols to internal NS
Date Fri, 06 Oct 2017 09:35:49 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 898f5e273 -> 0a4c5b17f


ARROW-1226: [C++] Docs cleaning in arrow/ipc. Doxyfile fixes, move ipc/metadata-internal.h
symbols to internal NS

There were also some compiler warnings with clang-4.0 that I fixed here.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #1179 from wesm/ARROW-1226 and squashes the following commits:

b6e7131 [Wes McKinney] Fix documentation warnings
7b01563 [Wes McKinney] Clean up doxygen docs in arrow/ipc. Move metadata-internal.h symbols
to internal namespace. Fix Doxyfile so that full module paths show in generated files, like
arrow/ipc/reader.h


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/0a4c5b17
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/0a4c5b17
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/0a4c5b17

Branch: refs/heads/master
Commit: 0a4c5b17fb1e6268f6fa9d56f9644ce24aaef064
Parents: 898f5e2
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Fri Oct 6 11:35:43 2017 +0200
Committer: Uwe L. Korn <uwelk@xhochy.com>
Committed: Fri Oct 6 11:35:43 2017 +0200

----------------------------------------------------------------------
 cpp/apidoc/Doxyfile                    |  2 +-
 cpp/cmake_modules/SetupCxxFlags.cmake  |  2 +-
 cpp/src/arrow/ipc/dictionary.h         | 14 ++++--
 cpp/src/arrow/ipc/feather.h            | 44 ++++++++++++++++--
 cpp/src/arrow/ipc/json.h               | 39 ++++++++++++++--
 cpp/src/arrow/ipc/message.cc           |  4 +-
 cpp/src/arrow/ipc/message.h            |  5 ++
 cpp/src/arrow/ipc/metadata-internal.cc |  2 +
 cpp/src/arrow/ipc/metadata-internal.h  |  2 +
 cpp/src/arrow/ipc/reader.cc            | 15 +++---
 cpp/src/arrow/ipc/reader.h             | 57 +++++++++++++++++------
 cpp/src/arrow/ipc/writer.cc            | 17 ++++---
 cpp/src/arrow/ipc/writer.h             | 71 +++++++++++++++++++++--------
 cpp/src/arrow/python/helpers.cc        |  3 +-
 cpp/src/plasma/CMakeLists.txt          |  5 +-
 15 files changed, 214 insertions(+), 68 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/apidoc/Doxyfile
----------------------------------------------------------------------
diff --git a/cpp/apidoc/Doxyfile b/cpp/apidoc/Doxyfile
index fadb6d5..d5fd1ac 100644
--- a/cpp/apidoc/Doxyfile
+++ b/cpp/apidoc/Doxyfile
@@ -188,7 +188,7 @@ STRIP_FROM_PATH        =
 # specify the list of include paths that are normally passed to the compiler
 # using the -I flag.
 
-STRIP_FROM_INC_PATH    =
+STRIP_FROM_INC_PATH  = ../src
 
 # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
 # less readable) file names. This can be useful is your file systems doesn't

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/cmake_modules/SetupCxxFlags.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake
index 77bfac8..4b1950f 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -71,7 +71,7 @@ if ("${UPPERCASE_BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN")
   elseif ("${COMPILER_FAMILY}" STREQUAL "clang")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Weverything -Wno-c++98-compat \
 -Wno-c++98-compat-pedantic -Wno-deprecated -Wno-weak-vtables -Wno-padded \
--Wno-unused-parameter -Wno-undef \
+-Wno-comma -Wno-unused-parameter -Wno-undef \
 -Wno-shadow -Wno-switch-enum -Wno-exit-time-destructors \
 -Wno-global-constructors -Wno-weak-template-vtables -Wno-undefined-reinterpret-cast \
 -Wno-implicit-fallthrough -Wno-unreachable-code-return \

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/src/arrow/ipc/dictionary.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/dictionary.h b/cpp/src/arrow/ipc/dictionary.h
index 4130e9b..4494b13 100644
--- a/cpp/src/arrow/ipc/dictionary.h
+++ b/cpp/src/arrow/ipc/dictionary.h
@@ -38,26 +38,30 @@ namespace ipc {
 using DictionaryMap = std::unordered_map<int64_t, std::shared_ptr<Array>>;
 using DictionaryTypeMap = std::unordered_map<int64_t, std::shared_ptr<Field>>;
 
-// Memoization data structure for handling shared dictionaries
+/// \brief Memoization data structure for handling shared dictionaries
 class ARROW_EXPORT DictionaryMemo {
  public:
   DictionaryMemo();
 
-  // Returns KeyError if dictionary not found
+  /// \brief Returns KeyError if dictionary not found
   Status GetDictionary(int64_t id, std::shared_ptr<Array>* dictionary) const;
 
-  /// Return id for dictionary, computing new id if necessary
+  /// \brief Return id for dictionary, computing new id if necessary
   int64_t GetId(const std::shared_ptr<Array>& dictionary);
 
+  /// \brief Return true if dictionary array object is in this memo
   bool HasDictionary(const std::shared_ptr<Array>& dictionary) const;
+
+  /// \brief Return true if we have a dictionary for the input id
   bool HasDictionaryId(int64_t id) const;
 
-  // Add a dictionary to the memo with a particular id. Returns KeyError if
-  // that dictionary already exists
+  /// \brief Add a dictionary to the memo with a particular id. Returns
+  /// KeyError if that dictionary already exists
   Status AddDictionary(int64_t id, const std::shared_ptr<Array>& dictionary);
 
   const DictionaryMap& id_to_dictionary() const { return id_to_dictionary_; }
 
+  /// \brief The number of dictionaries stored in the memo
   int size() const { return static_cast<int>(id_to_dictionary_.size()); }
 
  private:

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/src/arrow/ipc/feather.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/feather.h b/cpp/src/arrow/ipc/feather.h
index 83cd151..7c884e9 100644
--- a/cpp/src/arrow/ipc/feather.h
+++ b/cpp/src/arrow/ipc/feather.h
@@ -48,28 +48,47 @@ static constexpr const int kFeatherVersion = 2;
 // ----------------------------------------------------------------------
 // Metadata accessor classes
 
+/// \class TableReader
+/// \brief An interface for reading columns from Feather files
 class ARROW_EXPORT TableReader {
  public:
   TableReader();
   ~TableReader();
 
+  /// \brief Open a Feather file from a RandomAccessFile interface
+  ///
+  /// \param[in] source a RandomAccessFile instance
+  /// \param[out] out the table reader
   static Status Open(const std::shared_ptr<io::RandomAccessFile>& source,
                      std::unique_ptr<TableReader>* out);
 
-  // Optional table description
-  //
-  // This does not return a const std::string& because a string has to be
-  // copied from the flatbuffer to be able to return a non-flatbuffer type
+  /// \brief Optional table description
+  ///
+  /// This does not return a const std::string& because a string has to be
+  /// copied from the flatbuffer to be able to return a non-flatbuffer type
   std::string GetDescription() const;
+
+  /// \brief Return true if the table has a description field populated
   bool HasDescription() const;
 
+  /// \brief Return the version number of the Feather file
   int version() const;
 
+  /// \brief Return the number of rows in the file
   int64_t num_rows() const;
+
+  /// \brief Return the number of columns in the file
   int64_t num_columns() const;
 
   std::string GetColumnName(int i) const;
 
+  /// \brief Read a column from the file as an arrow::Column.
+  ///
+  /// \param[in] i the column index to read
+  /// \param[out] out the returned column
+  /// \return Status
+  ///
+  /// This function is zero-copy if the file source supports zero-copy reads
   Status GetColumn(int i, std::shared_ptr<Column>* out);
 
  private:
@@ -77,19 +96,34 @@ class ARROW_EXPORT TableReader {
   std::unique_ptr<TableReaderImpl> impl_;
 };
 
+/// \class TableWriter
+/// \brief Interface for writing Feather files
 class ARROW_EXPORT TableWriter {
  public:
   ~TableWriter();
 
+  /// \brief Create a new TableWriter that writes to an OutputStream
+  /// \param[in] stream an output stream
+  /// \param[out] out the returned table writer
+  /// \return Status
   static Status Open(const std::shared_ptr<io::OutputStream>& stream,
                      std::unique_ptr<TableWriter>* out);
 
+  /// \brief Set the description field in the file metadata
   void SetDescription(const std::string& desc);
+
+  /// \brief Set the number of rows in the file
   void SetNumRows(int64_t num_rows);
 
+  /// \brief Append a column to the file
+  ///
+  /// \param[in] name the column name
+  /// \param[in] values the column values as a contiguous arrow::Array
+  /// \return Status
   Status Append(const std::string& name, const Array& values);
 
-  // We are done, write the file metadata and footer
+  /// \brief Finalize the file by writing the file metadata and footer
+  /// \return Status
   Status Finalize();
 
  private:

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/src/arrow/ipc/json.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/json.h b/cpp/src/arrow/ipc/json.h
index 0391172..51f30f0 100644
--- a/cpp/src/arrow/ipc/json.h
+++ b/cpp/src/arrow/ipc/json.h
@@ -35,14 +35,29 @@ class Schema;
 
 namespace ipc {
 
+/// \class JsonWriter
+/// \brief Write the JSON representation of an Arrow record batch file or stream
+///
+/// This is used for integration testing
 class ARROW_EXPORT JsonWriter {
  public:
   ~JsonWriter();
 
+  /// \brief Create a new JSON writer that writes to memory
+  ///
+  /// \param[in] schema the schema of record batches
+  /// \param[out] out the returned writer object
+  /// \return Status
   static Status Open(const std::shared_ptr<Schema>& schema,
                      std::unique_ptr<JsonWriter>* out);
 
+  /// \brief Append a record batch
   Status WriteRecordBatch(const RecordBatch& batch);
+
+  /// \brief Finish the JSON payload and return as a std::string
+  ///
+  /// \param[out] result the JSON as a std::string
+  /// \return Status
   Status Finish(std::string* result);
 
  private:
@@ -53,23 +68,41 @@ class ARROW_EXPORT JsonWriter {
   std::unique_ptr<JsonWriterImpl> impl_;
 };
 
-// TODO(wesm): Read from a file stream rather than an in-memory buffer
+/// \class JsonReader
+/// \brief Read the JSON representation of an Arrow record batch file or stream
+///
+/// This is used for integration testing
 class ARROW_EXPORT JsonReader {
  public:
   ~JsonReader();
 
+  /// \brief Create a new JSON reader
+  ///
+  /// \param[in] pool a MemoryPool to use for buffer allocations
+  /// \param[in] data a Buffer containing the JSON data
+  /// \param[out] reader the returned reader object
+  /// \return Status
   static Status Open(MemoryPool* pool, const std::shared_ptr<Buffer>& data,
                      std::unique_ptr<JsonReader>* reader);
 
-  // Use the default memory pool
+  /// \brief Create a new JSON reader that uses the default memory pool
+  ///
+  /// \param[in] data a Buffer containing the JSON data
+  /// \param[out] reader the returned reader object
+  /// \return Status
   static Status Open(const std::shared_ptr<Buffer>& data,
                      std::unique_ptr<JsonReader>* reader);
 
+  /// \brief Return the schema read from the JSON
   std::shared_ptr<Schema> schema() const;
 
+  /// \brief Return the number of record batches
   int num_record_batches() const;
 
-  // Read a record batch from the file
+  /// \brief Read a particular record batch from the file
+  ///
+  /// \param[in] i the record batch index, does not boundscheck
+  /// \param[out] batch the read record batch
   Status ReadRecordBatch(int i, std::shared_ptr<RecordBatch>* batch) const;
 
  private:

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/src/arrow/ipc/message.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc
index 0c587ab..0dd5c72 100644
--- a/cpp/src/arrow/ipc/message.cc
+++ b/cpp/src/arrow/ipc/message.cc
@@ -44,7 +44,7 @@ class Message::MessageImpl {
     message_ = flatbuf::GetMessage(metadata_->data());
 
     // Check that the metadata version is supported
-    if (message_->version() < kMinMetadataVersion) {
+    if (message_->version() < internal::kMinMetadataVersion) {
       return Status::Invalid("Old metadata version not supported");
     }
 
@@ -166,7 +166,7 @@ Status Message::ReadFrom(const std::shared_ptr<Buffer>& metadata,
io::InputStrea
 
 Status Message::SerializeTo(io::OutputStream* file, int64_t* output_length) const {
   int32_t metadata_length = 0;
-  RETURN_NOT_OK(WriteMessage(*metadata(), file, &metadata_length));
+  RETURN_NOT_OK(internal::WriteMessage(*metadata(), file, &metadata_length));
 
   *output_length = metadata_length;
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/src/arrow/ipc/message.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/message.h b/cpp/src/arrow/ipc/message.h
index 522b3bd..67a95c7 100644
--- a/cpp/src/arrow/ipc/message.h
+++ b/cpp/src/arrow/ipc/message.h
@@ -53,6 +53,7 @@ constexpr int kMaxNestingDepth = 64;
 // individual fields metadata can be retrieved from very large schema without
 //
 
+/// \class Message
 /// \brief An IPC message including metadata and body
 class ARROW_EXPORT Message {
  public:
@@ -70,6 +71,7 @@ class ARROW_EXPORT Message {
   /// \param[in] metadata a buffer containing the Flatbuffer metadata
   /// \param[in] body a buffer containing the message body, which may be nullptr
   /// \param[out] out the created message
+  /// \return Status
   static Status Open(const std::shared_ptr<Buffer>& metadata,
                      const std::shared_ptr<Buffer>& body, std::unique_ptr<Message>*
out);
 
@@ -77,6 +79,7 @@ class ARROW_EXPORT Message {
   /// \param[in] metadata containing a serialized Message flatbuffer
   /// \param[in] stream an InputStream
   /// \param[out] out the created Message
+  /// \return Status
   ///
   /// \note If stream supports zero-copy, this is zero-copy
   static Status ReadFrom(const std::shared_ptr<Buffer>& metadata, io::InputStream*
stream,
@@ -98,8 +101,10 @@ class ARROW_EXPORT Message {
   /// \return buffer is nullptr if no body
   std::shared_ptr<Buffer> body() const;
 
+  /// \brief The Message type
   Type type() const;
 
+  /// \brief The Message metadata version
   MetadataVersion metadata_version() const;
 
   const void* header() const;

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/src/arrow/ipc/metadata-internal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 8593dca..162afb9 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -46,6 +46,7 @@ namespace arrow {
 namespace flatbuf = org::apache::arrow::flatbuf;
 
 namespace ipc {
+namespace internal {
 
 using FBB = flatbuffers::FlatBufferBuilder;
 using DictionaryOffset = flatbuffers::Offset<flatbuf::DictionaryEncoding>;
@@ -933,5 +934,6 @@ Status WriteMessage(const Buffer& message, io::OutputStream* file,
   return Status::OK();
 }
 
+}  // namespace internal
 }  // namespace ipc
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/src/arrow/ipc/metadata-internal.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata-internal.h b/cpp/src/arrow/ipc/metadata-internal.h
index 83d1f13..309e758 100644
--- a/cpp/src/arrow/ipc/metadata-internal.h
+++ b/cpp/src/arrow/ipc/metadata-internal.h
@@ -45,6 +45,7 @@ class OutputStream;
 }  // namespace io
 
 namespace ipc {
+namespace internal {
 
 static constexpr flatbuf::MetadataVersion kCurrentMetadataVersion =
     flatbuf::MetadataVersion_V3;
@@ -130,6 +131,7 @@ Status WriteDictionaryMessage(const int64_t id, const int64_t length,
                               const std::vector<BufferMetadata>& buffers,
                               std::shared_ptr<Buffer>* out);
 
+}  // namespace internal
 }  // namespace ipc
 }  // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/src/arrow/ipc/reader.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 5fe9082..50eb903 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -51,6 +51,9 @@ namespace flatbuf = org::apache::arrow::flatbuf;
 
 namespace ipc {
 
+using internal::FileBlock;
+using internal::kArrowMagicBytes;
+
 // ----------------------------------------------------------------------
 // Record batch read path
 
@@ -422,7 +425,7 @@ class RecordBatchStreamReader::RecordBatchStreamReaderImpl {
     RETURN_NOT_OK(
         ReadMessageAndValidate(message_reader_.get(), Message::SCHEMA, false, &message));
 
-    RETURN_NOT_OK(GetDictionaryTypes(message->header(), &dictionary_types_));
+    RETURN_NOT_OK(internal::GetDictionaryTypes(message->header(), &dictionary_types_));
 
     // TODO(wesm): In future, we may want to reconcile the ids in the stream with
     // those found in the schema
@@ -431,7 +434,7 @@ class RecordBatchStreamReader::RecordBatchStreamReaderImpl {
       RETURN_NOT_OK(ReadNextDictionary());
     }
 
-    return GetSchema(message->header(), dictionary_memo_, &schema_);
+    return internal::GetSchema(message->header(), dictionary_memo_, &schema_);
   }
 
   Status ReadNext(std::shared_ptr<RecordBatch>* batch) {
@@ -588,7 +591,7 @@ class RecordBatchFileReader::RecordBatchFileReaderImpl {
   }
 
   Status ReadSchema() {
-    RETURN_NOT_OK(GetDictionaryTypes(footer_->schema(), &dictionary_fields_));
+    RETURN_NOT_OK(internal::GetDictionaryTypes(footer_->schema(), &dictionary_fields_));
 
     // Read all the dictionaries
     for (int i = 0; i < num_dictionaries(); ++i) {
@@ -611,7 +614,7 @@ class RecordBatchFileReader::RecordBatchFileReaderImpl {
     }
 
     // Get the schema
-    return GetSchema(footer_->schema(), *dictionary_memo_, &schema_);
+    return internal::GetSchema(footer_->schema(), *dictionary_memo_, &schema_);
   }
 
   Status Open(const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset)
{
@@ -732,8 +735,8 @@ Status ReadTensor(int64_t offset, io::RandomAccessFile* file,
   std::vector<int64_t> shape;
   std::vector<int64_t> strides;
   std::vector<std::string> dim_names;
-  RETURN_NOT_OK(
-      GetTensorMetadata(*message->metadata(), &type, &shape, &strides, &dim_names));
+  RETURN_NOT_OK(internal::GetTensorMetadata(*message->metadata(), &type, &shape,
&strides,
+                                            &dim_names));
   *out = std::make_shared<Tensor>(type, message->body(), shape, strides, dim_names);
   return Status::OK();
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/src/arrow/ipc/reader.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h
index 1db282f..7581fbd 100644
--- a/cpp/src/arrow/ipc/reader.h
+++ b/cpp/src/arrow/ipc/reader.h
@@ -48,6 +48,10 @@ using RecordBatchReader = ::arrow::RecordBatchReader;
 
 /// \class RecordBatchStreamReader
 /// \brief Synchronous batch stream reader that reads from io::InputStream
+///
+/// This class reads the schema (plus any dictionaries) as the first messages
+/// in the stream, followed by record batches. For more granular zero-copy
+/// reads see the ReadRecordBatch functions
 class ARROW_EXPORT RecordBatchStreamReader : public RecordBatchReader {
  public:
   virtual ~RecordBatchStreamReader();
@@ -68,11 +72,16 @@ class ARROW_EXPORT RecordBatchStreamReader : public RecordBatchReader
{
   /// \return Status
   static Status Open(io::InputStream* stream, std::shared_ptr<RecordBatchReader>* out);
 
-  /// \brief Version of Open that retains ownership of stream
+  /// \brief Open stream and retain ownership of stream object
+  /// \param[in] stream the input stream
+  /// \param[out] out the batch reader
+  /// \return Status
   static Status Open(const std::shared_ptr<io::InputStream>& stream,
                      std::shared_ptr<RecordBatchReader>* out);
 
+  /// \brief Returns the schema read from the stream
   std::shared_ptr<Schema> schema() const override;
+
   Status ReadNext(std::shared_ptr<RecordBatch>* batch) override;
 
  private:
@@ -88,11 +97,12 @@ class ARROW_EXPORT RecordBatchFileReader {
   ~RecordBatchFileReader();
 
   /// \brief Open a RecordBatchFileReader
-  // Open a file-like object that is assumed to be self-contained; i.e., the
-  // end of the file interface is the end of the Arrow file. Note that there
-  // can be any amount of data preceding the Arrow-formatted data, because we
-  // need only locate the end of the Arrow file stream to discover the metadata
-  // and then proceed to read the data into memory.
+  ///
+  /// Open a file-like object that is assumed to be self-contained; i.e., the
+  /// end of the file interface is the end of the Arrow file. Note that there
+  /// can be any amount of data preceding the Arrow-formatted data, because we
+  /// need only locate the end of the Arrow file stream to discover the metadata
+  /// and then proceed to read the data into memory.
   static Status Open(io::RandomAccessFile* file,
                      std::shared_ptr<RecordBatchFileReader>* reader);
 
@@ -102,31 +112,42 @@ class ARROW_EXPORT RecordBatchFileReader {
   /// metadata footer). The metadata must have been written with memory offsets
   /// relative to the start of the containing file
   ///
-  /// @param file the data source
-  /// @param footer_offset the position of the end of the Arrow "file"
+  /// \param[in] file the data source
+  /// \param[in] footer_offset the position of the end of the Arrow file
+  /// \param[out] reader the returned reader
+  /// \return Status
   static Status Open(io::RandomAccessFile* file, int64_t footer_offset,
                      std::shared_ptr<RecordBatchFileReader>* reader);
 
   /// \brief Version of Open that retains ownership of file
+  ///
+  /// \param[in] file the data source
+  /// \param[out] reader the returned reader
+  /// \return Status
   static Status Open(const std::shared_ptr<io::RandomAccessFile>& file,
                      std::shared_ptr<RecordBatchFileReader>* reader);
 
   /// \brief Version of Open that retains ownership of file
+  ///
+  /// \param[in] file the data source
+  /// \param[in] footer_offset the position of the end of the Arrow file
+  /// \param[out] reader the returned reader
+  /// \return Status
   static Status Open(const std::shared_ptr<io::RandomAccessFile>& file,
                      int64_t footer_offset,
                      std::shared_ptr<RecordBatchFileReader>* reader);
 
-  /// The schema includes any dictionaries
+  /// \brief The schema read from the file
   std::shared_ptr<Schema> schema() const;
 
-  /// Returns number of record batches in the file
+  /// \brief Returns the number of record batches in the file
   int num_record_batches() const;
 
-  /// Returns MetadataVersion in the file metadata
+  /// \brief Return the metadata version from the file metadata
   MetadataVersion version() const;
 
-  /// Read a record batch from the file. Does not copy memory if the input
-  /// source supports zero-copy.
+  /// \brief Read a particular record batch from the file. Does not copy memory
+  /// if the input source supports zero-copy.
   ///
   /// \param[in] i the index of the record batch to return
   /// \param[out] batch the read batch
@@ -142,10 +163,12 @@ class ARROW_EXPORT RecordBatchFileReader {
 
 // Generic read functions; does not copy data if the input supports zero copy reads
 
-/// \brief Read Schema from stream serialized as a sequence of IPC messages
+/// \brief Read Schema from stream serialized as a sequence of one or more IPC
+/// messages
 ///
 /// \param[in] stream an InputStream
 /// \param[out] out the output Schema
+/// \return Status
 ///
 /// If record batches follow the schema, it is better to use
 /// RecordBatchStreamReader
@@ -158,6 +181,7 @@ Status ReadSchema(io::InputStream* stream, std::shared_ptr<Schema>*
out);
 /// \param[in] schema the record batch schema
 /// \param[in] stream the file where the batch is located
 /// \param[out] out the read record batch
+/// \return Status
 ARROW_EXPORT
 Status ReadRecordBatch(const std::shared_ptr<Schema>& schema, io::InputStream*
stream,
                        std::shared_ptr<RecordBatch>* out);
@@ -168,11 +192,12 @@ Status ReadRecordBatch(const std::shared_ptr<Schema>& schema,
io::InputStream* s
 /// \param[in] schema the record batch schema
 /// \param[in] file a random access file
 /// \param[out] out the read record batch
+/// \return Status
 ARROW_EXPORT
 Status ReadRecordBatch(const Buffer& metadata, const std::shared_ptr<Schema>&
schema,
                        io::RandomAccessFile* file, std::shared_ptr<RecordBatch>* out);
 
-/// \brief Read record batch from fully encapulated Message
+/// \brief Read record batch from encapsulated Message
 ///
 /// \param[in] message a message instance containing metadata and body
 /// \param[in] schema the record batch schema
@@ -189,6 +214,7 @@ Status ReadRecordBatch(const Message& message, const std::shared_ptr<Schema>&
sc
 /// \param[in] file a random access file
 /// \param[in] max_recursion_depth the maximum permitted nesting depth
 /// \param[out] out the read record batch
+/// \return Status
 ARROW_EXPORT
 Status ReadRecordBatch(const Buffer& metadata, const std::shared_ptr<Schema>&
schema,
                        int max_recursion_depth, io::RandomAccessFile* file,
@@ -199,6 +225,7 @@ Status ReadRecordBatch(const Buffer& metadata, const std::shared_ptr<Schema>&
sc
 /// \param[in] offset the file location of the start of the message
 /// \param[in] file the file where the batch is located
 /// \param[out] out the read tensor
+/// \return Status
 ARROW_EXPORT
 Status ReadTensor(int64_t offset, io::RandomAccessFile* file,
                   std::shared_ptr<Tensor>* out);

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/src/arrow/ipc/writer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index c321614..279a695 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -42,6 +42,9 @@
 namespace arrow {
 namespace ipc {
 
+using internal::FileBlock;
+using internal::kArrowMagicBytes;
+
 // ----------------------------------------------------------------------
 // Record batch write path
 
@@ -201,7 +204,7 @@ class RecordBatchSerializer : public ArrayVisitor {
     // itself as an int32_t.
     std::shared_ptr<Buffer> metadata_fb;
     RETURN_NOT_OK(WriteMetadataMessage(batch.num_rows(), *body_length, &metadata_fb));
-    RETURN_NOT_OK(WriteMessage(*metadata_fb, dst, metadata_length));
+    RETURN_NOT_OK(internal::WriteMessage(*metadata_fb, dst, metadata_length));
 
 #ifndef NDEBUG
     RETURN_NOT_OK(dst->Tell(&current_position));
@@ -491,8 +494,8 @@ class RecordBatchSerializer : public ArrayVisitor {
   // In some cases, intermediate buffers may need to be allocated (with sliced arrays)
   MemoryPool* pool_;
 
-  std::vector<FieldMetadata> field_nodes_;
-  std::vector<BufferMetadata> buffer_meta_;
+  std::vector<internal::FieldMetadata> field_nodes_;
+  std::vector<internal::BufferMetadata> buffer_meta_;
   std::vector<std::shared_ptr<Buffer>> buffers_;
 
   int64_t max_recursion_depth_;
@@ -593,8 +596,8 @@ Status WriteTensorHeader(const Tensor& tensor, io::OutputStream* dst,
                          int32_t* metadata_length, int64_t* body_length) {
   RETURN_NOT_OK(AlignStreamPosition(dst));
   std::shared_ptr<Buffer> metadata;
-  RETURN_NOT_OK(WriteTensorMessage(tensor, 0, &metadata));
-  return WriteMessage(*metadata, dst, metadata_length);
+  RETURN_NOT_OK(internal::WriteTensorMessage(tensor, 0, &metadata));
+  return internal::WriteMessage(*metadata, dst, metadata_length);
 }
 
 Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadata_length,
@@ -715,10 +718,10 @@ class SchemaWriter : public StreamBookKeeper {
 
   Status WriteSchema() {
     std::shared_ptr<Buffer> schema_fb;
-    RETURN_NOT_OK(WriteSchemaMessage(schema_, dictionary_memo_, &schema_fb));
+    RETURN_NOT_OK(internal::WriteSchemaMessage(schema_, dictionary_memo_, &schema_fb));
 
     int32_t metadata_length = 0;
-    RETURN_NOT_OK(WriteMessage(*schema_fb, sink_, &metadata_length));
+    RETURN_NOT_OK(internal::WriteMessage(*schema_fb, sink_, &metadata_length));
     RETURN_NOT_OK(UpdatePosition());
     DCHECK_EQ(0, position_ % 8) << "WriteSchema did not perform an aligned write";
     return Status::OK();

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/src/arrow/ipc/writer.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/writer.h b/cpp/src/arrow/ipc/writer.h
index 585373b..cedac45 100644
--- a/cpp/src/arrow/ipc/writer.h
+++ b/cpp/src/arrow/ipc/writer.h
@@ -94,8 +94,17 @@ class ARROW_EXPORT RecordBatchStreamWriter : public RecordBatchWriter {
   static Status Open(io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
                      std::shared_ptr<RecordBatchWriter>* out);
 
+  /// \brief Write a record batch to the stream
+  ///
+  /// \param[in] batch the record batch to write
+  /// \param[in] allow_64bit allow array lengths over INT32_MAX - 1
+  /// \return Status
   Status WriteRecordBatch(const RecordBatch& batch, bool allow_64bit = false) override;
+
+  /// \brief Close the stream by writing a 4-byte int32 0 EOS marker
+  /// \return Status
   Status Close() override;
+
   void set_memory_pool(MemoryPool* pool) override;
 
  protected:
@@ -122,7 +131,15 @@ class ARROW_EXPORT RecordBatchFileWriter : public RecordBatchStreamWriter
{
   static Status Open(io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
                      std::shared_ptr<RecordBatchWriter>* out);
 
+  /// \brief Write a record batch to the file
+  ///
+  /// \param[in] batch the record batch to write
+  /// \param[in] allow_64bit allow array lengths over INT32_MAX - 1
+  /// \return Status
   Status WriteRecordBatch(const RecordBatch& batch, bool allow_64bit = false) override;
+
+  /// \brief Close the file stream by writing the file footer and magic number
+  /// \return Status
   Status Close() override;
 
  private:
@@ -131,6 +148,21 @@ class ARROW_EXPORT RecordBatchFileWriter : public RecordBatchStreamWriter
{
   std::unique_ptr<RecordBatchFileWriterImpl> impl_;
 };
 
+/// \brief Low-level API for writing a record batch (without schema) to an OutputStream
+///
+/// \param[in] batch the record batch to write
+/// \param[in] buffer_start_offset the start offset to use in the buffer metadata,
+/// generally should be 0
+/// \param[in] dst an OutputStream
+/// \param[out] metadata_length the size of the length-prefixed flatbuffer
+/// including padding to a 64-byte boundary
+/// \param[out] body_length the size of the contiguous buffer block plus
+/// padding bytes
+/// \param[in] max_recursion_depth the maximum permitted nesting schema depth
+/// \param[in] allow_64bit permit field lengths exceeding INT32_MAX. May not be
+/// readable by other Arrow implementations
+/// \return Status
+///
 /// Write the RecordBatch (collection of equal-length Arrow arrays) to the
 /// output stream in a contiguous block. The record batch metadata is written as
 /// a flatbuffer (see format/Message.fbs -- the RecordBatch message type)
@@ -142,18 +174,6 @@ class ARROW_EXPORT RecordBatchFileWriter : public RecordBatchStreamWriter
{
 /// Finally, the absolute offsets (relative to the start of the output stream)
 /// to the end of the body and end of the metadata / data header (suffixed by
 /// the header size) is returned in out-variables
-///
-/// \param[in] buffer_start_offset the start offset to use in the buffer metadata,
-/// default should be 0
-/// \param[in] allow_64bit permit field lengths exceeding INT32_MAX. May not be
-/// readable by other Arrow implementations
-/// \param[out] metadata_length the size of the length-prefixed flatbuffer
-/// including padding to a 64-byte boundary
-/// \param[out] body_length the size of the contiguous buffer block plus
-/// padding bytes
-/// \return Status
-///
-/// Low-level API
 ARROW_EXPORT
 Status WriteRecordBatch(const RecordBatch& batch, int64_t buffer_start_offset,
                         io::OutputStream* dst, int32_t* metadata_length,
@@ -174,6 +194,7 @@ Status SerializeRecordBatch(const RecordBatch& batch, MemoryPool*
pool,
 /// \brief Write record batch to OutputStream
 ///
 /// \param[in] batch the record batch to write
+/// \param[in] pool a MemoryPool to use for temporary allocations, if needed
 /// \param[in] out the OutputStream to write the output to
 /// \return Status
 ///
@@ -194,7 +215,7 @@ ARROW_EXPORT
 Status SerializeSchema(const Schema& schema, MemoryPool* pool,
                        std::shared_ptr<Buffer>* out);
 
-/// \brief Write multiple record batches to OutputStream
+/// \brief Write multiple record batches to OutputStream, including schema
 /// \param[in] batches a vector of batches. Must all have same schema
/// \param[in] dst an OutputStream
 /// \return Status
@@ -202,18 +223,30 @@ ARROW_EXPORT
 Status WriteRecordBatchStream(const std::vector<std::shared_ptr<RecordBatch>>&
batches,
                               io::OutputStream* dst);
 
-// Compute the precise number of bytes needed in a contiguous memory segment to
-// write the record batch. This involves generating the complete serialized
-// Flatbuffers metadata.
+/// \brief Compute the number of bytes needed to write a record batch including metadata
+///
+/// \param[in] batch the record batch to write
+/// \param[out] size the size of the complete encapsulated message
+/// \return Status
 ARROW_EXPORT
 Status GetRecordBatchSize(const RecordBatch& batch, int64_t* size);
 
-// Compute the precise number of bytes needed in a contiguous memory segment to
-// write the tensor including metadata, padding, and data
+/// \brief Compute the number of bytes needed to write a tensor including metadata
+///
+/// \param[in] tensor the tensor to write
+/// \param[out] size the size of the complete encapsulated message
+/// \return Status
 ARROW_EXPORT
 Status GetTensorSize(const Tensor& tensor, int64_t* size);
 
-/// EXPERIMENTAL: Write arrow::Tensor as a contiguous message
+/// \brief EXPERIMENTAL: Write arrow::Tensor as a contiguous message
+///
+/// \param[in] tensor the Tensor to write
+/// \param[in] dst the OutputStream to write to
+/// \param[out] metadata_length the actual metadata length
+/// \param[out] body_length the actual message body length
+/// \return Status
+///
 /// <metadata size><metadata><tensor data>
 ARROW_EXPORT
 Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadata_length,

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/src/arrow/python/helpers.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index ad6a7f1..708d991 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -27,8 +27,7 @@ namespace py {
 
 #define GET_PRIMITIVE_TYPE(NAME, FACTORY) \
   case Type::NAME:                        \
-    return FACTORY();                     \
-    break;
+    return FACTORY()
 
 std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
   switch (type) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/0a4c5b17/cpp/src/plasma/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/plasma/CMakeLists.txt b/cpp/src/plasma/CMakeLists.txt
index c933954..4b76f25 100644
--- a/cpp/src/plasma/CMakeLists.txt
+++ b/cpp/src/plasma/CMakeLists.txt
@@ -96,12 +96,13 @@ if ("${COMPILER_FAMILY}" STREQUAL "clang")
     PROPERTY COMPILE_FLAGS
     " -Wno-parentheses-equality \
 -Wno-shorten-64-to-32 \
--Wno-unused-macros ")
+-Wno-unused-macros")
 
   set_property(SOURCE thirdparty/xxhash.cc
     APPEND_STRING
     PROPERTY COMPILE_FLAGS
-    "-Wno-unused-macros")
+    "-Wno-unused-macros \
+-Wno-unreachable-code")
 endif()
 
 if ("${COMPILER_FAMILY}" STREQUAL "gcc")


Mime
View raw message