arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-1002: [C++] Fix inconsistency with padding at start of IPC file format
Date Tue, 16 May 2017 01:40:42 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 47e289a9a -> ce0bb5338


ARROW-1002: [C++] Fix inconsistency with padding at start of IPC file format

cc @TheNeuralBit -- the 64-byte padding in the C++ file writer was incorrect (the docs at http://arrow.apache.org/docs/ipc.html
indicate padding to an 8-byte boundary), so this fixes that.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #693 from wesm/ARROW-1002 and squashes the following commits:

35c023f [Wes McKinney] Fix C++ inconsistency with padding at start of IPC file format


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ce0bb533
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ce0bb533
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ce0bb533

Branch: refs/heads/master
Commit: ce0bb5338496785e6c46d9832b75105883bef5de
Parents: 47e289a
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Mon May 15 21:40:36 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Mon May 15 21:40:36 2017 -0400

----------------------------------------------------------------------
 cpp/src/arrow/ipc/writer.cc | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/ce0bb533/cpp/src/arrow/ipc/writer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index ced0710..4f5edf2 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -509,7 +509,7 @@ class DictionaryWriter : public RecordBatchSerializer {
 };
 
 // Adds padding bytes if necessary to ensure all memory blocks are written on
-// 8-byte boundaries.
+// 64-byte boundaries.
 Status AlignStreamPosition(io::OutputStream* stream) {
   int64_t position;
   RETURN_NOT_OK(stream->Tell(&position));
@@ -687,9 +687,9 @@ class RecordBatchStreamWriter::RecordBatchStreamWriterImpl {
   }
 
   // Adds padding bytes if necessary to ensure all memory blocks are written on
-  // 8-byte boundaries.
-  Status Align() {
-    int64_t remainder = PaddedLength(position_) - position_;
+  // 64-byte (or other alignment) boundaries.
+  Status Align(int64_t alignment = kArrowAlignment) {
+    int64_t remainder = PaddedLength(position_, alignment) - position_;
     if (remainder > 0) { return Write(kPaddingBytes, remainder); }
     return Status::OK();
   }
@@ -701,12 +701,6 @@ class RecordBatchStreamWriter::RecordBatchStreamWriterImpl {
     return Status::OK();
   }
 
-  // Write and align
-  Status WriteAligned(const uint8_t* data, int64_t nbytes) {
-    RETURN_NOT_OK(Write(data, nbytes));
-    return Align();
-  }
-
   void set_memory_pool(MemoryPool* pool) { pool_ = pool; }
 
  protected:
@@ -762,8 +756,10 @@ class RecordBatchFileWriter::RecordBatchFileWriterImpl
   using BASE = RecordBatchStreamWriter::RecordBatchStreamWriterImpl;
 
   Status Start() override {
-    RETURN_NOT_OK(WriteAligned(
-        reinterpret_cast<const uint8_t*>(kArrowMagicBytes), strlen(kArrowMagicBytes)));
+    // It is only necessary to align to 8-byte boundary at the start of the file
+    RETURN_NOT_OK(Write(reinterpret_cast<const uint8_t*>(kArrowMagicBytes),
+            strlen(kArrowMagicBytes)));
+    RETURN_NOT_OK(Align(8));
 
     // We write the schema at the start of the file (and the end). This also
     // writes all the dictionaries at the beginning of the file


Mime
View raw message