arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [3/3] arrow git commit: ARROW-1154: [C++] Import miscellaneous computational utility code from parquet-cpp
Date Tue, 27 Jun 2017 12:14:06 GMT
ARROW-1154: [C++] Import miscellaneous computational utility code from parquet-cpp

I will make a corresponding PR to parquet-cpp to ensure that this code migration is complete
enough.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #785 from wesm/ARROW-1154 and squashes the following commits:

08b54c98 [Wes McKinney] Fix variety of compiler warnings
ddc7354b [Wes McKinney] Fixes to get PARQUET-1045 working
f5cd0259 [Wes McKinney] Import miscellaneous computational utility code from parquet-cpp


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/b0652287
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/b0652287
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/b0652287

Branch: refs/heads/master
Commit: b06522870cb212989dc619b3c50e080b05772bce
Parents: cb5f2b9
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Tue Jun 27 08:13:59 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Tue Jun 27 08:13:59 2017 -0400

----------------------------------------------------------------------
 LICENSE.txt                             |   11 +
 cpp/CMakeLists.txt                      |    5 +
 cpp/src/arrow/builder-benchmark.cc      |    8 +-
 cpp/src/arrow/compare.cc                |    4 +-
 cpp/src/arrow/ipc/reader.cc             |    4 +-
 cpp/src/arrow/status.h                  |   16 +-
 cpp/src/arrow/util/CMakeLists.txt       |    8 +
 cpp/src/arrow/util/bit-stream-utils.h   |  397 +++
 cpp/src/arrow/util/bit-util-test.cc     |  166 +-
 cpp/src/arrow/util/bit-util.h           |  329 ++-
 cpp/src/arrow/util/bpacking.h           | 3342 ++++++++++++++++++++++++++
 cpp/src/arrow/util/compiler-util.h      |   59 +
 cpp/src/arrow/util/cpu-info.cc          |  206 ++
 cpp/src/arrow/util/cpu-info.h           |   92 +
 cpp/src/arrow/util/hash-util.h          |  258 ++
 cpp/src/arrow/util/logging.h            |   12 +-
 cpp/src/arrow/util/rle-encoding-test.cc |  460 ++++
 cpp/src/arrow/util/rle-encoding.h       |  598 +++++
 cpp/src/arrow/util/sse-util.h           |  237 ++
 19 files changed, 6191 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/b0652287/LICENSE.txt
----------------------------------------------------------------------
diff --git a/LICENSE.txt b/LICENSE.txt
index 95c506f..55823cb 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -330,3 +330,14 @@ Apache 2.0 License or the under the 3-clause BSD license:
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+This project includes code from Daniel Lemire's FrameOfReference project.
+
+https://github.com/lemire/FrameOfReference/blob/6ccaf9e97160f9a3b299e23a8ef739e711ef0c71/src/bpacking.cpp
+
+Copyright: 2013 Daniel Lemire
+Home page: http://lemire.me/en/
+Project page: https://github.com/lemire/FrameOfReference
+License: Apache License Version 2.0 http://www.apache.org/licenses/LICENSE-2.0

http://git-wip-us.apache.org/repos/asf/arrow/blob/b0652287/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index ca34101..1d1ffbe 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -137,6 +137,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
     "Build the plasma object store along with Arrow"
     OFF)
 
+  option(ARROW_USE_SSE
+    "Build with SSE4 optimizations"
+    OFF)
+
   option(ARROW_ZLIB_VENDORED
     "Build our own zlib (some libz.a aren't configured for static linking)"
     ON)
@@ -650,6 +654,7 @@ set(ARROW_SRCS
 
   src/arrow/util/bit-util.cc
   src/arrow/util/compression.cc
+  src/arrow/util/cpu-info.cc
   src/arrow/util/decimal.cc
   src/arrow/util/key_value_metadata.cc
 )

http://git-wip-us.apache.org/repos/asf/arrow/blob/b0652287/cpp/src/arrow/builder-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder-benchmark.cc b/cpp/src/arrow/builder-benchmark.cc
index 3c49c63..bdfba8b 100644
--- a/cpp/src/arrow/builder-benchmark.cc
+++ b/cpp/src/arrow/builder-benchmark.cc
@@ -25,10 +25,10 @@ namespace arrow {
 
 constexpr int64_t kFinalSize = 256;
 
-#define ABORT_NOT_OK(s)                                 \
-  do {                                                  \
-    ::arrow::Status _s = (s);                           \
-    if (ARROW_PREDICT_FALSE(!_s.ok())) { exit(-1); }    \
+#define ABORT_NOT_OK(s)                              \
+  do {                                               \
+    ::arrow::Status _s = (s);                        \
+    if (ARROW_PREDICT_FALSE(!_s.ok())) { exit(-1); } \
   } while (0);
 
 static void BM_BuildPrimitiveArrayNoNulls(

http://git-wip-us.apache.org/repos/asf/arrow/blob/b0652287/cpp/src/arrow/compare.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index c2f4f84..390a406 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -323,8 +323,8 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor {
       : RangeEqualsVisitor(right, 0, right.length(), 0) {}
 
   Status Visit(const NullArray& left) {
-      result_ = true;
-      return Status::OK();
+    result_ = true;
+    return Status::OK();
   }
 
   Status Visit(const BooleanArray& left) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/b0652287/cpp/src/arrow/ipc/reader.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 7fef847..cb747b6 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -188,8 +188,8 @@ class RecordBatchStreamReader::RecordBatchStreamReaderImpl {
     return ReadSchema();
   }
 
-  Status ReadNextMessage(Message::Type expected_type, bool allow_null,
-      std::shared_ptr<Message>* message) {
+  Status ReadNextMessage(
+      Message::Type expected_type, bool allow_null, std::shared_ptr<Message>* message)
{
     RETURN_NOT_OK(ReadMessage(stream_.get(), message));
 
     if (!(*message) && !allow_null) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/b0652287/cpp/src/arrow/status.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h
index 9a75a58..bfb945c 100644
--- a/cpp/src/arrow/status.h
+++ b/cpp/src/arrow/status.h
@@ -23,10 +23,10 @@
 #include "arrow/util/visibility.h"
 
 // Return the given status if it is not OK.
-#define ARROW_RETURN_NOT_OK(s)                          \
-  do {                                                  \
-    ::arrow::Status _s = (s);                           \
-    if (ARROW_PREDICT_FALSE(!_s.ok())) { return _s; }   \
+#define ARROW_RETURN_NOT_OK(s)                        \
+  do {                                                \
+    ::arrow::Status _s = (s);                         \
+    if (ARROW_PREDICT_FALSE(!_s.ok())) { return _s; } \
   } while (0);
 
 // If 'to_call' returns a bad status, CHECK immediately with a logged message
@@ -43,10 +43,10 @@
 
 namespace arrow {
 
-#define RETURN_NOT_OK(s)                                \
-  do {                                                  \
-    Status _s = (s);                                    \
-    if (ARROW_PREDICT_FALSE(!_s.ok())) { return _s; }   \
+#define RETURN_NOT_OK(s)                              \
+  do {                                                \
+    Status _s = (s);                                  \
+    if (ARROW_PREDICT_FALSE(!_s.ok())) { return _s; } \
   } while (0);
 
 #define RETURN_NOT_OK_ELSE(s, else_) \

http://git-wip-us.apache.org/repos/asf/arrow/blob/b0652287/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index 1abcce4..bc1eeb2 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -22,11 +22,18 @@
 # Headers: top level
 install(FILES
   bit-util.h
+  bit-stream-utils.h
+  bpacking.h
+  compiler-util.h
   compression.h
+  cpu-info.h
   key_value_metadata.h
+  hash-util.h
   logging.h
   macros.h
   random.h
+  rle-encoding.h
+  sse-util.h
   stl.h
   visibility.h
   DESTINATION include/arrow/util)
@@ -56,4 +63,5 @@ ADD_ARROW_TEST(bit-util-test)
 ADD_ARROW_TEST(compression-test)
 ADD_ARROW_TEST(decimal-test)
 ADD_ARROW_TEST(key-value-metadata-test)
+ADD_ARROW_TEST(rle-encoding-test)
 ADD_ARROW_TEST(stl-util-test)

http://git-wip-us.apache.org/repos/asf/arrow/blob/b0652287/cpp/src/arrow/util/bit-stream-utils.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-stream-utils.h b/cpp/src/arrow/util/bit-stream-utils.h
new file mode 100644
index 0000000..537fdc3
--- /dev/null
+++ b/cpp/src/arrow/util/bit-stream-utils.h
@@ -0,0 +1,397 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// From Apache Impala (incubating) as of 2016-01-29
+
+#ifndef ARROW_UTIL_BIT_STREAM_UTILS_H
+#define ARROW_UTIL_BIT_STREAM_UTILS_H
+
+#include <algorithm>
+#include <cstdint>
+#include <string.h>
+
+#include "arrow/util/bit-util.h"
+#include "arrow/util/bpacking.h"
+#include "arrow/util/compiler-util.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+/// Utility class to write bit/byte streams.  This class can write data to either be
+/// bit packed or byte aligned (and a single stream that has a mix of both).
+/// This class does not allocate memory.
+class BitWriter {
+ public:
+  /// buffer: buffer to write bits to.  Buffer should be preallocated with
+  /// 'buffer_len' bytes.
+  BitWriter(uint8_t* buffer, int buffer_len) : buffer_(buffer), max_bytes_(buffer_len) {
+    Clear();
+  }
+
+  void Clear() {
+    buffered_values_ = 0;
+    byte_offset_ = 0;
+    bit_offset_ = 0;
+  }
+
+  /// The number of current bytes written, including the current byte (i.e. may include a
+  /// fraction of a byte). Includes buffered values.
+  int bytes_written() const {
+    return byte_offset_ + static_cast<int>(BitUtil::Ceil(bit_offset_, 8));
+  }
+  uint8_t* buffer() const { return buffer_; }
+  int buffer_len() const { return max_bytes_; }
+
+  /// Writes a value to buffered_values_, flushing to buffer_ if necessary.  This is bit
+  /// packed.  Returns false if there was not enough space. num_bits must be <= 32.
+  bool PutValue(uint64_t v, int num_bits);
+
+  /// Writes v to the next aligned byte using num_bytes. If T is larger than
+  /// num_bytes, the extra high-order bytes will be ignored. Returns false if
+  /// there was not enough space.
+  template <typename T>
+  bool PutAligned(T v, int num_bytes);
+
+  /// Write a Vlq encoded int to the buffer.  Returns false if there was not enough
+  /// room.  The value is written byte aligned.
+  /// For more details on vlq:
+  /// en.wikipedia.org/wiki/Variable-length_quantity
+  bool PutVlqInt(uint32_t v);
+
+  // Writes an int zigzag encoded.
+  bool PutZigZagVlqInt(int32_t v);
+
+  /// Get a pointer to the next aligned byte and advance the underlying buffer
+  /// by num_bytes.
+  /// Returns NULL if there was not enough space.
+  uint8_t* GetNextBytePtr(int num_bytes = 1);
+
+  /// Flushes all buffered values to the buffer. Call this when done writing to
+  /// the buffer.  If 'align' is true, buffered_values_ is reset and any future
+  /// writes will be written to the next byte boundary.
+  void Flush(bool align = false);
+
+ private:
+  uint8_t* buffer_;
+  int max_bytes_;
+
+  /// Bit-packed values are initially written to this variable before being memcpy'd to
+  /// buffer_. This is faster than writing values byte by byte directly to buffer_.
+  uint64_t buffered_values_;
+
+  int byte_offset_;  // Offset in buffer_
+  int bit_offset_;   // Offset in buffered_values_
+};
+
+/// Utility class to read bit/byte stream.  This class can read bits or bytes
+/// that are either byte aligned or not.  It also has utilities to read multiple
+/// bytes in one read (e.g. encoded int).
+class BitReader {
+ public:
+  /// 'buffer' is the buffer to read from.  The buffer's length is 'buffer_len'.
+  BitReader(const uint8_t* buffer, int buffer_len)
+      : buffer_(buffer), max_bytes_(buffer_len), byte_offset_(0), bit_offset_(0) {
+    int num_bytes = std::min(8, max_bytes_ - byte_offset_);
+    memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
+  }
+
+  BitReader() : buffer_(NULL), max_bytes_(0) {}
+
+  void Reset(const uint8_t* buffer, int buffer_len) {
+    buffer_ = buffer;
+    max_bytes_ = buffer_len;
+    byte_offset_ = 0;
+    bit_offset_ = 0;
+    int num_bytes = std::min(8, max_bytes_ - byte_offset_);
+    memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
+  }
+
+  /// Gets the next value from the buffer.  Returns true if 'v' could be read or false if
+  /// there are not enough bytes left. num_bits must be <= 32.
+  template <typename T>
+  bool GetValue(int num_bits, T* v);
+
+  /// Get a number of values from the buffer. Return the number of values actually read.
+  template <typename T>
+  int GetBatch(int num_bits, T* v, int batch_size);
+
+  /// Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T
+  /// needs to be a little-endian native type and big enough to store
+  /// 'num_bytes'. The value is assumed to be byte-aligned so the stream will
+  /// be advanced to the start of the next byte before 'v' is read. Returns
+  /// false if there are not enough bytes left.
+  template <typename T>
+  bool GetAligned(int num_bytes, T* v);
+
+  /// Reads a vlq encoded int from the stream.  The encoded int must start at
+  /// the beginning of a byte. Return false if there were not enough bytes in
+  /// the buffer.
+  bool GetVlqInt(int32_t* v);
+
+  // Reads a zigzag encoded int `into` v.
+  bool GetZigZagVlqInt(int32_t* v);
+
+  /// Returns the number of bytes left in the stream, not including the current
+  /// byte (i.e., there may be an additional fraction of a byte).
+  int bytes_left() {
+    return max_bytes_ - (byte_offset_ + static_cast<int>(BitUtil::Ceil(bit_offset_,
8)));
+  }
+
+  /// Maximum byte length of a vlq encoded int
+  static const int MAX_VLQ_BYTE_LEN = 5;
+
+ private:
+  const uint8_t* buffer_;
+  int max_bytes_;
+
+  /// Bytes are memcpy'd from buffer_ and values are read from this variable. This is
+  /// faster than reading values byte by byte directly from buffer_.
+  uint64_t buffered_values_;
+
+  int byte_offset_;  // Offset in buffer_
+  int bit_offset_;   // Offset in buffered_values_
+};
+
+inline bool BitWriter::PutValue(uint64_t v, int num_bits) {
+  // TODO: revisit this limit if necessary (can be raised to 64 by fixing some edge cases)
+  DCHECK_LE(num_bits, 32);
+  DCHECK_EQ(v >> num_bits, 0) << "v = " << v << ", num_bits = " <<
num_bits;
+
+  if (UNLIKELY(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false;
+
+  buffered_values_ |= v << bit_offset_;
+  bit_offset_ += num_bits;
+
+  if (UNLIKELY(bit_offset_ >= 64)) {
+    // Flush buffered_values_ and write out bits of v that did not fit
+    memcpy(buffer_ + byte_offset_, &buffered_values_, 8);
+    buffered_values_ = 0;
+    byte_offset_ += 8;
+    bit_offset_ -= 64;
+    buffered_values_ = v >> (num_bits - bit_offset_);
+  }
+  DCHECK_LT(bit_offset_, 64);
+  return true;
+}
+
+inline void BitWriter::Flush(bool align) {
+  int num_bytes = static_cast<int>(BitUtil::Ceil(bit_offset_, 8));
+  DCHECK_LE(byte_offset_ + num_bytes, max_bytes_);
+  memcpy(buffer_ + byte_offset_, &buffered_values_, num_bytes);
+
+  if (align) {
+    buffered_values_ = 0;
+    byte_offset_ += num_bytes;
+    bit_offset_ = 0;
+  }
+}
+
+inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) {
+  Flush(/* align */ true);
+  DCHECK_LE(byte_offset_, max_bytes_);
+  if (byte_offset_ + num_bytes > max_bytes_) return NULL;
+  uint8_t* ptr = buffer_ + byte_offset_;
+  byte_offset_ += num_bytes;
+  return ptr;
+}
+
+template <typename T>
+inline bool BitWriter::PutAligned(T val, int num_bytes) {
+  uint8_t* ptr = GetNextBytePtr(num_bytes);
+  if (ptr == NULL) return false;
+  memcpy(ptr, &val, num_bytes);
+  return true;
+}
+
+inline bool BitWriter::PutVlqInt(uint32_t v) {
+  bool result = true;
+  while ((v & 0xFFFFFF80) != 0L) {
+    result &= PutAligned<uint8_t>(static_cast<uint8_t>((v & 0x7F) | 0x80),
1);
+    v >>= 7;
+  }
+  result &= PutAligned<uint8_t>(static_cast<uint8_t>(v & 0x7F), 1);
+  return result;
+}
+
+template <typename T>
+inline void GetValue_(int num_bits, T* v, int max_bytes, const uint8_t* buffer,
+    int* bit_offset, int* byte_offset, uint64_t* buffered_values) {
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4800)
+#endif
+  *v = static_cast<T>(
+      BitUtil::TrailingBits(*buffered_values, *bit_offset + num_bits) >> *bit_offset);
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+  *bit_offset += num_bits;
+  if (*bit_offset >= 64) {
+    *byte_offset += 8;
+    *bit_offset -= 64;
+
+    int bytes_remaining = max_bytes - *byte_offset;
+    if (LIKELY(bytes_remaining >= 8)) {
+      memcpy(buffered_values, buffer + *byte_offset, 8);
+    } else {
+      memcpy(buffered_values, buffer + *byte_offset, bytes_remaining);
+    }
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4800 4805)
+#endif
+    // Read bits of v that crossed into new buffered_values_
+    *v = *v | static_cast<T>(BitUtil::TrailingBits(*buffered_values, *bit_offset)
+                             << (num_bits - *bit_offset));
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+    DCHECK_LE(*bit_offset, 64);
+  }
+}
+
+template <typename T>
+inline bool BitReader::GetValue(int num_bits, T* v) {
+  return GetBatch(num_bits, v, 1) == 1;
+}
+
+template <typename T>
+inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) {
+  DCHECK(buffer_ != NULL);
+  // TODO: revisit this limit if necessary
+  DCHECK_LE(num_bits, 32);
+  DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8));
+
+  int bit_offset = bit_offset_;
+  int byte_offset = byte_offset_;
+  uint64_t buffered_values = buffered_values_;
+  int max_bytes = max_bytes_;
+  const uint8_t* buffer = buffer_;
+
+  uint64_t needed_bits = num_bits * batch_size;
+  uint64_t remaining_bits = (max_bytes - byte_offset) * 8 - bit_offset;
+  if (remaining_bits < needed_bits) {
+    batch_size = static_cast<int>(remaining_bits) / num_bits;
+  }
+
+  int i = 0;
+  if (UNLIKELY(bit_offset != 0)) {
+    for (; i < batch_size && bit_offset != 0; ++i) {
+      GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset,
+          &buffered_values);
+    }
+  }
+
+  if (sizeof(T) == 4) {
+    int num_unpacked = unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset),
+        reinterpret_cast<uint32_t*>(v + i), batch_size - i, num_bits);
+    i += num_unpacked;
+    byte_offset += num_unpacked * num_bits / 8;
+  } else {
+    const int buffer_size = 1024;
+    uint32_t unpack_buffer[buffer_size];
+    while (i < batch_size) {
+      int unpack_size = std::min(buffer_size, batch_size - i);
+      int num_unpacked = unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset),
+          unpack_buffer, unpack_size, num_bits);
+      if (num_unpacked == 0) { break; }
+      for (int k = 0; k < num_unpacked; ++k) {
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4800)
+#endif
+        v[i + k] = static_cast<T>(unpack_buffer[k]);
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+      }
+      i += num_unpacked;
+      byte_offset += num_unpacked * num_bits / 8;
+    }
+  }
+
+  int bytes_remaining = max_bytes - byte_offset;
+  if (bytes_remaining >= 8) {
+    memcpy(&buffered_values, buffer + byte_offset, 8);
+  } else {
+    memcpy(&buffered_values, buffer + byte_offset, bytes_remaining);
+  }
+
+  for (; i < batch_size; ++i) {
+    GetValue_(
+        num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset, &buffered_values);
+  }
+
+  bit_offset_ = bit_offset;
+  byte_offset_ = byte_offset;
+  buffered_values_ = buffered_values;
+
+  return batch_size;
+}
+
+template <typename T>
+inline bool BitReader::GetAligned(int num_bytes, T* v) {
+  DCHECK_LE(num_bytes, static_cast<int>(sizeof(T)));
+  int bytes_read = static_cast<int>(BitUtil::Ceil(bit_offset_, 8));
+  if (UNLIKELY(byte_offset_ + bytes_read + num_bytes > max_bytes_)) return false;
+
+  // Advance byte_offset to next unread byte and read num_bytes
+  byte_offset_ += bytes_read;
+  memcpy(v, buffer_ + byte_offset_, num_bytes);
+  byte_offset_ += num_bytes;
+
+  // Reset buffered_values_
+  bit_offset_ = 0;
+  int bytes_remaining = max_bytes_ - byte_offset_;
+  if (LIKELY(bytes_remaining >= 8)) {
+    memcpy(&buffered_values_, buffer_ + byte_offset_, 8);
+  } else {
+    memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining);
+  }
+  return true;
+}
+
+inline bool BitReader::GetVlqInt(int32_t* v) {
+  *v = 0;
+  int shift = 0;
+  int num_bytes = 0;
+  uint8_t byte = 0;
+  do {
+    if (!GetAligned<uint8_t>(1, &byte)) return false;
+    *v |= (byte & 0x7F) << shift;
+    shift += 7;
+    DCHECK_LE(++num_bytes, MAX_VLQ_BYTE_LEN);
+  } while ((byte & 0x80) != 0);
+  return true;
+}
+
+inline bool BitWriter::PutZigZagVlqInt(int32_t v) {
+  uint32_t u = (v << 1) ^ (v >> 31);
+  return PutVlqInt(u);
+}
+
+inline bool BitReader::GetZigZagVlqInt(int32_t* v) {
+  int32_t u_signed;
+  if (!GetVlqInt(&u_signed)) return false;
+  uint32_t u = static_cast<uint32_t>(u_signed);
+  *reinterpret_cast<uint32_t*>(v) = (u >> 1) ^ -(static_cast<int32_t>(u
& 1));
+  return true;
+}
+
+}  // namespace arrow
+
+#endif  // ARROW_UTIL_BIT_STREAM_UTILS_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/b0652287/cpp/src/arrow/util/bit-util-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc
index cb2fd1a..cd94558 100644
--- a/cpp/src/arrow/util/bit-util-test.cc
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -15,18 +15,29 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "arrow/util/bit-util.h"
-
 #include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
 #include <vector>
 
 #include "gtest/gtest.h"
 
+#include <boost/utility.hpp>
+
 #include "arrow/buffer.h"
 #include "arrow/test-util.h"
+#include "arrow/util/bit-stream-utils.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/cpu-info.h"
 
 namespace arrow {
 
+static void EnsureCpuInfoInitialized() {
+  if (!CpuInfo::initialized()) { CpuInfo::Init(); }
+}
+
 TEST(BitUtilTests, TestIsMultipleOf64) {
   using BitUtil::IsMultipleOf64;
   EXPECT_TRUE(IsMultipleOf64(64));
@@ -109,4 +120,155 @@ TEST(BitUtilTests, TestCopyBitmap) {
   }
 }
 
+TEST(BitUtil, Ceil) {
+  EXPECT_EQ(BitUtil::Ceil(0, 1), 0);
+  EXPECT_EQ(BitUtil::Ceil(1, 1), 1);
+  EXPECT_EQ(BitUtil::Ceil(1, 2), 1);
+  EXPECT_EQ(BitUtil::Ceil(1, 8), 1);
+  EXPECT_EQ(BitUtil::Ceil(7, 8), 1);
+  EXPECT_EQ(BitUtil::Ceil(8, 8), 1);
+  EXPECT_EQ(BitUtil::Ceil(9, 8), 2);
+  EXPECT_EQ(BitUtil::Ceil(9, 9), 1);
+  EXPECT_EQ(BitUtil::Ceil(10000000000, 10), 1000000000);
+  EXPECT_EQ(BitUtil::Ceil(10, 10000000000), 1);
+  EXPECT_EQ(BitUtil::Ceil(100000000000, 10000000000), 10);
+}
+
+TEST(BitUtil, RoundUp) {
+  EXPECT_EQ(BitUtil::RoundUp(0, 1), 0);
+  EXPECT_EQ(BitUtil::RoundUp(1, 1), 1);
+  EXPECT_EQ(BitUtil::RoundUp(1, 2), 2);
+  EXPECT_EQ(BitUtil::RoundUp(6, 2), 6);
+  EXPECT_EQ(BitUtil::RoundUp(7, 3), 9);
+  EXPECT_EQ(BitUtil::RoundUp(9, 9), 9);
+  EXPECT_EQ(BitUtil::RoundUp(10000000001, 10), 10000000010);
+  EXPECT_EQ(BitUtil::RoundUp(10, 10000000000), 10000000000);
+  EXPECT_EQ(BitUtil::RoundUp(100000000000, 10000000000), 100000000000);
+}
+
+TEST(BitUtil, RoundDown) {
+  EXPECT_EQ(BitUtil::RoundDown(0, 1), 0);
+  EXPECT_EQ(BitUtil::RoundDown(1, 1), 1);
+  EXPECT_EQ(BitUtil::RoundDown(1, 2), 0);
+  EXPECT_EQ(BitUtil::RoundDown(6, 2), 6);
+  EXPECT_EQ(BitUtil::RoundDown(7, 3), 6);
+  EXPECT_EQ(BitUtil::RoundDown(9, 9), 9);
+  EXPECT_EQ(BitUtil::RoundDown(10000000001, 10), 10000000000);
+  EXPECT_EQ(BitUtil::RoundDown(10, 10000000000), 0);
+  EXPECT_EQ(BitUtil::RoundDown(100000000000, 10000000000), 100000000000);
+}
+
+TEST(BitUtil, Popcount) {
+  EnsureCpuInfoInitialized();
+
+  EXPECT_EQ(BitUtil::Popcount(BOOST_BINARY(0 1 0 1 0 1 0 1)), 4);
+  EXPECT_EQ(BitUtil::PopcountNoHw(BOOST_BINARY(0 1 0 1 0 1 0 1)), 4);
+  EXPECT_EQ(BitUtil::Popcount(BOOST_BINARY(1 1 1 1 0 1 0 1)), 6);
+  EXPECT_EQ(BitUtil::PopcountNoHw(BOOST_BINARY(1 1 1 1 0 1 0 1)), 6);
+  EXPECT_EQ(BitUtil::Popcount(BOOST_BINARY(1 1 1 1 1 1 1 1)), 8);
+  EXPECT_EQ(BitUtil::PopcountNoHw(BOOST_BINARY(1 1 1 1 1 1 1 1)), 8);
+  EXPECT_EQ(BitUtil::Popcount(0), 0);
+  EXPECT_EQ(BitUtil::PopcountNoHw(0), 0);
+}
+
+TEST(BitUtil, TrailingBits) {
+  EXPECT_EQ(BitUtil::TrailingBits(BOOST_BINARY(1 1 1 1 1 1 1 1), 0), 0);
+  EXPECT_EQ(BitUtil::TrailingBits(BOOST_BINARY(1 1 1 1 1 1 1 1), 1), 1);
+  EXPECT_EQ(BitUtil::TrailingBits(BOOST_BINARY(1 1 1 1 1 1 1 1), 64),
+      BOOST_BINARY(1 1 1 1 1 1 1 1));
+  EXPECT_EQ(BitUtil::TrailingBits(BOOST_BINARY(1 1 1 1 1 1 1 1), 100),
+      BOOST_BINARY(1 1 1 1 1 1 1 1));
+  EXPECT_EQ(BitUtil::TrailingBits(0, 1), 0);
+  EXPECT_EQ(BitUtil::TrailingBits(0, 64), 0);
+  EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 0), 0);
+  EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 63), 0);
+  EXPECT_EQ(BitUtil::TrailingBits(1LL << 63, 64), 1LL << 63);
+}
+
+TEST(BitUtil, ByteSwap) {
+  EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint32_t>(0)), 0);
+  EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint32_t>(0x11223344)), 0x44332211);
+
+  EXPECT_EQ(BitUtil::ByteSwap(static_cast<int32_t>(0)), 0);
+  EXPECT_EQ(BitUtil::ByteSwap(static_cast<int32_t>(0x11223344)), 0x44332211);
+
+  EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint64_t>(0)), 0);
+  EXPECT_EQ(
+      BitUtil::ByteSwap(static_cast<uint64_t>(0x1122334455667788)), 0x8877665544332211);
+
+  EXPECT_EQ(BitUtil::ByteSwap(static_cast<int64_t>(0)), 0);
+  EXPECT_EQ(
+      BitUtil::ByteSwap(static_cast<int64_t>(0x1122334455667788)), 0x8877665544332211);
+
+  EXPECT_EQ(BitUtil::ByteSwap(static_cast<int16_t>(0)), 0);
+  EXPECT_EQ(BitUtil::ByteSwap(static_cast<int16_t>(0x1122)), 0x2211);
+
+  EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint16_t>(0)), 0);
+  EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint16_t>(0x1122)), 0x2211);
+}
+
+TEST(BitUtil, Log2) {
+  EXPECT_EQ(BitUtil::Log2(1), 0);
+  EXPECT_EQ(BitUtil::Log2(2), 1);
+  EXPECT_EQ(BitUtil::Log2(3), 2);
+  EXPECT_EQ(BitUtil::Log2(4), 2);
+  EXPECT_EQ(BitUtil::Log2(5), 3);
+  EXPECT_EQ(BitUtil::Log2(INT_MAX), 31);
+  EXPECT_EQ(BitUtil::Log2(UINT_MAX), 32);
+  EXPECT_EQ(BitUtil::Log2(ULLONG_MAX), 64);
+}
+
+TEST(BitUtil, RoundUpToPowerOf2) {
+  EXPECT_EQ(BitUtil::RoundUpToPowerOf2(7, 8), 8);
+  EXPECT_EQ(BitUtil::RoundUpToPowerOf2(8, 8), 8);
+  EXPECT_EQ(BitUtil::RoundUpToPowerOf2(9, 8), 16);
+}
+
+TEST(BitUtil, RoundDownToPowerOf2) {
+  EXPECT_EQ(BitUtil::RoundDownToPowerOf2(7, 8), 0);
+  EXPECT_EQ(BitUtil::RoundDownToPowerOf2(8, 8), 8);
+  EXPECT_EQ(BitUtil::RoundDownToPowerOf2(9, 8), 8);
+}
+
+TEST(BitUtil, RoundUpDown) {
+  EXPECT_EQ(BitUtil::RoundUpNumBytes(7), 1);
+  EXPECT_EQ(BitUtil::RoundUpNumBytes(8), 1);
+  EXPECT_EQ(BitUtil::RoundUpNumBytes(9), 2);
+  EXPECT_EQ(BitUtil::RoundDownNumBytes(7), 0);
+  EXPECT_EQ(BitUtil::RoundDownNumBytes(8), 1);
+  EXPECT_EQ(BitUtil::RoundDownNumBytes(9), 1);
+
+  EXPECT_EQ(BitUtil::RoundUpNumi32(31), 1);
+  EXPECT_EQ(BitUtil::RoundUpNumi32(32), 1);
+  EXPECT_EQ(BitUtil::RoundUpNumi32(33), 2);
+  EXPECT_EQ(BitUtil::RoundDownNumi32(31), 0);
+  EXPECT_EQ(BitUtil::RoundDownNumi32(32), 1);
+  EXPECT_EQ(BitUtil::RoundDownNumi32(33), 1);
+
+  EXPECT_EQ(BitUtil::RoundUpNumi64(63), 1);
+  EXPECT_EQ(BitUtil::RoundUpNumi64(64), 1);
+  EXPECT_EQ(BitUtil::RoundUpNumi64(65), 2);
+  EXPECT_EQ(BitUtil::RoundDownNumi64(63), 0);
+  EXPECT_EQ(BitUtil::RoundDownNumi64(64), 1);
+  EXPECT_EQ(BitUtil::RoundDownNumi64(65), 1);
+}
+
+void TestZigZag(int32_t v) {
+  uint8_t buffer[BitReader::MAX_VLQ_BYTE_LEN];
+  BitWriter writer(buffer, sizeof(buffer));
+  BitReader reader(buffer, sizeof(buffer));
+  writer.PutZigZagVlqInt(v);
+  int32_t result;
+  EXPECT_TRUE(reader.GetZigZagVlqInt(&result));
+  EXPECT_EQ(v, result);
+}
+
+TEST(BitStreamUtil, ZigZag) {
+  TestZigZag(0);
+  TestZigZag(1);
+  TestZigZag(-1);
+  TestZigZag(std::numeric_limits<int32_t>::max());
+  TestZigZag(-std::numeric_limits<int32_t>::max());
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/b0652287/cpp/src/arrow/util/bit-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 90a1c3e..bba9d2d 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -18,15 +18,77 @@
 #ifndef ARROW_UTIL_BIT_UTIL_H
 #define ARROW_UTIL_BIT_UTIL_H
 
+#if defined(__APPLE__)
+#include <machine/endian.h>
+#elif defined(_WIN32)
+#define __LITTLE_ENDIAN 1
+#else
+#include <endian.h>
+#endif
+
+#if defined(_MSC_VER)
+#define ARROW_BYTE_SWAP64 _byteswap_uint64
+#define ARROW_BYTE_SWAP32 _byteswap_ulong
+#else
+#define ARROW_BYTE_SWAP64 __builtin_bswap64
+#define ARROW_BYTE_SWAP32 __builtin_bswap32
+#endif
+
 #include <cstdint>
 #include <limits>
 #include <memory>
 #include <vector>
 
+#include "arrow/util/compiler-util.h"
 #include "arrow/util/visibility.h"
 
+#ifdef ARROW_USE_SSE
+#include "arrow/util/cpu-info.h"
+#include "arrow/util/sse-util.h"
+#endif
+
 namespace arrow {
 
+#define INIT_BITSET(valid_bits_vector, valid_bits_index)        \
+  int byte_offset_##valid_bits_vector = (valid_bits_index) / 8; \
+  int bit_offset_##valid_bits_vector = (valid_bits_index) % 8;  \
+  uint8_t bitset_##valid_bits_vector = valid_bits_vector[byte_offset_##valid_bits_vector];
+
+#define READ_NEXT_BITSET(valid_bits_vector)                                          \
+  bit_offset_##valid_bits_vector++;                                                  \
+  if (bit_offset_##valid_bits_vector == 8) {                                         \
+    bit_offset_##valid_bits_vector = 0;                                              \
+    byte_offset_##valid_bits_vector++;                                               \
+    bitset_##valid_bits_vector = valid_bits_vector[byte_offset_##valid_bits_vector]; \
+  }
+
+// TODO(wesm): The source from Impala was depending on boost::make_unsigned
+//
+// We add a partial stub implementation here
+
+template <typename T>
+struct make_unsigned {};
+
+template <>
+struct make_unsigned<int8_t> {
+  typedef uint8_t type;
+};
+
+template <>
+struct make_unsigned<int16_t> {
+  typedef uint16_t type;
+};
+
+template <>
+struct make_unsigned<int32_t> {
+  typedef uint32_t type;
+};
+
+template <>
+struct make_unsigned<int64_t> {
+  typedef uint64_t type;
+};
+
 class Buffer;
 class MemoryPool;
 class MutableBuffer;
@@ -67,6 +129,11 @@ static inline void SetBit(uint8_t* bits, int64_t i) {
   bits[i / 8] |= kBitmask[i % 8];
 }
 
+/// Set bit if is_set is true, but cannot clear bit
+static inline void SetArrayBit(uint8_t* bits, int i, bool is_set) {
+  if (is_set) { SetBit(bits, i); }
+}
+
 static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
   // TODO: speed up. See https://graphics.stanford.edu/~seander/bithacks.html
   // "Conditionally set or clear bits without branching"
@@ -77,6 +144,18 @@ static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set)
{
   }
 }
 
+// Returns the minimum number of bits needed to represent the value of 'x'
+static inline int NumRequiredBits(uint64_t x) {
+  for (int i = 63; i >= 0; --i) {
+    if (x & (UINT64_C(1) << i)) return i + 1;
+  }
+  return 0;
+}
+
+/// Returns the smallest power of two that contains v. Taken from
+/// http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+/// TODO: Pick a better name, as it is not clear what happens when the input is
+/// already a power of two.
 static inline int64_t NextPower2(int64_t n) {
   n--;
   n |= n >> 1;
@@ -97,12 +176,66 @@ static inline bool IsMultipleOf8(int64_t n) {
   return (n & 7) == 0;
 }
 
+/// Returns the ceil of value/divisor
+static inline int64_t Ceil(int64_t value, int64_t divisor) {
+  return value / divisor + (value % divisor != 0);
+}
+
 /// Returns 'value' rounded up to the nearest multiple of 'factor'
 inline int64_t RoundUp(int64_t value, int64_t factor) {
   return (value + (factor - 1)) / factor * factor;
 }
 
-inline int64_t RoundUpToMultipleOf64(int64_t num) {
+/// Returns 'value' rounded down to the nearest multiple of 'factor'
+static inline int64_t RoundDown(int64_t value, int64_t factor) {
+  return (value / factor) * factor;
+}
+
+/// Returns 'value' rounded up to the nearest multiple of 'factor' when factor is
+/// a power of two
+static inline int RoundUpToPowerOf2(int value, int factor) {
+  // DCHECK((factor > 0) && ((factor & (factor - 1)) == 0));
+  return (value + (factor - 1)) & ~(factor - 1);
+}
+
+static inline int RoundDownToPowerOf2(int value, int factor) {
+  // DCHECK((factor > 0) && ((factor & (factor - 1)) == 0));
+  return value & ~(factor - 1);
+}
+
+/// Specialized round up and down functions for frequently used factors,
+/// like 8 (bits->bytes), 32 (bits->i32), and 64 (bits->i64).
+/// Returns the rounded up number of bytes that fit the number of bits.
+static inline uint32_t RoundUpNumBytes(uint32_t bits) {
+  return (bits + 7) >> 3;
+}
+
+/// Returns the rounded down number of bytes that fit the number of bits.
+static inline uint32_t RoundDownNumBytes(uint32_t bits) {
+  return bits >> 3;
+}
+
+/// Returns the rounded up to 32 multiple. Used for conversions of bits to i32.
+static inline uint32_t RoundUpNumi32(uint32_t bits) {
+  return (bits + 31) >> 5;
+}
+
+/// Returns the rounded up 32 multiple.
+static inline uint32_t RoundDownNumi32(uint32_t bits) {
+  return bits >> 5;
+}
+
+/// Returns the rounded up to 64 multiple. Used for conversions of bits to i64.
+static inline uint32_t RoundUpNumi64(uint32_t bits) {
+  return (bits + 63) >> 6;
+}
+
+/// Returns the rounded down to 64 multiple.
+static inline uint32_t RoundDownNumi64(uint32_t bits) {
+  return bits >> 6;
+}
+
+static inline int64_t RoundUpToMultipleOf64(int64_t num) {
   // TODO(wesm): is this definitely needed?
   // DCHECK_GE(num, 0);
   constexpr int64_t round_to = 64;
@@ -114,6 +247,200 @@ inline int64_t RoundUpToMultipleOf64(int64_t num) {
   return num;
 }
 
+/// Non hw accelerated pop count.
+/// TODO: we don't use this in any perf sensitive code paths currently.  There
+/// might be a much faster way to implement this.
+static inline int PopcountNoHw(uint64_t x) {
+  int count = 0;
+  for (; x != 0; ++count)
+    x &= x - 1;
+  return count;
+}
+
+/// Returns the number of set bits in x
+static inline int Popcount(uint64_t x) {
+#ifdef ARROW_USE_SSE
+  if (LIKELY(CpuInfo::IsSupported(CpuInfo::POPCNT))) {
+    return POPCNT_popcnt_u64(x);
+  } else {
+    return PopcountNoHw(x);
+  }
+#else
+  return PopcountNoHw(x);
+#endif
+}
+
+// Compute correct population count for various-width signed integers
+template <typename T>
+static inline int PopcountSigned(T v) {
+  // Converting to same-width unsigned then extending preserves the bit pattern.
+  return BitUtil::Popcount(static_cast<typename make_unsigned<T>::type>(v));
+}
+
+/// Returns the 'num_bits' least-significant bits of 'v'.
+static inline uint64_t TrailingBits(uint64_t v, int num_bits) {
+  if (UNLIKELY(num_bits == 0)) return 0;
+  if (UNLIKELY(num_bits >= 64)) return v;
+  int n = 64 - num_bits;
+  return (v << n) >> n;
+}
+
+/// Returns ceil(log2(x)).
+/// TODO: this could be faster if we use __builtin_clz.  Fix this if this ever shows up
+/// in a hot path.
+static inline int Log2(uint64_t x) {
+  // DCHECK_GT(x, 0);
+  if (x == 1) return 0;
+  // Compute result = ceil(log2(x))
+  //                = floor(log2(x - 1)) + 1, for x > 1
+  // by finding the position of the most significant bit (1-indexed) of x - 1
+  // (floor(log2(n)) = MSB(n) (0-indexed))
+  --x;
+  int result = 1;
+  while (x >>= 1)
+    ++result;
+  return result;
+}
+
+/// Swaps the byte order (i.e. endianess)
+static inline int64_t ByteSwap(int64_t value) {
+  return ARROW_BYTE_SWAP64(value);
+}
+static inline uint64_t ByteSwap(uint64_t value) {
+  return static_cast<uint64_t>(ARROW_BYTE_SWAP64(value));
+}
+static inline int32_t ByteSwap(int32_t value) {
+  return ARROW_BYTE_SWAP32(value);
+}
+static inline uint32_t ByteSwap(uint32_t value) {
+  return static_cast<uint32_t>(ARROW_BYTE_SWAP32(value));
+}
+static inline int16_t ByteSwap(int16_t value) {
+  constexpr int16_t m = static_cast<int16_t>(0xff);
+  return static_cast<int16_t>(((value >> 8) & m) | ((value & m) <<
8));
+}
+static inline uint16_t ByteSwap(uint16_t value) {
+  return static_cast<uint16_t>(ByteSwap(static_cast<int16_t>(value)));
+}
+
+/// Write the swapped bytes into dst. Src and st cannot overlap.
+static inline void ByteSwap(void* dst, const void* src, int len) {
+  switch (len) {
+    case 1:
+      *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(src);
+      return;
+    case 2:
+      *reinterpret_cast<int16_t*>(dst) = ByteSwap(*reinterpret_cast<const int16_t*>(src));
+      return;
+    case 4:
+      *reinterpret_cast<int32_t*>(dst) = ByteSwap(*reinterpret_cast<const int32_t*>(src));
+      return;
+    case 8:
+      *reinterpret_cast<int64_t*>(dst) = ByteSwap(*reinterpret_cast<const int64_t*>(src));
+      return;
+    default:
+      break;
+  }
+
+  uint8_t* d = reinterpret_cast<uint8_t*>(dst);
+  const uint8_t* s = reinterpret_cast<const uint8_t*>(src);
+  for (int i = 0; i < len; ++i) {
+    d[i] = s[len - i - 1];
+  }
+}
+
+/// Converts to big endian format (if not already in big endian) from the
+/// machine's native endian format.
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+static inline int64_t ToBigEndian(int64_t value) {
+  return ByteSwap(value);
+}
+static inline uint64_t ToBigEndian(uint64_t value) {
+  return ByteSwap(value);
+}
+static inline int32_t ToBigEndian(int32_t value) {
+  return ByteSwap(value);
+}
+static inline uint32_t ToBigEndian(uint32_t value) {
+  return ByteSwap(value);
+}
+static inline int16_t ToBigEndian(int16_t value) {
+  return ByteSwap(value);
+}
+static inline uint16_t ToBigEndian(uint16_t value) {
+  return ByteSwap(value);
+}
+#else
+static inline int64_t ToBigEndian(int64_t val) {
+  return val;
+}
+static inline uint64_t ToBigEndian(uint64_t val) {
+  return val;
+}
+static inline int32_t ToBigEndian(int32_t val) {
+  return val;
+}
+static inline uint32_t ToBigEndian(uint32_t val) {
+  return val;
+}
+static inline int16_t ToBigEndian(int16_t val) {
+  return val;
+}
+static inline uint16_t ToBigEndian(uint16_t val) {
+  return val;
+}
+#endif
+
+/// Converts from big endian format to the machine's native endian format.
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+static inline int64_t FromBigEndian(int64_t value) {
+  return ByteSwap(value);
+}
+static inline uint64_t FromBigEndian(uint64_t value) {
+  return ByteSwap(value);
+}
+static inline int32_t FromBigEndian(int32_t value) {
+  return ByteSwap(value);
+}
+static inline uint32_t FromBigEndian(uint32_t value) {
+  return ByteSwap(value);
+}
+static inline int16_t FromBigEndian(int16_t value) {
+  return ByteSwap(value);
+}
+static inline uint16_t FromBigEndian(uint16_t value) {
+  return ByteSwap(value);
+}
+#else
+static inline int64_t FromBigEndian(int64_t val) {
+  return val;
+}
+static inline uint64_t FromBigEndian(uint64_t val) {
+  return val;
+}
+static inline int32_t FromBigEndian(int32_t val) {
+  return val;
+}
+static inline uint32_t FromBigEndian(uint32_t val) {
+  return val;
+}
+static inline int16_t FromBigEndian(int16_t val) {
+  return val;
+}
+static inline uint16_t FromBigEndian(uint16_t val) {
+  return val;
+}
+#endif
+
+// Logical right shift for signed integer types
+// This is needed because the C >> operator does arithmetic right shift
+// Negative shift amounts lead to undefined behavior
+template <typename T>
+static T ShiftRightLogical(T v, int shift) {
+  // Conversion to unsigned ensures most significant bits always filled with 0's
+  return static_cast<typename make_unsigned<T>::type>(v) >> shift;
+}
+
 void BytesToBits(const std::vector<uint8_t>& bytes, uint8_t* bits);
 ARROW_EXPORT Status BytesToBits(const std::vector<uint8_t>&, std::shared_ptr<Buffer>*);
 


Mime
View raw message