arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-317: Add Slice, Copy methods to Buffer
Date Tue, 18 Oct 2016 01:18:41 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 732a2059d -> 676c32cce


ARROW-317: Add Slice, Copy methods to Buffer

There's also a little bit of naming cleanup in `bit-util.h`, pardon the diff noise.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #177 from wesm/ARROW-317 and squashes the following commits:

0666b22 [Wes McKinney] Fix up pyarrow usage of BitUtil
3ab4e7a [Wes McKinney] Add Slice, Copy methods to Buffer
cb9519d [Wes McKinney] Use more conforming names in bit-util.h


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/676c32cc
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/676c32cc
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/676c32cc

Branch: refs/heads/master
Commit: 676c32ccea6274c75b2750453c1ddbc5f645c037
Parents: 732a205
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Mon Oct 17 21:18:30 2016 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Mon Oct 17 21:18:30 2016 -0400

----------------------------------------------------------------------
 cpp/src/arrow/array.cc                |  3 ++-
 cpp/src/arrow/array.h                 |  2 +-
 cpp/src/arrow/builder.cc              | 12 ++++-----
 cpp/src/arrow/column-benchmark.cc     |  2 +-
 cpp/src/arrow/ipc/adapter.cc          |  5 ++--
 cpp/src/arrow/ipc/test-common.h       |  3 ++-
 cpp/src/arrow/test-util.h             |  6 ++---
 cpp/src/arrow/types/list.cc           |  2 +-
 cpp/src/arrow/types/primitive-test.cc | 16 ++++++------
 cpp/src/arrow/types/primitive.cc      | 13 +++++-----
 cpp/src/arrow/types/primitive.h       | 12 ++++-----
 cpp/src/arrow/util/bit-util-test.cc   | 36 +++++++++++++-------------
 cpp/src/arrow/util/bit-util.cc        | 10 ++++----
 cpp/src/arrow/util/bit-util.h         | 29 ++++++++++-----------
 cpp/src/arrow/util/buffer-test.cc     | 41 ++++++++++++++++++++++++++++++
 cpp/src/arrow/util/buffer.cc          | 28 +++++++++++++++++++-
 cpp/src/arrow/util/buffer.h           | 23 +++++++++++++----
 python/src/pyarrow/adapters/pandas.cc | 20 +++++++--------
 18 files changed, 173 insertions(+), 90 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index d6b081f..e432a53 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -19,6 +19,7 @@
 
 #include <cstdint>
 
+#include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/status.h"
 
@@ -43,7 +44,7 @@ bool Array::EqualsExact(const Array& other) const {
     return false;
   }
   if (null_count_ > 0) {
-    return null_bitmap_->Equals(*other.null_bitmap_, util::bytes_for_bits(length_));
+    return null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
   }
   return true;
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/array.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index c7ffb23..ff37323 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -45,7 +45,7 @@ class ARROW_EXPORT Array {
 
   // Determine if a slot is null. For inner loops. Does *not* boundscheck
   bool IsNull(int i) const {
-    return null_count_ > 0 && util::bit_not_set(null_bitmap_data_, i);
+    return null_count_ > 0 && BitUtil::BitNotSet(null_bitmap_data_, i);
   }
 
   int32_t length() const { return length_; }

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 1fba961..151b257 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -31,7 +31,7 @@ Status ArrayBuilder::AppendToBitmap(bool is_valid) {
     // TODO(emkornfield) doubling isn't great default allocation practice
     // see https://github.com/facebook/folly/blob/master/folly/docs/FBVector.md
     // fo discussion
-    RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
+    RETURN_NOT_OK(Resize(BitUtil::NextPower2(capacity_ + 1)));
   }
   UnsafeAppendToBitmap(is_valid);
   return Status::OK();
@@ -45,7 +45,7 @@ Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int32_t
length)
 }
 
 Status ArrayBuilder::Init(int32_t capacity) {
-  int32_t to_alloc = util::ceil_byte(capacity) / 8;
+  int32_t to_alloc = BitUtil::CeilByte(capacity) / 8;
   null_bitmap_ = std::make_shared<PoolBuffer>(pool_);
   RETURN_NOT_OK(null_bitmap_->Resize(to_alloc));
   // Buffers might allocate more then necessary to satisfy padding requirements
@@ -58,7 +58,7 @@ Status ArrayBuilder::Init(int32_t capacity) {
 
 Status ArrayBuilder::Resize(int32_t new_bits) {
   if (!null_bitmap_) { return Init(new_bits); }
-  int32_t new_bytes = util::ceil_byte(new_bits) / 8;
+  int32_t new_bytes = BitUtil::CeilByte(new_bits) / 8;
   int32_t old_bytes = null_bitmap_->size();
   RETURN_NOT_OK(null_bitmap_->Resize(new_bytes));
   null_bitmap_data_ = null_bitmap_->mutable_data();
@@ -82,7 +82,7 @@ Status ArrayBuilder::Advance(int32_t elements) {
 Status ArrayBuilder::Reserve(int32_t elements) {
   if (length_ + elements > capacity_) {
     // TODO(emkornfield) power of 2 growth is potentially suboptimal
-    int32_t new_capacity = util::next_power2(length_ + elements);
+    int32_t new_capacity = BitUtil::NextPower2(length_ + elements);
     return Resize(new_capacity);
   }
   return Status::OK();
@@ -96,7 +96,7 @@ Status ArrayBuilder::SetNotNull(int32_t length) {
 
 void ArrayBuilder::UnsafeAppendToBitmap(bool is_valid) {
   if (is_valid) {
-    util::set_bit(null_bitmap_data_, length_);
+    BitUtil::SetBit(null_bitmap_data_, length_);
   } else {
     ++null_count_;
   }
@@ -118,7 +118,7 @@ void ArrayBuilder::UnsafeSetNotNull(int32_t length) {
   const int32_t new_length = length + length_;
   // TODO(emkornfield) Optimize for large values of length?
   for (int32_t i = length_; i < new_length; ++i) {
-    util::set_bit(null_bitmap_data_, i);
+    BitUtil::SetBit(null_bitmap_data_, i);
   }
   length_ = new_length;
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/column-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column-benchmark.cc b/cpp/src/arrow/column-benchmark.cc
index edea094..f429a81 100644
--- a/cpp/src/arrow/column-benchmark.cc
+++ b/cpp/src/arrow/column-benchmark.cc
@@ -29,7 +29,7 @@ std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count
= 0) {
   auto data = std::make_shared<PoolBuffer>(pool);
   auto null_bitmap = std::make_shared<PoolBuffer>(pool);
   data->Resize(length * sizeof(typename ArrayType::value_type));
-  null_bitmap->Resize(util::bytes_for_bits(length));
+  null_bitmap->Resize(BitUtil::BytesForBits(length));
   return std::make_shared<ArrayType>(length, data, 10, null_bitmap);
 }
 }  // anonymous namespace

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/ipc/adapter.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc
index f84cb26..74786bf 100644
--- a/cpp/src/arrow/ipc/adapter.cc
+++ b/cpp/src/arrow/ipc/adapter.cc
@@ -37,6 +37,7 @@
 #include "arrow/types/primitive.h"
 #include "arrow/types/string.h"
 #include "arrow/types/struct.h"
+#include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/status.h"
@@ -49,7 +50,7 @@ namespace ipc {
 
 namespace {
 Status CheckMultipleOf64(int64_t size) {
-  if (util::is_multiple_of_64(size)) { return Status::OK(); }
+  if (BitUtil::IsMultipleOf64(size)) { return Status::OK(); }
   return Status::Invalid(
       "Attempted to write a buffer that "
       "wasn't a multiple of 64 bytes");
@@ -155,7 +156,7 @@ class RecordBatchWriter {
       // The buffer might be null if we are handling zero row lengths.
       if (buffer) {
         size = buffer->size();
-        padding = util::RoundUpToMultipleOf64(size) - size;
+        padding = BitUtil::RoundUpToMultipleOf64(size) - size;
       }
 
       // TODO(wesm): We currently have no notion of shared memory page id's,

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/ipc/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h
index 13bbbeb..784e238 100644
--- a/cpp/src/arrow/ipc/test-common.h
+++ b/cpp/src/arrow/ipc/test-common.h
@@ -31,6 +31,7 @@
 #include "arrow/types/primitive.h"
 #include "arrow/types/string.h"
 #include "arrow/types/struct.h"
+#include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/memory-pool.h"
 
@@ -263,7 +264,7 @@ Status MakeStruct(std::shared_ptr<RecordBatch>* out) {
   std::vector<uint8_t> null_bytes(list_batch->num_rows(), 1);
   null_bytes[0] = 0;
   std::shared_ptr<Buffer> null_bitmask;
-  RETURN_NOT_OK(util::bytes_to_bits(null_bytes, &null_bitmask));
+  RETURN_NOT_OK(BitUtil::BytesToBits(null_bytes, &null_bitmask));
   ArrayPtr with_nulls(
       new StructArray(type, list_batch->num_rows(), columns, 1, null_bitmask));
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index e632ffb..ac56f5e 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -69,7 +69,7 @@ class TestBase : public ::testing::Test {
     auto data = std::make_shared<PoolBuffer>(pool_);
     auto null_bitmap = std::make_shared<PoolBuffer>(pool_);
     EXPECT_OK(data->Resize(length * sizeof(typename ArrayType::value_type)));
-    EXPECT_OK(null_bitmap->Resize(util::bytes_for_bits(length)));
+    EXPECT_OK(null_bitmap->Resize(BitUtil::BytesForBits(length)));
     return std::make_shared<ArrayType>(length, data, 10, null_bitmap);
   }
 
@@ -152,7 +152,7 @@ static inline int bitmap_popcount(const uint8_t* data, int length) {
   // versions of popcount but the code complexity is likely not worth it)
   const int loop_tail_index = fast_counts * pop_len;
   for (int i = loop_tail_index; i < length; ++i) {
-    if (util::get_bit(data, i)) { ++count; }
+    if (BitUtil::GetBit(data, i)) { ++count; }
   }
 
   return count;
@@ -170,7 +170,7 @@ std::shared_ptr<Buffer> bytes_to_null_buffer(const std::vector<uint8_t>&
bytes)
   std::shared_ptr<Buffer> out;
 
   // TODO(wesm): error checking
-  util::bytes_to_bits(bytes, &out);
+  BitUtil::BytesToBits(bytes, &out);
   return out;
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/types/list.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc
index ef2ec22..4b1e821 100644
--- a/cpp/src/arrow/types/list.cc
+++ b/cpp/src/arrow/types/list.cc
@@ -30,7 +30,7 @@ bool ListArray::EqualsExact(const ListArray& other) const {
   bool equal_null_bitmap = true;
   if (null_count_ > 0) {
     equal_null_bitmap =
-        null_bitmap_->Equals(*other.null_bitmap_, util::bytes_for_bits(length_));
+        null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
   }
 
   if (!equal_null_bitmap) { return false; }

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
index 121bd47..e47f6dc 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -236,7 +236,7 @@ void TestPrimitiveBuilder<PBoolean>::Check(
 
   for (int i = 0; i < result->length(); ++i) {
     if (nullable) { ASSERT_EQ(valid_bytes_[i] == 0, result->IsNull(i)) << i; }
-    bool actual = util::get_bit(result->raw_data(), i);
+    bool actual = BitUtil::GetBit(result->raw_data(), i);
     ASSERT_EQ(static_cast<bool>(draws_[i]), actual) << i;
   }
   ASSERT_TRUE(result->EqualsExact(*expected.get()));
@@ -258,8 +258,8 @@ TYPED_TEST(TestPrimitiveBuilder, TestInit) {
 
   int n = 1000;
   ASSERT_OK(this->builder_->Reserve(n));
-  ASSERT_EQ(util::next_power2(n), this->builder_->capacity());
-  ASSERT_EQ(util::next_power2(TypeTraits<Type>::bytes_required(n)),
+  ASSERT_EQ(BitUtil::NextPower2(n), this->builder_->capacity());
+  ASSERT_EQ(BitUtil::NextPower2(TypeTraits<Type>::bytes_required(n)),
       this->builder_->data()->size());
 
   // unsure if this should go in all builder classes
@@ -409,10 +409,10 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
   }
 
   ASSERT_EQ(size, this->builder_->length());
-  ASSERT_EQ(util::next_power2(size), this->builder_->capacity());
+  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
 
   ASSERT_EQ(size, this->builder_nn_->length());
-  ASSERT_EQ(util::next_power2(size), this->builder_nn_->capacity());
+  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_nn_->capacity());
 
   this->Check(this->builder_, true);
   this->Check(this->builder_nn_, false);
@@ -444,7 +444,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendVector) {
   ASSERT_OK(this->builder_nn_->Append(draws.data() + K, size - K));
 
   ASSERT_EQ(size, this->builder_->length());
-  ASSERT_EQ(util::next_power2(size), this->builder_->capacity());
+  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
 
   this->Check(this->builder_, true);
   this->Check(this->builder_nn_, false);
@@ -472,7 +472,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestResize) {
   ASSERT_EQ(cap, this->builder_->capacity());
 
   ASSERT_EQ(TypeTraits<Type>::bytes_required(cap), this->builder_->data()->size());
-  ASSERT_EQ(util::bytes_for_bits(cap), this->builder_->null_bitmap()->size());
+  ASSERT_EQ(BitUtil::BytesForBits(cap), this->builder_->null_bitmap()->size());
 }
 
 TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
@@ -484,7 +484,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
   ASSERT_OK(this->builder_->Advance(100));
   ASSERT_OK(this->builder_->Reserve(kMinBuilderCapacity));
 
-  ASSERT_EQ(util::next_power2(kMinBuilderCapacity + 100), this->builder_->capacity());
+  ASSERT_EQ(BitUtil::NextPower2(kMinBuilderCapacity + 100), this->builder_->capacity());
 }
 
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/types/primitive.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc
index 3a05ccf..d2288ba 100644
--- a/cpp/src/arrow/types/primitive.cc
+++ b/cpp/src/arrow/types/primitive.cc
@@ -19,6 +19,7 @@
 
 #include <memory>
 
+#include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/logging.h"
 
@@ -41,7 +42,7 @@ bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const
{
 
   if (null_count_ > 0) {
     bool equal_bitmap =
-        null_bitmap_->Equals(*other.null_bitmap_, util::ceil_byte(length_) / 8);
+        null_bitmap_->Equals(*other.null_bitmap_, BitUtil::CeilByte(length_) / 8);
     if (!equal_bitmap) { return false; }
 
     const uint8_t* this_data = raw_data_;
@@ -156,9 +157,9 @@ Status PrimitiveBuilder<BooleanType>::Append(
     if ((valid_bytes != nullptr) && !valid_bytes[i]) continue;
 
     if (values[i] > 0) {
-      util::set_bit(raw_data_, length_ + i);
+      BitUtil::SetBit(raw_data_, length_ + i);
     } else {
-      util::clear_bit(raw_data_, length_ + i);
+      BitUtil::ClearBit(raw_data_, length_ + i);
     }
   }
 
@@ -196,20 +197,20 @@ bool BooleanArray::EqualsExact(const BooleanArray& other) const
{
 
   if (null_count_ > 0) {
     bool equal_bitmap =
-        null_bitmap_->Equals(*other.null_bitmap_, util::bytes_for_bits(length_));
+        null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
     if (!equal_bitmap) { return false; }
 
     const uint8_t* this_data = raw_data_;
     const uint8_t* other_data = other.raw_data_;
 
     for (int i = 0; i < length_; ++i) {
-      if (!IsNull(i) && util::get_bit(this_data, i) != util::get_bit(other_data,
i)) {
+      if (!IsNull(i) && BitUtil::GetBit(this_data, i) != BitUtil::GetBit(other_data,
i)) {
         return false;
       }
     }
     return true;
   } else {
-    return data_->Equals(*other.data_, util::bytes_for_bits(length_));
+    return data_->Equals(*other.data_, BitUtil::BytesForBits(length_));
   }
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index f21470d..c71df58 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -173,7 +173,7 @@ class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> {
 
   // Does not capacity-check; make sure to call Reserve beforehand
   void UnsafeAppend(value_type val) {
-    util::set_bit(null_bitmap_data_, length_);
+    BitUtil::SetBit(null_bitmap_data_, length_);
     raw_data_[length_++] = val;
   }
 
@@ -290,7 +290,7 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray {
 
   const uint8_t* raw_data() const { return reinterpret_cast<const uint8_t*>(raw_data_);
}
 
-  bool Value(int i) const { return util::get_bit(raw_data(), i); }
+  bool Value(int i) const { return BitUtil::GetBit(raw_data(), i); }
 };
 
 template <>
@@ -298,7 +298,7 @@ struct TypeTraits<BooleanType> {
   typedef BooleanArray ArrayType;
 
   static inline int bytes_required(int elements) {
-    return util::bytes_for_bits(elements);
+    return BitUtil::BytesForBits(elements);
   }
 };
 
@@ -314,11 +314,11 @@ class ARROW_EXPORT BooleanBuilder : public PrimitiveBuilder<BooleanType>
{
   // Scalar append
   Status Append(bool val) {
     Reserve(1);
-    util::set_bit(null_bitmap_data_, length_);
+    BitUtil::SetBit(null_bitmap_data_, length_);
     if (val) {
-      util::set_bit(raw_data_, length_);
+      BitUtil::SetBit(raw_data_, length_);
     } else {
-      util::clear_bit(raw_data_, length_);
+      BitUtil::ClearBit(raw_data_, length_);
     }
     ++length_;
     return Status::OK();

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/util/bit-util-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc
index e1d8a08..cfdee04 100644
--- a/cpp/src/arrow/util/bit-util-test.cc
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -22,33 +22,33 @@
 namespace arrow {
 
 TEST(UtilTests, TestIsMultipleOf64) {
-  using util::is_multiple_of_64;
-  EXPECT_TRUE(is_multiple_of_64(64));
-  EXPECT_TRUE(is_multiple_of_64(0));
-  EXPECT_TRUE(is_multiple_of_64(128));
-  EXPECT_TRUE(is_multiple_of_64(192));
-  EXPECT_FALSE(is_multiple_of_64(23));
-  EXPECT_FALSE(is_multiple_of_64(32));
+  using BitUtil::IsMultipleOf64;
+  EXPECT_TRUE(IsMultipleOf64(64));
+  EXPECT_TRUE(IsMultipleOf64(0));
+  EXPECT_TRUE(IsMultipleOf64(128));
+  EXPECT_TRUE(IsMultipleOf64(192));
+  EXPECT_FALSE(IsMultipleOf64(23));
+  EXPECT_FALSE(IsMultipleOf64(32));
 }
 
 TEST(UtilTests, TestNextPower2) {
-  using util::next_power2;
+  using BitUtil::NextPower2;
 
-  ASSERT_EQ(8, next_power2(6));
-  ASSERT_EQ(8, next_power2(8));
+  ASSERT_EQ(8, NextPower2(6));
+  ASSERT_EQ(8, NextPower2(8));
 
-  ASSERT_EQ(1, next_power2(1));
-  ASSERT_EQ(256, next_power2(131));
+  ASSERT_EQ(1, NextPower2(1));
+  ASSERT_EQ(256, NextPower2(131));
 
-  ASSERT_EQ(1024, next_power2(1000));
+  ASSERT_EQ(1024, NextPower2(1000));
 
-  ASSERT_EQ(4096, next_power2(4000));
+  ASSERT_EQ(4096, NextPower2(4000));
 
-  ASSERT_EQ(65536, next_power2(64000));
+  ASSERT_EQ(65536, NextPower2(64000));
 
-  ASSERT_EQ(1LL << 32, next_power2((1LL << 32) - 1));
-  ASSERT_EQ(1LL << 31, next_power2((1LL << 31) - 1));
-  ASSERT_EQ(1LL << 62, next_power2((1LL << 62) - 1));
+  ASSERT_EQ(1LL << 32, NextPower2((1LL << 32) - 1));
+  ASSERT_EQ(1LL << 31, NextPower2((1LL << 31) - 1));
+  ASSERT_EQ(1LL << 62, NextPower2((1LL << 62) - 1));
 }
 
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/util/bit-util.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.cc b/cpp/src/arrow/util/bit-util.cc
index 475576e..7e1cb18 100644
--- a/cpp/src/arrow/util/bit-util.cc
+++ b/cpp/src/arrow/util/bit-util.cc
@@ -24,20 +24,20 @@
 
 namespace arrow {
 
-void util::bytes_to_bits(const std::vector<uint8_t>& bytes, uint8_t* bits) {
+void BitUtil::BytesToBits(const std::vector<uint8_t>& bytes, uint8_t* bits) {
   for (size_t i = 0; i < bytes.size(); ++i) {
-    if (bytes[i] > 0) { set_bit(bits, i); }
+    if (bytes[i] > 0) { SetBit(bits, i); }
   }
 }
 
-Status util::bytes_to_bits(
+Status BitUtil::BytesToBits(
     const std::vector<uint8_t>& bytes, std::shared_ptr<Buffer>* out) {
-  int bit_length = util::bytes_for_bits(bytes.size());
+  int bit_length = BitUtil::BytesForBits(bytes.size());
 
   auto buffer = std::make_shared<PoolBuffer>();
   RETURN_NOT_OK(buffer->Resize(bit_length));
   memset(buffer->mutable_data(), 0, bit_length);
-  bytes_to_bits(bytes, buffer->mutable_data());
+  BytesToBits(bytes, buffer->mutable_data());
 
   *out = buffer;
   return Status::OK();

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/util/bit-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index c33ef27..13b7e19 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -30,39 +30,39 @@ namespace arrow {
 class Buffer;
 class Status;
 
-namespace util {
+namespace BitUtil {
 
-static inline int64_t ceil_byte(int64_t size) {
+static inline int64_t CeilByte(int64_t size) {
   return (size + 7) & ~7;
 }
 
-static inline int64_t bytes_for_bits(int64_t size) {
-  return ceil_byte(size) / 8;
+static inline int64_t BytesForBits(int64_t size) {
+  return CeilByte(size) / 8;
 }
 
-static inline int64_t ceil_2bytes(int64_t size) {
+static inline int64_t Ceil2Bytes(int64_t size) {
   return (size + 15) & ~15;
 }
 
 static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
 
-static inline bool get_bit(const uint8_t* bits, int i) {
+static inline bool GetBit(const uint8_t* bits, int i) {
   return static_cast<bool>(bits[i / 8] & kBitmask[i % 8]);
 }
 
-static inline bool bit_not_set(const uint8_t* bits, int i) {
+static inline bool BitNotSet(const uint8_t* bits, int i) {
   return (bits[i / 8] & kBitmask[i % 8]) == 0;
 }
 
-static inline void clear_bit(uint8_t* bits, int i) {
+static inline void ClearBit(uint8_t* bits, int i) {
   bits[i / 8] &= ~kBitmask[i % 8];
 }
 
-static inline void set_bit(uint8_t* bits, int i) {
+static inline void SetBit(uint8_t* bits, int i) {
   bits[i / 8] |= kBitmask[i % 8];
 }
 
-static inline int64_t next_power2(int64_t n) {
+static inline int64_t NextPower2(int64_t n) {
   n--;
   n |= n >> 1;
   n |= n >> 2;
@@ -74,7 +74,7 @@ static inline int64_t next_power2(int64_t n) {
   return n;
 }
 
-static inline bool is_multiple_of_64(int64_t n) {
+static inline bool IsMultipleOf64(int64_t n) {
   return (n & 63) == 0;
 }
 
@@ -90,11 +90,10 @@ inline int64_t RoundUpToMultipleOf64(int64_t num) {
   return num;
 }
 
-void bytes_to_bits(const std::vector<uint8_t>& bytes, uint8_t* bits);
-ARROW_EXPORT Status bytes_to_bits(const std::vector<uint8_t>&, std::shared_ptr<Buffer>*);
-
-}  // namespace util
+void BytesToBits(const std::vector<uint8_t>& bytes, uint8_t* bits);
+ARROW_EXPORT Status BytesToBits(const std::vector<uint8_t>&, std::shared_ptr<Buffer>*);
 
+}  // namespace BitUtil
 }  // namespace arrow
 
 #endif  // ARROW_UTIL_BIT_UTIL_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/util/buffer-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer-test.cc b/cpp/src/arrow/util/buffer-test.cc
index cc4ec98..095b07b 100644
--- a/cpp/src/arrow/util/buffer-test.cc
+++ b/cpp/src/arrow/util/buffer-test.cc
@@ -31,6 +31,18 @@ namespace arrow {
 
 class TestBuffer : public ::testing::Test {};
 
+TEST_F(TestBuffer, IsMutableFlag) {
+  Buffer buf(nullptr, 0);
+
+  ASSERT_FALSE(buf.is_mutable());
+
+  MutableBuffer mbuf(nullptr, 0);
+  ASSERT_TRUE(mbuf.is_mutable());
+
+  PoolBuffer pbuf;
+  ASSERT_TRUE(pbuf.is_mutable());
+}
+
 TEST_F(TestBuffer, Resize) {
   PoolBuffer buf;
 
@@ -96,4 +108,33 @@ TEST_F(TestBuffer, EqualsWithSameBuffer) {
   pool->Free(rawBuffer, bufferSize);
 }
 
+TEST_F(TestBuffer, Copy) {
+  std::string data_str = "some data to copy";
+
+  auto data = reinterpret_cast<const uint8_t*>(data_str.c_str());
+
+  Buffer buf(data, data_str.size());
+
+  std::shared_ptr<Buffer> out;
+
+  ASSERT_OK(buf.Copy(5, 4, &out));
+
+  Buffer expected(data + 5, 4);
+  ASSERT_TRUE(out->Equals(expected));
+}
+
+TEST_F(TestBuffer, SliceBuffer) {
+  std::string data_str = "some data to slice";
+
+  auto data = reinterpret_cast<const uint8_t*>(data_str.c_str());
+
+  auto buf = std::make_shared<Buffer>(data, data_str.size());
+
+  std::shared_ptr<Buffer> out = SliceBuffer(buf, 5, 4);
+  Buffer expected(data + 5, 4);
+  ASSERT_TRUE(out->Equals(expected));
+
+  ASSERT_EQ(2, buf.use_count());
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/util/buffer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/util/buffer.cc
index 6faa048..a230259 100644
--- a/cpp/src/arrow/util/buffer.cc
+++ b/cpp/src/arrow/util/buffer.cc
@@ -36,6 +36,32 @@ Buffer::Buffer(const std::shared_ptr<Buffer>& parent, int64_t
offset, int64_t si
 
 Buffer::~Buffer() {}
 
+Status Buffer::Copy(
+    int64_t start, int64_t nbytes, MemoryPool* pool, std::shared_ptr<Buffer>* out)
const {
+  // Sanity checks
+  DCHECK_LT(start, size_);
+  DCHECK_LE(nbytes, size_ - start);
+
+  auto new_buffer = std::make_shared<PoolBuffer>(pool);
+  RETURN_NOT_OK(new_buffer->Resize(nbytes));
+
+  std::memcpy(new_buffer->mutable_data(), data() + start, nbytes);
+
+  *out = new_buffer;
+  return Status::OK();
+}
+
+Status Buffer::Copy(int64_t start, int64_t nbytes, std::shared_ptr<Buffer>* out) const
{
+  return Copy(start, nbytes, default_memory_pool(), out);
+}
+
+std::shared_ptr<Buffer> SliceBuffer(
+    const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length) {
+  DCHECK_LT(offset, buffer->size());
+  DCHECK_LE(length, buffer->size() - offset);
+  return std::make_shared<Buffer>(buffer, offset, length);
+}
+
 std::shared_ptr<Buffer> MutableBuffer::GetImmutableView() {
   return std::make_shared<Buffer>(this->get_shared_ptr(), 0, size());
 }
@@ -52,7 +78,7 @@ PoolBuffer::~PoolBuffer() {
 Status PoolBuffer::Reserve(int64_t new_capacity) {
   if (!mutable_data_ || new_capacity > capacity_) {
     uint8_t* new_data;
-    new_capacity = util::RoundUpToMultipleOf64(new_capacity);
+    new_capacity = BitUtil::RoundUpToMultipleOf64(new_capacity);
     if (mutable_data_) {
       RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data));
       memcpy(new_data, mutable_data_, size_);

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/cpp/src/arrow/util/buffer.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.h b/cpp/src/arrow/util/buffer.h
index bc0df86..04ad6c2 100644
--- a/cpp/src/arrow/util/buffer.h
+++ b/cpp/src/arrow/util/buffer.h
@@ -43,7 +43,8 @@ class Status;
 // The following invariant is always true: Size < Capacity
 class ARROW_EXPORT Buffer : public std::enable_shared_from_this<Buffer> {
  public:
-  Buffer(const uint8_t* data, int64_t size) : data_(data), size_(size), capacity_(size) {}
+  Buffer(const uint8_t* data, int64_t size)
+      : is_mutable_(false), data_(data), size_(size), capacity_(size) {}
   virtual ~Buffer();
 
   // An offset into data that is owned by another buffer, but we want to be
@@ -57,6 +58,8 @@ class ARROW_EXPORT Buffer : public std::enable_shared_from_this<Buffer>
{
 
   std::shared_ptr<Buffer> get_shared_ptr() { return shared_from_this(); }
 
+  bool is_mutable() const { return is_mutable_; }
+
   // Return true if both buffers are the same size and contain the same bytes
   // up to the number of compared bytes
   bool Equals(const Buffer& other, int64_t nbytes) const {
@@ -71,18 +74,22 @@ class ARROW_EXPORT Buffer : public std::enable_shared_from_this<Buffer>
{
                (data_ == other.data_ || !memcmp(data_, other.data_, size_)));
   }
 
+  // Copy section of buffer into a new Buffer
+  Status Copy(int64_t start, int64_t nbytes, MemoryPool* pool,
+      std::shared_ptr<Buffer>* out) const;
+
+  // Default memory pool
+  Status Copy(int64_t start, int64_t nbytes, std::shared_ptr<Buffer>* out) const;
+
   int64_t capacity() const { return capacity_; }
   const uint8_t* data() const { return data_; }
 
   int64_t size() const { return size_; }
 
-  // Returns true if this Buffer is referencing memory (possibly) owned by some
-  // other buffer
-  bool is_shared() const { return static_cast<bool>(parent_); }
-
   const std::shared_ptr<Buffer> parent() const { return parent_; }
 
  protected:
+  bool is_mutable_;
   const uint8_t* data_;
   int64_t size_;
   int64_t capacity_;
@@ -94,10 +101,16 @@ class ARROW_EXPORT Buffer : public std::enable_shared_from_this<Buffer>
{
   DISALLOW_COPY_AND_ASSIGN(Buffer);
 };
 
+// Construct a view on passed buffer at the indicated offset and length. This
+// function cannot fail and does not error checking (except in debug builds)
+std::shared_ptr<Buffer> SliceBuffer(
+    const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length);
+
 // A Buffer whose contents can be mutated. May or may not own its data.
 class ARROW_EXPORT MutableBuffer : public Buffer {
  public:
   MutableBuffer(uint8_t* data, int64_t size) : Buffer(data, size) {
+    is_mutable_ = true;
     mutable_data_ = data;
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/676c32cc/python/src/pyarrow/adapters/pandas.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc
index 5902b83..7e70be7 100644
--- a/python/src/pyarrow/adapters/pandas.cc
+++ b/python/src/pyarrow/adapters/pandas.cc
@@ -44,7 +44,7 @@ using arrow::Field;
 using arrow::DataType;
 using arrow::Status;
 
-namespace util = arrow::util;
+namespace BitUtil = arrow::BitUtil;
 
 // ----------------------------------------------------------------------
 // Serialization
@@ -148,7 +148,7 @@ class ArrowSerializer {
   }
 
   Status InitNullBitmap() {
-    int null_bytes = util::bytes_for_bits(length_);
+    int null_bytes = BitUtil::BytesForBits(length_);
 
     null_bitmap_ = std::make_shared<arrow::PoolBuffer>(pool_);
     RETURN_NOT_OK(null_bitmap_->Resize(null_bytes));
@@ -206,7 +206,7 @@ class ArrowSerializer {
 
     PyObject** objects = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
 
-    int nbytes = util::bytes_for_bits(length_);
+    int nbytes = BitUtil::BytesForBits(length_);
     auto data = std::make_shared<arrow::PoolBuffer>(pool_);
     RETURN_NOT_OK(data->Resize(nbytes));
     uint8_t* bitmap = data->mutable_data();
@@ -215,12 +215,12 @@ class ArrowSerializer {
     int64_t null_count = 0;
     for (int64_t i = 0; i < length_; ++i) {
       if (objects[i] == Py_True) {
-        util::set_bit(bitmap, i);
-        util::set_bit(null_bitmap_data_, i);
+        BitUtil::SetBit(bitmap, i);
+        BitUtil::SetBit(null_bitmap_data_, i);
       } else if (objects[i] != Py_False) {
         ++null_count;
       } else {
-        util::set_bit(null_bitmap_data_, i);
+        BitUtil::SetBit(null_bitmap_data_, i);
       }
     }
 
@@ -253,7 +253,7 @@ static int64_t MaskToBitmap(PyArrayObject* mask, int64_t length, uint8_t*
bitmap
     if (mask_values[i]) {
       ++null_count;
     } else {
-      util::set_bit(bitmap, i);
+      BitUtil::SetBit(bitmap, i);
     }
   }
   return null_count;
@@ -272,7 +272,7 @@ static int64_t ValuesToBitmap(const void* data, int64_t length, uint8_t*
bitmap)
     if (traits::isnull(values[i])) {
       ++null_count;
     } else {
-      util::set_bit(bitmap, i);
+      BitUtil::SetBit(bitmap, i);
     }
   }
 
@@ -402,7 +402,7 @@ inline Status ArrowSerializer<NPY_BOOL>::ConvertData() {
     return Status::Invalid("no support for strided data yet");
   }
 
-  int nbytes = util::bytes_for_bits(length_);
+  int nbytes = BitUtil::BytesForBits(length_);
   auto buffer = std::make_shared<arrow::PoolBuffer>(pool_);
   RETURN_NOT_OK(buffer->Resize(nbytes));
 
@@ -413,7 +413,7 @@ inline Status ArrowSerializer<NPY_BOOL>::ConvertData() {
   memset(bitmap, 0, nbytes);
   for (int i = 0; i < length_; ++i) {
     if (values[i] > 0) {
-      util::set_bit(bitmap, i);
+      BitUtil::SetBit(bitmap, i);
     }
   }
 


Mime
View raw message