arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-19: Add an externalized MemoryPool interface for use in builder classes
Date Thu, 03 Mar 2016 22:02:28 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 1000d110c -> e41802085


ARROW-19: Add an externalized MemoryPool interface for use in builder classes

Memory management will be an ongoing concern, but this is a stride in the right direction.
Applications requiring custom memory management will be able to implement a subclass of MemoryPool;
we can evolve its API as user needs evolve.

Author: Wes McKinney <wesm@apache.org>

Closes #8 from wesm/ARROW-19 and squashes the following commits:

08d3895 [Wes McKinney] Some include cleanup
e319a36 [Wes McKinney] cpplint fixes
abca6eb [Wes McKinney] Add a MemoryPool abstract interface, change builder instances to request
memory from pool via Buffer subclass


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/e4180208
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/e4180208
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/e4180208

Branch: refs/heads/master
Commit: e418020852ad4fa148b07f21f5b4d47230fe4c5b
Parents: 1000d11
Author: Wes McKinney <wesm@apache.org>
Authored: Thu Mar 3 14:02:53 2016 -0800
Committer: Wes McKinney <wesm@apache.org>
Committed: Thu Mar 3 14:02:53 2016 -0800

----------------------------------------------------------------------
 cpp/CMakeLists.txt                     |  2 +-
 cpp/src/arrow/array-test.cc            | 10 +++-
 cpp/src/arrow/array.h                  |  1 -
 cpp/src/arrow/builder.cc               |  2 +-
 cpp/src/arrow/builder.h                | 22 ++++----
 cpp/src/arrow/types/construct.cc       | 13 ++---
 cpp/src/arrow/types/construct.h        |  4 +-
 cpp/src/arrow/types/list-test.cc       |  2 +-
 cpp/src/arrow/types/list.h             |  7 ++-
 cpp/src/arrow/types/primitive-test.cc  |  5 +-
 cpp/src/arrow/types/primitive.h        | 12 +++--
 cpp/src/arrow/types/string-test.cc     | 29 +++++------
 cpp/src/arrow/types/string.h           |  9 ++--
 cpp/src/arrow/types/struct.cc          |  1 +
 cpp/src/arrow/types/test-common.h      |  8 ++-
 cpp/src/arrow/types/union.cc           |  1 +
 cpp/src/arrow/util/CMakeLists.txt      |  3 ++
 cpp/src/arrow/util/bit-util.cc         |  2 +-
 cpp/src/arrow/util/bit-util.h          |  3 +-
 cpp/src/arrow/util/buffer-test.cc      |  6 +--
 cpp/src/arrow/util/buffer.cc           | 36 +++++++++----
 cpp/src/arrow/util/buffer.h            | 36 +++++++++----
 cpp/src/arrow/util/memory-pool-test.cc | 47 +++++++++++++++++
 cpp/src/arrow/util/memory-pool.cc      | 78 +++++++++++++++++++++++++++++
 cpp/src/arrow/util/memory-pool.h       | 41 +++++++++++++++
 25 files changed, 301 insertions(+), 79 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5ddd9da..d2c840a 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -434,7 +434,7 @@ if (UNIX)
   add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py
   --verbose=2
   --linelength=90
-  --filter=-whitespace/comments,-readability/todo,-build/header_guard
+  --filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/c++11
     `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h`)
 endif (UNIX)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/array-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 5ecf916..16afb9b 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -18,6 +18,7 @@
 #include <gtest/gtest.h>
 
 #include <cstdint>
+#include <cstdlib>
 #include <memory>
 #include <string>
 #include <vector>
@@ -28,6 +29,8 @@
 #include "arrow/types/integer.h"
 #include "arrow/types/primitive.h"
 #include "arrow/util/buffer.h"
+#include "arrow/util/memory-pool.h"
+#include "arrow/util/status.h"
 
 using std::string;
 using std::vector;
@@ -41,8 +44,10 @@ static TypePtr int32_nn = TypePtr(new Int32Type(false));
 class TestArray : public ::testing::Test {
  public:
   void SetUp() {
-    auto data = std::make_shared<OwnedMutableBuffer>();
-    auto nulls = std::make_shared<OwnedMutableBuffer>();
+    pool_ = GetDefaultMemoryPool();
+
+    auto data = std::make_shared<PoolBuffer>(pool_);
+    auto nulls = std::make_shared<PoolBuffer>(pool_);
 
     ASSERT_OK(data->Resize(400));
     ASSERT_OK(nulls->Resize(128));
@@ -51,6 +56,7 @@ class TestArray : public ::testing::Test {
   }
 
  protected:
+  MemoryPool* pool_;
   std::unique_ptr<Int32Array> arr_;
 };
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/array.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index c95450d..0eaa28d 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -19,7 +19,6 @@
 #define ARROW_ARRAY_H
 
 #include <cstdint>
-#include <cstdlib>
 #include <memory>
 
 #include "arrow/type.h"

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 1fd7471..cb85067 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -30,7 +30,7 @@ Status ArrayBuilder::Init(int64_t capacity) {
 
   if (nullable_) {
     int64_t to_alloc = util::ceil_byte(capacity) / 8;
-    nulls_ = std::make_shared<OwnedMutableBuffer>();
+    nulls_ = std::make_shared<PoolBuffer>(pool_);
     RETURN_NOT_OK(nulls_->Resize(to_alloc));
     null_bits_ = nulls_->mutable_data();
     memset(null_bits_, 0, to_alloc);

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/builder.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index b43668a..456bb04 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -23,25 +23,27 @@
 #include <vector>
 
 #include "arrow/type.h"
-#include "arrow/util/buffer.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/status.h"
 
 namespace arrow {
 
 class Array;
+class MemoryPool;
+class PoolBuffer;
 
 static constexpr int64_t MIN_BUILDER_CAPACITY = 1 << 8;
 
 // Base class for all data array builders
 class ArrayBuilder {
  public:
-  explicit ArrayBuilder(const TypePtr& type)
-      : type_(type),
-        nullable_(type_->nullable),
-        nulls_(nullptr), null_bits_(nullptr),
-        length_(0),
-        capacity_(0) {}
+  explicit ArrayBuilder(MemoryPool* pool, const TypePtr& type) :
+      pool_(pool),
+      type_(type),
+      nullable_(type_->nullable),
+      nulls_(nullptr), null_bits_(nullptr),
+      length_(0),
+      capacity_(0) {}
 
   virtual ~ArrayBuilder() {}
 
@@ -71,18 +73,20 @@ class ArrayBuilder {
   // this function responsibly.
   Status Advance(int64_t elements);
 
-  const std::shared_ptr<OwnedMutableBuffer>& nulls() const { return nulls_;}
+  const std::shared_ptr<PoolBuffer>& nulls() const { return nulls_;}
 
   // Creates new array object to hold the contents of the builder and transfers
   // ownership of the data
   virtual Status ToArray(Array** out) = 0;
 
  protected:
+  MemoryPool* pool_;
+
   TypePtr type_;
   bool nullable_;
 
   // If the type is not nullable, then null_ is nullptr after initialization
-  std::shared_ptr<OwnedMutableBuffer> nulls_;
+  std::shared_ptr<PoolBuffer> nulls_;
   uint8_t* null_bits_;
 
   // Array length, so far. Also, the index of the next element to be added

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/construct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc
index 5176caf..e1bb990 100644
--- a/cpp/src/arrow/types/construct.cc
+++ b/cpp/src/arrow/types/construct.cc
@@ -32,12 +32,13 @@ class ArrayBuilder;
 // Initially looked at doing this with vtables, but shared pointers makes it
 // difficult
 
-#define BUILDER_CASE(ENUM, BuilderType)                         \
-    case TypeEnum::ENUM:                                        \
-      *out = static_cast<ArrayBuilder*>(new BuilderType(type)); \
+#define BUILDER_CASE(ENUM, BuilderType)                                 \
+    case TypeEnum::ENUM:                                                \
+      *out = static_cast<ArrayBuilder*>(new BuilderType(pool, type));   \
       return Status::OK();
 
-Status make_builder(const TypePtr& type, ArrayBuilder** out) {
+Status make_builder(MemoryPool* pool, const TypePtr& type,
+    ArrayBuilder** out) {
   switch (type->type) {
     BUILDER_CASE(UINT8, UInt8Builder);
     BUILDER_CASE(INT8, Int8Builder);
@@ -59,10 +60,10 @@ Status make_builder(const TypePtr& type, ArrayBuilder** out) {
       {
         ListType* list_type = static_cast<ListType*>(type.get());
         ArrayBuilder* value_builder;
-        RETURN_NOT_OK(make_builder(list_type->value_type, &value_builder));
+        RETURN_NOT_OK(make_builder(pool, list_type->value_type, &value_builder));
 
         // The ListBuilder takes ownership of the value_builder
-        ListBuilder* builder = new ListBuilder(type, value_builder);
+        ListBuilder* builder = new ListBuilder(pool, type, value_builder);
         *out = static_cast<ArrayBuilder*>(builder);
         return Status::OK();
       }

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/construct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.h b/cpp/src/arrow/types/construct.h
index c0bfedd..b5ba436 100644
--- a/cpp/src/arrow/types/construct.h
+++ b/cpp/src/arrow/types/construct.h
@@ -23,9 +23,11 @@
 namespace arrow {
 
 class ArrayBuilder;
+class MemoryPool;
 class Status;
 
-Status make_builder(const TypePtr& type, ArrayBuilder** out);
+Status make_builder(MemoryPool* pool, const TypePtr& type,
+    ArrayBuilder** out);
 
 } // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
index 47673ff..abfc8a3 100644
--- a/cpp/src/arrow/types/list-test.cc
+++ b/cpp/src/arrow/types/list-test.cc
@@ -76,7 +76,7 @@ class TestListBuilder : public TestBuilder {
     type_ = TypePtr(new ListType(value_type_));
 
     ArrayBuilder* tmp;
-    ASSERT_OK(make_builder(type_, &tmp));
+    ASSERT_OK(make_builder(pool_, type_, &tmp));
     builder_.reset(static_cast<ListBuilder*>(tmp));
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
index 0f11162..4ca0f13 100644
--- a/cpp/src/arrow/types/list.h
+++ b/cpp/src/arrow/types/list.h
@@ -34,6 +34,8 @@
 
 namespace arrow {
 
+class MemoryPool;
+
 struct ListType : public DataType {
   // List can contain any other logical value type
   TypePtr value_type;
@@ -100,8 +102,9 @@ class ListArray : public Array {
 // have been appended to the child array)
 class ListBuilder : public Int32Builder {
  public:
-  ListBuilder(const TypePtr& type, ArrayBuilder* value_builder)
-      : Int32Builder(type) {
+  ListBuilder(MemoryPool* pool, const TypePtr& type,
+      ArrayBuilder* value_builder)
+      : Int32Builder(pool, type) {
     value_builder_.reset(value_builder);
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
index 1296860..3484294 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -18,7 +18,6 @@
 #include <gtest/gtest.h>
 
 #include <cstdint>
-#include <cstdlib>
 #include <memory>
 #include <string>
 #include <vector>
@@ -104,10 +103,10 @@ class TestPrimitiveBuilder : public TestBuilder {
     type_nn_ = Attrs::type(false);
 
     ArrayBuilder* tmp;
-    ASSERT_OK(make_builder(type_, &tmp));
+    ASSERT_OK(make_builder(pool_, type_, &tmp));
     builder_.reset(static_cast<BuilderType*>(tmp));
 
-    ASSERT_OK(make_builder(type_nn_, &tmp));
+    ASSERT_OK(make_builder(pool_, type_nn_, &tmp));
     builder_nn_.reset(static_cast<BuilderType*>(tmp));
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index a419112..c5ae0f7 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -20,6 +20,7 @@
 
 #include <cstdint>
 #include <cstring>
+#include <memory>
 #include <string>
 
 #include "arrow/array.h"
@@ -31,6 +32,8 @@
 
 namespace arrow {
 
+class MemoryPool;
+
 template <typename Derived>
 struct PrimitiveType : public DataType {
   explicit PrimitiveType(bool nullable = true)
@@ -113,8 +116,9 @@ class PrimitiveBuilder : public ArrayBuilder {
  public:
   typedef typename Type::c_type T;
 
-  explicit PrimitiveBuilder(const TypePtr& type)
-      : ArrayBuilder(type), values_(nullptr) {
+  explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type) :
+      ArrayBuilder(pool, type),
+      values_(nullptr) {
     elsize_ = sizeof(T);
   }
 
@@ -139,7 +143,7 @@ class PrimitiveBuilder : public ArrayBuilder {
   Status Init(int64_t capacity) {
     RETURN_NOT_OK(ArrayBuilder::Init(capacity));
 
-    values_ = std::make_shared<OwnedMutableBuffer>();
+    values_ = std::make_shared<PoolBuffer>(pool_);
     return values_->Resize(capacity * elsize_);
   }
 
@@ -231,7 +235,7 @@ class PrimitiveBuilder : public ArrayBuilder {
   }
 
  protected:
-  std::shared_ptr<OwnedMutableBuffer> values_;
+  std::shared_ptr<PoolBuffer> values_;
   int64_t elsize_;
 };
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc
index 6dba3fd..a2d87ea 100644
--- a/cpp/src/arrow/types/string-test.cc
+++ b/cpp/src/arrow/types/string-test.cc
@@ -31,12 +31,9 @@
 #include "arrow/types/test-common.h"
 #include "arrow/util/status.h"
 
-using std::string;
-using std::unique_ptr;
-using std::vector;
-
 namespace arrow {
 
+class Buffer;
 
 TEST(TypesTest, TestCharType) {
   CharType t1(5);
@@ -45,7 +42,7 @@ TEST(TypesTest, TestCharType) {
   ASSERT_TRUE(t1.nullable);
   ASSERT_EQ(t1.size, 5);
 
-  ASSERT_EQ(t1.ToString(), string("char(5)"));
+  ASSERT_EQ(t1.ToString(), std::string("char(5)"));
 
   // Test copy constructor
   CharType t2 = t1;
@@ -63,7 +60,7 @@ TEST(TypesTest, TestVarcharType) {
   ASSERT_EQ(t1.size, 5);
   ASSERT_EQ(t1.physical_type.size, 6);
 
-  ASSERT_EQ(t1.ToString(), string("varchar(5)"));
+  ASSERT_EQ(t1.ToString(), std::string("varchar(5)"));
 
   // Test copy constructor
   VarcharType t2 = t1;
@@ -78,7 +75,7 @@ TEST(TypesTest, TestStringType) {
   StringType str_nn(false);
 
   ASSERT_EQ(str.type, TypeEnum::STRING);
-  ASSERT_EQ(str.name(), string("string"));
+  ASSERT_EQ(str.name(), std::string("string"));
   ASSERT_TRUE(str.nullable);
   ASSERT_FALSE(str_nn.nullable);
 }
@@ -111,11 +108,11 @@ class TestStringContainer : public ::testing::Test  {
   }
 
  protected:
-  vector<int32_t> offsets_;
-  vector<char> chars_;
-  vector<uint8_t> nulls_;
+  std::vector<int32_t> offsets_;
+  std::vector<char> chars_;
+  std::vector<uint8_t> nulls_;
 
-  vector<string> expected_;
+  std::vector<std::string> expected_;
 
   std::shared_ptr<Buffer> value_buf_;
   std::shared_ptr<Buffer> offsets_buf_;
@@ -175,7 +172,7 @@ class TestStringBuilder : public TestBuilder {
     type_ = TypePtr(new StringType());
 
     ArrayBuilder* tmp;
-    ASSERT_OK(make_builder(type_, &tmp));
+    ASSERT_OK(make_builder(pool_, type_, &tmp));
     builder_.reset(static_cast<StringBuilder*>(tmp));
   }
 
@@ -188,8 +185,8 @@ class TestStringBuilder : public TestBuilder {
  protected:
   TypePtr type_;
 
-  unique_ptr<StringBuilder> builder_;
-  unique_ptr<StringArray> result_;
+  std::unique_ptr<StringBuilder> builder_;
+  std::unique_ptr<StringArray> result_;
 };
 
 TEST_F(TestStringBuilder, TestAttrs) {
@@ -197,8 +194,8 @@ TEST_F(TestStringBuilder, TestAttrs) {
 }
 
 TEST_F(TestStringBuilder, TestScalarAppend) {
-  vector<string> strings = {"a", "bb", "", "", "ccc"};
-  vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+  std::vector<std::string> strings = {"a", "bb", "", "", "ccc"};
+  std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};
 
   int N = strings.size();
   int reps = 1000;

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/string.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h
index 30d6e24..d0690d9 100644
--- a/cpp/src/arrow/types/string.h
+++ b/cpp/src/arrow/types/string.h
@@ -27,12 +27,13 @@
 #include "arrow/type.h"
 #include "arrow/types/integer.h"
 #include "arrow/types/list.h"
-#include "arrow/util/buffer.h"
 #include "arrow/util/status.h"
 
 namespace arrow {
 
 class ArrayBuilder;
+class Buffer;
+class MemoryPool;
 
 struct CharType : public DataType {
   int size;
@@ -148,8 +149,9 @@ class StringArray : public ListArray {
 
 class StringBuilder : public ListBuilder {
  public:
-  explicit StringBuilder(const TypePtr& type) :
-      ListBuilder(type, static_cast<ArrayBuilder*>(new UInt8Builder(value_type_)))
{
+  explicit StringBuilder(MemoryPool* pool, const TypePtr& type) :
+      ListBuilder(pool, type,
+          static_cast<ArrayBuilder*>(new UInt8Builder(pool, value_type_))) {
     byte_builder_ = static_cast<UInt8Builder*>(value_builder_.get());
   }
 
@@ -171,6 +173,7 @@ class StringBuilder : public ListBuilder {
   }
 
  protected:
+  std::shared_ptr<ListBuilder> list_builder_;
   UInt8Builder* byte_builder_;
 
   static TypePtr value_type_;

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/struct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc
index b7be5d8..a245656 100644
--- a/cpp/src/arrow/types/struct.cc
+++ b/cpp/src/arrow/types/struct.cc
@@ -17,6 +17,7 @@
 
 #include "arrow/types/struct.h"
 
+#include <cstdlib>
 #include <memory>
 #include <sstream>
 #include <string>

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/test-common.h b/cpp/src/arrow/types/test-common.h
index 267e48a..3ecb0de 100644
--- a/cpp/src/arrow/types/test-common.h
+++ b/cpp/src/arrow/types/test-common.h
@@ -25,6 +25,7 @@
 
 #include "arrow/test-util.h"
 #include "arrow/type.h"
+#include "arrow/util/memory-pool.h"
 
 using std::unique_ptr;
 
@@ -33,12 +34,15 @@ namespace arrow {
 class TestBuilder : public ::testing::Test {
  public:
   void SetUp() {
+    pool_ = GetDefaultMemoryPool();
     type_ = TypePtr(new UInt8Type());
     type_nn_ = TypePtr(new UInt8Type(false));
-    builder_.reset(new UInt8Builder(type_));
-    builder_nn_.reset(new UInt8Builder(type_nn_));
+    builder_.reset(new UInt8Builder(pool_, type_));
+    builder_nn_.reset(new UInt8Builder(pool_, type_nn_));
   }
  protected:
+  MemoryPool* pool_;
+
   TypePtr type_;
   TypePtr type_nn_;
   unique_ptr<ArrayBuilder> builder_;

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/union.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/union.cc b/cpp/src/arrow/types/union.cc
index 54f41a7..db3f817 100644
--- a/cpp/src/arrow/types/union.cc
+++ b/cpp/src/arrow/types/union.cc
@@ -17,6 +17,7 @@
 
 #include "arrow/types/union.h"
 
+#include <cstdlib>
 #include <sstream>
 #include <string>
 #include <vector>

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index ff8db6a..c53f307 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -22,6 +22,7 @@
 set(UTIL_SRCS
   bit-util.cc
   buffer.cc
+  memory-pool.cc
   status.cc
 )
 
@@ -39,6 +40,7 @@ install(FILES
   bit-util.h
   buffer.h
   macros.h
+  memory-pool.h
   status.h
   DESTINATION include/arrow/util)
 
@@ -79,3 +81,4 @@ endif()
 
 ADD_ARROW_TEST(bit-util-test)
 ADD_ARROW_TEST(buffer-test)
+ADD_ARROW_TEST(memory-pool-test)

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/bit-util.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.cc b/cpp/src/arrow/util/bit-util.cc
index d2ddd65..dbac0a4 100644
--- a/cpp/src/arrow/util/bit-util.cc
+++ b/cpp/src/arrow/util/bit-util.cc
@@ -33,7 +33,7 @@ Status util::bytes_to_bits(uint8_t* bytes, int length,
     std::shared_ptr<Buffer>* out) {
   int bit_length = ceil_byte(length) / 8;
 
-  auto buffer = std::make_shared<OwnedMutableBuffer>();
+  auto buffer = std::make_shared<PoolBuffer>();
   RETURN_NOT_OK(buffer->Resize(bit_length));
   memset(buffer->mutable_data(), 0, bit_length);
   bytes_to_bits(bytes, length, buffer->mutable_data());

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/bit-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 61dffa3..9ae6127 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -22,10 +22,9 @@
 #include <cstdlib>
 #include <memory>
 
-#include "arrow/util/buffer.h"
-
 namespace arrow {
 
+class Buffer;
 class Status;
 
 namespace util {

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/buffer-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer-test.cc b/cpp/src/arrow/util/buffer-test.cc
index edfd08e..9f1fd91 100644
--- a/cpp/src/arrow/util/buffer-test.cc
+++ b/cpp/src/arrow/util/buffer-test.cc
@@ -16,10 +16,8 @@
 // under the License.
 
 #include <gtest/gtest.h>
-#include <cstdlib>
 #include <cstdint>
 #include <limits>
-#include <memory>
 #include <string>
 
 #include "arrow/test-util.h"
@@ -34,7 +32,7 @@ class TestBuffer : public ::testing::Test {
 };
 
 TEST_F(TestBuffer, Resize) {
-  OwnedMutableBuffer buf;
+  PoolBuffer buf;
 
   ASSERT_EQ(0, buf.size());
   ASSERT_OK(buf.Resize(100));
@@ -49,7 +47,7 @@ TEST_F(TestBuffer, Resize) {
 
 TEST_F(TestBuffer, ResizeOOM) {
   // realloc fails, even though there may be no explicit limit
-  OwnedMutableBuffer buf;
+  PoolBuffer buf;
   ASSERT_OK(buf.Resize(100));
   int64_t to_alloc = std::numeric_limits<int64_t>::max();
   ASSERT_RAISES(OutOfMemory, buf.Resize(to_alloc));

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/buffer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/util/buffer.cc
index 2fb34d5..3f3807d 100644
--- a/cpp/src/arrow/util/buffer.cc
+++ b/cpp/src/arrow/util/buffer.cc
@@ -19,6 +19,7 @@
 
 #include <cstdint>
 
+#include "arrow/util/memory-pool.h"
 #include "arrow/util/status.h"
 
 namespace arrow {
@@ -34,19 +35,34 @@ std::shared_ptr<Buffer> MutableBuffer::GetImmutableView() {
   return std::make_shared<Buffer>(this->get_shared_ptr(), 0, size());
 }
 
-OwnedMutableBuffer::OwnedMutableBuffer() :
-    MutableBuffer(nullptr, 0) {}
+PoolBuffer::PoolBuffer(MemoryPool* pool) :
+    ResizableBuffer(nullptr, 0) {
+  if (pool == nullptr) {
+    pool = GetDefaultMemoryPool();
+  }
+  pool_ = pool;
+}
 
-Status OwnedMutableBuffer::Resize(int64_t new_size) {
-  size_ = new_size;
-  try {
-    buffer_owner_.resize(new_size);
-  } catch (const std::bad_alloc& e) {
-    return Status::OutOfMemory("resize failed");
+Status PoolBuffer::Reserve(int64_t new_capacity) {
+  if (!mutable_data_ || new_capacity > capacity_) {
+    uint8_t* new_data;
+    if (mutable_data_) {
+      RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data));
+      memcpy(new_data, mutable_data_, size_);
+      pool_->Free(mutable_data_, capacity_);
+    } else {
+      RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data));
+    }
+    mutable_data_ = new_data;
+    data_ = mutable_data_;
+    capacity_ = new_capacity;
   }
-  data_ = buffer_owner_.data();
-  mutable_data_ = buffer_owner_.data();
+  return Status::OK();
+}
 
+Status PoolBuffer::Resize(int64_t new_size) {
+  RETURN_NOT_OK(Reserve(new_size));
+  size_ = new_size;
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/buffer.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.h b/cpp/src/arrow/util/buffer.h
index 3e41839..8704723 100644
--- a/cpp/src/arrow/util/buffer.h
+++ b/cpp/src/arrow/util/buffer.h
@@ -19,15 +19,14 @@
 #define ARROW_UTIL_BUFFER_H
 
 #include <cstdint>
-#include <cstdlib>
 #include <cstring>
 #include <memory>
-#include <vector>
 
 #include "arrow/util/macros.h"
 
 namespace arrow {
 
+class MemoryPool;
 class Status;
 
 // ----------------------------------------------------------------------
@@ -115,17 +114,34 @@ class MutableBuffer : public Buffer {
   uint8_t* mutable_data_;
 };
 
-// A MutableBuffer whose memory is owned by the class instance. For example,
-// for reading data out of files that you want to deallocate when this class is
-// garbage-collected
-class OwnedMutableBuffer : public MutableBuffer {
+class ResizableBuffer : public MutableBuffer {
  public:
-  OwnedMutableBuffer();
-  Status Resize(int64_t new_size);
+  // Change buffer reported size to indicated size, allocating memory if
+  // necessary
+  virtual Status Resize(int64_t new_size) = 0;
+
+  // Ensure that buffer has enough memory allocated to fit the indicated
+  // capacity. Does not change buffer's reported size
+  virtual Status Reserve(int64_t new_capacity) = 0;
+
+ protected:
+  ResizableBuffer(uint8_t* data, int64_t size) :
+      MutableBuffer(data, size),
+      capacity_(size) {}
+
+  int64_t capacity_;
+};
+
+// A Buffer whose lifetime is tied to a particular MemoryPool
+class PoolBuffer : public ResizableBuffer {
+ public:
+  explicit PoolBuffer(MemoryPool* pool = nullptr);
+
+  virtual Status Resize(int64_t new_size);
+  virtual Status Reserve(int64_t new_capacity);
 
  private:
-  // TODO: aligned allocations
-  std::vector<uint8_t> buffer_owner_;
+  MemoryPool* pool_;
 };
 
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/memory-pool-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool-test.cc b/cpp/src/arrow/util/memory-pool-test.cc
new file mode 100644
index 0000000..954b5f9
--- /dev/null
+++ b/cpp/src/arrow/util/memory-pool-test.cc
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <cstdint>
+#include <limits>
+
+#include "arrow/test-util.h"
+#include "arrow/util/memory-pool.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+TEST(DefaultMemoryPool, MemoryTracking) {
+  MemoryPool* pool = GetDefaultMemoryPool();
+
+  uint8_t* data;
+  ASSERT_OK(pool->Allocate(100, &data));
+  ASSERT_EQ(100, pool->bytes_allocated());
+
+  pool->Free(data, 100);
+  ASSERT_EQ(0, pool->bytes_allocated());
+}
+
+TEST(DefaultMemoryPool, OOM) {
+  MemoryPool* pool = GetDefaultMemoryPool();
+
+  uint8_t* data;
+  int64_t to_alloc = std::numeric_limits<int64_t>::max();
+  ASSERT_RAISES(OutOfMemory, pool->Allocate(to_alloc, &data));
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/memory-pool.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool.cc b/cpp/src/arrow/util/memory-pool.cc
new file mode 100644
index 0000000..5820346
--- /dev/null
+++ b/cpp/src/arrow/util/memory-pool.cc
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/memory-pool.h"
+
+#include <cstdlib>
+#include <sstream>
+#include <mutex>
+
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+MemoryPool::~MemoryPool() {}
+
+class InternalMemoryPool : public MemoryPool {
+ public:
+  InternalMemoryPool() : bytes_allocated_(0) {}
+  virtual ~InternalMemoryPool();
+
+  Status Allocate(int64_t size, uint8_t** out) override;
+
+  void Free(uint8_t* buffer, int64_t size) override;
+
+  int64_t bytes_allocated() const override;
+
+ private:
+  mutable std::mutex pool_lock_;
+  int64_t bytes_allocated_;
+};
+
+Status InternalMemoryPool::Allocate(int64_t size, uint8_t** out) {
+  std::lock_guard<std::mutex> guard(pool_lock_);
+  *out = static_cast<uint8_t*>(std::malloc(size));
+  if (*out == nullptr) {
+    std::stringstream ss;
+    ss << "malloc of size " << size << " failed";
+    return Status::OutOfMemory(ss.str());
+  }
+
+  bytes_allocated_ += size;
+
+  return Status::OK();
+}
+
+int64_t InternalMemoryPool::bytes_allocated() const {
+  std::lock_guard<std::mutex> guard(pool_lock_);
+  return bytes_allocated_;
+}
+
+void InternalMemoryPool::Free(uint8_t* buffer, int64_t size) {
+  std::lock_guard<std::mutex> guard(pool_lock_);
+  std::free(buffer);
+  bytes_allocated_ -= size;
+}
+
+InternalMemoryPool::~InternalMemoryPool() {}
+
+MemoryPool* GetDefaultMemoryPool() {
+  static InternalMemoryPool default_memory_pool;
+  return &default_memory_pool;
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/memory-pool.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool.h b/cpp/src/arrow/util/memory-pool.h
new file mode 100644
index 0000000..a7cb10d
--- /dev/null
+++ b/cpp/src/arrow/util/memory-pool.h
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_MEMORY_POOL_H
+#define ARROW_UTIL_MEMORY_POOL_H
+
+#include <cstdint>
+
+namespace arrow {
+
+class Status;
+
+class MemoryPool {
+ public:
+  virtual ~MemoryPool();
+
+  virtual Status Allocate(int64_t size, uint8_t** out) = 0;
+  virtual void Free(uint8_t* buffer, int64_t size) = 0;
+
+  virtual int64_t bytes_allocated() const = 0;
+};
+
+MemoryPool* GetDefaultMemoryPool();
+
+} // namespace arrow
+
+#endif // ARROW_UTIL_MEMORY_POOL_H


Mime
View raw message