arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-898: [C++/Python] Use shared_ptr to avoid copying KeyValueMetadata, add to Field type also
Date Thu, 27 Apr 2017 13:28:59 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 3fdeac74c -> e876abbdf


ARROW-898: [C++/Python] Use shared_ptr to avoid copying KeyValueMetadata, add to Field type also

This also adds support for adding and removing schema-level metadata to the Python Schema wrapper object. Need to do the same for Field but putting this up for @cpcloud to review since he's working on using this in parquet-cpp

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #605 from wesm/ARROW-898 and squashes the following commits:

03873f7 [Wes McKinney] RemoveMetadata not return Status
c621c2c [Wes McKinney] Add metadata methods to Field, some code cleaning
581b9fa [Wes McKinney] Add unit tests for passing metadata to Field constructor
51fae29 [Wes McKinney] Add metadata to Field
2ce4003 [Wes McKinney] Test sharing metadata
48aa3ca [Wes McKinney] Use shared_ptr<const T> for KeyValueMetadata so metadata can be shared / not copied


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/e876abbd
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/e876abbd
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/e876abbd

Branch: refs/heads/master
Commit: e876abbdf4f7bac53ae5d56f4680259f021ea8d9
Parents: 3fdeac7
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Thu Apr 27 09:28:54 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Thu Apr 27 09:28:54 2017 -0400

----------------------------------------------------------------------
 cpp/src/arrow/builder.cc                      |  11 +-
 cpp/src/arrow/io/io-memory-benchmark.cc       |   6 +-
 cpp/src/arrow/io/memory.cc                    |   4 +-
 cpp/src/arrow/ipc/metadata.cc                 |  38 ++--
 cpp/src/arrow/type-test.cc                    |  61 ++++--
 cpp/src/arrow/type.cc                         |  60 ++++--
 cpp/src/arrow/type.h                          |  32 ++-
 cpp/src/arrow/util/key-value-metadata-test.cc |   9 +
 cpp/src/arrow/util/key_value_metadata.cc      |   4 +
 cpp/src/arrow/util/key_value_metadata.h       |   6 +
 cpp/src/arrow/util/memory.h                   |   8 +-
 python/pyarrow/_array.pxd                     |   2 -
 python/pyarrow/_array.pyx                     | 220 +++++++++++++++++----
 python/pyarrow/_table.pyx                     |   7 +-
 python/pyarrow/includes/libarrow.pxd          |  31 ++-
 python/pyarrow/tests/pandas_examples.py       |   4 +-
 python/pyarrow/tests/test_convert_pandas.py   |  64 +++---
 python/pyarrow/tests/test_schema.py           |  48 ++++-
 18 files changed, 466 insertions(+), 149 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 4ecb8d3..ab43c2a 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -406,17 +406,16 @@ Status DecimalBuilder::Finish(std::shared_ptr<Array>* out) {
 
 ListBuilder::ListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> value_builder,
     const std::shared_ptr<DataType>& type)
-    : ArrayBuilder(pool,
-          type ? type : std::static_pointer_cast<DataType>(
-                            std::make_shared<ListType>(value_builder->type()))),
+    : ArrayBuilder(
+          pool, type ? type : std::static_pointer_cast<DataType>(
+                                  std::make_shared<ListType>(value_builder->type()))),
       offset_builder_(pool),
       value_builder_(value_builder) {}
 
 ListBuilder::ListBuilder(MemoryPool* pool, std::shared_ptr<Array> values,
     const std::shared_ptr<DataType>& type)
-    : ArrayBuilder(pool,
-          type ? type : std::static_pointer_cast<DataType>(
-                            std::make_shared<ListType>(values->type()))),
+    : ArrayBuilder(pool, type ? type : std::static_pointer_cast<DataType>(
+                                           std::make_shared<ListType>(values->type()))),
       offset_builder_(pool),
       values_(values) {}
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/cpp/src/arrow/io/io-memory-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/io/io-memory-benchmark.cc b/cpp/src/arrow/io/io-memory-benchmark.cc
index 59b511a..6aa9577 100644
--- a/cpp/src/arrow/io/io-memory-benchmark.cc
+++ b/cpp/src/arrow/io/io-memory-benchmark.cc
@@ -26,7 +26,7 @@
 namespace arrow {
 
 static void BM_SerialMemcopy(benchmark::State& state) {  // NOLINT non-const reference
-  constexpr int64_t kTotalSize = 100 * 1024 * 1024; // 100MB
+  constexpr int64_t kTotalSize = 100 * 1024 * 1024;      // 100MB
 
   auto buffer1 = std::make_shared<PoolBuffer>(default_memory_pool());
   buffer1->Resize(kTotalSize);
@@ -43,7 +43,7 @@ static void BM_SerialMemcopy(benchmark::State& state) {  // NOLINT non-const ref
 }
 
 static void BM_ParallelMemcopy(benchmark::State& state) {  // NOLINT non-const reference
-  constexpr int64_t kTotalSize = 100 * 1024 * 1024; // 100MB
+  constexpr int64_t kTotalSize = 100 * 1024 * 1024;        // 100MB
 
   auto buffer1 = std::make_shared<PoolBuffer>(default_memory_pool());
   buffer1->Resize(kTotalSize);
@@ -72,4 +72,4 @@ BENCHMARK(BM_ParallelMemcopy)
     ->MinTime(1.0)
     ->UseRealTime();
 
-} // namespace arrow
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/cpp/src/arrow/io/memory.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/io/memory.cc b/cpp/src/arrow/io/memory.cc
index 95c6206..faf02d2 100644
--- a/cpp/src/arrow/io/memory.cc
+++ b/cpp/src/arrow/io/memory.cc
@@ -140,8 +140,8 @@ Status FixedSizeBufferWriter::Tell(int64_t* position) {
 
 Status FixedSizeBufferWriter::Write(const uint8_t* data, int64_t nbytes) {
   if (nbytes > memcopy_threshold_ && memcopy_num_threads_ > 1) {
-    parallel_memcopy(mutable_data_ + position_, data, nbytes,
-                     memcopy_blocksize_, memcopy_num_threads_);
+    parallel_memcopy(mutable_data_ + position_, data, nbytes, memcopy_blocksize_,
+        memcopy_num_threads_);
   } else {
     memcpy(mutable_data_ + position_, data, nbytes);
   }

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/cpp/src/arrow/ipc/metadata.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc
index c0b518a..706ab2e 100644
--- a/cpp/src/arrow/ipc/metadata.cc
+++ b/cpp/src/arrow/ipc/metadata.cc
@@ -593,20 +593,27 @@ static Status SchemaToFlatbuffer(FBB& fbb, const Schema& schema,
     field_offsets.push_back(offset);
   }
 
+  auto fb_offsets = fbb.CreateVector(field_offsets);
+
   /// Custom metadata
-  const auto& custom_metadata_ = schema.custom_metadata();
-  std::vector<KeyValueOffset> key_value_offsets;
-  size_t metadata_size = custom_metadata_.size();
-  key_value_offsets.reserve(metadata_size);
-  for (size_t i = 0; i < metadata_size; ++i) {
-    const auto& key = custom_metadata_.key(i);
-    const auto& value = custom_metadata_.value(i);
-    key_value_offsets.push_back(
-        flatbuf::CreateKeyValue(fbb, fbb.CreateString(key), fbb.CreateString(value)));
+  const KeyValueMetadata* metadata = schema.metadata().get();
+
+  if (metadata != nullptr) {
+    std::vector<KeyValueOffset> key_value_offsets;
+    size_t metadata_size = metadata->size();
+    key_value_offsets.reserve(metadata_size);
+    for (size_t i = 0; i < metadata_size; ++i) {
+      const auto& key = metadata->key(i);
+      const auto& value = metadata->value(i);
+      key_value_offsets.push_back(
+          flatbuf::CreateKeyValue(fbb, fbb.CreateString(key), fbb.CreateString(value)));
+    }
+    *out = flatbuf::CreateSchema(
+        fbb, endianness(), fb_offsets, fbb.CreateVector(key_value_offsets));
+  } else {
+    *out = flatbuf::CreateSchema(fbb, endianness(), fb_offsets);
   }
 
-  *out = flatbuf::CreateSchema(fbb, endianness(), fbb.CreateVector(field_offsets),
-      fbb.CreateVector(key_value_offsets));
   return Status::OK();
 }
 
@@ -955,17 +962,16 @@ Status GetSchema(const void* opaque_schema, const DictionaryMemo& dictionary_mem
     RETURN_NOT_OK(FieldFromFlatbuffer(field, dictionary_memo, &fields[i]));
   }
 
-  KeyValueMetadata custom_metadata;
+  auto metadata = std::make_shared<KeyValueMetadata>();
   auto fb_metadata = schema->custom_metadata();
   if (fb_metadata != nullptr) {
-    custom_metadata.reserve(fb_metadata->size());
-
+    metadata->reserve(fb_metadata->size());
     for (const auto& pair : *fb_metadata) {
-      custom_metadata.Append(pair->key()->str(), pair->value()->str());
+      metadata->Append(pair->key()->str(), pair->value()->str());
     }
   }
 
-  *out = std::make_shared<Schema>(fields, custom_metadata);
+  *out = std::make_shared<Schema>(fields, metadata);
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/cpp/src/arrow/type-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc
index 8e2dfd5..e73adec 100644
--- a/cpp/src/arrow/type-test.cc
+++ b/cpp/src/arrow/type-test.cc
@@ -23,6 +23,7 @@
 
 #include "gtest/gtest.h"
 
+#include "arrow/test-util.h"
 #include "arrow/type.h"
 
 using std::shared_ptr;
@@ -50,6 +51,40 @@ TEST(TestField, Equals) {
   ASSERT_FALSE(f0.Equals(f0_nn));
 }
 
+TEST(TestField, TestMetadataConstruction) {
+  auto metadata = std::shared_ptr<KeyValueMetadata>(
+      new KeyValueMetadata({"foo", "bar"}, {"bizz", "buzz"}));
+  auto metadata2 = metadata->Copy();
+  auto f0 = field("f0", int32(), true, metadata);
+  auto f1 = field("f0", int32(), true, metadata2);
+  ASSERT_TRUE(metadata->Equals(*f0->metadata()));
+  ASSERT_TRUE(f0->Equals(*f1));
+}
+
+TEST(TestField, TestAddMetadata) {
+  auto metadata = std::shared_ptr<KeyValueMetadata>(
+      new KeyValueMetadata({"foo", "bar"}, {"bizz", "buzz"}));
+  auto f0 = field("f0", int32());
+  auto f1 = field("f0", int32(), true, metadata);
+  std::shared_ptr<Field> f2;
+  ASSERT_OK(f0->AddMetadata(metadata, &f2));
+
+  ASSERT_FALSE(f2->Equals(*f0));
+  ASSERT_TRUE(f2->Equals(*f1));
+
+  // Not copied
+  ASSERT_TRUE(metadata.get() == f1->metadata().get());
+}
+
+TEST(TestField, TestRemoveMetadata) {
+  auto metadata = std::shared_ptr<KeyValueMetadata>(
+      new KeyValueMetadata({"foo", "bar"}, {"bizz", "buzz"}));
+  auto f0 = field("f0", int32());
+  auto f1 = field("f0", int32(), true, metadata);
+  std::shared_ptr<Field> f2 = f1->RemoveMetadata();
+  ASSERT_TRUE(f2->metadata() == nullptr);
+}
+
 class TestSchema : public ::testing::Test {
  public:
   void SetUp() {}
@@ -117,38 +152,42 @@ TEST_F(TestSchema, GetFieldByName) {
   ASSERT_TRUE(result == nullptr);
 }
 
-TEST_F(TestSchema, TestCustomMetadataConstruction) {
+TEST_F(TestSchema, TestMetadataConstruction) {
   auto f0 = field("f0", int32());
   auto f1 = field("f1", uint8(), false);
   auto f2 = field("f2", utf8());
   vector<shared_ptr<Field>> fields = {f0, f1, f2};
-  KeyValueMetadata metadata({"foo", "bar"}, {"bizz", "buzz"});
+  auto metadata = std::shared_ptr<KeyValueMetadata>(
+      new KeyValueMetadata({"foo", "bar"}, {"bizz", "buzz"}));
   auto schema = std::make_shared<Schema>(fields, metadata);
-  ASSERT_TRUE(metadata.Equals(schema->custom_metadata()));
+  ASSERT_TRUE(metadata->Equals(*schema->metadata()));
 }
 
-TEST_F(TestSchema, TestAddCustomMetadata) {
+TEST_F(TestSchema, TestAddMetadata) {
   auto f0 = field("f0", int32());
   auto f1 = field("f1", uint8(), false);
   auto f2 = field("f2", utf8());
   vector<shared_ptr<Field>> fields = {f0, f1, f2};
-  KeyValueMetadata metadata({"foo", "bar"}, {"bizz", "buzz"});
+  auto metadata = std::shared_ptr<KeyValueMetadata>(
+      new KeyValueMetadata({"foo", "bar"}, {"bizz", "buzz"}));
   auto schema = std::make_shared<Schema>(fields);
   std::shared_ptr<Schema> new_schema;
-  schema->AddCustomMetadata(metadata, &new_schema);
-  ASSERT_TRUE(metadata.Equals(new_schema->custom_metadata()));
+  schema->AddMetadata(metadata, &new_schema);
+  ASSERT_TRUE(metadata->Equals(*new_schema->metadata()));
+
+  // Not copied
+  ASSERT_TRUE(metadata.get() == new_schema->metadata().get());
 }
 
-TEST_F(TestSchema, TestRemoveCustomMetadata) {
+TEST_F(TestSchema, TestRemoveMetadata) {
   auto f0 = field("f0", int32());
   auto f1 = field("f1", uint8(), false);
   auto f2 = field("f2", utf8());
   vector<shared_ptr<Field>> fields = {f0, f1, f2};
   KeyValueMetadata metadata({"foo", "bar"}, {"bizz", "buzz"});
   auto schema = std::make_shared<Schema>(fields);
-  std::shared_ptr<Schema> new_schema;
-  schema->RemoveCustomMetadata(&new_schema);
-  ASSERT_EQ(0, new_schema->custom_metadata().size());
+  std::shared_ptr<Schema> new_schema = schema->RemoveMetadata();
+  ASSERT_TRUE(new_schema->metadata() == nullptr);
 }
 
 #define PRIMITIVE_TEST(KLASS, ENUM, NAME)        \

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index f59f8fb..b1e322c 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -31,10 +31,31 @@
 
 namespace arrow {
 
+Status Field::AddMetadata(const std::shared_ptr<const KeyValueMetadata>& metadata,
+    std::shared_ptr<Field>* out) const {
+  *out = std::make_shared<Field>(name_, type_, nullable_, metadata);
+  return Status::OK();
+}
+
+std::shared_ptr<Field> Field::RemoveMetadata() const {
+  return std::make_shared<Field>(name_, type_, nullable_);
+}
+
 bool Field::Equals(const Field& other) const {
-  return (this == &other) ||
-         (this->name_ == other.name_ && this->nullable_ == other.nullable_ &&
-             this->type_->Equals(*other.type_.get()));
+  if (this == &other) {
+    return true;
+  }
+  if (this->name_ == other.name_ && this->nullable_ == other.nullable_ &&
+      this->type_->Equals(*other.type_.get())) {
+    if (metadata_ == nullptr && other.metadata_ == nullptr) {
+      return true;
+    } else if ((metadata_ == nullptr) ^ (other.metadata_ == nullptr)) {
+      return false;
+    } else {
+      return metadata_->Equals(*other.metadata_);
+    }
+  }
+  return false;
 }
 
 bool Field::Equals(const std::shared_ptr<Field>& other) const {
@@ -233,8 +254,8 @@ std::string NullType::ToString() const {
 // Schema implementation
 
 Schema::Schema(const std::vector<std::shared_ptr<Field>>& fields,
-    const KeyValueMetadata& custom_metadata)
-    : fields_(fields), custom_metadata_(custom_metadata) {}
+    const std::shared_ptr<const KeyValueMetadata>& metadata)
+    : fields_(fields), metadata_(metadata) {}
 
 bool Schema::Equals(const Schema& other) const {
   if (this == &other) { return true; }
@@ -266,26 +287,25 @@ Status Schema::AddField(
   DCHECK_GE(i, 0);
   DCHECK_LE(i, this->num_fields());
 
-  *out = std::make_shared<Schema>(AddVectorElement(fields_, i, field), custom_metadata_);
+  *out = std::make_shared<Schema>(AddVectorElement(fields_, i, field), metadata_);
   return Status::OK();
 }
 
-Status Schema::AddCustomMetadata(
-    const KeyValueMetadata& custom_metadata, std::shared_ptr<Schema>* out) const {
-  *out = std::make_shared<Schema>(fields_, custom_metadata);
+Status Schema::AddMetadata(const std::shared_ptr<const KeyValueMetadata>& metadata,
+    std::shared_ptr<Schema>* out) const {
+  *out = std::make_shared<Schema>(fields_, metadata);
   return Status::OK();
 }
 
-Status Schema::RemoveCustomMetadata(std::shared_ptr<Schema>* out) {
-  *out = std::make_shared<Schema>(fields_, KeyValueMetadata());
-  return Status::OK();
+std::shared_ptr<Schema> Schema::RemoveMetadata() const {
+  return std::make_shared<Schema>(fields_);
 }
 
 Status Schema::RemoveField(int i, std::shared_ptr<Schema>* out) const {
   DCHECK_GE(i, 0);
   DCHECK_LT(i, this->num_fields());
 
-  *out = std::make_shared<Schema>(DeleteVectorElement(fields_, i), custom_metadata_);
+  *out = std::make_shared<Schema>(DeleteVectorElement(fields_, i), metadata_);
   return Status::OK();
 }
 
@@ -298,6 +318,15 @@ std::string Schema::ToString() const {
     buffer << field->ToString();
     ++i;
   }
+
+  if (metadata_) {
+    buffer << "\n-- metadata --";
+    for (int64_t i = 0; i < metadata_->size(); ++i) {
+      buffer << "\n" << metadata_->key(i) << ": "
+             << metadata_->value(i);
+    }
+  }
+
   return buffer.str();
 }
 
@@ -391,8 +420,9 @@ std::shared_ptr<DataType> dictionary(const std::shared_ptr<DataType>& index_type
 }
 
 std::shared_ptr<Field> field(
-    const std::string& name, const TypePtr& type, bool nullable) {
-  return std::make_shared<Field>(name, type, nullable);
+    const std::string& name, const std::shared_ptr<DataType>& type, bool nullable,
+    const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  return std::make_shared<Field>(name, type, nullable, metadata);
 }
 
 std::shared_ptr<DataType> decimal(int precision, int scale) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index dc94561..bb25857 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -203,8 +203,16 @@ class NoExtraMeta {};
 class ARROW_EXPORT Field {
  public:
   Field(const std::string& name, const std::shared_ptr<DataType>& type,
-      bool nullable = true)
-      : name_(name), type_(type), nullable_(nullable) {}
+      bool nullable = true,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = nullptr)
+    : name_(name), type_(type), nullable_(nullable), metadata_(metadata) {}
+
+  std::shared_ptr<const KeyValueMetadata> metadata() const { return metadata_; }
+
+  Status AddMetadata(const std::shared_ptr<const KeyValueMetadata>& metadata,
+      std::shared_ptr<Field>* out) const;
+
+  std::shared_ptr<Field> RemoveMetadata() const;
 
   bool Equals(const Field& other) const;
   bool Equals(const std::shared_ptr<Field>& other) const;
@@ -224,6 +232,9 @@ class ARROW_EXPORT Field {
 
   // Fields can be nullable
   bool nullable_;
+
+  // The field's metadata, if any
+  std::shared_ptr<const KeyValueMetadata> metadata_;
 };
 
 typedef std::shared_ptr<Field> FieldPtr;
@@ -679,7 +690,7 @@ class ARROW_EXPORT DictionaryType : public FixedWidthType {
 class ARROW_EXPORT Schema {
  public:
   explicit Schema(const std::vector<std::shared_ptr<Field>>& fields,
-      const KeyValueMetadata& custom_metadata = KeyValueMetadata());
+      const std::shared_ptr<const KeyValueMetadata>& metadata = nullptr);
 
   // Returns true if all of the schema fields are equal
   bool Equals(const Schema& other) const;
@@ -691,7 +702,7 @@ class ARROW_EXPORT Schema {
   std::shared_ptr<Field> GetFieldByName(const std::string& name);
 
   const std::vector<std::shared_ptr<Field>>& fields() const { return fields_; }
-  const KeyValueMetadata& custom_metadata() const { return custom_metadata_; }
+  std::shared_ptr<const KeyValueMetadata> metadata() const { return metadata_; }
 
   // Render a string representation of the schema suitable for debugging
   std::string ToString() const;
@@ -700,16 +711,18 @@ class ARROW_EXPORT Schema {
       int i, const std::shared_ptr<Field>& field, std::shared_ptr<Schema>* out) const;
   Status RemoveField(int i, std::shared_ptr<Schema>* out) const;
 
-  Status AddCustomMetadata(
-      const KeyValueMetadata& metadata, std::shared_ptr<Schema>* out) const;
-  Status RemoveCustomMetadata(std::shared_ptr<Schema>* out);
+  Status AddMetadata(const std::shared_ptr<const KeyValueMetadata>& metadata,
+      std::shared_ptr<Schema>* out) const;
+
+  std::shared_ptr<Schema> RemoveMetadata() const;
 
   int num_fields() const { return static_cast<int>(fields_.size()); }
 
  private:
   std::vector<std::shared_ptr<Field>> fields_;
   std::unordered_map<std::string, int> name_to_index_;
-  KeyValueMetadata custom_metadata_;
+
+  std::shared_ptr<const KeyValueMetadata> metadata_;
 };
 
 // ----------------------------------------------------------------------
@@ -741,7 +754,8 @@ std::shared_ptr<DataType> ARROW_EXPORT dictionary(
     const std::shared_ptr<DataType>& index_type, const std::shared_ptr<Array>& values);
 
 std::shared_ptr<Field> ARROW_EXPORT field(
-    const std::string& name, const std::shared_ptr<DataType>& type, bool nullable = true);
+    const std::string& name, const std::shared_ptr<DataType>& type, bool nullable = true,
+    const std::shared_ptr<const KeyValueMetadata>& metadata = nullptr);
 
 // ----------------------------------------------------------------------
 //

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/cpp/src/arrow/util/key-value-metadata-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/key-value-metadata-test.cc b/cpp/src/arrow/util/key-value-metadata-test.cc
index aadc989..59cfdf5 100644
--- a/cpp/src/arrow/util/key-value-metadata-test.cc
+++ b/cpp/src/arrow/util/key-value-metadata-test.cc
@@ -72,6 +72,15 @@ TEST(KeyValueMetadataTest, StringAppend) {
   ASSERT_EQ("red", metadata.value(3));
 }
 
+TEST(KeyValueMetadataTest, Copy) {
+  std::vector<std::string> keys = {"foo", "bar"};
+  std::vector<std::string> values = {"bizz", "buzz"};
+
+  KeyValueMetadata metadata(keys, values);
+  auto metadata2 = metadata.Copy();
+  ASSERT_TRUE(metadata.Equals(*metadata2));
+}
+
 TEST(KeyValueMetadataTest, Equals) {
   std::vector<std::string> keys = {"foo", "bar"};
   std::vector<std::string> values = {"bizz", "buzz"};

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/cpp/src/arrow/util/key_value_metadata.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/key_value_metadata.cc b/cpp/src/arrow/util/key_value_metadata.cc
index c91478b..8bddd5d 100644
--- a/cpp/src/arrow/util/key_value_metadata.cc
+++ b/cpp/src/arrow/util/key_value_metadata.cc
@@ -91,6 +91,10 @@ std::string KeyValueMetadata::value(int64_t i) const {
   return values_[static_cast<size_t>(i)];
 }
 
+std::shared_ptr<KeyValueMetadata> KeyValueMetadata::Copy() const {
+  return std::make_shared<KeyValueMetadata>(keys_, values_);
+}
+
 bool KeyValueMetadata::Equals(const KeyValueMetadata& other) const {
   return size() == other.size() &&
          std::equal(keys_.cbegin(), keys_.cend(), other.keys_.cbegin()) &&

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/cpp/src/arrow/util/key_value_metadata.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/key_value_metadata.h b/cpp/src/arrow/util/key_value_metadata.h
index 713b2c0..bae4ad8 100644
--- a/cpp/src/arrow/util/key_value_metadata.h
+++ b/cpp/src/arrow/util/key_value_metadata.h
@@ -19,10 +19,12 @@
 #define ARROW_UTIL_KEY_VALUE_METADATA_H
 
 #include <cstdint>
+#include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
 
+#include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
@@ -44,11 +46,15 @@ class ARROW_EXPORT KeyValueMetadata {
   std::string key(int64_t i) const;
   std::string value(int64_t i) const;
 
+  std::shared_ptr<KeyValueMetadata> Copy() const;
+
   bool Equals(const KeyValueMetadata& other) const;
 
  private:
   std::vector<std::string> keys_;
   std::vector<std::string> values_;
+
+  DISALLOW_COPY_AND_ASSIGN(KeyValueMetadata);
 };
 
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/cpp/src/arrow/util/memory.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory.h b/cpp/src/arrow/util/memory.h
index 7feeb29..c5c17ef 100644
--- a/cpp/src/arrow/util/memory.h
+++ b/cpp/src/arrow/util/memory.h
@@ -31,7 +31,7 @@ uint8_t* pointer_logical_and(const uint8_t* address, uintptr_t bits) {
 // A helper function for doing memcpy with multiple threads. This is required
 // to saturate the memory bandwidth of modern cpus.
 void parallel_memcopy(uint8_t* dst, const uint8_t* src, int64_t nbytes,
-                      uintptr_t block_size, int num_threads) {
+    uintptr_t block_size, int num_threads) {
   std::vector<std::thread> threadpool(num_threads);
   uint8_t* left = pointer_logical_and(src + block_size - 1, ~(block_size - 1));
   uint8_t* right = pointer_logical_and(src + nbytes, ~(block_size - 1));
@@ -52,8 +52,8 @@ void parallel_memcopy(uint8_t* dst, const uint8_t* src, int64_t nbytes,
 
   // Start all threads first and handle leftovers while threads run.
   for (int i = 0; i < num_threads; i++) {
-    threadpool[i] = std::thread(memcpy, dst + prefix + i * chunk_size,
-        left + i * chunk_size, chunk_size);
+    threadpool[i] = std::thread(
+        memcpy, dst + prefix + i * chunk_size, left + i * chunk_size, chunk_size);
   }
 
   memcpy(dst, src, prefix);
@@ -64,6 +64,6 @@ void parallel_memcopy(uint8_t* dst, const uint8_t* src, int64_t nbytes,
   }
 }
 
-} // namespace arrow
+}  // namespace arrow
 
 #endif  // ARROW_UTIL_MEMORY_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/python/pyarrow/_array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/_array.pxd b/python/pyarrow/_array.pxd
index 4d5db86..464de31 100644
--- a/python/pyarrow/_array.pxd
+++ b/python/pyarrow/_array.pxd
@@ -81,8 +81,6 @@ cdef class Schema:
     cdef init(self, const vector[shared_ptr[CField]]& fields)
     cdef init_schema(self, const shared_ptr[CSchema]& schema)
 
-    cpdef dict custom_metadata(self)
-
 
 cdef class Scalar:
     cdef readonly:

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/python/pyarrow/_array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/_array.pyx b/python/pyarrow/_array.pyx
index 2fb20b7..658f4b3 100644
--- a/python/pyarrow/_array.pyx
+++ b/python/pyarrow/_array.pyx
@@ -172,7 +172,14 @@ cdef class DecimalType(FixedSizeBinaryType):
 
 
 cdef class Field:
+    """
+    Represents a named field, with a data type, nullability, and optional
+    metadata
 
+    Notes
+    -----
+    Do not use this class's constructor directly; use pyarrow.field
+    """
     def __cinit__(self):
         pass
 
@@ -181,32 +188,77 @@ cdef class Field:
         self.field = field.get()
         self.type = box_data_type(field.get().type())
 
-    @classmethod
-    def from_py(cls, object name, DataType type, bint nullable=True):
-        cdef Field result = Field()
-        result.type = type
-        result.sp_field.reset(new CField(tobytes(name), type.sp_type,
-                                         nullable))
-        result.field = result.sp_field.get()
+    def equals(self, Field other):
+        """
+        Test if this field is equal to the other
+        """
+        return self.field.Equals(deref(other.field))
 
-        return result
+    def __str__(self):
+        self._check_null()
+        return 'pyarrow.Field<{0}>'.format(frombytes(self.field.ToString()))
 
     def __repr__(self):
-        return 'Field({0!r}, type={1})'.format(self.name, str(self.type))
+        return self.__str__()
 
     property nullable:
 
         def __get__(self):
+            self._check_null()
             return self.field.nullable()
 
     property name:
 
         def __get__(self):
-            if box_field(self.sp_field) is None:
-                raise ReferenceError(
-                    'Field not initialized (references NULL pointer)')
+            self._check_null()
             return frombytes(self.field.name())
 
+    property metadata:
+
+        def __get__(self):
+            self._check_null()
+            return box_metadata(self.field.metadata().get())
+
+    def _check_null(self):
+        if self.field == NULL:
+            raise ReferenceError(
+                'Field not initialized (references NULL pointer)')
+
+    def add_metadata(self, dict metadata):
+        """
+        Add metadata as dict of string keys and values to Field
+
+        Parameters
+        ----------
+        metadata : dict
+            Keys and values must be string-like / coercible to bytes
+
+        Returns
+        -------
+        field : pyarrow.Field
+        """
+        cdef shared_ptr[CKeyValueMetadata] c_meta
+        convert_metadata(metadata, &c_meta)
+
+        cdef shared_ptr[CField] new_field
+        with nogil:
+            check_status(self.field.AddMetadata(c_meta, &new_field))
+
+        return box_field(new_field)
+
+    def remove_metadata(self):
+        """
+        Create new field without metadata, if any
+
+        Returns
+        -------
+        field : pyarrow.Field
+        """
+        cdef shared_ptr[CField] new_field
+        with nogil:
+            new_field = self.field.RemoveMetadata()
+        return box_field(new_field)
+
 
 cdef class Schema:
 
@@ -226,6 +278,14 @@ cdef class Schema:
 
         return result
 
+    cdef init(self, const vector[shared_ptr[CField]]& fields):
+        self.schema = new CSchema(fields)
+        self.sp_schema.reset(self.schema)
+
+    cdef init_schema(self, const shared_ptr[CSchema]& schema):
+        self.schema = schema.get()
+        self.sp_schema = schema
+
     property names:
 
         def __get__(self):
@@ -236,20 +296,10 @@ cdef class Schema:
                 result.append(name)
             return result
 
-    cdef init(self, const vector[shared_ptr[CField]]& fields):
-        self.schema = new CSchema(fields)
-        self.sp_schema.reset(self.schema)
-
-    cdef init_schema(self, const shared_ptr[CSchema]& schema):
-        self.schema = schema.get()
-        self.sp_schema = schema
+    property metadata:
 
-    cpdef dict custom_metadata(self):
-        cdef:
-            CKeyValueMetadata metadata = self.schema.custom_metadata()
-            unordered_map[c_string, c_string] result
-        metadata.ToUnorderedMap(&result)
-        return result
+        def __get__(self):
+            return box_metadata(self.schema.metadata().get())
 
     def equals(self, other):
         """
@@ -274,23 +324,40 @@ cdef class Schema:
         """
         return box_field(self.schema.GetFieldByName(tobytes(name)))
 
-    @classmethod
-    def from_fields(cls, fields):
-        cdef:
-            Schema result
-            Field field
-            vector[shared_ptr[CField]] c_fields
+    def add_metadata(self, dict metadata):
+        """
+        Add metadata as dict of string keys and values to Schema
 
-        c_fields.resize(len(fields))
+        Parameters
+        ----------
+        metadata : dict
+            Keys and values must be string-like / coercible to bytes
 
-        for i in range(len(fields)):
-            field = fields[i]
-            c_fields[i] = field.sp_field
+        Returns
+        -------
+        schema : pyarrow.Schema
+        """
+        cdef shared_ptr[CKeyValueMetadata] c_meta
+        convert_metadata(metadata, &c_meta)
 
-        result = Schema()
-        result.init(c_fields)
+        cdef shared_ptr[CSchema] new_schema
+        with nogil:
+            check_status(self.schema.AddMetadata(c_meta, &new_schema))
 
-        return result
+        return box_schema(new_schema)
+
+    def remove_metadata(self):
+        """
+        Create new schema without metadata, if any
+
+        Returns
+        -------
+        schema : pyarrow.Schema
+        """
+        cdef shared_ptr[CSchema] new_schema
+        with nogil:
+            new_schema = self.schema.RemoveMetadata()
+        return box_schema(new_schema)
 
     def __str__(self):
         return frombytes(self.schema.ToString())
@@ -299,6 +366,15 @@ cdef class Schema:
         return self.__str__()
 
 
+cdef box_metadata(const CKeyValueMetadata* metadata):
+    cdef unordered_map[c_string, c_string] result
+    if metadata != NULL:
+        metadata.ToUnorderedMap(&result)
+        return result
+    else:
+        return None
+
+
 cdef dict _type_cache = {}
 
 
@@ -315,8 +391,49 @@ cdef DataType primitive_type(Type type):
 #------------------------------------------------------------
 # Type factory functions
 
-def field(name, type, bint nullable=True):
-    return Field.from_py(name, type, nullable)
+cdef int convert_metadata(dict metadata,
+                          shared_ptr[CKeyValueMetadata]* out) except -1:
+    cdef:
+        shared_ptr[CKeyValueMetadata] meta = (
+            make_shared[CKeyValueMetadata]())
+        c_string key, value
+
+    for py_key, py_value in metadata.items():
+        key = tobytes(py_key)
+        value = tobytes(py_value)
+        meta.get().Append(key, value)
+    out[0] = meta
+    return 0
+
+
+def field(name, DataType type, bint nullable=True, dict metadata=None):
+    """
+    Create a pyarrow.Field instance
+
+    Parameters
+    ----------
+    name : string or bytes
+    type : pyarrow.DataType
+    nullable : boolean, default True
+    metadata : dict, default None
+        Keys and values must be coercible to bytes
+
+    Returns
+    -------
+    field : pyarrow.Field
+    """
+    cdef:
+        shared_ptr[CKeyValueMetadata] c_meta
+        Field result = Field()
+
+    if metadata is not None:
+        convert_metadata(metadata, &c_meta)
+
+    result.sp_field.reset(new CField(tobytes(name), type.sp_type,
+                                     nullable, c_meta))
+    result.field = result.sp_field.get()
+    result.type = type
+    return result
 
 
 cdef set PRIMITIVE_TYPES = set([
@@ -554,7 +671,28 @@ def struct(fields):
 
 
 def schema(fields):
-    return Schema.from_fields(fields)
+    """
+    Construct pyarrow.Schema from collection of fields
+
+    Parameters
+    ----------
+    field : list or iterable
+
+    Returns
+    -------
+    schema : pyarrow.Schema
+    """
+    cdef:
+        Schema result
+        Field field
+        vector[shared_ptr[CField]] c_fields
+
+    for i, field in enumerate(fields):
+        c_fields.push_back(field.sp_field)
+
+    result = Schema()
+    result.init(c_fields)
+    return result
 
 
 cdef DataType box_data_type(const shared_ptr[CDataType]& type):

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/python/pyarrow/_table.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/_table.pyx b/python/pyarrow/_table.pyx
index ed0782b..599e046 100644
--- a/python/pyarrow/_table.pyx
+++ b/python/pyarrow/_table.pyx
@@ -272,11 +272,12 @@ cdef class Column:
         return chunked_array
 
 
-cdef CKeyValueMetadata key_value_metadata_from_dict(dict metadata):
+cdef shared_ptr[const CKeyValueMetadata] key_value_metadata_from_dict(
+    dict metadata):
     cdef:
         unordered_map[c_string, c_string] unordered_metadata = metadata
-        CKeyValueMetadata c_metadata = CKeyValueMetadata(unordered_metadata)
-    return c_metadata
+    return (<shared_ptr[const CKeyValueMetadata]>
+            make_shared[CKeyValueMetadata](unordered_metadata))
 
 
 cdef int _schema_from_arrays(

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index ef1a332..8a730b3 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -23,6 +23,10 @@ cdef extern from "arrow/util/key_value_metadata.h" namespace "arrow" nogil:
     cdef cppclass CKeyValueMetadata" arrow::KeyValueMetadata":
         CKeyValueMetadata()
         CKeyValueMetadata(const unordered_map[c_string, c_string]&)
+
+        c_bool Equals(const CKeyValueMetadata& other)
+
+        void Append(const c_string& key, const c_string& value)
         void ToUnorderedMap(unordered_map[c_string, c_string]*) const
 
 cdef extern from "arrow/api.h" namespace "arrow" nogil:
@@ -168,25 +172,48 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         shared_ptr[CDataType] type()
         c_bool nullable()
 
+        c_string ToString()
+        c_bool Equals(const CField& other)
+
+        shared_ptr[const CKeyValueMetadata] metadata()
+
         CField(const c_string& name, const shared_ptr[CDataType]& type,
                c_bool nullable)
 
+        CField(const c_string& name, const shared_ptr[CDataType]& type,
+               c_bool nullable, const shared_ptr[CKeyValueMetadata]& metadata)
+
+        # Removed const in Cython so don't have to cast to get code to generate
+        CStatus AddMetadata(const shared_ptr[CKeyValueMetadata]& metadata,
+                            shared_ptr[CField]* out)
+        shared_ptr[CField] RemoveMetadata()
+
+
     cdef cppclass CStructType" arrow::StructType"(CDataType):
         CStructType(const vector[shared_ptr[CField]]& fields)
 
     cdef cppclass CSchema" arrow::Schema":
         CSchema(const vector[shared_ptr[CField]]& fields)
         CSchema(const vector[shared_ptr[CField]]& fields,
-                const CKeyValueMetadata& custom_metadata)
+                const shared_ptr[const CKeyValueMetadata]& metadata)
+
+        # Does not actually exist, but gets Cython to not complain
+        CSchema(const vector[shared_ptr[CField]]& fields,
+                const shared_ptr[CKeyValueMetadata]& metadata)
 
         c_bool Equals(const CSchema& other)
 
         shared_ptr[CField] field(int i)
-        const CKeyValueMetadata& custom_metadata() const
+        shared_ptr[const CKeyValueMetadata] metadata()
         shared_ptr[CField] GetFieldByName(c_string& name)
         int num_fields()
         c_string ToString()
 
+        # Removed const in Cython so don't have to cast to get code to generate
+        CStatus AddMetadata(const shared_ptr[CKeyValueMetadata]& metadata,
+                            shared_ptr[CSchema]* out)
+        shared_ptr[CSchema] RemoveMetadata()
+
     cdef cppclass CBooleanArray" arrow::BooleanArray"(CArray):
         c_bool Value(int i)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/python/pyarrow/tests/pandas_examples.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/pandas_examples.py b/python/pyarrow/tests/pandas_examples.py
index e081c38..313a3ae 100644
--- a/python/pyarrow/tests/pandas_examples.py
+++ b/python/pyarrow/tests/pandas_examples.py
@@ -73,7 +73,7 @@ def dataframe_with_arrays():
     ]
 
     df = pd.DataFrame(arrays)
-    schema = pa.Schema.from_fields(fields)
+    schema = pa.schema(fields)
 
     return df, schema
 
@@ -114,6 +114,6 @@ def dataframe_with_lists():
     ]
 
     df = pd.DataFrame(arrays)
-    schema = pa.Schema.from_fields(fields)
+    schema = pa.schema(fields)
 
     return df, schema

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index f360234..2779da3 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -106,10 +106,10 @@ class TestPandasConversion(unittest.TestCase):
         for numpy_dtype, arrow_dtype in dtypes:
             values = np.random.randn(num_values)
             data[numpy_dtype] = values.astype(numpy_dtype)
-            fields.append(pa.Field.from_py(numpy_dtype, arrow_dtype))
+            fields.append(pa.field(numpy_dtype, arrow_dtype))
 
         df = pd.DataFrame(data)
-        schema = pa.Schema.from_fields(fields)
+        schema = pa.schema(fields)
         self._check_pandas_roundtrip(df, expected_schema=schema)
 
     def test_float_nulls(self):
@@ -127,7 +127,7 @@ class TestPandasConversion(unittest.TestCase):
 
             arr = pa.Array.from_pandas(values, null_mask)
             arrays.append(arr)
-            fields.append(pa.Field.from_py(name, arrow_dtype))
+            fields.append(pa.field(name, arrow_dtype))
             values[null_mask] = np.nan
 
             expected_cols.append(values)
@@ -136,7 +136,7 @@ class TestPandasConversion(unittest.TestCase):
                                 columns=names)
 
         table = pa.Table.from_arrays(arrays, names)
-        assert table.schema.equals(pa.Schema.from_fields(fields))
+        assert table.schema.equals(pa.schema(fields))
         result = table.to_pandas()
         tm.assert_frame_equal(result, ex_frame)
 
@@ -159,10 +159,10 @@ class TestPandasConversion(unittest.TestCase):
                                        min(info.max, np.iinfo('i8').max),
                                        size=num_values)
             data[dtype] = values.astype(dtype)
-            fields.append(pa.Field.from_py(dtype, arrow_dtype))
+            fields.append(pa.field(dtype, arrow_dtype))
 
         df = pd.DataFrame(data)
-        schema = pa.Schema.from_fields(fields)
+        schema = pa.schema(fields)
         self._check_pandas_roundtrip(df, expected_schema=schema)
 
     def test_integer_with_nulls(self):
@@ -200,8 +200,8 @@ class TestPandasConversion(unittest.TestCase):
         np.random.seed(0)
 
         df = pd.DataFrame({'bools': np.random.randn(num_values) > 0})
-        field = pa.Field.from_py('bools', pa.bool_())
-        schema = pa.Schema.from_fields([field])
+        field = pa.field('bools', pa.bool_())
+        schema = pa.schema([field])
         self._check_pandas_roundtrip(df, expected_schema=schema)
 
     def test_boolean_nulls(self):
@@ -217,8 +217,8 @@ class TestPandasConversion(unittest.TestCase):
         expected = values.astype(object)
         expected[mask] = None
 
-        field = pa.Field.from_py('bools', pa.bool_())
-        schema = pa.Schema.from_fields([field])
+        field = pa.field('bools', pa.bool_())
+        schema = pa.schema([field])
         ex_frame = pd.DataFrame({'bools': expected})
 
         table = pa.Table.from_arrays([arr], ['bools'])
@@ -230,16 +230,16 @@ class TestPandasConversion(unittest.TestCase):
     def test_boolean_object_nulls(self):
         arr = np.array([False, None, True] * 100, dtype=object)
         df = pd.DataFrame({'bools': arr})
-        field = pa.Field.from_py('bools', pa.bool_())
-        schema = pa.Schema.from_fields([field])
+        field = pa.field('bools', pa.bool_())
+        schema = pa.schema([field])
         self._check_pandas_roundtrip(df, expected_schema=schema)
 
     def test_unicode(self):
         repeats = 1000
         values = [u'foo', None, u'bar', u'maƱana', np.nan]
         df = pd.DataFrame({'strings': values * repeats})
-        field = pa.Field.from_py('strings', pa.string())
-        schema = pa.Schema.from_fields([field])
+        field = pa.field('strings', pa.string())
+        schema = pa.schema([field])
 
         self._check_pandas_roundtrip(df, expected_schema=schema)
 
@@ -257,7 +257,7 @@ class TestPandasConversion(unittest.TestCase):
     def test_fixed_size_bytes(self):
         values = [b'foo', None, b'bar', None, None, b'hey']
         df = pd.DataFrame({'strings': values})
-        schema = pa.Schema.from_fields([pa.field('strings', pa.binary(3))])
+        schema = pa.schema([pa.field('strings', pa.binary(3))])
         table = pa.Table.from_pandas(df, schema=schema)
         assert table.schema[0].type == schema[0].type
         assert table.schema[0].name == schema[0].name
@@ -267,7 +267,7 @@ class TestPandasConversion(unittest.TestCase):
     def test_fixed_size_bytes_does_not_accept_varying_lengths(self):
         values = [b'foo', None, b'ba', None, None, b'hey']
         df = pd.DataFrame({'strings': values})
-        schema = pa.Schema.from_fields([pa.field('strings', pa.binary(3))])
+        schema = pa.schema([pa.field('strings', pa.binary(3))])
         with self.assertRaises(pa.ArrowInvalid):
             pa.Table.from_pandas(df, schema=schema)
 
@@ -279,8 +279,8 @@ class TestPandasConversion(unittest.TestCase):
                 '2010-08-13T05:46:57.437'],
                 dtype='datetime64[ms]')
             })
-        field = pa.Field.from_py('datetime64', pa.timestamp('ms'))
-        schema = pa.Schema.from_fields([field])
+        field = pa.field('datetime64', pa.timestamp('ms'))
+        schema = pa.schema([field])
         self._check_pandas_roundtrip(df, timestamps_to_ms=True,
                                      expected_schema=schema)
 
@@ -291,8 +291,8 @@ class TestPandasConversion(unittest.TestCase):
                 '2010-08-13T05:46:57.437699912'],
                 dtype='datetime64[ns]')
             })
-        field = pa.Field.from_py('datetime64', pa.timestamp('ns'))
-        schema = pa.Schema.from_fields([field])
+        field = pa.field('datetime64', pa.timestamp('ns'))
+        schema = pa.schema([field])
         self._check_pandas_roundtrip(df, timestamps_to_ms=False,
                                      expected_schema=schema)
 
@@ -304,8 +304,8 @@ class TestPandasConversion(unittest.TestCase):
                 '2010-08-13T05:46:57.437'],
                 dtype='datetime64[ms]')
             })
-        field = pa.Field.from_py('datetime64', pa.timestamp('ms'))
-        schema = pa.Schema.from_fields([field])
+        field = pa.field('datetime64', pa.timestamp('ms'))
+        schema = pa.schema([field])
         self._check_pandas_roundtrip(df, timestamps_to_ms=True,
                                      expected_schema=schema)
 
@@ -316,8 +316,8 @@ class TestPandasConversion(unittest.TestCase):
                 '2010-08-13T05:46:57.437699912'],
                 dtype='datetime64[ns]')
             })
-        field = pa.Field.from_py('datetime64', pa.timestamp('ns'))
-        schema = pa.Schema.from_fields([field])
+        field = pa.field('datetime64', pa.timestamp('ns'))
+        schema = pa.schema([field])
         self._check_pandas_roundtrip(df, timestamps_to_ms=False,
                                      expected_schema=schema)
 
@@ -353,8 +353,8 @@ class TestPandasConversion(unittest.TestCase):
                      datetime.date(1970, 1, 1),
                      datetime.date(2040, 2, 26)]})
         table = pa.Table.from_pandas(df)
-        field = pa.Field.from_py('date', pa.date32())
-        schema = pa.Schema.from_fields([field])
+        field = pa.field('date', pa.date32())
+        schema = pa.schema([field])
         assert table.schema.equals(schema)
         result = table.to_pandas()
         expected = df.copy()
@@ -526,8 +526,8 @@ class TestPandasConversion(unittest.TestCase):
             ]
         })
         converted = pa.Table.from_pandas(expected)
-        field = pa.Field.from_py('decimals', pa.decimal(7, 3))
-        schema = pa.Schema.from_fields([field])
+        field = pa.field('decimals', pa.decimal(7, 3))
+        schema = pa.schema([field])
         assert converted.schema.equals(schema)
 
     def test_decimal_32_to_pandas(self):
@@ -549,8 +549,8 @@ class TestPandasConversion(unittest.TestCase):
             ]
         })
         converted = pa.Table.from_pandas(expected)
-        field = pa.Field.from_py('decimals', pa.decimal(12, 6))
-        schema = pa.Schema.from_fields([field])
+        field = pa.field('decimals', pa.decimal(12, 6))
+        schema = pa.schema([field])
         assert converted.schema.equals(schema)
 
     def test_decimal_64_to_pandas(self):
@@ -572,8 +572,8 @@ class TestPandasConversion(unittest.TestCase):
             ]
         })
         converted = pa.Table.from_pandas(expected)
-        field = pa.Field.from_py('decimals', pa.decimal(26, 11))
-        schema = pa.Schema.from_fields([field])
+        field = pa.field('decimals', pa.decimal(26, 11))
+        schema = pa.schema([field])
         assert converted.schema.equals(schema)
 
     def test_decimal_128_to_pandas(self):

http://git-wip-us.apache.org/repos/asf/arrow/blob/e876abbd/python/pyarrow/tests/test_schema.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
index da704f3..b3abc0f 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -118,7 +118,7 @@ def test_field():
     assert f.name == 'foo'
     assert f.nullable
     assert f.type is t
-    assert repr(f) == "Field('foo', type=string)"
+    assert repr(f) == "pyarrow.Field<foo: string>"
 
     f = pa.field('foo', t, False)
     assert not f.nullable
@@ -152,6 +152,52 @@ def test_field_empty():
         repr(f)
 
 
+def test_field_add_remove_metadata():
+    f0 = pa.field('foo', pa.int32())
+
+    assert f0.metadata is None
+
+    metadata = {b'foo': b'bar', b'pandas': b'badger'}
+
+    f1 = f0.add_metadata(metadata)
+    assert f1.metadata == metadata
+
+    f3 = f1.remove_metadata()
+    assert f3.metadata is None
+
+    # idempotent
+    f4 = f3.remove_metadata()
+    assert f4.metadata is None
+
+    f5 = pa.field('foo', pa.int32(), True, metadata)
+    f6 = f0.add_metadata(metadata)
+    assert f5.equals(f6)
+
+
+def test_schema_add_remove_metadata():
+    fields = [
+        pa.field('foo', pa.int32()),
+        pa.field('bar', pa.string()),
+        pa.field('baz', pa.list_(pa.int8()))
+    ]
+
+    s1 = pa.schema(fields)
+
+    assert s1.metadata is None
+
+    metadata = {b'foo': b'bar', b'pandas': b'badger'}
+
+    s2 = s1.add_metadata(metadata)
+    assert s2.metadata == metadata
+
+    s3 = s2.remove_metadata()
+    assert s3.metadata is None
+
+    # idempotent
+    s4 = s3.remove_metadata()
+    assert s4.metadata is None
+
+
 def test_schema_equals():
     fields = [
         pa.field('foo', pa.int32()),


Mime
View raw message