arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-483: [C++/Python] Provide access to "custom_metadata" Field attribute in IPC setting
Date Tue, 25 Apr 2017 21:36:36 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 949249d9e -> 7d433dc27


ARROW-483: [C++/Python] Provide access to "custom_metadata" Field attribute in IPC setting

Author: Phillip Cloud <cpcloud@gmail.com>

Closes #588 from cpcloud/ARROW-483 and squashes the following commits:

f671ba4 [Phillip Cloud] ARROW-483: [C++/Python] Provide access to "custom_metadata" Field attribute in IPC setting


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/7d433dc2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/7d433dc2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/7d433dc2

Branch: refs/heads/master
Commit: 7d433dc27bf70b5d80b8c88261a19cdc615defdb
Parents: 949249d
Author: Phillip Cloud <cpcloud@gmail.com>
Authored: Tue Apr 25 17:36:31 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Tue Apr 25 17:36:31 2017 -0400

----------------------------------------------------------------------
 cpp/CMakeLists.txt                            |  1 +
 cpp/src/arrow/array.cc                        |  2 +-
 cpp/src/arrow/builder.cc                      | 13 ++-
 cpp/src/arrow/ipc/metadata.cc                 | 30 ++++++-
 cpp/src/arrow/type-test.cc                    | 34 ++++++++
 cpp/src/arrow/type.cc                         | 20 ++++-
 cpp/src/arrow/type.h                          | 10 ++-
 cpp/src/arrow/util/CMakeLists.txt             |  2 +
 cpp/src/arrow/util/key-value-metadata-test.cc | 87 +++++++++++++++++++
 cpp/src/arrow/util/key_value_metadata.cc      | 99 ++++++++++++++++++++++
 cpp/src/arrow/util/key_value_metadata.h       | 56 ++++++++++++
 format/Schema.fbs                             |  2 +-
 python/.gitignore                             |  1 +
 python/pyarrow/_array.pxd                     |  2 +
 python/pyarrow/_array.pyx                     |  7 ++
 python/pyarrow/_table.pyx                     | 64 ++++++++------
 python/pyarrow/includes/common.pxd            |  3 +-
 python/pyarrow/includes/libarrow.pxd          | 11 ++-
 18 files changed, 401 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 2d8c00f..5abe5f1 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -944,6 +944,7 @@ set(ARROW_SRCS
 
   src/arrow/util/bit-util.cc
   src/arrow/util/decimal.cc
+  src/arrow/util/key_value_metadata.cc
 )
 
 if (ARROW_IPC)

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index e640bbd..76dda2c 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -113,7 +113,7 @@ Status Array::Validate() const {
 static inline void ConformSliceParams(
     int64_t array_offset, int64_t array_length, int64_t* offset, int64_t* length) {
   DCHECK_LE(*offset, array_length);
-  DCHECK_GE(offset, 0);
+  DCHECK_NE(offset, nullptr);
   *length = std::min(array_length - *offset, *length);
   *offset = array_offset + *offset;
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index d85eb32..4ecb8d3 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -363,8 +363,6 @@ ARROW_EXPORT Status DecimalBuilder::Append(const decimal::Decimal128& value) {
   return Status::OK();
 }
 
-template ARROW_EXPORT Status DecimalBuilder::Append(const decimal::Decimal128& val);
-
 Status DecimalBuilder::Init(int64_t capacity) {
   RETURN_NOT_OK(FixedSizeBinaryBuilder::Init(capacity));
   if (byte_width_ == 16) {
@@ -408,16 +406,17 @@ Status DecimalBuilder::Finish(std::shared_ptr<Array>* out) {
 
 ListBuilder::ListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> value_builder,
     const std::shared_ptr<DataType>& type)
-    : ArrayBuilder(
-          pool, type ? type : std::static_pointer_cast<DataType>(
-                                  std::make_shared<ListType>(value_builder->type()))),
+    : ArrayBuilder(pool,
+          type ? type : std::static_pointer_cast<DataType>(
+                            std::make_shared<ListType>(value_builder->type()))),
       offset_builder_(pool),
       value_builder_(value_builder) {}
 
 ListBuilder::ListBuilder(MemoryPool* pool, std::shared_ptr<Array> values,
     const std::shared_ptr<DataType>& type)
-    : ArrayBuilder(pool, type ? type : std::static_pointer_cast<DataType>(
-                                           std::make_shared<ListType>(values->type()))),
+    : ArrayBuilder(pool,
+          type ? type : std::static_pointer_cast<DataType>(
+                            std::make_shared<ListType>(values->type()))),
       offset_builder_(pool),
       values_(values) {}
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/cpp/src/arrow/ipc/metadata.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc
index 791948b..c0b518a 100644
--- a/cpp/src/arrow/ipc/metadata.cc
+++ b/cpp/src/arrow/ipc/metadata.cc
@@ -45,6 +45,7 @@ namespace ipc {
 using FBB = flatbuffers::FlatBufferBuilder;
 using DictionaryOffset = flatbuffers::Offset<flatbuf::DictionaryEncoding>;
 using FieldOffset = flatbuffers::Offset<flatbuf::Field>;
+using KeyValueOffset = flatbuffers::Offset<flatbuf::KeyValue>;
 using RecordBatchOffset = flatbuffers::Offset<flatbuf::RecordBatch>;
 using VectorLayoutOffset = flatbuffers::Offset<arrow::flatbuf::VectorLayout>;
 using Offset = flatbuffers::Offset<void>;
@@ -583,6 +584,7 @@ flatbuf::Endianness endianness() {
 
 static Status SchemaToFlatbuffer(FBB& fbb, const Schema& schema,
     DictionaryMemo* dictionary_memo, flatbuffers::Offset<flatbuf::Schema>* out) {
+  /// Fields
   std::vector<FieldOffset> field_offsets;
   for (int i = 0; i < schema.num_fields(); ++i) {
     std::shared_ptr<Field> field = schema.field(i);
@@ -591,7 +593,20 @@ static Status SchemaToFlatbuffer(FBB& fbb, const Schema& schema,
     field_offsets.push_back(offset);
   }
 
-  *out = flatbuf::CreateSchema(fbb, endianness(), fbb.CreateVector(field_offsets));
+  /// Custom metadata
+  const auto& custom_metadata_ = schema.custom_metadata();
+  std::vector<KeyValueOffset> key_value_offsets;
+  size_t metadata_size = custom_metadata_.size();
+  key_value_offsets.reserve(metadata_size);
+  for (size_t i = 0; i < metadata_size; ++i) {
+    const auto& key = custom_metadata_.key(i);
+    const auto& value = custom_metadata_.value(i);
+    key_value_offsets.push_back(
+        flatbuf::CreateKeyValue(fbb, fbb.CreateString(key), fbb.CreateString(value)));
+  }
+
+  *out = flatbuf::CreateSchema(fbb, endianness(), fbb.CreateVector(field_offsets),
+      fbb.CreateVector(key_value_offsets));
   return Status::OK();
 }
 
@@ -939,7 +954,18 @@ Status GetSchema(const void* opaque_schema, const DictionaryMemo& dictionary_mem
     const flatbuf::Field* field = schema->fields()->Get(i);
     RETURN_NOT_OK(FieldFromFlatbuffer(field, dictionary_memo, &fields[i]));
   }
-  *out = std::make_shared<Schema>(fields);
+
+  KeyValueMetadata custom_metadata;
+  auto fb_metadata = schema->custom_metadata();
+  if (fb_metadata != nullptr) {
+    custom_metadata.reserve(fb_metadata->size());
+
+    for (const auto& pair : *fb_metadata) {
+      custom_metadata.Append(pair->key()->str(), pair->value()->str());
+    }
+  }
+
+  *out = std::make_shared<Schema>(fields, custom_metadata);
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/cpp/src/arrow/type-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc
index dec7268..8e2dfd5 100644
--- a/cpp/src/arrow/type-test.cc
+++ b/cpp/src/arrow/type-test.cc
@@ -117,6 +117,40 @@ TEST_F(TestSchema, GetFieldByName) {
   ASSERT_TRUE(result == nullptr);
 }
 
+TEST_F(TestSchema, TestCustomMetadataConstruction) {
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", uint8(), false);
+  auto f2 = field("f2", utf8());
+  vector<shared_ptr<Field>> fields = {f0, f1, f2};
+  KeyValueMetadata metadata({"foo", "bar"}, {"bizz", "buzz"});
+  auto schema = std::make_shared<Schema>(fields, metadata);
+  ASSERT_TRUE(metadata.Equals(schema->custom_metadata()));
+}
+
+TEST_F(TestSchema, TestAddCustomMetadata) {
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", uint8(), false);
+  auto f2 = field("f2", utf8());
+  vector<shared_ptr<Field>> fields = {f0, f1, f2};
+  KeyValueMetadata metadata({"foo", "bar"}, {"bizz", "buzz"});
+  auto schema = std::make_shared<Schema>(fields);
+  std::shared_ptr<Schema> new_schema;
+  schema->AddCustomMetadata(metadata, &new_schema);
+  ASSERT_TRUE(metadata.Equals(new_schema->custom_metadata()));
+}
+
+TEST_F(TestSchema, TestRemoveCustomMetadata) {
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", uint8(), false);
+  auto f2 = field("f2", utf8());
+  vector<shared_ptr<Field>> fields = {f0, f1, f2};
+  KeyValueMetadata metadata({"foo", "bar"}, {"bizz", "buzz"});
+  auto schema = std::make_shared<Schema>(fields);
+  std::shared_ptr<Schema> new_schema;
+  schema->RemoveCustomMetadata(&new_schema);
+  ASSERT_EQ(0, new_schema->custom_metadata().size());
+}
+
 #define PRIMITIVE_TEST(KLASS, ENUM, NAME)        \
   TEST(TypesTest, TestPrimitive_##ENUM) {        \
     KLASS tp;                                    \

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 2e454ae..f59f8fb 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -24,6 +24,7 @@
 #include "arrow/array.h"
 #include "arrow/compare.h"
 #include "arrow/status.h"
+#include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/stl.h"
 #include "arrow/visitor.h"
@@ -231,7 +232,9 @@ std::string NullType::ToString() const {
 // ----------------------------------------------------------------------
 // Schema implementation
 
-Schema::Schema(const std::vector<std::shared_ptr<Field>>& fields) : fields_(fields) {}
+Schema::Schema(const std::vector<std::shared_ptr<Field>>& fields,
+    const KeyValueMetadata& custom_metadata)
+    : fields_(fields), custom_metadata_(custom_metadata) {}
 
 bool Schema::Equals(const Schema& other) const {
   if (this == &other) { return true; }
@@ -263,7 +266,18 @@ Status Schema::AddField(
   DCHECK_GE(i, 0);
   DCHECK_LE(i, this->num_fields());
 
-  *out = std::make_shared<Schema>(AddVectorElement(fields_, i, field));
+  *out = std::make_shared<Schema>(AddVectorElement(fields_, i, field), custom_metadata_);
+  return Status::OK();
+}
+
+Status Schema::AddCustomMetadata(
+    const KeyValueMetadata& custom_metadata, std::shared_ptr<Schema>* out) const {
+  *out = std::make_shared<Schema>(fields_, custom_metadata);
+  return Status::OK();
+}
+
+Status Schema::RemoveCustomMetadata(std::shared_ptr<Schema>* out) {
+  *out = std::make_shared<Schema>(fields_, KeyValueMetadata());
   return Status::OK();
 }
 
@@ -271,7 +285,7 @@ Status Schema::RemoveField(int i, std::shared_ptr<Schema>* out) const {
   DCHECK_GE(i, 0);
   DCHECK_LT(i, this->num_fields());
 
-  *out = std::make_shared<Schema>(DeleteVectorElement(fields_, i));
+  *out = std::make_shared<Schema>(DeleteVectorElement(fields_, i), custom_metadata_);
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index ea4ea03..dc94561 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -28,6 +28,7 @@
 
 #include "arrow/status.h"
 #include "arrow/type_fwd.h"
+#include "arrow/util/key_value_metadata.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 #include "arrow/visitor.h"
@@ -677,7 +678,8 @@ class ARROW_EXPORT DictionaryType : public FixedWidthType {
 
 class ARROW_EXPORT Schema {
  public:
-  explicit Schema(const std::vector<std::shared_ptr<Field>>& fields);
+  explicit Schema(const std::vector<std::shared_ptr<Field>>& fields,
+      const KeyValueMetadata& custom_metadata = KeyValueMetadata());
 
   // Returns true if all of the schema fields are equal
   bool Equals(const Schema& other) const;
@@ -689,6 +691,7 @@ class ARROW_EXPORT Schema {
   std::shared_ptr<Field> GetFieldByName(const std::string& name);
 
   const std::vector<std::shared_ptr<Field>>& fields() const { return fields_; }
+  const KeyValueMetadata& custom_metadata() const { return custom_metadata_; }
 
   // Render a string representation of the schema suitable for debugging
   std::string ToString() const;
@@ -697,11 +700,16 @@ class ARROW_EXPORT Schema {
       int i, const std::shared_ptr<Field>& field, std::shared_ptr<Schema>* out) const;
   Status RemoveField(int i, std::shared_ptr<Schema>* out) const;
 
+  Status AddCustomMetadata(
+      const KeyValueMetadata& metadata, std::shared_ptr<Schema>* out) const;
+  Status RemoveCustomMetadata(std::shared_ptr<Schema>* out);
+
   int num_fields() const { return static_cast<int>(fields_.size()); }
 
  private:
   std::vector<std::shared_ptr<Field>> fields_;
   std::unordered_map<std::string, int> name_to_index_;
+  KeyValueMetadata custom_metadata_;
 };
 
 // ----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index b22c8ac..ac7e866 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -26,6 +26,7 @@ install(FILES
   macros.h
   random.h
   visibility.h
+  key_value_metadata.h
   DESTINATION include/arrow/util)
 
 #######################################
@@ -52,3 +53,4 @@ endif()
 ADD_ARROW_TEST(bit-util-test)
 ADD_ARROW_TEST(stl-util-test)
 ADD_ARROW_TEST(decimal-test)
+ADD_ARROW_TEST(key-value-metadata-test)

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/cpp/src/arrow/util/key-value-metadata-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/key-value-metadata-test.cc b/cpp/src/arrow/util/key-value-metadata-test.cc
new file mode 100644
index 0000000..aadc989
--- /dev/null
+++ b/cpp/src/arrow/util/key-value-metadata-test.cc
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gtest/gtest.h"
+
+#include "arrow/util/key_value_metadata.h"
+
+#include "arrow/test-util.h"
+
+namespace arrow {
+
+TEST(KeyValueMetadataTest, SimpleConstruction) {
+  KeyValueMetadata metadata;
+  ASSERT_EQ(0, metadata.size());
+}
+
+TEST(KeyValueMetadataTest, StringVectorConstruction) {
+  std::vector<std::string> keys = {"foo", "bar"};
+  std::vector<std::string> values = {"bizz", "buzz"};
+
+  KeyValueMetadata metadata(keys, values);
+  ASSERT_EQ("foo", metadata.key(0));
+  ASSERT_EQ("bar", metadata.key(1));
+  ASSERT_EQ("bizz", metadata.value(0));
+  ASSERT_EQ("buzz", metadata.value(1));
+  ASSERT_EQ(2, metadata.size());
+}
+
+TEST(KeyValueMetadataTest, StringMapConstruction) {
+  std::unordered_map<std::string, std::string> pairs = {{"foo", "bizz"}, {"bar", "buzz"}};
+  std::unordered_map<std::string, std::string> result_map;
+  result_map.reserve(pairs.size());
+
+  KeyValueMetadata metadata(pairs);
+  metadata.ToUnorderedMap(&result_map);
+  ASSERT_EQ(pairs, result_map);
+  ASSERT_EQ(2, metadata.size());
+}
+
+TEST(KeyValueMetadataTest, StringAppend) {
+  std::vector<std::string> keys = {"foo", "bar"};
+  std::vector<std::string> values = {"bizz", "buzz"};
+
+  KeyValueMetadata metadata(keys, values);
+  ASSERT_EQ("foo", metadata.key(0));
+  ASSERT_EQ("bar", metadata.key(1));
+  ASSERT_EQ("bizz", metadata.value(0));
+  ASSERT_EQ("buzz", metadata.value(1));
+  ASSERT_EQ(2, metadata.size());
+
+  metadata.Append("purple", "orange");
+  metadata.Append("blue", "red");
+
+  ASSERT_EQ("purple", metadata.key(2));
+  ASSERT_EQ("blue", metadata.key(3));
+
+  ASSERT_EQ("orange", metadata.value(2));
+  ASSERT_EQ("red", metadata.value(3));
+}
+
+TEST(KeyValueMetadataTest, Equals) {
+  std::vector<std::string> keys = {"foo", "bar"};
+  std::vector<std::string> values = {"bizz", "buzz"};
+
+  KeyValueMetadata metadata(keys, values);
+  KeyValueMetadata metadata2(keys, values);
+  KeyValueMetadata metadata3(keys, {"buzz", "bizz"});
+
+  ASSERT_TRUE(metadata.Equals(metadata2));
+  ASSERT_FALSE(metadata.Equals(metadata3));
+}
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/cpp/src/arrow/util/key_value_metadata.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/key_value_metadata.cc b/cpp/src/arrow/util/key_value_metadata.cc
new file mode 100644
index 0000000..c91478b
--- /dev/null
+++ b/cpp/src/arrow/util/key_value_metadata.cc
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+static std::vector<std::string> UnorderedMapKeys(
+    const std::unordered_map<std::string, std::string>& map) {
+  std::vector<std::string> keys;
+  keys.reserve(map.size());
+  for (const auto& pair : map) {
+    keys.push_back(pair.first);
+  }
+  return keys;
+}
+
+static std::vector<std::string> UnorderedMapValues(
+    const std::unordered_map<std::string, std::string>& map) {
+  std::vector<std::string> values;
+  values.reserve(map.size());
+  for (const auto& pair : map) {
+    values.push_back(pair.second);
+  }
+  return values;
+}
+
+KeyValueMetadata::KeyValueMetadata() : keys_(), values_() {}
+
+KeyValueMetadata::KeyValueMetadata(
+    const std::unordered_map<std::string, std::string>& map)
+    : keys_(UnorderedMapKeys(map)), values_(UnorderedMapValues(map)) {}
+
+KeyValueMetadata::KeyValueMetadata(
+    const std::vector<std::string>& keys, const std::vector<std::string>& values)
+    : keys_(keys), values_(values) {
+  DCHECK_EQ(keys.size(), values.size());
+}
+
+void KeyValueMetadata::ToUnorderedMap(
+    std::unordered_map<std::string, std::string>* out) const {
+  DCHECK_NE(out, nullptr);
+  const int64_t n = size();
+  out->reserve(n);
+  for (int64_t i = 0; i < n; ++i) {
+    out->insert(std::make_pair(key(i), value(i)));
+  }
+}
+
+void KeyValueMetadata::Append(const std::string& key, const std::string& value) {
+  keys_.push_back(key);
+  values_.push_back(value);
+}
+
+void KeyValueMetadata::reserve(int64_t n) {
+  DCHECK_GE(n, 0);
+  const auto m = static_cast<size_t>(n);
+  keys_.reserve(m);
+  values_.reserve(m);
+}
+
+int64_t KeyValueMetadata::size() const {
+  DCHECK_EQ(keys_.size(), values_.size());
+  return static_cast<int64_t>(keys_.size());
+}
+
+std::string KeyValueMetadata::key(int64_t i) const {
+  DCHECK_GE(i, 0);
+  return keys_[static_cast<size_t>(i)];
+}
+
+std::string KeyValueMetadata::value(int64_t i) const {
+  DCHECK_GE(i, 0);
+  return values_[static_cast<size_t>(i)];
+}
+
+bool KeyValueMetadata::Equals(const KeyValueMetadata& other) const {
+  return size() == other.size() &&
+         std::equal(keys_.cbegin(), keys_.cend(), other.keys_.cbegin()) &&
+         std::equal(values_.cbegin(), values_.cend(), other.values_.cbegin());
+}
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/cpp/src/arrow/util/key_value_metadata.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/key_value_metadata.h b/cpp/src/arrow/util/key_value_metadata.h
new file mode 100644
index 0000000..713b2c0
--- /dev/null
+++ b/cpp/src/arrow/util/key_value_metadata.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_KEY_VALUE_METADATA_H
+#define ARROW_UTIL_KEY_VALUE_METADATA_H
+
+#include <cstdint>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class ARROW_EXPORT KeyValueMetadata {
+ public:
+  KeyValueMetadata();
+  KeyValueMetadata(
+      const std::vector<std::string>& keys, const std::vector<std::string>& values);
+  explicit KeyValueMetadata(const std::unordered_map<std::string, std::string>& map);
+
+  void ToUnorderedMap(std::unordered_map<std::string, std::string>* out) const;
+
+  void Append(const std::string& key, const std::string& value);
+
+  void reserve(int64_t n);
+  int64_t size() const;
+
+  std::string key(int64_t i) const;
+  std::string value(int64_t i) const;
+
+  bool Equals(const KeyValueMetadata& other) const;
+
+ private:
+  std::vector<std::string> keys_;
+  std::vector<std::string> values_;
+};
+
+}  // namespace arrow
+
+#endif  //  ARROW_UTIL_KEY_VALUE_METADATA_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/format/Schema.fbs
----------------------------------------------------------------------
diff --git a/format/Schema.fbs b/format/Schema.fbs
index b48859f..8de5c6d 100644
--- a/format/Schema.fbs
+++ b/format/Schema.fbs
@@ -200,7 +200,7 @@ table VectorLayout {
 
 table KeyValue {
   key: string;
-  value: [ubyte];
+  value: string;
 }
 
 /// ----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/python/.gitignore
----------------------------------------------------------------------
diff --git a/python/.gitignore b/python/.gitignore
index ba40c3e..6c0d5a9 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -33,3 +33,4 @@ coverage.xml
 
 # benchmark working dir
 .asv
+pyarrow/_table_api.h

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/python/pyarrow/_array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/_array.pxd b/python/pyarrow/_array.pxd
index 464de31..4d5db86 100644
--- a/python/pyarrow/_array.pxd
+++ b/python/pyarrow/_array.pxd
@@ -81,6 +81,8 @@ cdef class Schema:
     cdef init(self, const vector[shared_ptr[CField]]& fields)
     cdef init_schema(self, const shared_ptr[CSchema]& schema)
 
+    cpdef dict custom_metadata(self)
+
 
 cdef class Scalar:
     cdef readonly:

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/python/pyarrow/_array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/_array.pyx b/python/pyarrow/_array.pyx
index 1c571ba..2fb20b7 100644
--- a/python/pyarrow/_array.pyx
+++ b/python/pyarrow/_array.pyx
@@ -244,6 +244,13 @@ cdef class Schema:
         self.schema = schema.get()
         self.sp_schema = schema
 
+    cpdef dict custom_metadata(self):
+        cdef:
+            CKeyValueMetadata metadata = self.schema.custom_metadata()
+            unordered_map[c_string, c_string] result
+        metadata.ToUnorderedMap(&result)
+        return result
+
     def equals(self, other):
         """
         Test if this schema is equal to the other

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/python/pyarrow/_table.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/_table.pyx b/python/pyarrow/_table.pyx
index 78fec75..ed0782b 100644
--- a/python/pyarrow/_table.pyx
+++ b/python/pyarrow/_table.pyx
@@ -34,7 +34,6 @@ from pyarrow._error import ArrowException
 from pyarrow._array import field
 from pyarrow.compat import frombytes, tobytes
 
-
 from collections import OrderedDict
 
 
@@ -273,15 +272,22 @@ cdef class Column:
         return chunked_array
 
 
-cdef _schema_from_arrays(arrays, names, shared_ptr[CSchema]* schema):
+cdef CKeyValueMetadata key_value_metadata_from_dict(dict metadata):
+    cdef:
+        unordered_map[c_string, c_string] unordered_metadata = metadata
+        CKeyValueMetadata c_metadata = CKeyValueMetadata(unordered_metadata)
+    return c_metadata
+
+
+cdef int _schema_from_arrays(
+        arrays, names, dict metadata, shared_ptr[CSchema]* schema) except -1:
     cdef:
         Array arr
         Column col
         c_string c_name
         vector[shared_ptr[CField]] fields
-        cdef shared_ptr[CDataType] type_
-
-    cdef int K = len(arrays)
+        shared_ptr[CDataType] type_
+        int K = len(arrays)
 
     fields.resize(K)
 
@@ -306,15 +312,16 @@ cdef _schema_from_arrays(arrays, names, shared_ptr[CSchema]* schema):
     else:
         raise TypeError(type(arrays[0]))
 
-    schema.reset(new CSchema(fields))
-
+    schema.reset(new CSchema(fields, key_value_metadata_from_dict(metadata)))
+    return 0
 
 
-cdef _dataframe_to_arrays(df, timestamps_to_ms, Schema schema):
+cdef tuple _dataframe_to_arrays(df, bint timestamps_to_ms, Schema schema):
     cdef:
         list names = []
         list arrays = []
         DataType type = None
+        dict metadata = {}
 
     for name in df.columns:
         col = df[name]
@@ -326,7 +333,7 @@ cdef _dataframe_to_arrays(df, timestamps_to_ms, Schema schema):
         names.append(name)
         arrays.append(arr)
 
-    return names, arrays
+    return names, arrays, metadata
 
 
 cdef class RecordBatch:
@@ -486,11 +493,11 @@ cdef class RecordBatch:
         -------
         pyarrow.table.RecordBatch
         """
-        names, arrays = _dataframe_to_arrays(df, False, schema)
-        return cls.from_arrays(arrays, names)
+        names, arrays, metadata = _dataframe_to_arrays(df, False, schema)
+        return cls.from_arrays(arrays, names, metadata)
 
     @staticmethod
-    def from_arrays(arrays, names):
+    def from_arrays(list arrays, list names, dict metadata=None):
         """
         Construct a RecordBatch from multiple pyarrow.Arrays
 
@@ -512,15 +519,17 @@ cdef class RecordBatch:
             shared_ptr[CRecordBatch] batch
             vector[shared_ptr[CArray]] c_arrays
             int64_t num_rows
+            int64_t i
+            int64_t number_of_arrays = len(arrays)
 
-        if len(arrays) == 0:
+        if not number_of_arrays:
             raise ValueError('Record batch cannot contain no arrays (for now)')
 
         num_rows = len(arrays[0])
-        _schema_from_arrays(arrays, names, &schema)
+        _schema_from_arrays(arrays, names, metadata or {}, &schema)
 
-        for i in range(len(arrays)):
-            arr = arrays[i]
+        c_arrays.reserve(len(arrays))
+        for arr in arrays:
             c_arrays.push_back(arr.sp_array)
 
         batch.reset(new CRecordBatch(schema, num_rows, c_arrays))
@@ -656,13 +665,13 @@ cdef class Table:
         >>> pa.Table.from_pandas(df)
         <pyarrow.table.Table object at 0x7f05d1fb1b40>
         """
-        names, arrays = _dataframe_to_arrays(df,
+        names, arrays, metadata = _dataframe_to_arrays(df,
                                              timestamps_to_ms=timestamps_to_ms,
                                              schema=schema)
-        return cls.from_arrays(arrays, names=names)
+        return cls.from_arrays(arrays, names=names, metadata=metadata)
 
     @staticmethod
-    def from_arrays(arrays, names=None):
+    def from_arrays(arrays, names=None, dict metadata=None):
         """
         Construct a Table from Arrow arrays or columns
 
@@ -680,22 +689,25 @@ cdef class Table:
 
         """
         cdef:
-            vector[shared_ptr[CField]] fields
             vector[shared_ptr[CColumn]] columns
             shared_ptr[CSchema] schema
             shared_ptr[CTable] table
+            size_t K = len(arrays)
 
-        _schema_from_arrays(arrays, names, &schema)
+        _schema_from_arrays(arrays, names, metadata or {}, &schema)
 
-        cdef int K = len(arrays)
-        columns.resize(K)
+        columns.reserve(K)
 
         for i in range(K):
             if isinstance(arrays[i], Array):
-                columns[i].reset(new CColumn(schema.get().field(i),
-                                             (<Array> arrays[i]).sp_array))
+                columns.push_back(
+                    make_shared[CColumn](
+                        schema.get().field(i),
+                        (<Array> arrays[i]).sp_array
+                    )
+                )
             elif isinstance(arrays[i], Column):
-                columns[i] = (<Column> arrays[i]).sp_column
+                columns.push_back((<Column> arrays[i]).sp_column)
             else:
                 raise ValueError(type(arrays[i]))
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/python/pyarrow/includes/common.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd
index 44723fa..cc3b4b6 100644
--- a/python/pyarrow/includes/common.pxd
+++ b/python/pyarrow/includes/common.pxd
@@ -19,9 +19,10 @@
 
 from libc.stdint cimport *
 from libcpp cimport bool as c_bool
-from libcpp.memory cimport shared_ptr, unique_ptr
+from libcpp.memory cimport shared_ptr, unique_ptr, make_shared
 from libcpp.string cimport string as c_string
 from libcpp.vector cimport vector
+from libcpp.unordered_map cimport unordered_map
 
 from cpython cimport PyObject
 cimport cpython

http://git-wip-us.apache.org/repos/asf/arrow/blob/7d433dc2/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 473a0b9..ef1a332 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1,4 +1,4 @@
-#t Licensed to the Apache Software Foundation (ASF) under one
+# Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
@@ -19,6 +19,12 @@
 
 from pyarrow.includes.common cimport *
 
+cdef extern from "arrow/util/key_value_metadata.h" namespace "arrow" nogil:
+    cdef cppclass CKeyValueMetadata" arrow::KeyValueMetadata":
+        CKeyValueMetadata()
+        CKeyValueMetadata(const unordered_map[c_string, c_string]&)
+        void ToUnorderedMap(unordered_map[c_string, c_string]*) const
+
 cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
     enum Type" arrow::Type::type":
@@ -170,10 +176,13 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
     cdef cppclass CSchema" arrow::Schema":
         CSchema(const vector[shared_ptr[CField]]& fields)
+        CSchema(const vector[shared_ptr[CField]]& fields,
+                const CKeyValueMetadata& custom_metadata)
 
         c_bool Equals(const CSchema& other)
 
         shared_ptr[CField] field(int i)
+        const CKeyValueMetadata& custom_metadata() const
         shared_ptr[CField] GetFieldByName(c_string& name)
         int num_fields()
         c_string ToString()


Mime
View raw message