arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [2/3] arrow git commit: ARROW-67: C++ metadata flatbuffer serialization and data movement to memory maps
Date Wed, 23 Mar 2016 01:45:23 GMT
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/metadata.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc
new file mode 100644
index 0000000..642f21a
--- /dev/null
+++ b/cpp/src/arrow/ipc/metadata.cc
@@ -0,0 +1,238 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/metadata.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+// Generated C++ flatbuffer IDL
+#include "arrow/ipc/Message_generated.h"
+#include "arrow/ipc/metadata-internal.h"
+
+#include "arrow/schema.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+namespace flatbuf = apache::arrow::flatbuf;
+
+namespace ipc {
+
+// Builds a Message flatbuffer holding `schema` and hands the finished buffer
+// back through `out`
+Status WriteSchema(const Schema* schema, std::shared_ptr<Buffer>* out) {
+  MessageBuilder builder;
+  RETURN_NOT_OK(builder.SetSchema(schema));
+  RETURN_NOT_OK(builder.Finish());
+  return builder.GetBuffer(out);
+}
+
+//----------------------------------------------------------------------
+// Message reader
+
+// Private state behind Message: a view of the top-level flatbuffer plus a
+// reference to the buffer that backs it
+class Message::Impl {
+ public:
+  explicit Impl(const std::shared_ptr<Buffer>& buffer,
+      const flatbuf::Message* message)
+      : buffer_(buffer), message_(message) {}
+
+  // Translates the flatbuffer header tag into the public Message::Type enum;
+  // any tag not listed here maps to Message::NONE
+  Message::Type type() const {
+    const auto tag = message_->header_type();
+    if (tag == flatbuf::MessageHeader_Schema) {
+      return Message::SCHEMA;
+    }
+    if (tag == flatbuf::MessageHeader_DictionaryBatch) {
+      return Message::DICTIONARY_BATCH;
+    }
+    if (tag == flatbuf::MessageHeader_RecordBatch) {
+      return Message::RECORD_BATCH;
+    }
+    return Message::NONE;
+  }
+
+  // Untyped pointer to the header table
+  const void* header() const { return message_->header(); }
+
+  int64_t body_length() const { return message_->bodyLength(); }
+
+ private:
+  // Owns the memory this message accesses
+  std::shared_ptr<Buffer> buffer_;
+
+  // Flatbuffer view; Message::Open derives it from buffer_'s data
+  const flatbuf::Message* message_;
+};
+
+class SchemaMessage::Impl {
+ public:
+  explicit Impl(const void* schema) :
+      schema_(static_cast<const flatbuf::Schema*>(schema)) {}
+
+  const flatbuf::Field* field(int i) const {
+    return schema_->fields()->Get(i);
+  }
+
+  int num_fields() const {
+    return schema_->fields()->size();
+  }
+
+ private:
+  const flatbuf::Schema* schema_;
+};
+
+Message::Message() {}
+
+// Wraps `buffer` in a Message without copying it. The buffer is expected to
+// hold an int32_t size prefix followed by the flatbuffer-encoded message; the
+// resulting Message keeps a reference to the buffer.
+//
+// Returns Status::Invalid if `buffer` is null, otherwise Status::OK with the
+// new Message stored in `out`.
+Status Message::Open(const std::shared_ptr<Buffer>& buffer,
+    std::shared_ptr<Message>* out) {
+  // Reject a null buffer up front rather than dereferencing it below
+  if (buffer == nullptr) {
+    return Status::Invalid("Cannot open a Message from a null buffer");
+  }
+
+  // The buffer is prefixed by its size as int32_t
+  const uint8_t* fb_head = buffer->data() + sizeof(int32_t);
+  const flatbuf::Message* message = flatbuf::GetMessage(fb_head);
+
+  // TODO(wesm): verify message
+  // Message's constructor is private, so std::make_shared cannot be used here
+  std::shared_ptr<Message> result(new Message());
+  result->impl_.reset(new Impl(buffer, message));
+  *out = result;
+
+  return Status::OK();
+}
+
+Message::Type Message::type() const {
+  return impl_->type();
+}
+
+int64_t Message::body_length() const {
+  return impl_->body_length();
+}
+
+std::shared_ptr<Message> Message::get_shared_ptr() {
+  return this->shared_from_this();
+}
+
+std::shared_ptr<SchemaMessage> Message::GetSchema() {
+  return std::make_shared<SchemaMessage>(this->shared_from_this(),
+      impl_->header());
+}
+
+SchemaMessage::SchemaMessage(const std::shared_ptr<Message>& message,
+    const void* schema) {
+  message_ = message;
+  impl_.reset(new Impl(schema));
+}
+
+int SchemaMessage::num_fields() const {
+  return impl_->num_fields();
+}
+
+Status SchemaMessage::GetField(int i, std::shared_ptr<Field>* out) const {
+  const flatbuf::Field* field = impl_->field(i);
+  return FieldFromFlatbuffer(field, out);
+}
+
+// Materializes every field in the message, in order, and wraps them in a new
+// Schema stored in `out`. Stops at the first field that fails to convert.
+Status SchemaMessage::GetSchema(std::shared_ptr<Schema>* out) const {
+  const int field_count = num_fields();
+  std::vector<std::shared_ptr<Field>> fields(field_count);
+
+  int index = 0;
+  for (auto& slot : fields) {
+    RETURN_NOT_OK(GetField(index, &slot));
+    ++index;
+  }
+
+  *out = std::make_shared<Schema>(fields);
+  return Status::OK();
+}
+
+// Private state behind RecordBatchMessage: the flatbuffer record batch plus
+// cached pointers to its field-node and buffer vectors
+class RecordBatchMessage::Impl {
+ public:
+  explicit Impl(const void* batch)
+      : batch_(static_cast<const flatbuf::RecordBatch*>(batch)),
+        nodes_(batch_->nodes()),
+        buffers_(batch_->buffers()) {}
+
+  // Element accessors; the index is passed straight to the flatbuffer vector
+  const flatbuf::FieldNode* field(int i) const { return nodes_->Get(i); }
+  const flatbuf::Buffer* buffer(int i) const { return buffers_->Get(i); }
+
+  int32_t length() const { return batch_->length(); }
+
+  // Sizes come from the same cached vectors the element accessors use
+  int num_buffers() const { return buffers_->size(); }
+  int num_fields() const { return nodes_->size(); }
+
+ private:
+  // Declared first: nodes_ and buffers_ are initialized from it
+  const flatbuf::RecordBatch* batch_;
+  const flatbuffers::Vector<const flatbuf::FieldNode*>* nodes_;
+  const flatbuffers::Vector<const flatbuf::Buffer*>* buffers_;
+};
+
+std::shared_ptr<RecordBatchMessage> Message::GetRecordBatch() {
+  return std::make_shared<RecordBatchMessage>(this->shared_from_this(),
+      impl_->header());
+}
+
+RecordBatchMessage::RecordBatchMessage(const std::shared_ptr<Message>& message,
+    const void* batch) {
+  message_ = message;
+  impl_.reset(new Impl(batch));
+}
+
+// TODO(wesm): Copying the flatbuffer data isn't great, but this will do for
+// now
+FieldMetadata RecordBatchMessage::field(int i) const {
+  const flatbuf::FieldNode* node = impl_->field(i);
+
+  FieldMetadata result;
+  result.length = node->length();
+  result.null_count = node->null_count();
+  return result;
+}
+
+BufferMetadata RecordBatchMessage::buffer(int i) const {
+  const flatbuf::Buffer* buffer = impl_->buffer(i);
+
+  BufferMetadata result;
+  result.page = buffer->page();
+  result.offset = buffer->offset();
+  result.length = buffer->length();
+  return result;
+}
+
+int32_t RecordBatchMessage::length() const {
+  return impl_->length();
+}
+
+int RecordBatchMessage::num_buffers() const {
+  return impl_->num_buffers();
+}
+
+int RecordBatchMessage::num_fields() const {
+  return impl_->num_fields();
+}
+
+} // namespace ipc
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/metadata.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.h b/cpp/src/arrow/ipc/metadata.h
new file mode 100644
index 0000000..c728852
--- /dev/null
+++ b/cpp/src/arrow/ipc/metadata.h
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// C++ object model and user API for interprocess schema messaging
+
+#ifndef ARROW_IPC_METADATA_H
+#define ARROW_IPC_METADATA_H
+
+#include <cstdint>
+#include <memory>
+
+namespace arrow {
+
+class Buffer;
+struct Field;
+class Schema;
+class Status;
+
+namespace ipc {
+
+//----------------------------------------------------------------------
+// Message read/write APIs
+
+// Serialize arrow::Schema as a Flatbuffer
+Status WriteSchema(const Schema* schema, std::shared_ptr<Buffer>* out);
+
+//----------------------------------------------------------------------
+
+// Read interface classes. We do not fully deserialize the flatbuffers so that
+// individual fields' metadata can be retrieved from a very large schema without
+// the cost of materializing the entire schema
+
+class Message;
+
+// Container for serialized Schema metadata contained in an IPC message
+class SchemaMessage {
+ public:
+  // Accepts an opaque flatbuffer pointer
+  SchemaMessage(const std::shared_ptr<Message>& message, const void* schema);
+
+  int num_fields() const;
+
+  // Construct an arrow::Field for the i-th value in the metadata
+  Status GetField(int i, std::shared_ptr<Field>* out) const;
+
+  // Construct a complete Schema from the message. May be expensive for very
+  // large schemas if you are only interested in a few fields
+  Status GetSchema(std::shared_ptr<Schema>* out) const;
+
+ private:
+  // Parent, owns the flatbuffer data
+  std::shared_ptr<Message> message_;
+
+  class Impl;
+  std::unique_ptr<Impl> impl_;
+};
+
+// Field metadata
+struct FieldMetadata {
+  int32_t length;
+  int32_t null_count;
+};
+
+struct BufferMetadata {
+  int32_t page;
+  int64_t offset;
+  int64_t length;
+};
+
+// Container for serialized record batch metadata contained in an IPC message
+class RecordBatchMessage {
+ public:
+  // Accepts an opaque flatbuffer pointer
+  RecordBatchMessage(const std::shared_ptr<Message>& message,
+      const void* batch_meta);
+
+  FieldMetadata field(int i) const;
+  BufferMetadata buffer(int i) const;
+
+  int32_t length() const;
+  int num_buffers() const;
+  int num_fields() const;
+
+ private:
+  // Parent, owns the flatbuffer data
+  std::shared_ptr<Message> message_;
+
+  class Impl;
+  std::unique_ptr<Impl> impl_;
+};
+
+class DictionaryBatchMessage {
+ public:
+  int64_t id() const;
+  std::unique_ptr<RecordBatchMessage> data() const;
+};
+
+class Message : public std::enable_shared_from_this<Message> {
+ public:
+  enum Type {
+    NONE,
+    SCHEMA,
+    DICTIONARY_BATCH,
+    RECORD_BATCH
+  };
+
+  static Status Open(const std::shared_ptr<Buffer>& buffer,
+    std::shared_ptr<Message>* out);
+
+  std::shared_ptr<Message> get_shared_ptr();
+
+  int64_t body_length() const;
+
+  Type type() const;
+
+  // Only invoke these methods after checking the message type
+  std::shared_ptr<SchemaMessage> GetSchema();
+  std::shared_ptr<RecordBatchMessage> GetRecordBatch();
+  std::shared_ptr<DictionaryBatchMessage> GetDictionaryBatch();
+
+ private:
+  Message();
+
+  // Hide serialization details from user API
+  class Impl;
+  std::unique_ptr<Impl> impl_;
+};
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_METADATA_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h
new file mode 100644
index 0000000..0fccce9
--- /dev/null
+++ b/cpp/src/arrow/ipc/test-common.h
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_IPC_TEST_COMMON_H
+#define ARROW_IPC_TEST_COMMON_H
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arrow {
+namespace ipc {
+
+// Test helper that creates fixed-size files on disk and deletes them again in
+// TearDown.
+// NOTE(review): uses fopen/ftruncate/std::remove, but this header includes
+// neither <cstdio> nor <unistd.h> itself -- confirm includers provide them
+class MemoryMapFixture {
+ public:
+  // Removes every file recorded by CreateFile and forgets the recorded paths
+  void TearDown() {
+    for (const auto& path : tmp_files_) {
+      std::remove(path.c_str());
+    }
+    tmp_files_.clear();
+  }
+
+  // Creates (or truncates) the file at `path` and resizes it to `size` bytes.
+  // The path is recorded for cleanup. If the file cannot be opened, nothing is
+  // recorded and the call does nothing further.
+  void CreateFile(const std::string& path, int64_t size) {
+    FILE* file = fopen(path.c_str(), "w");
+    if (file == nullptr) {
+      // Opening failed: there is no descriptor to truncate or close
+      return;
+    }
+    tmp_files_.push_back(path);
+    ftruncate(fileno(file), size);
+    fclose(file);
+  }
+
+ private:
+  // Paths of the files created so far; removed in TearDown
+  std::vector<std::string> tmp_files_;
+};
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_TEST_COMMON_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/schema-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema-test.cc b/cpp/src/arrow/schema-test.cc
new file mode 100644
index 0000000..a1de1dc
--- /dev/null
+++ b/cpp/src/arrow/schema-test.cc
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/schema.h"
+#include "arrow/type.h"
+
+using std::shared_ptr;
+using std::vector;
+
+namespace arrow {
+
+const auto INT32 = std::make_shared<Int32Type>();
+
+TEST(TestField, Basics) {
+  Field f0("f0", INT32);
+  Field f0_nn("f0", INT32, false);
+
+  ASSERT_EQ(f0.name, "f0");
+  ASSERT_EQ(f0.type->ToString(), INT32->ToString());
+
+  ASSERT_TRUE(f0.nullable);
+  ASSERT_FALSE(f0_nn.nullable);
+}
+
+TEST(TestField, Equals) {
+  Field f0("f0", INT32);
+  Field f0_nn("f0", INT32, false);
+  Field f0_other("f0", INT32);
+
+  ASSERT_EQ(f0, f0_other);
+  ASSERT_NE(f0, f0_nn);
+}
+
+class TestSchema : public ::testing::Test {
+ public:
+  void SetUp() {}
+};
+
+TEST_F(TestSchema, Basics) {
+  auto f0 = std::make_shared<Field>("f0", INT32);
+  auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(), false);
+  auto f1_optional = std::make_shared<Field>("f1", std::make_shared<UInt8Type>());
+
+  auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
+
+  vector<shared_ptr<Field>> fields = {f0, f1, f2};
+  auto schema = std::make_shared<Schema>(fields);
+
+  ASSERT_EQ(3, schema->num_fields());
+  ASSERT_EQ(f0, schema->field(0));
+  ASSERT_EQ(f1, schema->field(1));
+  ASSERT_EQ(f2, schema->field(2));
+
+  auto schema2 = std::make_shared<Schema>(fields);
+
+  vector<shared_ptr<Field>> fields3 = {f0, f1_optional, f2};
+  auto schema3 = std::make_shared<Schema>(fields3);
+  ASSERT_TRUE(schema->Equals(schema2));
+  ASSERT_FALSE(schema->Equals(schema3));
+
+  ASSERT_TRUE(schema->Equals(*schema2.get()));
+  ASSERT_FALSE(schema->Equals(*schema3.get()));
+}
+
+TEST_F(TestSchema, ToString) {
+  auto f0 = std::make_shared<Field>("f0", INT32);
+  auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(), false);
+  auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
+  auto f3 = std::make_shared<Field>("f3",
+      std::make_shared<ListType>(std::make_shared<Int16Type>()));
+
+  vector<shared_ptr<Field>> fields = {f0, f1, f2, f3};
+  auto schema = std::make_shared<Schema>(fields);
+
+  std::string result = schema->ToString();
+  std::string expected = R"(f0: int32
+f1: uint8 not null
+f2: string
+f3: list<item: int16>)";
+
+  ASSERT_EQ(expected, result);
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/schema.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema.cc b/cpp/src/arrow/schema.cc
new file mode 100644
index 0000000..18aad0e
--- /dev/null
+++ b/cpp/src/arrow/schema.cc
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/schema.h"
+
+#include <memory>
+#include <string>
+#include <sstream>
+#include <vector>
+
+#include "arrow/type.h"
+
+namespace arrow {
+
+Schema::Schema(const std::vector<std::shared_ptr<Field>>& fields) :
+    fields_(fields) {}
+
+// Two schemas are equal when they are the same object, or when they have the
+// same number of fields and each pair of fields at the same index is equal
+bool Schema::Equals(const Schema& other) const {
+  if (this == &other) {
+    return true;
+  }
+
+  const int field_count = num_fields();
+  if (field_count != other.num_fields()) {
+    return false;
+  }
+
+  for (int idx = 0; idx < field_count; ++idx) {
+    const bool same = field(idx)->Equals(*other.field(idx));
+    if (!same) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Shared-pointer overload of Equals. A null `other` compares unequal instead
+// of being dereferenced.
+bool Schema::Equals(const std::shared_ptr<Schema>& other) const {
+  return other != nullptr && Equals(*other);
+}
+
+// Renders one line per field (each field's own ToString), joined with newlines
+// and with no trailing newline. An empty schema yields an empty string.
+std::string Schema::ToString() const {
+  std::stringstream buffer;
+
+  bool first = true;
+  // Bind by reference: copying each shared_ptr would only churn refcounts
+  for (const auto& field : fields_) {
+    if (!first) {
+      // Plain newline; flushing an in-memory stream via std::endl is pointless
+      buffer << '\n';
+    }
+    buffer << field->ToString();
+    first = false;
+  }
+  return buffer.str();
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/schema.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema.h b/cpp/src/arrow/schema.h
new file mode 100644
index 0000000..52f3c1c
--- /dev/null
+++ b/cpp/src/arrow/schema.h
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_SCHEMA_H
+#define ARROW_SCHEMA_H
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arrow {
+
+struct Field;
+
+class Schema {
+ public:
+  explicit Schema(const std::vector<std::shared_ptr<Field>>& fields);
+
+  // Returns true if all of the schema fields are equal
+  bool Equals(const Schema& other) const;
+  bool Equals(const std::shared_ptr<Schema>& other) const;
+
+  // Return the ith schema element. Does not boundscheck
+  const std::shared_ptr<Field>& field(int i) const {
+    return fields_[i];
+  }
+
+  // Render a string representation of the schema suitable for debugging
+  std::string ToString() const;
+
+  int num_fields() const {
+    return fields_.size();
+  }
+
+ private:
+  std::vector<std::shared_ptr<Field>> fields_;
+};
+
+} // namespace arrow
+
+#endif  // ARROW_SCHEMA_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc
new file mode 100644
index 0000000..4c7b8f8
--- /dev/null
+++ b/cpp/src/arrow/table-test.cc
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/types/primitive.h"
+#include "arrow/util/status.h"
+
+using std::shared_ptr;
+using std::vector;
+
+namespace arrow {
+
+const auto INT16 = std::make_shared<Int16Type>();
+const auto UINT8 = std::make_shared<UInt8Type>();
+const auto INT32 = std::make_shared<Int32Type>();
+
+class TestTable : public TestBase {
+ public:
+  void MakeExample1(int length) {
+    auto f0 = std::make_shared<Field>("f0", INT32);
+    auto f1 = std::make_shared<Field>("f1", UINT8);
+    auto f2 = std::make_shared<Field>("f2", INT16);
+
+    vector<shared_ptr<Field>> fields = {f0, f1, f2};
+    schema_ = std::make_shared<Schema>(fields);
+
+    columns_ = {
+      std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
+      std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
+      std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length))
+    };
+  }
+
+ protected:
+  std::unique_ptr<Table> table_;
+  shared_ptr<Schema> schema_;
+  vector<std::shared_ptr<Column>> columns_;
+};
+
+TEST_F(TestTable, EmptySchema) {
+  auto empty_schema = shared_ptr<Schema>(new Schema({}));
+  table_.reset(new Table("data", empty_schema, columns_));
+  ASSERT_OK(table_->ValidateColumns());
+  ASSERT_EQ(0, table_->num_rows());
+  ASSERT_EQ(0, table_->num_columns());
+}
+
+TEST_F(TestTable, Ctors) {
+  int length = 100;
+  MakeExample1(length);
+
+  std::string name = "data";
+
+  table_.reset(new Table(name, schema_, columns_));
+  ASSERT_OK(table_->ValidateColumns());
+  ASSERT_EQ(name, table_->name());
+  ASSERT_EQ(length, table_->num_rows());
+  ASSERT_EQ(3, table_->num_columns());
+
+  table_.reset(new Table(name, schema_, columns_, length));
+  ASSERT_OK(table_->ValidateColumns());
+  ASSERT_EQ(name, table_->name());
+  ASSERT_EQ(length, table_->num_rows());
+}
+
+TEST_F(TestTable, Metadata) {
+  int length = 100;
+  MakeExample1(length);
+
+  std::string name = "data";
+  table_.reset(new Table(name, schema_, columns_));
+
+  ASSERT_TRUE(table_->schema()->Equals(schema_));
+
+  auto col = table_->column(0);
+  ASSERT_EQ(schema_->field(0)->name, col->name());
+  ASSERT_EQ(schema_->field(0)->type, col->type());
+}
+
+TEST_F(TestTable, InvalidColumns) {
+  // Check that columns are all the same length
+  int length = 100;
+  MakeExample1(length);
+
+  table_.reset(new Table("data", schema_, columns_, length - 1));
+  ASSERT_RAISES(Invalid, table_->ValidateColumns());
+
+  columns_.clear();
+
+  // Wrong number of columns
+  table_.reset(new Table("data", schema_, columns_, length));
+  ASSERT_RAISES(Invalid, table_->ValidateColumns());
+
+  columns_ = {
+    std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
+    std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
+    std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length - 1))
+  };
+
+  table_.reset(new Table("data", schema_, columns_, length));
+  ASSERT_RAISES(Invalid, table_->ValidateColumns());
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
new file mode 100644
index 0000000..e405c1d
--- /dev/null
+++ b/cpp/src/arrow/table.cc
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/table.h"
+
+#include <cstdlib>
+#include <memory>
+#include <sstream>
+
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+RowBatch::RowBatch(const std::shared_ptr<Schema>& schema, int num_rows,
+    const std::vector<std::shared_ptr<Array>>& columns) :
+    schema_(schema),
+    num_rows_(num_rows),
+    columns_(columns) {}
+
+const std::string& RowBatch::column_name(int i) const {
+  return schema_->field(i)->name;
+}
+
+// Builds a table whose row count is taken from the first column, or zero when
+// no columns are supplied
+Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+    const std::vector<std::shared_ptr<Column>>& columns) :
+    name_(name),
+    schema_(schema),
+    columns_(columns),
+    num_rows_(columns.empty() ? 0 : columns[0]->length()) {}
+
+Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+    const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows) :
+    name_(name),
+    schema_(schema),
+    columns_(columns),
+    num_rows_(num_rows) {}
+
+// Checks that the column count matches the schema and that every column is
+// non-null with length num_rows_; returns Status::Invalid on the first failure
+Status Table::ValidateColumns() const {
+  if (num_columns() != schema_->num_fields()) {
+    return Status::Invalid("Number of columns did not match schema");
+  }
+
+  // Make sure columns are all the same length
+  for (size_t i = 0; i < columns_.size(); ++i) {
+    const Column* col = columns_[i].get();
+    if (col == nullptr) {
+      // A null column has no name to report, so identify it by index only
+      std::stringstream ss;
+      ss << "Column " << i << " was null";
+      return Status::Invalid(ss.str());
+    }
+    if (col->length() != num_rows_) {
+      std::stringstream ss;
+      ss << "Column " << i << " named " << col->name()
+         << " expected length " << num_rows_
+         << " but got length " << col->length();
+      return Status::Invalid(ss.str());
+    }
+  }
+  return Status::OK();
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
new file mode 100644
index 0000000..e2f73a2
--- /dev/null
+++ b/cpp/src/arrow/table.h
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TABLE_H
+#define ARROW_TABLE_H
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arrow {
+
+class Array;
+class Column;
+class Schema;
+class Status;
+
+// A row batch is a simpler and more rigid table data structure intended for
+// use primarily in shared memory IPC. It contains a schema (metadata) and a
+// corresponding vector of equal-length Arrow arrays
+class RowBatch {
+ public:
+  // num_rows is a parameter to allow for row batches of a particular size not
+  // having any materialized columns. Each array should have the same length as
+  // num_rows
+  RowBatch(const std::shared_ptr<Schema>& schema, int num_rows,
+      const std::vector<std::shared_ptr<Array>>& columns);
+
+  // @returns: the table's schema
+  const std::shared_ptr<Schema>& schema() const {
+    return schema_;
+  }
+
+  // @returns: the i-th column
+  // Note: Does not boundscheck
+  const std::shared_ptr<Array>& column(int i) const {
+    return columns_[i];
+  }
+
+  const std::string& column_name(int i) const;
+
+  // @returns: the number of columns in the table
+  int num_columns() const {
+    return columns_.size();
+  }
+
+  // @returns: the number of rows (the corresponding length of each column)
+  int64_t num_rows() const {
+    return num_rows_;
+  }
+
+ private:
+  std::shared_ptr<Schema> schema_;
+  int num_rows_;
+  std::vector<std::shared_ptr<Array>> columns_;
+};
+
+// Immutable container of fixed-length columns conforming to a particular schema
+class Table {
+ public:
+  // If columns is zero-length, the table's number of rows is zero
+  Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+      const std::vector<std::shared_ptr<Column>>& columns);
+
+  // num_rows is a parameter to allow for tables of a particular size not
+  // having any materialized columns. Each column should therefore have the
+  // same length as num_rows -- you can validate this using
+  // Table::ValidateColumns
+  Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+      const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows);
+
+  // @returns: the table's name, if any (may be length 0)
+  const std::string& name() const {
+    return name_;
+  }
+
+  // @returns: the table's schema
+  const std::shared_ptr<Schema>& schema() const {
+    return schema_;
+  }
+
+  // Note: Does not boundscheck
+  // @returns: the i-th column
+  const std::shared_ptr<Column>& column(int i) const {
+    return columns_[i];
+  }
+
+  // @returns: the number of columns in the table
+  int num_columns() const {
+    return columns_.size();
+  }
+
+  // @returns: the number of rows (the corresponding length of each column)
+  int64_t num_rows() const {
+    return num_rows_;
+  }
+
+  // After construction, perform any checks to validate the input arguments
+  Status ValidateColumns() const;
+
+ private:
+  // The table's name, optional
+  std::string name_;
+
+  std::shared_ptr<Schema> schema_;
+  std::vector<std::shared_ptr<Column>> columns_;
+
+  int64_t num_rows_;
+};
+
+} // namespace arrow
+
+#endif  // ARROW_TABLE_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/CMakeLists.txt b/cpp/src/arrow/table/CMakeLists.txt
deleted file mode 100644
index d9f00e7..0000000
--- a/cpp/src/arrow/table/CMakeLists.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-#######################################
-# arrow_table
-#######################################
-
-# Headers: top level
-install(FILES
-  column.h
-  schema.h
-  table.h
-  DESTINATION include/arrow/table)
-
-ADD_ARROW_TEST(column-test)
-ADD_ARROW_TEST(schema-test)
-ADD_ARROW_TEST(table-test)
-
-ADD_ARROW_BENCHMARK(column-benchmark)

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column-benchmark.cc b/cpp/src/arrow/table/column-benchmark.cc
deleted file mode 100644
index c01146d..0000000
--- a/cpp/src/arrow/table/column-benchmark.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-
-#include "benchmark/benchmark.h"
-
-#include "arrow/test-util.h"
-#include "arrow/table/test-common.h"
-#include "arrow/types/integer.h"
-#include "arrow/util/memory-pool.h"
-
-namespace arrow {
-namespace {
-  template <typename ArrayType>
-  std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
-    auto pool = GetDefaultMemoryPool();
-    auto data = std::make_shared<PoolBuffer>(pool);
-    auto nulls = std::make_shared<PoolBuffer>(pool);
-    data->Resize(length * sizeof(typename ArrayType::value_type));
-    nulls->Resize(util::bytes_for_bits(length));
-    return std::make_shared<ArrayType>(length, data, 10, nulls);
-  }
-}  // anonymous namespace
-
-
-static void BM_BuildInt32ColumnByChunk(benchmark::State& state) { //NOLINT non-const reference
-  ArrayVector arrays;
-  for (int chunk_n = 0; chunk_n < state.range_x(); ++chunk_n) {
-    arrays.push_back(MakePrimitive<Int32Array>(100, 10));
-  }
-  const auto INT32 = std::make_shared<Int32Type>();
-  const auto field = std::make_shared<Field>("c0", INT32);
-  std::unique_ptr<Column> column;
-  while (state.KeepRunning()) {
-    column.reset(new Column(field, arrays));
-  }
-}
-
-BENCHMARK(BM_BuildInt32ColumnByChunk)->Range(5, 50000);
-
-}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column-test.cc b/cpp/src/arrow/table/column-test.cc
deleted file mode 100644
index 3b102e4..0000000
--- a/cpp/src/arrow/table/column-test.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/test-common.h"
-#include "arrow/test-util.h"
-#include "arrow/type.h"
-#include "arrow/types/integer.h"
-
-using std::shared_ptr;
-using std::vector;
-
-namespace arrow {
-
-const auto INT32 = std::make_shared<Int32Type>();
-
-class TestColumn : public TestBase {
- protected:
-  std::shared_ptr<ChunkedArray> data_;
-  std::unique_ptr<Column> column_;
-};
-
-TEST_F(TestColumn, BasicAPI) {
-  ArrayVector arrays;
-  arrays.push_back(MakePrimitive<Int32Array>(100));
-  arrays.push_back(MakePrimitive<Int32Array>(100, 10));
-  arrays.push_back(MakePrimitive<Int32Array>(100, 20));
-
-  auto field = std::make_shared<Field>("c0", INT32);
-  column_.reset(new Column(field, arrays));
-
-  ASSERT_EQ("c0", column_->name());
-  ASSERT_TRUE(column_->type()->Equals(INT32));
-  ASSERT_EQ(300, column_->length());
-  ASSERT_EQ(30, column_->null_count());
-  ASSERT_EQ(3, column_->data()->num_chunks());
-}
-
-TEST_F(TestColumn, ChunksInhomogeneous) {
-  ArrayVector arrays;
-  arrays.push_back(MakePrimitive<Int32Array>(100));
-  arrays.push_back(MakePrimitive<Int32Array>(100, 10));
-
-  auto field = std::make_shared<Field>("c0", INT32);
-  column_.reset(new Column(field, arrays));
-
-  ASSERT_OK(column_->ValidateData());
-
-  arrays.push_back(MakePrimitive<Int16Array>(100, 10));
-  column_.reset(new Column(field, arrays));
-  ASSERT_RAISES(Invalid, column_->ValidateData());
-}
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column.cc b/cpp/src/arrow/table/column.cc
deleted file mode 100644
index 573e650..0000000
--- a/cpp/src/arrow/table/column.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table/column.h"
-
-#include <memory>
-#include <sstream>
-
-#include "arrow/type.h"
-#include "arrow/util/status.h"
-
-namespace arrow {
-
-ChunkedArray::ChunkedArray(const ArrayVector& chunks) :
-    chunks_(chunks) {
-  length_ = 0;
-  for (const std::shared_ptr<Array>& chunk : chunks) {
-    length_ += chunk->length();
-    null_count_ += chunk->null_count();
-  }
-}
-
-Column::Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks) :
-    field_(field) {
-  data_ = std::make_shared<ChunkedArray>(chunks);
-}
-
-Column::Column(const std::shared_ptr<Field>& field,
-    const std::shared_ptr<Array>& data) :
-    field_(field) {
-  data_ = std::make_shared<ChunkedArray>(ArrayVector({data}));
-}
-
-Column::Column(const std::shared_ptr<Field>& field,
-    const std::shared_ptr<ChunkedArray>& data) :
-    field_(field),
-    data_(data) {}
-
-Status Column::ValidateData() {
-  for (int i = 0; i < data_->num_chunks(); ++i) {
-    const std::shared_ptr<DataType>& type = data_->chunk(i)->type();
-    if (!this->type()->Equals(type)) {
-      std::stringstream ss;
-      ss << "In chunk " << i << " expected type "
-         << this->type()->ToString()
-         << " but saw "
-         << type->ToString();
-      return Status::Invalid(ss.str());
-    }
-  }
-  return Status::OK();
-}
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column.h b/cpp/src/arrow/table/column.h
deleted file mode 100644
index dfc7516..0000000
--- a/cpp/src/arrow/table/column.h
+++ /dev/null
@@ -1,105 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TABLE_COLUMN_H
-#define ARROW_TABLE_COLUMN_H
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/type.h"
-
-namespace arrow {
-
-typedef std::vector<std::shared_ptr<Array> > ArrayVector;
-
-// A data structure managing a list of primitive Arrow arrays logically as one
-// large array
-class ChunkedArray {
- public:
-  explicit ChunkedArray(const ArrayVector& chunks);
-
-  // @returns: the total length of the chunked array; computed on construction
-  int64_t length() const {
-    return length_;
-  }
-
-  int64_t null_count() const {
-    return null_count_;
-  }
-
-  int num_chunks() const {
-    return chunks_.size();
-  }
-
-  const std::shared_ptr<Array>& chunk(int i) const {
-    return chunks_[i];
-  }
-
- protected:
-  ArrayVector chunks_;
-  int64_t length_;
-  int64_t null_count_;
-};
-
-// An immutable column data structure consisting of a field (type metadata) and
-// a logical chunked data array (which can be validated as all being the same
-// type).
-class Column {
- public:
-  Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks);
-  Column(const std::shared_ptr<Field>& field,
-      const std::shared_ptr<ChunkedArray>& data);
-
-  Column(const std::shared_ptr<Field>& field, const std::shared_ptr<Array>& data);
-
-  int64_t length() const {
-    return data_->length();
-  }
-
-  int64_t null_count() const {
-    return data_->null_count();
-  }
-
-  // @returns: the column's name in the passed metadata
-  const std::string& name() const {
-    return field_->name;
-  }
-
-  // @returns: the column's type according to the metadata
-  const std::shared_ptr<DataType>& type() const {
-    return field_->type;
-  }
-
-  // @returns: the column's data as a chunked logical array
-  const std::shared_ptr<ChunkedArray>& data() const {
-    return data_;
-  }
-  // Verify that the column's array data is consistent with the passed field's
-  // metadata
-  Status ValidateData();
-
- protected:
-  std::shared_ptr<Field> field_;
-  std::shared_ptr<ChunkedArray> data_;
-};
-
-} // namespace arrow
-
-#endif  // ARROW_TABLE_COLUMN_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/schema-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema-test.cc b/cpp/src/arrow/table/schema-test.cc
deleted file mode 100644
index 9dfade2..0000000
--- a/cpp/src/arrow/table/schema-test.cc
+++ /dev/null
@@ -1,110 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/schema.h"
-#include "arrow/type.h"
-#include "arrow/types/string.h"
-
-using std::shared_ptr;
-using std::vector;
-
-namespace arrow {
-
-const auto INT32 = std::make_shared<Int32Type>();
-
-TEST(TestField, Basics) {
-  shared_ptr<DataType> ftype = INT32;
-  shared_ptr<DataType> ftype_nn = std::make_shared<Int32Type>(false);
-  Field f0("f0", ftype);
-  Field f0_nn("f0", ftype_nn);
-
-  ASSERT_EQ(f0.name, "f0");
-  ASSERT_EQ(f0.type->ToString(), ftype->ToString());
-
-  ASSERT_TRUE(f0.nullable());
-  ASSERT_FALSE(f0_nn.nullable());
-}
-
-TEST(TestField, Equals) {
-  shared_ptr<DataType> ftype = INT32;
-  shared_ptr<DataType> ftype_nn = std::make_shared<Int32Type>(false);
-
-  Field f0("f0", ftype);
-  Field f0_nn("f0", ftype_nn);
-  Field f0_other("f0", ftype);
-
-  ASSERT_EQ(f0, f0_other);
-  ASSERT_NE(f0, f0_nn);
-}
-
-class TestSchema : public ::testing::Test {
- public:
-  void SetUp() {}
-};
-
-TEST_F(TestSchema, Basics) {
-  auto f0 = std::make_shared<Field>("f0", INT32);
-  auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(false));
-  auto f1_optional = std::make_shared<Field>("f1", std::make_shared<UInt8Type>());
-
-  auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
-
-  vector<shared_ptr<Field> > fields = {f0, f1, f2};
-  auto schema = std::make_shared<Schema>(fields);
-
-  ASSERT_EQ(3, schema->num_fields());
-  ASSERT_EQ(f0, schema->field(0));
-  ASSERT_EQ(f1, schema->field(1));
-  ASSERT_EQ(f2, schema->field(2));
-
-  auto schema2 = std::make_shared<Schema>(fields);
-
-  vector<shared_ptr<Field> > fields3 = {f0, f1_optional, f2};
-  auto schema3 = std::make_shared<Schema>(fields3);
-  ASSERT_TRUE(schema->Equals(schema2));
-  ASSERT_FALSE(schema->Equals(schema3));
-
-  ASSERT_TRUE(schema->Equals(*schema2.get()));
-  ASSERT_FALSE(schema->Equals(*schema3.get()));
-}
-
-TEST_F(TestSchema, ToString) {
-  auto f0 = std::make_shared<Field>("f0", std::make_shared<Int32Type>());
-  auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(false));
-  auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
-  auto f3 = std::make_shared<Field>("f3",
-      std::make_shared<ListType>(std::make_shared<Int16Type>()));
-
-  vector<shared_ptr<Field> > fields = {f0, f1, f2, f3};
-  auto schema = std::make_shared<Schema>(fields);
-
-  std::string result = schema->ToString();
-  std::string expected = R"(f0 int32
-f1 uint8 not null
-f2 string
-f3 list<int16>
-)";
-
-  ASSERT_EQ(expected, result);
-}
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/schema.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema.cc b/cpp/src/arrow/table/schema.cc
deleted file mode 100644
index d49d0a7..0000000
--- a/cpp/src/arrow/table/schema.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table/schema.h"
-
-#include <memory>
-#include <string>
-#include <sstream>
-#include <vector>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-Schema::Schema(const std::vector<std::shared_ptr<Field> >& fields) :
-    fields_(fields) {}
-
-bool Schema::Equals(const Schema& other) const {
-  if (this == &other) return true;
-  if (num_fields() != other.num_fields()) {
-    return false;
-  }
-  for (int i = 0; i < num_fields(); ++i) {
-    if (!field(i)->Equals(*other.field(i).get())) {
-      return false;
-    }
-  }
-  return true;
-}
-
-bool Schema::Equals(const std::shared_ptr<Schema>& other) const {
-  return Equals(*other.get());
-}
-
-std::string Schema::ToString() const {
-  std::stringstream buffer;
-
-  for (auto field : fields_) {
-    buffer << field->ToString() << std::endl;
-  }
-  return buffer.str();
-}
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/schema.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema.h b/cpp/src/arrow/table/schema.h
deleted file mode 100644
index 103f01b..0000000
--- a/cpp/src/arrow/table/schema.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_SCHEMA_H
-#define ARROW_SCHEMA_H
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-class Schema {
- public:
-  explicit Schema(const std::vector<std::shared_ptr<Field> >& fields);
-
-  // Returns true if all of the schema fields are equal
-  bool Equals(const Schema& other) const;
-  bool Equals(const std::shared_ptr<Schema>& other) const;
-
-  // Return the ith schema element. Does not boundscheck
-  const std::shared_ptr<Field>& field(int i) const {
-    return fields_[i];
-  }
-
-  // Render a string representation of the schema suitable for debugging
-  std::string ToString() const;
-
-  int num_fields() const {
-    return fields_.size();
-  }
-
- private:
-  std::vector<std::shared_ptr<Field> > fields_;
-};
-
-} // namespace arrow
-
-#endif  // ARROW_FIELD_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/table-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table-test.cc b/cpp/src/arrow/table/table-test.cc
deleted file mode 100644
index 8b354e8..0000000
--- a/cpp/src/arrow/table/table-test.cc
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/table.h"
-#include "arrow/table/test-common.h"
-#include "arrow/test-util.h"
-#include "arrow/type.h"
-#include "arrow/types/integer.h"
-
-using std::shared_ptr;
-using std::vector;
-
-namespace arrow {
-
-const auto INT16 = std::make_shared<Int16Type>();
-const auto UINT8 = std::make_shared<UInt8Type>();
-const auto INT32 = std::make_shared<Int32Type>();
-
-class TestTable : public TestBase {
- public:
-  void MakeExample1(int length) {
-    auto f0 = std::make_shared<Field>("f0", INT32);
-    auto f1 = std::make_shared<Field>("f1", UINT8);
-    auto f2 = std::make_shared<Field>("f2", INT16);
-
-    vector<shared_ptr<Field> > fields = {f0, f1, f2};
-    schema_ = std::make_shared<Schema>(fields);
-
-    columns_ = {
-      std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
-      std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
-      std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length))
-    };
-  }
-
- protected:
-  std::unique_ptr<Table> table_;
-  shared_ptr<Schema> schema_;
-  vector<std::shared_ptr<Column> > columns_;
-};
-
-TEST_F(TestTable, EmptySchema) {
-  auto empty_schema = shared_ptr<Schema>(new Schema({}));
-  table_.reset(new Table("data", empty_schema, columns_));
-  ASSERT_OK(table_->ValidateColumns());
-  ASSERT_EQ(0, table_->num_rows());
-  ASSERT_EQ(0, table_->num_columns());
-}
-
-TEST_F(TestTable, Ctors) {
-  int length = 100;
-  MakeExample1(length);
-
-  std::string name = "data";
-
-  table_.reset(new Table(name, schema_, columns_));
-  ASSERT_OK(table_->ValidateColumns());
-  ASSERT_EQ(name, table_->name());
-  ASSERT_EQ(length, table_->num_rows());
-  ASSERT_EQ(3, table_->num_columns());
-
-  table_.reset(new Table(name, schema_, columns_, length));
-  ASSERT_OK(table_->ValidateColumns());
-  ASSERT_EQ(name, table_->name());
-  ASSERT_EQ(length, table_->num_rows());
-}
-
-TEST_F(TestTable, Metadata) {
-  int length = 100;
-  MakeExample1(length);
-
-  std::string name = "data";
-  table_.reset(new Table(name, schema_, columns_));
-
-  ASSERT_TRUE(table_->schema()->Equals(schema_));
-
-  auto col = table_->column(0);
-  ASSERT_EQ(schema_->field(0)->name, col->name());
-  ASSERT_EQ(schema_->field(0)->type, col->type());
-}
-
-TEST_F(TestTable, InvalidColumns) {
-  // Check that columns are all the same length
-  int length = 100;
-  MakeExample1(length);
-
-  table_.reset(new Table("data", schema_, columns_, length - 1));
-  ASSERT_RAISES(Invalid, table_->ValidateColumns());
-
-  columns_.clear();
-
-  // Wrong number of columns
-  table_.reset(new Table("data", schema_, columns_, length));
-  ASSERT_RAISES(Invalid, table_->ValidateColumns());
-
-  columns_ = {
-    std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
-    std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
-    std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length - 1))
-  };
-
-  table_.reset(new Table("data", schema_, columns_, length));
-  ASSERT_RAISES(Invalid, table_->ValidateColumns());
-}
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/table.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table.cc b/cpp/src/arrow/table/table.cc
deleted file mode 100644
index 0c788b8..0000000
--- a/cpp/src/arrow/table/table.cc
+++ /dev/null
@@ -1,73 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table/table.h"
-
-#include <memory>
-#include <sstream>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/type.h"
-#include "arrow/util/status.h"
-
-namespace arrow {
-
-Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
-    const std::vector<std::shared_ptr<Column> >& columns) :
-    name_(name),
-    schema_(schema),
-    columns_(columns) {
-  if (columns.size() == 0) {
-    num_rows_ = 0;
-  } else {
-    num_rows_ = columns[0]->length();
-  }
-}
-
-Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
-    const std::vector<std::shared_ptr<Column> >& columns, int64_t num_rows) :
-    name_(name),
-    schema_(schema),
-    columns_(columns),
-    num_rows_(num_rows) {}
-
-Status Table::ValidateColumns() const {
-  if (num_columns() != schema_->num_fields()) {
-    return Status::Invalid("Number of columns did not match schema");
-  }
-
-  if (columns_.size() == 0) {
-    return Status::OK();
-  }
-
-  // Make sure columns are all the same length
-  for (size_t i = 0; i < columns_.size(); ++i) {
-    const Column* col = columns_[i].get();
-    if (col->length() != num_rows_) {
-      std::stringstream ss;
-      ss << "Column " << i << " expected length "
-         << num_rows_
-         << " but got length "
-         << col->length();
-      return Status::Invalid(ss.str());
-    }
-  }
-  return Status::OK();
-}
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/table.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table.h b/cpp/src/arrow/table/table.h
deleted file mode 100644
index b012938..0000000
--- a/cpp/src/arrow/table/table.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TABLE_TABLE_H
-#define ARROW_TABLE_TABLE_H
-
-#include <memory>
-#include <string>
-#include <vector>
-
-namespace arrow {
-
-class Column;
-class Schema;
-class Status;
-
-// Immutable container of fixed-length columns conforming to a particular schema
-class Table {
- public:
-  // If columns is zero-length, the table's number of rows is zero
-  Table(const std::string& name, const std::shared_ptr<Schema>& schema,
-      const std::vector<std::shared_ptr<Column> >& columns);
-
-  Table(const std::string& name, const std::shared_ptr<Schema>& schema,
-      const std::vector<std::shared_ptr<Column> >& columns, int64_t num_rows);
-
-  // @returns: the table's name, if any (may be length 0)
-  const std::string& name() const {
-    return name_;
-  }
-
-  // @returns: the table's schema
-  const std::shared_ptr<Schema>& schema() const {
-    return schema_;
-  }
-
-  // Note: Does not boundscheck
-  // @returns: the i-th column
-  const std::shared_ptr<Column>& column(int i) const {
-    return columns_[i];
-  }
-
-  // @returns: the number of columns in the table
-  int num_columns() const {
-    return columns_.size();
-  }
-
-  // @returns: the number of rows (the corresponding length of each column)
-  int64_t num_rows() const {
-    return num_rows_;
-  }
-
-  // After construction, perform any checks to validate the input arguments
-  Status ValidateColumns() const;
-
- private:
-  // The table's name, optional
-  std::string name_;
-
-  std::shared_ptr<Schema> schema_;
-  std::vector<std::shared_ptr<Column> > columns_;
-
-  int64_t num_rows_;
-};
-
-} // namespace arrow
-
-#endif  // ARROW_TABLE_TABLE_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/test-common.h b/cpp/src/arrow/table/test-common.h
deleted file mode 100644
index 50a5f6a..0000000
--- a/cpp/src/arrow/table/test-common.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/table.h"
-#include "arrow/test-util.h"
-#include "arrow/type.h"
-#include "arrow/util/bit-util.h"
-#include "arrow/util/buffer.h"
-#include "arrow/util/memory-pool.h"
-
-namespace arrow {
-
-class TestBase : public ::testing::Test {
- public:
-  void SetUp() {
-    pool_ = GetDefaultMemoryPool();
-  }
-
-  template <typename ArrayType>
-  std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
-    auto data = std::make_shared<PoolBuffer>(pool_);
-    auto nulls = std::make_shared<PoolBuffer>(pool_);
-    EXPECT_OK(data->Resize(length * sizeof(typename ArrayType::value_type)));
-    EXPECT_OK(nulls->Resize(util::bytes_for_bits(length)));
-    return std::make_shared<ArrayType>(length, data, 10, nulls);
-  }
-
- protected:
-  MemoryPool* pool_;
-};
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index 0898c8e..a9fb2a7 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -18,26 +18,39 @@
 #ifndef ARROW_TEST_UTIL_H_
 #define ARROW_TEST_UTIL_H_
 
-#include <gtest/gtest.h>
+#include <cstdint>
 #include <memory>
+#include <random>
 #include <string>
 #include <vector>
 
+#include "gtest/gtest.h"
+
+#include "arrow/type.h"
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
 #include "arrow/util/bit-util.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/memory-pool.h"
 #include "arrow/util/random.h"
 #include "arrow/util/status.h"
 
 #define ASSERT_RAISES(ENUM, expr)               \
   do {                                          \
     Status s = (expr);                          \
-    ASSERT_TRUE(s.Is##ENUM());                  \
+    if (!s.Is##ENUM()) {                        \
+      FAIL() << s.ToString();                   \
+    }                                           \
   } while (0)
 
 
 #define ASSERT_OK(expr)                         \
   do {                                          \
     Status s = (expr);                          \
-    ASSERT_TRUE(s.ok());                        \
+    if (!s.ok()) {                              \
+        FAIL() << s.ToString();                 \
+    }                                           \
   } while (0)
 
 
@@ -50,6 +63,27 @@
 
 namespace arrow {
 
+class TestBase : public ::testing::Test {
+ public:
+  void SetUp() {
+    pool_ = default_memory_pool();
+  }
+
+  template <typename ArrayType>
+  std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
+    auto data = std::make_shared<PoolBuffer>(pool_);
+    auto nulls = std::make_shared<PoolBuffer>(pool_);
+    EXPECT_OK(data->Resize(length * sizeof(typename ArrayType::value_type)));
+    EXPECT_OK(nulls->Resize(util::bytes_for_bits(length)));
+    return std::make_shared<ArrayType>(length, data, 10, nulls);
+  }
+
+ protected:
+  MemoryPool* pool_;
+};
+
+namespace test {
+
 template <typename T>
 void randint(int64_t N, T lower, T upper, std::vector<T>* out) {
   Random rng(random_seed());
@@ -84,6 +118,33 @@ void random_nulls(int64_t n, double pct_null, std::vector<bool>* nulls) {
   }
 }
 
+static inline void random_bytes(int n, uint32_t seed, uint8_t* out) {
+  std::mt19937 gen(seed);
+  std::uniform_int_distribution<int> d(0, 255);
+
+  for (int i = 0; i < n; ++i) {
+    out[i] = d(gen) & 0xFF;
+  }
+}
+
+template <typename T>
+void rand_uniform_int(int n, uint32_t seed, T min_value, T max_value, T* out) {
+  std::mt19937 gen(seed);
+  std::uniform_int_distribution<T> d(min_value, max_value);
+  for (int i = 0; i < n; ++i) {
+    out[i] = d(gen);
+  }
+}
+
+static inline int bitmap_popcount(const uint8_t* data, int length) {
+  int count = 0;
+  for (int i = 0; i < length; ++i) {
+    // TODO: accelerate this
+    if (util::get_bit(data, i)) ++count;
+  }
+  return count;
+}
+
 static inline int null_count(const std::vector<uint8_t>& nulls) {
   int result = 0;
   for (size_t i = 0; i < nulls.size(); ++i) {
@@ -102,6 +163,7 @@ std::shared_ptr<Buffer> bytes_to_null_buffer(uint8_t* bytes, int length) {
   return out;
 }
 
+} // namespace test
 } // namespace arrow
 
 #endif // ARROW_TEST_UTIL_H_

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 0a2e817..f7f835e 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -24,45 +24,37 @@ namespace arrow {
 
 std::string Field::ToString() const {
   std::stringstream ss;
-  ss << this->name << " " << this->type->ToString();
+  ss << this->name << ": " << this->type->ToString();
+  if (!this->nullable) {
+    ss << " not null";
+  }
   return ss.str();
 }
 
 DataType::~DataType() {}
 
-StringType::StringType(bool nullable)
-    : DataType(LogicalType::STRING, nullable) {}
-
-StringType::StringType(const StringType& other)
-    : StringType(other.nullable) {}
+StringType::StringType() : DataType(Type::STRING) {}
 
 std::string StringType::ToString() const {
   std::string result(name());
-  if (!nullable) {
-    result.append(" not null");
-  }
   return result;
 }
 
 std::string ListType::ToString() const {
   std::stringstream s;
-  s << "list<" << value_type->ToString() << ">";
-  if (!this->nullable) {
-    s << " not null";
-  }
+  s << "list<" << value_field()->ToString() << ">";
   return s.str();
 }
 
 std::string StructType::ToString() const {
   std::stringstream s;
   s << "struct<";
-  for (size_t i = 0; i < fields_.size(); ++i) {
+  for (int i = 0; i < this->num_children(); ++i) {
     if (i > 0) s << ", ";
-    const std::shared_ptr<Field>& field = fields_[i];
+    const std::shared_ptr<Field>& field = this->child(i);
     s << field->name << ": " << field->type->ToString();
   }
   s << ">";
-  if (!nullable) s << " not null";
   return s.str();
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 00b01ea..5984b67 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -18,62 +18,34 @@
 #ifndef ARROW_TYPE_H
 #define ARROW_TYPE_H
 
+#include <cstdint>
 #include <memory>
 #include <string>
 #include <vector>
 
 namespace arrow {
 
-// Physical data type that describes the memory layout of values. See details
-// for each type
-enum class LayoutEnum: char {
-  // A physical type consisting of some non-negative number of bytes
-  BYTE = 0,
-
-  // A physical type consisting of some non-negative number of bits
-  BIT = 1,
-
-  // A parametric variable-length value type. Full specification requires a
-  // child logical type
-  LIST = 2,
-
-  // A collection of multiple equal-length child arrays. Parametric type taking
-  // 1 or more child logical types
-  STRUCT = 3,
-
-  // An array with heterogeneous value types. Parametric types taking 1 or more
-  // child logical types
-  DENSE_UNION = 4,
-  SPARSE_UNION = 5
-};
-
-
-struct LayoutType {
-  LayoutEnum type;
-  explicit LayoutType(LayoutEnum type) : type(type) {}
-};
-
 // Data types in this library are all *logical*. They can be expressed as
 // either a primitive physical type (bytes or bits of some fixed size), a
 // nested type consisting of other data types, or another data type (e.g. a
 // timestamp encoded as an int64)
-struct LogicalType {
+struct Type {
   enum type {
     // A degenerate NULL type represented as 0 bytes/bits
     NA = 0,
 
-    // Little-endian integer types
-    UINT8 = 1,
-    INT8 = 2,
-    UINT16 = 3,
-    INT16 = 4,
-    UINT32 = 5,
-    INT32 = 6,
-    UINT64 = 7,
-    INT64 = 8,
-
     // A boolean value represented as 1 bit
-    BOOL = 9,
+    BOOL = 1,
+
+    // Little-endian integer types
+    UINT8 = 2,
+    INT8 = 3,
+    UINT16 = 4,
+    INT16 = 5,
+    UINT32 = 6,
+    INT32 = 7,
+    UINT64 = 8,
+    INT64 = 9,
 
     // 4-byte floating point value
     FLOAT = 10,
@@ -131,30 +103,38 @@ struct LogicalType {
   };
 };
 
+struct Field;
+
 struct DataType {
-  LogicalType::type type;
-  bool nullable;
+  Type::type type;
 
-  explicit DataType(LogicalType::type type, bool nullable = true) :
-      type(type),
-      nullable(nullable) {}
+  std::vector<std::shared_ptr<Field>> children_;
+
+  explicit DataType(Type::type type) :
+      type(type) {}
 
   virtual ~DataType();
 
   bool Equals(const DataType* other) {
     // Call with a pointer so more friendly to subclasses
-    return this == other || (this->type == other->type &&
-        this->nullable == other->nullable);
+    return this == other || (this->type == other->type);
   }
 
   bool Equals(const std::shared_ptr<DataType>& other) {
     return Equals(other.get());
   }
 
+  const std::shared_ptr<Field>& child(int i) const {
+    return children_[i];
+  }
+
+  int num_children() const {
+    return children_.size();
+  }
+
   virtual std::string ToString() const = 0;
 };
 
-typedef std::shared_ptr<LayoutType> LayoutPtr;
 typedef std::shared_ptr<DataType> TypePtr;
 
 // A field is a piece of metadata that includes (for now) a name and a data
@@ -166,9 +146,13 @@ struct Field {
   // The field's data type
   TypePtr type;
 
-  Field(const std::string& name, const TypePtr& type) :
+  // Fields can be nullable
+  bool nullable;
+
+  Field(const std::string& name, const TypePtr& type, bool nullable = true) :
       name(name),
-      type(type) {}
+      type(type),
+      nullable(nullable) {}
 
   bool operator==(const Field& other) const {
     return this->Equals(other);
@@ -180,6 +164,7 @@ struct Field {
 
   bool Equals(const Field& other) const {
     return (this == &other) || (this->name == other.name &&
+        this->nullable == other.nullable &&
         this->type->Equals(other.type.get()));
   }
 
@@ -187,36 +172,12 @@ struct Field {
     return Equals(*other.get());
   }
 
-  bool nullable() const {
-    return this->type->nullable;
-  }
-
   std::string ToString() const;
 };
 
-struct BytesType : public LayoutType {
-  int size;
-
-  explicit BytesType(int size)
-      : LayoutType(LayoutEnum::BYTE),
-        size(size) {}
-
-  BytesType(const BytesType& other)
-      : BytesType(other.size) {}
-};
-
-struct ListLayoutType : public LayoutType {
-  LayoutPtr value_type;
-
-  explicit ListLayoutType(const LayoutPtr& value_type)
-      : LayoutType(LayoutEnum::BYTE),
-        value_type(value_type) {}
-};
-
 template <typename Derived>
 struct PrimitiveType : public DataType {
-  explicit PrimitiveType(bool nullable = true)
-      : DataType(Derived::type_enum, nullable) {}
+  PrimitiveType() : DataType(Derived::type_enum) {}
 
   std::string ToString() const override;
 };
@@ -224,22 +185,19 @@ struct PrimitiveType : public DataType {
 template <typename Derived>
 inline std::string PrimitiveType<Derived>::ToString() const {
   std::string result(static_cast<const Derived*>(this)->name());
-  if (!nullable) {
-    result.append(" not null");
-  }
   return result;
 }
 
-#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME)          \
-  typedef C_TYPE c_type;                                            \
-  static constexpr LogicalType::type type_enum = LogicalType::ENUM; \
-  static constexpr int size = SIZE;                                 \
-                                                                    \
-  explicit TYPENAME(bool nullable = true)                           \
-      : PrimitiveType<TYPENAME>(nullable) {}                        \
-                                                                    \
-  static const char* name() {                                       \
-    return NAME;                                                    \
+#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME)  \
+  typedef C_TYPE c_type;                                    \
+  static constexpr Type::type type_enum = Type::ENUM;       \
+  static constexpr int size = SIZE;                         \
+                                                            \
+  TYPENAME()                                                \
+      : PrimitiveType<TYPENAME>() {}                        \
+                                                            \
+  static const char* name() {                               \
+    return NAME;                                            \
   }
 
 struct NullType : public PrimitiveType<NullType> {
@@ -292,11 +250,23 @@ struct DoubleType : public PrimitiveType<DoubleType> {
 
 struct ListType : public DataType {
   // List can contain any other logical value type
-  TypePtr value_type;
+  explicit ListType(const std::shared_ptr<DataType>& value_type)
+      : DataType(Type::LIST) {
+    children_ = {std::make_shared<Field>("item", value_type)};
+  }
+
+  explicit ListType(const std::shared_ptr<Field>& value_field)
+      : DataType(Type::LIST) {
+    children_ = {value_field};
+  }
 
-  explicit ListType(const TypePtr& value_type, bool nullable = true)
-      : DataType(LogicalType::LIST, nullable),
-        value_type(value_type) {}
+  const std::shared_ptr<Field>& value_field() const {
+    return children_[0];
+  }
+
+  const std::shared_ptr<DataType>& value_type() const {
+    return children_[0]->type;
+  }
 
   static char const *name() {
     return "list";
@@ -307,9 +277,7 @@ struct ListType : public DataType {
 
 // String is a logical type consisting of a physical list of 1-byte values
 struct StringType : public DataType {
-  explicit StringType(bool nullable = true);
-
-  StringType(const StringType& other);
+  StringType();
 
   static char const *name() {
     return "string";
@@ -319,20 +287,9 @@ struct StringType : public DataType {
 };
 
 struct StructType : public DataType {
-  std::vector<std::shared_ptr<Field> > fields_;
-
-  explicit StructType(const std::vector<std::shared_ptr<Field> >& fields,
-      bool nullable = true)
-      : DataType(LogicalType::STRUCT, nullable) {
-    fields_ = fields;
-  }
-
-  const std::shared_ptr<Field>& field(int i) const {
-    return fields_[i];
-  }
-
-  int num_children() const {
-    return fields_.size();
+  explicit StructType(const std::vector<std::shared_ptr<Field>>& fields)
+      : DataType(Type::STRUCT) {
+    children_ = fields;
   }
 
   std::string ToString() const override;

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt
index 57cabde..595b3be 100644
--- a/cpp/src/arrow/types/CMakeLists.txt
+++ b/cpp/src/arrow/types/CMakeLists.txt
@@ -26,8 +26,6 @@ install(FILES
   construct.h
   datetime.h
   decimal.h
-  floating.h
-  integer.h
   json.h
   list.h
   primitive.h

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/boolean.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/boolean.h b/cpp/src/arrow/types/boolean.h
index a5023d7..1cb91f9 100644
--- a/cpp/src/arrow/types/boolean.h
+++ b/cpp/src/arrow/types/boolean.h
@@ -22,7 +22,7 @@
 
 namespace arrow {
 
-typedef PrimitiveArrayImpl<BooleanType> BooleanArray;
+// typedef PrimitiveArrayImpl<BooleanType> BooleanArray;
 
 class BooleanBuilder : public ArrayBuilder {
 };

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/collection.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/collection.h b/cpp/src/arrow/types/collection.h
index 42a9c92..46d84f1 100644
--- a/cpp/src/arrow/types/collection.h
+++ b/cpp/src/arrow/types/collection.h
@@ -25,7 +25,7 @@
 
 namespace arrow {
 
-template <LogicalType::type T>
+template <Type::type T>
 struct CollectionType : public DataType {
   std::vector<TypePtr> child_types_;
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/construct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc
index 43f01a3..290decd 100644
--- a/cpp/src/arrow/types/construct.cc
+++ b/cpp/src/arrow/types/construct.cc
@@ -19,24 +19,26 @@
 
 #include <memory>
 
-#include "arrow/types/floating.h"
-#include "arrow/types/integer.h"
+#include "arrow/type.h"
+#include "arrow/types/primitive.h"
 #include "arrow/types/list.h"
 #include "arrow/types/string.h"
+#include "arrow/util/buffer.h"
 #include "arrow/util/status.h"
 
 namespace arrow {
 
 class ArrayBuilder;
 
-// Initially looked at doing this with vtables, but shared pointers makes it
-// difficult
-
 #define BUILDER_CASE(ENUM, BuilderType)         \
-    case LogicalType::ENUM:                     \
+    case Type::ENUM:                            \
       out->reset(new BuilderType(pool, type));  \
       return Status::OK();
 
+// Initially looked at doing this with vtables, but shared pointers makes it
+// difficult
+//
+// TODO(wesm): come up with a less monolithic strategy
 Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
     std::shared_ptr<ArrayBuilder>* out) {
   switch (type->type) {
@@ -56,30 +58,41 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
 
     BUILDER_CASE(STRING, StringBuilder);
 
-    case LogicalType::LIST:
+    case Type::LIST:
       {
         std::shared_ptr<ArrayBuilder> value_builder;
 
         const std::shared_ptr<DataType>& value_type = static_cast<ListType*>(
-            type.get())->value_type;
+            type.get())->value_type();
         RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
         out->reset(new ListBuilder(pool, type, value_builder));
         return Status::OK();
       }
-    // BUILDER_CASE(CHAR, CharBuilder);
-
-    // BUILDER_CASE(VARCHAR, VarcharBuilder);
-    // BUILDER_CASE(BINARY, BinaryBuilder);
-
-    // BUILDER_CASE(DATE, DateBuilder);
-    // BUILDER_CASE(TIMESTAMP, TimestampBuilder);
-    // BUILDER_CASE(TIME, TimeBuilder);
+    default:
+      return Status::NotImplemented(type->ToString());
+  }
+}
 
-    // BUILDER_CASE(LIST, ListBuilder);
-    // BUILDER_CASE(STRUCT, StructBuilder);
-    // BUILDER_CASE(DENSE_UNION, DenseUnionBuilder);
-    // BUILDER_CASE(SPARSE_UNION, SparseUnionBuilder);
+#define MAKE_PRIMITIVE_ARRAY_CASE(ENUM, ArrayType)                      \
+    case Type::ENUM:                                                    \
+      out->reset(new ArrayType(type, length, data, null_count, nulls)); \
+      return Status::OK();
 
+Status MakePrimitiveArray(const std::shared_ptr<DataType>& type,
+    int32_t length, const std::shared_ptr<Buffer>& data,
+    int32_t null_count, const std::shared_ptr<Buffer>& nulls,
+    std::shared_ptr<Array>* out) {
+  switch (type->type) {
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT8, UInt8Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT8, Int8Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT16, UInt16Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT16, Int16Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT32, UInt32Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT32, Int32Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT64, UInt64Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT64, Int64Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(FLOAT, FloatArray);
+    MAKE_PRIMITIVE_ARRAY_CASE(DOUBLE, DoubleArray);
     default:
       return Status::NotImplemented(type->ToString());
   }

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/construct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.h b/cpp/src/arrow/types/construct.h
index 59ebe1a..089c484 100644
--- a/cpp/src/arrow/types/construct.h
+++ b/cpp/src/arrow/types/construct.h
@@ -18,19 +18,26 @@
 #ifndef ARROW_TYPES_CONSTRUCT_H
 #define ARROW_TYPES_CONSTRUCT_H
 
+#include <cstdint>
 #include <memory>
 
-#include "arrow/type.h"
-
 namespace arrow {
 
+class Array;
 class ArrayBuilder;
+class Buffer;
+struct DataType;
 class MemoryPool;
 class Status;
 
 Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
     std::shared_ptr<ArrayBuilder>* out);
 
+Status MakePrimitiveArray(const std::shared_ptr<DataType>& type,
+    int32_t length, const std::shared_ptr<Buffer>& data,
+    int32_t null_count, const std::shared_ptr<Buffer>& nulls,
+    std::shared_ptr<Array>* out);
+
 } // namespace arrow
 
 #endif // ARROW_BUILDER_H_

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/datetime.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/datetime.h b/cpp/src/arrow/types/datetime.h
index 765fc29..e57b66a 100644
--- a/cpp/src/arrow/types/datetime.h
+++ b/cpp/src/arrow/types/datetime.h
@@ -31,8 +31,8 @@ struct DateType : public DataType {
 
   Unit unit;
 
-  explicit DateType(Unit unit = Unit::DAY, bool nullable = true)
-      : DataType(LogicalType::DATE, nullable),
+  explicit DateType(Unit unit = Unit::DAY)
+      : DataType(Type::DATE),
         unit(unit) {}
 
   DateType(const DateType& other)
@@ -41,10 +41,6 @@ struct DateType : public DataType {
   static char const *name() {
     return "date";
   }
-
-  // virtual std::string ToString() {
-  //   return name();
-  // }
 };
 
 
@@ -58,8 +54,8 @@ struct TimestampType : public DataType {
 
   Unit unit;
 
-  explicit TimestampType(Unit unit = Unit::MILLI, bool nullable = true)
-      : DataType(LogicalType::TIMESTAMP, nullable),
+  explicit TimestampType(Unit unit = Unit::MILLI)
+      : DataType(Type::TIMESTAMP),
         unit(unit) {}
 
   TimestampType(const TimestampType& other)
@@ -68,10 +64,6 @@ struct TimestampType : public DataType {
   static char const *name() {
     return "timestamp";
   }
-
-  // virtual std::string ToString() {
-  //   return name();
-  // }
 };
 
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/floating.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/floating.cc b/cpp/src/arrow/types/floating.cc
deleted file mode 100644
index bde2826..0000000
--- a/cpp/src/arrow/types/floating.cc
+++ /dev/null
@@ -1,22 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/types/floating.h"
-
-namespace arrow {
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/floating.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/floating.h b/cpp/src/arrow/types/floating.h
deleted file mode 100644
index e752278..0000000
--- a/cpp/src/arrow/types/floating.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_FLOATING_H
-#define ARROW_TYPES_FLOATING_H
-
-#include <string>
-
-#include "arrow/types/primitive.h"
-#include "arrow/type.h"
-
-namespace arrow {
-
-typedef PrimitiveArrayImpl<FloatType> FloatArray;
-typedef PrimitiveArrayImpl<DoubleType> DoubleArray;
-
-typedef PrimitiveBuilder<FloatType, FloatArray> FloatBuilder;
-typedef PrimitiveBuilder<DoubleType, DoubleArray> DoubleBuilder;
-
-} // namespace arrow
-
-#endif // ARROW_TYPES_FLOATING_H


Mime
View raw message