arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-519: [C++] Refactor array comparison code into a compare.h / compare.cc in part to resolve Xcode 6.1 linker issue
Date Mon, 30 Jan 2017 02:27:26 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 4226adfbc -> 7ac320bde


ARROW-519: [C++] Refactor array comparison code into a compare.h / compare.cc in part to resolve Xcode 6.1 linker issue

This should also pave the way for more user-friendly reporting of "why are the arrays not equal", per ARROW-517.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #308 from wesm/ARROW-519 and squashes the following commits:

85b0bf8 [Wes McKinney] Fix invalid memory access when doing RangeEquals on BinaryArray with all empty strings
f5f4593 [Wes McKinney] Remove unused function in pandas.cc. Fix Binary RangeEquals for arrays of length-0 strings
2118ef4 [Wes McKinney] cpplint, compiler warnings
ad54cc6 [Wes McKinney] Remove unneeded ARROW_EXPORT
342a8e6 [Wes McKinney] Refactor array comparison code into a compare.h header and compilation unit. Use visitor pattern. Also may resolve Xcode bug reported in ARROW-519


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/7ac320bd
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/7ac320bd
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/7ac320bd

Branch: refs/heads/master
Commit: 7ac320bde52ae47007dadac7398e22a203c6a48d
Parents: 4226adf
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Sun Jan 29 21:27:17 2017 -0500
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Sun Jan 29 21:27:17 2017 -0500

----------------------------------------------------------------------
 cpp/CMakeLists.txt                    |   1 +
 cpp/src/arrow/CMakeLists.txt          |   1 +
 cpp/src/arrow/array-primitive-test.cc |   4 +-
 cpp/src/arrow/array-string-test.cc    |  48 ++-
 cpp/src/arrow/array.cc                | 334 ++-----------------
 cpp/src/arrow/array.h                 | 145 +-------
 cpp/src/arrow/compare.cc              | 516 +++++++++++++++++++++++++++++
 cpp/src/arrow/compare.h               |  46 +++
 cpp/src/arrow/util/macros.h           |   2 +
 python/CMakeLists.txt                 |   3 +
 python/src/pyarrow/adapters/pandas.cc |   8 -
 11 files changed, 641 insertions(+), 467 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/7ac320bd/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index a0f89f3..ff2c1a6 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -771,6 +771,7 @@ set(ARROW_SRCS
   src/arrow/buffer.cc
   src/arrow/builder.cc
   src/arrow/column.cc
+  src/arrow/compare.cc
   src/arrow/memory_pool.cc
   src/arrow/pretty_print.cc
   src/arrow/schema.cc

http://git-wip-us.apache.org/repos/asf/arrow/blob/7ac320bd/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index e5e36ed..b002bb7 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -20,6 +20,7 @@ install(FILES
   api.h
   array.h
   column.h
+  compare.h
   buffer.h
   builder.h
   memory_pool.h

http://git-wip-us.apache.org/repos/asf/arrow/blob/7ac320bd/cpp/src/arrow/array-primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-primitive-test.cc b/cpp/src/arrow/array-primitive-test.cc
index 443abac..c839fb9 100644
--- a/cpp/src/arrow/array-primitive-test.cc
+++ b/cpp/src/arrow/array-primitive-test.cc
@@ -135,7 +135,7 @@ class TestPrimitiveBuilder : public TestBuilder {
     ASSERT_EQ(nullptr, builder->data());
 
     ASSERT_EQ(ex_null_count, result->null_count());
-    ASSERT_TRUE(result->EqualsExact(*expected.get()));
+    ASSERT_TRUE(result->Equals(*expected));
   }
 
  protected:
@@ -238,7 +238,7 @@ void TestPrimitiveBuilder<PBoolean>::Check(
     bool actual = BitUtil::GetBit(result->raw_data(), i);
     ASSERT_EQ(static_cast<bool>(draws_[i]), actual) << i;
   }
-  ASSERT_TRUE(result->EqualsExact(*expected.get()));
+  ASSERT_TRUE(result->Equals(*expected));
 }
 
 typedef ::testing::Types<PBoolean, PUInt8, PUInt16, PUInt32, PUInt64, PInt8, PInt16,

http://git-wip-us.apache.org/repos/asf/arrow/blob/7ac320bd/cpp/src/arrow/array-string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-string-test.cc b/cpp/src/arrow/array-string-test.cc
index 024bfd5..5ea384a 100644
--- a/cpp/src/arrow/array-string-test.cc
+++ b/cpp/src/arrow/array-string-test.cc
@@ -51,7 +51,7 @@ TEST(TypesTest, TestStringType) {
 // ----------------------------------------------------------------------
 // String container
 
-class TestStringContainer : public ::testing::Test {
+class TestStringArray : public ::testing::Test {
  public:
   void SetUp() {
     chars_ = {'a', 'b', 'b', 'c', 'c', 'c'};
@@ -90,20 +90,20 @@ class TestStringContainer : public ::testing::Test {
   std::shared_ptr<StringArray> strings_;
 };
 
-TEST_F(TestStringContainer, TestArrayBasics) {
+TEST_F(TestStringArray, TestArrayBasics) {
   ASSERT_EQ(length_, strings_->length());
   ASSERT_EQ(1, strings_->null_count());
   ASSERT_OK(strings_->Validate());
 }
 
-TEST_F(TestStringContainer, TestType) {
+TEST_F(TestStringArray, TestType) {
   TypePtr type = strings_->type();
 
   ASSERT_EQ(Type::STRING, type->type);
   ASSERT_EQ(Type::STRING, strings_->type_enum());
 }
 
-TEST_F(TestStringContainer, TestListFunctions) {
+TEST_F(TestStringArray, TestListFunctions) {
   int pos = 0;
   for (size_t i = 0; i < expected_.size(); ++i) {
     ASSERT_EQ(pos, strings_->value_offset(i));
@@ -112,12 +112,12 @@ TEST_F(TestStringContainer, TestListFunctions) {
   }
 }
 
-TEST_F(TestStringContainer, TestDestructor) {
+TEST_F(TestStringArray, TestDestructor) {
   auto arr = std::make_shared<StringArray>(
       length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
 }
 
-TEST_F(TestStringContainer, TestGetString) {
+TEST_F(TestStringArray, TestGetString) {
   for (size_t i = 0; i < expected_.size(); ++i) {
     if (valid_bytes_[i] == 0) {
       ASSERT_TRUE(strings_->IsNull(i));
@@ -127,7 +127,7 @@ TEST_F(TestStringContainer, TestGetString) {
   }
 }
 
-TEST_F(TestStringContainer, TestEmptyStringComparison) {
+TEST_F(TestStringArray, TestEmptyStringComparison) {
   offsets_ = {0, 0, 0, 0, 0, 0};
   offsets_buf_ = test::GetBufferFromVector(offsets_);
   length_ = offsets_.size() - 1;
@@ -212,7 +212,7 @@ TEST_F(TestStringBuilder, TestZeroLength) {
 // Binary container type
 // TODO(emkornfield) there should be some way to refactor these to avoid code duplicating
 // with String
-class TestBinaryContainer : public ::testing::Test {
+class TestBinaryArray : public ::testing::Test {
  public:
   void SetUp() {
     chars_ = {'a', 'b', 'b', 'c', 'c', 'c'};
@@ -252,20 +252,20 @@ class TestBinaryContainer : public ::testing::Test {
   std::shared_ptr<BinaryArray> strings_;
 };
 
-TEST_F(TestBinaryContainer, TestArrayBasics) {
+TEST_F(TestBinaryArray, TestArrayBasics) {
   ASSERT_EQ(length_, strings_->length());
   ASSERT_EQ(1, strings_->null_count());
   ASSERT_OK(strings_->Validate());
 }
 
-TEST_F(TestBinaryContainer, TestType) {
+TEST_F(TestBinaryArray, TestType) {
   TypePtr type = strings_->type();
 
   ASSERT_EQ(Type::BINARY, type->type);
   ASSERT_EQ(Type::BINARY, strings_->type_enum());
 }
 
-TEST_F(TestBinaryContainer, TestListFunctions) {
+TEST_F(TestBinaryArray, TestListFunctions) {
   int pos = 0;
   for (size_t i = 0; i < expected_.size(); ++i) {
     ASSERT_EQ(pos, strings_->value_offset(i));
@@ -274,12 +274,12 @@ TEST_F(TestBinaryContainer, TestListFunctions) {
   }
 }
 
-TEST_F(TestBinaryContainer, TestDestructor) {
+TEST_F(TestBinaryArray, TestDestructor) {
   auto arr = std::make_shared<BinaryArray>(
       length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
 }
 
-TEST_F(TestBinaryContainer, TestGetValue) {
+TEST_F(TestBinaryArray, TestGetValue) {
   for (size_t i = 0; i < expected_.size(); ++i) {
     if (valid_bytes_[i] == 0) {
       ASSERT_TRUE(strings_->IsNull(i));
@@ -291,6 +291,28 @@ TEST_F(TestBinaryContainer, TestGetValue) {
   }
 }
 
+TEST_F(TestBinaryArray, TestEqualsEmptyStrings) {
+  BinaryBuilder builder(default_memory_pool(), arrow::binary());
+
+  std::string empty_string("");
+
+  builder.Append(empty_string);
+  builder.Append(empty_string);
+  builder.Append(empty_string);
+  builder.Append(empty_string);
+  builder.Append(empty_string);
+
+  std::shared_ptr<Array> left_arr;
+  ASSERT_OK(builder.Finish(&left_arr));
+
+  const BinaryArray& left = static_cast<const BinaryArray&>(*left_arr);
+  std::shared_ptr<Array> right = std::make_shared<BinaryArray>(
+      left.length(), left.offsets(), nullptr, left.null_count(), left.null_bitmap());
+
+  ASSERT_TRUE(left.Equals(right));
+  ASSERT_TRUE(left.RangeEquals(0, left.length(), 0, right));
+}
+
 class TestBinaryBuilder : public TestBuilder {
  public:
   void SetUp() {

http://git-wip-us.apache.org/repos/asf/arrow/blob/7ac320bd/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index aa4a692..6fc7fb6 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -22,6 +22,7 @@
 #include <sstream>
 
 #include "arrow/buffer.h"
+#include "arrow/compare.h"
 #include "arrow/status.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/bit-util.h"
@@ -51,43 +52,42 @@ Array::Array(const std::shared_ptr<DataType>& type, int32_t length, int32_t null
   if (null_bitmap_) { null_bitmap_data_ = null_bitmap_->data(); }
 }
 
-bool Array::BaseEquals(const std::shared_ptr<Array>& other) const {
-  if (this == other.get()) { return true; }
-  if (!other) { return false; }
-  return EqualsExact(*other.get());
+bool Array::Equals(const Array& arr) const {
+  bool are_equal = false;
+  Status error = ArrayEquals(*this, arr, &are_equal);
+  if (!error.ok()) { DCHECK(false) << "Arrays not comparable: " << error.ToString(); }
+  return are_equal;
 }
 
-bool Array::EqualsExact(const Array& other) const {
-  if (this == &other) { return true; }
-  if (length_ != other.length_ || null_count_ != other.null_count_ ||
-      type_enum() != other.type_enum()) {
-    return false;
-  }
-  if (null_count_ > 0) {
-    return null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
-  }
-  return true;
+bool Array::Equals(const std::shared_ptr<Array>& arr) const {
+  if (!arr) { return false; }
+  return Equals(*arr);
 }
 
-bool Array::ApproxEquals(const std::shared_ptr<Array>& arr) const {
-  return Equals(arr);
+bool Array::ApproxEquals(const Array& arr) const {
+  bool are_equal = false;
+  Status error = ArrayApproxEquals(*this, arr, &are_equal);
+  if (!error.ok()) { DCHECK(false) << "Arrays not comparable: " << error.ToString(); }
+  return are_equal;
 }
 
-Status Array::Validate() const {
-  return Status::OK();
-}
-
-bool NullArray::Equals(const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (Type::NA != arr->type_enum()) { return false; }
-  return arr->length() == length_;
+bool Array::ApproxEquals(const std::shared_ptr<Array>& arr) const {
+  if (!arr) { return false; }
+  return ApproxEquals(*arr);
 }
 
-bool NullArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_index,
+bool Array::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
     const std::shared_ptr<Array>& arr) const {
   if (!arr) { return false; }
-  if (Type::NA != arr->type_enum()) { return false; }
-  return true;
+  bool are_equal = false;
+  Status error =
+      ArrayRangeEquals(*this, *arr, start_idx, end_idx, other_start_idx, &are_equal);
+  if (!error.ok()) { DCHECK(false) << "Arrays not comparable: " << error.ToString(); }
+  return are_equal;
+}
+
+Status Array::Validate() const {
+  return Status::OK();
 }
 
 Status NullArray::Accept(ArrayVisitor* visitor) const {
@@ -105,36 +105,6 @@ PrimitiveArray::PrimitiveArray(const std::shared_ptr<DataType>& type, int32_t le
   raw_data_ = data == nullptr ? nullptr : data_->data();
 }
 
-bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const {
-  if (!Array::EqualsExact(other)) { return false; }
-
-  if (null_count_ > 0) {
-    const uint8_t* this_data = raw_data_;
-    const uint8_t* other_data = other.raw_data_;
-
-    auto size_meta = dynamic_cast<const FixedWidthType*>(type_.get());
-    int value_byte_size = size_meta->bit_width() / 8;
-    DCHECK_GT(value_byte_size, 0);
-
-    for (int i = 0; i < length_; ++i) {
-      if (!IsNull(i) && memcmp(this_data, other_data, value_byte_size)) { return false; }
-      this_data += value_byte_size;
-      other_data += value_byte_size;
-    }
-    return true;
-  } else {
-    if (length_ == 0 && other.length_ == 0) { return true; }
-    return data_->Equals(*other.data_, length_);
-  }
-}
-
-bool PrimitiveArray::Equals(const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (!arr) { return false; }
-  if (this->type_enum() != arr->type_enum()) { return false; }
-  return EqualsExact(static_cast<const PrimitiveArray&>(*arr.get()));
-}
-
 template <typename T>
 Status NumericArray<T>::Accept(ArrayVisitor* visitor) const {
   return visitor->Visit(*this);
@@ -150,6 +120,7 @@ template class NumericArray<Int32Type>;
 template class NumericArray<Int64Type>;
 template class NumericArray<TimestampType>;
 template class NumericArray<DateType>;
+template class NumericArray<TimeType>;
 template class NumericArray<HalfFloatType>;
 template class NumericArray<FloatType>;
 template class NumericArray<DoubleType>;
@@ -167,50 +138,6 @@ BooleanArray::BooleanArray(const std::shared_ptr<DataType>& type, int32_t length
     const std::shared_ptr<Buffer>& null_bitmap)
     : PrimitiveArray(type, length, data, null_count, null_bitmap) {}
 
-bool BooleanArray::EqualsExact(const BooleanArray& other) const {
-  if (this == &other) return true;
-  if (null_count_ != other.null_count_) { return false; }
-
-  if (null_count_ > 0) {
-    bool equal_bitmap =
-        null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
-    if (!equal_bitmap) { return false; }
-
-    const uint8_t* this_data = raw_data_;
-    const uint8_t* other_data = other.raw_data_;
-
-    for (int i = 0; i < length_; ++i) {
-      if (!IsNull(i) && BitUtil::GetBit(this_data, i) != BitUtil::GetBit(other_data, i)) {
-        return false;
-      }
-    }
-    return true;
-  } else {
-    return data_->Equals(*other.data_, BitUtil::BytesForBits(length_));
-  }
-}
-
-bool BooleanArray::Equals(const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) return true;
-  if (Type::BOOL != arr->type_enum()) { return false; }
-  return EqualsExact(static_cast<const BooleanArray&>(*arr.get()));
-}
-
-bool BooleanArray::RangeEquals(int32_t start_idx, int32_t end_idx,
-    int32_t other_start_idx, const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (!arr) { return false; }
-  if (this->type_enum() != arr->type_enum()) { return false; }
-  const auto other = static_cast<BooleanArray*>(arr.get());
-  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
-    const bool is_null = IsNull(i);
-    if (is_null != arr->IsNull(o_i) || (!is_null && Value(i) != other->Value(o_i))) {
-      return false;
-    }
-  }
-  return true;
-}
-
 Status BooleanArray::Accept(ArrayVisitor* visitor) const {
   return visitor->Visit(*this);
 }
@@ -218,56 +145,6 @@ Status BooleanArray::Accept(ArrayVisitor* visitor) const {
 // ----------------------------------------------------------------------
 // ListArray
 
-bool ListArray::EqualsExact(const ListArray& other) const {
-  if (this == &other) { return true; }
-  if (null_count_ != other.null_count_) { return false; }
-
-  bool equal_offsets =
-      offsets_buffer_->Equals(*other.offsets_buffer_, (length_ + 1) * sizeof(int32_t));
-  if (!equal_offsets) { return false; }
-  bool equal_null_bitmap = true;
-  if (null_count_ > 0) {
-    equal_null_bitmap =
-        null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
-  }
-
-  if (!equal_null_bitmap) { return false; }
-
-  return values()->Equals(other.values());
-}
-
-bool ListArray::Equals(const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (this->type_enum() != arr->type_enum()) { return false; }
-  return EqualsExact(static_cast<const ListArray&>(*arr.get()));
-}
-
-bool ListArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
-    const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (!arr) { return false; }
-  if (this->type_enum() != arr->type_enum()) { return false; }
-  const auto other = static_cast<ListArray*>(arr.get());
-  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
-    const bool is_null = IsNull(i);
-    if (is_null != arr->IsNull(o_i)) { return false; }
-    if (is_null) continue;
-    const int32_t begin_offset = offset(i);
-    const int32_t end_offset = offset(i + 1);
-    const int32_t other_begin_offset = other->offset(o_i);
-    const int32_t other_end_offset = other->offset(o_i + 1);
-    // Underlying can't be equal if the size isn't equal
-    if (end_offset - begin_offset != other_end_offset - other_begin_offset) {
-      return false;
-    }
-    if (!values_->RangeEquals(
-            begin_offset, end_offset, other_begin_offset, other->values())) {
-      return false;
-    }
-  }
-  return true;
-}
-
 Status ListArray::Validate() const {
   if (length_ < 0) { return Status::Invalid("Length was negative"); }
   if (!offsets_buffer_) { return Status::Invalid("offsets_buffer_ was null"); }
@@ -350,51 +227,6 @@ Status BinaryArray::Validate() const {
   return Status::OK();
 }
 
-bool BinaryArray::EqualsExact(const BinaryArray& other) const {
-  if (!Array::EqualsExact(other)) { return false; }
-
-  bool equal_offsets =
-      offsets_buffer_->Equals(*other.offsets_buffer_, (length_ + 1) * sizeof(int32_t));
-  if (!equal_offsets) { return false; }
-
-  if (!data_buffer_ && !(other.data_buffer_)) { return true; }
-
-  return data_buffer_->Equals(*other.data_buffer_, raw_offsets()[length_]);
-}
-
-bool BinaryArray::Equals(const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (this->type_enum() != arr->type_enum()) { return false; }
-  return EqualsExact(static_cast<const BinaryArray&>(*arr.get()));
-}
-
-bool BinaryArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
-    const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (!arr) { return false; }
-  if (this->type_enum() != arr->type_enum()) { return false; }
-  const auto other = static_cast<const BinaryArray*>(arr.get());
-  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
-    const bool is_null = IsNull(i);
-    if (is_null != arr->IsNull(o_i)) { return false; }
-    if (is_null) continue;
-    const int32_t begin_offset = offset(i);
-    const int32_t end_offset = offset(i + 1);
-    const int32_t other_begin_offset = other->offset(o_i);
-    const int32_t other_end_offset = other->offset(o_i + 1);
-    // Underlying can't be equal if the size isn't equal
-    if (end_offset - begin_offset != other_end_offset - other_begin_offset) {
-      return false;
-    }
-
-    if (std::memcmp(data_ + begin_offset, other->data_ + other_begin_offset,
-            end_offset - begin_offset)) {
-      return false;
-    }
-  }
-  return true;
-}
-
 Status BinaryArray::Accept(ArrayVisitor* visitor) const {
   return visitor->Visit(*this);
 }
@@ -421,36 +253,6 @@ std::shared_ptr<Array> StructArray::field(int32_t pos) const {
   return field_arrays_[pos];
 }
 
-bool StructArray::Equals(const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (!arr) { return false; }
-  if (this->type_enum() != arr->type_enum()) { return false; }
-  if (null_count_ != arr->null_count()) { return false; }
-  return RangeEquals(0, length_, 0, arr);
-}
-
-bool StructArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
-    const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (!arr) { return false; }
-  if (Type::STRUCT != arr->type_enum()) { return false; }
-  const auto& other = static_cast<const StructArray&>(*arr.get());
-
-  bool equal_fields = true;
-  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
-    if (IsNull(i) != arr->IsNull(o_i)) { return false; }
-    if (IsNull(i)) continue;
-    for (size_t j = 0; j < field_arrays_.size(); ++j) {
-      // TODO: really we should be comparing stretches of non-null data rather
-      // than looking at one value at a time.
-      equal_fields = field(j)->RangeEquals(i, i + 1, o_i, other.field(j));
-      if (!equal_fields) { return false; }
-    }
-  }
-
-  return true;
-}
-
 Status StructArray::Validate() const {
   if (length_ < 0) { return Status::Invalid("Length was negative"); }
 
@@ -511,67 +313,6 @@ std::shared_ptr<Array> UnionArray::child(int32_t pos) const {
   return children_[pos];
 }
 
-bool UnionArray::Equals(const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (!arr) { return false; }
-  if (!this->type_->Equals(arr->type())) { return false; }
-  if (null_count_ != arr->null_count()) { return false; }
-  return RangeEquals(0, length_, 0, arr);
-}
-
-bool UnionArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
-    const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (!arr) { return false; }
-  if (Type::UNION != arr->type_enum()) { return false; }
-  const auto& other = static_cast<const UnionArray&>(*arr.get());
-
-  const UnionMode union_mode = mode();
-  if (union_mode != other.mode()) { return false; }
-
-  // Define a mapping from the type id to child number
-  const auto& type_codes = static_cast<const UnionType&>(*arr->type().get()).type_ids;
-  uint8_t max_code = 0;
-  for (uint8_t code : type_codes) {
-    if (code > max_code) { max_code = code; }
-  }
-
-  // Store mapping in a vector for constant time lookups
-  std::vector<uint8_t> type_id_to_child_num(max_code + 1);
-  for (uint8_t i = 0; i < static_cast<uint8_t>(type_codes.size()); ++i) {
-    type_id_to_child_num[type_codes[i]] = i;
-  }
-
-  const uint8_t* this_ids = raw_type_ids();
-  const uint8_t* other_ids = other.raw_type_ids();
-
-  uint8_t id, child_num;
-  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
-    if (IsNull(i) != other.IsNull(o_i)) { return false; }
-    if (IsNull(i)) continue;
-    if (this_ids[i] != other_ids[o_i]) { return false; }
-
-    id = this_ids[i];
-    child_num = type_id_to_child_num[id];
-
-    // TODO(wesm): really we should be comparing stretches of non-null data
-    // rather than looking at one value at a time.
-    if (union_mode == UnionMode::SPARSE) {
-      if (!child(child_num)->RangeEquals(i, i + 1, o_i, other.child(child_num))) {
-        return false;
-      }
-    } else {
-      const int32_t offset = offsets_[i];
-      const int32_t o_offset = other.offsets_[i];
-      if (!child(child_num)->RangeEquals(
-              offset, offset + 1, o_offset, other.child(child_num))) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
 Status UnionArray::Validate() const {
   if (length_ < 0) { return Status::Invalid("Length was negative"); }
 
@@ -624,25 +365,6 @@ std::shared_ptr<Array> DictionaryArray::dictionary() const {
   return dict_type_->dictionary();
 }
 
-bool DictionaryArray::EqualsExact(const DictionaryArray& other) const {
-  if (!dictionary()->Equals(other.dictionary())) { return false; }
-  return indices_->Equals(other.indices());
-}
-
-bool DictionaryArray::Equals(const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (Type::DICTIONARY != arr->type_enum()) { return false; }
-  return EqualsExact(static_cast<const DictionaryArray&>(*arr.get()));
-}
-
-bool DictionaryArray::RangeEquals(int32_t start_idx, int32_t end_idx,
-    int32_t other_start_idx, const std::shared_ptr<Array>& arr) const {
-  if (Type::DICTIONARY != arr->type_enum()) { return false; }
-  const auto& dict_other = static_cast<const DictionaryArray&>(*arr.get());
-  if (!dictionary()->Equals(dict_other.dictionary())) { return false; }
-  return indices_->RangeEquals(start_idx, end_idx, other_start_idx, dict_other.indices());
-}
-
 Status DictionaryArray::Accept(ArrayVisitor* visitor) const {
   return visitor->Visit(*this);
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/7ac320bd/cpp/src/arrow/array.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 4f4b727..3b6e93f 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -102,15 +102,16 @@ class ARROW_EXPORT Array {
   /// Note that for `null_count == 0`, this can be a `nullptr`.
   const uint8_t* null_bitmap_data() const { return null_bitmap_data_; }
 
-  bool BaseEquals(const std::shared_ptr<Array>& arr) const;
-  bool EqualsExact(const Array& arr) const;
-  virtual bool Equals(const std::shared_ptr<Array>& arr) const = 0;
-  virtual bool ApproxEquals(const std::shared_ptr<Array>& arr) const;
+  bool Equals(const Array& arr) const;
+  bool Equals(const std::shared_ptr<Array>& arr) const;
+
+  bool ApproxEquals(const std::shared_ptr<Array>& arr) const;
+  bool ApproxEquals(const Array& arr) const;
 
   /// Compare if the range of slots specified are equal for the given array and
   /// this array.  end_idx exclusive.  This methods does not bounds check.
-  virtual bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
-      const std::shared_ptr<Array>& arr) const = 0;
+  bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
+      const std::shared_ptr<Array>& arr) const;
 
   /// Determines if the array is internally consistent.
   ///
@@ -142,10 +143,6 @@ class ARROW_EXPORT NullArray : public Array {
 
   explicit NullArray(int32_t length) : NullArray(std::make_shared<NullType>(), length) {}
 
-  bool Equals(const std::shared_ptr<Array>& arr) const override;
-  bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_index,
-      const std::shared_ptr<Array>& arr) const override;
-
   Status Accept(ArrayVisitor* visitor) const override;
 };
 
@@ -159,9 +156,6 @@ class ARROW_EXPORT PrimitiveArray : public Array {
 
   std::shared_ptr<Buffer> data() const { return data_; }
 
-  bool EqualsExact(const PrimitiveArray& other) const;
-  bool Equals(const std::shared_ptr<Array>& arr) const override;
-
  protected:
   PrimitiveArray(const std::shared_ptr<DataType>& type, int32_t length,
       const std::shared_ptr<Buffer>& data, int32_t null_count = 0,
@@ -184,28 +178,6 @@ class ARROW_EXPORT NumericArray : public PrimitiveArray {
       const std::shared_ptr<Buffer>& null_bitmap = nullptr)
       : PrimitiveArray(type, length, data, null_count, null_bitmap) {}
 
-  bool EqualsExact(const NumericArray<TypeClass>& other) const {
-    return PrimitiveArray::EqualsExact(static_cast<const PrimitiveArray&>(other));
-  }
-
-  bool ApproxEquals(const std::shared_ptr<Array>& arr) const override {
-    return PrimitiveArray::Equals(arr);
-  }
-
-  bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
-      const std::shared_ptr<Array>& arr) const override {
-    if (this == arr.get()) { return true; }
-    if (!arr) { return false; }
-    if (this->type_enum() != arr->type_enum()) { return false; }
-    const auto other = static_cast<NumericArray<TypeClass>*>(arr.get());
-    for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
-      const bool is_null = IsNull(i);
-      if (is_null != arr->IsNull(o_i) || (!is_null && Value(i) != other->Value(o_i))) {
-        return false;
-      }
-    }
-    return true;
-  }
   const value_type* raw_data() const {
     return reinterpret_cast<const value_type*>(raw_data_);
   }
@@ -215,78 +187,6 @@ class ARROW_EXPORT NumericArray : public PrimitiveArray {
   value_type Value(int i) const { return raw_data()[i]; }
 };
 
-template <>
-inline bool NumericArray<FloatType>::ApproxEquals(
-    const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (!arr) { return false; }
-  if (this->type_enum() != arr->type_enum()) { return false; }
-
-  const auto& other = *static_cast<NumericArray<FloatType>*>(arr.get());
-
-  if (this == &other) { return true; }
-  if (null_count_ != other.null_count_) { return false; }
-
-  auto this_data = reinterpret_cast<const float*>(raw_data_);
-  auto other_data = reinterpret_cast<const float*>(other.raw_data_);
-
-  static constexpr float EPSILON = 1E-5;
-
-  if (length_ == 0 && other.length_ == 0) { return true; }
-
-  if (null_count_ > 0) {
-    bool equal_bitmap =
-        null_bitmap_->Equals(*other.null_bitmap_, BitUtil::CeilByte(length_) / 8);
-    if (!equal_bitmap) { return false; }
-
-    for (int i = 0; i < length_; ++i) {
-      if (IsNull(i)) continue;
-      if (fabs(this_data[i] - other_data[i]) > EPSILON) { return false; }
-    }
-  } else {
-    for (int i = 0; i < length_; ++i) {
-      if (fabs(this_data[i] - other_data[i]) > EPSILON) { return false; }
-    }
-  }
-  return true;
-}
-
-template <>
-inline bool NumericArray<DoubleType>::ApproxEquals(
-    const std::shared_ptr<Array>& arr) const {
-  if (this == arr.get()) { return true; }
-  if (!arr) { return false; }
-  if (this->type_enum() != arr->type_enum()) { return false; }
-
-  const auto& other = *static_cast<NumericArray<DoubleType>*>(arr.get());
-
-  if (this == &other) { return true; }
-  if (null_count_ != other.null_count_) { return false; }
-
-  auto this_data = reinterpret_cast<const double*>(raw_data_);
-  auto other_data = reinterpret_cast<const double*>(other.raw_data_);
-
-  if (length_ == 0 && other.length_ == 0) { return true; }
-
-  static constexpr double EPSILON = 1E-5;
-
-  if (null_count_ > 0) {
-    bool equal_bitmap =
-        null_bitmap_->Equals(*other.null_bitmap_, BitUtil::CeilByte(length_) / 8);
-    if (!equal_bitmap) { return false; }
-
-    for (int i = 0; i < length_; ++i) {
-      if (IsNull(i)) continue;
-      if (fabs(this_data[i] - other_data[i]) > EPSILON) { return false; }
-    }
-  } else {
-    for (int i = 0; i < length_; ++i) {
-      if (fabs(this_data[i] - other_data[i]) > EPSILON) { return false; }
-    }
-  }
-  return true;
-}
-
 class ARROW_EXPORT BooleanArray : public PrimitiveArray {
  public:
   using TypeClass = BooleanType;
@@ -297,11 +197,6 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray {
       const std::shared_ptr<Buffer>& data, int32_t null_count = 0,
       const std::shared_ptr<Buffer>& null_bitmap = nullptr);
 
-  bool EqualsExact(const BooleanArray& other) const;
-  bool Equals(const std::shared_ptr<Array>& arr) const override;
-  bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
-      const std::shared_ptr<Array>& arr) const override;
-
   Status Accept(ArrayVisitor* visitor) const override;
 
   const uint8_t* raw_data() const { return reinterpret_cast<const uint8_t*>(raw_data_); }
@@ -345,12 +240,6 @@ class ARROW_EXPORT ListArray : public Array {
   int32_t value_offset(int i) const { return offsets_[i]; }
   int32_t value_length(int i) const { return offsets_[i + 1] - offsets_[i]; }
 
-  bool EqualsExact(const ListArray& other) const;
-  bool Equals(const std::shared_ptr<Array>& arr) const override;
-
-  bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
-      const std::shared_ptr<Array>& arr) const override;
-
   Status Accept(ArrayVisitor* visitor) const override;
 
  protected:
@@ -396,11 +285,6 @@ class ARROW_EXPORT BinaryArray : public Array {
   int32_t value_offset(int i) const { return offsets_[i]; }
   int32_t value_length(int i) const { return offsets_[i + 1] - offsets_[i]; }
 
-  bool EqualsExact(const BinaryArray& other) const;
-  bool Equals(const std::shared_ptr<Array>& arr) const override;
-  bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
-      const std::shared_ptr<Array>& arr) const override;
-
   Status Validate() const override;
 
   Status Accept(ArrayVisitor* visitor) const override;
@@ -459,11 +343,6 @@ class ARROW_EXPORT StructArray : public Array {
 
   const std::vector<std::shared_ptr<Array>>& fields() const { return field_arrays_; }
 
-  bool EqualsExact(const StructArray& other) const;
-  bool Equals(const std::shared_ptr<Array>& arr) const override;
-  bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
-      const std::shared_ptr<Array>& arr) const override;
-
   Status Accept(ArrayVisitor* visitor) const override;
 
  protected:
@@ -500,11 +379,6 @@ class ARROW_EXPORT UnionArray : public Array {
 
   const std::vector<std::shared_ptr<Array>>& children() const { return children_; }
 
-  bool EqualsExact(const UnionArray& other) const;
-  bool Equals(const std::shared_ptr<Array>& arr) const override;
-  bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
-      const std::shared_ptr<Array>& arr) const override;
-
   Status Accept(ArrayVisitor* visitor) const override;
 
  protected:
@@ -555,11 +429,6 @@ class ARROW_EXPORT DictionaryArray : public Array {
 
   const DictionaryType* dict_type() { return dict_type_; }
 
-  bool EqualsExact(const DictionaryArray& other) const;
-  bool Equals(const std::shared_ptr<Array>& arr) const override;
-  bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
-      const std::shared_ptr<Array>& arr) const override;
-
   Status Accept(ArrayVisitor* visitor) const override;
 
  protected:

http://git-wip-us.apache.org/repos/asf/arrow/blob/7ac320bd/cpp/src/arrow/compare.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
new file mode 100644
index 0000000..d039bba
--- /dev/null
+++ b/cpp/src/arrow/compare.cc
@@ -0,0 +1,516 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Functions for comparing Arrow data structures
+
+#include "arrow/compare.h"
+
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// Public method implementations
+
+// Visitor that compares the slots [left_start_idx, left_end_idx) of the visited
+// ("left") array against the equally long run of slots of `right` that begins at
+// right_start_idx. The outcome is made available through result().
+//
+// Every Visit overload downcasts `right` with static_cast to the class of the
+// visited array, so the caller must already have verified that both arrays have
+// the same type (ArrayRangeEquals checks type_enum() before dispatching).
+class RangeEqualsVisitor : public ArrayVisitor {
+ public:
+  RangeEqualsVisitor(const Array& right, int32_t left_start_idx, int32_t left_end_idx,
+      int32_t right_start_idx)
+      : right_(right),
+        left_start_idx_(left_start_idx),
+        left_end_idx_(left_end_idx),
+        right_start_idx_(right_start_idx),
+        result_(false) {}
+
+  // Null arrays carry no values, so any two ranges are reported as equal.
+  Status Visit(const NullArray& left) override {
+    UNUSED(left);
+    result_ = true;
+    return Status::OK();
+  }
+
+  // Slot-by-slot comparison for array classes exposing IsNull(i) and Value(i).
+  // Two slots match when both are null, or both are non-null with equal values.
+  template <typename ArrayType>
+  inline Status CompareValues(const ArrayType& left) {
+    const auto& right = static_cast<const ArrayType&>(right_);
+
+    for (int32_t i = left_start_idx_, o_i = right_start_idx_; i < left_end_idx_;
+         ++i, ++o_i) {
+      const bool is_null = left.IsNull(i);
+      if (is_null != right.IsNull(o_i) ||
+          (!is_null && left.Value(i) != right.Value(o_i))) {
+        result_ = false;
+        return Status::OK();
+      }
+    }
+    result_ = true;
+    return Status::OK();
+  }
+
+  // Compares variable-length binary values over the configured range. The two
+  // arrays may store their bytes at different offsets; only the lengths and the
+  // bytes of each value have to agree.
+  bool CompareBinaryRange(const BinaryArray& left) const {
+    const auto& right = static_cast<const BinaryArray&>(right_);
+
+    for (int32_t i = left_start_idx_, o_i = right_start_idx_; i < left_end_idx_;
+         ++i, ++o_i) {
+      const bool is_null = left.IsNull(i);
+      if (is_null != right.IsNull(o_i)) { return false; }
+      if (is_null) continue;
+      const int32_t begin_offset = left.offset(i);
+      const int32_t end_offset = left.offset(i + 1);
+      const int32_t right_begin_offset = right.offset(o_i);
+      const int32_t right_end_offset = right.offset(o_i + 1);
+      const int32_t value_size = end_offset - begin_offset;
+      // Underlying can't be equal if the size isn't equal
+      if (value_size != right_end_offset - right_begin_offset) { return false; }
+
+      // Skip the memcmp for empty values so the data buffers are never touched
+      // when there are no bytes to compare.
+      if (value_size > 0 &&
+          std::memcmp(left.data()->data() + begin_offset,
+              right.data()->data() + right_begin_offset, value_size)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  Status Visit(const BooleanArray& left) override {
+    return CompareValues<BooleanArray>(left);
+  }
+
+  Status Visit(const Int8Array& left) override { return CompareValues<Int8Array>(left); }
+
+  Status Visit(const Int16Array& left) override {
+    return CompareValues<Int16Array>(left);
+  }
+  Status Visit(const Int32Array& left) override {
+    return CompareValues<Int32Array>(left);
+  }
+  Status Visit(const Int64Array& left) override {
+    return CompareValues<Int64Array>(left);
+  }
+  Status Visit(const UInt8Array& left) override {
+    return CompareValues<UInt8Array>(left);
+  }
+  Status Visit(const UInt16Array& left) override {
+    return CompareValues<UInt16Array>(left);
+  }
+  Status Visit(const UInt32Array& left) override {
+    return CompareValues<UInt32Array>(left);
+  }
+  Status Visit(const UInt64Array& left) override {
+    return CompareValues<UInt64Array>(left);
+  }
+  Status Visit(const FloatArray& left) override {
+    return CompareValues<FloatArray>(left);
+  }
+  Status Visit(const DoubleArray& left) override {
+    return CompareValues<DoubleArray>(left);
+  }
+
+  Status Visit(const HalfFloatArray& left) override {
+    UNUSED(left);
+    return Status::NotImplemented("Half float type");
+  }
+
+  Status Visit(const StringArray& left) override {
+    result_ = CompareBinaryRange(left);
+    return Status::OK();
+  }
+
+  Status Visit(const BinaryArray& left) override {
+    result_ = CompareBinaryRange(left);
+    return Status::OK();
+  }
+
+  Status Visit(const DateArray& left) override { return CompareValues<DateArray>(left); }
+
+  Status Visit(const TimeArray& left) override { return CompareValues<TimeArray>(left); }
+
+  Status Visit(const TimestampArray& left) override {
+    return CompareValues<TimestampArray>(left);
+  }
+
+  Status Visit(const IntervalArray& left) override {
+    return CompareValues<IntervalArray>(left);
+  }
+
+  Status Visit(const DecimalArray& left) override {
+    UNUSED(left);
+    return Status::NotImplemented("Decimal type");
+  }
+
+  // Compares list slots over the configured range: null-ness and list length
+  // must agree, then the child values of each list are compared as a range.
+  bool CompareLists(const ListArray& left) {
+    const auto& right = static_cast<const ListArray&>(right_);
+
+    const std::shared_ptr<Array>& left_values = left.values();
+    const std::shared_ptr<Array>& right_values = right.values();
+
+    for (int32_t i = left_start_idx_, o_i = right_start_idx_; i < left_end_idx_;
+         ++i, ++o_i) {
+      const bool is_null = left.IsNull(i);
+      if (is_null != right.IsNull(o_i)) { return false; }
+      if (is_null) continue;
+      const int32_t begin_offset = left.offset(i);
+      const int32_t end_offset = left.offset(i + 1);
+      const int32_t right_begin_offset = right.offset(o_i);
+      const int32_t right_end_offset = right.offset(o_i + 1);
+      // Underlying can't be equal if the size isn't equal
+      if (end_offset - begin_offset != right_end_offset - right_begin_offset) {
+        return false;
+      }
+      if (!left_values->RangeEquals(
+              begin_offset, end_offset, right_begin_offset, right_values)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  Status Visit(const ListArray& left) override {
+    result_ = CompareLists(left);
+    return Status::OK();
+  }
+
+  // Compares struct slots over the configured range by comparing every field,
+  // one slot at a time, for each non-null struct.
+  bool CompareStructs(const StructArray& left) {
+    const auto& right = static_cast<const StructArray&>(right_);
+    for (int32_t i = left_start_idx_, o_i = right_start_idx_; i < left_end_idx_;
+         ++i, ++o_i) {
+      if (left.IsNull(i) != right.IsNull(o_i)) { return false; }
+      if (left.IsNull(i)) continue;
+      for (size_t j = 0; j < left.fields().size(); ++j) {
+        // TODO: really we should be comparing stretches of non-null data rather
+        // than looking at one value at a time.
+        if (!left.field(j)->RangeEquals(i, i + 1, o_i, right.field(j))) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  Status Visit(const StructArray& left) override {
+    result_ = CompareStructs(left);
+    return Status::OK();
+  }
+
+  // Compares union slots over the configured range. Both arrays must use the
+  // same union mode, and matching slots must have the same type id and equal
+  // child values.
+  bool CompareUnions(const UnionArray& left) const {
+    const auto& right = static_cast<const UnionArray&>(right_);
+
+    const UnionMode union_mode = left.mode();
+    if (union_mode != right.mode()) { return false; }
+
+    const auto& left_type = static_cast<const UnionType&>(*left.type());
+
+    // Define a mapping from the type id to child number
+    uint8_t max_code = 0;
+
+    // Bound by reference: the type owns the vector for the whole call.
+    const std::vector<uint8_t>& type_codes = left_type.type_ids;
+    for (size_t i = 0; i < type_codes.size(); ++i) {
+      const uint8_t code = type_codes[i];
+      if (code > max_code) { max_code = code; }
+    }
+
+    // Store mapping in a vector for constant time lookups
+    std::vector<uint8_t> type_id_to_child_num(max_code + 1);
+    for (size_t i = 0; i < type_codes.size(); ++i) {
+      type_id_to_child_num[type_codes[i]] = static_cast<uint8_t>(i);
+    }
+
+    const uint8_t* left_ids = left.raw_type_ids();
+    const uint8_t* right_ids = right.raw_type_ids();
+
+    for (int32_t i = left_start_idx_, o_i = right_start_idx_; i < left_end_idx_;
+         ++i, ++o_i) {
+      if (left.IsNull(i) != right.IsNull(o_i)) { return false; }
+      if (left.IsNull(i)) continue;
+      if (left_ids[i] != right_ids[o_i]) { return false; }
+
+      const uint8_t id = left_ids[i];
+      const uint8_t child_num = type_id_to_child_num[id];
+
+      // TODO(wesm): really we should be comparing stretches of non-null data
+      // rather than looking at one value at a time.
+      if (union_mode == UnionMode::SPARSE) {
+        if (!left.child(child_num)->RangeEquals(i, i + 1, o_i, right.child(child_num))) {
+          return false;
+        }
+      } else {
+        // Each side's offsets are indexed by that side's own slot number; the
+        // right-hand slot is o_i, which differs from i whenever the two ranges
+        // start at different positions.
+        const int32_t offset = left.raw_offsets()[i];
+        const int32_t o_offset = right.raw_offsets()[o_i];
+        if (!left.child(child_num)->RangeEquals(
+                offset, offset + 1, o_offset, right.child(child_num))) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  Status Visit(const UnionArray& left) override {
+    result_ = CompareUnions(left);
+    return Status::OK();
+  }
+
+  // Dictionary arrays match when the dictionaries are equal as whole arrays and
+  // the index ranges are equal.
+  Status Visit(const DictionaryArray& left) override {
+    const auto& right = static_cast<const DictionaryArray&>(right_);
+    if (!left.dictionary()->Equals(right.dictionary())) {
+      result_ = false;
+      return Status::OK();
+    }
+    result_ = left.indices()->RangeEquals(
+        left_start_idx_, left_end_idx_, right_start_idx_, right.indices());
+    return Status::OK();
+  }
+
+  // Outcome of the most recent Visit call; false until a Visit has run.
+  bool result() const { return result_; }
+
+ protected:
+  const Array& right_;
+  int32_t left_start_idx_;
+  int32_t left_end_idx_;
+  int32_t right_start_idx_;
+
+  bool result_;
+};
+
+// Visitor that compares two whole arrays. It is constructed over the range
+// [0, right.length()) and overrides the per-slot comparisons of
+// RangeEqualsVisitor with bulk buffer comparisons where possible.
+//
+// The callers in this file run BaseDataEquals first, so the overloads here rely
+// on length, null count, type and (when nulls are present) the validity bitmaps
+// already being known to match.
+class EqualsVisitor : public RangeEqualsVisitor {
+ public:
+  explicit EqualsVisitor(const Array& right)
+      : RangeEqualsVisitor(right, 0, right.length(), 0) {}
+
+  // Null arrays hold no values. result_ starts out false, so it has to be set
+  // explicitly for two null arrays to compare equal.
+  Status Visit(const NullArray& left) override {
+    UNUSED(left);
+    result_ = true;
+    return Status::OK();
+  }
+
+  // Booleans are bit-packed: compare bit by bit when nulls are present (the
+  // bits behind null slots are ignored), otherwise compare the packed bytes.
+  Status Visit(const BooleanArray& left) override {
+    const auto& right = static_cast<const BooleanArray&>(right_);
+    if (left.null_count() > 0) {
+      const uint8_t* left_data = left.data()->data();
+      const uint8_t* right_data = right.data()->data();
+
+      for (int i = 0; i < left.length(); ++i) {
+        if (!left.IsNull(i) &&
+            BitUtil::GetBit(left_data, i) != BitUtil::GetBit(right_data, i)) {
+          result_ = false;
+          return Status::OK();
+        }
+      }
+      result_ = true;
+    } else {
+      result_ = left.data()->Equals(*right.data(), BitUtil::BytesForBits(left.length()));
+    }
+    return Status::OK();
+  }
+
+  // Compares the value buffers of two fixed-width primitive arrays. With nulls
+  // present the values are compared one slot at a time so that the bytes behind
+  // null slots are ignored; without nulls the buffers are compared in one go.
+  bool IsEqualPrimitive(const PrimitiveArray& left) {
+    const auto& right = static_cast<const PrimitiveArray&>(right_);
+    if (left.length() == 0) { return true; }
+
+    const auto& size_meta = dynamic_cast<const FixedWidthType&>(*left.type());
+    const int value_byte_size = size_meta.bit_width() / 8;
+    DCHECK_GT(value_byte_size, 0);
+
+    if (left.null_count() > 0) {
+      const uint8_t* left_data = left.data()->data();
+      const uint8_t* right_data = right.data()->data();
+
+      for (int i = 0; i < left.length(); ++i) {
+        if (!left.IsNull(i) && std::memcmp(left_data, right_data, value_byte_size)) {
+          return false;
+        }
+        left_data += value_byte_size;
+        right_data += value_byte_size;
+      }
+      return true;
+    }
+
+    // The buffer comparison takes a byte count, not an element count: scale the
+    // length by the value width so that every value takes part in the check.
+    const int64_t total_bytes = static_cast<int64_t>(left.length()) * value_byte_size;
+    return left.data()->Equals(*right.data(), total_bytes);
+  }
+
+  Status ComparePrimitive(const PrimitiveArray& left) {
+    result_ = IsEqualPrimitive(left);
+    return Status::OK();
+  }
+
+  Status Visit(const Int8Array& left) override { return ComparePrimitive(left); }
+
+  Status Visit(const Int16Array& left) override { return ComparePrimitive(left); }
+
+  Status Visit(const Int32Array& left) override { return ComparePrimitive(left); }
+
+  Status Visit(const Int64Array& left) override { return ComparePrimitive(left); }
+
+  Status Visit(const UInt8Array& left) override { return ComparePrimitive(left); }
+
+  Status Visit(const UInt16Array& left) override { return ComparePrimitive(left); }
+
+  Status Visit(const UInt32Array& left) override { return ComparePrimitive(left); }
+
+  Status Visit(const UInt64Array& left) override { return ComparePrimitive(left); }
+
+  Status Visit(const FloatArray& left) override { return ComparePrimitive(left); }
+
+  Status Visit(const DoubleArray& left) override { return ComparePrimitive(left); }
+
+  Status Visit(const DateArray& left) override { return ComparePrimitive(left); }
+
+  Status Visit(const TimeArray& left) override { return ComparePrimitive(left); }
+
+  Status Visit(const TimestampArray& left) override { return ComparePrimitive(left); }
+
+  Status Visit(const IntervalArray& left) override { return ComparePrimitive(left); }
+
+  // Whole-array comparison of binary data: the offsets buffers must match, and
+  // so must the value bytes up to the final offset.
+  bool CompareBinary(const BinaryArray& left) {
+    const auto& right = static_cast<const BinaryArray&>(right_);
+    bool equal_offsets =
+        left.offsets()->Equals(*right.offsets(), (left.length() + 1) * sizeof(int32_t));
+    if (!equal_offsets) { return false; }
+    if (!left.data() && !(right.data())) { return true; }
+
+    // Offsets match, so both sides hold the same number of value bytes. When
+    // that number is zero there is nothing left to compare, even if only one
+    // side has a data buffer allocated.
+    const int32_t total_bytes = left.raw_offsets()[left.length()];
+    if (total_bytes == 0) { return true; }
+    // Never dereference a missing data buffer.
+    if (!left.data() || !right.data()) { return false; }
+    return left.data()->Equals(*right.data(), total_bytes);
+  }
+
+  Status Visit(const StringArray& left) override {
+    result_ = CompareBinary(left);
+    return Status::OK();
+  }
+
+  Status Visit(const BinaryArray& left) override {
+    result_ = CompareBinary(left);
+    return Status::OK();
+  }
+
+  // Lists are equal when the offsets buffers match and the child value arrays
+  // are equal.
+  Status Visit(const ListArray& left) override {
+    const auto& right = static_cast<const ListArray&>(right_);
+    if (!left.offsets()->Equals(
+            *right.offsets(), (left.length() + 1) * sizeof(int32_t))) {
+      result_ = false;
+    } else {
+      result_ = left.values()->Equals(right.values());
+    }
+    return Status::OK();
+  }
+
+  // Dictionary arrays are equal when both the dictionaries and the index
+  // arrays are equal.
+  Status Visit(const DictionaryArray& left) override {
+    const auto& right = static_cast<const DictionaryArray&>(right_);
+    if (!left.dictionary()->Equals(right.dictionary())) {
+      result_ = false;
+    } else {
+      result_ = left.indices()->Equals(right.indices());
+    }
+    return Status::OK();
+  }
+};
+
+// Returns true when every non-null slot of `left` is within a fixed absolute
+// tolerance (1E-5) of the slot at the same position in `right`. Only `left` is
+// consulted for null-ness and length.
+template <typename TYPE>
+inline bool FloatingApproxEquals(
+    const NumericArray<TYPE>& left, const NumericArray<TYPE>& right) {
+  using T = typename TYPE::c_type;
+
+  const T* lhs_values = reinterpret_cast<const T*>(left.data()->data());
+  const T* rhs_values = reinterpret_cast<const T*>(right.data()->data());
+
+  static constexpr T kTolerance = 1E-5;
+
+  if (left.length() == 0 && right.length() == 0) { return true; }
+
+  // Only consult the validity bitmap when the array actually contains nulls.
+  const bool has_nulls = left.null_count() > 0;
+  for (int32_t pos = 0; pos < left.length(); ++pos) {
+    if (has_nulls && left.IsNull(pos)) { continue; }
+    if (fabs(lhs_values[pos] - rhs_values[pos]) > kTolerance) { return false; }
+  }
+  return true;
+}
+
+// Whole-array comparison that is identical to EqualsVisitor except for float
+// and double arrays, which are compared with FloatingApproxEquals.
+class ApproxEqualsVisitor : public EqualsVisitor {
+ public:
+  using EqualsVisitor::EqualsVisitor;
+
+  Status Visit(const FloatArray& left) override {
+    const auto& right = static_cast<const FloatArray&>(right_);
+    result_ = FloatingApproxEquals<FloatType>(left, right);
+    return Status::OK();
+  }
+
+  Status Visit(const DoubleArray& left) override {
+    const auto& right = static_cast<const DoubleArray&>(right_);
+    result_ = FloatingApproxEquals<DoubleType>(left, right);
+    return Status::OK();
+  }
+};
+
+// Type-independent pre-check: both arrays must agree on length, null count and
+// type id, and, when nulls are present, on the bytes of the validity bitmap
+// that cover the array length.
+static bool BaseDataEquals(const Array& left, const Array& right) {
+  const bool same_metadata = left.length() == right.length() &&
+                             left.null_count() == right.null_count() &&
+                             left.type_enum() == right.type_enum();
+  if (!same_metadata) { return false; }
+
+  // Without nulls there is no bitmap to compare.
+  if (left.null_count() <= 0) { return true; }
+
+  const auto bitmap_bytes = BitUtil::BytesForBits(left.length());
+  return left.null_bitmap()->Equals(*right.null_bitmap(), bitmap_bytes);
+}
+
+// Writes to *are_equal whether `left` and `right` are exactly equal. A non-OK
+// Status is returned when the visitor reports one for the array type.
+Status ArrayEquals(const Array& left, const Array& right, bool* are_equal) {
+  // The arrays are the same object
+  if (&left == &right) {
+    *are_equal = true;
+    return Status::OK();
+  }
+  // Metadata or validity bitmaps differ: no need for a per-type comparison.
+  if (!BaseDataEquals(left, right)) {
+    *are_equal = false;
+    return Status::OK();
+  }
+  EqualsVisitor comparer(right);
+  RETURN_NOT_OK(left.Accept(&comparer));
+  *are_equal = comparer.result();
+  return Status::OK();
+}
+
+// Writes to *are_equal whether slots [left_start_idx, left_end_idx) of `left`
+// equal the same number of slots of `right` starting at right_start_idx.
+// Arrays of different types are never equal. A non-OK Status is returned when
+// the visitor reports one for the array type.
+Status ArrayRangeEquals(const Array& left, const Array& right, int32_t left_start_idx,
+    int32_t left_end_idx, int32_t right_start_idx, bool* are_equal) {
+  // Comparing an array with itself is only trivially equal when both ranges
+  // start at the same slot; two different ranges of one array still have to be
+  // compared value by value.
+  if (&left == &right && left_start_idx == right_start_idx) {
+    *are_equal = true;
+  } else if (left.type_enum() != right.type_enum()) {
+    *are_equal = false;
+  } else {
+    RangeEqualsVisitor visitor(right, left_start_idx, left_end_idx, right_start_idx);
+    RETURN_NOT_OK(left.Accept(&visitor));
+    *are_equal = visitor.result();
+  }
+  return Status::OK();
+}
+
+// Writes to *are_equal whether `left` and `right` are equal, with float and
+// double values compared approximately. A non-OK Status is returned when the
+// visitor reports one for the array type.
+Status ArrayApproxEquals(const Array& left, const Array& right, bool* are_equal) {
+  // The arrays are the same object
+  if (&left == &right) {
+    *are_equal = true;
+    return Status::OK();
+  }
+  // Metadata or validity bitmaps differ: no need for a per-type comparison.
+  if (!BaseDataEquals(left, right)) {
+    *are_equal = false;
+    return Status::OK();
+  }
+  ApproxEqualsVisitor comparer(right);
+  RETURN_NOT_OK(left.Accept(&comparer));
+  *are_equal = comparer.result();
+  return Status::OK();
+}
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/7ac320bd/cpp/src/arrow/compare.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h
new file mode 100644
index 0000000..2093b65
--- /dev/null
+++ b/cpp/src/arrow/compare.h
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Functions for comparing Arrow data structures
+
+#ifndef ARROW_COMPARE_H
+#define ARROW_COMPARE_H
+
+#include <cstdint>
+
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+class Status;
+
+/// Sets *are_equal to true if the arrays are exactly equal, and to false otherwise
+Status ARROW_EXPORT ArrayEquals(const Array& left, const Array& right, bool* are_equal);
+
+/// Sets *are_equal to true if the arrays are approximately equal. For non-floating
+/// point types, this is equivalent to ArrayEquals(left, right, are_equal)
+Status ARROW_EXPORT ArrayApproxEquals(
+    const Array& left, const Array& right, bool* are_equal);
+
+/// Sets *are_equal to true if the indicated equal-length segments are exactly equal
+Status ARROW_EXPORT ArrayRangeEquals(const Array& left, const Array& right,
+    int32_t start_idx, int32_t end_idx, int32_t other_start_idx, bool* are_equal);
+
+}  // namespace arrow
+
+#endif  // ARROW_COMPARE_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/7ac320bd/cpp/src/arrow/util/macros.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h
index e2bb355..c4a62a4 100644
--- a/cpp/src/arrow/util/macros.h
+++ b/cpp/src/arrow/util/macros.h
@@ -25,4 +25,6 @@
   TypeName& operator=(const TypeName&) = delete
 #endif
 
+// Marks a variable or parameter as intentionally unused. The argument and the
+// whole expansion are parenthesized so that any expression can be passed.
+#define UNUSED(x) ((void)(x))
+
 #endif  // ARROW_UTIL_MACROS_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/7ac320bd/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index d63fff4..942e74b 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -74,6 +74,9 @@ include(SetupCxxFlags)
 # Add common flags
 set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}")
 
+# Enable perf and other tools to work properly
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
+
 # Suppress Cython warnings
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable")
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/7ac320bd/python/src/pyarrow/adapters/pandas.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc
index feafa3d..920779f 100644
--- a/python/src/pyarrow/adapters/pandas.cc
+++ b/python/src/pyarrow/adapters/pandas.cc
@@ -153,14 +153,6 @@ static inline bool PyObject_is_string(const PyObject* obj) {
 #endif
 }
 
-static inline bool PyObject_is_bool(const PyObject* obj) {
-#if PY_MAJOR_VERSION >= 3
-  return PyString_Check(obj) || PyBytes_Check(obj);
-#else
-  return PyString_Check(obj) || PyUnicode_Check(obj);
-#endif
-}
-
 template <int TYPE>
 static int64_t ValuesToBitmap(const void* data, int64_t length, uint8_t* bitmap) {
   typedef npy_traits<TYPE> traits;


Mime
View raw message