Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id A04AD200C3A for ; Fri, 31 Mar 2017 16:20:29 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 9EFAD160B80; Fri, 31 Mar 2017 14:20:29 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 965B4160B79 for ; Fri, 31 Mar 2017 16:20:28 +0200 (CEST) Received: (qmail 22080 invoked by uid 500); 31 Mar 2017 14:20:27 -0000 Mailing-List: contact commits-help@arrow.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@arrow.apache.org Delivered-To: mailing list commits@arrow.apache.org Received: (qmail 22071 invoked by uid 99); 31 Mar 2017 14:20:27 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 31 Mar 2017 14:20:27 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id A141BDFE59; Fri, 31 Mar 2017 14:20:27 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: wesm@apache.org To: commits@arrow.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: arrow git commit: ARROW-603: [C++] Add RecordBatch::Validate method, call in RecordBatch ctor in debug builds Date: Fri, 31 Mar 2017 14:20:27 +0000 (UTC) archived-at: Fri, 31 Mar 2017 14:20:29 -0000 Repository: arrow Updated Branches: refs/heads/master 4915ecf1e -> f5967ed68 ARROW-603: [C++] Add RecordBatch::Validate method, call in RecordBatch ctor in debug builds This function will help catch malformed RecordBatch objects during development Author: Wes McKinney Closes #466 from wesm/ARROW-603 and squashes the following commits: dfdb048 [Wes McKinney] Fix incorrect clang-tidy name 5a51f69 [Wes McKinney] Add RecordBatch::Validate method, call in RecordBatch ctor in debug builds Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f5967ed6 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f5967ed6 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f5967ed6 Branch: refs/heads/master Commit: f5967ed682e63dd752d0120573bb33f42dd56e27 Parents: 4915ecf Author: Wes McKinney Authored: Fri Mar 31 10:20:20 2017 -0400 Committer: Wes McKinney Committed: Fri Mar 31 10:20:20 2017 -0400 ---------------------------------------------------------------------- cpp/cmake_modules/FindClangTools.cmake | 29 +++++++---- cpp/src/arrow/io/io-hdfs-test.cc | 7 ++- cpp/src/arrow/table-test.cc | 76 ++++++++++++++++++++--------- cpp/src/arrow/table.cc | 25 ++++++++++ cpp/src/arrow/table.h | 4 ++ 5 files changed, 104 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/f5967ed6/cpp/cmake_modules/FindClangTools.cmake ---------------------------------------------------------------------- diff --git a/cpp/cmake_modules/FindClangTools.cmake b/cpp/cmake_modules/FindClangTools.cmake index c07c7d2..0e9430b 100644 --- a/cpp/cmake_modules/FindClangTools.cmake +++ b/cpp/cmake_modules/FindClangTools.cmake @@ -27,16 +27,21 @@ # This module defines # CLANG_TIDY_BIN, The path to the clang tidy binary # CLANG_TIDY_FOUND, Whether clang tidy was found -# CLANG_FORMAT_BIN, The path to the clang format binary +# CLANG_FORMAT_BIN, The path to the clang format binary # CLANG_TIDY_FOUND, Whether clang format was found -find_program(CLANG_TIDY_BIN - NAMES clang-tidy-3.8 clang-tidy-3.7 clang-tidy-3.6 clang-tidy - PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin +find_program(CLANG_TIDY_BIN + NAMES clang-tidy-4.0 + clang-tidy-3.9 + clang-tidy-3.8 + clang-tidy-3.7 + clang-tidy-3.6 + clang-tidy + PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin NO_DEFAULT_PATH ) -if ( "${CLANG_TIDY_BIN}" STREQUAL "CLANG_TIDY_BIN-NOTFOUND" ) +if ( "${CLANG_TIDY_BIN}" STREQUAL "CLANG_TIDY_BIN-NOTFOUND" ) set(CLANG_TIDY_FOUND 0) message("clang-tidy not found") else() @@ -44,17 +49,21 @@ else() message("clang-tidy found at ${CLANG_TIDY_BIN}") endif() -find_program(CLANG_FORMAT_BIN - NAMES clang-format-3.8 clang-format-3.7 clang-format-3.6 clang-format - PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin +find_program(CLANG_FORMAT_BIN + NAMES clang-format-4.0 + clang-format-3.9 + clang-format-3.8 + clang-format-3.7 + clang-format-3.6 + clang-format + PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin NO_DEFAULT_PATH ) -if ( "${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND" ) +if ( "${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND" ) set(CLANG_FORMAT_FOUND 0) message("clang-format not found") else() set(CLANG_FORMAT_FOUND 1) message("clang-format found at ${CLANG_FORMAT_BIN}") endif() - http://git-wip-us.apache.org/repos/asf/arrow/blob/f5967ed6/cpp/src/arrow/io/io-hdfs-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/io/io-hdfs-test.cc b/cpp/src/arrow/io/io-hdfs-test.cc index af59e96..f3140be 100644 --- a/cpp/src/arrow/io/io-hdfs-test.cc +++ b/cpp/src/arrow/io/io-hdfs-test.cc @@ -78,10 +78,9 @@ class TestHdfsClient : public ::testing::Test { LibHdfsShim* driver_shim; client_ = nullptr; - scratch_dir_ = - boost::filesystem::unique_path( - boost::filesystem::temp_directory_path() / "arrow-hdfs/scratch-%%%%") - .string(); + scratch_dir_ = boost::filesystem::unique_path( + boost::filesystem::temp_directory_path() / "arrow-hdfs/scratch-%%%%") + .string(); loaded_driver_ = false; http://git-wip-us.apache.org/repos/asf/arrow/blob/f5967ed6/cpp/src/arrow/table-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc index 3853306..cd32f4a 100644 --- a/cpp/src/arrow/table-test.cc +++ b/cpp/src/arrow/table-test.cc @@ -127,8 +127,8 @@ TEST_F(TestColumn, BasicAPI) { arrays.push_back(MakePrimitive(100, 10)); arrays.push_back(MakePrimitive(100, 20)); - auto field = std::make_shared("c0", int32()); - column_.reset(new Column(field, arrays)); + auto f0 = field("c0", int32()); + column_.reset(new Column(f0, arrays)); ASSERT_EQ("c0", column_->name()); ASSERT_TRUE(column_->type()->Equals(int32())); @@ -137,7 +137,7 @@ TEST_F(TestColumn, BasicAPI) { ASSERT_EQ(3, column_->data()->num_chunks()); // nullptr array should not break - column_.reset(new Column(field, std::shared_ptr(nullptr))); + column_.reset(new Column(f0, std::shared_ptr(nullptr))); ASSERT_NE(column_.get(), nullptr); } @@ -146,13 +146,13 @@ TEST_F(TestColumn, ChunksInhomogeneous) { arrays.push_back(MakePrimitive(100)); arrays.push_back(MakePrimitive(100, 10)); - auto field = std::make_shared("c0", int32()); - column_.reset(new Column(field, arrays)); + auto f0 = field("c0", int32()); + column_.reset(new Column(f0, arrays)); ASSERT_OK(column_->ValidateData()); arrays.push_back(MakePrimitive(100, 10)); - column_.reset(new Column(field, arrays)); + column_.reset(new Column(f0, arrays)); ASSERT_RAISES(Invalid, column_->ValidateData()); } @@ -164,8 +164,8 @@ TEST_F(TestColumn, Equals) { arrays_one_.push_back(array); arrays_another_.push_back(array); - one_field_ = std::make_shared("column", int32()); - another_field_ = std::make_shared("column", int32()); + one_field_ = field("column", int32()); + another_field_ = field("column", int32()); Construct(); ASSERT_TRUE(one_col_->Equals(one_col_)); @@ -174,13 +174,13 @@ TEST_F(TestColumn, Equals) { ASSERT_TRUE(one_col_->Equals(*another_col_.get())); // Field is different - another_field_ = std::make_shared("two", int32()); + another_field_ = field("two", int32()); Construct(); ASSERT_FALSE(one_col_->Equals(another_col_)); ASSERT_FALSE(one_col_->Equals(*another_col_.get())); // ChunkedArray is different - another_field_ = std::make_shared("column", int32()); + another_field_ = field("column", int32()); arrays_another_.push_back(array); Construct(); ASSERT_FALSE(one_col_->Equals(another_col_)); @@ -190,9 +190,9 @@ TEST_F(TestColumn, Equals) { class TestTable : public TestBase { public: void MakeExample1(int length) { - auto f0 = std::make_shared("f0", int32()); - auto f1 = std::make_shared("f1", uint8()); - auto f2 = std::make_shared("f2", int16()); + auto f0 = field("f0", int32()); + auto f1 = field("f1", uint8()); + auto f2 = field("f2", int16()); vector> fields = {f0, f1, f2}; schema_ = std::make_shared(fields); @@ -279,9 +279,9 @@ TEST_F(TestTable, Equals) { ASSERT_TRUE(table_->Equals(*table_)); // Differing schema - auto f0 = std::make_shared("f3", int32()); - auto f1 = std::make_shared("f4", uint8()); - auto f2 = std::make_shared("f5", int16()); + auto f0 = field("f3", int32()); + auto f1 = field("f4", uint8()); + auto f2 = field("f5", int16()); vector> fields = {f0, f1, f2}; auto other_schema = std::make_shared(fields); ASSERT_FALSE(table_->Equals(Table(other_schema, columns_))); @@ -389,9 +389,9 @@ class TestRecordBatch : public TestBase {}; TEST_F(TestRecordBatch, Equals) { const int length = 10; - auto f0 = std::make_shared("f0", int32()); - auto f1 = std::make_shared("f1", uint8()); - auto f2 = std::make_shared("f2", int16()); + auto f0 = field("f0", int32()); + auto f1 = field("f1", uint8()); + auto f2 = field("f2", int16()); vector> fields = {f0, f1, f2}; auto schema = std::make_shared(fields); @@ -401,21 +401,51 @@ TEST_F(TestRecordBatch, Equals) { auto a2 = MakePrimitive(length); RecordBatch b1(schema, length, {a0, a1, a2}); - RecordBatch b2(schema, 5, {a0, a1, a2}); RecordBatch b3(schema, length, {a0, a1}); RecordBatch b4(schema, length, {a0, a1, a1}); ASSERT_TRUE(b1.Equals(b1)); - ASSERT_FALSE(b1.Equals(b2)); ASSERT_FALSE(b1.Equals(b3)); ASSERT_FALSE(b1.Equals(b4)); } +#ifdef NDEBUG +// In debug builds, RecordBatch ctor aborts if you construct an invalid one + +TEST_F(TestRecordBatch, Validate) { + const int length = 10; + + auto f0 = field("f0", int32()); + auto f1 = field("f1", uint8()); + auto f2 = field("f2", int16()); + + auto schema = std::shared_ptr(new Schema({f0, f1, f2})); + + auto a0 = MakePrimitive(length); + auto a1 = MakePrimitive(length); + auto a2 = MakePrimitive(length); + auto a3 = MakePrimitive(5); + + RecordBatch b1(schema, length, {a0, a1, a2}); + + ASSERT_OK(b1.Validate()); + + // Length mismatch + RecordBatch b2(schema, length, {a0, a1, a3}); + ASSERT_RAISES(Invalid, b2.Validate()); + + // Type mismatch + RecordBatch b3(schema, length, {a0, a1, a0}); + ASSERT_RAISES(Invalid, b3.Validate()); +} + +#endif + TEST_F(TestRecordBatch, Slice) { const int length = 10; - auto f0 = std::make_shared("f0", int32()); - auto f1 = std::make_shared("f1", uint8()); + auto f0 = field("f0", int32()); + auto f1 = field("f1", uint8()); vector> fields = {f0, f1}; auto schema = std::make_shared(fields); http://git-wip-us.apache.org/repos/asf/arrow/blob/f5967ed6/cpp/src/arrow/table.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc index 8e283f4..da61fbb 100644 --- a/cpp/src/arrow/table.cc +++ b/cpp/src/arrow/table.cc @@ -139,6 +139,11 @@ Status Column::ValidateData() { // ---------------------------------------------------------------------- // RecordBatch methods +void AssertBatchValid(const RecordBatch& batch) { + Status s = batch.Validate(); + if (!s.ok()) { DCHECK(false) << s.ToString(); } +} + RecordBatch::RecordBatch(const std::shared_ptr& schema, int64_t num_rows, const std::vector>& columns) : schema_(schema), num_rows_(num_rows), columns_(columns) {} @@ -190,6 +195,26 @@ std::shared_ptr RecordBatch::Slice(int64_t offset, int64_t length) return std::make_shared(schema_, num_rows, arrays); } +Status RecordBatch::Validate() const { + for (int i = 0; i < num_columns(); ++i) { + const Array& arr = *columns_[i]; + if (arr.length() != num_rows_) { + std::stringstream ss; + ss << "Number of rows in column " << i << " did not match batch: " << arr.length() + << " vs " << num_rows_; + return Status::Invalid(ss.str()); + } + const auto& schema_type = *schema_->field(i)->type; + if (!arr.type()->Equals(schema_type)) { + std::stringstream ss; + ss << "Column " << i << " type not match schema: " << arr.type()->ToString() + << " vs " << schema_type.ToString(); + return Status::Invalid(ss.str()); + } + } + return Status::OK(); +} + // ---------------------------------------------------------------------- // Table methods http://git-wip-us.apache.org/repos/asf/arrow/blob/f5967ed6/cpp/src/arrow/table.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h index 7b739c9..0f35dd8 100644 --- a/cpp/src/arrow/table.h +++ b/cpp/src/arrow/table.h @@ -140,6 +140,10 @@ class ARROW_EXPORT RecordBatch { std::shared_ptr Slice(int64_t offset); std::shared_ptr Slice(int64_t offset, int64_t length); + /// Returns error status is there is something wrong with the record batch + /// contents, like a schema/array mismatch or inconsistent lengths + Status Validate() const; + private: std::shared_ptr schema_; int64_t num_rows_;