Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id CD0B4200C2B for ; Thu, 2 Mar 2017 20:41:50 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id CB809160B6A; Thu, 2 Mar 2017 19:41:50 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id F0AE4160B6F for ; Thu, 2 Mar 2017 20:41:49 +0100 (CET) Received: (qmail 97532 invoked by uid 500); 2 Mar 2017 19:41:49 -0000 Mailing-List: contact commits-help@arrow.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@arrow.apache.org Delivered-To: mailing list commits@arrow.apache.org Received: (qmail 97523 invoked by uid 99); 2 Mar 2017 19:41:49 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 02 Mar 2017 19:41:49 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 18032DFCA1; Thu, 2 Mar 2017 19:41:49 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: wesm@apache.org To: commits@arrow.apache.org Message-Id: <6b8fc917d8514482bf9cb1707241da72@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: arrow git commit: ARROW-576: [C++] Complete file/stream implementation for union types Date: Thu, 2 Mar 2017 19:41:49 +0000 (UTC) archived-at: Thu, 02 Mar 2017 19:41:51 -0000 Repository: arrow Updated Branches: refs/heads/master 2c3bd9311 -> 0637e05d5 ARROW-576: [C++] Complete file/stream implementation for union types Author: Wes McKinney Closes #356 from wesm/ARROW-576 and squashes the following commits: e239ba1 [Wes McKinney] Fix miniconda links 12fde46 [Wes McKinney] Complete metadata roundtrip for unions Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/0637e05d Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/0637e05d Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/0637e05d Branch: refs/heads/master Commit: 0637e05d59f20363a9103ffad5712f981314c4df Parents: 2c3bd93 Author: Wes McKinney Authored: Thu Mar 2 14:41:29 2017 -0500 Committer: Wes McKinney Committed: Thu Mar 2 14:41:29 2017 -0500 ---------------------------------------------------------------------- ci/travis_install_conda.sh | 4 +- cpp/src/arrow/ipc/ipc-file-test.cc | 2 +- cpp/src/arrow/ipc/metadata-internal.cc | 101 ++++++++++++++++++---------- 3 files changed, 67 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/0637e05d/ci/travis_install_conda.sh ---------------------------------------------------------------------- diff --git a/ci/travis_install_conda.sh b/ci/travis_install_conda.sh index ffa017c..9c13b1b 100644 --- a/ci/travis_install_conda.sh +++ b/ci/travis_install_conda.sh @@ -15,9 +15,9 @@ set -e if [ $TRAVIS_OS_NAME == "linux" ]; then - MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh" + MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" else - MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh" + MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh" fi wget -O miniconda.sh $MINICONDA_URL http://git-wip-us.apache.org/repos/asf/arrow/blob/0637e05d/cpp/src/arrow/ipc/ipc-file-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/ipc-file-test.cc b/cpp/src/arrow/ipc/ipc-file-test.cc index e58f2cf..0c95c8e 100644 --- a/cpp/src/arrow/ipc/ipc-file-test.cc +++ b/cpp/src/arrow/ipc/ipc-file-test.cc @@ -180,7 +180,7 @@ TEST_P(TestStreamFormat, RoundTrip) { #define BATCH_CASES() \ ::testing::Values(&MakeIntRecordBatch, &MakeListRecordBatch, &MakeNonNullRecordBatch, \ &MakeZeroLengthRecordBatch, &MakeDeeplyNestedList, &MakeStringTypesRecordBatch, \ - &MakeStruct, &MakeDictionary); + &MakeStruct, &MakeUnion, &MakeDictionary); INSTANTIATE_TEST_CASE_P(FileRoundTripTests, TestFileFormat, BATCH_CASES()); INSTANTIATE_TEST_CASE_P(StreamRoundTripTests, TestStreamFormat, BATCH_CASES()); http://git-wip-us.apache.org/repos/asf/arrow/blob/0637e05d/cpp/src/arrow/ipc/metadata-internal.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index 1cc4a23..17a3a5f 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -78,43 +78,6 @@ static Status FloatFromFlatuffer( return Status::OK(); } -static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data, - const std::vector>& children, std::shared_ptr* out) { - switch (type) { - case flatbuf::Type_NONE: - return Status::Invalid("Type metadata cannot be none"); - case flatbuf::Type_Int: - return IntFromFlatbuffer(static_cast(type_data), out); - case flatbuf::Type_FloatingPoint: - return FloatFromFlatuffer( - static_cast(type_data), out); - case flatbuf::Type_Binary: - *out = binary(); - return Status::OK(); - case flatbuf::Type_Utf8: - *out = utf8(); - return Status::OK(); - case flatbuf::Type_Bool: - *out = boolean(); - return Status::OK(); - case flatbuf::Type_Decimal: - case flatbuf::Type_Timestamp: - case flatbuf::Type_List: - if (children.size() != 1) { - return Status::Invalid("List must have exactly 1 child field"); - } - *out = std::make_shared(children[0]); - return Status::OK(); - case flatbuf::Type_Struct_: - *out = std::make_shared(children); - return Status::OK(); - case flatbuf::Type_Union: - return Status::NotImplemented("Type is not implemented"); - default: - return Status::Invalid("Unrecognized type"); - } -} - // Forward declaration static Status FieldToFlatbuffer(FBB& fbb, const std::shared_ptr& field, DictionaryMemo* dictionary_memo, FieldOffset* offset); @@ -153,6 +116,32 @@ static Status StructToFlatbuffer(FBB& fbb, const std::shared_ptr& type return Status::OK(); } +// ---------------------------------------------------------------------- +// Union implementation + +static Status UnionFromFlatbuffer(const flatbuf::Union* union_data, + const std::vector>& children, std::shared_ptr* out) { + UnionMode mode = union_data->mode() == flatbuf::UnionMode_Sparse ? UnionMode::SPARSE + : UnionMode::DENSE; + + std::vector type_codes; + + const flatbuffers::Vector* fb_type_ids = union_data->typeIds(); + if (fb_type_ids == nullptr) { + for (uint8_t i = 0; i < children.size(); ++i) { + type_codes.push_back(i); + } + } else { + for (int32_t id : (*fb_type_ids)) { + // TODO(wesm): can these values exceed 255? + type_codes.push_back(static_cast(id)); + } + } + + *out = union_(children, type_codes, mode); + return Status::OK(); +} + static Status UnionToFlatBuffer(FBB& fbb, const std::shared_ptr& type, std::vector* out_children, DictionaryMemo* dictionary_memo, Offset* offset) { @@ -181,6 +170,44 @@ static Status UnionToFlatBuffer(FBB& fbb, const std::shared_ptr& type, *offset = IntToFlatbuffer(fbb, BIT_WIDTH, IS_SIGNED); \ break; +static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data, + const std::vector>& children, std::shared_ptr* out) { + switch (type) { + case flatbuf::Type_NONE: + return Status::Invalid("Type metadata cannot be none"); + case flatbuf::Type_Int: + return IntFromFlatbuffer(static_cast(type_data), out); + case flatbuf::Type_FloatingPoint: + return FloatFromFlatuffer( + static_cast(type_data), out); + case flatbuf::Type_Binary: + *out = binary(); + return Status::OK(); + case flatbuf::Type_Utf8: + *out = utf8(); + return Status::OK(); + case flatbuf::Type_Bool: + *out = boolean(); + return Status::OK(); + case flatbuf::Type_Decimal: + case flatbuf::Type_Timestamp: + case flatbuf::Type_List: + if (children.size() != 1) { + return Status::Invalid("List must have exactly 1 child field"); + } + *out = std::make_shared(children[0]); + return Status::OK(); + case flatbuf::Type_Struct_: + *out = std::make_shared(children); + return Status::OK(); + case flatbuf::Type_Union: + return UnionFromFlatbuffer( + static_cast(type_data), children, out); + default: + return Status::Invalid("Unrecognized type"); + } +} + // TODO(wesm): Convert this to visitor pattern static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr& type, std::vector* children, std::vector* layout,