Return-Path: X-Original-To: apmail-parquet-commits-archive@minotaur.apache.org Delivered-To: apmail-parquet-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 2B5A818F07 for ; Mon, 15 Feb 2016 23:55:24 +0000 (UTC) Received: (qmail 93938 invoked by uid 500); 15 Feb 2016 23:55:24 -0000 Delivered-To: apmail-parquet-commits-archive@parquet.apache.org Received: (qmail 93895 invoked by uid 500); 15 Feb 2016 23:55:24 -0000 Mailing-List: contact commits-help@parquet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.apache.org Delivered-To: mailing list commits@parquet.apache.org Received: (qmail 93883 invoked by uid 99); 15 Feb 2016 23:55:24 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 15 Feb 2016 23:55:24 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id DD27EE057C; Mon, 15 Feb 2016 23:55:23 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: julien@apache.org To: commits@parquet.apache.org Date: Mon, 15 Feb 2016 23:55:23 -0000 Message-Id: <07654795c21a47d6991444219e00dda5@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [1/2] parquet-cpp git commit: PARQUET-446: Hide Thrift compiled headers and Boost from public API, #include scrubbing Repository: parquet-cpp Updated Branches: refs/heads/master 05cd4ec28 -> b71e826f0 http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/src/parquet/schema/CMakeLists.txt b/src/parquet/schema/CMakeLists.txt index 0902ccf..8aa9969 100644 --- a/src/parquet/schema/CMakeLists.txt +++ b/src/parquet/schema/CMakeLists.txt @@ -17,9 +17,8 @@ # Headers: top level install(FILES - builder.h - converter.h descriptor.h + printer.h types.h DESTINATION include/parquet/schema) http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/converter.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema/converter.cc b/src/parquet/schema/converter.cc index 9b45cc9..9eb59b0 100644 --- a/src/parquet/schema/converter.cc +++ b/src/parquet/schema/converter.cc @@ -17,9 +17,10 @@ #include "parquet/schema/converter.h" -#include - #include "parquet/exception.h" +#include "parquet/schema/descriptor.h" +#include "parquet/schema/types.h" +#include "parquet/thrift/parquet_types.h" using parquet::SchemaElement; @@ -46,7 +47,7 @@ std::unique_ptr FlatSchemaConverter::Convert() { std::unique_ptr FlatSchemaConverter::NextNode() { const SchemaElement& element = Next(); - size_t node_id = next_id(); + int node_id = next_id(); const void* opaque_element = static_cast(&element); @@ -56,7 +57,7 @@ std::unique_ptr FlatSchemaConverter::NextNode() { } else { // Group NodeVector fields; - for (size_t i = 0; i < element.num_children; ++i) { + for (int i = 0; i < element.num_children; ++i) { std::unique_ptr field = NextNode(); fields.push_back(NodePtr(field.release())); } @@ -82,25 +83,6 @@ std::shared_ptr FromParquet(const std::vector& return descr; } -// ---------------------------------------------------------------------- -// Conversion back to Parquet metadata - -// TODO: decide later what to do with these. When converting back only need to -// write into a parquet::SchemaElement - -// FieldRepetitionType::type ToParquet(Repetition::type type) { -// return static_cast(type); -// } - -// parquet::ConvertedType::type ToParquet(LogicalType::type type) { -// // item 0 is NONE -// return static_cast(static_cast(type) - 1); -// } - -// parquet::Type::type ToParquet(Type::type type) { -// return static_cast(type); -// } - } // namespace schema } // namespace parquet_cpp http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/converter.h ---------------------------------------------------------------------- diff --git a/src/parquet/schema/converter.h b/src/parquet/schema/converter.h index cde48c9..055eb69 100644 --- a/src/parquet/schema/converter.h +++ b/src/parquet/schema/converter.h @@ -25,20 +25,20 @@ #ifndef PARQUET_SCHEMA_CONVERTER_H #define PARQUET_SCHEMA_CONVERTER_H -#include #include -#include #include -#include "parquet/schema/descriptor.h" -#include "parquet/schema/types.h" - -#include "parquet/thrift/parquet_types.h" +namespace parquet { class SchemaElement;} namespace parquet_cpp { +class SchemaDescriptor; + namespace schema { +class GroupNode; +class Node; + // ---------------------------------------------------------------------- // Conversion from Parquet Thrift metadata @@ -47,7 +47,7 @@ std::shared_ptr FromParquet( class FlatSchemaConverter { public: - FlatSchemaConverter(const parquet::SchemaElement* elements, size_t length) : + FlatSchemaConverter(const parquet::SchemaElement* elements, int length) : elements_(elements), length_(length), pos_(0), @@ -57,11 +57,11 @@ class FlatSchemaConverter { private: const parquet::SchemaElement* elements_; - size_t length_; - size_t pos_; - size_t current_id_; + int length_; + int pos_; + int current_id_; - size_t next_id() { + int next_id() { return current_id_++; } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/descriptor.h ---------------------------------------------------------------------- diff --git a/src/parquet/schema/descriptor.h b/src/parquet/schema/descriptor.h index d27dcc1..7991dea 100644 --- a/src/parquet/schema/descriptor.h +++ b/src/parquet/schema/descriptor.h @@ -19,12 +19,14 @@ #define PARQUET_SCHEMA_DESCRIPTOR_H #include +#include #include #include #include #include #include "parquet/schema/types.h" +#include "parquet/types.h" namespace parquet_cpp { http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/printer.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema/printer.cc b/src/parquet/schema/printer.cc index 9c43e8e..2aa2940 100644 --- a/src/parquet/schema/printer.cc +++ b/src/parquet/schema/printer.cc @@ -17,15 +17,19 @@ #include "parquet/schema/printer.h" +#include #include +#include "parquet/schema/types.h" +#include "parquet/types.h" + namespace parquet_cpp { namespace schema { class SchemaPrinter : public Node::Visitor { public: - explicit SchemaPrinter(std::ostream& stream, size_t indent_width) : + explicit SchemaPrinter(std::ostream& stream, int indent_width) : stream_(stream), indent_(0), indent_width_(2) {} @@ -40,8 +44,8 @@ class SchemaPrinter : public Node::Visitor { std::ostream& stream_; - size_t indent_; - size_t indent_width_; + int indent_; + int indent_width_; }; static void PrintRepLevel(Repetition::type repetition, std::ostream& stream) { @@ -103,7 +107,7 @@ void SchemaPrinter::Visit(const GroupNode* node) { stream_ << " group " << node->name() << " {" << std::endl; indent_ += indent_width_; - for (size_t i = 0; i < node->field_count(); ++i) { + for (int i = 0; i < node->field_count(); ++i) { node->field(i)->Visit(this); } indent_ -= indent_width_; @@ -129,7 +133,7 @@ void SchemaPrinter::Visit(const Node* node) { } void PrintSchema(const Node* schema, std::ostream& stream, - size_t indent_width) { + int indent_width) { SchemaPrinter printer(stream, indent_width); printer.Visit(schema); } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/printer.h ---------------------------------------------------------------------- diff --git a/src/parquet/schema/printer.h b/src/parquet/schema/printer.h index 535262f..6df78d0 100644 --- a/src/parquet/schema/printer.h +++ b/src/parquet/schema/printer.h @@ -20,16 +20,16 @@ #ifndef PARQUET_SCHEMA_PRINTER_H #define PARQUET_SCHEMA_PRINTER_H -#include "parquet/schema/types.h" - #include namespace parquet_cpp { namespace schema { +class Node; + void PrintSchema(const Node* schema, std::ostream& stream, - size_t indent_width = 2); + int indent_width = 2); } // namespace schema http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/schema-converter-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema/schema-converter-test.cc b/src/parquet/schema/schema-converter-test.cc index f2dadf2..93cfd24 100644 --- a/src/parquet/schema/schema-converter-test.cc +++ b/src/parquet/schema/schema-converter-test.cc @@ -15,17 +15,19 @@ // specific language governing permissions and limitations // under the License. -#include +#include +#include #include #include #include -#include "parquet/util/test-common.h" +#include "parquet/exception.h" #include "parquet/schema/converter.h" -#include "parquet/thrift/parquet_types.h" - #include "parquet/schema/test-util.h" +#include "parquet/schema/types.h" +#include "parquet/thrift/parquet_types.h" +#include "parquet/types.h" using std::string; using std::vector; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/schema-descriptor-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema/schema-descriptor-test.cc b/src/parquet/schema/schema-descriptor-test.cc index 1328bed..c63df54 100644 --- a/src/parquet/schema/schema-descriptor-test.cc +++ b/src/parquet/schema/schema-descriptor-test.cc @@ -18,16 +18,15 @@ // Schema / column descriptor correctness tests (from flat Parquet schemas) #include +#include #include #include #include -#include "parquet/util/test-common.h" -#include "parquet/schema/converter.h" +#include "parquet/exception.h" #include "parquet/schema/descriptor.h" - -#include "parquet/thrift/parquet_types.h" +#include "parquet/schema/types.h" using std::string; using std::vector; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/schema-printer-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema/schema-printer-test.cc b/src/parquet/schema/schema-printer-test.cc index c21429a..094829b 100644 --- a/src/parquet/schema/schema-printer-test.cc +++ b/src/parquet/schema/schema-printer-test.cc @@ -15,15 +15,15 @@ // specific language governing permissions and limitations // under the License. -#include +#include + +#include #include #include -#include -#include "parquet/util/test-common.h" - #include "parquet/schema/printer.h" -#include "parquet/schema/test-util.h" +#include "parquet/schema/types.h" +#include "parquet/types.h" using std::string; using std::vector; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/schema-types-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema/schema-types-test.cc b/src/parquet/schema/schema-types-test.cc index 72d38c0..cac7dc5 100644 --- a/src/parquet/schema/schema-types-test.cc +++ b/src/parquet/schema/schema-types-test.cc @@ -15,15 +15,16 @@ // specific language governing permissions and limitations // under the License. -#include +#include + +#include #include #include -#include -#include "parquet/util/test-common.h" - -#include "parquet/schema/types.h" #include "parquet/schema/test-util.h" +#include "parquet/schema/types.h" +#include "parquet/thrift/parquet_types.h" +#include "parquet/types.h" using std::string; using std::vector; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/types.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema/types.cc b/src/parquet/schema/types.cc index e088eed..fae7c84 100644 --- a/src/parquet/schema/types.cc +++ b/src/parquet/schema/types.cc @@ -19,7 +19,9 @@ #include +#include "parquet/exception.h" #include "parquet/thrift/parquet_types.h" +#include "parquet/thrift/util.h" namespace parquet_cpp { @@ -72,7 +74,7 @@ bool GroupNode::EqualsInternal(const GroupNode* other) const { if (this->field_count() != other->field_count()) { return false; } - for (size_t i = 0; i < this->field_count(); ++i) { + for (int i = 0; i < this->field_count(); ++i) { if (!this->field(i)->Equals(other->field(i).get())) { return false; } @@ -94,19 +96,6 @@ void GroupNode::Visit(Node::Visitor* visitor) { // ---------------------------------------------------------------------- // Node construction from Parquet metadata -static Type::type ConvertEnum(parquet::Type::type type) { - return static_cast(type); -} - -static LogicalType::type ConvertEnum(parquet::ConvertedType::type type) { - // item 0 is NONE - return static_cast(static_cast(type) + 1); -} - -static Repetition::type ConvertEnum(parquet::FieldRepetitionType::type type) { - return static_cast(type); -} - struct NodeParams { explicit NodeParams(const std::string& name) : name(name) {} @@ -119,9 +108,9 @@ struct NodeParams { static inline NodeParams GetNodeParams(const parquet::SchemaElement* element) { NodeParams params(element->name); - params.repetition = ConvertEnum(element->repetition_type); + params.repetition = FromThrift(element->repetition_type); if (element->__isset.converted_type) { - params.logical_type = ConvertEnum(element->converted_type); + params.logical_type = FromThrift(element->converted_type); } else { params.logical_type = LogicalType::NONE; } @@ -145,7 +134,7 @@ std::unique_ptr PrimitiveNode::FromParquet(const void* opaque_element, std::unique_ptr result = std::unique_ptr( new PrimitiveNode(params.name, params.repetition, - ConvertEnum(element->type), params.logical_type, node_id)); + FromThrift(element->type), params.logical_type, node_id)); if (element->type == parquet::Type::FIXED_LEN_BYTE_ARRAY) { result->SetTypeLength(element->type_length); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/types.h ---------------------------------------------------------------------- diff --git a/src/parquet/schema/types.h b/src/parquet/schema/types.h index 82db233..83b9fd2 100644 --- a/src/parquet/schema/types.h +++ b/src/parquet/schema/types.h @@ -26,7 +26,6 @@ #include #include -#include "parquet/exception.h" #include "parquet/types.h" #include "parquet/util/macros.h" @@ -254,11 +253,11 @@ class GroupNode : public Node { virtual bool Equals(const Node* other) const; - const NodePtr& field(size_t i) const { + const NodePtr& field(int i) const { return fields_[i]; } - size_t field_count() const { + int field_count() const { return fields_.size(); } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/thrift/serializer-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/thrift/serializer-test.cc b/src/parquet/thrift/serializer-test.cc index e89b108..756fd10 100644 --- a/src/parquet/thrift/serializer-test.cc +++ b/src/parquet/thrift/serializer-test.cc @@ -15,18 +15,15 @@ // specific language governing permissions and limitations // under the License. -#include -#include -#include -#include - #include +#include +#include +#include + +#include "parquet/column/test-util.h" #include "parquet/thrift/parquet_types.h" #include "parquet/thrift/util.h" -#include "parquet/column/page.h" -#include "parquet/column/reader.h" -#include "parquet/column/test-util.h" using std::string; @@ -59,12 +56,12 @@ TEST_F(TestThrift, TestSerializerDeserializer) { uint32_t header_size = 1024; // Deserialize the serialized page buffer ASSERT_NO_THROW(DeserializeThriftMsg(reinterpret_cast(serialized_buffer.c_str()), - &header_size, &out_page_header)); + &header_size, &out_page_header)); ASSERT_LE(stats_size, header_size); ASSERT_GE(max_header_len, header_size); ASSERT_EQ(parquet::Encoding::PLAIN, out_page_header.data_page_header.encoding); - ASSERT_EQ(parquet::Encoding::RLE, out_page_header.data_page_header.definition_level_encoding); + ASSERT_EQ(parquet::Encoding::RLE, out_page_header.data_page_header.definition_level_encoding); ASSERT_EQ(parquet::Encoding::RLE, out_page_header.data_page_header.repetition_level_encoding); for(int i = 0; i < stats_size; i++){ EXPECT_EQ(i % 255, (reinterpret_cast http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/thrift/util.h ---------------------------------------------------------------------- diff --git a/src/parquet/thrift/util.h b/src/parquet/thrift/util.h index a472dc2..8c34197 100644 --- a/src/parquet/thrift/util.h +++ b/src/parquet/thrift/util.h @@ -17,11 +17,39 @@ #include #include -#include "parquet/util/logging.h" #include "parquet/exception.h" +#include "parquet/util/logging.h" +#include "parquet/thrift/parquet_types.h" namespace parquet_cpp { +// ---------------------------------------------------------------------- +// Convert Thrift enums to / from parquet_cpp enums + +static inline Type::type FromThrift(parquet::Type::type type) { + return static_cast(type); +} + +static inline LogicalType::type FromThrift(parquet::ConvertedType::type type) { + // item 0 is NONE + return static_cast(static_cast(type) + 1); +} + +static inline Repetition::type FromThrift(parquet::FieldRepetitionType::type type) { + return static_cast(type); +} + +static inline Encoding::type FromThrift(parquet::Encoding::type type) { + return static_cast(type); +} + +static inline Compression::type FromThrift(parquet::CompressionCodec::type type) { + return static_cast(type); +} + +// ---------------------------------------------------------------------- +// Thrift struct serialization / deserialization utilities + // Deserialize a thrift message from buf/len. buf/len must at least contain // all the bytes needed to store the thrift message. On return, len will be // set to the actual length of the header. http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/types.h ---------------------------------------------------------------------- diff --git a/src/parquet/types.h b/src/parquet/types.h index 2d15cad..8c5e123 100644 --- a/src/parquet/types.h +++ b/src/parquet/types.h @@ -108,7 +108,6 @@ struct Encoding { // Compression, mirrors parquet::CompressionCodec struct Compression { enum type { - NONE, UNCOMPRESSED, SNAPPY, GZIP, http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/util/bit-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/bit-util.h b/src/parquet/util/bit-util.h index eac5346..714911c 100644 --- a/src/parquet/util/bit-util.h +++ b/src/parquet/util/bit-util.h @@ -26,7 +26,7 @@ #include #endif -#include +#include #include "parquet/util/compiler-util.h" #include "parquet/util/cpu-info.h" @@ -34,10 +34,35 @@ namespace parquet_cpp { -using boost::make_unsigned; +// TODO(wesm): The source from Impala was depending on boost::make_unsigned +// +// We add a partial stub implementation here + +template +struct make_unsigned { +}; + +template <> +struct make_unsigned { + typedef uint8_t type; +}; + +template <> +struct make_unsigned { + typedef uint16_t type; +}; + +template <> +struct make_unsigned { + typedef uint32_t type; +}; + +template <> +struct make_unsigned { + typedef uint64_t type; +}; /// Utility class to do standard bit tricks -/// TODO: is this in boost or something else like that? class BitUtil { public: /// Returns the ceil of value/divisor http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/util/input.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/input.h b/src/parquet/util/input.h index 4fd9cd7..d9b07fd 100644 --- a/src/parquet/util/input.h +++ b/src/parquet/util/input.h @@ -18,6 +18,7 @@ #ifndef PARQUET_UTIL_INPUT_H #define PARQUET_UTIL_INPUT_H +#include #include #include #include http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/util/output-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/util/output-test.cc b/src/parquet/util/output-test.cc index 84f5b57..5fbca4a 100644 --- a/src/parquet/util/output-test.cc +++ b/src/parquet/util/output-test.cc @@ -15,10 +15,12 @@ // specific language governing permissions and limitations // under the License. -#include - #include +#include +#include +#include + #include "parquet/util/output.h" #include "parquet/util/test-common.h" http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/util/output.cc ---------------------------------------------------------------------- diff --git a/src/parquet/util/output.cc b/src/parquet/util/output.cc index 9748a69..f0c8989 100644 --- a/src/parquet/util/output.cc +++ b/src/parquet/util/output.cc @@ -17,9 +17,7 @@ #include "parquet/util/output.h" -#include #include -#include #include "parquet/exception.h" http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/util/output.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/output.h b/src/parquet/util/output.h index e83b261..be25abd 100644 --- a/src/parquet/util/output.h +++ b/src/parquet/util/output.h @@ -19,7 +19,6 @@ #define PARQUET_UTIL_OUTPUT_H #include -#include #include namespace parquet_cpp {