Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id B6B7C200CD6 for ; Mon, 31 Jul 2017 17:15:02 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id B55C8165659; Mon, 31 Jul 2017 15:15:02 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 6F53C16565C for ; Mon, 31 Jul 2017 17:15:00 +0200 (CEST) Received: (qmail 45805 invoked by uid 500); 31 Jul 2017 15:14:59 -0000 Mailing-List: contact commits-help@parquet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.apache.org Delivered-To: mailing list commits@parquet.apache.org Received: (qmail 45712 invoked by uid 99); 31 Jul 2017 15:14:58 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 31 Jul 2017 15:14:58 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 43554E9635; Mon, 31 Jul 2017 15:14:58 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: wesm@apache.org To: commits@parquet.apache.org Date: Mon, 31 Jul 2017 15:14:58 -0000 Message-Id: <1a95472dcf844671b1bb97c186dc2251@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [1/5] parquet-cpp git commit: PARQUET-1068: Modify .clang-format to use straight Google format with 90-character line width archived-at: Mon, 31 Jul 2017 15:15:02 -0000 Repository: parquet-cpp Updated Branches: refs/heads/master af96ff0fa -> b6f3caeb0 http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/reader-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/reader-test.cc b/src/parquet/reader-test.cc index cb40abb..cefa452 100644 --- a/src/parquet/reader-test.cc +++ b/src/parquet/reader-test.cc @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include -#include #include #include +#include +#include #include #include #include @@ -227,8 +227,8 @@ TEST_F(TestLocalFile, OpenWithMetadata) { printer.DebugPrint(ss, columns, true); // Make sure OpenFile passes on the external metadata, too - auto reader2 = ParquetFileReader::OpenFile( - alltypes_plain(), false, default_reader_properties(), metadata); + auto reader2 = ParquetFileReader::OpenFile(alltypes_plain(), false, + default_reader_properties(), metadata); // Compare pointers ASSERT_EQ(metadata.get(), reader2->metadata().get()); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/schema-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema-test.cc b/src/parquet/schema-test.cc index 203a312..faacb76 100644 --- a/src/parquet/schema-test.cc +++ b/src/parquet/schema-test.cc @@ -41,7 +41,8 @@ using format::SchemaElement; namespace schema { static inline SchemaElement NewPrimitive(const std::string& name, - FieldRepetitionType::type repetition, format::Type::type type, int id = 0) { + FieldRepetitionType::type repetition, + format::Type::type type, int id = 0) { SchemaElement result; result.__set_name(name); result.__set_repetition_type(repetition); @@ -52,7 +53,8 @@ static inline SchemaElement NewPrimitive(const std::string& name, } static inline SchemaElement NewGroup(const std::string& name, - FieldRepetitionType::type repetition, int num_children, int id = 0) { + FieldRepetitionType::type repetition, + int num_children, int id = 0) { SchemaElement result; result.__set_name(name); result.__set_repetition_type(repetition); @@ -156,8 +158,8 @@ TEST_F(TestPrimitiveNode, FromParquet) { ASSERT_EQ(LogicalType::UTF8, prim_node_->logical_type()); // FIXED_LEN_BYTE_ARRAY - elt = NewPrimitive( - name_, FieldRepetitionType::OPTIONAL, format::Type::FIXED_LEN_BYTE_ARRAY, 0); + elt = NewPrimitive(name_, FieldRepetitionType::OPTIONAL, + format::Type::FIXED_LEN_BYTE_ARRAY, 0); elt.__set_type_length(16); Convert(&elt); @@ -168,8 +170,8 @@ TEST_F(TestPrimitiveNode, FromParquet) { ASSERT_EQ(16, prim_node_->type_length()); // ConvertedType::Decimal - elt = NewPrimitive( - name_, FieldRepetitionType::OPTIONAL, format::Type::FIXED_LEN_BYTE_ARRAY, 0); + elt = NewPrimitive(name_, FieldRepetitionType::OPTIONAL, + format::Type::FIXED_LEN_BYTE_ARRAY, 0); elt.__set_converted_type(ConvertedType::DECIMAL); elt.__set_type_length(6); elt.__set_scale(2); @@ -197,21 +199,21 @@ TEST_F(TestPrimitiveNode, Equals) { ASSERT_TRUE(node1.Equals(&node5)); PrimitiveNode flba1("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY, - LogicalType::DECIMAL, 12, 4, 2); + LogicalType::DECIMAL, 12, 4, 2); PrimitiveNode flba2("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY, - LogicalType::DECIMAL, 1, 4, 2); + LogicalType::DECIMAL, 1, 4, 2); flba2.SetTypeLength(12); PrimitiveNode flba3("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY, - LogicalType::DECIMAL, 1, 4, 2); + LogicalType::DECIMAL, 1, 4, 2); flba3.SetTypeLength(16); PrimitiveNode flba4("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY, - LogicalType::DECIMAL, 12, 4, 0); + LogicalType::DECIMAL, 12, 4, 0); PrimitiveNode flba5("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY, - LogicalType::NONE, 12, 4, 0); + LogicalType::NONE, 12, 4, 0); ASSERT_TRUE(flba1.Equals(&flba2)); ASSERT_FALSE(flba1.Equals(&flba3)); @@ -222,52 +224,59 @@ TEST_F(TestPrimitiveNode, Equals) { TEST_F(TestPrimitiveNode, PhysicalLogicalMapping) { ASSERT_NO_THROW( PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32, LogicalType::INT_32)); - ASSERT_NO_THROW(PrimitiveNode::Make( - "foo", Repetition::REQUIRED, Type::BYTE_ARRAY, LogicalType::JSON)); + ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::BYTE_ARRAY, + LogicalType::JSON)); ASSERT_THROW( PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32, LogicalType::JSON), ParquetException); - ASSERT_NO_THROW(PrimitiveNode::Make( - "foo", Repetition::REQUIRED, Type::INT64, LogicalType::TIMESTAMP_MILLIS)); + ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT64, + LogicalType::TIMESTAMP_MILLIS)); ASSERT_THROW( PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32, LogicalType::INT_64), ParquetException); - ASSERT_THROW(PrimitiveNode::Make( - "foo", Repetition::REQUIRED, Type::BYTE_ARRAY, LogicalType::INT_8), - ParquetException); - ASSERT_THROW(PrimitiveNode::Make( - "foo", Repetition::REQUIRED, Type::BYTE_ARRAY, LogicalType::INTERVAL), - ParquetException); - ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::ENUM), - ParquetException); - ASSERT_NO_THROW(PrimitiveNode::Make( - "foo", Repetition::REQUIRED, Type::BYTE_ARRAY, LogicalType::ENUM)); + ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::BYTE_ARRAY, + LogicalType::INT_8), + ParquetException); + ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::BYTE_ARRAY, + LogicalType::INTERVAL), + ParquetException); ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 0, 2, 4), + Type::FIXED_LEN_BYTE_ARRAY, LogicalType::ENUM), + ParquetException); + ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::BYTE_ARRAY, + LogicalType::ENUM)); + ASSERT_THROW( + PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY, + LogicalType::DECIMAL, 0, 2, 4), ParquetException); ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FLOAT, - LogicalType::DECIMAL, 0, 2, 4), - ParquetException); - ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 0, 4, 0), + LogicalType::DECIMAL, 0, 2, 4), + ParquetException); + ASSERT_THROW( + PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY, + LogicalType::DECIMAL, 0, 4, 0), ParquetException); - ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 10, 0, 4), + ASSERT_THROW( + PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY, + LogicalType::DECIMAL, 10, 0, 4), ParquetException); - ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 10, 4, -1), + ASSERT_THROW( + PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY, + LogicalType::DECIMAL, 10, 4, -1), ParquetException); - ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 10, 2, 4), + ASSERT_THROW( + PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY, + LogicalType::DECIMAL, 10, 2, 4), ParquetException); ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 10, 6, 4)); + Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, + 10, 6, 4)); ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL, 12)); + Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL, + 12)); ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL, 10), - ParquetException); + Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL, 10), + ParquetException); } // ---------------------------------------------------------------------- @@ -372,10 +381,14 @@ bool check_for_parent_consistency(const GroupNode* node) { // Each node should have the group as parent for (int i = 0; i < node->field_count(); i++) { const NodePtr& field = node->field(i); - if (field->parent() != node) { return false; } + if (field->parent() != node) { + return false; + } if (field->is_group()) { const GroupNode* group = static_cast(field.get()); - if (!check_for_parent_consistency(group)) { return false; } + if (!check_for_parent_consistency(group)) { + return false; + } } } return true; @@ -468,8 +481,8 @@ class TestSchemaFlatten : public ::testing::Test { TEST_F(TestSchemaFlatten, DecimalMetadata) { // Checks that DecimalMetadata is only set for DecimalTypes - NodePtr node = PrimitiveNode::Make( - "decimal", Repetition::REQUIRED, Type::INT64, LogicalType::DECIMAL, -1, 8, 4); + NodePtr node = PrimitiveNode::Make("decimal", Repetition::REQUIRED, Type::INT64, + LogicalType::DECIMAL, -1, 8, 4); NodePtr group = GroupNode::Make("group", Repetition::REPEATED, {node}, LogicalType::LIST); Flatten(reinterpret_cast(group.get())); @@ -526,8 +539,8 @@ TEST_F(TestSchemaFlatten, NestedExample) { } TEST(TestColumnDescriptor, TestAttrs) { - NodePtr node = PrimitiveNode::Make( - "name", Repetition::OPTIONAL, Type::BYTE_ARRAY, LogicalType::UTF8); + NodePtr node = PrimitiveNode::Make("name", Repetition::OPTIONAL, Type::BYTE_ARRAY, + LogicalType::UTF8); ColumnDescriptor descr(node, 4, 1); ASSERT_EQ("name", descr.name()); @@ -540,7 +553,7 @@ TEST(TestColumnDescriptor, TestAttrs) { // Test FIXED_LEN_BYTE_ARRAY node = PrimitiveNode::Make("name", Repetition::OPTIONAL, Type::FIXED_LEN_BYTE_ARRAY, - LogicalType::DECIMAL, 12, 10, 4); + LogicalType::DECIMAL, 12, 10, 4); descr = ColumnDescriptor(node, 4, 1); ASSERT_EQ(Type::FIXED_LEN_BYTE_ARRAY, descr.physical_type()); @@ -572,8 +585,8 @@ TEST_F(TestSchemaDescriptor, Equals) { NodePtr item1 = Int64("item1", Repetition::REQUIRED); NodePtr item2 = Boolean("item2", Repetition::OPTIONAL); NodePtr item3 = Int32("item3", Repetition::REPEATED); - NodePtr list(GroupNode::Make( - "records", Repetition::REPEATED, {item1, item2, item3}, LogicalType::LIST)); + NodePtr list(GroupNode::Make("records", Repetition::REPEATED, {item1, item2, item3}, + LogicalType::LIST)); NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list})); NodePtr bag2(GroupNode::Make("bag", Repetition::REQUIRED, {list})); @@ -624,8 +637,8 @@ TEST_F(TestSchemaDescriptor, BuildTree) { NodePtr item1 = Int64("item1", Repetition::REQUIRED); NodePtr item2 = Boolean("item2", Repetition::OPTIONAL); NodePtr item3 = Int32("item3", Repetition::REPEATED); - NodePtr list(GroupNode::Make( - "records", Repetition::REPEATED, {item1, item2, item3}, LogicalType::LIST)); + NodePtr list(GroupNode::Make("records", Repetition::REPEATED, {item1, item2, item3}, + LogicalType::LIST)); NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list})); fields.push_back(bag); @@ -705,8 +718,8 @@ TEST(TestSchemaPrinter, Examples) { NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list})); fields.push_back(bag); - fields.push_back(PrimitiveNode::Make( - "c", Repetition::REQUIRED, Type::INT32, LogicalType::DECIMAL, -1, 3, 2)); + fields.push_back(PrimitiveNode::Make("c", Repetition::REQUIRED, Type::INT32, + LogicalType::DECIMAL, -1, 3, 2)); NodePtr schema = GroupNode::Make("schema", Repetition::REPEATED, fields); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/schema.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema.cc b/src/parquet/schema.cc index 5fc51fe..ddd8ac1 100644 --- a/src/parquet/schema.cc +++ b/src/parquet/schema.cc @@ -72,15 +72,15 @@ std::shared_ptr ColumnPath::extend(const std::string& node_name) con std::string ColumnPath::ToDotString() const { std::stringstream ss; for (auto it = path_.cbegin(); it != path_.cend(); ++it) { - if (it != path_.cbegin()) { ss << "."; } + if (it != path_.cbegin()) { + ss << "."; + } ss << *it; } return ss.str(); } -const std::vector& ColumnPath::ToDotVector() const { - return path_; -} +const std::vector& ColumnPath::ToDotVector() const { return path_; } // ---------------------------------------------------------------------- // Base node @@ -96,16 +96,14 @@ bool Node::EqualsInternal(const Node* other) const { repetition_ == other->repetition_ && logical_type_ == other->logical_type_; } -void Node::SetParent(const Node* parent) { - parent_ = parent; -} +void Node::SetParent(const Node* parent) { parent_ = parent; } // ---------------------------------------------------------------------- // Primitive node PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetition, - Type::type type, LogicalType::type logical_type, int length, int precision, int scale, - int id) + Type::type type, LogicalType::type logical_type, int length, + int precision, int scale, int id) : Node(Node::PRIMITIVE, name, repetition, logical_type, id), physical_type_(type), type_length_(length) { @@ -224,13 +222,13 @@ bool PrimitiveNode::EqualsInternal(const PrimitiveNode* other) const { } bool PrimitiveNode::Equals(const Node* other) const { - if (!Node::EqualsInternal(other)) { return false; } + if (!Node::EqualsInternal(other)) { + return false; + } return EqualsInternal(static_cast(other)); } -void PrimitiveNode::Visit(Node::Visitor* visitor) { - visitor->Visit(this); -} +void PrimitiveNode::Visit(Node::Visitor* visitor) { visitor->Visit(this); } void PrimitiveNode::VisitConst(Node::ConstVisitor* visitor) const { visitor->Visit(this); @@ -240,16 +238,24 @@ void PrimitiveNode::VisitConst(Node::ConstVisitor* visitor) const { // Group node bool GroupNode::EqualsInternal(const GroupNode* other) const { - if (this == other) { return true; } - if (this->field_count() != other->field_count()) { return false; } + if (this == other) { + return true; + } + if (this->field_count() != other->field_count()) { + return false; + } for (int i = 0; i < this->field_count(); ++i) { - if (!this->field(i)->Equals(other->field(i).get())) { return false; } + if (!this->field(i)->Equals(other->field(i).get())) { + return false; + } } return true; } bool GroupNode::Equals(const Node* other) const { - if (!Node::EqualsInternal(other)) { return false; } + if (!Node::EqualsInternal(other)) { + return false; + } return EqualsInternal(static_cast(other)); } @@ -264,7 +270,9 @@ int GroupNode::FieldIndex(const std::string& name) const { int GroupNode::FieldIndex(const Node& node) const { int result = FieldIndex(node.name()); - if (result < 0) { return -1; } + if (result < 0) { + return -1; + } DCHECK(result < field_count()); if (!node.Equals(field(result).get())) { // Same name but not the same node @@ -273,13 +281,9 @@ int GroupNode::FieldIndex(const Node& node) const { return result; } -void GroupNode::Visit(Node::Visitor* visitor) { - visitor->Visit(this); -} +void GroupNode::Visit(Node::Visitor* visitor) { visitor->Visit(this); } -void GroupNode::VisitConst(Node::ConstVisitor* visitor) const { - visitor->Visit(this); -} +void GroupNode::VisitConst(Node::ConstVisitor* visitor) const { visitor->Visit(this); } // ---------------------------------------------------------------------- // Node construction from Parquet metadata @@ -304,25 +308,25 @@ static inline NodeParams GetNodeParams(const format::SchemaElement* element) { return params; } -std::unique_ptr GroupNode::FromParquet( - const void* opaque_element, int node_id, const NodeVector& fields) { +std::unique_ptr GroupNode::FromParquet(const void* opaque_element, int node_id, + const NodeVector& fields) { const format::SchemaElement* element = static_cast(opaque_element); NodeParams params = GetNodeParams(element); - return std::unique_ptr(new GroupNode( - params.name, params.repetition, fields, params.logical_type, node_id)); + return std::unique_ptr(new GroupNode(params.name, params.repetition, fields, + params.logical_type, node_id)); } -std::unique_ptr PrimitiveNode::FromParquet( - const void* opaque_element, int node_id) { +std::unique_ptr PrimitiveNode::FromParquet(const void* opaque_element, + int node_id) { const format::SchemaElement* element = static_cast(opaque_element); NodeParams params = GetNodeParams(element); std::unique_ptr result = - std::unique_ptr(new PrimitiveNode(params.name, params.repetition, - FromThrift(element->type), params.logical_type, element->type_length, - element->precision, element->scale, node_id)); + std::unique_ptr(new PrimitiveNode( + params.name, params.repetition, FromThrift(element->type), params.logical_type, + element->type_length, element->precision, element->scale, node_id)); // Return as unique_ptr to the base type return std::unique_ptr(result.release()); @@ -442,8 +446,8 @@ class SchemaVisitor : public Node::ConstVisitor { std::vector* elements_; }; -SchemaFlattener::SchemaFlattener( - const GroupNode* schema, std::vector* out) +SchemaFlattener::SchemaFlattener(const GroupNode* schema, + std::vector* out) : root_(schema), elements_(out) {} void SchemaFlattener::Flatten() { @@ -546,7 +550,9 @@ void SchemaPrinter::Visit(const GroupNode* node) { PrintRepLevel(node->repetition(), stream_); stream_ << " group " << node->name(); auto lt = node->logical_type(); - if (lt != LogicalType::NONE) { stream_ << " (" << LogicalTypeToString(lt) << ")"; } + if (lt != LogicalType::NONE) { + stream_ << " (" << LogicalTypeToString(lt) << ")"; + } stream_ << " {" << std::endl; } @@ -609,17 +615,21 @@ void SchemaDescriptor::Init(const NodePtr& schema) { } bool SchemaDescriptor::Equals(const SchemaDescriptor& other) const { - if (this->num_columns() != other.num_columns()) { return false; } + if (this->num_columns() != other.num_columns()) { + return false; + } for (int i = 0; i < this->num_columns(); ++i) { - if (!this->Column(i)->Equals(*other.Column(i))) { return false; } + if (!this->Column(i)->Equals(*other.Column(i))) { + return false; + } } return true; } void SchemaDescriptor::BuildTree(const NodePtr& node, int16_t max_def_level, - int16_t max_rep_level, const NodePtr& base) { + int16_t max_rep_level, const NodePtr& base) { if (node->is_optional()) { ++max_def_level; } else if (node->is_repeated()) { @@ -639,19 +649,22 @@ void SchemaDescriptor::BuildTree(const NodePtr& node, int16_t max_def_level, // Primitive node, append to leaves leaves_.push_back(ColumnDescriptor(node, max_def_level, max_rep_level, this)); leaf_to_base_.emplace(static_cast(leaves_.size()) - 1, base); - leaf_to_idx_.emplace( - node->path()->ToDotString(), static_cast(leaves_.size()) - 1); + leaf_to_idx_.emplace(node->path()->ToDotString(), + static_cast(leaves_.size()) - 1); } } ColumnDescriptor::ColumnDescriptor(const schema::NodePtr& node, - int16_t max_definition_level, int16_t max_repetition_level, - const SchemaDescriptor* schema_descr) + int16_t max_definition_level, + int16_t max_repetition_level, + const SchemaDescriptor* schema_descr) : node_(node), max_definition_level_(max_definition_level), max_repetition_level_(max_repetition_level), schema_descr_(schema_descr) { - if (!node_->is_primitive()) { throw ParquetException("Must be a primitive type"); } + if (!node_->is_primitive()) { + throw ParquetException("Must be a primitive type"); + } primitive_node_ = static_cast(node_.get()); } @@ -677,7 +690,9 @@ int SchemaDescriptor::ColumnIndex(const std::string& node_path) const { int SchemaDescriptor::ColumnIndex(const Node& node) const { int result = ColumnIndex(node.path()->ToDotString()); - if (result < 0) { return -1; } + if (result < 0) { + return -1; + } DCHECK(result < num_columns()); if (!node.Equals(Column(result)->schema_node().get())) { // Same path but not the same node @@ -699,9 +714,7 @@ int ColumnDescriptor::type_precision() const { return primitive_node_->decimal_metadata().precision; } -int ColumnDescriptor::type_length() const { - return primitive_node_->type_length(); -} +int ColumnDescriptor::type_length() const { return primitive_node_->type_length(); } const std::shared_ptr ColumnDescriptor::path() const { return primitive_node_->path(); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/schema.h ---------------------------------------------------------------------- diff --git a/src/parquet/schema.h b/src/parquet/schema.h index 856f72d..e240b82 100644 --- a/src/parquet/schema.h +++ b/src/parquet/schema.h @@ -108,7 +108,7 @@ class PARQUET_EXPORT Node { enum type { PRIMITIVE, GROUP }; Node(Node::type type, const std::string& name, Repetition::type repetition, - LogicalType::type logical_type = LogicalType::NONE, int id = -1) + LogicalType::type logical_type = LogicalType::NONE, int id = -1) : type_(type), name_(name), repetition_(repetition), @@ -195,10 +195,11 @@ class PARQUET_EXPORT PrimitiveNode : public Node { static std::unique_ptr FromParquet(const void* opaque_element, int id); static inline NodePtr Make(const std::string& name, Repetition::type repetition, - Type::type type, LogicalType::type logical_type = LogicalType::NONE, - int length = -1, int precision = -1, int scale = -1) { - return NodePtr(new PrimitiveNode( - name, repetition, type, logical_type, length, precision, scale)); + Type::type type, + LogicalType::type logical_type = LogicalType::NONE, + int length = -1, int precision = -1, int scale = -1) { + return NodePtr(new PrimitiveNode(name, repetition, type, logical_type, length, + precision, scale)); } bool Equals(const Node* other) const override; @@ -215,8 +216,8 @@ class PARQUET_EXPORT PrimitiveNode : public Node { private: PrimitiveNode(const std::string& name, Repetition::type repetition, Type::type type, - LogicalType::type logical_type = LogicalType::NONE, int length = -1, - int precision = -1, int scale = -1, int id = -1); + LogicalType::type logical_type = LogicalType::NONE, int length = -1, + int precision = -1, int scale = -1, int id = -1); Type::type physical_type_; int32_t type_length_; @@ -243,11 +244,12 @@ class PARQUET_EXPORT GroupNode : public Node { public: // Like PrimitiveNode, GroupNode::FromParquet accepts an opaque void* to avoid exporting // parquet::SchemaElement into the public API - static std::unique_ptr FromParquet( - const void* opaque_element, int id, const NodeVector& fields); + static std::unique_ptr FromParquet(const void* opaque_element, int id, + const NodeVector& fields); static inline NodePtr Make(const std::string& name, Repetition::type repetition, - const NodeVector& fields, LogicalType::type logical_type = LogicalType::NONE) { + const NodeVector& fields, + LogicalType::type logical_type = LogicalType::NONE) { return NodePtr(new GroupNode(name, repetition, fields, logical_type)); } @@ -265,8 +267,8 @@ class PARQUET_EXPORT GroupNode : public Node { private: GroupNode(const std::string& name, Repetition::type repetition, - const NodeVector& fields, LogicalType::type logical_type = LogicalType::NONE, - int id = -1) + const NodeVector& fields, LogicalType::type logical_type = LogicalType::NONE, + int id = -1) : Node(Node::GROUP, name, repetition, logical_type, id), fields_(fields) { field_name_to_idx_.clear(); auto field_idx = 0; @@ -290,10 +292,10 @@ class PARQUET_EXPORT GroupNode : public Node { // ---------------------------------------------------------------------- // Convenience primitive type factory functions -#define PRIMITIVE_FACTORY(FuncName, TYPE) \ - static inline NodePtr FuncName( \ - const std::string& name, Repetition::type repetition = Repetition::OPTIONAL) { \ - return PrimitiveNode::Make(name, repetition, Type::TYPE); \ +#define PRIMITIVE_FACTORY(FuncName, TYPE) \ + static inline NodePtr FuncName(const std::string& name, \ + Repetition::type repetition = Repetition::OPTIONAL) { \ + return PrimitiveNode::Make(name, repetition, Type::TYPE); \ } PRIMITIVE_FACTORY(Boolean, BOOLEAN); @@ -304,8 +306,8 @@ PRIMITIVE_FACTORY(Float, FLOAT); PRIMITIVE_FACTORY(Double, DOUBLE); PRIMITIVE_FACTORY(ByteArray, BYTE_ARRAY); -void PARQUET_EXPORT PrintSchema( - const schema::Node* schema, std::ostream& stream, int indent_width = 2); +void PARQUET_EXPORT PrintSchema(const schema::Node* schema, std::ostream& stream, + int indent_width = 2); } // namespace schema @@ -317,7 +319,8 @@ void PARQUET_EXPORT PrintSchema( class PARQUET_EXPORT ColumnDescriptor { public: ColumnDescriptor(const schema::NodePtr& node, int16_t max_definition_level, - int16_t max_repetition_level, const SchemaDescriptor* schema_descr = nullptr); + int16_t max_repetition_level, + const SchemaDescriptor* schema_descr = nullptr); bool Equals(const ColumnDescriptor& other) const; @@ -402,7 +405,7 @@ class PARQUET_EXPORT SchemaDescriptor { const schema::GroupNode* group_node_; void BuildTree(const schema::NodePtr& node, int16_t max_def_level, - int16_t max_rep_level, const schema::NodePtr& base); + int16_t max_rep_level, const schema::NodePtr& base); // Result of leaf node / tree analysis std::vector leaves_; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/statistics-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/statistics-test.cc b/src/parquet/statistics-test.cc index cbc761d..26352c1 100644 --- a/src/parquet/statistics-test.cc +++ b/src/parquet/statistics-test.cc @@ -68,13 +68,13 @@ class TestRowGroupStatistics : public PrimitiveTypedTest { std::string encoded_max = statistics1.EncodeMax(); TypedStats statistics2(this->schema_.Column(0), encoded_min, encoded_max, - this->values_.size(), 0, 0, true); + this->values_.size(), 0, 0, true); TypedStats statistics3(this->schema_.Column(0)); std::vector valid_bits( BitUtil::RoundUpNumBytes(static_cast(this->values_.size())) + 1, 255); - statistics3.UpdateSpaced( - this->values_ptr_, valid_bits.data(), 0, this->values_.size(), 0); + statistics3.UpdateSpaced(this->values_ptr_, valid_bits.data(), 0, + this->values_.size(), 0); std::string encoded_min_spaced = statistics3.EncodeMin(); std::string encoded_max_spaced = statistics3.EncodeMax(); @@ -108,13 +108,13 @@ class TestRowGroupStatistics : public PrimitiveTypedTest { TypedStats statistics1(this->schema_.Column(0)); this->GenerateData(1000); - statistics1.Update( - this->values_ptr_, this->values_.size() - num_null[0], num_null[0]); + statistics1.Update(this->values_ptr_, this->values_.size() - num_null[0], + num_null[0]); TypedStats statistics2(this->schema_.Column(0)); this->GenerateData(1000); - statistics2.Update( - this->values_ptr_, this->values_.size() - num_null[1], num_null[1]); + statistics2.Update(this->values_ptr_, this->values_.size() - num_null[1], + num_null[1]); TypedStats total(this->schema_.Column(0)); total.Merge(statistics1); @@ -149,14 +149,14 @@ class TestRowGroupStatistics : public PrimitiveTypedTest { int64_t batch_null_count = i ? null_count : 0; DCHECK(null_count <= num_values); // avoid too much headache std::vector definition_levels(batch_null_count, 0); - definition_levels.insert( - definition_levels.end(), batch_num_values - batch_null_count, 1); + definition_levels.insert(definition_levels.end(), + batch_num_values - batch_null_count, 1); auto beg = this->values_.begin() + i * num_values / 2; auto end = beg + batch_num_values; std::vector batch = GetDeepCopy(std::vector(beg, end)); T* batch_values_ptr = GetValuesPointer(batch); - column_writer->WriteBatch( - batch_num_values, definition_levels.data(), nullptr, batch_values_ptr); + column_writer->WriteBatch(batch_num_values, definition_levels.data(), nullptr, + batch_values_ptr); DeepFree(batch); } column_writer->Close(); @@ -263,12 +263,13 @@ void TestRowGroupStatistics::TestMinMaxEncode() { // encoded is same as unencoded ASSERT_EQ(encoded_min, - std::string((const char*)statistics1.min().ptr, statistics1.min().len)); + std::string((const char*)statistics1.min().ptr, statistics1.min().len)); ASSERT_EQ(encoded_max, - std::string((const char*)statistics1.max().ptr, statistics1.max().len)); + std::string((const char*)statistics1.max().ptr, statistics1.max().len)); TypedRowGroupStatistics statistics2(this->schema_.Column(0), encoded_min, - encoded_max, this->values_.size(), 0, 0, true); + encoded_max, this->values_.size(), 0, + 0, true); ASSERT_EQ(encoded_min, statistics2.EncodeMin()); ASSERT_EQ(encoded_max, statistics2.EncodeMax()); @@ -277,7 +278,7 @@ void TestRowGroupStatistics::TestMinMaxEncode() { } using TestTypes = ::testing::Types; + ByteArrayType, FLBAType, BooleanType>; TYPED_TEST_CASE(TestRowGroupStatistics, TestTypes); @@ -316,19 +317,20 @@ TEST(CorruptStatistics, Basics) { schema::NodePtr node; std::vector fields; // Test Physical Types - fields.push_back(schema::PrimitiveNode::Make( - "col1", Repetition::OPTIONAL, Type::INT32, LogicalType::NONE)); - fields.push_back(schema::PrimitiveNode::Make( - "col2", Repetition::OPTIONAL, Type::BYTE_ARRAY, LogicalType::NONE)); + fields.push_back(schema::PrimitiveNode::Make("col1", Repetition::OPTIONAL, Type::INT32, + LogicalType::NONE)); + fields.push_back(schema::PrimitiveNode::Make("col2", Repetition::OPTIONAL, + Type::BYTE_ARRAY, LogicalType::NONE)); // Test Logical Types - fields.push_back(schema::PrimitiveNode::Make( - "col3", Repetition::OPTIONAL, Type::INT32, LogicalType::DATE)); - fields.push_back(schema::PrimitiveNode::Make( - "col4", Repetition::OPTIONAL, Type::INT32, LogicalType::UINT_32)); + fields.push_back(schema::PrimitiveNode::Make("col3", Repetition::OPTIONAL, Type::INT32, + LogicalType::DATE)); + fields.push_back(schema::PrimitiveNode::Make("col4", Repetition::OPTIONAL, Type::INT32, + LogicalType::UINT_32)); fields.push_back(schema::PrimitiveNode::Make("col5", Repetition::OPTIONAL, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL, 12)); - fields.push_back(schema::PrimitiveNode::Make( - "col6", Repetition::OPTIONAL, Type::BYTE_ARRAY, LogicalType::UTF8)); + Type::FIXED_LEN_BYTE_ARRAY, + LogicalType::INTERVAL, 12)); + fields.push_back(schema::PrimitiveNode::Make("col6", Repetition::OPTIONAL, + Type::BYTE_ARRAY, LogicalType::UTF8)); node = schema::GroupNode::Make("schema", Repetition::REQUIRED, fields); schema.Init(node); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/statistics.cc ---------------------------------------------------------------------- diff --git a/src/parquet/statistics.cc b/src/parquet/statistics.cc index d99140e..12d1f5b 100644 --- a/src/parquet/statistics.cc +++ b/src/parquet/statistics.cc @@ -30,8 +30,8 @@ using arrow::MemoryPool; namespace parquet { template -TypedRowGroupStatistics::TypedRowGroupStatistics( - const ColumnDescriptor* schema, MemoryPool* pool) +TypedRowGroupStatistics::TypedRowGroupStatistics(const ColumnDescriptor* schema, + MemoryPool* pool) : pool_(pool), min_buffer_(AllocateBuffer(pool_, 0)), max_buffer_(AllocateBuffer(pool_, 0)) { @@ -41,8 +41,10 @@ TypedRowGroupStatistics::TypedRowGroupStatistics( template TypedRowGroupStatistics::TypedRowGroupStatistics(const typename DType::c_type& min, - const typename DType::c_type& max, int64_t num_values, int64_t null_count, - int64_t distinct_count) + const typename DType::c_type& max, + int64_t num_values, + int64_t null_count, + int64_t distinct_count) : pool_(default_memory_pool()), min_buffer_(AllocateBuffer(pool_, 0)), max_buffer_(AllocateBuffer(pool_, 0)) { @@ -56,9 +58,10 @@ TypedRowGroupStatistics::TypedRowGroupStatistics(const typename DType::c_ } template -TypedRowGroupStatistics::TypedRowGroupStatistics(const ColumnDescriptor* schema, - const std::string& encoded_min, const std::string& encoded_max, int64_t num_values, - int64_t null_count, int64_t distinct_count, bool has_min_max, MemoryPool* pool) +TypedRowGroupStatistics::TypedRowGroupStatistics( + const ColumnDescriptor* schema, const std::string& encoded_min, + const std::string& encoded_max, int64_t num_values, int64_t null_count, + int64_t distinct_count, bool has_min_max, MemoryPool* pool) : pool_(pool), min_buffer_(AllocateBuffer(pool_, 0)), max_buffer_(AllocateBuffer(pool_, 0)) { @@ -68,8 +71,12 @@ TypedRowGroupStatistics::TypedRowGroupStatistics(const ColumnDescriptor* SetDescr(schema); - if (!encoded_min.empty()) { PlainDecode(encoded_min, &min_); } - if (!encoded_max.empty()) { PlainDecode(encoded_max, &max_); } + if (!encoded_min.empty()) { + PlainDecode(encoded_min, &min_); + } + if (!encoded_max.empty()) { + PlainDecode(encoded_max, &max_); + } has_min_max_ = has_min_max; } @@ -85,8 +92,8 @@ void TypedRowGroupStatistics::Reset() { } template -void TypedRowGroupStatistics::Update( - const T* values, int64_t num_not_null, int64_t num_null) { +void TypedRowGroupStatistics::Update(const T* values, int64_t num_not_null, + int64_t num_null) { DCHECK(num_not_null >= 0); DCHECK(num_null >= 0); @@ -109,8 +116,10 @@ void TypedRowGroupStatistics::Update( template void TypedRowGroupStatistics::UpdateSpaced(const T* values, - const uint8_t* valid_bits, int64_t valid_bits_offset, int64_t num_not_null, - int64_t num_null) { + const uint8_t* valid_bits, + int64_t valid_bits_offset, + int64_t num_not_null, + int64_t num_null) { DCHECK(num_not_null >= 0); DCHECK(num_null >= 0); @@ -126,7 +135,9 @@ void TypedRowGroupStatistics::UpdateSpaced(const T* values, int64_t length = num_null + num_not_null; int64_t i = 0; for (; i < length; i++) { - if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { break; } + if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { + break; + } READ_NEXT_BITSET(valid_bits); } T min = values[i]; @@ -216,8 +227,8 @@ void TypedRowGroupStatistics::PlainEncode(const T& src, std::string* dst) template void TypedRowGroupStatistics::PlainDecode(const std::string& src, T* dst) { PlainDecoder decoder(descr()); - decoder.SetData( - 1, reinterpret_cast(src.c_str()), static_cast(src.size())); + decoder.SetData(1, reinterpret_cast(src.c_str()), + static_cast(src.size())); decoder.Decode(dst, 1); } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/statistics.h ---------------------------------------------------------------------- diff --git a/src/parquet/statistics.h b/src/parquet/statistics.h index c6a2487..12d0555 100644 --- a/src/parquet/statistics.h +++ b/src/parquet/statistics.h @@ -134,15 +134,15 @@ class TypedRowGroupStatistics : public RowGroupStatistics { using T = typename DType::c_type; TypedRowGroupStatistics(const ColumnDescriptor* schema, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); TypedRowGroupStatistics(const T& min, const T& max, int64_t num_values, - int64_t null_count, int64_t distinct_count); + int64_t null_count, int64_t distinct_count); TypedRowGroupStatistics(const ColumnDescriptor* schema, const std::string& encoded_min, - const std::string& encoded_max, int64_t num_values, int64_t null_count, - int64_t distinct_count, bool has_min_max, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); + const std::string& encoded_max, int64_t num_values, + int64_t null_count, int64_t distinct_count, bool has_min_max, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); bool HasMinMax() const override; void Reset() override; @@ -150,7 +150,7 @@ class TypedRowGroupStatistics : public RowGroupStatistics { void Update(const T* values, int64_t num_not_null, int64_t num_null); void UpdateSpaced(const T* values, const uint8_t* valid_bits, int64_t valid_bits_spaced, - int64_t num_not_null, int64_t num_null); + int64_t num_not_null, int64_t num_null); const T& min() const; const T& max() const; @@ -178,8 +178,8 @@ inline void TypedRowGroupStatistics::Copy(const T& src, T* dst, PoolBuffe } template <> -inline void TypedRowGroupStatistics::Copy( - const FLBA& src, FLBA* dst, PoolBuffer* buffer) { +inline void TypedRowGroupStatistics::Copy(const FLBA& src, FLBA* dst, + PoolBuffer* buffer) { if (dst->ptr == src.ptr) return; uint32_t len = descr_->type_length(); PARQUET_THROW_NOT_OK(buffer->Resize(len, false)); @@ -188,8 +188,9 @@ inline void TypedRowGroupStatistics::Copy( } template <> -inline void TypedRowGroupStatistics::Copy( - const ByteArray& src, ByteArray* dst, PoolBuffer* buffer) { +inline void TypedRowGroupStatistics::Copy(const ByteArray& src, + ByteArray* dst, + PoolBuffer* buffer) { if (dst->ptr == src.ptr) return; PARQUET_THROW_NOT_OK(buffer->Resize(src.len, false)); std::memcpy(buffer->mutable_data(), src.ptr, src.len); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/test-specialization.h ---------------------------------------------------------------------- diff --git a/src/parquet/test-specialization.h b/src/parquet/test-specialization.h index 6bd1dee..4719fdc 100644 --- a/src/parquet/test-specialization.h +++ b/src/parquet/test-specialization.h @@ -40,8 +40,8 @@ void InitValues(int num_values, vector& values, vector& buf } template <> -void InitValues( - int num_values, vector& values, vector& buffer) { +void InitValues(int num_values, vector& values, + vector& buffer) { int max_byte_array_len = 12; int num_bytes = max_byte_array_len + sizeof(uint32_t); size_t nbytes = num_values * num_bytes; @@ -59,7 +59,7 @@ void InitValues(int num_values, vector& values, vector& buf template <> void InitValues(int num_values, vector& values, vector& buffer) { random_Int96_numbers(num_values, 0, std::numeric_limits::min(), - std::numeric_limits::max(), values.data()); + std::numeric_limits::max(), values.data()); } inline std::string TestColumnName(int i) { @@ -79,8 +79,8 @@ class PrimitiveTypedTest : public ::testing::Test { for (int i = 0; i < num_columns; ++i) { std::string name = TestColumnName(i); - fields.push_back(schema::PrimitiveNode::Make( - name, repetition, TestType::type_num, LogicalType::NONE, FLBA_LENGTH)); + fields.push_back(schema::PrimitiveNode::Make(name, repetition, TestType::type_num, + LogicalType::NONE, FLBA_LENGTH)); } node_ = schema::GroupNode::Make("schema", Repetition::REQUIRED, fields); schema_.Init(node_); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/test-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/test-util.h b/src/parquet/test-util.h index 8657a7f..356486b 100644 --- a/src/parquet/test-util.h +++ b/src/parquet/test-util.h @@ -53,12 +53,12 @@ namespace test { template static void InitValues(int num_values, vector& values, vector& buffer) { random_numbers(num_values, 0, std::numeric_limits::min(), - std::numeric_limits::max(), values.data()); + std::numeric_limits::max(), values.data()); } template -static void InitDictValues( - int num_values, int num_dicts, vector& values, vector& buffer) { +static void InitDictValues(int num_values, int num_dicts, vector& values, + vector& buffer) { int repeat_factor = num_values / num_dicts; InitValues(num_dicts, values, buffer); // add some repeated values @@ -112,7 +112,7 @@ class DataPageBuilder { have_values_(false) {} void AppendDefLevels(const vector& levels, int16_t max_level, - Encoding::type encoding = Encoding::RLE) { + Encoding::type encoding = Encoding::RLE) { AppendLevels(levels, max_level, encoding); num_values_ = std::max(static_cast(levels.size()), num_values_); @@ -121,7 +121,7 @@ class DataPageBuilder { } void AppendRepLevels(const vector& levels, int16_t max_level, - Encoding::type encoding = Encoding::RLE) { + Encoding::type encoding = Encoding::RLE) { AppendLevels(levels, max_level, encoding); num_values_ = std::max(static_cast(levels.size()), num_values_); @@ -130,7 +130,7 @@ class DataPageBuilder { } void AppendValues(const ColumnDescriptor* d, const vector& values, - Encoding::type encoding = Encoding::PLAIN) { + Encoding::type encoding = Encoding::PLAIN) { PlainEncoder encoder(d); encoder.Put(&values[0], static_cast(values.size())); std::shared_ptr values_sink = encoder.FlushValues(); @@ -162,8 +162,8 @@ class DataPageBuilder { bool have_values_; // Used internally for both repetition and definition levels - void AppendLevels( - const vector& levels, int16_t max_level, Encoding::type encoding) { + void AppendLevels(const vector& levels, int16_t max_level, + Encoding::type encoding) { if (encoding != Encoding::RLE) { ParquetException::NYI("only rle encoding currently implemented"); } @@ -176,7 +176,7 @@ class DataPageBuilder { // size. LevelEncoder encoder; encoder.Init(encoding, max_level, static_cast(levels.size()), - encode_buffer.data(), static_cast(encode_buffer.size())); + encode_buffer.data(), static_cast(encode_buffer.size())); encoder.Encode(static_cast(levels.size()), levels.data()); @@ -187,8 +187,9 @@ class DataPageBuilder { }; template <> -void DataPageBuilder::AppendValues( - const ColumnDescriptor* d, const vector& values, Encoding::type encoding) { +void DataPageBuilder::AppendValues(const ColumnDescriptor* d, + const vector& values, + Encoding::type encoding) { if (encoding != Encoding::PLAIN) { ParquetException::NYI("only plain encoding currently implemented"); } @@ -203,17 +204,22 @@ void DataPageBuilder::AppendValues( } template -static shared_ptr MakeDataPage(const ColumnDescriptor* d, - const vector& values, int num_vals, Encoding::type encoding, - const uint8_t* indices, int indices_size, const vector& def_levels, - int16_t max_def_level, const vector& rep_levels, int16_t max_rep_level) { +static shared_ptr MakeDataPage( + const ColumnDescriptor* d, const vector& values, int num_vals, + Encoding::type encoding, const uint8_t* indices, int indices_size, + const vector& def_levels, int16_t max_def_level, + const vector& rep_levels, int16_t max_rep_level) { int num_values = 0; InMemoryOutputStream page_stream; test::DataPageBuilder page_builder(&page_stream); - if (!rep_levels.empty()) { page_builder.AppendRepLevels(rep_levels, max_rep_level); } - if (!def_levels.empty()) { page_builder.AppendDefLevels(def_levels, max_def_level); } + if (!rep_levels.empty()) { + page_builder.AppendRepLevels(rep_levels, max_rep_level); + } + if (!def_levels.empty()) { + page_builder.AppendDefLevels(def_levels, max_def_level); + } if (encoding == Encoding::PLAIN) { page_builder.AppendValues(d, values, encoding); @@ -226,7 +232,8 @@ static shared_ptr MakeDataPage(const ColumnDescriptor* d, auto buffer = page_stream.GetBuffer(); return std::make_shared(buffer, num_values, encoding, - page_builder.def_level_encoding(), page_builder.rep_level_encoding()); + page_builder.def_level_encoding(), + page_builder.rep_level_encoding()); } template @@ -287,9 +294,10 @@ shared_ptr DictionaryPageBuilder::AppendValues( } template -static shared_ptr MakeDictPage(const ColumnDescriptor* d, - const vector& values, const vector& values_per_page, - Encoding::type encoding, vector>& rle_indices) { +static shared_ptr MakeDictPage( + const ColumnDescriptor* d, const vector& values, + const vector& values_per_page, Encoding::type encoding, + vector>& rle_indices) { InMemoryOutputStream page_stream; test::DictionaryPageBuilder page_builder(d); int num_pages = static_cast(values_per_page.size()); @@ -303,17 +311,19 @@ static shared_ptr MakeDictPage(const ColumnDescriptor* d, auto buffer = page_builder.WriteDict(); - return std::make_shared( - buffer, page_builder.num_values(), Encoding::PLAIN); + return std::make_shared(buffer, page_builder.num_values(), + Encoding::PLAIN); } // Given def/rep levels and values create multiple dict pages template static void PaginateDict(const ColumnDescriptor* d, - const vector& values, const vector& def_levels, - int16_t max_def_level, const vector& rep_levels, int16_t max_rep_level, - int num_levels_per_page, const vector& values_per_page, - vector>& pages, Encoding::type encoding = Encoding::RLE_DICTIONARY) { + const vector& values, + const vector& def_levels, int16_t max_def_level, + const vector& rep_levels, int16_t max_rep_level, + int num_levels_per_page, const vector& values_per_page, + vector>& pages, + Encoding::type encoding = Encoding::RLE_DICTIONARY) { int num_pages = static_cast(values_per_page.size()); vector> rle_indices; shared_ptr dict_page = @@ -332,8 +342,9 @@ static void PaginateDict(const ColumnDescriptor* d, rep_level_start = i * num_levels_per_page; rep_level_end = (i + 1) * num_levels_per_page; } - shared_ptr data_page = MakeDataPage(d, {}, values_per_page[i], - encoding, rle_indices[i]->data(), static_cast(rle_indices[i]->size()), + shared_ptr data_page = MakeDataPage( + d, {}, values_per_page[i], encoding, rle_indices[i]->data(), + static_cast(rle_indices[i]->size()), slice(def_levels, def_level_start, def_level_end), max_def_level, slice(rep_levels, rep_level_start, rep_level_end), max_rep_level); pages.push_back(data_page); @@ -343,10 +354,12 @@ static void PaginateDict(const ColumnDescriptor* d, // Given def/rep levels and values create multiple plain pages template static void PaginatePlain(const ColumnDescriptor* d, - const vector& values, const vector& def_levels, - int16_t max_def_level, const vector& rep_levels, int16_t max_rep_level, - int num_levels_per_page, const vector& values_per_page, - vector>& pages, Encoding::type encoding = Encoding::PLAIN) { + const vector& values, + const vector& def_levels, int16_t max_def_level, + const vector& rep_levels, int16_t max_rep_level, + int num_levels_per_page, const vector& values_per_page, + vector>& pages, + Encoding::type encoding = Encoding::PLAIN) { int num_pages = static_cast(values_per_page.size()); int def_level_start = 0; int def_level_end = 0; @@ -362,10 +375,11 @@ static void PaginatePlain(const ColumnDescriptor* d, rep_level_start = i * num_levels_per_page; rep_level_end = (i + 1) * num_levels_per_page; } - shared_ptr page = MakeDataPage(d, - slice(values, value_start, value_start + values_per_page[i]), values_per_page[i], - encoding, NULL, 0, slice(def_levels, def_level_start, def_level_end), - max_def_level, slice(rep_levels, rep_level_start, rep_level_end), max_rep_level); + shared_ptr page = MakeDataPage( + d, slice(values, value_start, value_start + values_per_page[i]), + values_per_page[i], encoding, NULL, 0, + slice(def_levels, def_level_start, def_level_end), max_def_level, + slice(rep_levels, rep_level_start, rep_level_end), max_rep_level); pages.push_back(page); value_start += values_per_page[i]; } @@ -374,9 +388,10 @@ static void PaginatePlain(const ColumnDescriptor* d, // Generates pages from randomly generated data template static int MakePages(const ColumnDescriptor* d, int num_pages, int levels_per_page, - vector& def_levels, vector& rep_levels, - vector& values, vector& buffer, - vector>& pages, Encoding::type encoding = Encoding::PLAIN) { + vector& def_levels, vector& rep_levels, + vector& values, vector& buffer, + vector>& pages, + Encoding::type encoding = Encoding::PLAIN) { int num_levels = levels_per_page * num_pages; int num_values = 0; uint32_t seed = 0; @@ -411,13 +426,13 @@ static int MakePages(const ColumnDescriptor* d, int num_pages, int levels_per_pa if (encoding == Encoding::PLAIN) { InitValues(num_values, values, buffer); PaginatePlain(d, values, def_levels, max_def_level, rep_levels, max_rep_level, - levels_per_page, values_per_page, pages); + levels_per_page, values_per_page, pages); } else if (encoding == Encoding::RLE_DICTIONARY || encoding == Encoding::PLAIN_DICTIONARY) { // Calls InitValues and repeats the data InitDictValues(num_values, levels_per_page, values, buffer); PaginateDict(d, values, def_levels, max_def_level, rep_levels, max_rep_level, - levels_per_page, values_per_page, pages); + levels_per_page, values_per_page, pages); } return num_values; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/thrift.h ---------------------------------------------------------------------- diff --git a/src/parquet/thrift.h b/src/parquet/thrift.h index 7fa0de3..7a23e41 100644 --- a/src/parquet/thrift.h +++ b/src/parquet/thrift.h @@ -30,9 +30,9 @@ #include #include -#include #include #include +#include #include "parquet/exception.h" #include "parquet/parquet_types.h" http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/types-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/types-test.cc b/src/parquet/types-test.cc index ba3fe69..eedeaae 100644 --- a/src/parquet/types-test.cc +++ b/src/parquet/types-test.cc @@ -44,10 +44,10 @@ TEST(TestLogicalTypeToString, LogicalTypes) { ASSERT_STREQ("DATE", LogicalTypeToString(LogicalType::DATE).c_str()); ASSERT_STREQ("TIME_MILLIS", LogicalTypeToString(LogicalType::TIME_MILLIS).c_str()); ASSERT_STREQ("TIME_MICROS", LogicalTypeToString(LogicalType::TIME_MICROS).c_str()); - ASSERT_STREQ( - "TIMESTAMP_MILLIS", LogicalTypeToString(LogicalType::TIMESTAMP_MILLIS).c_str()); - ASSERT_STREQ( - "TIMESTAMP_MICROS", LogicalTypeToString(LogicalType::TIMESTAMP_MICROS).c_str()); + ASSERT_STREQ("TIMESTAMP_MILLIS", + LogicalTypeToString(LogicalType::TIMESTAMP_MILLIS).c_str()); + ASSERT_STREQ("TIMESTAMP_MICROS", + LogicalTypeToString(LogicalType::TIMESTAMP_MICROS).c_str()); ASSERT_STREQ("UINT_8", LogicalTypeToString(LogicalType::UINT_8).c_str()); ASSERT_STREQ("UINT_16", LogicalTypeToString(LogicalType::UINT_16).c_str()); ASSERT_STREQ("UINT_32", LogicalTypeToString(LogicalType::UINT_32).c_str()); @@ -106,10 +106,10 @@ TEST(TypePrinter, StatisticsTypes) { smin = std::string("abcdefgh"); smax = std::string("ijklmnop"); - ASSERT_STREQ( - "abcdefgh ", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smin.c_str()).c_str()); - ASSERT_STREQ( - "ijklmnop ", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax.c_str()).c_str()); + ASSERT_STREQ("abcdefgh ", + FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smin.c_str()).c_str()); + ASSERT_STREQ("ijklmnop ", + FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax.c_str()).c_str()); } } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/buffer-builder.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/buffer-builder.h b/src/parquet/util/buffer-builder.h index b72e70d..26f134e 100644 --- a/src/parquet/util/buffer-builder.h +++ b/src/parquet/util/buffer-builder.h @@ -20,8 +20,8 @@ #ifndef PARQUET_UTIL_BUFFER_BUILDER_H #define PARQUET_UTIL_BUFFER_BUILDER_H -#include #include +#include namespace parquet { http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/comparison-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/util/comparison-test.cc b/src/parquet/util/comparison-test.cc index 7ef0338..8401983 100644 --- a/src/parquet/util/comparison-test.cc +++ b/src/parquet/util/comparison-test.cc @@ -67,8 +67,9 @@ TEST(Comparison, FLBA) { auto arr1 = FLBAFromString(a); auto arr2 = FLBAFromString(b); - NodePtr node = PrimitiveNode::Make("FLBA", Repetition::REQUIRED, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, static_cast(a.size())); + NodePtr node = + PrimitiveNode::Make("FLBA", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY, + LogicalType::NONE, static_cast(a.size())); ColumnDescriptor descr(node, 0, 0); Compare less(&descr); ASSERT_TRUE(less(arr1, arr2)); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/comparison.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/comparison.h b/src/parquet/util/comparison.h index 103f4c5..edd3df1 100644 --- a/src/parquet/util/comparison.h +++ b/src/parquet/util/comparison.h @@ -51,8 +51,8 @@ template <> inline bool Compare::operator()(const FLBA& a, const FLBA& b) { auto aptr = reinterpret_cast(a.ptr); auto bptr = reinterpret_cast(b.ptr); - return std::lexicographical_compare( - aptr, aptr + type_length_, bptr, bptr + type_length_); + return std::lexicographical_compare(aptr, aptr + type_length_, bptr, + bptr + type_length_); } } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/memory.cc ---------------------------------------------------------------------- diff --git a/src/parquet/util/memory.cc b/src/parquet/util/memory.cc index 39c43fb..5051c7b 100644 --- a/src/parquet/util/memory.cc +++ b/src/parquet/util/memory.cc @@ -140,9 +140,7 @@ uint8_t* ChunkedAllocator::Allocate(int size) { return result; } -uint8_t* ChunkedAllocator::Allocate(int size) { - return Allocate(size); -} +uint8_t* ChunkedAllocator::Allocate(int size) { return Allocate(size); } void ChunkedAllocator::Clear() { current_chunk_idx_ = -1; @@ -335,9 +333,7 @@ ArrowInputFile::ArrowInputFile( const std::shared_ptr<::arrow::io::ReadableFileInterface>& file) : file_(file) {} -::arrow::io::FileInterface* ArrowInputFile::file_interface() { - return file_.get(); -} +::arrow::io::FileInterface* ArrowInputFile::file_interface() { return file_.get(); } int64_t ArrowInputFile::Size() const { int64_t size; @@ -374,9 +370,7 @@ ArrowOutputStream::ArrowOutputStream( const std::shared_ptr<::arrow::io::OutputStream> file) : file_(file) {} -::arrow::io::FileInterface* ArrowOutputStream::file_interface() { - return file_.get(); -} +::arrow::io::FileInterface* ArrowOutputStream::file_interface() { return file_.get(); } // Copy bytes into the output stream void ArrowOutputStream::Write(const uint8_t* data, int64_t length) { @@ -391,8 +385,8 @@ InMemoryInputStream::InMemoryInputStream(const std::shared_ptr& buffer) len_ = buffer_->size(); } -InMemoryInputStream::InMemoryInputStream( - RandomAccessSource* source, int64_t start, int64_t num_bytes) +InMemoryInputStream::InMemoryInputStream(RandomAccessSource* source, int64_t start, + int64_t num_bytes) : offset_(0) { buffer_ = source->ReadAt(start, num_bytes); if (buffer_->size() < num_bytes) { @@ -412,24 +406,22 @@ const uint8_t* InMemoryInputStream::Read(int64_t num_to_read, int64_t* num_bytes return result; } -void InMemoryInputStream::Advance(int64_t num_bytes) { - offset_ += num_bytes; -} +void InMemoryInputStream::Advance(int64_t num_bytes) { offset_ += num_bytes; } // ---------------------------------------------------------------------- // In-memory output stream InMemoryOutputStream::InMemoryOutputStream(MemoryPool* pool, int64_t initial_capacity) : size_(0), capacity_(initial_capacity) { - if (initial_capacity == 0) { initial_capacity = kInMemoryDefaultCapacity; } + if (initial_capacity == 0) { + initial_capacity = kInMemoryDefaultCapacity; + } buffer_ = AllocateBuffer(pool, initial_capacity); } InMemoryOutputStream::~InMemoryOutputStream() {} -uint8_t* InMemoryOutputStream::Head() { - return buffer_->mutable_data() + size_; -} +uint8_t* InMemoryOutputStream::Head() { return buffer_->mutable_data() + size_; } void InMemoryOutputStream::Write(const uint8_t* data, int64_t length) { if (size_ + length > capacity_) { @@ -444,9 +436,7 @@ void InMemoryOutputStream::Write(const uint8_t* data, int64_t length) { size_ += length; } -int64_t InMemoryOutputStream::Tell() { - return size_; -} +int64_t InMemoryOutputStream::Tell() { return size_; } std::shared_ptr InMemoryOutputStream::GetBuffer() { PARQUET_THROW_NOT_OK(buffer_->Resize(size_)); @@ -459,7 +449,8 @@ std::shared_ptr InMemoryOutputStream::GetBuffer() { // BufferedInputStream BufferedInputStream::BufferedInputStream(MemoryPool* pool, int64_t buffer_size, - RandomAccessSource* source, int64_t start, int64_t num_bytes) + RandomAccessSource* source, int64_t start, + int64_t num_bytes) : source_(source), stream_offset_(start), stream_end_(start + num_bytes) { buffer_ = AllocateBuffer(pool, buffer_size); buffer_size_ = buffer_->size(); @@ -502,13 +493,17 @@ void BufferedInputStream::Advance(int64_t num_bytes) { std::shared_ptr AllocateBuffer(MemoryPool* pool, int64_t size) { auto result = std::make_shared(pool); - if (size > 0) { PARQUET_THROW_NOT_OK(result->Resize(size)); } + if (size > 0) { + PARQUET_THROW_NOT_OK(result->Resize(size)); + } return result; } std::unique_ptr AllocateUniqueBuffer(MemoryPool* pool, int64_t size) { std::unique_ptr result(new PoolBuffer(pool)); - if (size > 0) { PARQUET_THROW_NOT_OK(result->Resize(size)); } + if (size > 0) { + PARQUET_THROW_NOT_OK(result->Resize(size)); + } return result; } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/memory.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/memory.h b/src/parquet/util/memory.h index d73a3de..04dcca4 100644 --- a/src/parquet/util/memory.h +++ b/src/parquet/util/memory.h @@ -414,7 +414,7 @@ class PARQUET_EXPORT InMemoryInputStream : public InputStream { class PARQUET_EXPORT BufferedInputStream : public InputStream { public: BufferedInputStream(::arrow::MemoryPool* pool, int64_t buffer_size, - RandomAccessSource* source, int64_t start, int64_t end); + RandomAccessSource* source, int64_t start, int64_t end); virtual const uint8_t* Peek(int64_t num_to_peek, int64_t* num_bytes); virtual const uint8_t* Read(int64_t num_to_read, int64_t* num_bytes); @@ -429,11 +429,11 @@ class PARQUET_EXPORT BufferedInputStream : public InputStream { int64_t buffer_size_; }; -std::shared_ptr PARQUET_EXPORT AllocateBuffer( - ::arrow::MemoryPool* pool, int64_t size = 0); +std::shared_ptr PARQUET_EXPORT AllocateBuffer(::arrow::MemoryPool* pool, + int64_t size = 0); -std::unique_ptr PARQUET_EXPORT AllocateUniqueBuffer( - ::arrow::MemoryPool* pool, int64_t size = 0); +std::unique_ptr PARQUET_EXPORT AllocateUniqueBuffer(::arrow::MemoryPool* pool, + int64_t size = 0); } // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/schema-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/schema-util.h b/src/parquet/util/schema-util.h index e199c21..9187962 100644 --- a/src/parquet/util/schema-util.h +++ b/src/parquet/util/schema-util.h @@ -35,7 +35,9 @@ using parquet::schema::Node; using parquet::LogicalType; inline bool str_endswith_tuple(const std::string& str) { - if (str.size() >= 6) { return str.substr(str.size() - 6, 6) == "_tuple"; } + if (str.size() >= 6) { + return str.substr(str.size() - 6, 6) == "_tuple"; + } return false; } @@ -63,16 +65,21 @@ inline bool IsSimpleStruct(const NodePtr& node) { // Coalesce a list of schema fields indices which are the roots of the // columns referred by a list of column indices inline bool ColumnIndicesToFieldIndices(const SchemaDescriptor& descr, - const std::vector& column_indices, std::vector* out) { + const std::vector& column_indices, + std::vector* out) { const GroupNode* group = descr.group_node(); std::unordered_set already_added; out->clear(); for (auto& column_idx : column_indices) { auto field_node = descr.GetColumnRoot(column_idx); auto field_idx = group->FieldIndex(field_node->name()); - if (field_idx < 0) { return false; } + if (field_idx < 0) { + return false; + } auto insertion = already_added.insert(field_idx); - if (insertion.second) { out->push_back(field_idx); } + if (insertion.second) { + out->push_back(field_idx); + } } return true; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/src/parquet/util/test-common.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/test-common.h b/src/parquet/util/test-common.h index 2327aeb..1043378 100644 --- a/src/parquet/util/test-common.h +++ b/src/parquet/util/test-common.h @@ -32,7 +32,7 @@ namespace parquet { namespace test { typedef ::testing::Types + DoubleType, ByteArrayType, FLBAType> ParquetTypes; template @@ -46,7 +46,9 @@ static inline void assert_vector_equal(const vector& left, const vector& r template static inline bool vector_equal(const vector& left, const vector& right) { - if (left.size() != right.size()) { return false; } + if (left.size() != right.size()) { + return false; + } for (size_t i = 0; i < left.size(); ++i) { if (left[i] != right[i]) { @@ -61,7 +63,9 @@ static inline bool vector_equal(const vector& left, const vector& right) { template static vector slice(const vector& values, int start, int end) { - if (end < start) { return vector(0); } + if (end < start) { + return vector(0); + } vector out(end - start); for (int i = start; i < end; ++i) { @@ -130,8 +134,8 @@ void random_numbers(int n, uint32_t seed, float min_value, float max_value, floa } template <> -void random_numbers( - int n, uint32_t seed, double min_value, double max_value, double* out) { +void random_numbers(int n, uint32_t seed, double min_value, double max_value, + double* out) { std::mt19937 gen(seed); std::uniform_real_distribution d(min_value, max_value); for (int i = 0; i < n; ++i) { @@ -139,8 +143,8 @@ void random_numbers( } } -void random_Int96_numbers( - int n, uint32_t seed, int32_t min_value, int32_t max_value, Int96* out) { +void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_value, + Int96* out) { std::mt19937 gen(seed); std::uniform_int_distribution d(min_value, max_value); for (int i = 0; i < n; ++i) { @@ -162,8 +166,8 @@ void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* } } -void random_byte_array( - int n, uint32_t seed, uint8_t* buf, ByteArray* out, int min_size, int max_size) { +void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int min_size, + int max_size) { std::mt19937 gen(seed); std::uniform_int_distribution d1(min_size, max_size); std::uniform_int_distribution d2(0, 255); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b6f3caeb/tools/parquet-scan.cc ---------------------------------------------------------------------- diff --git a/tools/parquet-scan.cc b/tools/parquet-scan.cc index 8ab15a4..5bf2b18 100644 --- a/tools/parquet-scan.cc +++ b/tools/parquet-scan.cc @@ -49,7 +49,9 @@ int main(int argc, char** argv) { } } else if ((param = std::strstr(argv[i], BATCH_SIZE_PREFIX.c_str()))) { value = std::strtok(param + BATCH_SIZE_PREFIX.length(), " "); - if (value) { batch_size = std::atoi(value); } + if (value) { + batch_size = std::atoi(value); + } } else { filename = argv[i]; } @@ -84,8 +86,9 @@ int main(int argc, char** argv) { int64_t values_read = 0; while (col_reader->HasNext()) { - total_rows[col] += ScanAllValues(batch_size, def_levels.data(), - rep_levels.data(), values.data(), &values_read, col_reader.get()); + total_rows[col] += + ScanAllValues(batch_size, def_levels.data(), rep_levels.data(), + values.data(), &values_read, col_reader.get()); } col++; }