parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From u..@apache.org
Subject parquet-cpp git commit: PARQUET-778: Standardize the schema output to match the parquet-mr format
Date Fri, 18 Nov 2016 17:46:00 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master 309ff6cde -> 3e0e5da1c


PARQUET-778: Standardize the schema output to match the parquet-mr format

- root node name is preceded by 'message'
- byte_array type is named 'binary'
- column entries end with a semicolon
- add logical type output

Author: Mike Trinkala <trink@acm.org>

Closes #192 from trink/standardize_schema and squashes the following commits:

30ea22f [Mike Trinkala] Incorporate review feedback
fce684c [Mike Trinkala] Standardize the schema output to match the parquet-mr format


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/3e0e5da1
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/3e0e5da1
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/3e0e5da1

Branch: refs/heads/master
Commit: 3e0e5da1c329dfbc62c673140ee5f87d8ff12443
Parents: 309ff6c
Author: Mike Trinkala <trink@acm.org>
Authored: Fri Nov 18 18:45:38 2016 +0100
Committer: Uwe L. Korn <uwelk@xhochy.com>
Committed: Fri Nov 18 18:45:38 2016 +0100

----------------------------------------------------------------------
 src/parquet/schema/printer.cc             | 30 ++++++++++++++++++++++----
 src/parquet/schema/schema-printer-test.cc | 14 +++++++-----
 2 files changed, 35 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3e0e5da1/src/parquet/schema/printer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/printer.cc b/src/parquet/schema/printer.cc
index b190398..c4ab3e7 100644
--- a/src/parquet/schema/printer.cc
+++ b/src/parquet/schema/printer.cc
@@ -83,7 +83,7 @@ static void PrintType(const PrimitiveNode* node, std::ostream& stream) {
       stream << "double";
       break;
     case Type::BYTE_ARRAY:
-      stream << "byte_array";
+      stream << "binary";
       break;
     case Type::FIXED_LEN_BYTE_ARRAY:
       stream << "fixed_len_byte_array(" << node->type_length() << ")";
@@ -93,16 +93,38 @@ static void PrintType(const PrimitiveNode* node, std::ostream& stream) {
   }
 }
 
+static void PrintLogicalType(const PrimitiveNode* node, std::ostream& stream) {
+  auto lt = node->logical_type();
+  if (lt == LogicalType::DECIMAL) {
+    stream << " (" <<  LogicalTypeToString(lt) << "(" <<
+      node->decimal_metadata().precision << "," <<
+      node->decimal_metadata().scale << "))";
+  } else if (lt != LogicalType::NONE) {
+    stream << " (" << LogicalTypeToString(lt) << ")";
+  }
+}
+
 void SchemaPrinter::Visit(const PrimitiveNode* node) {
   PrintRepLevel(node->repetition(), stream_);
   stream_ << " ";
   PrintType(node, stream_);
-  stream_ << " " << node->name() << std::endl;
+  stream_ << " " << node->name();
+  PrintLogicalType(node, stream_);
+  stream_ << ";" << std::endl;
 }
 
 void SchemaPrinter::Visit(const GroupNode* node) {
-  PrintRepLevel(node->repetition(), stream_);
-  stream_ << " group " << node->name() << " {" << std::endl;
+  if (!node->parent()) {
+    stream_ << "message " << node->name() << " {" << std::endl;
+  } else {
+    PrintRepLevel(node->repetition(), stream_);
+    stream_ << " group " << node->name();
+    auto lt = node->logical_type();
+    if (lt != LogicalType::NONE) {
+      stream_ << " (" << LogicalTypeToString(lt) << ")";
+    }
+    stream_  << " {" << std::endl;
+  }
 
   indent_ += indent_width_;
   for (int i = 0; i < node->field_count(); ++i) {

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3e0e5da1/src/parquet/schema/schema-printer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/schema-printer-test.cc b/src/parquet/schema/schema-printer-test.cc
index 286aea9..e594f6f 100644
--- a/src/parquet/schema/schema-printer-test.cc
+++ b/src/parquet/schema/schema-printer-test.cc
@@ -51,17 +51,21 @@ TEST(TestSchemaPrinter, Examples) {
   NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list}));
   fields.push_back(bag);
 
+  fields.push_back(PrimitiveNode::Make("c", Repetition::REQUIRED, Type::INT32,
+                                       LogicalType::DECIMAL, -1, 3, 2));
+
   NodePtr schema = GroupNode::Make("schema", Repetition::REPEATED, fields);
 
   std::string result = Print(schema);
-  std::string expected = R"(repeated group schema {
-  required int32 a
+  std::string expected = R"(message schema {
+  required int32 a;
   optional group bag {
-    repeated group b {
-      optional int64 item1
-      required boolean item2
+    repeated group b (LIST) {
+      optional int64 item1;
+      required boolean item2;
     }
   }
+  required int32 c (DECIMAL(3,2));
 }
 )";
   ASSERT_EQ(expected, result);


Mime
View raw message