parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [parquet-cpp] branch master updated: PARQUET-1256: Add --print-key-value-metadata option to parquet_reader tool
Date Fri, 17 Aug 2018 21:52:36 GMT
This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-cpp.git


The following commit(s) were added to refs/heads/master by this push:
     new 991e4a5  PARQUET-1256: Add --print-key-value-metadata option to parquet_reader tool
991e4a5 is described below

commit 991e4a5f362714ca15facbd6b074ea44d756e0d0
Author: Jacek Pliszka <Jacek.Pliszka@gmail.com>
AuthorDate: Fri Aug 17 17:52:29 2018 -0400

    PARQUET-1256: Add --print-key-value-metadata option to parquet_reader tool
    
    This is a minor change useful for debugging.
    
    The parquet_reader tool now accepts a --print-key-value-metadata option. When it is
    present, the key-value entries of the file metadata are dumped.
    
    Created https://issues.apache.org/jira/browse/PARQUET-1256
    
    Author: Jacek Pliszka <Jacek.Pliszka@gmail.com>
    
    Closes #450 from JacekPliszka/master and squashes the following commits:
    
    0d9a108 [Jacek Pliszka] Added --print-key-value-metadata option to parquet_reader tool
---
 src/parquet/printer.cc  | 14 +++++++++++++-
 src/parquet/printer.h   |  3 ++-
 tools/parquet_reader.cc |  8 ++++++--
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/parquet/printer.cc b/src/parquet/printer.cc
index 3f18a5c..9f26a41 100644
--- a/src/parquet/printer.cc
+++ b/src/parquet/printer.cc
@@ -33,13 +33,25 @@ namespace parquet {
 #define COL_WIDTH "30"
 
 void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selected_columns,
-                                    bool print_values, const char* filename) {
+                                    bool print_values, bool print_key_value_metadata,
+                                    const char* filename) {
   const FileMetaData* file_metadata = fileReader->metadata().get();
 
   stream << "File Name: " << filename << "\n";
   stream << "Version: " << file_metadata->version() << "\n";
   stream << "Created By: " << file_metadata->created_by() << "\n";
   stream << "Total rows: " << file_metadata->num_rows() << "\n";
+
+  if (print_key_value_metadata) {
+    auto key_value_metadata = file_metadata->key_value_metadata();
+    int64_t size_of_key_value_metadata = key_value_metadata->size();
+    stream << "Key Value File Metadata: " << size_of_key_value_metadata << " entries\n";
+    for (int64_t i = 0; i < size_of_key_value_metadata; i++) {
+      stream << " Key nr " << i << " " << key_value_metadata->key(i) << ": "
+             << key_value_metadata->value(i) << "\n";
+    }
+  }
+
   stream << "Number of RowGroups: " << file_metadata->num_row_groups() << "\n";
   stream << "Number of Real Columns: "
          << file_metadata->schema()->group_node()->field_count() << "\n";
diff --git a/src/parquet/printer.h b/src/parquet/printer.h
index 3b82882..1113c3f 100644
--- a/src/parquet/printer.h
+++ b/src/parquet/printer.h
@@ -38,7 +38,8 @@ class PARQUET_EXPORT ParquetFilePrinter {
   ~ParquetFilePrinter() {}
 
   void DebugPrint(std::ostream& stream, std::list<int> selected_columns,
-                  bool print_values = true, const char* fileame = "No Name");
+                  bool print_values = true, bool print_key_value_metadata = false,
+                  const char* filename = "No Name");
 
   void JSONPrint(std::ostream& stream, std::list<int> selected_columns,
                  const char* filename = "No Name");
diff --git a/tools/parquet_reader.cc b/tools/parquet_reader.cc
index 7ef59dc..34bdfc1 100644
--- a/tools/parquet_reader.cc
+++ b/tools/parquet_reader.cc
@@ -24,13 +24,14 @@
 int main(int argc, char** argv) {
   if (argc > 5 || argc < 2) {
     std::cerr << "Usage: parquet_reader [--only-metadata] [--no-memory-map] [--json]"
-                 "[--columns=...] <file>"
+                 "[--print-key-value-metadata] [--columns=...] <file>"
               << std::endl;
     return -1;
   }
 
   std::string filename;
   bool print_values = true;
+  bool print_key_value_metadata = false;
   bool memory_map = true;
   bool format_json = false;
 
@@ -42,6 +43,8 @@ int main(int argc, char** argv) {
   for (int i = 1; i < argc; i++) {
     if ((param = std::strstr(argv[i], "--only-metadata"))) {
       print_values = false;
+    } else if ((param = std::strstr(argv[i], "--print-key-value-metadata"))) {
+      print_key_value_metadata = true;
     } else if ((param = std::strstr(argv[i], "--no-memory-map"))) {
       memory_map = false;
     } else if ((param = std::strstr(argv[i], "--json"))) {
@@ -64,7 +67,8 @@ int main(int argc, char** argv) {
     if (format_json) {
       printer.JSONPrint(std::cout, columns, filename.c_str());
     } else {
-      printer.DebugPrint(std::cout, columns, print_values, filename.c_str());
+      printer.DebugPrint(std::cout, columns, print_values,
+        print_key_value_metadata, filename.c_str());
     }
   } catch (const std::exception& e) {
     std::cerr << "Parquet error: " << e.what() << std::endl;


Mime
View raw message