parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jul...@apache.org
Subject parquet-mr git commit: PARQUET-668 - Provide option to disable auto crop feature in dump
Date Wed, 03 Aug 2016 21:14:30 GMT
Repository: parquet-mr
Updated Branches:
  refs/heads/master 5c85b8dda -> ea402becc


PARQUET-668 - Provide option to disable auto crop feature in dump

https://issues.apache.org/jira/browse/PARQUET-668

1. Added option `--disable-crop`
2. Updated `README.md` to reflect changes

Author: djhworld <djharperuk@gmail.com>

Closes #358 from djhworld/master and squashes the following commits:

493c3d0 [djhworld] PARQUET-668: Removed usage instructions from README, replaced with --help flag
696a5e6 [djhworld] PARQUET-668 -> Updated README.md to fix issue in usage string
6cbf59b [djhworld] PARQUET-668 - Provide option to disable auto crop feature in DumpCommand output


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/ea402bec
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/ea402bec
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/ea402bec

Branch: refs/heads/master
Commit: ea402becca436dc1a8e47ac9385a3db475b49355
Parents: 5c85b8d
Author: djhworld <djharperuk@gmail.com>
Authored: Wed Aug 3 14:14:26 2016 -0700
Committer: Julien Le Dem <julien@dremio.com>
Committed: Wed Aug 3 14:14:26 2016 -0700

----------------------------------------------------------------------
 parquet-tools/README.md                         | 47 +++-----------------
 .../parquet/tools/command/DumpCommand.java      | 31 +++++++++----
 2 files changed, 27 insertions(+), 51 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/ea402bec/parquet-tools/README.md
----------------------------------------------------------------------
diff --git a/parquet-tools/README.md b/parquet-tools/README.md
index d60e1b4..49506f3 100644
--- a/parquet-tools/README.md
+++ b/parquet-tools/README.md
@@ -61,50 +61,13 @@ java jar ./parquet-tools-<VERSION>.jar <command> my_parquet_file.lzo.parquet
 
 ## Commands Usage
 
-To run it on hadoop, you should use "hadoop jar" instead of "java jar"
+To see usage instructions for all commands: 
 
-```sh
-usage: java -jar ./parquet-tools-<VERSION>.jar cat [option...] <input>
-where option is one of:
-       --debug     Disable color output even if supported
-    -h,--help      Show this help string
-       --no-color  Disable color output even if supported
-where <input> is the parquet file to print to stdout
-
-usage: java -jar ./parquet-tools-<VERSION>.jar head [option...] <input>
-where option is one of:
-       --debug          Disable color output even if supported
-    -h,--help           Show this help string
-    -n,--records <arg>  The number of records to show (default: 5)
-       --no-color       Disable color output even if supported
-where <input> is the parquet file to print to stdout
-
-usage: java -jar ./parquet-tools-<VERSION>.jar schema [option...] <input>
-where option is one of:
-    -d,--detailed <arg>  Show detailed information about the schema.
-       --debug           Disable color output even if supported
-    -h,--help            Show this help string
-       --no-color        Disable color output even if supported
-where <input> is the parquet file containing the schema to show
-
-usage: java -jar ./parquet-tools-<VERSION>.jar meta [option...] <input>
-where option is one of:
-       --debug     Disable color output even if supported
-    -h,--help      Show this help string
-       --no-color  Disable color output even if supported
-where <input> is the parquet file to print to stdout
-
-usage: java -jar dump [option...] <input>
-where option is one of:
-    -c,--column <arg>  Dump only the given column, can be specified more than
-                       once
-    -d,--disable-data  Do not dump column data
-       --debug         Disable color output even if supported
-    -h,--help          Show this help string
-    -m,--disable-meta  Do not dump row group and page metadata
-       --no-color      Disable color output even if supported
-where <input> is the parquet file to print to stdout
 ```
+java jar ./parquet-tools-<VERSION>.jar --help
+```
+
+**Note:** To run it on hadoop, you should use `hadoop jar` instead of `java jar`
 
 ## Meta Legend
 

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/ea402bec/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
index 6d5e106..c4ed407 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
@@ -85,6 +85,10 @@ public class DumpCommand extends ArgsOnlyCommand {
                                  .withDescription("Do not dump column data")
                                  .create('d');
 
+        Option nocrop = OptionBuilder.withLongOpt("disable-crop")
+                                 .withDescription("Do not crop the output based on console width")
+                                 .create('n');
+
         Option cl = OptionBuilder.withLongOpt("column")
                                  .withDescription("Dump only the given column, can be specified more than once")
                                  .hasArgs()
@@ -92,6 +96,7 @@ public class DumpCommand extends ArgsOnlyCommand {
 
         OPTIONS.addOption(md);
         OPTIONS.addOption(dt);
+        OPTIONS.addOption(nocrop);
         OPTIONS.addOption(cl);
     }
 
@@ -122,17 +127,9 @@ public class DumpCommand extends ArgsOnlyCommand {
         ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath, NO_FILTER);
         MessageType schema = metaData.getFileMetaData().getSchema();
 
-        PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter()
-                                                 .withAutoColumn()
-                                                 .withAutoCrop()
-                                                 .withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES)
-                                                 .withColumnPadding(1)
-                                                 .withMaxBufferedLines(1000000)
-                                                 .withFlushOnTab()
-                                                 .build();
-
         boolean showmd = !options.hasOption('m');
         boolean showdt = !options.hasOption('d');
+        boolean cropoutput = !options.hasOption('n');
 
         Set<String> showColumns = null;
         if (options.hasOption('c')) {
@@ -140,6 +137,7 @@ public class DumpCommand extends ArgsOnlyCommand {
             showColumns = new HashSet<String>(Arrays.asList(cols));
         }
 
+        PrettyPrintWriter out = prettyPrintWriter(cropoutput);
         dump(out, metaData, schema, inpath, showmd, showdt, showColumns);
     }
 
@@ -346,6 +344,21 @@ public class DumpCommand extends ArgsOnlyCommand {
         return new BigInteger(data);
     }
 
+    private static PrettyPrintWriter prettyPrintWriter(boolean cropOutput) {
+        PrettyPrintWriter.Builder builder = PrettyPrintWriter.stdoutPrettyPrinter()
+                .withAutoColumn()
+                .withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES)
+                .withColumnPadding(1)
+                .withMaxBufferedLines(1000000)
+                .withFlushOnTab();
+
+        if (cropOutput) {
+            builder.withAutoCrop();
+        }
+
+        return builder.build();
+    }
+
     private static final class DumpGroupConverter extends GroupConverter {
         @Override public void start() { }
         @Override public void end() { }


Mime
View raw message