parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jul...@apache.org
Subject incubator-parquet-mr git commit: PARQUET-122: make task side metadata true by default
Date Fri, 07 Nov 2014 19:02:39 GMT
Repository: incubator-parquet-mr
Updated Branches:
  refs/heads/master f1da5e927 -> 92e6d7160


PARQUET-122: make task side metadata true by default

Author: julien <julien@twitter.com>

Closes #78 from julienledem/task_side_metadata_default_true and squashes the following commits:

32451a7 [julien] make task side metadata true by default


Project: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/commit/92e6d716
Tree: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/tree/92e6d716
Diff: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/diff/92e6d716

Branch: refs/heads/master
Commit: 92e6d716069686583b852a6dcf12af986d6dc694
Parents: f1da5e9
Author: julien <julien@twitter.com>
Authored: Fri Nov 7 11:02:27 2014 -0800
Committer: julien <julien@twitter.com>
Committed: Fri Nov 7 11:02:27 2014 -0800

----------------------------------------------------------------------
 .../main/java/parquet/hadoop/ParquetInputFormat.java | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/92e6d716/parquet-hadoop/src/main/java/parquet/hadoop/ParquetInputFormat.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/parquet/hadoop/ParquetInputFormat.java b/parquet-hadoop/src/main/java/parquet/hadoop/ParquetInputFormat.java
index d79ca51..47ac7ea 100644
--- a/parquet-hadoop/src/main/java/parquet/hadoop/ParquetInputFormat.java
+++ b/parquet-hadoop/src/main/java/parquet/hadoop/ParquetInputFormat.java
@@ -15,6 +15,7 @@
  */
 package parquet.hadoop;
 
+import static java.lang.Boolean.TRUE;
 import static parquet.Preconditions.checkArgument;
 
 import java.io.IOException;
@@ -55,7 +56,6 @@ import parquet.hadoop.api.ReadSupport;
 import parquet.hadoop.api.ReadSupport.ReadContext;
 import parquet.hadoop.metadata.BlockMetaData;
 import parquet.hadoop.metadata.ColumnChunkMetaData;
-import parquet.hadoop.metadata.FileMetaData;
 import parquet.hadoop.metadata.GlobalMetaData;
 import parquet.hadoop.metadata.ParquetMetadata;
 import parquet.hadoop.util.ConfigurationUtil;
@@ -73,6 +73,12 @@ import parquet.schema.MessageTypeParser;
  * The requestedSchema will control how the original records get projected by the loader.
  * It must be a subset of the original schema. Only the columns needed to reconstruct the records with the requestedSchema will be scanned.
  *
+ * @see #READ_SUPPORT_CLASS
+ * @see #UNBOUND_RECORD_FILTER
+ * @see #STRICT_TYPE_CHECKING
+ * @see #FILTER_PREDICATE
+ * @see #TASK_SIDE_METADATA
+ *
  * @author Julien Le Dem
  *
  * @param <T> the type of the materialized records
@@ -101,6 +107,11 @@ public class ParquetInputFormat<T> extends FileInputFormat<Void, T> {
    */
   public static final String FILTER_PREDICATE = "parquet.private.read.filter.predicate";
 
+  /**
+   * key to turn on or off task side metadata loading (default true)
+   * if true then metadata is read on the task side and some tasks may finish immediately.
+   * if false metadata is read on the client which is slower if there is a lot of metadata but tasks will only be spawned if there is work to do.
+   */
   public static final String TASK_SIDE_METADATA = "parquet.task.side.metadata";
 
   private static final int MIN_FOOTER_CACHE_SIZE = 100;
@@ -110,7 +121,7 @@ public class ParquetInputFormat<T> extends FileInputFormat<Void, T> {
   }
 
   public static boolean isTaskSideMetaData(Configuration configuration) {
-    return configuration.getBoolean(TASK_SIDE_METADATA, Boolean.FALSE);
+    return configuration.getBoolean(TASK_SIDE_METADATA, TRUE);
   }
 
   public static void setReadSupportClass(Job job,  Class<?> readSupportClass) {


Mime
View raw message