parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jul...@apache.org
Subject git commit: PARQUET-22: Backport of HIVE-6938 adding rename support for parquet
Date Sat, 19 Jul 2014 01:35:35 GMT
Repository: incubator-parquet-mr
Updated Branches:
  refs/heads/master fb0104896 -> f28423863


PARQUET-22: Backport of HIVE-6938 adding rename support for parquet

This patch was included in Hive after moving the SerDe to Hive (included in Hive 0.14+).
A backport is required for use with previous versions.

Author: Daniel Weeks <dweeks@netflix.com>

Closes #13 from dcw-netflix/backport-hive-6938-rename and squashes the following commits:

453367b [Daniel Weeks] Backport of HIVE-6938 adding rename support for parquet


Project: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/commit/f2842386
Tree: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/tree/f2842386
Diff: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/diff/f2842386

Branch: refs/heads/master
Commit: f284238631cb1026b4977f6f0b7ef342260d35c5
Parents: fb01048
Author: Daniel Weeks <dweeks@netflix.com>
Authored: Fri Jul 18 18:35:12 2014 -0700
Committer: julien <julien@twitter.com>
Committed: Fri Jul 18 18:35:12 2014 -0700

----------------------------------------------------------------------
 .../parquet/read/DataWritableReadSupport.java   | 31 ++++++++++++++++++--
 1 file changed, 28 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/f2842386/parquet-hive/parquet-hive-storage-handler/src/main/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
----------------------------------------------------------------------
diff --git a/parquet-hive/parquet-hive-storage-handler/src/main/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
b/parquet-hive/parquet-hive-storage-handler/src/main/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
index a3a680c..cc85ecb 100644
--- a/parquet-hive/parquet-hive-storage-handler/src/main/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
+++ b/parquet-hive/parquet-hive-storage-handler/src/main/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
@@ -45,6 +45,7 @@ public class DataWritableReadSupport extends ReadSupport<ArrayWritable>
{
 
   private static final String TABLE_SCHEMA = "table_schema";
   public static final String HIVE_SCHEMA_KEY = "HIVE_TABLE_SCHEMA";
+  public static final String PARQUET_COLUMN_INDEX_ACCESS = "parquet.column.index.access";
 
   /**
    * From a string which columns names (including hive column), return a list
@@ -93,7 +94,8 @@ public class DataWritableReadSupport extends ReadSupport<ArrayWritable>
{
       for (final Integer idx : indexColumnsWanted) {
         typeListWanted.add(tableSchema.getType(listColumns.get(idx)));
       }
-      requestedSchemaByUser = new MessageType(fileSchema.getName(), typeListWanted);
+      requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(),
+              typeListWanted), fileSchema, configuration);
 
       return new ReadContext(requestedSchemaByUser, contextMetadata);
     } else {
@@ -121,8 +123,31 @@ public class DataWritableReadSupport extends ReadSupport<ArrayWritable>
{
       throw new IllegalStateException("ReadContext not initialized properly. " +
         "Don't know the Hive Schema.");
     }
-    final MessageType tableSchema = MessageTypeParser.
-        parseMessageType(metadata.get(HIVE_SCHEMA_KEY));
+    final MessageType tableSchema = resolveSchemaAccess(MessageTypeParser.
+        parseMessageType(metadata.get(HIVE_SCHEMA_KEY)), fileSchema, configuration);
+
     return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
   }
+
+  /**
+  * Determine the file column names based on the position within the requested columns and
+  * use that as the requested schema.
+  */
+  private MessageType resolveSchemaAccess(MessageType requestedSchema, MessageType fileSchema,
+          Configuration configuration) {
+    if(configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false)) {
+      final List<String> listColumns = getColumns(configuration.get(IOConstants.COLUMNS));
+
+      List<Type> requestedTypes = new ArrayList<Type>();
+
+      for(Type t : requestedSchema.getFields()) {
+        int index = listColumns.indexOf(t.getName());
+        requestedTypes.add(fileSchema.getType(index));
+      }
+
+      requestedSchema = new MessageType(requestedSchema.getName(), requestedTypes);
+    }
+
+    return requestedSchema;
+  }
 }


Mime
View raw message