parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject parquet-mr git commit: PARQUET-290: Add data model to Avro reader builder
Date Thu, 02 Jul 2015 00:30:34 GMT
Repository: parquet-mr
Updated Branches:
  refs/heads/master 2f2c8b1cc -> c334a1bca


PARQUET-290: Add data model to Avro reader builder

This PR currently includes #203, which will be removed when it is merged.

Author: Ryan Blue <blue@apache.org>

Closes #204 from rdblue/PARQUET-290-data-model-builder and squashes the following commits:

d257a2c [Ryan Blue] PARQUET-290: Add Avro data model to reader builder.


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/c334a1bc
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/c334a1bc
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/c334a1bc

Branch: refs/heads/master
Commit: c334a1bca8338c92e76f0f1cf2ef4884e3eb5dbd
Parents: 2f2c8b1
Author: Ryan Blue <blue@apache.org>
Authored: Wed Jul 1 17:30:29 2015 -0700
Committer: Ryan Blue <blue@apache.org>
Committed: Wed Jul 1 17:30:29 2015 -0700

----------------------------------------------------------------------
 .../apache/parquet/avro/AvroParquetReader.java  | 48 +++++++++++++++++++-
 .../apache/parquet/avro/AvroReadSupport.java    | 14 +++++-
 .../parquet/avro/SpecificDataSupplier.java      |  2 +-
 pom.xml                                         |  2 +
 4 files changed, 63 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c334a1bc/parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java
----------------------------------------------------------------------
diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java
index c4a010c..0433bb9 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java
@@ -20,11 +20,14 @@ package org.apache.parquet.avro;
 
 import java.io.IOException;
 
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.specific.SpecificData;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 
 import org.apache.parquet.filter.UnboundRecordFilter;
 import org.apache.parquet.hadoop.ParquetReader;
+import org.apache.parquet.hadoop.api.ReadSupport;
 
 /**
  * Read Avro records from a Parquet file.
@@ -32,7 +35,7 @@ import org.apache.parquet.hadoop.ParquetReader;
 public class AvroParquetReader<T> extends ParquetReader<T> {
 
   public static <T> Builder<T> builder(Path file) {
-    return ParquetReader.builder(new AvroReadSupport<T>(), file);
+    return new Builder<T>(file);
   }
 
   /**
@@ -66,4 +69,47 @@ public class AvroParquetReader<T> extends ParquetReader<T> {
   public AvroParquetReader(Configuration conf, Path file, UnboundRecordFilter unboundRecordFilter) throws IOException {
     super(conf, file, new AvroReadSupport<T>(), unboundRecordFilter);
   }
+
+  public static class Builder<T> extends ParquetReader.Builder {
+
+    private GenericData model = null;
+    private boolean enableCompatibility = true;
+    private boolean isReflect = true;
+
+    private Builder(Path path) {
+      super(path);
+    }
+
+    public Builder<T> withDataModel(GenericData model) {
+      this.model = model;
+
+      // only generic and specific are supported by AvroIndexedRecordConverter
+      if (model.getClass() != GenericData.class &&
+          model.getClass() != SpecificData.class) {
+        isReflect = true;
+      }
+
+      return this;
+    }
+
+    public Builder<T> disableCompatibility() {
+      this.enableCompatibility = false;
+      return this;
+    }
+
+    public Builder<T> withCompatibility(boolean enableCompatibility) {
+      this.enableCompatibility = enableCompatibility;
+      return this;
+    }
+
+    @Override
+    protected ReadSupport<T> getReadSupport() {
+      if (isReflect) {
+        conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
+      } else {
+        conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, enableCompatibility);
+      }
+      return new AvroReadSupport<T>(model);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c334a1bc/parquet-avro/src/main/java/org/apache/parquet/avro/AvroReadSupport.java
----------------------------------------------------------------------
diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroReadSupport.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroReadSupport.java
index 9036a63..e73e8af 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroReadSupport.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroReadSupport.java
@@ -68,6 +68,15 @@ public class AvroReadSupport<T> extends ReadSupport<T> {
     configuration.set(AVRO_DATA_SUPPLIER, clazz.getName());
   }
 
+  private GenericData model = null;
+
+  public AvroReadSupport() {
+  }
+
+  public AvroReadSupport(GenericData model) {
+    this.model = model;
+  }
+
   @Override
   public ReadContext init(Configuration configuration,
                           Map<String, String> keyValueMetaData,
@@ -130,7 +139,10 @@ public class AvroReadSupport<T> extends ReadSupport<T> {
         parquetSchema, avroSchema, model);
   }
 
-  private static GenericData getDataModel(Configuration conf) {
+  private GenericData getDataModel(Configuration conf) {
+    if (model != null) {
+      return model;
+    }
     Class<? extends AvroDataSupplier> suppClass = conf.getClass(
         AVRO_DATA_SUPPLIER, SpecificDataSupplier.class, AvroDataSupplier.class);
     return ReflectionUtils.newInstance(suppClass, conf).get();

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c334a1bc/parquet-avro/src/main/java/org/apache/parquet/avro/SpecificDataSupplier.java
----------------------------------------------------------------------
diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/SpecificDataSupplier.java b/parquet-avro/src/main/java/org/apache/parquet/avro/SpecificDataSupplier.java
index 2bd2599..c9ed577 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/SpecificDataSupplier.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/SpecificDataSupplier.java
@@ -21,7 +21,7 @@ package org.apache.parquet.avro;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.specific.SpecificData;
 
-class SpecificDataSupplier implements AvroDataSupplier {
+public class SpecificDataSupplier implements AvroDataSupplier {
   @Override
   public GenericData get() {
     return SpecificData.get();

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c334a1bc/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index d48021c..177c649 100644
--- a/pom.xml
+++ b/pom.xml
@@ -230,6 +230,8 @@
                      <exclude>org/apache/parquet/Version</exclude>
                      <exclude>org/apache/parquet/schema/**</exclude> <!-- methods moved to new superclass -->
                      <exclude>org/apache/parquet/thrift/ThriftSchemaConvertVisitor</exclude> <!-- not public -->
+                     <exclude>org/apache/parquet/avro/AvroParquetReader</exclude> <!-- returns subclass of old return class -->
+                     <exclude>org/apache/parquet/avro/SpecificDataSupplier</exclude> <!-- made public -->
                    </excludes>
                  </requireBackwardCompatibility>
                </rules>


Mime
View raw message