parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From lu...@apache.org
Subject parquet-mr git commit: PARQUET-175 reading custom protobuf class
Date Thu, 30 Apr 2015 10:39:29 GMT
Repository: parquet-mr
Updated Branches:
  refs/heads/master 9993450ad -> 98f54c158


PARQUET-175 reading custom protobuf class

 Changes to be committed:
	modified:   parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoReadSupport.java
	modified:   parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
	modified:   parquet-protobuf/src/test/resources/TestProtobuf.proto

Author: Nalezenec, Lukas <lukas.nalezenec@gmail.com>

Closes #183 from lukasnalezenec/master and squashes the following commits:

796cd39 [Nalezenec, Lukas] PARQUET-175 Allow setting of a custom protobuf class when reading
parquet file using parquet-protobuf.


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/98f54c15
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/98f54c15
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/98f54c15

Branch: refs/heads/master
Commit: 98f54c158acb12a26fa6f335b1665accd2aed347
Parents: 9993450
Author: Nalezenec, Lukas <lukas.nalezenec@gmail.com>
Authored: Thu Apr 30 12:33:56 2015 +0200
Committer: Nalezenec, Lukas <lukas.nalezenec@gmail.com>
Committed: Thu Apr 30 12:33:56 2015 +0200

----------------------------------------------------------------------
 .../apache/parquet/proto/ProtoReadSupport.java  | 24 ++++++++++--
 .../proto/ProtoInputOutputFormatTest.java       | 41 ++++++++++++++++----
 .../src/test/resources/TestProtobuf.proto       | 10 +++++
 3 files changed, 63 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/98f54c15/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoReadSupport.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoReadSupport.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoReadSupport.java
index bc7f4d2..e6921db 100644
--- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoReadSupport.java
+++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoReadSupport.java
@@ -46,6 +46,16 @@ public class ProtoReadSupport<T extends Message> extends ReadSupport<T> {
     configuration.set(PB_REQUESTED_PROJECTION, requestedProjection);
   }
 
+  /**
+   * Set name of protobuf class to be used for reading data.
+   * If no class is set, value from file header is used.
+   * Note that the value in header is present only if the file was written
+   * using parquet-protobuf project, it will fail otherwise.
+   * */
+  public static void setProtobufClass(Configuration configuration, String protobufClass) {
+    configuration.set(PB_CLASS, protobufClass);
+  }
+
   @Override
   public ReadContext init(InitContext context) {
     String requestedProjectionString = context.getConfiguration().get(PB_REQUESTED_PROJECTION);
@@ -63,16 +73,22 @@ public class ProtoReadSupport<T extends Message> extends ReadSupport<T> {
 
   @Override
   public RecordMaterializer<T> prepareForRead(Configuration configuration, Map<String, String> keyValueMetaData, MessageType fileSchema, ReadContext readContext) {
-    String strProtoClass = keyValueMetaData.get(PB_CLASS);
+    String headerProtoClass = keyValueMetaData.get(PB_CLASS);
+    String configuredProtoClass = configuration.get(PB_CLASS);
+
+    if (configuredProtoClass != null) {
+      LOG.debug("Replacing class " + headerProtoClass + " by " + configuredProtoClass);
+      headerProtoClass = configuredProtoClass;
+    }
 
-    if (strProtoClass == null) {
+    if (headerProtoClass == null) {
       throw new RuntimeException("I Need parameter " + PB_CLASS + " with Protocol Buffer class");
     }
 
-    LOG.debug("Reading data with Protocol Buffer class" + strProtoClass);
+    LOG.debug("Reading data with Protocol Buffer class " + headerProtoClass);
 
     MessageType requestedSchema = readContext.getRequestedSchema();
-    Class<? extends Message> protobufClass = Protobufs.getProtobufClass(strProtoClass);
+    Class<? extends Message> protobufClass = Protobufs.getProtobufClass(headerProtoClass);
     return new ProtoRecordMaterializer(requestedSchema, protobufClass);
   }
 

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/98f54c15/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
index 51927f2..5c6ebca 100644
--- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
+++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
@@ -19,24 +19,21 @@
 package org.apache.parquet.proto;
 
 import com.google.protobuf.Message;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
-import org.junit.Test;
-import org.apache.parquet.Log;
+import org.apache.parquet.proto.test.TestProtobuf;
+import org.apache.parquet.proto.test.TestProtobuf.FirstCustomClassMessage;
+import org.apache.parquet.proto.test.TestProtobuf.SecondCustomClassMessage;
 import org.apache.parquet.proto.utils.ReadUsingMR;
 import org.apache.parquet.proto.utils.WriteUsingMR;
-import org.apache.parquet.proto.test.TestProtobuf;
+import org.junit.Test;
 
 import java.util.List;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.*;
 
 
 public class ProtoInputOutputFormatTest {
 
-  private static final Log LOG = Log.getLog(ProtoInputOutputFormatTest.class);
-
   /**
    * Writes Protocol Buffer using first MR job, reads written file using
    * second job and compares input and output.
@@ -96,6 +93,34 @@ public class ProtoInputOutputFormatTest {
   }
 
   /**
+   * When user specified protobuffer class in configuration,
+   * It should replace class specified in header.
+   * */
+  @Test
+  public void testCustomProtoClass() throws Exception {
+    FirstCustomClassMessage.Builder inputMessage;
+    inputMessage = FirstCustomClassMessage.newBuilder();
+    inputMessage.setString("writtenString");
+
+    Path outputPath = new WriteUsingMR().write(new Message[]{inputMessage.build()});
+    ReadUsingMR readUsingMR = new ReadUsingMR();
+    String customClass = SecondCustomClassMessage.class.getName();
+    ProtoReadSupport.setProtobufClass(readUsingMR.getConfiguration(), customClass);
+    List<Message> result = readUsingMR.read(outputPath);
+
+    assertEquals(1, result.size());
+    Message msg = result.get(0);
+    assertFalse("Class from header returned.",
+            msg instanceof FirstCustomClassMessage);
+    assertTrue("Custom class was not used",
+            msg instanceof SecondCustomClassMessage);
+
+    String stringValue;
+    stringValue = ((SecondCustomClassMessage) msg).getString();
+    assertEquals("writtenString", stringValue);
+  }
+
+  /**
    * Runs job that writes input to file and then job reading data back.
    */
   public static List<Message> runMRJobs(Message... messages) throws Exception {

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/98f54c15/parquet-protobuf/src/test/resources/TestProtobuf.proto
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/test/resources/TestProtobuf.proto b/parquet-protobuf/src/test/resources/TestProtobuf.proto
index d0e8845..afa0f63 100644
--- a/parquet-protobuf/src/test/resources/TestProtobuf.proto
+++ b/parquet-protobuf/src/test/resources/TestProtobuf.proto
@@ -126,4 +126,14 @@ message HighIndexMessage {
     repeated int32 repeatedInt = 50000;
 }
 
+//custom proto class - ProtoInputOutputFormatTest
+
+message FirstCustomClassMessage {
+    optional string string = 11;
+}
+
+message SecondCustomClassMessage {
+    optional string string = 11;
+}
+
 //please place your unit test Protocol Buffer definitions here.


Mime
View raw message