parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dwe...@apache.org
Subject parquet-mr git commit: PARQUET-266: Add support for lists of primitives to Pig schema converter
Date Fri, 05 Jun 2015 17:33:17 GMT
Repository: parquet-mr
Updated Branches:
  refs/heads/master 918609f2c -> 2e62764c0


PARQUET-266: Add support for lists of primitives to Pig schema converter

Author: Christian Rolf <christian.rolf@adello.com>

Closes #209 from ccrolf/PigPrimitivesList and squashes the following commits:

5a69273 [Christian Rolf] Add support for lists of primitives to Pig schema converter


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/2e62764c
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/2e62764c
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/2e62764c

Branch: refs/heads/master
Commit: 2e62764c0c386632e87ee8d12d0505848df1015e
Parents: 918609f
Author: Christian Rolf <christian.rolf@adello.com>
Authored: Fri Jun 5 10:32:54 2015 -0700
Committer: Daniel Weeks <dweeks@netflix.com>
Committed: Fri Jun 5 10:32:54 2015 -0700

----------------------------------------------------------------------
 .../apache/parquet/pig/PigSchemaConverter.java  |  9 ++++--
 .../parquet/pig/TestPigSchemaConverter.java     | 33 ++++++++++++++++++++
 2 files changed, 40 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/2e62764c/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java
----------------------------------------------------------------------
diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java
index 87a9841..e3e4b53 100644
--- a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java
+++ b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java
@@ -61,6 +61,7 @@ import org.apache.parquet.schema.Type.Repetition;
  */
 public class PigSchemaConverter {
   private static final Log LOG = Log.getLog(PigSchemaConverter.class);
+  static final String ARRAY_VALUE_NAME = "value";
   private ColumnAccess columnAccess;
 
   public PigSchemaConverter() {
@@ -288,8 +289,12 @@ public class PigSchemaConverter {
         s.getField(0).alias = null;
         return new FieldSchema(fieldName, s, DataType.MAP);
       case LIST:
-        if (parquetGroupType.getFieldCount()!= 1 || parquetGroupType.getType(0).isPrimitive()) {
-          throw new SchemaConversionException("Invalid list type " + parquetGroupType );
+        Type type = parquetGroupType.getType(0);
+        if (parquetGroupType.getFieldCount()!= 1 || type.isPrimitive()) {
+          // an array is effectively a bag
+          Schema primitiveSchema = new Schema(getSimpleFieldSchema(parquetGroupType.getFieldName(0), type));
+          Schema tupleSchema = new Schema(new FieldSchema(ARRAY_VALUE_NAME, primitiveSchema, DataType.TUPLE));
+          return new FieldSchema(fieldName, tupleSchema, DataType.BAG);
         }
         GroupType tupleType = parquetGroupType.getType(0).asGroupType();
         if (!tupleType.isRepetition(Repetition.REPEATED)) {

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/2e62764c/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java
----------------------------------------------------------------------
diff --git a/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java b/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java
index 8b61b4b..7641232 100644
--- a/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java
+++ b/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java
@@ -33,6 +33,11 @@ import java.util.Set;
 import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import org.apache.parquet.schema.Type;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+import org.apache.parquet.schema.Types;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
 import org.apache.pig.impl.util.Utils;
 import org.junit.Test;
@@ -77,6 +82,23 @@ public class TestPigSchemaConverter {
     testPigConversion("a:map[{bag: (a:int)}]");
   }
 
+  @Test
+  public void testListsOfPrimitive() throws Exception {
+    for (Type.Repetition repetition : Type.Repetition.values()) {
+      for (Type.Repetition valueRepetition : Type.Repetition.values()) {
+        for (PrimitiveType.PrimitiveTypeName primitiveTypeName : PrimitiveType.PrimitiveTypeName.values()) {
+          if (primitiveTypeName != PrimitiveType.PrimitiveTypeName.INT96) { // INT96 is NYI
+            Types.PrimitiveBuilder<PrimitiveType> value = Types.primitive(primitiveTypeName, valueRepetition);
+            if (primitiveTypeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+              value.length(1);
+            GroupType type = Types.buildGroup(repetition).addField(value.named("b")).as(OriginalType.LIST).named("a");
+            pigSchemaConverter.convertField(type); // no exceptions, please
+          }
+        }
+      }
+    }
+  }
+
   private void testConversion(String pigSchemaString, String schemaString) throws Exception {
     Schema pigSchema = Utils.getSchemaFromString(pigSchemaString);
     MessageType schema = pigSchemaConverter.convert(pigSchema);
@@ -182,6 +204,17 @@ public class TestPigSchemaConverter {
         "  }\n" +
         "}\n");
   }
+
+  @Test
+  public void testListOfPrimitiveIsABag() throws Exception {
+    testFixedConversion(
+        "message pig_schema {\n" +
+        "  optional group a (LIST) {\n" +
+        "    repeated binary b (UTF8);\n" +
+        "  }\n" +
+        "}\n",
+        "a:{" + PigSchemaConverter.ARRAY_VALUE_NAME + ":(b: chararray)}");
+  }
   
   private void testFixedConversion(String schemaString, String pigSchemaString)
       throws Exception {


Mime
View raw message