parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From alexleven...@apache.org
Subject parquet-mr git commit: PARQUET-252 : support nested container type for parquet-scrooge
Date Mon, 04 May 2015 19:08:51 GMT
Repository: parquet-mr
Updated Branches:
  refs/heads/master 7fc799839 -> 890b387d7


PARQUET-252 : support nested container type for parquet-scrooge

resubmit

Author: Tianshuo Deng <tdeng@twitter.com>

Closes #185 from tsdeng/scrooge_nested_container and squashes the following commits:

b29465f [Tianshuo Deng] retrigger jenkins
4542c1a [Tianshuo Deng] support nested container type for parquet-scrooge


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/890b387d
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/890b387d
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/890b387d

Branch: refs/heads/master
Commit: 890b387d713d04c22406db6d5a5fc9b51bec2df5
Parents: 7fc7998
Author: Tianshuo Deng <tdeng@twitter.com>
Authored: Mon May 4 12:08:41 2015 -0700
Committer: Alex Levenson <alexlevenson@twitter.com>
Committed: Mon May 4 12:08:41 2015 -0700

----------------------------------------------------------------------
 changelog.sh                                    |   1 -
 .../parquet/scrooge/ScroogeStructConverter.java | 316 +++++++++++--------
 .../scrooge/ScroogeStructConverterTest.java     | 152 ++++++---
 parquet-scrooge/src/test/thrift/test.thrift     |  93 +++++-
 4 files changed, 373 insertions(+), 189 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/890b387d/changelog.sh
----------------------------------------------------------------------
diff --git a/changelog.sh b/changelog.sh
index af7ad8b..a6eae33 100755
--- a/changelog.sh
+++ b/changelog.sh
@@ -61,4 +61,3 @@ do
     echo "* ISSUE [$PR](https://github.com/Parquet/parquet-mr/pull/$PR): ${DESC}"
   fi
 done
-

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/890b387d/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java
----------------------------------------------------------------------
diff --git a/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java
b/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java
index c112224..98eaf75 100644
--- a/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java
+++ b/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java
@@ -18,24 +18,31 @@
  */
 package org.apache.parquet.scrooge;
 
-import com.twitter.scrooge.ThriftStructCodec;
-import com.twitter.scrooge.ThriftStructFieldInfo;
-import org.apache.parquet.thrift.struct.ThriftField;
-import org.apache.parquet.thrift.struct.ThriftType;
-import org.apache.parquet.thrift.struct.ThriftType.StructType.StructOrUnionType;
-import org.apache.parquet.thrift.struct.ThriftTypeID;
+import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.lang.reflect.ParameterizedType;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
 import scala.collection.JavaConversions;
 import scala.collection.JavaConversions$;
 import scala.collection.Seq;
 import scala.reflect.Manifest;
 
-import java.lang.reflect.*;
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
+import com.twitter.scrooge.ThriftStructCodec;
+import com.twitter.scrooge.ThriftStructFieldInfo;
 
+import org.apache.parquet.thrift.struct.ThriftField;
+import org.apache.parquet.thrift.struct.ThriftType;
+import org.apache.parquet.thrift.struct.ThriftType.StructType.StructOrUnionType;
+import org.apache.parquet.thrift.struct.ThriftTypeID;
 import static org.apache.parquet.thrift.struct.ThriftField.Requirement;
-import static org.apache.parquet.thrift.struct.ThriftField.Requirement.*;
+import static org.apache.parquet.thrift.struct.ThriftField.Requirement.DEFAULT;
+import static org.apache.parquet.thrift.struct.ThriftField.Requirement.REQUIRED;
+import static org.apache.parquet.thrift.struct.ThriftField.Requirement.OPTIONAL;
+
 
 /**
  * Class to convert a scrooge generated class to {@link ThriftType.StructType}. {@link ScroogeReadSupport
} uses this
@@ -47,18 +54,30 @@ public class ScroogeStructConverter {
 
   /**
    * convert a given scrooge generated class to {@link ThriftType.StructType}
-   *
-   * @param scroogeClass
-   * @return
-   * @throws Exception
    */
   public ThriftType.StructType convert(Class scroogeClass) {
     return convertStructFromClass(scroogeClass);
   }
 
+  private static String mapKeyName(String fieldName) {
+    return fieldName + "_map_key";
+  }
+
+  private static String mapValueName(String fieldName) {
+    return fieldName + "_map_value";
+  }
+
+  private static String listElemName(String fieldName) {
+    return fieldName + "_list_elem";
+  }
+
+  private static String setElemName(String fieldName) {
+    return fieldName + "_set_elem";
+  }
+
   private Class getCompanionClass(Class klass) {
     try {
-     return Class.forName(klass.getName() + "$");
+      return Class.forName(klass.getName() + "$");
     } catch (ClassNotFoundException e) {
       throw new ScroogeSchemaConversionException("Can not find companion object for scrooge
class " + klass, e);
     }
@@ -69,9 +88,9 @@ public class ScroogeStructConverter {
   }
 
   private ThriftType.StructType convertCompanionClassToStruct(Class<?> companionClass)
{
-    ThriftStructCodec companionObject = null;
+    ThriftStructCodec<?> companionObject;
     try {
-      companionObject = (ThriftStructCodec<?>)companionClass.getField("MODULE$").get(null);
+      companionObject = (ThriftStructCodec<?>) companionClass.getField("MODULE$").get(null);
     } catch (ReflectiveOperationException e) {
       throw new ScroogeSchemaConversionException("Can not get ThriftStructCodec from companion
object of " + companionClass.getName(), e);
     }
@@ -88,9 +107,9 @@ public class ScroogeStructConverter {
     return new ThriftType.StructType(children, structOrUnionType);
   }
 
-  private Iterable<ThriftStructFieldInfo> getFieldInfos(ThriftStructCodec c) {
+  private Iterable<ThriftStructFieldInfo> getFieldInfos(ThriftStructCodec<?>
c) {
     Class<? extends ThriftStructCodec> klass = c.getClass();
-    if (isUnion(klass)){
+    if (isUnion(klass)) {
       // Union needs special treatment since currently scrooge does not generates the fieldInfos
       // field in the parent union class
       return getFieldInfosForUnion(klass);
@@ -98,9 +117,14 @@ public class ScroogeStructConverter {
       //each struct has a generated fieldInfos method to provide metadata to its fields
       try {
         Object r = klass.getMethod("fieldInfos").invoke(c);
-        Iterable<ThriftStructFieldInfo> a = JavaConversions$.MODULE$.asJavaIterable((scala.collection.Iterable<ThriftStructFieldInfo>)r);
-        return a;
-      } catch (ReflectiveOperationException e) {
+        return JavaConversions$.MODULE$.asJavaIterable((scala.collection.Iterable<ThriftStructFieldInfo>)
r);
+      } catch (ClassCastException e) {
+        throw new ScroogeSchemaConversionException("can not get field Info from: " + c.toString(),
e);
+      } catch (InvocationTargetException e) {
+        throw new ScroogeSchemaConversionException("can not get field Info from: " + c.toString(),
e);
+      } catch (NoSuchMethodException e) {
+        throw new ScroogeSchemaConversionException("can not get field Info from: " + c.toString(),
e);
+      } catch (IllegalAccessException e) {
         throw new ScroogeSchemaConversionException("can not get field Info from: " + c.toString(),
e);
       }
     }
@@ -109,57 +133,25 @@ public class ScroogeStructConverter {
 
   private Iterable<ThriftStructFieldInfo> getFieldInfosForUnion(Class klass) {
     ArrayList<ThriftStructFieldInfo> fields = new ArrayList<ThriftStructFieldInfo>();
-    for(Field f: klass.getDeclaredFields()){
-       if (f.getType().equals(Manifest.class)) {
-         Class unionClass = (Class)((ParameterizedType)f.getGenericType()).getActualTypeArguments()[0];
-         Class companionUnionClass = getCompanionClass(unionClass);
-         try {
-           Object companionUnionObj = companionUnionClass.getField("MODULE$").get(null);
-           ThriftStructFieldInfo info = (ThriftStructFieldInfo)companionUnionClass.getMethod("fieldInfo").invoke(companionUnionObj);
-           fields.add(info);
-         } catch (ReflectiveOperationException e) {
-           throw new ScroogeSchemaConversionException("can not find fiedInfo for " + unionClass,
e);
-         }
+    for (Field f : klass.getDeclaredFields()) {
+      if (f.getType().equals(Manifest.class)) {
+        Class unionClass = (Class) ((ParameterizedType) f.getGenericType()).getActualTypeArguments()[0];
+        Class companionUnionClass = getCompanionClass(unionClass);
+        try {
+          Object companionUnionObj = companionUnionClass.getField("MODULE$").get(null);
+          ThriftStructFieldInfo info = (ThriftStructFieldInfo) companionUnionClass.getMethod("fieldInfo").invoke(companionUnionObj);
+          fields.add(info);
+        } catch (ReflectiveOperationException e) {
+          throw new ScroogeSchemaConversionException("can not find fiedInfo for " + unionClass,
e);
+        }
       }
     }
     return fields;
   }
 
-  private boolean isUnion(Class klass){
-    for(Field f: klass.getDeclaredFields()) {
-         if (f.getName().equals("Union"))
-           return true;
-    }
-    return false;
-  }
-
-
-  private Requirement getRequirementType(ThriftStructFieldInfo f) {
-    if (f.isOptional() && !f.isRequired()) {
-      return OPTIONAL;
-    } else if (f.isRequired() && !f.isOptional()) {
-      return REQUIRED;
-    } else if (!f.isOptional() && !f.isRequired()) {
-      return DEFAULT;
-    } else {
-      throw new ScroogeSchemaConversionException("can not determine requirement type for
: " + f.toString()
-              + ", isOptional=" + f.isOptional() + ", isRequired=" + f.isRequired());
-    }
-  }
 
   /**
-   * Convert thrift field in scrooge to ThriftField in parquet
-   * Use reflection to detect if a field is optional or required since scrooge does not provide
requirement information
-   * in generated classes.
-   * This will not correctly recognize fields that are not specified with a requirement type
eg.
-   * struct Address {
-   * 1: string street
-   * }
-   * street will be identified as "REQUIRED"
-   *
-   * @param scroogeField
-   * @return
-   * @throws Exception
+   * Convert a field in scrooge to ThriftField in parquet
    */
   public ThriftField toThriftField(ThriftStructFieldInfo scroogeField) {
     Requirement requirement = getRequirementType(scroogeField);
@@ -169,70 +161,87 @@ public class ScroogeStructConverter {
     ThriftTypeID typeId = ThriftTypeID.fromByte(thriftTypeByte);
     ThriftType thriftType;
     switch (typeId) {
-    case BOOL:
-      thriftType = new ThriftType.BoolType();
-      break;
-    case BYTE:
-      thriftType = new ThriftType.ByteType();
-      break;
-    case DOUBLE:
-      thriftType = new ThriftType.DoubleType();
-      break;
-    case I16:
-      thriftType = new ThriftType.I16Type();
-      break;
-    case I32:
-      thriftType = new ThriftType.I32Type();
-      break;
-    case I64:
-      thriftType = new ThriftType.I64Type();
-      break;
-    case STRING:
-      thriftType = new ThriftType.StringType();
-      break;
-    case STRUCT:
-      thriftType = convertStructTypeField(scroogeField);
-      break;
-    case MAP:
-      thriftType = convertMapTypeField(scroogeField, requirement);
-      break;
-    case SET:
-      thriftType = convertSetTypeField(scroogeField, requirement);
-      break;
-    case LIST:
-      thriftType = convertListTypeField(scroogeField, requirement);
-      break;
-    case ENUM:
-      thriftType = convertEnumTypeField(scroogeField);
-      break;
-    case STOP:
-    case VOID:
-    default:
-      throw new IllegalArgumentException("can't convert type " + typeId);
+      case BOOL:
+        thriftType = new ThriftType.BoolType();
+        break;
+      case BYTE:
+        thriftType = new ThriftType.ByteType();
+        break;
+      case DOUBLE:
+        thriftType = new ThriftType.DoubleType();
+        break;
+      case I16:
+        thriftType = new ThriftType.I16Type();
+        break;
+      case I32:
+        thriftType = new ThriftType.I32Type();
+        break;
+      case I64:
+        thriftType = new ThriftType.I64Type();
+        break;
+      case STRING:
+        thriftType = new ThriftType.StringType();
+        break;
+      case STRUCT:
+        thriftType = convertStructTypeField(scroogeField);
+        break;
+      case MAP:
+        thriftType = convertMapTypeField(scroogeField, requirement);
+        break;
+      case SET:
+        thriftType = convertSetTypeField(scroogeField, requirement);
+        break;
+      case LIST:
+        thriftType = convertListTypeField(scroogeField, requirement);
+        break;
+      case ENUM:
+        thriftType = convertEnumTypeField(scroogeField);
+        break;
+      case STOP:
+      case VOID:
+      default:
+        throw new IllegalArgumentException("can't convert type " + typeId);
     }
     return new ThriftField(fieldName, fieldId, requirement, thriftType);
   }
 
   private ThriftType convertSetTypeField(ThriftStructFieldInfo f, Requirement requirement)
{
-    ThriftType elementType = convertClassToThriftType(f.valueManifest().get().runtimeClass());
+    return convertSetTypeField(f.tfield().name, f.valueManifest().get(), requirement);
+  }
+
+  private ThriftType convertSetTypeField(String fieldName, Manifest<?> valueManifest,
Requirement requirement) {
+    String elemName = setElemName(fieldName);
+    ThriftType elementType = convertClassToThriftType(elemName, requirement, valueManifest);
     //Set only has one sub-field as element field, therefore using hard-coded 1 as fieldId,
     //it's the same as the solution used in ElephantBird
-    ThriftField elementField = generateFieldWithoutId(f.tfield().name, requirement, elementType);
+    ThriftField elementField = generateFieldWithoutId(elemName, requirement, elementType);
     return new ThriftType.SetType(elementField);
   }
 
   private ThriftType convertListTypeField(ThriftStructFieldInfo f, Requirement requirement)
{
-    ThriftType elementType = convertClassToThriftType(f.valueManifest().get().runtimeClass());
-    ThriftField elementField = generateFieldWithoutId(f.tfield().name, requirement, elementType);
+    return convertListTypeField(f.tfield().name, f.valueManifest().get(), requirement);
+  }
+
+  private ThriftType convertListTypeField(String fieldName, Manifest<?> valueManifest,
Requirement requirement) {
+    String elemName = listElemName(fieldName);
+    ThriftType elementType = convertClassToThriftType(elemName, requirement, valueManifest);
+    ThriftField elementField = generateFieldWithoutId(elemName, requirement, elementType);
     return new ThriftType.ListType(elementField);
   }
 
   private ThriftType convertMapTypeField(ThriftStructFieldInfo f, Requirement requirement)
{
-    ThriftType keyType = convertClassToThriftType(f.keyManifest().get().runtimeClass());
-    ThriftField keyField = generateFieldWithoutId(f.tfield().name + "_map_key", requirement,
keyType);
+    return convertMapTypeField(f.tfield().name, f.keyManifest().get(), f.valueManifest().get(),
requirement);
+  }
 
-    ThriftType valueType = convertClassToThriftType(f.valueManifest().get().runtimeClass());
-    ThriftField valueField = generateFieldWithoutId(f.tfield().name + "_map_value", requirement,
valueType);
+  private ThriftType convertMapTypeField(String fieldName, Manifest<?> keyManifest,
Manifest<?> valueManifest, Requirement requirement) {
+
+    String keyName = mapKeyName(fieldName);
+    String valueName = mapValueName(fieldName);
+    ThriftType keyType = convertClassToThriftType(keyName, requirement, keyManifest);
+    ThriftField keyField = generateFieldWithoutId(keyName, requirement, keyType);
+
+    ThriftType valueType = convertClassToThriftType(valueName, requirement, valueManifest);
+    ThriftField valueField = generateFieldWithoutId(valueName, requirement, valueType);
 
     return new ThriftType.MapType(keyField, valueField);
   }
@@ -240,26 +249,20 @@ public class ScroogeStructConverter {
   /**
    * Generate artificial field, this kind of fields do not have a field ID.
    * To be consistent with the behavior in ElephantBird, here uses 1 as the field ID
-   *
-   * @param fieldName
-   * @param requirement
-   * @param thriftType
-   * @return
    */
   private ThriftField generateFieldWithoutId(String fieldName, Requirement requirement, ThriftType
thriftType) {
-    return new ThriftField(fieldName, (short)1, requirement, thriftType);
+    return new ThriftField(fieldName, (short) 1, requirement, thriftType);
   }
 
   /**
    * In composite types,  such as the type of the key in a map, since we use reflection to
get the type class, this method
    * does conversion based on the class provided.
    *
-   * @param typeClass
-   * @return
-   * @throws Exception
+   * @return converted ThriftType
    */
-  private ThriftType convertClassToThriftType(Class typeClass) {
-    if (typeClass == boolean.class) {
+  private ThriftType convertClassToThriftType(String name, Requirement requirement, Manifest<?>
typeManifest) {
+    Class typeClass = typeManifest.runtimeClass();
+    if (typeManifest.runtimeClass() == boolean.class) {
       return new ThriftType.BoolType();
     } else if (typeClass == byte.class) {
       return new ThriftType.ByteType();
@@ -273,6 +276,19 @@ public class ScroogeStructConverter {
       return new ThriftType.I64Type();
     } else if (typeClass == String.class) {
       return new ThriftType.StringType();
+    } else if (typeClass == scala.collection.Seq.class) {
+      Manifest<?> a = typeManifest.typeArguments().apply(0);
+      return convertListTypeField(name, a, requirement);
+    } else if (typeClass == scala.collection.Set.class) {
+      Manifest<?> setElementManifest = typeManifest.typeArguments().apply(0);
+      return convertSetTypeField(name, setElementManifest, requirement);
+    } else if (typeClass == scala.collection.Map.class) {
+      List<Manifest<?>> ms = JavaConversions.seqAsJavaList(typeManifest.typeArguments());
+      Manifest keyManifest = ms.get(0);
+      Manifest valueManifest = ms.get(1);
+      return convertMapTypeField(name, keyManifest, valueManifest, requirement);
+    } else if (com.twitter.scrooge.ThriftEnum.class.isAssignableFrom(typeClass)) {
+      return convertEnumTypeField(typeClass, name);
     } else {
       return convertStructFromClass(typeClass);
     }
@@ -291,41 +307,65 @@ public class ScroogeStructConverter {
     Object cObject = companionObjectClass.getField("MODULE$").get(null);
     Method listMethod = companionObjectClass.getMethod("list", new Class[]{});
     Object result = listMethod.invoke(cObject, null);
-    return JavaConversions.seqAsJavaList((Seq)result);
+    return JavaConversions.seqAsJavaList((Seq) result);
   }
 
   public ThriftType convertEnumTypeField(ThriftStructFieldInfo f) {
-    List<ThriftType.EnumValue> enumValues = new ArrayList<ThriftType.EnumValue>();
+    return convertEnumTypeField(f.manifest().runtimeClass(), f.tfield().name);
+  }
 
-    String enumName = f.manifest().runtimeClass().getName();
+  private ThriftType convertEnumTypeField(Class enumClass, String fieldName) {
+    List<ThriftType.EnumValue> enumValues = new ArrayList<ThriftType.EnumValue>();
+    String enumName = enumClass.getName();
     try {
       List enumCollection = getEnumList(enumName);
       for (Object enumObj : enumCollection) {
-        ScroogeEnumDesc enumDesc = ScroogeEnumDesc.getEnumDesc(enumObj);
-        //be compatible with thrift generated enum which have capitalized name
+        ScroogeEnumDesc enumDesc = ScroogeEnumDesc.fromEnum(enumObj);
         enumValues.add(new ThriftType.EnumValue(enumDesc.id, enumDesc.originalName));
       }
       return new ThriftType.EnumType(enumValues);
     } catch (ReflectiveOperationException e) {
-      throw new ScroogeSchemaConversionException("Can not convert enum field " + f, e);
+      throw new ScroogeSchemaConversionException("Can not convert enum field " + fieldName,
e);
+    } catch (RuntimeException e) {
+      throw new ScroogeSchemaConversionException("Can not convert enum field " + fieldName,
e);
+    }
+
+  }
+
+  //In scrooge generated class, if a class is a union, then it must have a field called "Union"
+  private boolean isUnion(Class klass) {
+    for (Field f : klass.getDeclaredFields()) {
+      if (f.getName().equals("Union"))
+        return true;
     }
+    return false;
+  }
 
+
+  private Requirement getRequirementType(ThriftStructFieldInfo f) {
+    if (f.isOptional() && !f.isRequired()) {
+      return OPTIONAL;
+    } else if (f.isRequired() && !f.isOptional()) {
+      return REQUIRED;
+    } else if (!f.isOptional() && !f.isRequired()) {
+      return DEFAULT;
+    } else {
+      throw new ScroogeSchemaConversionException("can not determine requirement type for
: " + f.toString()
+          + ", isOptional=" + f.isOptional() + ", isRequired=" + f.isRequired());
+    }
   }
 
   private static class ScroogeEnumDesc {
     private int id;
-    private String name;
     private String originalName;
 
-    public static ScroogeEnumDesc getEnumDesc(Object rawScroogeEnum) throws NoSuchMethodException,
InvocationTargetException, IllegalAccessException {
+    public static ScroogeEnumDesc fromEnum(Object rawScroogeEnum) throws NoSuchMethodException,
InvocationTargetException, IllegalAccessException {
       Class enumClass = rawScroogeEnum.getClass();
       Method valueMethod = enumClass.getMethod("value", new Class[]{});
-      Method nameMethod = enumClass.getMethod("name", new Class[]{});
       Method originalNameMethod = enumClass.getMethod("originalName", new Class[]{});
       ScroogeEnumDesc result = new ScroogeEnumDesc();
-      result.id = (Integer)valueMethod.invoke(rawScroogeEnum, null);
-      result.name = (String)nameMethod.invoke(rawScroogeEnum, null);
-      result.originalName = (String)originalNameMethod.invoke(rawScroogeEnum, null);
+      result.id = (Integer) valueMethod.invoke(rawScroogeEnum, null);
+      result.originalName = (String) originalNameMethod.invoke(rawScroogeEnum, null);
       return result;
     }
   }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/890b387d/parquet-scrooge/src/test/java/org/apache/parquet/scrooge/ScroogeStructConverterTest.java
----------------------------------------------------------------------
diff --git a/parquet-scrooge/src/test/java/org/apache/parquet/scrooge/ScroogeStructConverterTest.java
b/parquet-scrooge/src/test/java/org/apache/parquet/scrooge/ScroogeStructConverterTest.java
index 7b9f6b0..5037934 100644
--- a/parquet-scrooge/src/test/java/org/apache/parquet/scrooge/ScroogeStructConverterTest.java
+++ b/parquet-scrooge/src/test/java/org/apache/parquet/scrooge/ScroogeStructConverterTest.java
@@ -18,9 +18,22 @@
  */
 package org.apache.parquet.scrooge;
 
+import org.apache.thrift.TBase;
 import org.junit.Test;
 
+import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.scrooge.test.AddressWithStreetWithDefaultRequirement;
+import org.apache.parquet.scrooge.test.ListNestEnum;
+import org.apache.parquet.scrooge.test.ListNestMap;
+import org.apache.parquet.scrooge.test.ListNestSet;
+import org.apache.parquet.scrooge.test.MapNestList;
+import org.apache.parquet.scrooge.test.MapNestMap;
+import org.apache.parquet.scrooge.test.MapNestSet;
+import org.apache.parquet.scrooge.test.NestedList;
+import org.apache.parquet.scrooge.test.SetNestList;
+import org.apache.parquet.scrooge.test.SetNestMap;
+import org.apache.parquet.scrooge.test.SetNestSet;
+import org.apache.parquet.scrooge.test.StringAndBinary;
 import org.apache.parquet.scrooge.test.TestFieldOfEnum;
 import org.apache.parquet.scrooge.test.TestListPrimitive;
 import org.apache.parquet.scrooge.test.TestMapComplex;
@@ -30,94 +43,137 @@ import org.apache.parquet.scrooge.test.TestOptionalMap;
 import org.apache.parquet.scrooge.test.TestPersonWithAllInformation;
 import org.apache.parquet.scrooge.test.TestSetPrimitive;
 import org.apache.parquet.scrooge.test.TestUnion;
-import org.apache.parquet.scrooge.test.StringAndBinary;
 import org.apache.parquet.thrift.ThriftSchemaConverter;
 import org.apache.parquet.thrift.struct.ThriftType;
+
 import static org.junit.Assert.assertEquals;
 
 /**
  * Test convert scrooge schema to Parquet Schema
  */
 public class ScroogeStructConverterTest {
+
+  /**
+   * Convert ThriftStructs from a thrift class and a scrooge class, assert
+   * they are the same
+   * @param scroogeClass
+   */
+  private void shouldConvertConsistentlyWithThriftStructConverter(Class scroogeClass) throws
ClassNotFoundException {
+      Class<? extends TBase<?, ?>> thriftClass = (Class<? extends TBase<?,
?>>)Class.forName(scroogeClass.getName().replaceFirst("org.apache.parquet.scrooge.test",
"org.apache.parquet.thrift.test"));
+      ThriftType.StructType structFromThriftSchemaConverter = new ThriftSchemaConverter().toStructType(thriftClass);
+      ThriftType.StructType structFromScroogeSchemaConverter = new ScroogeStructConverter().convert(scroogeClass);
+
+      assertEquals(toParquetSchema(structFromThriftSchemaConverter), toParquetSchema(structFromScroogeSchemaConverter));
+  }
+
+  private MessageType toParquetSchema(ThriftType.StructType struct) {
+    ThriftSchemaConverter sc = new ThriftSchemaConverter();
+    return sc.convert(struct);
+  }
+
   @Test
-  public void testConvertPrimitiveMapKey() throws Exception{
-    ThriftType.StructType scroogeMap = new ScroogeStructConverter().convert(TestMapPrimitiveKey.class);
-    ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(org.apache.parquet.thrift.test.TestMapPrimitiveKey.class);
-    assertEquals(expected,scroogeMap);
+  public void testConvertPrimitiveMapKey() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(TestMapPrimitiveKey.class);
   }
 
   @Test
   public void testBinary() throws Exception {
-    ThriftType.StructType scroogeBinary = new ScroogeStructConverter().convert(StringAndBinary.class);
-    ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(org.apache.parquet.thrift.test.StringAndBinary.class);
-    assertEquals(expected, scroogeBinary);
+    shouldConvertConsistentlyWithThriftStructConverter(StringAndBinary.class);
   }
 
   @Test
   public void testUnion() throws Exception {
-    ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(org.apache.parquet.thrift.test.TestUnion.class);
-    ThriftType.StructType scroogeUnion = new ScroogeStructConverter().convert(TestUnion.class);
-    assertEquals(expected, scroogeUnion);
+    shouldConvertConsistentlyWithThriftStructConverter(TestUnion.class);
   }
 
   @Test
-  public void testConvertPrimitiveMapValue() throws Exception{
-    ThriftType.StructType scroogeMap = new ScroogeStructConverter().convert(TestMapPrimitiveValue.class);
-    ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(org.apache.parquet.thrift.test.TestMapPrimitiveValue.class);
-    assertEquals(expected,scroogeMap);
+  public void testConvertPrimitiveMapValue() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(TestMapPrimitiveValue.class);
   }
 
   @Test
-  public void testConvertPrimitiveList() throws Exception{
-    ThriftType.StructType scroogeList = new ScroogeStructConverter().convert(TestListPrimitive.class);
-    ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(org.apache.parquet.thrift.test.TestListPrimitive.class);
-    assertEquals(expected, scroogeList);
+  public void testConvertPrimitiveList() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(TestListPrimitive.class);
   }
 
   @Test
-     public void testConvertPrimitiveSet() throws Exception{
-    ThriftType.StructType scroogeList = new ScroogeStructConverter().convert(TestSetPrimitive.class);
-    ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(org.apache.parquet.thrift.test.TestSetPrimitive.class);
-    assertEquals(expected, scroogeList);
+  public void testConvertPrimitiveSet() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(TestSetPrimitive.class);
   }
 
   @Test
-  public void testConvertEnum() throws Exception{
-    ThriftType.StructType scroogeList = new ScroogeStructConverter().convert(TestFieldOfEnum.class);
-    ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(org.apache.parquet.thrift.test.TestFieldOfEnum.class);
-    assertEquals(expected, scroogeList);
+  public void testConvertEnum() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(TestFieldOfEnum.class);
   }
 
   @Test
-  public void testMapComplex() throws Exception{
-    ThriftType.StructType scroogePerson = new ScroogeStructConverter().convert(TestMapComplex.class);
-    ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(org.apache.parquet.thrift.test.TestMapComplex.class);
-    assertEquals(expected, scroogePerson);
+  public void testMapComplex() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(TestMapComplex.class);
   }
 
   @Test
-  public void testConvertStruct() throws Exception{
-    ThriftType.StructType scroogePerson = new ScroogeStructConverter().convert(TestPersonWithAllInformation.class);
-    ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(org.apache.parquet.thrift.test.TestPersonWithAllInformation.class);
-    assertEquals(expected, scroogePerson);
+  public void testConvertStruct() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(TestPersonWithAllInformation.class);
+  }
+
+  @Test
+  public void testDefaultFields() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(AddressWithStreetWithDefaultRequirement.class);
+  }
+
+  @Test
+  public void testConvertOptionalPrimitiveMap() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(TestOptionalMap.class);
+  }
+
+  @Test
+  public void testConvertNestedList() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(NestedList.class);
+  }
+
+  @Test
+  public void testConvertListNestMap() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(ListNestMap.class);
+  }
+
+  @Test
+  public void testConvertListNestEnum() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(ListNestEnum.class);
+  }
+
+  @Test
+  public void testConvertMapNestList() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(MapNestList.class);
+  }
+
+  @Test
+  public void testConvertMapNestMap() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(MapNestMap.class);
   }
 
-/**
- * TODO: DEFAULT requirement can not be identified, since scrooge does not store the requirement
type in generated class
- * Current solution uses reflection based on following rules:
- * if the getter returns option, then it's optional, otherwise it's required
- */
   @Test
-  public void testDefaultFields() throws Exception{
-    ThriftType.StructType scroogePerson = new ScroogeStructConverter().convert(AddressWithStreetWithDefaultRequirement.class);
-    ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(org.apache.parquet.thrift.test.AddressWithStreetWithDefaultRequirement.class);
-    assertEquals(expected.toJSON(), scroogePerson.toJSON());
+  public void testConvertMapNestSet() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(MapNestSet.class);
   }
 
   @Test
-  public void testConvertOptionalPrimitiveMap() throws Exception{
-    ThriftType.StructType scroogeMap = new ScroogeStructConverter().convert(TestOptionalMap.class);
-    ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(org.apache.parquet.thrift.test.TestOptionalMap.class);
-    assertEquals(expected,scroogeMap);
+  public void testConvertListNestSet() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(ListNestSet.class);
   }
+
+  @Test
+  public void testConvertSetNestSet() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(SetNestSet.class);
+  }
+
+  @Test
+  public void testConvertSetNestList() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(SetNestList.class);
+  }
+
+  @Test
+  public void testConvertSetNestMap() throws Exception {
+    shouldConvertConsistentlyWithThriftStructConverter(SetNestMap.class);
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/890b387d/parquet-scrooge/src/test/thrift/test.thrift
----------------------------------------------------------------------
diff --git a/parquet-scrooge/src/test/thrift/test.thrift b/parquet-scrooge/src/test/thrift/test.thrift
index bfb71f6..fd9eed4 100644
--- a/parquet-scrooge/src/test/thrift/test.thrift
+++ b/parquet-scrooge/src/test/thrift/test.thrift
@@ -169,8 +169,97 @@ struct TestFieldOfEnum{
 }
 
 struct StringAndBinary {
-  1: required string s;
-  2: required binary b;
+  1: required string s
+  2: required binary b
+}
+
+#fixture fox nested structures
+struct NestedList {
+  1: required list<list<Address>> rll
+  2: required list<list<list<Address>>> rlll
+  3: optional list<list<Address>> oll
+  4: optional list<list<list<Address>>> olll
+  5: list<list<Address>> ll
+  6: list<list<list<Address>>> lll
+}
+
+struct ListNestMap {
+  1: required list<map<Phone, Address>> rlm
+  2: required list<list<map<Phone, Address>>> rllm
+  3: optional list<map<Phone, Address>> olm
+  4: optional list<list<map<Phone, Address>>> ollm
+  5: list<map<Phone, Address>> lm
+  6: list<list<map<Phone, Address>>> llm
+}
+
+struct ListNestSet {
+   1: required list<set<Address>> rls
+   2: required list<list<set<Address>>> rlls
+   3: optional list<set<Address>> ols
+   4: optional list<list<set<Address>>> olls
+   5: list<set<Address>> ls
+   6: list<list<set<Address>>> lls
+}
+
+struct ListNestEnum {
+   1: required list<Operation> rle
+}
+
+struct MapNestMap {
+  1: required map<map<Phone, Address>, map<Address, Phone>> rmm
+  2: required map<map<map<Phone,Address>, Address>, map<Address, Phone>>
rmmm
+  3: optional map<map<Phone, Address>, map<Address, Phone>> omm
+  4: optional map<map<map<Phone,Address>, Address>, map<Address, Phone>>
ommm
+  5: map<map<Phone, Address>, map<Address, Phone>> mm
+  6: map<map<map<Phone,Address>, Address>, map<Address, Phone>> mmm
+}
+
+struct MapNestList {
+  1: required map<list<Phone>, list<Address>> rml
+  2: required map<list<list<Phone>>, list<list<Address>>> rmll
+  3: optional map<list<Phone>, list<Address>> oml
+  4: optional map<list<list<Phone>>, list<list<Address>>> omll
+  5: map<list<Phone>, list<Address>> ml
+  6: map<list<list<Phone>>, list<list<Address>>> mll
+}
+
+struct MapNestSet {
+  1: required map<set<Phone>, set<Address>> rms
+  2: required map<set<set<Phone>>, set<set<Address>>> rmss
+  3: optional map<set<Phone>, set<Address>> oms
+  4: optional map<set<set<Phone>>, set<set<Address>>> omss
+  5: map<set<Phone>, set<Address>> ms
+  6: map<set<set<Phone>>, set<set<Address>>> mss
+}
+
+struct SetNestSet {
+  1: required set<set<Address>> rss
+  2: required set<set<set<Address>>> rsss
+  3: optional set<set<Address>> oss
+  4: optional set<set<set<Address>>> osss
+  5: set<set<Address>> ss
+  6: set<set<set<Address>>> sss
+}
+
+struct SetNestList {
+   1: required set<list<Address>> rsl
+   2: required set<set<list<Address>>> rssl
+   3: optional set<list<Address>> osl
+   4: optional set<set<list<Address>>> ossl
+   5: set<list<Address>> sl
+   6: set<set<list<Address>>> ssl
+}
+
+struct SetNestMap {
+  1: required set<map<Phone, Address>> rsm
+  2: required set<set<map<Phone, Address>>> rssm
+  3: required set<set<list<list<map<Phone, Address>>>>> rssllm
+  4: optional set<map<Phone, Address>> osm
+  5: optional set<set<map<Phone, Address>>> ossm
+  6: optional set<set<list<list<map<Phone, Address>>>>> ossllm
+  7: set<map<Phone, Address>> sm
+  8: set<set<map<Phone, Address>>> ssm
+  9: set<set<list<list<map<Phone, Address>>>>> ssllm
 }
 
 struct AString {


Mime
View raw message