parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tians...@apache.org
Subject git commit: PARQUET-90: integrate field ids in schema
Date Thu, 25 Sep 2014 18:26:05 GMT
Repository: incubator-parquet-mr
Updated Branches:
  refs/heads/master 0c4f13a84 -> 3a082e8e3


PARQUET-90: integrate field ids in schema

This integrates support for field is that was introduced in Parquet format.
Thrift and Protobufs ids will now be saved in the Parquet schema.

Author: julien <julien@twitter.com>

Closes #56 from julienledem/field_ids and squashes the following commits:

62c2809 [julien] remove withOriginalType; use Typles builder more
8ff0034 [julien] review feedback
084c8be [julien] binary compat
85d785c [julien] add proto id in schema; fix schema parsing for ids
d4be488 [julien] integrate field ids in schema


Project: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/commit/3a082e8e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/tree/3a082e8e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/diff/3a082e8e

Branch: refs/heads/master
Commit: 3a082e8e390898646c094d20f4ec1eeba45b79ac
Parents: 0c4f13a
Author: julien <julien@twitter.com>
Authored: Thu Sep 25 11:25:53 2014 -0700
Committer: Tianshuo Deng <tdeng@twitter.com>
Committed: Thu Sep 25 11:25:53 2014 -0700

----------------------------------------------------------------------
 .../src/main/java/parquet/schema/GroupType.java |  82 ++++++--
 .../main/java/parquet/schema/MessageType.java   |  10 +-
 .../java/parquet/schema/MessageTypeParser.java  |  21 +-
 .../main/java/parquet/schema/PrimitiveType.java |  77 ++++---
 .../src/main/java/parquet/schema/Type.java      | 102 +++++++++-
 .../src/main/java/parquet/schema/Types.java     |  31 ++-
 .../java/parquet/parser/TestParquetParser.java  | 200 +++++++++++--------
 .../java/parquet/schema/TestMessageType.java    |  14 +-
 .../java/parquet/schema/TestTypeBuilders.java   |  16 +-
 .../converter/ParquetMetadataConverter.java     |   3 +
 .../java/parquet/pig/PigSchemaConverter.java    |  64 +++---
 .../parquet/proto/ProtoSchemaConverter.java     | 106 +++++-----
 .../parquet/proto/ProtoSchemaConverterTest.java |  58 +++---
 .../thrift/ThriftSchemaConvertVisitor.java      |  96 +++++----
 .../parquet/thrift/projection/FieldsPath.java   |   5 +-
 .../thrift/TestThriftSchemaConverter.java       | 113 ++++++-----
 16 files changed, 611 insertions(+), 387 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/main/java/parquet/schema/GroupType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/schema/GroupType.java b/parquet-column/src/main/java/parquet/schema/GroupType.java
index fefbcb2..3d16300 100644
--- a/parquet-column/src/main/java/parquet/schema/GroupType.java
+++ b/parquet-column/src/main/java/parquet/schema/GroupType.java
@@ -15,6 +15,8 @@
  */
 package parquet.schema;
 
+import static java.util.Arrays.asList;
+
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -40,7 +42,7 @@ public class GroupType extends Type {
    * @param fields the contained fields
    */
   public GroupType(Repetition repetition, String name, List<Type> fields) {
-    this(repetition, name, null, fields);
+    this(repetition, name, null, fields, null);
   }
 
   /**
@@ -49,7 +51,7 @@ public class GroupType extends Type {
    * @param fields the contained fields
    */
   public GroupType(Repetition repetition, String name, Type... fields) {
-    this(repetition, name, null, fields);
+    this(repetition, name, Arrays.asList(fields));
   }
 
   /**
@@ -58,6 +60,7 @@ public class GroupType extends Type {
    * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
    * @param fields the contained fields
    */
+  @Deprecated
   public GroupType(Repetition repetition, String name, OriginalType originalType, Type... fields) {
     this(repetition, name, originalType, Arrays.asList(fields));
   }
@@ -68,8 +71,20 @@ public class GroupType extends Type {
    * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
    * @param fields the contained fields
    */
+  @Deprecated
   public GroupType(Repetition repetition, String name, OriginalType originalType, List<Type> fields) {
-    super(name, repetition, originalType);
+    this(repetition, name, originalType, fields, null);
+  }
+
+  /**
+   * @param repetition OPTIONAL, REPEATED, REQUIRED
+   * @param name the name of the field
+   * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
+   * @param fields the contained fields
+   * @param id the id of the field
+   */
+  GroupType(Repetition repetition, String name, OriginalType originalType, List<Type> fields, ID id) {
+    super(name, repetition, originalType, id);
     this.fields = fields;
     this.indexByName = new HashMap<String, Integer>();
     for (int i = 0; i < fields.size(); i++) {
@@ -78,6 +93,31 @@ public class GroupType extends Type {
   }
 
   /**
+   * @param id the field id
+   * @return a new GroupType with the same fields and a new id
+   */
+  @Override
+  public GroupType withId(int id) {
+    return new GroupType(getRepetition(), getName(), getOriginalType(), fields, new ID(id));
+  }
+
+  /**
+   * @param newFields
+   * @return a group with the same attributes and new fields.
+   */
+  public GroupType withNewFields(List<Type> newFields) {
+    return new GroupType(getRepetition(), getName(), getOriginalType(), newFields, getId());
+  }
+
+  /**
+   * @param newFields
+   * @return a group with the same attributes and new fields.
+   */
+  public GroupType withNewFields(Type... newFields) {
+    return withNewFields(asList(newFields));
+  }
+
+  /**
    * returns the name of the corresponding field
    * @param index the index of the desired field in this type
    * @return the name of the field at this index
@@ -169,6 +209,7 @@ public class GroupType extends Type {
         .append(" group ")
         .append(getName())
         .append(getOriginalType() == null ? "" : " (" + getOriginalType() +")")
+        .append(getId() == null ? "" : " = " + getId())
         .append(" {\n");
     membersDisplayString(sb, indent + "  ");
     sb.append(indent)
@@ -183,32 +224,33 @@ public class GroupType extends Type {
     visitor.visit(this);
   }
 
+  @Override @Deprecated
+  protected int typeHashCode() {
+    return hashCode();
+  }
+
+  @Override @Deprecated
+  protected boolean typeEquals(Type other) {
+    return equals(other);
+  }
+
   /**
    * {@inheritDoc}
    */
   @Override
-  protected int typeHashCode() {
-    int c = 17;
-    c += 31 * getRepetition().hashCode();
-    c += 31 * getName().hashCode();
-    c += 31 * getFields().hashCode();
-    return c;
+  public int hashCode() {
+    return super.hashCode() * 31 + getFields().hashCode();
   }
 
   /**
    * {@inheritDoc}
    */
   @Override
-  protected boolean typeEquals(Type other) {
-    Type otherType = (Type) other;
-    if (otherType.isPrimitive()) {
-      return false;
-    } else {
-      GroupType groupType = otherType.asGroupType();
-      return getRepetition() == groupType.getRepetition() &&
-          getName().equals(groupType.getName()) &&
-          getFields().equals(groupType.getFields());
-    }
+  protected boolean equals(Type otherType) {
+    return
+        !otherType.isPrimitive()
+        && super.equals(otherType)
+        && getFields().equals(otherType.asGroupType().getFields());
   }
 
   @Override
@@ -312,7 +354,7 @@ public class GroupType extends Type {
   List<Type> mergeFields(GroupType toMerge) {
     return mergeFields(toMerge, true);
   }
-  
+
   /**
    * produces the list of fields resulting from merging toMerge into the fields of this
    * @param toMerge the group containing the fields to merge

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/main/java/parquet/schema/MessageType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/schema/MessageType.java b/parquet-column/src/main/java/parquet/schema/MessageType.java
index 9c05747..6815685 100644
--- a/parquet-column/src/main/java/parquet/schema/MessageType.java
+++ b/parquet-column/src/main/java/parquet/schema/MessageType.java
@@ -20,8 +20,6 @@ import java.util.List;
 
 import parquet.column.ColumnDescriptor;
 import parquet.io.InvalidRecordException;
-import parquet.schema.PrimitiveType.PrimitiveTypeName;
-
 
 /**
  * The root of a schema
@@ -109,10 +107,10 @@ public final class MessageType extends GroupType {
       // TODO: optimize this
       PrimitiveType primitiveType = getType(path).asPrimitiveType();
       columns.add(new ColumnDescriptor(
-                      path, 
-                      primitiveType.getPrimitiveTypeName(), 
+                      path,
+                      primitiveType.getPrimitiveTypeName(),
                       primitiveType.getTypeLength(),
-                      getMaxRepetitionLevel(path), 
+                      getMaxRepetitionLevel(path),
                       getMaxDefinitionLevel(path)));
     }
     return columns;
@@ -139,7 +137,7 @@ public final class MessageType extends GroupType {
   public MessageType union(MessageType toMerge) {
     return union(toMerge, true);
   }
-  
+
   public MessageType union(MessageType toMerge, boolean strict) {
     return new MessageType(this.getName(), mergeFields(toMerge, strict));
   }

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/main/java/parquet/schema/MessageTypeParser.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/schema/MessageTypeParser.java b/parquet-column/src/main/java/parquet/schema/MessageTypeParser.java
index b1dca02..01dfe17 100644
--- a/parquet-column/src/main/java/parquet/schema/MessageTypeParser.java
+++ b/parquet-column/src/main/java/parquet/schema/MessageTypeParser.java
@@ -21,6 +21,8 @@ import java.util.StringTokenizer;
 import parquet.Log;
 import parquet.schema.PrimitiveType.PrimitiveTypeName;
 import parquet.schema.Type.Repetition;
+import parquet.schema.Types.GroupBuilder;
+import parquet.schema.Types.PrimitiveBuilder;
 
 /**
  * Parses a schema from a textual format similar to that described in the Dremel paper.
@@ -38,7 +40,7 @@ public class MessageTypeParser {
     private StringBuffer currentLine = new StringBuffer();
 
     public Tokenizer(String schemaString, String string) {
-      st = new StringTokenizer(schemaString, " ,;{}()\n\t", true);
+      st = new StringTokenizer(schemaString, " ,;{}()\n\t=", true);
     }
 
     public String nextToken() {
@@ -107,8 +109,8 @@ public class MessageTypeParser {
     }
   }
 
-  private static void addGroupType(String t, Tokenizer st, Repetition r, Types.GroupBuilder builder) {
-    Types.GroupBuilder childBuilder = builder.group(r);
+  private static void addGroupType(String t, Tokenizer st, Repetition r, GroupBuilder<?> builder) {
+    GroupBuilder<?> childBuilder = builder.group(r);
     String name = st.nextToken();
 
     // Read annotation, if any.
@@ -120,7 +122,10 @@ public class MessageTypeParser {
       check(st.nextToken(), ")", "original type ended by )", st);
       t = st.nextToken();
     }
-
+    if (t.equals("=")) {
+      childBuilder.id(Integer.parseInt(st.nextToken()));
+      t = st.nextToken();
+    }
     try {
       addGroupTypeFields(t, st, childBuilder);
     } catch (IllegalArgumentException e) {
@@ -130,8 +135,8 @@ public class MessageTypeParser {
     childBuilder.named(name);
   }
 
-  private static void addPrimitiveType(String t, Tokenizer st, PrimitiveTypeName type, Repetition r, Types.GroupBuilder builder) {
-    Types.PrimitiveBuilder childBuilder = builder.primitive(type, r);
+  private static void addPrimitiveType(String t, Tokenizer st, PrimitiveTypeName type, Repetition r, Types.GroupBuilder<?> builder) {
+    PrimitiveBuilder<?> childBuilder = builder.primitive(type, r);
 
     if (type == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
       t = st.nextToken();
@@ -170,6 +175,10 @@ public class MessageTypeParser {
       check(t, ")", "original type ended by )", st);
       t = st.nextToken();
     }
+    if (t.equals("=")) {
+      childBuilder.id(Integer.parseInt(st.nextToken()));
+      t = st.nextToken();
+    }
     check(t, ";", "field ended by ';'", st);
 
     try {

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/main/java/parquet/schema/PrimitiveType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/parquet/schema/PrimitiveType.java
index 3a734a9..88a402f 100644
--- a/parquet-column/src/main/java/parquet/schema/PrimitiveType.java
+++ b/parquet-column/src/main/java/parquet/schema/PrimitiveType.java
@@ -276,15 +276,15 @@ public final class PrimitiveType extends Type {
   private final PrimitiveTypeName primitive;
   private final int length;
   private final DecimalMetadata decimalMeta;
-  
+
   /**
    * @param repetition OPTIONAL, REPEATED, REQUIRED
    * @param primitive STRING, INT64, ...
    * @param name the name of the type
    */
-  public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, 
+  public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
                        String name) {
-    this(repetition, primitive, 0, name, null, null);
+    this(repetition, primitive, 0, name, null, null, null);
   }
 
   /**
@@ -294,7 +294,7 @@ public final class PrimitiveType extends Type {
    * @param name the name of the type
    */
   public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, int length, String name) {
-    this(repetition, primitive, length, name, null, null);
+    this(repetition, primitive, length, name, null, null, null);
   }
 
   /**
@@ -303,9 +303,9 @@ public final class PrimitiveType extends Type {
    * @param name the name of the type
    * @param originalType (optional) the original type to help with cross schema convertion (LIST, MAP, ...)
    */
-  public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, 
+  public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
                        String name, OriginalType originalType) {
-    this(repetition, primitive, 0, name, originalType, null);
+    this(repetition, primitive, 0, name, originalType, null, null);
   }
 
   /**
@@ -315,9 +315,10 @@ public final class PrimitiveType extends Type {
    * @param length the length if the type is FIXED_LEN_BYTE_ARRAY, 0 otherwise (XXX)
    * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
    */
+  @Deprecated
   public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
                        int length, String name, OriginalType originalType) {
-    this(repetition, primitive, length, name, originalType, null);
+    this(repetition, primitive, length, name, originalType, null, null);
   }
 
   /**
@@ -327,17 +328,28 @@ public final class PrimitiveType extends Type {
    * @param length the length if the type is FIXED_LEN_BYTE_ARRAY, 0 otherwise
    * @param originalType (optional) the original type (MAP, DECIMAL, UTF8, ...)
    * @param decimalMeta (optional) metadata about the decimal type
+   * @param id the id of the field
    */
-  PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
-                       int length, String name, OriginalType originalType,
-                       DecimalMetadata decimalMeta) {
-    super(name, repetition, originalType);
+  PrimitiveType(
+      Repetition repetition, PrimitiveTypeName primitive,
+      int length, String name, OriginalType originalType,
+      DecimalMetadata decimalMeta, ID id) {
+    super(name, repetition, originalType, id);
     this.primitive = primitive;
     this.length = length;
     this.decimalMeta = decimalMeta;
   }
 
   /**
+   * @param id the field id
+   * @return a new PrimitiveType with the same fields and a new id
+   */
+  @Override
+  public PrimitiveType withId(int id) {
+    return new PrimitiveType(getRepetition(), primitive, length, getName(), getOriginalType(), decimalMeta, new ID(id));
+  }
+
+  /**
    * @return the primitive type
    */
   public PrimitiveTypeName getPrimitiveTypeName() {
@@ -399,36 +411,47 @@ public final class PrimitiveType extends Type {
       }
       sb.append(")");
     }
+    if (getId() != null) {
+      sb.append(" = ").append(getId());
+    }
+  }
+
+  @Override @Deprecated
+  protected int typeHashCode() {
+    return hashCode();
+  }
+
+  @Override @Deprecated
+  protected boolean typeEquals(Type other) {
+    return equals(other);
   }
 
   /**
    * {@inheritDoc}
    */
   @Override
-  protected boolean typeEquals(Type other) {
-    if (other.isPrimitive()) {
-      PrimitiveType primitiveType = other.asPrimitiveType();
-      if ((getPrimitiveTypeName() == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) &&
-          (getTypeLength() != primitiveType.getTypeLength())) {
-        return false;
-      }
-      return getRepetition() == primitiveType.getRepetition() &&
-          getPrimitiveTypeName().equals(primitiveType.getPrimitiveTypeName()) &&
-          getName().equals(primitiveType.getName());
-    } else {
+  protected boolean equals(Type other) {
+    if (!other.isPrimitive()) {
       return false;
     }
+    PrimitiveType otherPrimitive = other.asPrimitiveType();
+    return super.equals(other)
+        && primitive == otherPrimitive.getPrimitiveTypeName()
+        && length == otherPrimitive.length
+        && eqOrBothNull(decimalMeta, otherPrimitive.decimalMeta);
   }
 
   /**
    * {@inheritDoc}
    */
   @Override
-  protected int typeHashCode() {
-    int hash = 17;
-    hash += 31 * getRepetition().hashCode();
-    hash += 31 * getPrimitiveTypeName().hashCode();
-    hash += 31 * getName().hashCode();
+  public int hashCode() {
+    int hash = super.hashCode();
+    hash = hash * 31 + primitive.hashCode();
+    hash = hash * 31 + length;
+    if (decimalMeta != null) {
+      hash = hash * 31 + decimalMeta.hashCode();
+    }
     return hash;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/main/java/parquet/schema/Type.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/schema/Type.java b/parquet-column/src/main/java/parquet/schema/Type.java
index 2e02387..32128e3 100644
--- a/parquet-column/src/main/java/parquet/schema/Type.java
+++ b/parquet-column/src/main/java/parquet/schema/Type.java
@@ -15,6 +15,8 @@
  */
 package parquet.schema;
 
+import static parquet.Preconditions.checkNotNull;
+
 import java.util.List;
 
 import parquet.io.InvalidRecordException;
@@ -28,6 +30,39 @@ import parquet.io.InvalidRecordException;
 abstract public class Type {
 
   /**
+   * represents a field ID
+   *
+   * @author Julien Le Dem
+   *
+   */
+  public static final class ID {
+    private final int id;
+
+    public ID(int id) {
+      this.id = id;
+    }
+
+    public int intValue() {
+      return id;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      return (obj instanceof ID) && ((ID)obj).id == id;
+    }
+
+    @Override
+    public int hashCode() {
+      return id;
+    }
+
+    @Override
+    public String toString() {
+      return String.valueOf(id);
+    }
+  }
+
+  /**
    * Constraint on the repetition of a field
    *
    * @author Julien Le Dem
@@ -73,13 +108,15 @@ abstract public class Type {
   private final String name;
   private final Repetition repetition;
   private final OriginalType originalType;
+  private final ID id;
 
   /**
    * @param name the name of the type
    * @param repetition OPTIONAL, REPEATED, REQUIRED
    */
+  @Deprecated
   public Type(String name, Repetition repetition) {
-    this(name, repetition, null);
+    this(name, repetition, null, null);
   }
 
   /**
@@ -87,14 +124,32 @@ abstract public class Type {
    * @param repetition OPTIONAL, REPEATED, REQUIRED
    * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
    */
+  @Deprecated
   public Type(String name, Repetition repetition, OriginalType originalType) {
+    this(name, repetition, originalType, null);
+  }
+
+  /**
+   * @param name the name of the type
+   * @param repetition OPTIONAL, REPEATED, REQUIRED
+   * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
+   * @param id (optional) the id of the fields.
+   */
+  Type(String name, Repetition repetition, OriginalType originalType, ID id) {
     super();
-    this.name = name;
-    this.repetition = repetition;
+    this.name = checkNotNull(name, "name");
+    this.repetition = checkNotNull(repetition, "repetition");
     this.originalType = originalType;
+    this.id = id;
   }
 
   /**
+   * @param id
+   * @return the same type with the id field set
+   */
+  public abstract Type withId(int id);
+
+  /**
    * @return the name of the type
    */
   public String getName() {
@@ -117,6 +172,13 @@ abstract public class Type {
   }
 
   /**
+   * @return the id of the field (if defined)
+   */
+  public ID getId() {
+    return id;
+  }
+
+  /**
    * @return the original type (LIST, MAP, ...)
    */
   public OriginalType getOriginalType() {
@@ -163,21 +225,43 @@ abstract public class Type {
    */
   abstract public void accept(TypeVisitor visitor);
 
+  @Deprecated
+  abstract protected int typeHashCode();
+
+  @Deprecated
+  abstract protected boolean typeEquals(Type other);
+
   @Override
   public int hashCode() {
-    return typeHashCode();
+    int c = repetition.hashCode();
+    c = 31 * c + name.hashCode();
+    if (originalType != null) {
+      c = 31 * c +  originalType.hashCode();
+    }
+    if (id != null) {
+      c = 31 * c + id.hashCode();
+    }
+    return c;
   }
 
-  protected abstract int typeHashCode();
-
-  protected abstract boolean typeEquals(Type other);
+  protected boolean equals(Type other) {
+    return
+        name.equals(other.name)
+        && repetition == other.repetition
+        && eqOrBothNull(repetition, other.repetition)
+        && eqOrBothNull(id, other.id);
+  };
 
   @Override
   public boolean equals(Object other) {
     if (!(other instanceof Type) || other == null) {
       return false;
     }
-    return typeEquals((Type)other);
+    return equals((Type)other);
+  }
+
+  protected boolean eqOrBothNull(Object o1, Object o2) {
+    return (o1 == null && o2 == null) || (o1 != null && o1.equals(o2));
   }
 
   protected abstract int getMaxRepetitionLevel(String[] path, int i);
@@ -195,7 +279,7 @@ abstract public class Type {
    * @return the union result of merging toMerge into this
    */
   protected abstract Type union(Type toMerge);
-  
+
   /**
    * @param toMerge the type to merge into this one
    * @param strict should schema primitive types match

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/main/java/parquet/schema/Types.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/schema/Types.java b/parquet-column/src/main/java/parquet/schema/Types.java
index 213b856..06077df 100644
--- a/parquet-column/src/main/java/parquet/schema/Types.java
+++ b/parquet-column/src/main/java/parquet/schema/Types.java
@@ -2,8 +2,10 @@ package parquet.schema;
 
 import java.util.ArrayList;
 import java.util.List;
+
 import parquet.Preconditions;
 import parquet.schema.PrimitiveType.PrimitiveTypeName;
+import parquet.schema.Type.ID;
 
 /**
  * This class provides fluent builders that produce Parquet schema Types.
@@ -117,6 +119,7 @@ public class Types {
 
     protected Type.Repetition repetition = null;
     protected OriginalType originalType = null;
+    protected Type.ID id = null;
     private boolean repetitionAlreadySet = false;
 
     /**
@@ -174,6 +177,19 @@ public class Types {
       return self();
     }
 
+    /**
+     * adds an id annotation to the type being built.
+     * <p>
+     * ids are used to capture the original id when converting from models using ids (thrift, protobufs)
+     *
+     * @param id the id of the field
+     * @return this builder for method chaining
+     */
+    public T id(int id) {
+      this.id = new ID(id);
+      return self();
+    }
+
     abstract protected Type build(String name);
 
     /**
@@ -204,6 +220,7 @@ public class Types {
         return returnClass.cast(type);
       }
     }
+
   }
 
   /**
@@ -296,7 +313,6 @@ public class Types {
                 primitiveType == PrimitiveTypeName.BINARY,
                 "UTF8 can only annotate binary fields");
             break;
-
           case DECIMAL:
             Preconditions.checkState(
                 (primitiveType == PrimitiveTypeName.INT32) ||
@@ -321,11 +337,18 @@ public class Types {
                   "FIXED(" + length + ") cannot store " + meta.getPrecision() +
                   " digits (max " + maxPrecision(length) + ")");
             }
+            break;
+          case ENUM:
+            Preconditions.checkState(
+                primitiveType == PrimitiveTypeName.BINARY,
+                "ENUM can only annotate binary fields");
+            break;
+          default:
+            throw new IllegalStateException(originalType + " can not be applied to a primitive type");
         }
       }
 
-      return new PrimitiveType(
-          repetition, primitiveType, length, name, originalType, meta);
+      return new PrimitiveType(repetition, primitiveType, length, name, originalType, meta, id);
     }
 
     private static long maxPrecision(int numBytes) {
@@ -487,7 +510,7 @@ public class Types {
     protected GroupType build(String name) {
       Preconditions.checkState(!fields.isEmpty(),
           "Cannot build an empty group");
-      return new GroupType(repetition, name, originalType, fields);
+      return new GroupType(repetition, name, originalType, fields, id);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/test/java/parquet/parser/TestParquetParser.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/parquet/parser/TestParquetParser.java b/parquet-column/src/test/java/parquet/parser/TestParquetParser.java
index 7fd1184..d1fb975 100644
--- a/parquet-column/src/test/java/parquet/parser/TestParquetParser.java
+++ b/parquet-column/src/test/java/parquet/parser/TestParquetParser.java
@@ -16,69 +16,75 @@
 package parquet.parser;
 
 import static org.junit.Assert.assertEquals;
-import static parquet.schema.PrimitiveType.PrimitiveTypeName.*;
-import static parquet.schema.Type.Repetition.*;
+import static parquet.schema.MessageTypeParser.parseMessageType;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
+import static parquet.schema.Type.Repetition.OPTIONAL;
+import static parquet.schema.Type.Repetition.REPEATED;
+import static parquet.schema.Type.Repetition.REQUIRED;
+import static parquet.schema.Types.buildMessage;
 
-import org.junit.Assert;
 import org.junit.Test;
 
 import parquet.schema.GroupType;
 import parquet.schema.MessageType;
-import parquet.schema.MessageTypeParser;
 import parquet.schema.OriginalType;
 import parquet.schema.PrimitiveType;
 import parquet.schema.PrimitiveType.PrimitiveTypeName;
-import parquet.schema.Types;
+import parquet.schema.Types.MessageTypeBuilder;
 
 public class TestParquetParser {
-    @Test
-    public void testPaperExample() throws Exception {
-        String example = "message Document {\n" +
-                "  required int64 DocId;\n" +
-                "  optional group Links {\n" +
-                "    repeated int64 Backward;\n" +
-                "    repeated int64 Forward; }\n" +
-                "  repeated group Name {\n" +
-                "    repeated group Language {\n" +
-                "      required binary Code;\n" +
-                "      required binary Country; }\n" +
-                "    optional binary Url; }}";
-        MessageType parsed = MessageTypeParser.parseMessageType(example);
-        MessageType manuallyMade =
-            new MessageType("Document",
-                new PrimitiveType(REQUIRED, INT64, "DocId"),
-                new GroupType(OPTIONAL, "Links",
-                    new PrimitiveType(REPEATED, INT64, "Backward"),
-                    new PrimitiveType(REPEATED, INT64, "Forward")),
+  @Test
+  public void testPaperExample() throws Exception {
+    String example =
+        "message Document {\n" +
+        "  required int64 DocId;\n" +
+        "  optional group Links {\n" +
+        "    repeated int64 Backward;\n" +
+        "    repeated int64 Forward; }\n" +
+        "  repeated group Name {\n" +
+        "    repeated group Language {\n" +
+        "      required binary Code;\n" +
+        "      required binary Country; }\n" +
+        "    optional binary Url; }}";
+    MessageType parsed = parseMessageType(example);
+    MessageType manuallyMade =
+        new MessageType("Document",
+            new PrimitiveType(REQUIRED, INT64, "DocId"),
+            new GroupType(OPTIONAL, "Links",
+                new PrimitiveType(REPEATED, INT64, "Backward"),
+                new PrimitiveType(REPEATED, INT64, "Forward")),
                 new GroupType(REPEATED, "Name",
                     new GroupType(REPEATED, "Language",
                         new PrimitiveType(REQUIRED, BINARY, "Code"),
                         new PrimitiveType(REQUIRED, BINARY, "Country")),
-                    new PrimitiveType(OPTIONAL, BINARY, "Url")));
-        assertEquals(manuallyMade, parsed);
+                        new PrimitiveType(OPTIONAL, BINARY, "Url")));
+    assertEquals(manuallyMade, parsed);
 
-        MessageType parsedThenReparsed = MessageTypeParser.parseMessageType(parsed.toString());
+    MessageType parsedThenReparsed = parseMessageType(parsed.toString());
 
-        assertEquals(manuallyMade, parsedThenReparsed);
+    assertEquals(manuallyMade, parsedThenReparsed);
 
-        parsed = MessageTypeParser.parseMessageType("message m { required group a {required binary b;} required group c { required int64 d; }}");
-        manuallyMade =
-            new MessageType("m",
-                new GroupType(REQUIRED, "a",
-                    new PrimitiveType(REQUIRED, BINARY, "b")),
+    parsed = parseMessageType("message m { required group a {required binary b;} required group c { required int64 d; }}");
+    manuallyMade =
+        new MessageType("m",
+            new GroupType(REQUIRED, "a",
+                new PrimitiveType(REQUIRED, BINARY, "b")),
                 new GroupType(REQUIRED, "c",
                     new PrimitiveType(REQUIRED, INT64, "d")));
 
-        assertEquals(manuallyMade, parsed);
+    assertEquals(manuallyMade, parsed);
 
-        parsedThenReparsed = MessageTypeParser.parseMessageType(parsed.toString());
+    parsedThenReparsed = parseMessageType(parsed.toString());
 
-        assertEquals(manuallyMade, parsedThenReparsed);
-    }
+    assertEquals(manuallyMade, parsedThenReparsed);
+  }
 
   @Test
   public void testEachPrimitiveType() {
-    Types.MessageTypeBuilder builder = Types.buildMessage();
+    MessageTypeBuilder builder = buildMessage();
     StringBuilder schema = new StringBuilder();
     schema.append("message EachType {\n");
     for (PrimitiveTypeName type : PrimitiveTypeName.values()) {
@@ -88,40 +94,65 @@ public class TestParquetParser {
         builder.required(FIXED_LEN_BYTE_ARRAY).length(3).named("fixed_");
       } else {
         schema.append("  required ").append(type)
-            .append(" ").append(type).append("_;\n");
+        .append(" ").append(type).append("_;\n");
         builder.required(type).named(type.toString() + "_");
       }
     }
     schema.append("}\n");
     MessageType expected = builder.named("EachType");
 
-    MessageType parsed = MessageTypeParser.parseMessageType(schema.toString());
+    MessageType parsed = parseMessageType(schema.toString());
 
-    Assert.assertEquals(expected, parsed);
-    MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString());
-    Assert.assertEquals(expected, reparsed);
+    assertEquals(expected, parsed);
+    MessageType reparsed = parseMessageType(parsed.toString());
+    assertEquals(expected, reparsed);
   }
 
   @Test
   public void testUTF8Annotation() {
-    String message = "message StringMessage {\n" +
+    String message =
+        "message StringMessage {\n" +
         "  required binary string (UTF8);\n" +
         "}\n";
 
-    MessageType parsed = MessageTypeParser.parseMessageType(message);
-    MessageType expected = Types.buildMessage()
+    MessageType parsed = parseMessageType(message);
+    MessageType expected = buildMessage()
         .required(BINARY).as(OriginalType.UTF8).named("string")
         .named("StringMessage");
 
-    Assert.assertEquals(expected, parsed);
-    MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString());
-    Assert.assertEquals(expected, reparsed);
+    assertEquals(expected, parsed);
+    MessageType reparsed = parseMessageType(parsed.toString());
+    assertEquals(expected, reparsed);
+  }
+
+  @Test
+  public void testIDs() {
+    String message =
+        "message Message {\n" +
+        "  required binary string (UTF8) = 6;\n" +
+        "  required int32 i=1;\n" +
+        "  required binary s2= 3;\n" +
+        "  required binary s3 =4;\n" +
+        "}\n";
+
+    MessageType parsed = parseMessageType(message);
+    MessageType expected = buildMessage()
+        .required(BINARY).as(OriginalType.UTF8).id(6).named("string")
+        .required(INT32).id(1).named("i")
+        .required(BINARY).id(3).named("s2")
+        .required(BINARY).id(4).named("s3")
+        .named("Message");
+
+    assertEquals(expected, parsed);
+    MessageType reparsed = parseMessageType(parsed.toString());
+    assertEquals(expected, reparsed);
   }
 
   @Test
   public void testMAPAnnotations() {
     // this is primarily to test group annotations
-    String message = "message Message {\n" +
+    String message =
+        "message Message {\n" +
         "  optional group aMap (MAP) {\n" +
         "    repeated group map (MAP_KEY_VALUE) {\n" +
         "      required binary key (UTF8);\n" +
@@ -130,75 +161,78 @@ public class TestParquetParser {
         "  }\n" +
         "}\n";
 
-    MessageType parsed = MessageTypeParser.parseMessageType(message);
-    MessageType expected = Types.buildMessage()
+    MessageType parsed = parseMessageType(message);
+    MessageType expected = buildMessage()
         .optionalGroup()
-            .repeatedGroup()
-                .required(BINARY).as(OriginalType.UTF8).named("key")
-                .required(INT32).named("value")
-                .named("map")
-            .named("aMap")
+        .repeatedGroup()
+        .required(BINARY).as(OriginalType.UTF8).named("key")
+        .required(INT32).named("value")
+        .named("map")
+        .named("aMap")
         .named("Message");
 
-    Assert.assertEquals(expected, parsed);
-    MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString());
-    Assert.assertEquals(expected, reparsed);
+    assertEquals(expected, parsed);
+    MessageType reparsed = parseMessageType(parsed.toString());
+    assertEquals(expected, reparsed);
   }
 
   @Test
   public void testLISTAnnotation() {
     // this is primarily to test group annotations
-    String message = "message Message {\n" +
+    String message =
+        "message Message {\n" +
         "  required group aList (LIST) {\n" +
         "    repeated binary string (UTF8);\n" +
         "  }\n" +
         "}\n";
 
-    MessageType parsed = MessageTypeParser.parseMessageType(message);
-    MessageType expected = Types.buildMessage()
+    MessageType parsed = parseMessageType(message);
+    MessageType expected = buildMessage()
         .requiredGroup()
-            .repeated(BINARY).as(OriginalType.UTF8).named("string")
-            .named("aList")
+        .repeated(BINARY).as(OriginalType.UTF8).named("string")
+        .named("aList")
         .named("Message");
 
-    Assert.assertEquals(expected, parsed);
-    MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString());
-    Assert.assertEquals(expected, reparsed);
+    assertEquals(expected, parsed);
+    MessageType reparsed = parseMessageType(parsed.toString());
+    assertEquals(expected, reparsed);
   }
 
   @Test
   public void testDecimalFixedAnnotation() {
-    String message = "message DecimalMessage {\n" +
+    String message =
+        "message DecimalMessage {\n" +
         "  required FIXED_LEN_BYTE_ARRAY(4) aDecimal (DECIMAL(9,2));\n" +
         "}\n";
 
-    MessageType parsed = MessageTypeParser.parseMessageType(message);
-    MessageType expected = Types.buildMessage()
+    MessageType parsed = parseMessageType(message);
+    MessageType expected = buildMessage()
         .required(FIXED_LEN_BYTE_ARRAY).length(4)
-            .as(OriginalType.DECIMAL).precision(9).scale(2)
-            .named("aDecimal")
+        .as(OriginalType.DECIMAL).precision(9).scale(2)
+        .named("aDecimal")
         .named("DecimalMessage");
 
-    Assert.assertEquals(expected, parsed);
-    MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString());
-    Assert.assertEquals(expected, reparsed);
+    assertEquals(expected, parsed);
+    MessageType reparsed = parseMessageType(parsed.toString());
+    assertEquals(expected, reparsed);
   }
 
   @Test
   public void testDecimalBinaryAnnotation() {
-    String message = "message DecimalMessage {\n" +
+    String message =
+        "message DecimalMessage {\n" +
         "  required binary aDecimal (DECIMAL(9,2));\n" +
         "}\n";
 
-    MessageType parsed = MessageTypeParser.parseMessageType(message);
-    MessageType expected = Types.buildMessage()
+    MessageType parsed = parseMessageType(message);
+    MessageType expected = buildMessage()
         .required(BINARY).as(OriginalType.DECIMAL).precision(9).scale(2)
-            .named("aDecimal")
+        .named("aDecimal")
         .named("DecimalMessage");
 
-    Assert.assertEquals(expected, parsed);
-    MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString());
-    Assert.assertEquals(expected, reparsed);
+    assertEquals(expected, parsed);
+    MessageType reparsed = parseMessageType(parsed.toString());
+    assertEquals(expected, reparsed);
   }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/test/java/parquet/schema/TestMessageType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/parquet/schema/TestMessageType.java b/parquet-column/src/test/java/parquet/schema/TestMessageType.java
index 46f9c07..a1d7a2f 100644
--- a/parquet-column/src/test/java/parquet/schema/TestMessageType.java
+++ b/parquet-column/src/test/java/parquet/schema/TestMessageType.java
@@ -32,7 +32,6 @@ import parquet.schema.PrimitiveType.PrimitiveTypeName;
 public class TestMessageType {
   @Test
   public void test() throws Exception {
-    System.out.println(Paper.schema.toString());
     MessageType schema = MessageTypeParser.parseMessageType(Paper.schema.toString());
     assertEquals(Paper.schema, schema);
     assertEquals(schema.toString(), Paper.schema.toString());
@@ -130,4 +129,17 @@ public class TestMessageType {
       assertEquals("can not merge type optional int32 a into optional binary a", e.getMessage());
     }
   }
+
+  @Test
+  public void testIDs() throws Exception {
+    MessageType schema = new MessageType("test",
+        new PrimitiveType(REQUIRED, BINARY, "foo").withId(4),
+        new GroupType(REQUIRED, "bar",
+            new PrimitiveType(REQUIRED, BINARY, "baz").withId(3)
+            ).withId(8)
+        );
+    MessageType schema2 = MessageTypeParser.parseMessageType(schema.toString());
+    assertEquals(schema, schema2);
+    assertEquals(schema.toString(), schema2.toString());
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/test/java/parquet/schema/TestTypeBuilders.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/parquet/schema/TestTypeBuilders.java b/parquet-column/src/test/java/parquet/schema/TestTypeBuilders.java
index 5616055..5309ad8 100644
--- a/parquet-column/src/test/java/parquet/schema/TestTypeBuilders.java
+++ b/parquet-column/src/test/java/parquet/schema/TestTypeBuilders.java
@@ -194,7 +194,7 @@ public class TestTypeBuilders {
     // int32 primitive type
     MessageType expected = new MessageType("DecimalMessage",
         new PrimitiveType(REQUIRED, INT32, 0, "aDecimal",
-            OriginalType.DECIMAL, new DecimalMetadata(9, 2)));
+            OriginalType.DECIMAL, new DecimalMetadata(9, 2), null));
     MessageType builderType = Types.buildMessage()
         .required(INT32)
             .as(OriginalType.DECIMAL).precision(9).scale(2)
@@ -204,7 +204,7 @@ public class TestTypeBuilders {
     // int64 primitive type
     expected = new MessageType("DecimalMessage",
         new PrimitiveType(REQUIRED, INT64, 0, "aDecimal",
-            OriginalType.DECIMAL, new DecimalMetadata(18, 2)));
+            OriginalType.DECIMAL, new DecimalMetadata(18, 2), null));
     builderType = Types.buildMessage()
         .required(INT64)
             .as(OriginalType.DECIMAL).precision(18).scale(2)
@@ -214,7 +214,7 @@ public class TestTypeBuilders {
     // binary primitive type
     expected = new MessageType("DecimalMessage",
         new PrimitiveType(REQUIRED, BINARY, 0, "aDecimal",
-            OriginalType.DECIMAL, new DecimalMetadata(9, 2)));
+            OriginalType.DECIMAL, new DecimalMetadata(9, 2), null));
     builderType = Types.buildMessage()
         .required(BINARY).as(OriginalType.DECIMAL).precision(9).scale(2)
             .named("aDecimal")
@@ -223,7 +223,7 @@ public class TestTypeBuilders {
     // fixed primitive type
     expected = new MessageType("DecimalMessage",
         new PrimitiveType(REQUIRED, FIXED_LEN_BYTE_ARRAY, 4, "aDecimal",
-            OriginalType.DECIMAL, new DecimalMetadata(9, 2)));
+            OriginalType.DECIMAL, new DecimalMetadata(9, 2), null));
     builderType = Types.buildMessage()
         .required(FIXED_LEN_BYTE_ARRAY).length(4)
             .as(OriginalType.DECIMAL).precision(9).scale(2)
@@ -236,7 +236,7 @@ public class TestTypeBuilders {
   public void testDecimalAnnotationMissingScale() {
     MessageType expected = new MessageType("DecimalMessage",
         new PrimitiveType(REQUIRED, INT32, 0, "aDecimal",
-            OriginalType.DECIMAL, new DecimalMetadata(9, 0)));
+            OriginalType.DECIMAL, new DecimalMetadata(9, 0), null));
     MessageType builderType = Types.buildMessage()
         .required(INT32)
             .as(OriginalType.DECIMAL).precision(9)
@@ -246,7 +246,7 @@ public class TestTypeBuilders {
 
     expected = new MessageType("DecimalMessage",
         new PrimitiveType(REQUIRED, INT64, 0, "aDecimal",
-            OriginalType.DECIMAL, new DecimalMetadata(9, 0)));
+            OriginalType.DECIMAL, new DecimalMetadata(9, 0), null));
     builderType = Types.buildMessage()
         .required(INT64)
             .as(OriginalType.DECIMAL).precision(9)
@@ -256,7 +256,7 @@ public class TestTypeBuilders {
 
     expected = new MessageType("DecimalMessage",
         new PrimitiveType(REQUIRED, BINARY, 0, "aDecimal",
-            OriginalType.DECIMAL, new DecimalMetadata(9, 0)));
+            OriginalType.DECIMAL, new DecimalMetadata(9, 0), null));
     builderType = Types.buildMessage()
         .required(BINARY).as(OriginalType.DECIMAL).precision(9)
             .named("aDecimal")
@@ -265,7 +265,7 @@ public class TestTypeBuilders {
 
     expected = new MessageType("DecimalMessage",
         new PrimitiveType(REQUIRED, FIXED_LEN_BYTE_ARRAY, 7, "aDecimal",
-            OriginalType.DECIMAL, new DecimalMetadata(9, 0)));
+            OriginalType.DECIMAL, new DecimalMetadata(9, 0), null));
     builderType = Types.buildMessage()
         .required(FIXED_LEN_BYTE_ARRAY).length(7)
             .as(OriginalType.DECIMAL).precision(9)

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-hadoop/src/main/java/parquet/format/converter/ParquetMetadataConverter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/parquet/format/converter/ParquetMetadataConverter.java
index 76834d5..8b6a2b3 100644
--- a/parquet-hadoop/src/main/java/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/parquet/format/converter/ParquetMetadataConverter.java
@@ -559,6 +559,9 @@ public class ParquetMetadataConverter {
       if (schemaElement.isSetConverted_type()) {
         childBuilder.as(getOriginalType(schemaElement.converted_type));
       }
+      if (schemaElement.isSetField_id()) {
+        childBuilder.id(schemaElement.field_id);
+      }
 
       childBuilder.named(schemaElement.name);
     }

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-pig/src/main/java/parquet/pig/PigSchemaConverter.java
----------------------------------------------------------------------
diff --git a/parquet-pig/src/main/java/parquet/pig/PigSchemaConverter.java b/parquet-pig/src/main/java/parquet/pig/PigSchemaConverter.java
index bfa7ac3..ff60aff 100644
--- a/parquet-pig/src/main/java/parquet/pig/PigSchemaConverter.java
+++ b/parquet-pig/src/main/java/parquet/pig/PigSchemaConverter.java
@@ -15,10 +15,13 @@
  */
 package parquet.pig;
 
+import static parquet.Log.DEBUG;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+
 import org.apache.pig.LoadPushDown.RequiredField;
 import org.apache.pig.LoadPushDown.RequiredFieldList;
 import org.apache.pig.data.DataType;
@@ -29,10 +32,8 @@ import org.apache.pig.impl.util.ObjectSerializer;
 import org.apache.pig.impl.util.Pair;
 import org.apache.pig.impl.util.Utils;
 import org.apache.pig.parser.ParserException;
-import parquet.Log;
-import static parquet.Log.DEBUG;
-import static parquet.pig.TupleReadSupport.PARQUET_PIG_REQUIRED_FIELDS;
 
+import parquet.Log;
 import parquet.schema.ConversionPatterns;
 import parquet.schema.GroupType;
 import parquet.schema.MessageType;
@@ -62,9 +63,9 @@ public class PigSchemaConverter {
   public PigSchemaConverter() {
     this(false);
   }
-  
+
   /**
-   * 
+   *
    * @param columnIndexAccess toggle between name and index based access (default: false)
    */
   public PigSchemaConverter(boolean columnIndexAccess) {
@@ -86,13 +87,13 @@ public class PigSchemaConverter {
   interface ColumnAccess {
     List<Type> filterTupleSchema(GroupType schemaToFilter, Schema pigSchema, RequiredFieldList requiredFieldsList);
   }
-  
+
   class ColumnIndexAccess implements ColumnAccess {
     @Override
     public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema pigSchema, RequiredFieldList requiredFieldsList) {
       List<Type> newFields = new ArrayList<Type>();
       List<Pair<FieldSchema,Integer>> indexedFields = new ArrayList<Pair<FieldSchema,Integer>>();
-      
+
       try {
         if(requiredFieldsList == null) {
           int index = 0;
@@ -114,11 +115,11 @@ public class PigSchemaConverter {
         }
       } catch (FrontendException e) {
           throw new RuntimeException("Failed to filter requested fields", e);
-      }  
+      }
       return newFields;
-    } 
+    }
   }
-  
+
   class ColumnNameAccess implements ColumnAccess {
     @Override
     public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldsList) {
@@ -134,7 +135,7 @@ public class PigSchemaConverter {
       return newFields;
     }
   }
-  
+
   /**
    * @param pigSchema the pig schema to turn into a string representation
    * @return the sctring representation of the schema
@@ -148,14 +149,14 @@ public class PigSchemaConverter {
     if(requiredFieldString == null) {
         return null;
     }
-    
+
     try {
       return (RequiredFieldList) ObjectSerializer.deserialize(requiredFieldString);
     } catch (IOException e) {
       throw new RuntimeException("Failed to deserialize pushProjection", e);
     }
   }
-  
+
   static String serializeRequiredFieldList(RequiredFieldList requiredFieldList) {
     try {
       return ObjectSerializer.serialize(requiredFieldList);
@@ -163,7 +164,7 @@ public class PigSchemaConverter {
       throw new RuntimeException("Failed to searlize required fields.", e);
     }
   }
-  
+
   /**
    * converts a parquet schema into a pig schema
    * @param parquetSchema the parquet schema to convert to Pig schema
@@ -202,37 +203,37 @@ public class PigSchemaConverter {
 
   private FieldSchema getSimpleFieldSchema(final String fieldName, Type parquetType)
       throws FrontendException {
-    final PrimitiveTypeName parquetPrimitiveTypeName = 
+    final PrimitiveTypeName parquetPrimitiveTypeName =
         parquetType.asPrimitiveType().getPrimitiveTypeName();
     final OriginalType originalType = parquetType.getOriginalType();
     return parquetPrimitiveTypeName.convert(
         new PrimitiveTypeNameConverter<Schema.FieldSchema, FrontendException>() {
       @Override
-      public FieldSchema convertFLOAT(PrimitiveTypeName primitiveTypeName) 
+      public FieldSchema convertFLOAT(PrimitiveTypeName primitiveTypeName)
           throws FrontendException {
         return new FieldSchema(fieldName, null, DataType.FLOAT);
       }
 
       @Override
-      public FieldSchema convertDOUBLE(PrimitiveTypeName primitiveTypeName) 
+      public FieldSchema convertDOUBLE(PrimitiveTypeName primitiveTypeName)
           throws FrontendException {
         return new FieldSchema(fieldName, null, DataType.DOUBLE);
       }
 
       @Override
-      public FieldSchema convertINT32(PrimitiveTypeName primitiveTypeName) 
+      public FieldSchema convertINT32(PrimitiveTypeName primitiveTypeName)
           throws FrontendException {
         return new FieldSchema(fieldName, null, DataType.INTEGER);
       }
 
       @Override
-      public FieldSchema convertINT64(PrimitiveTypeName primitiveTypeName) 
+      public FieldSchema convertINT64(PrimitiveTypeName primitiveTypeName)
           throws FrontendException {
         return new FieldSchema(fieldName, null, DataType.LONG);
       }
 
       @Override
-      public FieldSchema convertINT96(PrimitiveTypeName primitiveTypeName) 
+      public FieldSchema convertINT96(PrimitiveTypeName primitiveTypeName)
           throws FrontendException {
         throw new FrontendException("NYI");
       }
@@ -244,13 +245,13 @@ public class PigSchemaConverter {
       }
 
       @Override
-      public FieldSchema convertBOOLEAN(PrimitiveTypeName primitiveTypeName) 
+      public FieldSchema convertBOOLEAN(PrimitiveTypeName primitiveTypeName)
           throws FrontendException {
         return new FieldSchema(fieldName, null, DataType.BOOLEAN);
       }
 
       @Override
-      public FieldSchema convertBINARY(PrimitiveTypeName primitiveTypeName) 
+      public FieldSchema convertBINARY(PrimitiveTypeName primitiveTypeName)
           throws FrontendException {
         if (originalType != null && originalType == OriginalType.UTF8) {
           return new FieldSchema(fieldName, null, DataType.CHARARRAY);
@@ -437,7 +438,7 @@ public class PigSchemaConverter {
   public MessageType filter(MessageType schemaToFilter, Schema requestedPigSchema) {
     return filter(schemaToFilter, requestedPigSchema, null);
   }
-  
+
   /**
    * filters a Parquet schema based on a pig schema for projection
    * @param schemaToFilter the schema to be filter
@@ -454,7 +455,7 @@ public class PigSchemaConverter {
     } catch (RuntimeException e) {
       throw new RuntimeException("can't filter " + schemaToFilter + " with " + requestedPigSchema, e);
     }
-  }  
+  }
 
   private Type filter(Type type, FieldSchema fieldSchema) {
     if (DEBUG) LOG.debug("filtering type:\n" + type + "\nwith:\n " + fieldSchema);
@@ -478,7 +479,7 @@ public class PigSchemaConverter {
 
   private Type filterTuple(GroupType tupleType, FieldSchema tupleFieldSchema) throws FrontendException {
     if (DEBUG) LOG.debug("filtering TUPLE schema:\n" + tupleType + "\nwith:\n " + tupleFieldSchema);
-    return new GroupType(tupleType.getRepetition(), tupleType.getName(), tupleType.getOriginalType(), columnAccess.filterTupleSchema(tupleType, tupleFieldSchema.schema, null));
+    return tupleType.withNewFields(columnAccess.filterTupleSchema(tupleType, tupleFieldSchema.schema, null));
   }
 
   private Type filterMap(GroupType mapType, FieldSchema mapFieldSchema) throws FrontendException {
@@ -491,13 +492,7 @@ public class PigSchemaConverter {
       throw new RuntimeException("this should be a Map Key/Value: " + mapType);
     }
     FieldSchema innerField = mapFieldSchema.schema.getField(0);
-    return new GroupType(
-        mapType.getRepetition(), mapType.getName(), mapType.getOriginalType(),
-        new GroupType(nested.getRepetition(), nested.getName(), nested.getOriginalType(),
-            nested.getType(0),
-            filter(nested.getType(1), innerField)
-            )
-        );
+    return mapType.withNewFields(nested.withNewFields(nested.getType(0), filter(nested.getType(1), innerField)));
   }
 
   private Type filterBag(GroupType bagType, FieldSchema bagFieldSchema) throws FrontendException {
@@ -511,9 +506,6 @@ public class PigSchemaConverter {
       // Bags always contain tuples => we skip the extra tuple that was inserted in that case.
       innerField = innerField.schema.getField(0);
     }
-    return new GroupType(
-        bagType.getRepetition(), bagType.getName(), bagType.getOriginalType(),
-        filter(nested, innerField)
-        );
+    return bagType.withNewFields(filter(nested, innerField));
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-protobuf/src/main/java/parquet/proto/ProtoSchemaConverter.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/main/java/parquet/proto/ProtoSchemaConverter.java b/parquet-protobuf/src/main/java/parquet/proto/ProtoSchemaConverter.java
index 4b0530d..f349b60 100644
--- a/parquet-protobuf/src/main/java/parquet/proto/ProtoSchemaConverter.java
+++ b/parquet-protobuf/src/main/java/parquet/proto/ProtoSchemaConverter.java
@@ -15,22 +15,28 @@
  */
 package parquet.proto;
 
-import com.google.protobuf.Descriptors;
-import com.google.protobuf.Message;
-import com.twitter.elephantbird.util.Protobufs;
+import static parquet.schema.OriginalType.ENUM;
+import static parquet.schema.OriginalType.UTF8;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
+
+import java.util.List;
+
 import parquet.Log;
-import parquet.schema.ConversionPatterns;
-import parquet.schema.GroupType;
 import parquet.schema.MessageType;
-import parquet.schema.OriginalType;
-import parquet.schema.PrimitiveType;
 import parquet.schema.Type;
+import parquet.schema.Types;
+import parquet.schema.Types.Builder;
+import parquet.schema.Types.GroupBuilder;
 
-import java.util.ArrayList;
-import java.util.List;
-
-import static com.google.protobuf.Descriptors.FieldDescriptor.JavaType;
-import static parquet.schema.PrimitiveType.PrimitiveTypeName;
+import com.google.protobuf.Descriptors;
+import com.google.protobuf.Descriptors.FieldDescriptor.JavaType;
+import com.google.protobuf.Message;
+import com.twitter.elephantbird.util.Protobufs;
 
 /**
  * <p/>
@@ -45,72 +51,54 @@ public class ProtoSchemaConverter {
   public MessageType convert(Class<? extends Message> protobufClass) {
     LOG.debug("Converting protocol buffer class \"" + protobufClass + "\" to parquet schema.");
     Descriptors.Descriptor descriptor = Protobufs.getMessageDescriptor(protobufClass);
-
-    MessageType messageType = new MessageType(descriptor.getFullName(), convertFields(descriptor.getFields()));
-
+    MessageType messageType =
+        convertFields(Types.buildMessage(), descriptor.getFields())
+        .named(descriptor.getFullName());
     LOG.debug("Converter info:\n " + descriptor.toProto() + " was converted to \n" + messageType);
     return messageType;
   }
 
   /* Iterates over list of fields. **/
-  private List<Type> convertFields(List<Descriptors.FieldDescriptor> fieldDescriptors) {
-    List<Type> types = new ArrayList<Type>();
-
+  private <T> GroupBuilder<T> convertFields(GroupBuilder<T> groupBuilder, List<Descriptors.FieldDescriptor> fieldDescriptors) {
     for (Descriptors.FieldDescriptor fieldDescriptor : fieldDescriptors) {
-      String fieldName = fieldDescriptor.getName();
-      Type.Repetition repetition = getRepetition(fieldDescriptor);
-      Type type = convertScalarField(fieldName, fieldDescriptor, repetition);
-      types.add(type);
+      groupBuilder =
+          addField(fieldDescriptor, groupBuilder)
+          .id(fieldDescriptor.getNumber())
+          .named(fieldDescriptor.getName());
     }
-    return types;
+    return groupBuilder;
   }
 
   private Type.Repetition getRepetition(Descriptors.FieldDescriptor descriptor) {
-    Type.Repetition repetition;
     if (descriptor.isRequired()) {
-      repetition = Type.Repetition.REQUIRED;
+      return Type.Repetition.REQUIRED;
     } else if (descriptor.isRepeated()) {
-      repetition = Type.Repetition.REPEATED;
+      return Type.Repetition.REPEATED;
     } else {
-      repetition = Type.Repetition.OPTIONAL;
+      return Type.Repetition.OPTIONAL;
     }
-    return repetition;
   }
 
-  private Type convertScalarField(String fieldName, Descriptors.FieldDescriptor descriptor, Type.Repetition repetition) {
-
+  private <T> Builder<? extends Builder<?, GroupBuilder<T>>, GroupBuilder<T>> addField(Descriptors.FieldDescriptor descriptor, GroupBuilder<T> builder) {
+    Type.Repetition repetition = getRepetition(descriptor);
     JavaType javaType = descriptor.getJavaType();
-
     switch (javaType) {
-      case BOOLEAN : return primitive(fieldName, PrimitiveTypeName.BOOLEAN, repetition);
-      case INT : return primitive(fieldName, PrimitiveTypeName.INT32, repetition);
-      case LONG : return primitive(fieldName, PrimitiveTypeName.INT64, repetition);
-      case FLOAT : return primitive(fieldName, PrimitiveTypeName.FLOAT, repetition);
-      case DOUBLE: return primitive(fieldName, PrimitiveTypeName.DOUBLE, repetition);
-      case BYTE_STRING: return primitive(fieldName, PrimitiveTypeName.BINARY, repetition);
-      case STRING: return primitive(fieldName, PrimitiveTypeName.BINARY, repetition, OriginalType.UTF8);
-      case MESSAGE: {
-        Descriptors.Descriptor messageDescriptor = descriptor.getMessageType();
-        List<Type> fields = convertFields(messageDescriptor.getFields());
-        return new GroupType(repetition, fieldName, fields);
-      }
-      case ENUM: return primitive(fieldName, PrimitiveTypeName.BINARY, repetition, OriginalType.ENUM);
+    case BOOLEAN : return builder.primitive(BOOLEAN, repetition);
+    case INT : return builder.primitive(INT32, repetition);
+    case LONG : return builder.primitive(INT64, repetition);
+    case FLOAT : return builder.primitive(FLOAT, repetition);
+    case DOUBLE: return builder.primitive(DOUBLE, repetition);
+    case BYTE_STRING: return builder.primitive(BINARY, repetition);
+    case STRING: return builder.primitive(BINARY, repetition).as(UTF8);
+    case MESSAGE: {
+      GroupBuilder<GroupBuilder<T>> group = builder.group(repetition);
+      convertFields(group, descriptor.getMessageType().getFields());
+      return group;
+    }
+    case ENUM: return builder.primitive(BINARY, repetition).as(ENUM);
+    default:
+      throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + javaType);
     }
-
-    throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + javaType + " fieldName " + fieldName);
-  }
-
-  /**
-   * Makes primitive type with additional information. Used for String and Binary types
-   */
-  private Type primitive(String name, PrimitiveTypeName primitive,
-                         Type.Repetition repetition, OriginalType originalType) {
-    return new PrimitiveType(repetition, primitive, name, originalType);
-  }
-
-  private PrimitiveType primitive(String name, PrimitiveTypeName
-          primitive, Type.Repetition repetition) {
-    return new PrimitiveType(repetition, primitive, name, null);
   }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-protobuf/src/test/java/parquet/proto/ProtoSchemaConverterTest.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/test/java/parquet/proto/ProtoSchemaConverterTest.java b/parquet-protobuf/src/test/java/parquet/proto/ProtoSchemaConverterTest.java
index 0062e54..7163f51 100644
--- a/parquet-protobuf/src/test/java/parquet/proto/ProtoSchemaConverterTest.java
+++ b/parquet-protobuf/src/test/java/parquet/proto/ProtoSchemaConverterTest.java
@@ -45,28 +45,28 @@ public class ProtoSchemaConverterTest {
   public void testConvertAllDatatypes() throws Exception {
     String expectedSchema =
       "message TestProtobuf.SchemaConverterAllDatatypes {\n" +
-      "  optional double optionalDouble;\n" +
-      "  optional float optionalFloat;\n" +
-      "  optional int32 optionalInt32;\n" +
-      "  optional int64 optionalInt64;\n" +
-      "  optional int32 optionalUInt32;\n" +
-      "  optional int64 optionalUInt64;\n" +
-      "  optional int32 optionalSInt32;\n" +
-      "  optional int64 optionalSInt64;\n" +
-      "  optional int32 optionalFixed32;\n" +
-      "  optional int64 optionalFixed64;\n" +
-      "  optional int32 optionalSFixed32;\n" +
-      "  optional int64 optionalSFixed64;\n" +
-      "  optional boolean optionalBool;\n" +
-      "  optional binary optionalString (UTF8);\n" +
-      "  optional binary optionalBytes;\n" +
-      "  optional group optionalMessage {\n" +
-      "    optional int32 someId;\n" +
+      "  optional double optionalDouble = 1;\n" +
+      "  optional float optionalFloat = 2;\n" +
+      "  optional int32 optionalInt32 = 3;\n" +
+      "  optional int64 optionalInt64 = 4;\n" +
+      "  optional int32 optionalUInt32 = 5;\n" +
+      "  optional int64 optionalUInt64 = 6;\n" +
+      "  optional int32 optionalSInt32 = 7;\n" +
+      "  optional int64 optionalSInt64 = 8;\n" +
+      "  optional int32 optionalFixed32 = 9;\n" +
+      "  optional int64 optionalFixed64 = 10;\n" +
+      "  optional int32 optionalSFixed32 = 11;\n" +
+      "  optional int64 optionalSFixed64 = 12;\n" +
+      "  optional boolean optionalBool = 13;\n" +
+      "  optional binary optionalString (UTF8) = 14;\n" +
+      "  optional binary optionalBytes = 15;\n" +
+      "  optional group optionalMessage = 16 {\n" +
+      "    optional int32 someId = 3;\n" +
       "  }\n" +
-      "  optional group pbgroup {\n" +
-      "    optional int32 groupInt;\n" +
+      "  optional group pbgroup = 17 {\n" +
+      "    optional int32 groupInt = 2;\n" +
       "  }\n" +
-      " optional binary optionalEnum (ENUM);" +
+      " optional binary optionalEnum (ENUM)  = 18;" +
       "}";
 
     testConversion(TestProtobuf.SchemaConverterAllDatatypes.class, expectedSchema);
@@ -76,17 +76,17 @@ public class ProtoSchemaConverterTest {
   public void testConvertRepetition() throws Exception {
     String expectedSchema =
       "message TestProtobuf.SchemaConverterRepetition {\n" +
-        "  optional int32 optionalPrimitive;\n" +
-        "  required int32 requiredPrimitive;\n" +
-        "  repeated int32 repeatedPrimitive;\n" +
-        "  optional group optionalMessage {\n" +
-        "    optional int32 someId;\n" +
+        "  optional int32 optionalPrimitive = 1;\n" +
+        "  required int32 requiredPrimitive = 2;\n" +
+        "  repeated int32 repeatedPrimitive = 3;\n" +
+        "  optional group optionalMessage = 7 {\n" +
+        "    optional int32 someId = 3;\n" +
         "  }\n" +
-        "  required group requiredMessage {" +
-        "    optional int32 someId;\n" +
+        "  required group requiredMessage = 8 {" +
+        "    optional int32 someId= 3;\n" +
         "  }\n" +
-        "  repeated group repeatedMessage {" +
-        "    optional int32 someId;\n" +
+        "  repeated group repeatedMessage = 9 {" +
+        "    optional int32 someId = 3;\n" +
         "  }\n" +
         "}";
 

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-thrift/src/main/java/parquet/thrift/ThriftSchemaConvertVisitor.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/parquet/thrift/ThriftSchemaConvertVisitor.java b/parquet-thrift/src/main/java/parquet/thrift/ThriftSchemaConvertVisitor.java
index 81a189b..f780dc5 100644
--- a/parquet-thrift/src/main/java/parquet/thrift/ThriftSchemaConvertVisitor.java
+++ b/parquet-thrift/src/main/java/parquet/thrift/ThriftSchemaConvertVisitor.java
@@ -16,21 +16,36 @@
 
 package parquet.thrift;
 
-import parquet.schema.*;
+import static parquet.schema.ConversionPatterns.listType;
+import static parquet.schema.ConversionPatterns.mapType;
+import static parquet.schema.OriginalType.ENUM;
+import static parquet.schema.OriginalType.UTF8;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
+import static parquet.schema.Type.Repetition.OPTIONAL;
+import static parquet.schema.Type.Repetition.REPEATED;
+import static parquet.schema.Type.Repetition.REQUIRED;
+import static parquet.schema.Types.primitive;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import parquet.schema.GroupType;
+import parquet.schema.MessageType;
+import parquet.schema.OriginalType;
+import parquet.schema.PrimitiveType;
+import parquet.schema.PrimitiveType.PrimitiveTypeName;
+import parquet.schema.Type;
+import parquet.schema.Types.PrimitiveBuilder;
 import parquet.thrift.projection.FieldProjectionFilter;
 import parquet.thrift.projection.FieldsPath;
 import parquet.thrift.projection.ThriftProjectionException;
 import parquet.thrift.struct.ThriftField;
 import parquet.thrift.struct.ThriftType;
 
-import java.util.ArrayList;
-import java.util.List;
-
-import static parquet.schema.OriginalType.ENUM;
-import static parquet.schema.OriginalType.UTF8;
-import static parquet.schema.PrimitiveType.PrimitiveTypeName.*;
-import static parquet.schema.Type.Repetition.*;
-
 /**
  * Visitor Class for converting a thrift definiton to parquet message type.
  * Projection can be done by providing a {@link FieldProjectionFilter}
@@ -85,7 +100,7 @@ public class ThriftSchemaConvertVisitor implements ThriftType.TypeVisitor {
     //restore Env
     currentName = mapName;
     currentRepetition = mapRepetition;
-    currentType = ConversionPatterns.mapType(currentRepetition, currentName,
+    currentType = mapType(currentRepetition, currentName,
             keyType,
             valueType);
   }
@@ -98,11 +113,11 @@ public class ThriftSchemaConvertVisitor implements ThriftType.TypeVisitor {
     currentName = currentName + "_tuple";
     currentRepetition = REPEATED;
     setElemField.getType().accept(this);
-    //after convertion, currentType is the nested type
+    //after conversion, currentType is the nested type
     if (currentType == null) {
       return;
     } else {
-      currentType = ConversionPatterns.listType(setRepetition, setName, currentType);
+      currentType = listType(setRepetition, setName, currentType);
     }
   }
 
@@ -114,11 +129,11 @@ public class ThriftSchemaConvertVisitor implements ThriftType.TypeVisitor {
     currentName = currentName + "_tuple";
     currentRepetition = REPEATED;
     setElemField.getType().accept(this);
-    //after convertion, currentType is the nested type
+    //after conversion, currentType is the nested type
     if (currentType == null) {
       return;
     } else {
-      currentType = ConversionPatterns.listType(listRepetition, listName, currentType);
+      currentType = listType(listRepetition, listName, currentType);
     }
 
   }
@@ -128,7 +143,7 @@ public class ThriftSchemaConvertVisitor implements ThriftType.TypeVisitor {
     if (currentType == null)
       return new MessageType(currentName, new ArrayList<Type>());
 
-    GroupType rootType = (GroupType) currentType;
+    GroupType rootType = currentType.asGroupType();
     return new MessageType(currentName, rootType.getFields());
   }
 
@@ -160,7 +175,8 @@ public class ThriftSchemaConvertVisitor implements ThriftType.TypeVisitor {
       currentFieldPath.push(field);
       field.getType().accept(this);
       if (currentType != null) {
-        types.add(currentType);//currentType is converted with the currentName(fieldName)
+        // currentType is converted with the currentName(fieldName)
+        types.add(currentType.withId(field.getFieldId()));
       }
       currentFieldPath.pop();
     }
@@ -169,66 +185,64 @@ public class ThriftSchemaConvertVisitor implements ThriftType.TypeVisitor {
 
   private boolean isCurrentlyMatchedFilter(){
      if(!fieldProjectionFilter.isMatched(currentFieldPath)){
-       currentType=null;
+       currentType = null;
        return false;
      }
     return true;
   }
 
+  private void primitiveType(PrimitiveTypeName type) {
+    primitiveType(type, null);
+  }
+
+  private void primitiveType(PrimitiveTypeName type, OriginalType orig) {
+    if (isCurrentlyMatchedFilter()) {
+      PrimitiveBuilder<PrimitiveType> b = primitive(type, currentRepetition);
+      if (orig != null) {
+        b = b.as(orig);
+      }
+      currentType = b.named(currentName);
+    }
+  }
+
   @Override
   public void visit(ThriftType.EnumType enumType) {
-    if (isCurrentlyMatchedFilter()){
-      currentType = new PrimitiveType(currentRepetition, BINARY, currentName, ENUM);
-    }
+    primitiveType(BINARY, ENUM);
   }
 
   @Override
   public void visit(ThriftType.BoolType boolType) {
-    if (isCurrentlyMatchedFilter()){
-      currentType = new PrimitiveType(currentRepetition, BOOLEAN, currentName);
-    }
+    primitiveType(BOOLEAN);
   }
 
   @Override
   public void visit(ThriftType.ByteType byteType) {
-    if (isCurrentlyMatchedFilter()){
-      currentType = new PrimitiveType(currentRepetition, INT32, currentName);
-    }
+    primitiveType(INT32);
   }
 
   @Override
   public void visit(ThriftType.DoubleType doubleType) {
-    if (isCurrentlyMatchedFilter()){
-      currentType = new PrimitiveType(currentRepetition, DOUBLE, currentName);
-    }
+    primitiveType(DOUBLE);
   }
 
   @Override
   public void visit(ThriftType.I16Type i16Type) {
-    if (isCurrentlyMatchedFilter()){
-      currentType = new PrimitiveType(currentRepetition, INT32, currentName);
-    }
+    primitiveType(INT32);
   }
 
   @Override
   public void visit(ThriftType.I32Type i32Type) {
-    if (isCurrentlyMatchedFilter()){
-      currentType = new PrimitiveType(currentRepetition, INT32, currentName);
-    }
+    primitiveType(INT32);
   }
 
   @Override
   public void visit(ThriftType.I64Type i64Type) {
-    if (isCurrentlyMatchedFilter()){
-      currentType = new PrimitiveType(currentRepetition, INT64, currentName);
-    }
+    primitiveType(INT64);
   }
 
   @Override
   public void visit(ThriftType.StringType stringType) {
-    if (isCurrentlyMatchedFilter()){
-      currentType = new PrimitiveType(currentRepetition, BINARY, currentName, UTF8);
-    }
+    primitiveType(BINARY, UTF8);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-thrift/src/main/java/parquet/thrift/projection/FieldsPath.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/parquet/thrift/projection/FieldsPath.java b/parquet-thrift/src/main/java/parquet/thrift/projection/FieldsPath.java
index b88ad63..42e10f3 100644
--- a/parquet-thrift/src/main/java/parquet/thrift/projection/FieldsPath.java
+++ b/parquet-thrift/src/main/java/parquet/thrift/projection/FieldsPath.java
@@ -15,12 +15,11 @@
  */
 package parquet.thrift.projection;
 
-import com.twitter.elephantbird.thrift.TStructDescriptor;
+import java.util.ArrayList;
+
 import parquet.thrift.struct.ThriftField;
 import parquet.thrift.struct.ThriftType;
 
-import java.util.ArrayList;
-
 /**
  * represent field path for thrift field
  *

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-thrift/src/test/java/parquet/thrift/TestThriftSchemaConverter.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/java/parquet/thrift/TestThriftSchemaConverter.java b/parquet-thrift/src/test/java/parquet/thrift/TestThriftSchemaConverter.java
index 1bb79cf..c0f80bf 100644
--- a/parquet-thrift/src/test/java/parquet/thrift/TestThriftSchemaConverter.java
+++ b/parquet-thrift/src/test/java/parquet/thrift/TestThriftSchemaConverter.java
@@ -15,19 +15,22 @@
  */
 package parquet.thrift;
 
-import com.twitter.data.proto.tutorial.thrift.AddressBook;
-import com.twitter.data.proto.tutorial.thrift.Person;
-import com.twitter.elephantbird.thrift.test.TestStructInMap;
+import static org.junit.Assert.assertEquals;
+import static parquet.schema.MessageTypeParser.parseMessageType;
+
+import org.apache.thrift.TBase;
 import org.junit.Test;
+
 import parquet.schema.MessageType;
 import parquet.schema.MessageTypeParser;
 import parquet.thrift.projection.FieldProjectionFilter;
 import parquet.thrift.projection.ThriftProjectionException;
 import parquet.thrift.struct.ThriftType;
 import parquet.thrift.struct.ThriftType.StructType;
-import parquet.thrift.test.TestPerson;
 
-import static org.junit.Assert.assertEquals;
+import com.twitter.data.proto.tutorial.thrift.AddressBook;
+import com.twitter.data.proto.tutorial.thrift.Person;
+import com.twitter.elephantbird.thrift.test.TestStructInMap;
 
 public class TestThriftSchemaConverter {
 
@@ -35,18 +38,18 @@ public class TestThriftSchemaConverter {
   public void testToMessageType() throws Exception {
     String expected =
             "message ParquetSchema {\n" +
-                    "  optional group persons (LIST) {\n" +
+                    "  optional group persons (LIST) = 1 {\n" +
                     "    repeated group persons_tuple {\n" +
-                    "      required group name {\n" +
-                    "        optional binary first_name (UTF8);\n" +
-                    "        optional binary last_name (UTF8);\n" +
+                    "      required group name = 1 {\n" +
+                    "        optional binary first_name (UTF8) = 1;\n" +
+                    "        optional binary last_name (UTF8) = 2;\n" +
                     "      }\n" +
-                    "      optional int32 id;\n" +
-                    "      optional binary email (UTF8);\n" +
-                    "      optional group phones (LIST) {\n" +
+                    "      optional int32 id = 2;\n" +
+                    "      optional binary email (UTF8) = 3;\n" +
+                    "      optional group phones (LIST) = 4 {\n" +
                     "        repeated group phones_tuple {\n" +
-                    "          optional binary number (UTF8);\n" +
-                    "          optional binary type (ENUM);\n" +
+                    "          optional binary number (UTF8) = 1;\n" +
+                    "          optional binary type (ENUM) = 2;\n" +
                     "        }\n" +
                     "      }\n" +
                     "    }\n" +
@@ -61,58 +64,58 @@ public class TestThriftSchemaConverter {
   public void testToProjectedThriftType() {
 
     shouldGetProjectedSchema("name/first_name", "message ParquetSchema {" +
-            "  required group name {" +
-            "    optional binary first_name (UTF8);" +
+            "  required group name = 1 {" +
+            "    optional binary first_name (UTF8) = 1;" +
             "  }}", Person.class);
 
     shouldGetProjectedSchema("name/first_name;name/last_name", "message ParquetSchema {" +
-            "  required group name {" +
-            "    optional binary first_name (UTF8);" +
-            "    optional binary last_name (UTF8);" +
+            "  required group name = 1 {" +
+            "    optional binary first_name (UTF8) = 1;" +
+            "    optional binary last_name (UTF8) = 2;" +
             "  }}", Person.class);
 
     shouldGetProjectedSchema("name/{first,last}_name;", "message ParquetSchema {" +
-            "  required group name {" +
-            "    optional binary first_name (UTF8);" +
-            "    optional binary last_name (UTF8);" +
+            "  required group name = 1 {" +
+            "    optional binary first_name (UTF8) = 1;" +
+            "    optional binary last_name (UTF8) = 2;" +
             "  }}", Person.class);
 
     shouldGetProjectedSchema("name/*", "message ParquetSchema {" +
-            "  required group name {" +
-            "    optional binary first_name (UTF8);" +
-            "    optional binary last_name (UTF8);" +
+            "  required group name = 1 {" +
+            "    optional binary first_name (UTF8) = 1;" +
+            "    optional binary last_name (UTF8) = 2;" +
             "  }" +
             "}", Person.class);
 
     shouldGetProjectedSchema("name/*", "message ParquetSchema {" +
-            "  required group name {" +
-            "    optional binary first_name (UTF8);" +
-            "    optional binary last_name (UTF8);" +
+            "  required group name = 1 {" +
+            "    optional binary first_name (UTF8) = 1;" +
+            "    optional binary last_name (UTF8) = 2;" +
             "  }" +
             "}", Person.class);
 
     shouldGetProjectedSchema("*/*_name", "message ParquetSchema {" +
-            "  required group name {" +
-            "    optional binary first_name (UTF8);" +
-            "    optional binary last_name (UTF8);" +
+            "  required group name = 1 {" +
+            "    optional binary first_name (UTF8) = 1;" +
+            "    optional binary last_name (UTF8) = 2;" +
             "  }" +
             "}", Person.class);
 
     shouldGetProjectedSchema("name/first_*", "message ParquetSchema {" +
-            "  required group name {" +
-            "    optional binary first_name (UTF8);" +
+            "  required group name = 1 {" +
+            "    optional binary first_name (UTF8) = 1;" +
             "  }" +
             "}", Person.class);
 
     shouldGetProjectedSchema("*/*", "message ParquetSchema {" +
-            "  required group name {" +
-            "  optional binary first_name (UTF8);" +
-            "  optional binary last_name (UTF8);" +
+            "  required group name = 1 {" +
+            "  optional binary first_name (UTF8) = 1;" +
+            "  optional binary last_name (UTF8) = 2;" +
             "} " +
-            "  optional group phones (LIST) {" +
+            "  optional group phones (LIST) = 4 {" +
             "    repeated group phones_tuple {" +
-            "      optional binary number (UTF8);" +
-            "      optional binary type (ENUM);" +
+            "      optional binary number (UTF8) = 1;" +
+            "      optional binary type (ENUM) = 2;" +
             "    }" +
             "}}", Person.class);
 
@@ -147,16 +150,16 @@ public class TestThriftSchemaConverter {
   public void testProjectMapThriftType() {
     //project nested map
     shouldGetProjectedSchema("name;names/key*;names/value/**", "message ParquetSchema {\n" +
-            "  optional binary name (UTF8);\n" +
-            "  optional group names (MAP) {\n" +
+            "  optional binary name (UTF8) = 1;\n" +
+            "  optional group names (MAP) = 2 {\n" +
             "    repeated group map (MAP_KEY_VALUE) {\n" +
             "      required binary key (UTF8);\n" +
             "      optional group value {\n" +
-            "        optional group name {\n" +
-            "          optional binary first_name (UTF8);\n" +
-            "          optional binary last_name (UTF8);\n" +
+            "        optional group name = 1 {\n" +
+            "          optional binary first_name (UTF8) = 1;\n" +
+            "          optional binary last_name (UTF8) = 2;\n" +
             "        }\n" +
-            "        optional group phones (MAP) {\n" +
+            "        optional group phones (MAP) = 2 {\n" +
             "          repeated group map (MAP_KEY_VALUE) {\n" +
             "            required binary key (ENUM);\n" +
             "            optional binary value (UTF8);\n" +
@@ -169,14 +172,14 @@ public class TestThriftSchemaConverter {
 
     //project only one level of nested map
     shouldGetProjectedSchema("name;names/key;names/value/name/*", "message ParquetSchema {\n" +
-            "  optional binary name (UTF8);\n" +
-            "  optional group names (MAP) {\n" +
+            "  optional binary name (UTF8) = 1;\n" +
+            "  optional group names (MAP) = 2 {\n" +
             "    repeated group map (MAP_KEY_VALUE) {\n" +
             "      required binary key (UTF8);\n" +
             "      optional group value {\n" +
-            "        optional group name {\n" +
-            "          optional binary first_name (UTF8);\n" +
-            "          optional binary last_name (UTF8);\n" +
+            "        optional group name = 1 {\n" +
+            "          optional binary first_name (UTF8) = 1;\n" +
+            "          optional binary last_name (UTF8) = 2;\n" +
             "        }\n" +
             "      }\n" +
             "    }\n" +
@@ -187,8 +190,8 @@ public class TestThriftSchemaConverter {
   @Test
   public void testProjectOnlyKeyInMap() {
     shouldGetProjectedSchema("name;names/key","message ParquetSchema {\n" +
-            "  optional binary name (UTF8);\n" +
-            "  optional group names (MAP) {\n" +
+            "  optional binary name (UTF8) = 1;\n" +
+            "  optional group names (MAP) = 2 {\n" +
             "    repeated group map (MAP_KEY_VALUE) {\n" +
             "      required binary key (UTF8);\n" +
             "    }\n" +
@@ -201,13 +204,13 @@ public class TestThriftSchemaConverter {
     System.out.println(getFilteredSchema("name;names/value/**", TestStructInMap.class));
   }
 
-  private void shouldGetProjectedSchema(String filterDesc, String expectedSchemaStr, Class thriftClass) {
+  private void shouldGetProjectedSchema(String filterDesc, String expectedSchemaStr, Class<? extends TBase<?,?>> thriftClass) {
     MessageType requestedSchema = getFilteredSchema(filterDesc, thriftClass);
-    MessageType expectedSchema = MessageTypeParser.parseMessageType(expectedSchemaStr);
+    MessageType expectedSchema = parseMessageType(expectedSchemaStr);
     assertEquals(expectedSchema, requestedSchema);
   }
 
-  private MessageType getFilteredSchema(String filterDesc, Class thriftClass) {
+  private MessageType getFilteredSchema(String filterDesc, Class<? extends TBase<?,?>> thriftClass) {
     FieldProjectionFilter fieldProjectionFilter = new FieldProjectionFilter(filterDesc);
     return new ThriftSchemaConverter(fieldProjectionFilter).convert(thriftClass);
   }


Mime
View raw message