arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jul...@apache.org
Subject arrow git commit: ARROW-257: Add a typeids Vector to Union type
Date Tue, 27 Sep 2016 19:05:03 GMT
Repository: arrow
Updated Branches:
  refs/heads/master bae33d622 -> 768c7d0be


ARROW-257: Add a typeids Vector to Union type

Author: Julien Le Dem <julien@dremio.com>

Closes #143 from julienledem/union and squashes the following commits:

cd1b711 [Julien Le Dem] ARROW-257: Add a typeids Vector to Union type


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/768c7d0b
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/768c7d0b
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/768c7d0b

Branch: refs/heads/master
Commit: 768c7d0be7dde9942235b5312c1c46ab035af86b
Parents: bae33d6
Author: Julien Le Dem <julien@dremio.com>
Authored: Tue Sep 27 11:54:35 2016 -0700
Committer: Julien Le Dem <julien@dremio.com>
Committed: Tue Sep 27 11:54:35 2016 -0700

----------------------------------------------------------------------
 format/Message.fbs                              |  5 +++
 .../vector/src/main/codegen/data/ArrowTypes.tdd |  2 +-
 .../src/main/codegen/templates/ArrowType.java   | 38 +++++++++++++++-----
 .../src/main/codegen/templates/UnionVector.java |  7 ++--
 .../org/apache/arrow/vector/types/Types.java    |  2 +-
 .../apache/arrow/vector/pojo/TestConvert.java   |  5 +--
 6 files changed, 45 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/768c7d0b/format/Message.fbs
----------------------------------------------------------------------
diff --git a/format/Message.fbs b/format/Message.fbs
index 07da862..288f5a1 100644
--- a/format/Message.fbs
+++ b/format/Message.fbs
@@ -23,8 +23,13 @@ table List {
 
 enum UnionMode:short { Sparse, Dense }
 
+/// A union is a complex type with children in Field
+/// By default ids in the type vector refer to the offsets in the children
+/// optionally typeIds provides an indirection between the child offset and the type id
+/// for each child typeIds[offset] is the id used in the type vector
 table Union {
   mode: UnionMode;
+  typeIds: [ int ]; // optional, describes typeid of each child.
 }
 
 table Int {

http://git-wip-us.apache.org/repos/asf/arrow/blob/768c7d0b/java/vector/src/main/codegen/data/ArrowTypes.tdd
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd
index 9f81f0e..9624fec 100644
--- a/java/vector/src/main/codegen/data/ArrowTypes.tdd
+++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd
@@ -30,7 +30,7 @@
     },
     {
       name: "Union",
-      fields: [{name: "mode", type: short}]
+      fields: [{name: "mode", type: short}, {name: "typeIds", type: "int[]"}]
     },
     {
       name: "Int",

http://git-wip-us.apache.org/repos/asf/arrow/blob/768c7d0b/java/vector/src/main/codegen/templates/ArrowType.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/templates/ArrowType.java b/java/vector/src/main/codegen/templates/ArrowType.java
index 29dee20..30f2c68 100644
--- a/java/vector/src/main/codegen/templates/ArrowType.java
+++ b/java/vector/src/main/codegen/templates/ArrowType.java
@@ -33,12 +33,23 @@ import org.apache.arrow.flatbuf.Type;
 
 import java.util.Objects;
 
+/**
+ * Arrow types
+ **/
 public abstract class ArrowType {
 
   public abstract byte getTypeType();
   public abstract int getType(FlatBufferBuilder builder);
   public abstract <T> T accept(ArrowTypeVisitor<T> visitor);
 
+  /**
+   * to visit the ArrowTypes
+   * <code>
+   *   type.accept(new ArrowTypeVisitor<Type>() {
+   *   ...
+   *   });
+   * </code>
+   */
   public static interface ArrowTypeVisitor<T> {
   <#list arrowTypes.types as type>
     T visit(${type.name} type);
@@ -55,9 +66,7 @@ public abstract class ArrowType {
     </#if>
 
     <#list fields as field>
-    <#assign fieldName = field.name>
-    <#assign fieldType = field.type>
-    ${fieldType} ${fieldName};
+    ${field.type} ${field.name};
     </#list>
 
     <#if type.fields?size != 0>
@@ -79,6 +88,9 @@ public abstract class ArrowType {
       <#if field.type == "String">
       int ${field.name} = builder.createString(this.${field.name});
       </#if>
+      <#if field.type == "int[]">
+      int ${field.name} = org.apache.arrow.flatbuf.${type.name}.create${field.name?cap_first}Vector(builder, this.${field.name});
+      </#if>
       </#list>
       org.apache.arrow.flatbuf.${type.name}.start${type.name}(builder);
       <#list type.fields as field>
@@ -96,7 +108,7 @@ public abstract class ArrowType {
     public String toString() {
       return "${name}{"
       <#list fields as field>
-      + ", " + ${field.name}
+        + <#if field.type == "int[]">java.util.Arrays.toString(${field.name})<#else>${field.name}</#if><#if field_has_next> + ", " </#if>
       </#list>
       + "}";
     }
@@ -115,8 +127,7 @@ public abstract class ArrowType {
       return true;
       <#else>
       ${type.name} that = (${type.name}) obj;
-      return
-      <#list type.fields as field>Objects.equals(this.${field.name}, that.${field.name}) <#if field_has_next>&&<#else>;</#if>
+      return <#list type.fields as field>Objects.deepEquals(this.${field.name}, that.${field.name}) <#if field_has_next>&&<#else>;</#if>
       </#list>
       </#if>
     }
@@ -134,9 +145,20 @@ public abstract class ArrowType {
     <#assign name = type.name>
     <#assign nameLower = type.name?lower_case>
     <#assign fields = type.fields>
-    case Type.${type.name}:
+    case Type.${type.name}: {
       org.apache.arrow.flatbuf.${type.name} ${nameLower}Type = (org.apache.arrow.flatbuf.${type.name}) field.type(new org.apache.arrow.flatbuf.${type.name}());
-      return new ${type.name}(<#list type.fields as field>${nameLower}Type.${field.name}()<#if field_has_next>, </#if></#list>);
+      <#list type.fields as field>
+      <#if field.type == "int[]">
+      ${field.type} ${field.name} = new int[${nameLower}Type.${field.name}Length()];
+      for (int i = 0; i< ${field.name}.length; ++i) {
+        ${field.name}[i] = ${nameLower}Type.${field.name}(i);
+      }
+      <#else>
+      ${field.type} ${field.name} = ${nameLower}Type.${field.name}();
+      </#if>
+      </#list>
+      return new ${type.name}(<#list type.fields as field>${field.name}<#if field_has_next>, </#if></#list>);
+    }
     </#list>
     default:
       throw new UnsupportedOperationException("Unsupported type: " + field.typeType());

http://git-wip-us.apache.org/repos/asf/arrow/blob/768c7d0b/java/vector/src/main/codegen/templates/UnionVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java
index 3014bbb..b14314d 100644
--- a/java/vector/src/main/codegen/templates/UnionVector.java
+++ b/java/vector/src/main/codegen/templates/UnionVector.java
@@ -232,10 +232,13 @@ public class UnionVector implements FieldVector {
   @Override
   public Field getField() {
     List<org.apache.arrow.vector.types.pojo.Field> childFields = new ArrayList<>();
-    for (ValueVector v : internalMap.getChildren()) {
+    List<FieldVector> children = internalMap.getChildren();
+    int[] typeIds = new int[children.size()];
+    for (ValueVector v : children) {
+      typeIds[childFields.size()] = v.getMinorType().ordinal();
       childFields.add(v.getField());
     }
-    return new Field(name, true, new ArrowType.Union(Sparse), childFields);
+    return new Field(name, true, new ArrowType.Union(Sparse, typeIds), childFields);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/arrow/blob/768c7d0b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
index 181d835..6e63ae2 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
@@ -472,7 +472,7 @@ public class Types {
         return new UnionListWriter((ListVector) vector);
       }
     },
-    UNION(new Union(UnionMode.Sparse)) {
+    UNION(new Union(UnionMode.Sparse, null)) {
       @Override
       public Field getField() {
         throw new UnsupportedOperationException("Cannot get simple field for Union type");

http://git-wip-us.apache.org/repos/asf/arrow/blob/768c7d0b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
index 448117d..ed740cd 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
@@ -22,11 +22,12 @@ import static org.apache.arrow.flatbuf.Precision.SINGLE;
 import static org.junit.Assert.assertEquals;
 
 import org.apache.arrow.flatbuf.UnionMode;
+import org.apache.arrow.vector.types.Types.MinorType;
 import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
 import org.apache.arrow.vector.types.pojo.ArrowType.Int;
 import org.apache.arrow.vector.types.pojo.ArrowType.List;
-import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
 import org.apache.arrow.vector.types.pojo.ArrowType.Struct_;
+import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
 import org.apache.arrow.vector.types.pojo.ArrowType.Union;
 import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
 import org.apache.arrow.vector.types.pojo.Field;
@@ -78,7 +79,7 @@ public class TestConvert {
     childrenBuilder.add(new Field("child4", true, new List(), ImmutableList.<Field>of(
         new Field("child4.1", true, Utf8.INSTANCE, null)
         )));
-    childrenBuilder.add(new Field("child5", true, new Union(UnionMode.Sparse), ImmutableList.<Field>of(
+    childrenBuilder.add(new Field("child5", true, new Union(UnionMode.Sparse, new int[] { MinorType.TIMESTAMP.ordinal(), MinorType.FLOAT8.ordinal() } ), ImmutableList.<Field>of(
         new Field("child5.1", true, new Timestamp("UTC"), null),
         new Field("child5.2", true, new FloatingPoint(DOUBLE), ImmutableList.<Field>of())
         )));


Mime
View raw message