arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-271: Update Field structure to be more explicit
Date Sun, 28 Aug 2016 17:45:43 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 907cc5a12 -> e081a4c27


ARROW-271: Update Field structure to be more explicit

This is a proposal. I have not updated the code depending on this yet.

Author: Julien Le Dem <julien@dremio.com>

Closes #124 from julienledem/record_batch and squashes the following commits:

8e42d74 [Julien Le Dem] ARROW-271: Update Field structure to be more explicit add bit_width to vector layout


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/e081a4c2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/e081a4c2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/e081a4c2

Branch: refs/heads/master
Commit: e081a4c27a5a592251f9f325a05479d4120e30e6
Parents: 907cc5a
Author: Julien Le Dem <julien@dremio.com>
Authored: Sun Aug 28 13:45:34 2016 -0400
Committer: Wes McKinney <wesm@apache.org>
Committed: Sun Aug 28 13:45:34 2016 -0400

----------------------------------------------------------------------
 format/Message.fbs                              | 26 +++++++---
 .../codegen/templates/NullableValueVectors.java |  6 ++-
 .../arrow/vector/schema/ArrowVectorType.java    |  2 +-
 .../apache/arrow/vector/schema/TypeLayout.java  | 22 +++++++-
 .../arrow/vector/schema/VectorLayout.java       | 54 ++++++++++++++++----
 .../apache/arrow/vector/types/pojo/Field.java   | 43 ++++++++--------
 .../apache/arrow/vector/pojo/TestConvert.java   |  2 +
 7 files changed, 115 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/format/Message.fbs
----------------------------------------------------------------------
diff --git a/format/Message.fbs b/format/Message.fbs
index 71428b5..9c95724 100644
--- a/format/Message.fbs
+++ b/format/Message.fbs
@@ -92,18 +92,32 @@ union Type {
   JSONScalar
 }
 
+/// ----------------------------------------------------------------------
+/// The possible types of a vector
+
 enum VectorType: short {
-  /// used in List type Dense Union and variable length primitive types (String, Binary)
+  /// used in List type, Dense Union and variable length primitive types (String, Binary)
   OFFSET,
-  /// fixed length primitive values
-  VALUES,
-  /// Bit vector indicated if each value is null
+  /// actual data, either wixed width primitive types in slots or variable width delimited by an OFFSET vector
+  DATA,
+  /// Bit vector indicating if each value is null
   VALIDITY,
   /// Type vector used in Union type
   TYPE
 }
 
 /// ----------------------------------------------------------------------
+/// represents the physical layout of a buffer
+/// buffers have fixed width slots of a given type
+
+table VectorLayout {
+  /// the width of a slot in the buffer (typically 1, 8, 16, 32 or 64)
+  bit_width: short;
+  /// the purpose of the vector
+  type: VectorType;
+}
+
+/// ----------------------------------------------------------------------
 /// A field represents a named column in a record / row batch or child of a
 /// nested type.
 ///
@@ -121,10 +135,10 @@ table Field {
   dictionary: long;
   // children apply only to Nested data types like Struct, List and Union
   children: [Field];
-  /// the buffers produced for this type (as derived from the Type)
+  /// layout of buffers produced for this type (as derived from the Type)
   /// does not include children
   /// each recordbatch will return instances of those Buffers.
-  buffers: [ VectorType ];
+  layout: [ VectorLayout ];
 }
 
 /// ----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/java/vector/src/main/codegen/templates/NullableValueVectors.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java b/java/vector/src/main/codegen/templates/NullableValueVectors.java
index 6b1aa04..bb2c001 100644
--- a/java/vector/src/main/codegen/templates/NullableValueVectors.java
+++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java
@@ -34,6 +34,8 @@ import java.util.Collections;
 
 <#include "/@includes/vv_imports.ftl" />
 
+import org.apache.arrow.flatbuf.Precision;
+
 /**
  * Nullable${minor.class} implements a vector of values which could be null.  Elements in the vector
  * are first checked against a fixed length vector of boolean values.  Then the element is retrieved
@@ -97,9 +99,9 @@ public final class ${className} extends BaseDataValueVector implements <#if type
   <#elseif minor.class == "Time">
     field = new Field(name, true, new org.apache.arrow.vector.types.pojo.ArrowType.Time(), null);
   <#elseif minor.class == "Float4">
-    field = new Field(name, true, new FloatingPoint(org.apache.arrow.flatbuf.Precision.SINGLE), null);
+    field = new Field(name, true, new FloatingPoint(Precision.SINGLE), null);
   <#elseif minor.class == "Float8">
-    field = new Field(name, true, new FloatingPoint(org.apache.arrow.flatbuf.Precision.DOUBLE), null);
+    field = new Field(name, true, new FloatingPoint(Precision.DOUBLE), null);
   <#elseif minor.class == "TimeStamp">
     field = new Field(name, true, new org.apache.arrow.vector.types.pojo.ArrowType.Timestamp(""), null);
   <#elseif minor.class == "IntervalDay">

http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java
index e3d3e34..9b7fa45 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java
@@ -21,7 +21,7 @@ import org.apache.arrow.flatbuf.VectorType;
 
 public class ArrowVectorType {
 
-  public static final ArrowVectorType VALUES = new ArrowVectorType(VectorType.VALUES);
+  public static final ArrowVectorType DATA = new ArrowVectorType(VectorType.DATA);
   public static final ArrowVectorType OFFSET = new ArrowVectorType(VectorType.OFFSET);
   public static final ArrowVectorType VALIDITY = new ArrowVectorType(VectorType.VALIDITY);
   public static final ArrowVectorType TYPE = new ArrowVectorType(VectorType.TYPE);

http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java
index 1275e0e..15cd498 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java
@@ -49,6 +49,8 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Tuple;
 import org.apache.arrow.vector.types.pojo.ArrowType.Union;
 import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
 
+import com.google.common.base.Preconditions;
+
 /**
  * The layout of vectors for a given type
  * It defines its own vectors followed by the vectors for the children
@@ -182,7 +184,7 @@ public class TypeLayout {
 
   public TypeLayout(List<VectorLayout> vectors) {
     super();
-    this.vectors = vectors;
+    this.vectors = Preconditions.checkNotNull(vectors);
   }
 
   public TypeLayout(VectorLayout... vectors) {
@@ -205,4 +207,22 @@ public class TypeLayout {
   public String toString() {
     return "TypeLayout{" + vectors + "}";
   }
+
+  @Override
+  public int hashCode() {
+    return vectors.hashCode();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    TypeLayout other = (TypeLayout) obj;
+    return vectors.equals(other.vectors);
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java
index 421ebcb..532e9d2 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java
@@ -17,21 +17,24 @@
  */
 package org.apache.arrow.vector.schema;
 
+import static org.apache.arrow.vector.schema.ArrowVectorType.DATA;
 import static org.apache.arrow.vector.schema.ArrowVectorType.OFFSET;
 import static org.apache.arrow.vector.schema.ArrowVectorType.TYPE;
 import static org.apache.arrow.vector.schema.ArrowVectorType.VALIDITY;
-import static org.apache.arrow.vector.schema.ArrowVectorType.VALUES;
 
-public class VectorLayout {
+import com.google.common.base.Preconditions;
+import com.google.flatbuffers.FlatBufferBuilder;
+
+public class VectorLayout implements FBSerializable {
 
   private static final VectorLayout VALIDITY_VECTOR = new VectorLayout(VALIDITY, 1);
   private static final VectorLayout OFFSET_VECTOR = new VectorLayout(OFFSET, 32);
   private static final VectorLayout TYPE_VECTOR = new VectorLayout(TYPE, 32);
-  private static final VectorLayout BOOLEAN_VECTOR = new VectorLayout(VALUES, 1);
-  private static final VectorLayout VALUES_64 = new VectorLayout(VALUES, 64);
-  private static final VectorLayout VALUES_32 = new VectorLayout(VALUES, 32);
-  private static final VectorLayout VALUES_16 = new VectorLayout(VALUES, 16);
-  private static final VectorLayout VALUES_8 = new VectorLayout(VALUES, 8);
+  private static final VectorLayout BOOLEAN_VECTOR = new VectorLayout(DATA, 1);
+  private static final VectorLayout VALUES_64 = new VectorLayout(DATA, 64);
+  private static final VectorLayout VALUES_32 = new VectorLayout(DATA, 32);
+  private static final VectorLayout VALUES_16 = new VectorLayout(DATA, 16);
+  private static final VectorLayout VALUES_8 = new VectorLayout(DATA, 8);
 
   public static VectorLayout typeVector() {
     return TYPE_VECTOR;
@@ -68,14 +71,21 @@ public class VectorLayout {
     return dataVector(8);
   }
 
-  private final int typeBitWidth;
+  private final short typeBitWidth;
 
   private final ArrowVectorType type;
 
   private VectorLayout(ArrowVectorType type, int typeBitWidth) {
     super();
-    this.type = type;
-    this.typeBitWidth = typeBitWidth;
+    this.type = Preconditions.checkNotNull(type);
+    this.typeBitWidth = (short)typeBitWidth;
+    if (typeBitWidth <= 0) {
+      throw new IllegalArgumentException("bitWidth invalid: " + typeBitWidth);
+    }
+  }
+
+  public VectorLayout(org.apache.arrow.flatbuf.VectorLayout layout) {
+    this(new ArrowVectorType(layout.type()), layout.bitWidth());
   }
 
   public int getTypeBitWidth() {
@@ -90,4 +100,28 @@ public class VectorLayout {
   public String toString() {
     return String.format("{width=%s,type=%s}", typeBitWidth, type);
   }
+
+  @Override
+  public int hashCode() {
+    return 31 * (31 + type.hashCode()) + typeBitWidth;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    VectorLayout other = (VectorLayout) obj;
+    return type.equals(other.type) && (typeBitWidth == other.typeBitWidth);
+  }
+
+  @Override
+  public int writeTo(FlatBufferBuilder builder) {;
+    return org.apache.arrow.flatbuf.VectorLayout.createVectorLayout(builder, typeBitWidth, type.getType());
+  }
+
+
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java
index 36712b9..cfa1ed4 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java
@@ -20,12 +20,11 @@ package org.apache.arrow.vector.types.pojo;
 
 import static org.apache.arrow.vector.types.pojo.ArrowType.getTypeForField;
 
-import java.util.ArrayList;
 import java.util.List;
 import java.util.Objects;
 
-import org.apache.arrow.vector.schema.ArrowVectorType;
 import org.apache.arrow.vector.schema.TypeLayout;
+import org.apache.arrow.vector.schema.VectorLayout;
 
 import com.google.common.collect.ImmutableList;
 import com.google.flatbuffers.FlatBufferBuilder;
@@ -37,7 +36,7 @@ public class Field {
   private final List<Field> children;
   private final TypeLayout typeLayout;
 
-  public Field(String name, boolean nullable, ArrowType type, List<Field> children) {
+  private Field(String name, boolean nullable, ArrowType type, List<Field> children, TypeLayout typeLayout) {
     this.name = name;
     this.nullable = nullable;
     this.type = type;
@@ -46,34 +45,37 @@ public class Field {
     } else {
       this.children = children;
     }
-    this.typeLayout = TypeLayout.getTypeLayout(type);
+    this.typeLayout = typeLayout;
+  }
+
+  public Field(String name, boolean nullable, ArrowType type, List<Field> children) {
+    this(name, nullable, type, children, TypeLayout.getTypeLayout(type));
   }
 
   public static Field convertField(org.apache.arrow.flatbuf.Field field) {
     String name = field.name();
     boolean nullable = field.nullable();
     ArrowType type = getTypeForField(field);
-    List<ArrowVectorType> buffers = new ArrayList<>();
-    for (int i = 0; i < field.buffersLength(); ++i) {
-      buffers.add(new ArrowVectorType(field.buffers(i)));
+    ImmutableList.Builder<org.apache.arrow.vector.schema.VectorLayout> layout = ImmutableList.builder();
+    for (int i = 0; i < field.layoutLength(); ++i) {
+      layout.add(new org.apache.arrow.vector.schema.VectorLayout(field.layout(i)));
     }
     ImmutableList.Builder<Field> childrenBuilder = ImmutableList.builder();
     for (int i = 0; i < field.childrenLength(); i++) {
       childrenBuilder.add(convertField(field.children(i)));
     }
     List<Field> children = childrenBuilder.build();
-    Field result = new Field(name, nullable, type, children);
-    TypeLayout typeLayout = result.getTypeLayout();
-    if (typeLayout.getVectors().size() != field.buffersLength()) {
-      List<ArrowVectorType> types = new ArrayList<>();
-      for (int i = 0; i < field.buffersLength(); i++) {
-        types.add(new ArrowVectorType(field.buffers(i)));
-      }
-      throw new IllegalArgumentException("Deserialized field does not match expected vectors. expected: " + typeLayout.getVectorTypes() + " got " + types);
-    }
+    Field result = new Field(name, nullable, type, children, new TypeLayout(layout.build()));
     return result;
   }
 
+  public void validate() {
+    TypeLayout expectedLayout = TypeLayout.getTypeLayout(type);
+    if (!expectedLayout.equals(typeLayout)) {
+      throw new IllegalArgumentException("Deserialized field does not match expected vectors. expected: " + expectedLayout + " got " + typeLayout);
+    }
+  }
+
   public int getField(FlatBufferBuilder builder) {
     int nameOffset = builder.createString(name);
     int typeOffset = type.getType(builder);
@@ -82,18 +84,19 @@ public class Field {
       childrenData[i] = children.get(i).getField(builder);
     }
     int childrenOffset = org.apache.arrow.flatbuf.Field.createChildrenVector(builder, childrenData);
-    short[] buffersData = new short[typeLayout.getVectors().size()];
+    int[] buffersData = new int[typeLayout.getVectors().size()];
     for (int i = 0; i < buffersData.length; i++) {
-      buffersData[i] = typeLayout.getVectors().get(i).getType().getType();
+      VectorLayout vectorLayout = typeLayout.getVectors().get(i);
+      buffersData[i] = vectorLayout.writeTo(builder);
     }
-    int buffersOffset =  org.apache.arrow.flatbuf.Field.createBuffersVector(builder, buffersData );
+    int layoutOffset =  org.apache.arrow.flatbuf.Field.createLayoutVector(builder, buffersData);
     org.apache.arrow.flatbuf.Field.startField(builder);
     org.apache.arrow.flatbuf.Field.addName(builder, nameOffset);
     org.apache.arrow.flatbuf.Field.addNullable(builder, nullable);
     org.apache.arrow.flatbuf.Field.addTypeType(builder, type.getTypeType());
     org.apache.arrow.flatbuf.Field.addType(builder, typeOffset);
     org.apache.arrow.flatbuf.Field.addChildren(builder, childrenOffset);
-    org.apache.arrow.flatbuf.Field.addBuffers(builder, buffersOffset);
+    org.apache.arrow.flatbuf.Field.addLayout(builder, layoutOffset);
     return org.apache.arrow.flatbuf.Field.endField(builder);
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
index 61327f1..e557cc8 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
@@ -22,6 +22,8 @@ import static org.apache.arrow.flatbuf.Precision.SINGLE;
 import static org.junit.Assert.assertEquals;
 
 import org.apache.arrow.flatbuf.UnionMode;
+import static org.junit.Assert.assertEquals;
+
 import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
 import org.apache.arrow.vector.types.pojo.ArrowType.Int;
 import org.apache.arrow.vector.types.pojo.ArrowType.List;


Mime
View raw message