Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id BF9F8200B6C for ; Sun, 28 Aug 2016 19:45:45 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id B31F4160AB4; Sun, 28 Aug 2016 17:45:45 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 8798A160AA6 for ; Sun, 28 Aug 2016 19:45:44 +0200 (CEST) Received: (qmail 11097 invoked by uid 500); 28 Aug 2016 17:45:43 -0000 Mailing-List: contact commits-help@arrow.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@arrow.apache.org Delivered-To: mailing list commits@arrow.apache.org Received: (qmail 11088 invoked by uid 99); 28 Aug 2016 17:45:43 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 28 Aug 2016 17:45:43 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 29939DFF56; Sun, 28 Aug 2016 17:45:43 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: wesm@apache.org To: commits@arrow.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: arrow git commit: ARROW-271: Update Field structure to be more explicit Date: Sun, 28 Aug 2016 17:45:43 +0000 (UTC) archived-at: Sun, 28 Aug 2016 17:45:45 -0000 Repository: arrow Updated Branches: refs/heads/master 907cc5a12 -> e081a4c27 ARROW-271: Update Field structure to be more explicit This is a proposal. I have not updated the code depending on this yet. 
Author: Julien Le Dem Closes #124 from julienledem/record_batch and squashes the following commits: 8e42d74 [Julien Le Dem] ARROW-271: Update Field structure to be more explicit add bit_width to vector layout Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/e081a4c2 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/e081a4c2 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/e081a4c2 Branch: refs/heads/master Commit: e081a4c27a5a592251f9f325a05479d4120e30e6 Parents: 907cc5a Author: Julien Le Dem Authored: Sun Aug 28 13:45:34 2016 -0400 Committer: Wes McKinney Committed: Sun Aug 28 13:45:34 2016 -0400 ---------------------------------------------------------------------- format/Message.fbs | 26 +++++++--- .../codegen/templates/NullableValueVectors.java | 6 ++- .../arrow/vector/schema/ArrowVectorType.java | 2 +- .../apache/arrow/vector/schema/TypeLayout.java | 22 +++++++- .../arrow/vector/schema/VectorLayout.java | 54 ++++++++++++++++---- .../apache/arrow/vector/types/pojo/Field.java | 43 ++++++++-------- .../apache/arrow/vector/pojo/TestConvert.java | 2 + 7 files changed, 115 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/format/Message.fbs ---------------------------------------------------------------------- diff --git a/format/Message.fbs b/format/Message.fbs index 71428b5..9c95724 100644 --- a/format/Message.fbs +++ b/format/Message.fbs @@ -92,18 +92,32 @@ union Type { JSONScalar } +/// ---------------------------------------------------------------------- +/// The possible types of a vector + enum VectorType: short { - /// used in List type Dense Union and variable length primitive types (String, Binary) + /// used in List type, Dense Union and variable length primitive types (String, Binary) OFFSET, - /// fixed length primitive values - VALUES, - /// Bit vector 
indicated if each value is null + /// actual data, either fixed width primitive types in slots or variable width delimited by an OFFSET vector + DATA, + /// Bit vector indicating if each value is null VALIDITY, /// Type vector used in Union type TYPE } /// ---------------------------------------------------------------------- +/// represents the physical layout of a buffer +/// buffers have fixed width slots of a given type + +table VectorLayout { + /// the width of a slot in the buffer (typically 1, 8, 16, 32 or 64) + bit_width: short; + /// the purpose of the vector + type: VectorType; +} + +/// ---------------------------------------------------------------------- /// A field represents a named column in a record / row batch or child of a /// nested type. /// @@ -121,10 +135,10 @@ table Field { dictionary: long; // children apply only to Nested data types like Struct, List and Union children: [Field]; - /// the buffers produced for this type (as derived from the Type) + /// layout of buffers produced for this type (as derived from the Type) /// does not include children /// each recordbatch will return instances of those Buffers. 
- buffers: [ VectorType ]; + layout: [ VectorLayout ]; } /// ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/java/vector/src/main/codegen/templates/NullableValueVectors.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java b/java/vector/src/main/codegen/templates/NullableValueVectors.java index 6b1aa04..bb2c001 100644 --- a/java/vector/src/main/codegen/templates/NullableValueVectors.java +++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java @@ -34,6 +34,8 @@ import java.util.Collections; <#include "/@includes/vv_imports.ftl" /> +import org.apache.arrow.flatbuf.Precision; + /** * Nullable${minor.class} implements a vector of values which could be null. Elements in the vector * are first checked against a fixed length vector of boolean values. Then the element is retrieved @@ -97,9 +99,9 @@ public final class ${className} extends BaseDataValueVector implements <#if type <#elseif minor.class == "Time"> field = new Field(name, true, new org.apache.arrow.vector.types.pojo.ArrowType.Time(), null); <#elseif minor.class == "Float4"> - field = new Field(name, true, new FloatingPoint(org.apache.arrow.flatbuf.Precision.SINGLE), null); + field = new Field(name, true, new FloatingPoint(Precision.SINGLE), null); <#elseif minor.class == "Float8"> - field = new Field(name, true, new FloatingPoint(org.apache.arrow.flatbuf.Precision.DOUBLE), null); + field = new Field(name, true, new FloatingPoint(Precision.DOUBLE), null); <#elseif minor.class == "TimeStamp"> field = new Field(name, true, new org.apache.arrow.vector.types.pojo.ArrowType.Timestamp(""), null); <#elseif minor.class == "IntervalDay"> http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java 
---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java index e3d3e34..9b7fa45 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java @@ -21,7 +21,7 @@ import org.apache.arrow.flatbuf.VectorType; public class ArrowVectorType { - public static final ArrowVectorType VALUES = new ArrowVectorType(VectorType.VALUES); + public static final ArrowVectorType DATA = new ArrowVectorType(VectorType.DATA); public static final ArrowVectorType OFFSET = new ArrowVectorType(VectorType.OFFSET); public static final ArrowVectorType VALIDITY = new ArrowVectorType(VectorType.VALIDITY); public static final ArrowVectorType TYPE = new ArrowVectorType(VectorType.TYPE); http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java index 1275e0e..15cd498 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java @@ -49,6 +49,8 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Tuple; import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; +import com.google.common.base.Preconditions; + /** * The layout of vectors for a given type * It defines its own vectors followed by the vectors for the children @@ -182,7 +184,7 @@ public class TypeLayout { public TypeLayout(List vectors) { super(); - this.vectors = vectors; + this.vectors = 
Preconditions.checkNotNull(vectors); } public TypeLayout(VectorLayout... vectors) { @@ -205,4 +207,22 @@ public class TypeLayout { public String toString() { return "TypeLayout{" + vectors + "}"; } + + @Override + public int hashCode() { + return vectors.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + TypeLayout other = (TypeLayout) obj; + return vectors.equals(other.vectors); + } + } http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java index 421ebcb..532e9d2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/VectorLayout.java @@ -17,21 +17,24 @@ */ package org.apache.arrow.vector.schema; +import static org.apache.arrow.vector.schema.ArrowVectorType.DATA; import static org.apache.arrow.vector.schema.ArrowVectorType.OFFSET; import static org.apache.arrow.vector.schema.ArrowVectorType.TYPE; import static org.apache.arrow.vector.schema.ArrowVectorType.VALIDITY; -import static org.apache.arrow.vector.schema.ArrowVectorType.VALUES; -public class VectorLayout { +import com.google.common.base.Preconditions; +import com.google.flatbuffers.FlatBufferBuilder; + +public class VectorLayout implements FBSerializable { private static final VectorLayout VALIDITY_VECTOR = new VectorLayout(VALIDITY, 1); private static final VectorLayout OFFSET_VECTOR = new VectorLayout(OFFSET, 32); private static final VectorLayout TYPE_VECTOR = new VectorLayout(TYPE, 32); - private static final VectorLayout BOOLEAN_VECTOR = new VectorLayout(VALUES, 
1); - private static final VectorLayout VALUES_64 = new VectorLayout(VALUES, 64); - private static final VectorLayout VALUES_32 = new VectorLayout(VALUES, 32); - private static final VectorLayout VALUES_16 = new VectorLayout(VALUES, 16); - private static final VectorLayout VALUES_8 = new VectorLayout(VALUES, 8); + private static final VectorLayout BOOLEAN_VECTOR = new VectorLayout(DATA, 1); + private static final VectorLayout VALUES_64 = new VectorLayout(DATA, 64); + private static final VectorLayout VALUES_32 = new VectorLayout(DATA, 32); + private static final VectorLayout VALUES_16 = new VectorLayout(DATA, 16); + private static final VectorLayout VALUES_8 = new VectorLayout(DATA, 8); public static VectorLayout typeVector() { return TYPE_VECTOR; @@ -68,14 +71,21 @@ public class VectorLayout { return dataVector(8); } - private final int typeBitWidth; + private final short typeBitWidth; private final ArrowVectorType type; private VectorLayout(ArrowVectorType type, int typeBitWidth) { super(); - this.type = type; - this.typeBitWidth = typeBitWidth; + this.type = Preconditions.checkNotNull(type); + this.typeBitWidth = (short)typeBitWidth; + if (typeBitWidth <= 0) { + throw new IllegalArgumentException("bitWidth invalid: " + typeBitWidth); + } + } + + public VectorLayout(org.apache.arrow.flatbuf.VectorLayout layout) { + this(new ArrowVectorType(layout.type()), layout.bitWidth()); } public int getTypeBitWidth() { @@ -90,4 +100,28 @@ public class VectorLayout { public String toString() { return String.format("{width=%s,type=%s}", typeBitWidth, type); } + + @Override + public int hashCode() { + return 31 * (31 + type.hashCode()) + typeBitWidth; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + VectorLayout other = (VectorLayout) obj; + return type.equals(other.type) && (typeBitWidth == other.typeBitWidth); + } + + @Override + public int 
writeTo(FlatBufferBuilder builder) {; + return org.apache.arrow.flatbuf.VectorLayout.createVectorLayout(builder, typeBitWidth, type.getType()); + } + + } http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java index 36712b9..cfa1ed4 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java @@ -20,12 +20,11 @@ package org.apache.arrow.vector.types.pojo; import static org.apache.arrow.vector.types.pojo.ArrowType.getTypeForField; -import java.util.ArrayList; import java.util.List; import java.util.Objects; -import org.apache.arrow.vector.schema.ArrowVectorType; import org.apache.arrow.vector.schema.TypeLayout; +import org.apache.arrow.vector.schema.VectorLayout; import com.google.common.collect.ImmutableList; import com.google.flatbuffers.FlatBufferBuilder; @@ -37,7 +36,7 @@ public class Field { private final List children; private final TypeLayout typeLayout; - public Field(String name, boolean nullable, ArrowType type, List children) { + private Field(String name, boolean nullable, ArrowType type, List children, TypeLayout typeLayout) { this.name = name; this.nullable = nullable; this.type = type; @@ -46,34 +45,37 @@ public class Field { } else { this.children = children; } - this.typeLayout = TypeLayout.getTypeLayout(type); + this.typeLayout = typeLayout; + } + + public Field(String name, boolean nullable, ArrowType type, List children) { + this(name, nullable, type, children, TypeLayout.getTypeLayout(type)); } public static Field convertField(org.apache.arrow.flatbuf.Field field) { String name = field.name(); boolean nullable = field.nullable(); ArrowType type = 
getTypeForField(field); - List buffers = new ArrayList<>(); - for (int i = 0; i < field.buffersLength(); ++i) { - buffers.add(new ArrowVectorType(field.buffers(i))); + ImmutableList.Builder layout = ImmutableList.builder(); + for (int i = 0; i < field.layoutLength(); ++i) { + layout.add(new org.apache.arrow.vector.schema.VectorLayout(field.layout(i))); } ImmutableList.Builder childrenBuilder = ImmutableList.builder(); for (int i = 0; i < field.childrenLength(); i++) { childrenBuilder.add(convertField(field.children(i))); } List children = childrenBuilder.build(); - Field result = new Field(name, nullable, type, children); - TypeLayout typeLayout = result.getTypeLayout(); - if (typeLayout.getVectors().size() != field.buffersLength()) { - List types = new ArrayList<>(); - for (int i = 0; i < field.buffersLength(); i++) { - types.add(new ArrowVectorType(field.buffers(i))); - } - throw new IllegalArgumentException("Deserialized field does not match expected vectors. expected: " + typeLayout.getVectorTypes() + " got " + types); - } + Field result = new Field(name, nullable, type, children, new TypeLayout(layout.build())); return result; } + public void validate() { + TypeLayout expectedLayout = TypeLayout.getTypeLayout(type); + if (!expectedLayout.equals(typeLayout)) { + throw new IllegalArgumentException("Deserialized field does not match expected vectors. 
expected: " + expectedLayout + " got " + typeLayout); + } + } + public int getField(FlatBufferBuilder builder) { int nameOffset = builder.createString(name); int typeOffset = type.getType(builder); @@ -82,18 +84,19 @@ public class Field { childrenData[i] = children.get(i).getField(builder); } int childrenOffset = org.apache.arrow.flatbuf.Field.createChildrenVector(builder, childrenData); - short[] buffersData = new short[typeLayout.getVectors().size()]; + int[] buffersData = new int[typeLayout.getVectors().size()]; for (int i = 0; i < buffersData.length; i++) { - buffersData[i] = typeLayout.getVectors().get(i).getType().getType(); + VectorLayout vectorLayout = typeLayout.getVectors().get(i); + buffersData[i] = vectorLayout.writeTo(builder); } - int buffersOffset = org.apache.arrow.flatbuf.Field.createBuffersVector(builder, buffersData ); + int layoutOffset = org.apache.arrow.flatbuf.Field.createLayoutVector(builder, buffersData); org.apache.arrow.flatbuf.Field.startField(builder); org.apache.arrow.flatbuf.Field.addName(builder, nameOffset); org.apache.arrow.flatbuf.Field.addNullable(builder, nullable); org.apache.arrow.flatbuf.Field.addTypeType(builder, type.getTypeType()); org.apache.arrow.flatbuf.Field.addType(builder, typeOffset); org.apache.arrow.flatbuf.Field.addChildren(builder, childrenOffset); - org.apache.arrow.flatbuf.Field.addBuffers(builder, buffersOffset); + org.apache.arrow.flatbuf.Field.addLayout(builder, layoutOffset); return org.apache.arrow.flatbuf.Field.endField(builder); } http://git-wip-us.apache.org/repos/asf/arrow/blob/e081a4c2/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java index 61327f1..e557cc8 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java +++ 
b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java @@ -22,6 +22,8 @@ import static org.apache.arrow.flatbuf.Precision.SINGLE; import static org.junit.Assert.assertEquals; import org.apache.arrow.flatbuf.UnionMode; +import static org.junit.Assert.assertEquals; + import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.arrow.vector.types.pojo.ArrowType.List;