Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id CFB45200BAC for ; Wed, 12 Oct 2016 05:11:55 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id CE17E160AF3; Wed, 12 Oct 2016 03:11:55 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id EB1C5160AE6 for ; Wed, 12 Oct 2016 05:11:54 +0200 (CEST) Received: (qmail 41734 invoked by uid 500); 12 Oct 2016 03:11:54 -0000 Mailing-List: contact commits-help@arrow.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@arrow.apache.org Delivered-To: mailing list commits@arrow.apache.org Received: (qmail 41725 invoked by uid 99); 12 Oct 2016 03:11:54 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 12 Oct 2016 03:11:54 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id B4DF7E0902; Wed, 12 Oct 2016 03:11:53 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: julien@apache.org To: commits@arrow.apache.org Message-Id: <3c9ea21f37504f2e8d37cf48f55d5424@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: arrow git commit: ARROW-275: Add tests for UnionVector in Arrow File Date: Wed, 12 Oct 2016 03:11:53 +0000 (UTC) archived-at: Wed, 12 Oct 2016 03:11:56 -0000 Repository: arrow Updated Branches: refs/heads/master 3919a2778 -> bf749f55a ARROW-275: Add tests for UnionVector in Arrow File Author: Julien Le Dem Closes #169 from julienledem/union_test and squashes the following commits: 120f504 [Julien Le Dem] ARROW-275: Add tests for UnionVector in Arrow File Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/bf749f55 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/bf749f55 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/bf749f55 Branch: refs/heads/master Commit: bf749f55a1e24d79b08813a39ce51e9aaf6fb425 Parents: 3919a27 Author: Julien Le Dem Authored: Tue Oct 11 20:11:48 2016 -0700 Committer: Julien Le Dem Committed: Tue Oct 11 20:11:48 2016 -0700 ---------------------------------------------------------------------- .../src/main/codegen/templates/UnionReader.java | 4 + .../src/main/codegen/templates/UnionVector.java | 30 ++--- .../org/apache/arrow/vector/VectorLoader.java | 2 + .../apache/arrow/vector/schema/TypeLayout.java | 3 +- .../apache/arrow/vector/file/TestArrowFile.java | 110 ++++++++++++++++++- 5 files changed, 127 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/main/codegen/templates/UnionReader.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java index 7351ae3..c56e95c 100644 --- a/java/vector/src/main/codegen/templates/UnionReader.java +++ b/java/vector/src/main/codegen/templates/UnionReader.java @@ -134,6 +134,10 @@ public class UnionReader extends AbstractFieldReader { + public int size() { + return getReaderForIndex(idx()).size(); + } + <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> <#assign uncappedName = name?uncap_first/> <#assign boxedType = (minor.boxedType!type.boxedType) /> http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/main/codegen/templates/UnionVector.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index b14314d..5ca3f90 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -15,17 +15,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -import com.google.common.collect.ImmutableList; -import com.google.flatbuffers.FlatBufferBuilder; -import io.netty.buffer.ArrowBuf; -import org.apache.arrow.flatbuf.Field; -import org.apache.arrow.flatbuf.Type; -import org.apache.arrow.flatbuf.Union; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.types.pojo.ArrowType; - -import java.util.ArrayList; import java.util.List; <@pp.dropOutputFile /> @@ -39,7 +28,9 @@ package org.apache.arrow.vector.complex; <#include "/@includes/vv_imports.ftl" /> import com.google.common.collect.ImmutableList; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; +import org.apache.arrow.vector.BaseDataValueVector; import org.apache.arrow.vector.complex.impl.ComplexCopier; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.schema.ArrowFieldNode; @@ -47,6 +38,7 @@ import org.apache.arrow.vector.schema.ArrowFieldNode; import static org.apache.arrow.flatbuf.UnionMode.Sparse; + /* * This class is generated using freemarker and the ${.template_name} template. */ @@ -81,6 +73,7 @@ public class UnionVector implements FieldVector { private ValueVector singleVector; private final CallBack callBack; + private final List innerVectors; public UnionVector(String name, BufferAllocator allocator, CallBack callBack) { this.name = name; @@ -88,6 +81,7 @@ public class UnionVector implements FieldVector { this.internalMap = new MapVector("internal", allocator, callBack); this.typeVector = new UInt1Vector("types", allocator); this.callBack = callBack; + this.innerVectors = Collections.unmodifiableList(Arrays.asList(typeVector)); } public BufferAllocator getAllocator() { @@ -101,30 +95,28 @@ public class UnionVector implements FieldVector { @Override public void initializeChildrenFromFields(List children) { - getMap().initializeChildrenFromFields(children); + internalMap.initializeChildrenFromFields(children); } @Override public List getChildrenFromFields() { - return getMap().getChildrenFromFields(); + return internalMap.getChildrenFromFields(); } @Override public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - // TODO - throw new UnsupportedOperationException(); + BaseDataValueVector.load(getFieldInnerVectors(), ownBuffers); + this.valueCount = fieldNode.getLength(); } @Override public List getFieldBuffers() { - // TODO - throw new UnsupportedOperationException(); + return BaseDataValueVector.unload(getFieldInnerVectors()); } @Override public List getFieldInnerVectors() { - // TODO - throw new UnsupportedOperationException(); + return this.innerVectors; } public NullableMapVector getMap() { http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index 58ac68b..b7040da 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -74,6 +74,8 @@ public class VectorLoader { } private void loadBuffers(FieldVector vector, Field field, Iterator buffers, Iterator nodes) { + checkArgument(nodes.hasNext(), + "no more field nodes for for field " + field + " and vector " + vector); ArrowFieldNode fieldNode = nodes.next(); List typeLayout = field.getTypeLayout().getVectors(); List ownBuffers = new ArrayList<>(typeLayout.size()); http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java index 06ae203..c5f53fe 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java @@ -82,8 +82,7 @@ public class TypeLayout { break; case UnionMode.Sparse: vectors = asList( - validityVector(), - typeVector() + typeVector() // type of the value at the index or 0 if null ); break; default: http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java index 7a5e7b5..0f28d53 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java @@ -266,7 +266,7 @@ public class TestArrowFile { Assert.assertEquals(i % 3, rootReader.reader("list").size()); NullableTimeStampHolder h = new NullableTimeStampHolder(); rootReader.reader("map").reader("timestamp").read(h); - Assert.assertEquals(i, h.value % COUNT); + Assert.assertEquals(i, h.value); } } @@ -339,4 +339,112 @@ public class TestArrowFile { } } + @Test + public void testWriteReadUnion() throws IOException { + File file = new File("target/mytest_write_union.arrow"); + int count = COUNT; + try ( + BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); + NullableMapVector parent = new NullableMapVector("parent", vectorAllocator, null)) { + + writeUnionData(count, parent); + + printVectors(parent.getChildrenFromFields()); + + validateUnionData(count, parent); + + write(parent.getChild("root"), file); + } + // read + try ( + BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); + FileInputStream fileInputStream = new FileInputStream(file); + ArrowReader arrowReader = new ArrowReader(fileInputStream.getChannel(), readerAllocator); + BufferAllocator vectorAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); + NullableMapVector parent = new NullableMapVector("parent", vectorAllocator, null) + ) { + ArrowFooter footer = arrowReader.readFooter(); + Schema schema = footer.getSchema(); + LOGGER.debug("reading schema: " + schema); + + // initialize vectors + + NullableMapVector root = parent.addOrGet("root", MinorType.MAP, NullableMapVector.class); + VectorLoader vectorLoader = new VectorLoader(schema, root); + + List recordBatches = footer.getRecordBatches(); + for (ArrowBlock rbBlock : recordBatches) { + try (ArrowRecordBatch recordBatch = arrowReader.readRecordBatch(rbBlock)) { + vectorLoader.load(recordBatch); + } + validateUnionData(count, parent); + } + } + } + + public void validateUnionData(int count, MapVector parent) { + MapReader rootReader = new SingleMapReaderImpl(parent).reader("root"); + for (int i = 0; i < count; i++) { + rootReader.setPosition(i); + switch (i % 4) { + case 0: + Assert.assertEquals(i, rootReader.reader("union").readInteger().intValue()); + break; + case 1: + Assert.assertEquals(i, rootReader.reader("union").readLong().longValue()); + break; + case 2: + Assert.assertEquals(i % 3, rootReader.reader("union").size()); + break; + case 3: + NullableTimeStampHolder h = new NullableTimeStampHolder(); + rootReader.reader("union").reader("timestamp").read(h); + Assert.assertEquals(i, h.value); + break; + } + } + } + + public void writeUnionData(int count, NullableMapVector parent) { + ArrowBuf varchar = allocator.buffer(3); + varchar.readerIndex(0); + varchar.setByte(0, 'a'); + varchar.setByte(1, 'b'); + varchar.setByte(2, 'c'); + varchar.writerIndex(3); + ComplexWriter writer = new ComplexWriterImpl("root", parent); + MapWriter rootWriter = writer.rootAsMap(); + IntWriter intWriter = rootWriter.integer("union"); + BigIntWriter bigIntWriter = rootWriter.bigInt("union"); + ListWriter listWriter = rootWriter.list("union"); + MapWriter mapWriter = rootWriter.map("union"); + for (int i = 0; i < count; i++) { + switch (i % 4) { + case 0: + intWriter.setPosition(i); + intWriter.writeInt(i); + break; + case 1: + bigIntWriter.setPosition(i); + bigIntWriter.writeBigInt(i); + break; + case 2: + listWriter.setPosition(i); + listWriter.startList(); + for (int j = 0; j < i % 3; j++) { + listWriter.varChar().writeVarChar(0, 3, varchar); + } + listWriter.endList(); + break; + case 3: + mapWriter.setPosition(i); + mapWriter.start(); + mapWriter.timeStamp("timestamp").writeTimeStamp(i); + mapWriter.end(); + break; + } + } + writer.setValueCount(count); + varchar.release(); + } }