Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id E51E7200C4E for ; Thu, 23 Mar 2017 04:08:07 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id E3D66160B91; Thu, 23 Mar 2017 03:08:07 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 360FF160B86 for ; Thu, 23 Mar 2017 04:08:07 +0100 (CET) Received: (qmail 80074 invoked by uid 500); 23 Mar 2017 03:08:06 -0000 Mailing-List: contact commits-help@arrow.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@arrow.apache.org Delivered-To: mailing list commits@arrow.apache.org Received: (qmail 80065 invoked by uid 99); 23 Mar 2017 03:08:06 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 23 Mar 2017 03:08:06 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 4EAC7DFE34; Thu, 23 Mar 2017 03:08:06 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: wesm@apache.org To: commits@arrow.apache.org Message-Id: <5584a614053e46a5a42620d2e83784e2@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: arrow git commit: ARROW-691: [Java] Encode dictionary type in message format Date: Thu, 23 Mar 2017 03:08:06 +0000 (UTC) archived-at: Thu, 23 Mar 2017 03:08:08 -0000 Repository: arrow Updated Branches: refs/heads/master bf2acf6cb -> 990e2bde7 ARROW-691: [Java] Encode dictionary type in message format Author: Emilio Lahr-Vivaz Closes #422 from elahrvivaz/ARROW-691 and squashes the following commits: c1adad1 [Emilio Lahr-Vivaz] ARROW-691 Encode dictionary type in message format Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/990e2bde Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/990e2bde Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/990e2bde Branch: refs/heads/master Commit: 990e2bde758ac8bc6e4497ae1bc37f89b71bb5cf Parents: bf2acf6 Author: Emilio Lahr-Vivaz Authored: Wed Mar 22 23:08:01 2017 -0400 Committer: Wes McKinney Committed: Wed Mar 22 23:08:01 2017 -0400 ---------------------------------------------------------------------- .../vector/types/pojo/DictionaryEncoding.java | 18 ++++++++++++++++++ .../org/apache/arrow/vector/types/pojo/Field.java | 3 ++- .../vector/stream/MessageSerializerTest.java | 15 +++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/990e2bde/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java index 6d35cde..32568d3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java @@ -18,6 +18,8 @@ ******************************************************************************/ package org.apache.arrow.vector.types.pojo; +import java.util.Objects; + import org.apache.arrow.vector.types.pojo.ArrowType.Int; public class DictionaryEncoding { @@ -48,4 +50,20 @@ public class DictionaryEncoding { public String toString() { return "DictionaryEncoding[id=" + id + ",ordered=" + ordered + ",indexType=" + indexType + "]"; } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (o == null || getClass() != o.getClass()) { + return false; + } + DictionaryEncoding that = (DictionaryEncoding) o; + return id == that.id && ordered == that.ordered && Objects.equals(indexType, that.indexType); + } + + @Override + public int hashCode() { + return Objects.hash(id, ordered, indexType); + } } http://git-wip-us.apache.org/repos/asf/arrow/blob/990e2bde/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java index c310b90..011f0e6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java @@ -121,10 +121,11 @@ public class Field { int typeOffset = type.getType(builder); int dictionaryOffset = -1; if (dictionary != null) { - // TODO encode dictionary type - currently type is only signed 32 bit int (default null) + int dictionaryType = dictionary.getIndexType().getType(builder); org.apache.arrow.flatbuf.DictionaryEncoding.startDictionaryEncoding(builder); org.apache.arrow.flatbuf.DictionaryEncoding.addId(builder, dictionary.getId()); org.apache.arrow.flatbuf.DictionaryEncoding.addIsOrdered(builder, dictionary.isOrdered()); + org.apache.arrow.flatbuf.DictionaryEncoding.addIndexType(builder, dictionaryType); dictionaryOffset = org.apache.arrow.flatbuf.DictionaryEncoding.endDictionaryEncoding(builder); } int[] childrenData = new int[children.size()]; http://git-wip-us.apache.org/repos/asf/arrow/blob/990e2bde/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java b/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java index bb2ccf8..d3d49d5 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java @@ -37,6 +37,7 @@ import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.schema.ArrowMessage; import org.apache.arrow.vector.schema.ArrowRecordBatch; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Test; @@ -73,6 +74,20 @@ public class MessageSerializerTest { } @Test + public void testSchemaDictionaryMessageSerialization() throws IOException { + DictionaryEncoding dictionary = new DictionaryEncoding(9L, false, new ArrowType.Int(8, true)); + Field field = new Field("test", true, ArrowType.Utf8.INSTANCE, dictionary, null); + Schema schema = new Schema(Collections.singletonList(field)); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + long size = MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), schema); + assertEquals(size, out.toByteArray().length); + + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + Schema deserialized = MessageSerializer.deserializeSchema(new ReadChannel(Channels.newChannel(in))); + assertEquals(schema, deserialized); + } + + @Test public void testSerializeRecordBatch() throws IOException { byte[] validity = new byte[] { (byte)255, 0}; // second half is "undefined"