arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jul...@apache.org
Subject arrow git commit: ARROW-372: json vector serialization format
Date Wed, 09 Nov 2016 16:55:56 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 6996c17f7 -> 4fa7ac4f6


ARROW-372: json vector serialization format

This format serializes the vectors in JSON.
It is not a generic JSON to arrow converter but rather a human readable version of the vectors
to help with tests.

Author: Julien Le Dem <julien@dremio.com>

Closes #201 from julienledem/json_file and squashes the following commits:

2e63bec [Julien Le Dem] add missing license
5588729 [Julien Le Dem] refactor tests, improve format
5ef5356 [Julien Le Dem] improve format to allow empty column name
746430c [Julien Le Dem] ARROW-372: Create JSON arrow file format for integration tests


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/4fa7ac4f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/4fa7ac4f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/4fa7ac4f

Branch: refs/heads/master
Commit: 4fa7ac4f6ca30c34a73fb84d9d56d54aed96491b
Parents: 6996c17
Author: Julien Le Dem <julien@dremio.com>
Authored: Wed Nov 9 08:55:51 2016 -0800
Committer: Julien Le Dem <julien@dremio.com>
Committed: Wed Nov 9 08:55:51 2016 -0800

----------------------------------------------------------------------
 .../arrow/vector/file/json/JsonFileReader.java  | 223 +++++++++++++++++++
 .../arrow/vector/file/json/JsonFileWriter.java  | 167 ++++++++++++++
 .../apache/arrow/vector/file/BaseFileTest.java  | 220 ++++++++++++++++++
 .../apache/arrow/vector/file/TestArrowFile.java | 200 +----------------
 .../arrow/vector/file/json/TestJSONFile.java    | 120 ++++++++++
 5 files changed, 741 insertions(+), 189 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/4fa7ac4f/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
new file mode 100644
index 0000000..859a3a0
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
@@ -0,0 +1,223 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.arrow.vector.file.json;
+
+import static com.fasterxml.jackson.core.JsonToken.END_ARRAY;
+import static com.fasterxml.jackson.core.JsonToken.END_OBJECT;
+import static com.fasterxml.jackson.core.JsonToken.START_ARRAY;
+import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeStampVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ValueVector.Mutator;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.schema.ArrowVectorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
+import com.fasterxml.jackson.databind.MappingJsonFactory;
+import com.google.common.base.Objects;
+
+public class JsonFileReader {
+  private final File inputFile;
+  private final JsonParser parser;
+  private final BufferAllocator allocator;
+  private Schema schema;
+
+  public JsonFileReader(File inputFile, BufferAllocator allocator) throws JsonParseException,
IOException {
+    super();
+    this.inputFile = inputFile;
+    this.allocator = allocator;
+    MappingJsonFactory jsonFactory = new MappingJsonFactory();
+    this.parser = jsonFactory.createParser(inputFile);
+  }
+
+  public Schema start() throws JsonParseException, IOException {
+    readToken(START_OBJECT);
+    {
+      this.schema = readNextField("schema", Schema.class);
+      nextFieldIs("batches");
+      readToken(START_ARRAY);
+      return schema;
+    }
+  }
+
+  public VectorSchemaRoot read() throws IOException {
+    VectorSchemaRoot recordBatch = new VectorSchemaRoot(schema, allocator);
+    readToken(START_OBJECT);
+    {
+      int count = readNextField("count", Integer.class);
+      recordBatch.setRowCount(count);
+      nextFieldIs("columns");
+      readToken(START_ARRAY);
+      {
+        for (Field field : schema.getFields()) {
+          FieldVector vector = recordBatch.getVector(field.getName());
+          readVector(field, vector);
+        }
+      }
+      readToken(END_ARRAY);
+    }
+    readToken(END_OBJECT);
+    return recordBatch;
+  }
+
+  private void readVector(Field field, FieldVector vector) throws JsonParseException, IOException
{
+    List<ArrowVectorType> vectorTypes = field.getTypeLayout().getVectorTypes();
+    List<BufferBacked> fieldInnerVectors = vector.getFieldInnerVectors();
+    if (vectorTypes.size() != fieldInnerVectors.size()) {
+      throw new IllegalArgumentException("vector types and inner vectors are not the same
size: " + vectorTypes.size() + " != " + fieldInnerVectors.size());
+    }
+    readToken(START_OBJECT);
+    {
+      String name = readNextField("name", String.class);
+      if (!Objects.equal(field.getName(), name)) {
+        throw new IllegalArgumentException("Expected field " + field.getName() + " but got
" + name);
+      }
+      int count = readNextField("count", Integer.class);
+      for (int v = 0; v < vectorTypes.size(); v++) {
+        ArrowVectorType vectorType = vectorTypes.get(v);
+        BufferBacked innerVector = fieldInnerVectors.get(v);
+        nextFieldIs(vectorType.getName());
+        readToken(START_ARRAY);
+        ValueVector valueVector = (ValueVector)innerVector;
+        valueVector.allocateNew();
+        Mutator mutator = valueVector.getMutator();
+        mutator.setValueCount(count);
+        for (int i = 0; i < count; i++) {
+          parser.nextToken();
+          setValueFromParser(valueVector, i);
+        }
+        readToken(END_ARRAY);
+      }
+      // if children
+      List<Field> fields = field.getChildren();
+      if (!fields.isEmpty()) {
+        List<FieldVector> vectorChildren = vector.getChildrenFromFields();
+        if (fields.size() != vectorChildren.size()) {
+          throw new IllegalArgumentException("fields and children are not the same size:
" + fields.size() + " != " + vectorChildren.size());
+        }
+        nextFieldIs("children");
+        readToken(START_ARRAY);
+        for (int i = 0; i < fields.size(); i++) {
+          Field childField = fields.get(i);
+          FieldVector childVector = vectorChildren.get(i);
+          readVector(childField, childVector);
+        }
+        readToken(END_ARRAY);
+      }
+    }
+    readToken(END_OBJECT);
+  }
+
+  private void setValueFromParser(ValueVector valueVector, int i) throws IOException {
+    switch (valueVector.getMinorType()) {
+    case BIT:
+      ((BitVector)valueVector).getMutator().set(i, parser.readValueAs(Boolean.class) ? 1
: 0);
+      break;
+    case TINYINT:
+      ((TinyIntVector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+      break;
+    case SMALLINT:
+      ((SmallIntVector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+      break;
+    case INT:
+      ((IntVector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+      break;
+    case BIGINT:
+      ((BigIntVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class));
+      break;
+    case UINT1:
+      ((UInt1Vector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+      break;
+    case UINT2:
+      ((UInt2Vector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+      break;
+    case UINT4:
+      ((UInt4Vector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+      break;
+    case UINT8:
+      ((UInt8Vector)valueVector).getMutator().set(i, parser.readValueAs(Long.class));
+      break;
+    case FLOAT4:
+      ((Float4Vector)valueVector).getMutator().set(i, parser.readValueAs(Float.class));
+      break;
+    case FLOAT8:
+      ((Float8Vector)valueVector).getMutator().set(i, parser.readValueAs(Double.class));
+      break;
+    case VARCHAR:
+      ((VarCharVector)valueVector).getMutator().setSafe(i, parser.readValueAs(String.class).getBytes(UTF_8));
+      break;
+    case TIMESTAMP:
+      ((TimeStampVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class));
+      break;
+    default:
+      throw new UnsupportedOperationException("minor type: " + valueVector.getMinorType());
+    }
+  }
+
+  public void close() throws IOException {
+    readToken(END_ARRAY);
+    readToken(END_OBJECT);
+    parser.close();
+  }
+
+  private <T> T readNextField(String expectedFieldName, Class<T> c) throws IOException,
JsonParseException {
+    nextFieldIs(expectedFieldName);
+    parser.nextToken();
+    return parser.readValueAs(c);
+  }
+
+  private void nextFieldIs(String expectedFieldName) throws IOException, JsonParseException
{
+    String name = parser.nextFieldName();
+    if (name == null || !name.equals(expectedFieldName)) {
+      throw new IllegalStateException("Expected " + expectedFieldName + " but got " + name);
+    }
+  }
+
+  private void readToken(JsonToken expected) throws JsonParseException, IOException {
+    JsonToken t = parser.nextToken();
+    if (t != expected) {
+      throw new IllegalStateException("Expected " + expected + " but got " + t);
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/arrow/blob/4fa7ac4f/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
new file mode 100644
index 0000000..47c1a7d
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
@@ -0,0 +1,167 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.arrow.vector.file.json;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.TimeStampVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ValueVector.Accessor;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.schema.ArrowVectorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import com.fasterxml.jackson.core.JsonEncoding;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.util.DefaultPrettyPrinter;
+import com.fasterxml.jackson.core.util.DefaultPrettyPrinter.NopIndenter;
+import com.fasterxml.jackson.databind.MappingJsonFactory;
+
+public class JsonFileWriter {
+
+  public static final class JSONWriteConfig {
+    private final boolean pretty;
+    private JSONWriteConfig(boolean pretty) {
+      this.pretty = pretty;
+    }
+    private JSONWriteConfig() {
+      this.pretty = false;
+    }
+    public JSONWriteConfig pretty(boolean pretty) {
+      return new JSONWriteConfig(pretty);
+    }
+  }
+
+  public static JSONWriteConfig config() {
+    return new JSONWriteConfig();
+  }
+
+  private final JsonGenerator generator;
+  private Schema schema;
+
+  public JsonFileWriter(File outputFile) throws IOException {
+    this(outputFile, config());
+  }
+
+  public JsonFileWriter(File outputFile, JSONWriteConfig config) throws IOException {
+    MappingJsonFactory jsonFactory = new MappingJsonFactory();
+    this.generator = jsonFactory.createGenerator(outputFile, JsonEncoding.UTF8);
+    if (config.pretty) {
+      DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter();
+      prettyPrinter.indentArraysWith(NopIndenter.instance);
+      this.generator.setPrettyPrinter(prettyPrinter);
+    }
+  }
+
+  public void start(Schema schema) throws IOException {
+    this.schema = schema;
+    generator.writeStartObject();
+    generator.writeObjectField("schema", schema);
+    generator.writeArrayFieldStart("batches");
+  }
+
+  public void write(VectorSchemaRoot recordBatch) throws IOException {
+    if (!recordBatch.getSchema().equals(schema)) {
+      throw new IllegalArgumentException("record batches must have the same schema: " + schema);
+    }
+    generator.writeStartObject();
+    {
+      generator.writeObjectField("count", recordBatch.getRowCount());
+      generator.writeArrayFieldStart("columns");
+      for (Field field : schema.getFields()) {
+        FieldVector vector = recordBatch.getVector(field.getName());
+        writeVector(field, vector);
+      }
+      generator.writeEndArray();
+    }
+    generator.writeEndObject();
+  }
+
+  private void writeVector(Field field, FieldVector vector) throws IOException {
+    List<ArrowVectorType> vectorTypes = field.getTypeLayout().getVectorTypes();
+    List<BufferBacked> fieldInnerVectors = vector.getFieldInnerVectors();
+    if (vectorTypes.size() != fieldInnerVectors.size()) {
+      throw new IllegalArgumentException("vector types and inner vectors are not the same
size: " + vectorTypes.size() + " != " + fieldInnerVectors.size());
+    }
+    generator.writeStartObject();
+    {
+      generator.writeObjectField("name", field.getName());
+      int valueCount = vector.getAccessor().getValueCount();
+      generator.writeObjectField("count", valueCount);
+      for (int v = 0; v < vectorTypes.size(); v++) {
+        ArrowVectorType vectorType = vectorTypes.get(v);
+        BufferBacked innerVector = fieldInnerVectors.get(v);
+        generator.writeArrayFieldStart(vectorType.getName());
+        ValueVector valueVector = (ValueVector)innerVector;
+        for (int i = 0; i < valueCount; i++) {
+          writeValueToGenerator(valueVector, i);
+        }
+        generator.writeEndArray();
+      }
+      List<Field> fields = field.getChildren();
+      List<FieldVector> children = vector.getChildrenFromFields();
+      if (fields.size() != children.size()) {
+        throw new IllegalArgumentException("fields and children are not the same size: "
+ fields.size() + " != " + children.size());
+      }
+      if (fields.size() > 0) {
+        generator.writeArrayFieldStart("children");
+        for (int i = 0; i < fields.size(); i++) {
+          Field childField = fields.get(i);
+          FieldVector childVector = children.get(i);
+          writeVector(childField, childVector);
+        }
+        generator.writeEndArray();
+      }
+    }
+    generator.writeEndObject();
+  }
+
+  private void writeValueToGenerator(ValueVector valueVector, int i) throws IOException {
+    switch (valueVector.getMinorType()) {
+      case TIMESTAMP:
+        generator.writeNumber(((TimeStampVector)valueVector).getAccessor().get(i));
+        break;
+      case BIT:
+        generator.writeNumber(((BitVector)valueVector).getAccessor().get(i));
+        break;
+      default:
+        // TODO: each type
+        Accessor accessor = valueVector.getAccessor();
+        Object value = accessor.getObject(i);
+        if (value instanceof Number || value instanceof Boolean) {
+          generator.writeObject(value);
+        } else {
+          generator.writeObject(value.toString());
+        }
+        break;
+    }
+  }
+
+  public void close() throws IOException {
+    generator.writeEndArray();
+    generator.writeEndObject();
+    generator.close();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/arrow/blob/4fa7ac4f/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java b/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java
new file mode 100644
index 0000000..6e577b5
--- /dev/null
+++ b/java/vector/src/test/java/org/apache/arrow/vector/file/BaseFileTest.java
@@ -0,0 +1,220 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.file;
+
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector.Accessor;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.NullableMapVector;
+import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.arrow.vector.complex.writer.BigIntWriter;
+import org.apache.arrow.vector.complex.writer.IntWriter;
+import org.apache.arrow.vector.holders.NullableTimeStampHolder;
+import org.joda.time.DateTimeZone;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import io.netty.buffer.ArrowBuf;
+
+/**
+ * Helps testing the file formats
+ */
+public class BaseFileTest {
+  private static final Logger LOGGER = LoggerFactory.getLogger(BaseFileTest.class);
+  protected static final int COUNT = 10;
+  protected BufferAllocator allocator;
+
+  private DateTimeZone defaultTimezone = DateTimeZone.getDefault();
+
+  @Before
+  public void init() {
+    DateTimeZone.setDefault(DateTimeZone.forOffsetHours(2));
+    allocator = new RootAllocator(Integer.MAX_VALUE);
+  }
+
+  @After
+  public void tearDown() {
+    allocator.close();
+    DateTimeZone.setDefault(defaultTimezone);
+  }
+
+  protected void validateContent(int count, VectorSchemaRoot root) {
+    for (int i = 0; i < count; i++) {
+      Assert.assertEquals(i, root.getVector("int").getAccessor().getObject(i));
+      Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getAccessor().getObject(i));
+    }
+  }
+
+  protected void writeComplexData(int count, MapVector parent) {
+    ArrowBuf varchar = allocator.buffer(3);
+    varchar.readerIndex(0);
+    varchar.setByte(0, 'a');
+    varchar.setByte(1, 'b');
+    varchar.setByte(2, 'c');
+    varchar.writerIndex(3);
+    ComplexWriter writer = new ComplexWriterImpl("root", parent);
+    MapWriter rootWriter = writer.rootAsMap();
+    IntWriter intWriter = rootWriter.integer("int");
+    BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+    ListWriter listWriter = rootWriter.list("list");
+    MapWriter mapWriter = rootWriter.map("map");
+    for (int i = 0; i < count; i++) {
+      if (i % 5 != 3) {
+        intWriter.setPosition(i);
+        intWriter.writeInt(i);
+      }
+      bigIntWriter.setPosition(i);
+      bigIntWriter.writeBigInt(i);
+      listWriter.setPosition(i);
+      listWriter.startList();
+      for (int j = 0; j < i % 3; j++) {
+        listWriter.varChar().writeVarChar(0, 3, varchar);
+      }
+      listWriter.endList();
+      mapWriter.setPosition(i);
+      mapWriter.start();
+      mapWriter.timeStamp("timestamp").writeTimeStamp(i);
+      mapWriter.end();
+    }
+    writer.setValueCount(count);
+    varchar.release();
+  }
+
+  public void printVectors(List<FieldVector> vectors) {
+    for (FieldVector vector : vectors) {
+      LOGGER.debug(vector.getField().getName());
+      Accessor accessor = vector.getAccessor();
+      int valueCount = accessor.getValueCount();
+      for (int i = 0; i < valueCount; i++) {
+        LOGGER.debug(String.valueOf(accessor.getObject(i)));
+      }
+    }
+  }
+
+  protected void validateComplexContent(int count, VectorSchemaRoot root) {
+    Assert.assertEquals(count, root.getRowCount());
+    printVectors(root.getFieldVectors());
+    for (int i = 0; i < count; i++) {
+      Object intVal = root.getVector("int").getAccessor().getObject(i);
+      if (i % 5 != 3) {
+        Assert.assertEquals(i, intVal);
+      } else {
+        Assert.assertNull(intVal);
+      }
+      Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getAccessor().getObject(i));
+      Assert.assertEquals(i % 3, ((List<?>)root.getVector("list").getAccessor().getObject(i)).size());
+      NullableTimeStampHolder h = new NullableTimeStampHolder();
+      FieldReader mapReader = root.getVector("map").getReader();
+      mapReader.setPosition(i);
+      mapReader.reader("timestamp").read(h);
+      Assert.assertEquals(i, h.value);
+    }
+  }
+
+  protected void writeData(int count, MapVector parent) {
+    ComplexWriter writer = new ComplexWriterImpl("root", parent);
+    MapWriter rootWriter = writer.rootAsMap();
+    IntWriter intWriter = rootWriter.integer("int");
+    BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+    for (int i = 0; i < count; i++) {
+      intWriter.setPosition(i);
+      intWriter.writeInt(i);
+      bigIntWriter.setPosition(i);
+      bigIntWriter.writeBigInt(i);
+    }
+    writer.setValueCount(count);
+  }
+
+  public void validateUnionData(int count, VectorSchemaRoot root) {
+    FieldReader unionReader = root.getVector("union").getReader();
+    for (int i = 0; i < count; i++) {
+      unionReader.setPosition(i);
+      switch (i % 4) {
+      case 0:
+        Assert.assertEquals(i, unionReader.readInteger().intValue());
+        break;
+      case 1:
+        Assert.assertEquals(i, unionReader.readLong().longValue());
+        break;
+      case 2:
+        Assert.assertEquals(i % 3, unionReader.size());
+        break;
+      case 3:
+        NullableTimeStampHolder h = new NullableTimeStampHolder();
+        unionReader.reader("timestamp").read(h);
+        Assert.assertEquals(i, h.value);
+        break;
+      }
+    }
+  }
+
+  public void writeUnionData(int count, NullableMapVector parent) {
+    ArrowBuf varchar = allocator.buffer(3);
+    varchar.readerIndex(0);
+    varchar.setByte(0, 'a');
+    varchar.setByte(1, 'b');
+    varchar.setByte(2, 'c');
+    varchar.writerIndex(3);
+    ComplexWriter writer = new ComplexWriterImpl("root", parent);
+    MapWriter rootWriter = writer.rootAsMap();
+    IntWriter intWriter = rootWriter.integer("union");
+    BigIntWriter bigIntWriter = rootWriter.bigInt("union");
+    ListWriter listWriter = rootWriter.list("union");
+    MapWriter mapWriter = rootWriter.map("union");
+    for (int i = 0; i < count; i++) {
+      switch (i % 4) {
+      case 0:
+        intWriter.setPosition(i);
+        intWriter.writeInt(i);
+        break;
+      case 1:
+        bigIntWriter.setPosition(i);
+        bigIntWriter.writeBigInt(i);
+        break;
+      case 2:
+        listWriter.setPosition(i);
+        listWriter.startList();
+        for (int j = 0; j < i % 3; j++) {
+          listWriter.varChar().writeVarChar(0, 3, varchar);
+        }
+        listWriter.endList();
+        break;
+      case 3:
+        mapWriter.setPosition(i);
+        mapWriter.start();
+        mapWriter.timeStamp("timestamp").writeTimeStamp(i);
+        mapWriter.end();
+        break;
+      }
+    }
+    writer.setValueCount(count);
+    varchar.release();
+  }
+}

http://git-wip-us.apache.org/repos/asf/arrow/blob/4fa7ac4f/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
index e97bc14..c9e60ee 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
@@ -27,53 +27,22 @@ import java.io.IOException;
 import java.util.List;
 
 import org.apache.arrow.memory.BufferAllocator;
-import org.apache.arrow.memory.RootAllocator;
 import org.apache.arrow.vector.FieldVector;
-import org.apache.arrow.vector.ValueVector.Accessor;
 import org.apache.arrow.vector.VectorLoader;
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.VectorUnloader;
 import org.apache.arrow.vector.complex.MapVector;
 import org.apache.arrow.vector.complex.NullableMapVector;
-import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
-import org.apache.arrow.vector.complex.reader.FieldReader;
-import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
-import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
-import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
-import org.apache.arrow.vector.complex.writer.BigIntWriter;
-import org.apache.arrow.vector.complex.writer.IntWriter;
-import org.apache.arrow.vector.holders.NullableTimeStampHolder;
 import org.apache.arrow.vector.schema.ArrowBuffer;
 import org.apache.arrow.vector.schema.ArrowRecordBatch;
 import org.apache.arrow.vector.types.pojo.Schema;
-import org.joda.time.DateTimeZone;
-import org.junit.After;
 import org.junit.Assert;
-import org.junit.Before;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import io.netty.buffer.ArrowBuf;
-
-public class TestArrowFile {
+public class TestArrowFile extends BaseFileTest {
   private static final Logger LOGGER = LoggerFactory.getLogger(TestArrowFile.class);
-  private static final int COUNT = 10;
-  private BufferAllocator allocator;
-
-  private DateTimeZone defaultTimezone = DateTimeZone.getDefault();
-
-  @Before
-  public void init() {
-    DateTimeZone.setDefault(DateTimeZone.forOffsetHours(2));
-    allocator = new RootAllocator(Integer.MAX_VALUE);
-  }
-
-  @After
-  public void tearDown() {
-    allocator.close();
-    DateTimeZone.setDefault(defaultTimezone);
-  }
 
   @Test
   public void testWrite() throws IOException {
@@ -101,54 +70,6 @@ public class TestArrowFile {
     }
   }
 
-  private void writeComplexData(int count, MapVector parent) {
-    ArrowBuf varchar = allocator.buffer(3);
-    varchar.readerIndex(0);
-    varchar.setByte(0, 'a');
-    varchar.setByte(1, 'b');
-    varchar.setByte(2, 'c');
-    varchar.writerIndex(3);
-    ComplexWriter writer = new ComplexWriterImpl("root", parent);
-    MapWriter rootWriter = writer.rootAsMap();
-    IntWriter intWriter = rootWriter.integer("int");
-    BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
-    ListWriter listWriter = rootWriter.list("list");
-    MapWriter mapWriter = rootWriter.map("map");
-    for (int i = 0; i < count; i++) {
-      intWriter.setPosition(i);
-      intWriter.writeInt(i);
-      bigIntWriter.setPosition(i);
-      bigIntWriter.writeBigInt(i);
-      listWriter.setPosition(i);
-      listWriter.startList();
-      for (int j = 0; j < i % 3; j++) {
-        listWriter.varChar().writeVarChar(0, 3, varchar);
-      }
-      listWriter.endList();
-      mapWriter.setPosition(i);
-      mapWriter.start();
-      mapWriter.timeStamp("timestamp").writeTimeStamp(i);
-      mapWriter.end();
-    }
-    writer.setValueCount(count);
-    varchar.release();
-  }
-
-
-  private void writeData(int count, MapVector parent) {
-    ComplexWriter writer = new ComplexWriterImpl("root", parent);
-    MapWriter rootWriter = writer.rootAsMap();
-    IntWriter intWriter = rootWriter.integer("int");
-    BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
-    for (int i = 0; i < count; i++) {
-      intWriter.setPosition(i);
-      intWriter.writeInt(i);
-      bigIntWriter.setPosition(i);
-      bigIntWriter.writeBigInt(i);
-    }
-    writer.setValueCount(count);
-  }
-
   @Test
   public void testWriteRead() throws IOException {
     File file = new File("target/mytest.arrow");
@@ -197,13 +118,6 @@ public class TestArrowFile {
     }
   }
 
-  private void validateContent(int count, VectorSchemaRoot root) {
-    for (int i = 0; i < count; i++) {
-      Assert.assertEquals(i, root.getVector("int").getAccessor().getObject(i));
-      Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getAccessor().getObject(i));
-    }
-  }
-
   @Test
   public void testWriteReadComplex() throws IOException {
     File file = new File("target/mytest_complex.arrow");
@@ -244,45 +158,6 @@ public class TestArrowFile {
     }
   }
 
-  public void printVectors(List<FieldVector> vectors) {
-    for (FieldVector vector : vectors) {
-      LOGGER.debug(vector.getField().getName());
-      Accessor accessor = vector.getAccessor();
-      int valueCount = accessor.getValueCount();
-      for (int i = 0; i < valueCount; i++) {
-        LOGGER.debug(String.valueOf(accessor.getObject(i)));
-      }
-    }
-  }
-
-  private void validateComplexContent(int count, VectorSchemaRoot root) {
-    Assert.assertEquals(count, root.getRowCount());
-    printVectors(root.getFieldVectors());
-    for (int i = 0; i < count; i++) {
-      Assert.assertEquals(i, root.getVector("int").getAccessor().getObject(i));
-      Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getAccessor().getObject(i));
-      Assert.assertEquals(i % 3, ((List<?>)root.getVector("list").getAccessor().getObject(i)).size());
-      NullableTimeStampHolder h = new NullableTimeStampHolder();
-      FieldReader mapReader = root.getVector("map").getReader();
-      mapReader.setPosition(i);
-      mapReader.reader("timestamp").read(h);
-      Assert.assertEquals(i, h.value);
-    }
-  }
-
-  private void write(FieldVector parent, File file) throws FileNotFoundException, IOException
{
-    VectorUnloader vectorUnloader = newVectorUnloader(parent);
-    Schema schema = vectorUnloader.getSchema();
-    LOGGER.debug("writing schema: " + schema);
-    try (
-        FileOutputStream fileOutputStream = new FileOutputStream(file);
-        ArrowWriter arrowWriter = new ArrowWriter(fileOutputStream.getChannel(), schema);
-        ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
-            ) {
-      arrowWriter.writeRecordBatch(recordBatch);
-    }
-  }
-
   @Test
   public void testWriteReadMultipleRBs() throws IOException {
     File file = new File("target/mytest_multiple.arrow");
@@ -381,69 +256,16 @@ public class TestArrowFile {
     }
   }
 
-  public void validateUnionData(int count, VectorSchemaRoot root) {
-    FieldReader unionReader = root.getVector("union").getReader();
-    for (int i = 0; i < count; i++) {
-      unionReader.setPosition(i);
-      switch (i % 4) {
-      case 0:
-        Assert.assertEquals(i, unionReader.readInteger().intValue());
-        break;
-      case 1:
-        Assert.assertEquals(i, unionReader.readLong().longValue());
-        break;
-      case 2:
-        Assert.assertEquals(i % 3, unionReader.size());
-        break;
-      case 3:
-        NullableTimeStampHolder h = new NullableTimeStampHolder();
-        unionReader.reader("timestamp").read(h);
-        Assert.assertEquals(i, h.value);
-        break;
-      }
-    }
-  }
-
-  public void writeUnionData(int count, NullableMapVector parent) {
-    ArrowBuf varchar = allocator.buffer(3);
-    varchar.readerIndex(0);
-    varchar.setByte(0, 'a');
-    varchar.setByte(1, 'b');
-    varchar.setByte(2, 'c');
-    varchar.writerIndex(3);
-    ComplexWriter writer = new ComplexWriterImpl("root", parent);
-    MapWriter rootWriter = writer.rootAsMap();
-    IntWriter intWriter = rootWriter.integer("union");
-    BigIntWriter bigIntWriter = rootWriter.bigInt("union");
-    ListWriter listWriter = rootWriter.list("union");
-    MapWriter mapWriter = rootWriter.map("union");
-    for (int i = 0; i < count; i++) {
-      switch (i % 4) {
-      case 0:
-        intWriter.setPosition(i);
-        intWriter.writeInt(i);
-        break;
-      case 1:
-        bigIntWriter.setPosition(i);
-        bigIntWriter.writeBigInt(i);
-        break;
-      case 2:
-        listWriter.setPosition(i);
-        listWriter.startList();
-        for (int j = 0; j < i % 3; j++) {
-          listWriter.varChar().writeVarChar(0, 3, varchar);
-        }
-        listWriter.endList();
-        break;
-      case 3:
-        mapWriter.setPosition(i);
-        mapWriter.start();
-        mapWriter.timeStamp("timestamp").writeTimeStamp(i);
-        mapWriter.end();
-        break;
-      }
+  private void write(FieldVector parent, File file) throws FileNotFoundException, IOException
{
+    VectorUnloader vectorUnloader = newVectorUnloader(parent);
+    Schema schema = vectorUnloader.getSchema();
+    LOGGER.debug("writing schema: " + schema);
+    try (
+        FileOutputStream fileOutputStream = new FileOutputStream(file);
+        ArrowWriter arrowWriter = new ArrowWriter(fileOutputStream.getChannel(), schema);
+        ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+            ) {
+      arrowWriter.writeRecordBatch(recordBatch);
     }
-    writer.setValueCount(count);
-    varchar.release();
   }
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/4fa7ac4f/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java
b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java
new file mode 100644
index 0000000..7d25003
--- /dev/null
+++ b/java/vector/src/test/java/org/apache/arrow/vector/file/json/TestJSONFile.java
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.file.json;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.NullableMapVector;
+import org.apache.arrow.vector.file.BaseFileTest;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestJSONFile extends BaseFileTest {
+  private static final Logger LOGGER = LoggerFactory.getLogger(TestJSONFile.class);
+
+  @Test
+  public void testWriteReadComplexJSON() throws IOException {
+    File file = new File("target/mytest_complex.json");
+    int count = COUNT;
+
+    // write
+    try (
+        BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors",
0, Integer.MAX_VALUE);
+        MapVector parent = new MapVector("parent", originalVectorAllocator, null)) {
+      writeComplexData(count, parent);
+      writeJSON(file, new VectorSchemaRoot(parent.getChild("root")));
+    }
+
+    // read
+    try (
+        BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+        ) {
+      JsonFileReader reader = new JsonFileReader(file, readerAllocator);
+      Schema schema = reader.start();
+      LOGGER.debug("reading schema: " + schema);
+
+      // initialize vectors
+      try (VectorSchemaRoot root = reader.read();) {
+        validateComplexContent(count, root);
+      }
+      reader.close();
+    }
+  }
+
+  @Test
+  public void testWriteComplexJSON() throws IOException {
+    File file = new File("target/mytest_write_complex.json");
+    int count = COUNT;
+    try (
+        BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors",
0, Integer.MAX_VALUE);
+        NullableMapVector parent = new NullableMapVector("parent", vectorAllocator, null))
{
+      writeComplexData(count, parent);
+      VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+      validateComplexContent(root.getRowCount(), root);
+      writeJSON(file, root);
+    }
+  }
+
+  public void writeJSON(File file, VectorSchemaRoot root) throws IOException {
+    JsonFileWriter writer = new JsonFileWriter(file, JsonFileWriter.config().pretty(true));
+    writer.start(root.getSchema());
+    writer.write(root);
+    writer.close();
+  }
+
+
+  @Test
+  public void testWriteReadUnionJSON() throws IOException {
+    File file = new File("target/mytest_write_union.json");
+    int count = COUNT;
+    try (
+        BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors",
0, Integer.MAX_VALUE);
+        NullableMapVector parent = new NullableMapVector("parent", vectorAllocator, null))
{
+
+      writeUnionData(count, parent);
+
+      printVectors(parent.getChildrenFromFields());
+
+      VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+      validateUnionData(count, root);
+
+      writeJSON(file, root);
+    }
+ // read
+    try (
+        BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+        BufferAllocator vectorAllocator = allocator.newChildAllocator("final vectors", 0,
Integer.MAX_VALUE);
+        ) {
+      JsonFileReader reader = new JsonFileReader(file, readerAllocator);
+      Schema schema = reader.start();
+      LOGGER.debug("reading schema: " + schema);
+
+      // initialize vectors
+      try (VectorSchemaRoot root = reader.read();) {
+        validateUnionData(count, root);
+      }
+    }
+  }
+
+}


Mime
View raw message