arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-393: [JAVA] JSON file reader fails to set the buffer size on String data vector
Date Wed, 30 Nov 2016 03:23:26 GMT
Repository: arrow
Updated Branches:
  refs/heads/master e3c167bd1 -> 65b74b350


ARROW-393: [JAVA] JSON file reader fails to set the buffer size on String data vector

Fixed by calling setValueCount after setting the values instead of before.
Since we set the inner vectors of NullableVarCharVector directly, we don't have to worry about
its lastSet field and the way null values are handled.

Author: Julien Le Dem <julien@dremio.com>

Closes #218 from julienledem/json_read_varchar and squashes the following commits:

e147906 [Julien Le Dem] ARROW-393: [JAVA] JSON file reader fails to set the buffer size on String data vector


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/65b74b35
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/65b74b35
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/65b74b35

Branch: refs/heads/master
Commit: 65b74b350209ee3f930a00a0270e1d7c3d485c93
Parents: e3c167b
Author: Julien Le Dem <julien@dremio.com>
Authored: Tue Nov 29 22:23:19 2016 -0500
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Tue Nov 29 22:23:19 2016 -0500

----------------------------------------------------------------------
 .../org/apache/arrow/tools/Integration.java     |  2 +-
 .../org/apache/arrow/tools/TestIntegration.java | 54 +++++++++++++++++++-
 .../arrow/vector/file/json/JsonFileReader.java  |  8 ++-
 .../arrow/vector/schema/ArrowVectorType.java    | 15 ++++++
 4 files changed, 72 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/65b74b35/java/tools/src/main/java/org/apache/arrow/tools/Integration.java
----------------------------------------------------------------------
diff --git a/java/tools/src/main/java/org/apache/arrow/tools/Integration.java b/java/tools/src/main/java/org/apache/arrow/tools/Integration.java
index fa4bedc..85af30d 100644
--- a/java/tools/src/main/java/org/apache/arrow/tools/Integration.java
+++ b/java/tools/src/main/java/org/apache/arrow/tools/Integration.java
@@ -80,7 +80,7 @@ public class Integration {
           Schema schema = footer.getSchema();
           LOGGER.debug("Input file size: " + arrowFile.length());
           LOGGER.debug("Found schema: " + schema);
-          try (JsonFileWriter writer = new JsonFileWriter(jsonFile);) {
+          try (JsonFileWriter writer = new JsonFileWriter(jsonFile, JsonFileWriter.config().pretty(true));)
{
             writer.start(schema);
             List<ArrowBlock> recordBatches = footer.getRecordBatches();
             for (ArrowBlock rbBlock : recordBatches) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/65b74b35/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java
----------------------------------------------------------------------
diff --git a/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java b/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java
index bb69ed1..464144b 100644
--- a/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java
+++ b/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java
@@ -24,9 +24,12 @@ import static org.apache.arrow.tools.ArrowFileTestFixtures.writeData;
 import static org.apache.arrow.tools.ArrowFileTestFixtures.writeInput;
 import static org.junit.Assert.fail;
 
+import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.StringReader;
+import java.util.Map;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
@@ -44,6 +47,11 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
 
+import com.fasterxml.jackson.core.util.DefaultPrettyPrinter;
+import com.fasterxml.jackson.core.util.DefaultPrettyPrinter.NopIndenter;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+
 public class TestIntegration {
 
   @Rule
@@ -69,7 +77,7 @@ public class TestIntegration {
     File testOutFile = testFolder.newFile("testOut.arrow");
     testOutFile.delete();
 
-    // generate an arow file
+    // generate an arrow file
     writeInput(testInFile, allocator);
 
     Integration integration = new Integration();
@@ -91,6 +99,50 @@ public class TestIntegration {
   }
 
   @Test
+  public void testJSONRoundTripWithVariableWidth() throws Exception {
+    File testJSONFile = new File("../../integration/data/simple.json");
+    File testOutFile = testFolder.newFile("testOut.arrow");
+    File testRoundTripJSONFile = testFolder.newFile("testOut.json");
+    testOutFile.delete();
+    testRoundTripJSONFile.delete();
+
+    Integration integration = new Integration();
+
+    // convert to arrow
+    String[] args1 = { "-arrow", testOutFile.getAbsolutePath(), "-json",  testJSONFile.getAbsolutePath(),
"-command", Command.JSON_TO_ARROW.name()};
+    integration.run(args1);
+
+    // convert back to json
+    String[] args2 = { "-arrow", testOutFile.getAbsolutePath(), "-json",  testRoundTripJSONFile.getAbsolutePath(),
"-command", Command.ARROW_TO_JSON.name()};
+    integration.run(args2);
+
+    BufferedReader orig = readNormalized(testJSONFile);
+    BufferedReader rt = readNormalized(testRoundTripJSONFile);
+    String i, o;
+    int j = 0;
+    while ((i = orig.readLine()) != null && (o = rt.readLine()) != null) {
+      Assert.assertEquals("line: " + j, i, o);
+      ++j;
+    }
+  }
+
+  private ObjectMapper om = new ObjectMapper();
+  {
+    DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter();
+    prettyPrinter.indentArraysWith(NopIndenter.instance);
+    om.setDefaultPrettyPrinter(prettyPrinter);
+    om.enable(SerializationFeature.INDENT_OUTPUT);
+    om.enable(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS);
+  }
+
+  private BufferedReader readNormalized(File f) throws IOException {
+    Map<?,?> tree = om.readValue(f, Map.class);
+    String normalized = om.writeValueAsString(tree);
+    return new BufferedReader(new StringReader(normalized));
+  }
+
+
+  @Test
   public void testInvalid() throws Exception {
     File testValidInFile = testFolder.newFile("testValidIn.arrow");
     File testInvalidInFile = testFolder.newFile("testInvalidIn.arrow");

http://git-wip-us.apache.org/repos/asf/arrow/blob/65b74b35/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
index f205982..26dd3f6 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
@@ -22,6 +22,7 @@ import static com.fasterxml.jackson.core.JsonToken.END_OBJECT;
 import static com.fasterxml.jackson.core.JsonToken.START_ARRAY;
 import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
 import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.arrow.vector.schema.ArrowVectorType.OFFSET;
 
 import java.io.File;
 import java.io.IOException;
@@ -128,15 +129,12 @@ public class JsonFileReader implements AutoCloseable {
         valueVector.allocateNew();
         Mutator mutator = valueVector.getMutator();
 
-        int innerVectorCount = count;
-        if (vectorType.getName() == "OFFSET") {
-          innerVectorCount++;
-        }
-        mutator.setValueCount(innerVectorCount);
+        int innerVectorCount = vectorType.equals(OFFSET) ? count + 1 : count;
         for (int i = 0; i < innerVectorCount; i++) {
           parser.nextToken();
           setValueFromParser(valueVector, i);
         }
+        mutator.setValueCount(innerVectorCount);
         readToken(END_ARRAY);
       }
       // if children

http://git-wip-us.apache.org/repos/asf/arrow/blob/65b74b35/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java
index 8fe8e48..68da705 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java
@@ -81,4 +81,19 @@ public class ArrowVectorType {
   public String toString() {
     return getName();
   }
+
+  @Override
+  public int hashCode() {
+    return type;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj instanceof ArrowVectorType) {
+      ArrowVectorType other = (ArrowVectorType) obj;
+      return type == other.type;
+    }
+    return false;
+  }
+
 }


Mime
View raw message