arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-395: Arrow file format writes record batches in reverse order.
Date Wed, 30 Nov 2016 23:14:38 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 859018b3c -> 072b7d671


ARROW-395: Arrow file format writes record batches in reverse order.

Author: Julien Le Dem <julien@dremio.com>

Closes #220 from julienledem/rb_order and squashes the following commits:

ae5b7f8 [Julien Le Dem] ARROW-395: Arrow file format writes record batches in reverse order.


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/072b7d67
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/072b7d67
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/072b7d67

Branch: refs/heads/master
Commit: 072b7d671356721bc57da8703ed0939749cf4880
Parents: 859018b
Author: Julien Le Dem <julien@dremio.com>
Authored: Wed Nov 30 18:14:31 2016 -0500
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Wed Nov 30 18:14:31 2016 -0500

----------------------------------------------------------------------
 .../apache/arrow/vector/file/ArrowFooter.java   | 17 +++++--------
 .../apache/arrow/vector/file/TestArrowFile.java | 25 +++++++++++++-------
 2 files changed, 23 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/072b7d67/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowFooter.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowFooter.java b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowFooter.java
index 01e175b..3be1929 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowFooter.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowFooter.java
@@ -17,6 +17,8 @@
  */
 package org.apache.arrow.vector.file;
 
+import static org.apache.arrow.vector.schema.FBSerializables.writeAllStructsToVector;
+
 import java.util.ArrayList;
 import java.util.List;
 
@@ -52,10 +54,10 @@ public class ArrowFooter implements FBSerializable {
 
   private static List<ArrowBlock> recordBatches(Footer footer) {
     List<ArrowBlock> recordBatches = new ArrayList<>();
-    Block tempBLock = new Block();
+    Block tempBlock = new Block();
     int recordBatchesLength = footer.recordBatchesLength();
     for (int i = 0; i < recordBatchesLength; i++) {
-      Block block = footer.recordBatches(tempBLock, i);
+      Block block = footer.recordBatches(tempBlock, i);
       recordBatches.add(new ArrowBlock(block.offset(), block.metaDataLength(), block.bodyLength()));
     }
     return recordBatches;
@@ -88,9 +90,9 @@ public class ArrowFooter implements FBSerializable {
   public int writeTo(FlatBufferBuilder builder) {
     int schemaIndex = schema.getSchema(builder);
     Footer.startDictionariesVector(builder, dictionaries.size());
-    int dicsOffset = endVector(builder, dictionaries);
+    int dicsOffset = writeAllStructsToVector(builder, dictionaries);
     Footer.startRecordBatchesVector(builder, recordBatches.size());
-    int rbsOffset = endVector(builder, recordBatches);
+    int rbsOffset = writeAllStructsToVector(builder, recordBatches);
     Footer.startFooter(builder);
     Footer.addSchema(builder, schemaIndex);
     Footer.addDictionaries(builder, dicsOffset);
@@ -98,13 +100,6 @@ public class ArrowFooter implements FBSerializable {
     return Footer.endFooter(builder);
   }
 
-  private int endVector(FlatBufferBuilder builder, List<ArrowBlock> blocks) {
-    for (ArrowBlock block : blocks) {
-      block.writeTo(builder);
-    }
-    return builder.endVector();
-  }
-
   @Override
   public int hashCode() {
     final int prime = 31;

http://git-wip-us.apache.org/repos/asf/arrow/blob/072b7d67/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
index c9e60ee..5fa18b3 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
@@ -161,24 +161,27 @@ public class TestArrowFile extends BaseFileTest {
   @Test
   public void testWriteReadMultipleRBs() throws IOException {
     File file = new File("target/mytest_multiple.arrow");
-    int count = COUNT;
+    int[] counts = { 10, 5 };
 
     // write
     try (
         BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
         MapVector parent = new MapVector("parent", originalVectorAllocator, null);
         FileOutputStream fileOutputStream = new FileOutputStream(file);) {
-      writeData(count, parent);
-      VectorUnloader vectorUnloader = newVectorUnloader(parent.getChild("root"));
-      Schema schema = vectorUnloader.getSchema();
+      writeData(counts[0], parent);
+      VectorUnloader vectorUnloader0 = newVectorUnloader(parent.getChild("root"));
+      Schema schema = vectorUnloader0.getSchema();
       Assert.assertEquals(2, schema.getFields().size());
       try (ArrowWriter arrowWriter = new ArrowWriter(fileOutputStream.getChannel(), schema);) {
-        try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch()) {
+        try (ArrowRecordBatch recordBatch = vectorUnloader0.getRecordBatch()) {
+          Assert.assertEquals("RB #0", counts[0], recordBatch.getLength());
           arrowWriter.writeRecordBatch(recordBatch);
         }
         parent.allocateNew();
-        writeData(count, parent);
-        try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch()) {
+        writeData(counts[1], parent); // if we write the same data we don't catch that the metadata is stored in the wrong order.
+        VectorUnloader vectorUnloader1 = newVectorUnloader(parent.getChild("root"));
+        try (ArrowRecordBatch recordBatch = vectorUnloader1.getRecordBatch()) {
+          Assert.assertEquals("RB #1", counts[1], recordBatch.getLength());
           arrowWriter.writeRecordBatch(recordBatch);
         }
       }
@@ -195,21 +198,27 @@ public class TestArrowFile extends BaseFileTest {
       ArrowFooter footer = arrowReader.readFooter();
       Schema schema = footer.getSchema();
       LOGGER.debug("reading schema: " + schema);
+      int i = 0;
       try (VectorSchemaRoot root = new VectorSchemaRoot(schema, vectorAllocator);) {
         VectorLoader vectorLoader = new VectorLoader(root);
         List<ArrowBlock> recordBatches = footer.getRecordBatches();
         Assert.assertEquals(2, recordBatches.size());
+        long previousOffset = 0;
         for (ArrowBlock rbBlock : recordBatches) {
+          Assert.assertTrue(rbBlock.getOffset() + " > " + previousOffset, rbBlock.getOffset() > previousOffset);
+          previousOffset = rbBlock.getOffset();
           Assert.assertEquals(0, rbBlock.getOffset() % 8);
           Assert.assertEquals(0, rbBlock.getMetadataLength() % 8);
           try (ArrowRecordBatch recordBatch = arrowReader.readRecordBatch(rbBlock)) {
+            Assert.assertEquals("RB #" + i, counts[i], recordBatch.getLength());
             List<ArrowBuffer> buffersLayout = recordBatch.getBuffersLayout();
             for (ArrowBuffer arrowBuffer : buffersLayout) {
               Assert.assertEquals(0, arrowBuffer.getOffset() % 8);
             }
             vectorLoader.load(recordBatch);
-            validateContent(count, root);
+            validateContent(counts[i], root);
           }
+          ++i;
         }
       }
     }


Mime
View raw message