arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jacq...@apache.org
Subject arrow git commit: ARROW-1237: [JAVA] expose the ability to set lastSet
Date Mon, 24 Jul 2017 15:15:10 GMT
Repository: arrow
Updated Branches:
  refs/heads/master e1b098e93 -> 457bb071d


ARROW-1237: [JAVA] expose the ability to set lastSet

The changes here expose the ability to set "lastSet" on Nullable<var length>Vector.
I believe this is needed only for NullableVarCharVector and NullableVarBinaryVector. Hence
the API is exposed through NullableValueVectors.java

Author: siddharth <siddharth@dremio.com>

Closes #868 from siddharthteotia/ARROW-1237 and squashes the following commits:

786dfea [siddharth] ARROW-1237: addressed review comments and added more tests
73b2fc5 [siddharth] ARROW-1237: added some unit tests
f8c7277 [siddharth] ARROW-1237: expose the ability to set lastSet


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/457bb071
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/457bb071
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/457bb071

Branch: refs/heads/master
Commit: 457bb071df95e71eef45159619cc6b531fdec08f
Parents: e1b098e
Author: siddharth <siddharth@dremio.com>
Authored: Mon Jul 24 08:13:24 2017 -0700
Committer: Jacques Nadeau <jacques@dremio.com>
Committed: Mon Jul 24 08:13:24 2017 -0700

----------------------------------------------------------------------
 .gitignore                                      |   3 +-
 .../codegen/templates/NullableValueVectors.java |  16 ++
 .../apache/arrow/vector/complex/ListVector.java |   6 +
 .../org/apache/arrow/vector/TestListVector.java | 154 +++++++++++++++++++
 .../apache/arrow/vector/TestValueVector.java    | 132 +++++++++++++++-
 5 files changed, 309 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/457bb071/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index dd69b6c..e6dfe19 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,4 +27,5 @@ MANIFEST
 
 cpp/.idea/
 python/.eggs/
-.vscode
\ No newline at end of file
+.vscode
+.idea/

http://git-wip-us.apache.org/repos/asf/arrow/blob/457bb071/java/vector/src/main/codegen/templates/NullableValueVectors.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java b/java/vector/src/main/codegen/templates/NullableValueVectors.java
index 092097b..1decd0b 100644
--- a/java/vector/src/main/codegen/templates/NullableValueVectors.java
+++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java
@@ -699,6 +699,22 @@ public final class ${className} extends BaseDataValueVector implements
<#if type
       setCount = 0;
       <#if type.major = "VarLen">lastSet = -1;</#if>
     }
+
+    public void setLastSet(int value) {
+      <#if type.major = "VarLen">
+        lastSet = value;
+      <#else>
+        throw new UnsupportedOperationException();
+      </#if>
+    }
+
+    public int getLastSet() {
+      <#if type.major != "VarLen">
+        throw new UnsupportedOperationException();
+      <#else>
+        return lastSet;
+      </#if>
+    }
   }
 }
 </#list>

http://git-wip-us.apache.org/repos/asf/arrow/blob/457bb071/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
index 4ab624f..6357294 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
@@ -393,6 +393,12 @@ public class ListVector extends BaseRepeatedValueVector implements FieldVector,
       vector.getMutator().setValueCount(childValueCount);
       bits.getMutator().setValueCount(valueCount);
     }
+
+    public void setLastSet(int value) {
+      lastSet = value;
+    }
+
+    public int getLastSet() { return lastSet; }
   }
 
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/457bb071/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
index 11be329..29ea762 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
@@ -17,15 +17,26 @@
  */
 package org.apache.arrow.vector;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.complex.ListVector;
 import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.complex.impl.UnionListReader;
 import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.holders.NullableBigIntHolder;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.*;
+import org.apache.arrow.vector.types.pojo.FieldType;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
+import java.util.List;
+
 public class TestListVector {
 
   private BufferAllocator allocator;
@@ -80,4 +91,147 @@ public class TestListVector {
       Assert.assertTrue("shouldn't be null", reader.isSet());
     }
   }
+
+  @Test
+  public void testSetLastSetUsage() throws Exception {
+    try (ListVector listVector = ListVector.empty("input", allocator)) {
+
+      /* Explicitly add the dataVector */
+      MinorType type = MinorType.BIGINT;
+      listVector.addOrGetVector(FieldType.nullable(type.getType()));
+
+      /* allocate memory */
+      listVector.allocateNew();
+
+      /* get inner vectors; bitVector and offsetVector */
+      List<BufferBacked> innerVectors = listVector.getFieldInnerVectors();
+      BitVector bitVector = (BitVector)innerVectors.get(0);
+      UInt4Vector offsetVector = (UInt4Vector)innerVectors.get(1);
+
+      /* get the underlying data vector -- NullableBigIntVector */
+      NullableBigIntVector dataVector = (NullableBigIntVector)listVector.getDataVector();
+
+      /* check current lastSet */
+      assertEquals(Integer.toString(0), Integer.toString(listVector.getMutator().getLastSet()));
+
+      int index = 0;
+      int offset = 0;
+
+      /* write [10, 11, 12] to the list vector at index */
+      bitVector.getMutator().setSafe(index, 1);
+      dataVector.getMutator().setSafe(0, 1, 10);
+      dataVector.getMutator().setSafe(1, 1, 11);
+      dataVector.getMutator().setSafe(2, 1, 12);
+      offsetVector.getMutator().setSafe(index + 1, 3);
+
+      index  += 1;
+
+      /* write [13, 14] to the list vector at index 1 */
+      bitVector.getMutator().setSafe(index, 1);
+      dataVector.getMutator().setSafe(3, 1, 13);
+      dataVector.getMutator().setSafe(4, 1, 14);
+      offsetVector.getMutator().setSafe(index + 1, 5);
+
+      index += 1;
+
+      /* write [15, 16, 17] to the list vector at index 2 */
+      bitVector.getMutator().setSafe(index, 1);
+      dataVector.getMutator().setSafe(5, 1, 15);
+      dataVector.getMutator().setSafe(6, 1, 16);
+      dataVector.getMutator().setSafe(7, 1, 17);
+      offsetVector.getMutator().setSafe(index + 1, 8);
+
+      /* check current lastSet */
+      assertEquals(Integer.toString(0), Integer.toString(listVector.getMutator().getLastSet()));
+
+      /* set lastset and arbitrary valuecount for list vector.
+       *
+       * NOTE: if we don't execute setLastSet() before setLastValueCount(), then
+       * the latter will corrupt the offsetVector and thus the accessor will not
+       * retrieve the correct values from underlying dataVector. Run the test
+       * by commenting out next line and we should see failures from 5th assert
+       * onwards. This is why doing setLastSet() is important before setValueCount()
+       * once the vector has been loaded.
+       *
+       * Another important thing to remember is the value of lastSet itself.
+       * Even though the listVector has elements till index 2 only, the lastSet should
+       * be set as 3. This is because the offsetVector has valid offsets filled till index
3.
+       * If we do setLastSet(2), the offsetVector at index 3 will contain incorrect value
+       * after execution of setValueCount().
+       *
+       * correct state of the listVector
+       * bitvector    {1, 1, 1, 0, 0.... }
+       * offsetvector {0, 3, 5, 8, 8, 8.....}
+       * datavector   { [10, 11, 12],
+       *                [13, 14],
+       *                [15, 16, 17]
+       *              }
+       *
+       * if we don't do setLastSet() before setValueCount --> incorrect state
+       * bitvector    {1, 1, 1, 0, 0.... }
+       * offsetvector {0, 0, 0, 0, 0, 0.....}
+       * datavector   { [10, 11, 12],
+       *                [13, 14],
+       *                [15, 16, 17]
+       *              }
+       *
+       * if we do setLastSet(2) before setValueCount --> incorrect state
+       * bitvector    {1, 1, 1, 0, 0.... }
+       * offsetvector {0, 3, 5, 5, 5, 5.....}
+       * datavector   { [10, 11, 12],
+       *                [13, 14],
+       *                [15, 16, 17]
+       *              }
+       */
+      listVector.getMutator().setLastSet(3);
+      listVector.getMutator().setValueCount(10);
+
+      /* check the vector output */
+      final UInt4Vector.Accessor offsetAccessor = offsetVector.getAccessor();
+      final ValueVector.Accessor valueAccessor = dataVector.getAccessor();
+
+      index  = 0;
+      offset = offsetAccessor.get(index);
+      assertEquals(Integer.toString(0), Integer.toString(offset));
+
+      Object actual = valueAccessor.getObject(offset);
+      assertEquals(new Long(10), (Long)actual);
+      offset++;
+      actual = valueAccessor.getObject(offset);
+      assertEquals(new Long(11), (Long)actual);
+      offset++;
+      actual = valueAccessor.getObject(offset);
+      assertEquals(new Long(12), (Long)actual);
+
+      index++;
+      offset = offsetAccessor.get(index);
+      assertEquals(Integer.toString(3), Integer.toString(offset));
+
+      actual = valueAccessor.getObject(offset);
+      assertEquals(new Long(13), (Long)actual);
+      offset++;
+      actual = valueAccessor.getObject(offset);
+      assertEquals(new Long(14), (Long)actual);
+
+      index++;
+      offset = offsetAccessor.get(index);
+      assertEquals(Integer.toString(5), Integer.toString(offset));
+
+      actual = valueAccessor.getObject(offsetAccessor.get(index));
+      assertEquals(new Long(15), (Long)actual);
+      offset++;
+      actual = valueAccessor.getObject(offset);
+      assertEquals(new Long(16), (Long)actual);
+      offset++;
+      actual = valueAccessor.getObject(offset);
+      assertEquals(new Long(17), (Long)actual);
+
+      index++;
+      offset = offsetAccessor.get(index);
+      assertEquals(Integer.toString(8), Integer.toString(offset));
+
+      actual = valueAccessor.getObject(offsetAccessor.get(index));
+      assertNull(actual);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/457bb071/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
index 63543b0..f5508aa 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
@@ -26,11 +26,15 @@ import static org.junit.Assert.assertTrue;
 
 import java.nio.charset.Charset;
 import java.util.List;
+import java.util.ArrayList;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
+
+import org.apache.arrow.vector.schema.ArrowRecordBatch;
 import org.apache.arrow.vector.schema.TypeLayout;
 import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Schema;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.junit.After;
@@ -56,6 +60,9 @@ public class TestValueVector {
   private final static byte[] STR1 = "AAAAA1".getBytes(utf8Charset);
   private final static byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset);
   private final static byte[] STR3 = "CCCC3".getBytes(utf8Charset);
+  private final static byte[] STR4 = "DDDDDDDD4".getBytes(utf8Charset);
+  private final static byte[] STR5 = "EEE5".getBytes(utf8Charset);
+  private final static byte[] STR6 = "FFFFF6".getBytes(utf8Charset);
 
   @After
   public void terminate() throws Exception {
@@ -509,11 +516,134 @@ public class TestValueVector {
         } else {
           assertEquals(Integer.toString(i), vector2.getAccessor().getObject(i).toString());
         }
-
       }
+    }
+  }
+
+  @Test
+  public void testSetLastSetUsage() {
+    try (final NullableVarCharVector vector = new NullableVarCharVector("myvector", allocator))
{
 
+      final NullableVarCharVector.Mutator mutator = vector.getMutator();
+
+      vector.allocateNew(1024 * 10, 1024);
+
+      setBytes(0, STR1, vector);
+      setBytes(1, STR2, vector);
+      setBytes(2, STR3, vector);
+      setBytes(3, STR4, vector);
+      setBytes(4, STR5, vector);
+      setBytes(5, STR6, vector);
+
+      /* Check current lastSet */
+      assertEquals(Integer.toString(-1), Integer.toString(mutator.getLastSet()));
+
+      /* Check the vector output */
+      final NullableVarCharVector.Accessor accessor = vector.getAccessor();
+      assertArrayEquals(STR1, accessor.get(0));
+      assertArrayEquals(STR2, accessor.get(1));
+      assertArrayEquals(STR3, accessor.get(2));
+      assertArrayEquals(STR4, accessor.get(3));
+      assertArrayEquals(STR5, accessor.get(4));
+      assertArrayEquals(STR6, accessor.get(5));
+
+      /*
+       * If we don't do setLastSe(5) before setValueCount(), then the latter will corrupt
+       * the value vector by filling in all positions [0,valuecount-1] will empty byte arrays.
+       * Run the test by commenting out next line and we should see incorrect vector output.
+       */
+      mutator.setLastSet(5);
+      mutator.setValueCount(20);
+
+      /* Check the vector output again */
+      assertArrayEquals(STR1, accessor.get(0));
+      assertArrayEquals(STR2, accessor.get(1));
+      assertArrayEquals(STR3, accessor.get(2));
+      assertArrayEquals(STR4, accessor.get(3));
+      assertArrayEquals(STR5, accessor.get(4));
+      assertArrayEquals(STR6, accessor.get(5));
+    }
+  }
 
+  @Test
+  public void testVectorLoadUnload() {
+
+    try (final NullableVarCharVector vector1 = new NullableVarCharVector("myvector", allocator))
{
+
+      final NullableVarCharVector.Mutator mutator1 = vector1.getMutator();
+
+      vector1.allocateNew(1024 * 10, 1024);
+
+      mutator1.set(0, STR1);
+      mutator1.set(1, STR2);
+      mutator1.set(2, STR3);
+      mutator1.set(3, STR4);
+      mutator1.set(4, STR5);
+      mutator1.set(5, STR6);
+      assertEquals(Integer.toString(5), Integer.toString(mutator1.getLastSet()));
+      mutator1.setValueCount(15);
+      assertEquals(Integer.toString(14), Integer.toString(mutator1.getLastSet()));
+
+      /* Check the vector output */
+      final NullableVarCharVector.Accessor accessor1 = vector1.getAccessor();
+      assertArrayEquals(STR1, accessor1.get(0));
+      assertArrayEquals(STR2, accessor1.get(1));
+      assertArrayEquals(STR3, accessor1.get(2));
+      assertArrayEquals(STR4, accessor1.get(3));
+      assertArrayEquals(STR5, accessor1.get(4));
+      assertArrayEquals(STR6, accessor1.get(5));
+
+      Field field = vector1.getField();
+      String fieldName = field.getName();
+
+      List<Field> fields = new ArrayList<Field>();
+      List<FieldVector> fieldVectors = new ArrayList<FieldVector>();
+
+      fields.add(field);
+      fieldVectors.add(vector1);
+
+      Schema schema = new Schema(fields);
+
+      VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, accessor1.getValueCount());
+      VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);
+
+      try (
+              ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+              BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector",
0, Long.MAX_VALUE);
+              VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator);
+      ) {
+
+        VectorLoader vectorLoader = new VectorLoader(schemaRoot2);
+        vectorLoader.load(recordBatch);
+
+        NullableVarCharVector vector2 = (NullableVarCharVector)schemaRoot2.getVector(fieldName);
+        NullableVarCharVector.Mutator mutator2 = vector2.getMutator();
+
+        /*
+         * lastSet would have internally been set by VectorLoader.load() when it invokes
+         * loadFieldBuffers.
+         */
+        assertEquals(Integer.toString(14), Integer.toString(mutator2.getLastSet()));
+        mutator2.setValueCount(25);
+        assertEquals(Integer.toString(24), Integer.toString(mutator2.getLastSet()));
+
+        /* Check the vector output */
+        final NullableVarCharVector.Accessor accessor2 = vector2.getAccessor();
+        assertArrayEquals(STR1, accessor2.get(0));
+        assertArrayEquals(STR2, accessor2.get(1));
+        assertArrayEquals(STR3, accessor2.get(2));
+        assertArrayEquals(STR4, accessor2.get(3));
+        assertArrayEquals(STR5, accessor2.get(4));
+        assertArrayEquals(STR6, accessor2.get(5));
+      }
     }
   }
 
+  public static void setBytes(int index, byte[] bytes, NullableVarCharVector vector) {
+    final int currentOffset = vector.values.offsetVector.getAccessor().get(index);
+
+    vector.bits.getMutator().setToOne(index);
+    vector.values.offsetVector.getMutator().set(index + 1, currentOffset + bytes.length);
+    vector.values.data.setBytes(currentOffset, bytes, 0, bytes.length);
+  }
 }


Mime
View raw message