arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-1444: [JAVA] fix last byte copy in BitVector splitAndTransfer
Date Sat, 02 Sep 2017 14:57:31 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 4956e90a7 -> 848a0f782


ARROW-1444: [JAVA] fix last byte copy in BitVector splitAndTransfer

cc @jacques-n

Author: siddharth <siddharth@dremio.com>

Closes #1023 from siddharthteotia/ARROW-1444 and squashes the following commits:

eb5f6578 [siddharth] ARROW-1444: fix last byte copy in BitVector splitAndTransfer


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/848a0f78
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/848a0f78
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/848a0f78

Branch: refs/heads/master
Commit: 848a0f782dddac788d01eb8ea230957b8372e2dd
Parents: 4956e90
Author: siddharth <siddharth@dremio.com>
Authored: Sat Sep 2 10:57:11 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Sat Sep 2 10:57:11 2017 -0400

----------------------------------------------------------------------
 .../java/org/apache/arrow/vector/BitVector.java |  59 +++++--
 .../org/apache/arrow/vector/TestBitVector.java  | 163 +++++++++++++++----
 2 files changed, 171 insertions(+), 51 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/848a0f78/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
index 44001d4..8a60273 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
@@ -276,9 +276,11 @@ public final class BitVector extends BaseDataValueVector implements FixedWidthVe
 
   public void splitAndTransferTo(int startIndex, int length, BitVector target) {
     assert startIndex + length <= valueCount;
-    int firstByte = getByteIndex(startIndex);
-    int byteSize = getSizeFromCount(length);
+    int firstByteSource = getByteIndex(startIndex);
+    int lastByteSource = getByteIndex(valueCount - 1);
+    int byteSizeTarget = getSizeFromCount(length);
     int offset = startIndex % 8;
+
     if (length > 0) {
       if (offset == 0) {
         target.clear();
@@ -286,32 +288,59 @@ public final class BitVector extends BaseDataValueVector implements
FixedWidthVe
         if (target.data != null) {
           target.data.release();
         }
-        target.data = data.slice(firstByte, byteSize);
+        target.data = data.slice(firstByteSource, byteSizeTarget);
         target.data.retain(1);
-      } else {
+      }
+      else {
         // Copy data
         // When the first bit starts from the middle of a byte (offset != 0), copy data from
src BitVector.
         // Each byte in the target is composed by a part in i-th byte, another part in (i+1)-th
byte.
-        // The last byte copied to target is a bit tricky :
-        //   1) if length requires partly byte (length % 8 !=0), copy the remaining bits
only.
-        //   2) otherwise, copy the last byte in the same way as to the prior bytes.
+
         target.clear();
-        target.allocateNew(length);
+        target.allocateNew(byteSizeTarget * 8);
+
         // TODO maybe do this one word at a time, rather than byte?
-        for (int i = 0; i < byteSize - 1; i++) {
-          target.data.setByte(i, (((this.data.getByte(firstByte + i) & 0xFF) >>>
offset) + (this.data.getByte(firstByte + i + 1) << (8 - offset))));
+
+        for (int i = 0; i < byteSizeTarget - 1; i++) {
+          byte b1 = getBitsFromCurrentByte(this.data, firstByteSource + i, offset);
+          byte b2 = getBitsFromNextByte(this.data, firstByteSource + i + 1, offset);
+
+          target.data.setByte(i, (b1 + b2));
         }
-        if (length % 8 != 0) {
-          target.data.setByte(byteSize - 1, ((this.data.getByte(firstByte + byteSize - 1)
& 0xFF) >>> offset));
-        } else {
-          target.data.setByte(byteSize - 1,
-              (((this.data.getByte(firstByte + byteSize - 1) & 0xFF) >>> offset)
+ (this.data.getByte(firstByte + byteSize) << (8 - offset))));
+
+        /* Copying the last piece is done in the following manner:
+         * if the source vector has 1 or more bytes remaining, we copy
+         * the last piece as a byte formed by shifting data
+         * from the current byte and the next byte.
+         *
+         * if the source vector has no more bytes remaining
+         * (we are at the last byte), we copy the last piece as a byte
+         * by shifting data from the current byte.
+         */
+        if((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+          byte b1 = getBitsFromCurrentByte(this.data, firstByteSource + byteSizeTarget -
1, offset);
+          byte b2 = getBitsFromNextByte(this.data, firstByteSource + byteSizeTarget, offset);
+
+          target.data.setByte(byteSizeTarget - 1, b1 + b2);
+        }
+        else {
+          byte b1 = getBitsFromCurrentByte(this.data, firstByteSource + byteSizeTarget -
1, offset);
+
+          target.data.setByte(byteSizeTarget - 1, b1);
         }
       }
     }
     target.getMutator().setValueCount(length);
   }
 
+  private static byte getBitsFromCurrentByte(ArrowBuf data, int index, int offset) {
+    return (byte)((data.getByte(index) & 0xFF) >>> offset);
+  }
+
+  private static byte getBitsFromNextByte(ArrowBuf data, int index, int offset) {
+    return (byte)((data.getByte(index) << (8 - offset)));
+  }
+
   private class TransferImpl implements TransferPair {
     BitVector to;
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/848a0f78/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java
index 1631660..82e61be 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java
@@ -99,47 +99,138 @@ public class TestBitVector {
         }
       }
 
-      final TransferPair transferPair = sourceVector.getTransferPair(allocator);
-      final BitVector toVector = (BitVector) transferPair.getTo();
-      final BitVector.Accessor toAccessor = toVector.getAccessor();
-      final BitVector.Mutator toMutator = toVector.getMutator();
-
-      /*
-       * form test cases such that we cover:
-       *
-       * (1) the start index is exactly where a particular byte starts in the source bit
vector
-       * (2) the start index is randomly positioned within a byte in the source bit vector
-       *    (2.1) the length is a multiple of 8
-       *    (2.2) the length is not a multiple of 8
-       */
-      final int[][] transferLengths = {{0, 8},     /* (1) */
-          {8, 10},    /* (1) */
-          {18, 0},    /* zero length scenario */
-          {18, 8},    /* (2.1) */
-          {26, 0},    /* zero length scenario */
-          {26, 14}    /* (2.2) */
-      };
-
-      for (final int[] transferLength : transferLengths) {
-        final int start = transferLength[0];
-        final int length = transferLength[1];
-
-        transferPair.splitAndTransfer(start, length);
-
-        /* check the toVector output after doing splitAndTransfer */
-        for (int i = 0; i < length; i++) {
-          int result = toAccessor.get(i);
-          if ((i & 1) == 1) {
-            assertEquals(Integer.toString(1), Integer.toString(result));
-          } else {
-            assertEquals(Integer.toString(0), Integer.toString(result));
+      try (final BitVector toVector = new BitVector("toVector", allocator)) {
+        final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+        final BitVector.Accessor toAccessor = toVector.getAccessor();
+        final BitVector.Mutator toMutator = toVector.getMutator();
+
+        /*
+         * form test cases such that we cover:
+         *
+         * (1) the start index is exactly where a particular byte starts in the source bit
vector
+         * (2) the start index is randomly positioned within a byte in the source bit vector
+         *    (2.1) the length is a multiple of 8
+         *    (2.2) the length is not a multiple of 8
+         */
+        final int[][] transferLengths = {{0, 8}, {8, 10}, {18, 0}, {18, 8}, {26, 0}, {26,
14}};
+
+        for (final int[] transferLength : transferLengths) {
+          final int start = transferLength[0];
+          final int length = transferLength[1];
+
+          transferPair.splitAndTransfer(start, length);
+
+          /* check the toVector output after doing splitAndTransfer */
+          for (int i = 0; i < length; i++) {
+            int actual = toAccessor.get(i);
+            int expected = sourceAccessor.get(start + i);
+            assertEquals("different data values not expected --> sourceVector index: "
+ (start + i) + " toVector index: " + i,
+                    expected, actual);
           }
         }
+      }
+    }
+  }
+
+  @Test
+  public void testSplitAndTransfer1() throws Exception {
+
+    try (final BitVector sourceVector = new BitVector("bitvector", allocator)) {
+      final BitVector.Mutator sourceMutator = sourceVector.getMutator();
+      final BitVector.Accessor sourceAccessor = sourceVector.getAccessor();
+
+      sourceVector.allocateNew(8190);
+
+      /* populate the bitvector */
+      for (int i = 0; i < 8190; i++) {
+        sourceMutator.set(i, 1);
+      }
+
+      sourceMutator.setValueCount(8190);
+
+      /* check the vector output */
+      for (int i = 0; i < 8190; i++) {
+        int result = sourceAccessor.get(i);
+        assertEquals(Integer.toString(1), Integer.toString(result));
+      }
+
+      try (final BitVector toVector = new BitVector("toVector", allocator)) {
+        final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+        final BitVector.Accessor toAccessor = toVector.getAccessor();
+        final BitVector.Mutator toMutator = toVector.getMutator();
+
+        final int[][] transferLengths = {{0, 4095}, {4095, 4095}};
+
+        for (final int[] transferLength : transferLengths) {
+          final int start = transferLength[0];
+          final int length = transferLength[1];
+
+          transferPair.splitAndTransfer(start, length);
+
+          /* check the toVector output after doing splitAndTransfer */
+          for (int i = 0; i < length; i++) {
+            int actual = toAccessor.get(i);
+            int expected = sourceAccessor.get(start + i);
+            assertEquals("different data values not expected --> sourceVector index: "
+ (start + i) + " toVector index: " + i,
+                    expected, actual);
+          }
+        }
+      }
+    }
+  }
+
+  @Test
+  public void testSplitAndTransfer2() throws Exception {
+
+    try (final BitVector sourceVector = new BitVector("bitvector", allocator)) {
+      final BitVector.Mutator sourceMutator = sourceVector.getMutator();
+      final BitVector.Accessor sourceAccessor = sourceVector.getAccessor();
+
+      sourceVector.allocateNew(32);
+
+      /* populate the bitvector */
+      for (int i = 0; i < 32; i++) {
+        if ((i & 1) == 1) {
+          sourceMutator.set(i, 1);
+        } else {
+          sourceMutator.set(i, 0);
+        }
+      }
+
+      sourceMutator.setValueCount(32);
 
-        toVector.clear();
+      /* check the vector output */
+      for (int i = 0; i < 32; i++) {
+        int result = sourceAccessor.get(i);
+        if ((i & 1) == 1) {
+          assertEquals(Integer.toString(1), Integer.toString(result));
+        } else {
+          assertEquals(Integer.toString(0), Integer.toString(result));
+        }
       }
 
-      sourceVector.close();
+      try (final BitVector toVector = new BitVector("toVector", allocator)) {
+        final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+        final BitVector.Accessor toAccessor = toVector.getAccessor();
+        final BitVector.Mutator toMutator = toVector.getMutator();
+
+        final int[][] transferLengths = {{5,22}, {5,24}, {5,25}, {5,27}, {0,31}, {5,7}, {2,3}};
+
+        for (final int[] transferLength : transferLengths) {
+          final int start = transferLength[0];
+          final int length = transferLength[1];
+
+          transferPair.splitAndTransfer(start, length);
+
+          /* check the toVector output after doing splitAndTransfer */
+          for (int i = 0; i < length; i++) {
+            int actual = toAccessor.get(i);
+            int expected = sourceAccessor.get(start + i);
+            assertEquals("different data values not expected --> sourceVector index: "
+ (start + i) + " toVector index: " + i,
+                    expected, actual);
+          }
+        }
+      }
     }
   }
 


Mime
View raw message