arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jacq...@apache.org
Subject [02/17] arrow git commit: ARROW-1: Initial Arrow Code Commit
Date Wed, 17 Feb 2016 12:39:37 GMT
http://git-wip-us.apache.org/repos/asf/arrow/blob/fa5f0299/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java
new file mode 100644
index 0000000..576a5b6
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java
@@ -0,0 +1,737 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.util;
+
+import io.netty.buffer.ArrowBuf;
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.UnpooledByteBufAllocator;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import org.apache.arrow.vector.holders.Decimal38SparseHolder;
+
+public class DecimalUtility extends CoreDecimalUtility{
+
+    public final static int MAX_DIGITS = 9;
+    public final static int DIGITS_BASE = 1000000000;
+    public final static int DIGITS_MAX = 999999999;
+    public final static int INTEGER_SIZE = (Integer.SIZE/8);
+
+    public final static String[] decimalToString = {"",
+            "0",
+            "00",
+            "000",
+            "0000",
+            "00000",
+            "000000",
+            "0000000",
+            "00000000",
+            "000000000"};
+
+    public final static long[] scale_long_constants = {
+        1,
+        10,
+        100,
+        1000,
+        10000,
+        100000,
+        1000000,
+        10000000,
+        100000000,
+        1000000000,
+        10000000000l,
+        100000000000l,
+        1000000000000l,
+        10000000000000l,
+        100000000000000l,
+        1000000000000000l,
+        10000000000000000l,
+        100000000000000000l,
+        1000000000000000000l};
+
+    /*
+     * Looks up 10^power in the precomputed scale_long_constants table
+     * instead of computing it with Math.pow. The valid range of power
+     * (0 .. table length - 1) is only checked by an assertion.
+     */
+    public static long getPowerOfTen(int power) {
+      final int tableSize = scale_long_constants.length;
+      assert power >= 0 && power < tableSize;
+      final long powerOfTen = scale_long_constants[power];
+      return powerOfTen;
+    }
+
+    /*
+     * Multiplies input by 10^factor using the precomputed long constants.
+     * Math.pow is avoided because it works on doubles, which introduces
+     * noise for large decimal values. A negative factor divides input by
+     * 10^|factor| instead (long division).
+     */
+    public static long adjustScaleMultiply(long input, int factor) {
+      final int magnitude = Math.abs(factor);
+      assert magnitude >= 0 && magnitude < scale_long_constants.length;
+      final long powerOfTen = scale_long_constants[magnitude];
+      return (factor < 0) ? input / powerOfTen : input * powerOfTen;
+    }
+
+    /*
+     * Divides input by 10^factor using the precomputed long constants
+     * (long division). A negative factor multiplies input by 10^|factor|
+     * instead.
+     */
+    public static long adjustScaleDivide(long input, int factor) {
+      final int magnitude = Math.abs(factor);
+      assert magnitude >= 0 && magnitude < scale_long_constants.length;
+      final long powerOfTen = scale_long_constants[magnitude];
+      return (factor < 0) ? input * powerOfTen : input / powerOfTen;
+    }
+
+    /* Returns how many base-1-billion integers are needed to hold
+     * ndigits decimal digits, i.e. ndigits / MAX_DIGITS rounded up
+     * for non-negative ndigits.
+     */
+    public static int roundUp(int ndigits) {
+        final int padded = ndigits + (MAX_DIGITS - 1);
+        return padded / MAX_DIGITS;
+    }
+
+    /* Formats number as decimal text, left-padded with zeroes up to
+     * desiredLength characters. The padding is taken from decimalToString,
+     * so the number of missing characters must be between 0 and MAX_DIGITS.
+     */
+    public static StringBuilder toStringWithZeroes(int number, int desiredLength) {
+        final String digits = Integer.toString(number);
+        final String padding = decimalToString[desiredLength - digits.length()];
+        return new StringBuilder().append(padding).append(digits);
+    }
+
+    /* Formats number as decimal text, left-padded with zeroes up to
+     * desiredLength characters. Unlike the int overload, the padding may
+     * be longer than MAX_DIGITS; it is emitted in chunks of MAX_DIGITS zeroes.
+     */
+    public static StringBuilder toStringWithZeroes(long number, int desiredLength) {
+        final String digits = Long.toString(number);
+        final StringBuilder padded = new StringBuilder();
+
+        // Emit full chunks of MAX_DIGITS zeroes while more than one chunk is still missing
+        int missing = desiredLength - digits.length();
+        for (; missing > MAX_DIGITS; missing -= MAX_DIGITS) {
+            padded.append(decimalToString[MAX_DIGITS]);
+        }
+
+        // Remaining 0 .. MAX_DIGITS zeroes, then the digits themselves
+        padded.append(decimalToString[missing]).append(digits);
+        return padded;
+    }
+
+  /* Decodes a decimal stored in the intermediate representation. */
+  public static BigDecimal getBigDecimalFromIntermediate(ByteBuf data, int startIndex, int nDecimalDigits, int scale) {
+        // The intermediate representation does not pad the scale with zeroes, so nothing is truncated
+        final boolean truncateScale = false;
+        return getBigDecimalFromDrillBuf(data, startIndex, nDecimalDigits, scale, truncateScale);
+    }
+
+    /* Decodes a decimal stored in the sparse representation. */
+    public static BigDecimal getBigDecimalFromSparse(ArrowBuf data, int startIndex, int nDecimalDigits, int scale) {
+        // The sparse representation pads the scale with zeroes for ease of arithmetic; strip that padding
+        final boolean truncateScale = true;
+        return getBigDecimalFromDrillBuf(data, startIndex, nDecimalDigits, scale, truncateScale);
+    }
+
+    /* Copies length bytes starting at index start out of the buffer and
+     * interprets them as the two's-complement, big-endian unscaled value
+     * (the java.math.BigInteger byte layout) of a BigDecimal with the given scale.
+     */
+    public static BigDecimal getBigDecimalFromDrillBuf(ArrowBuf bytebuf, int start, int length, int scale) {
+      final byte[] raw = new byte[length];
+      bytebuf.getBytes(start, raw, 0, length);
+      return new BigDecimal(new BigInteger(raw), scale);
+    }
+
+  /*
+   * Reads length bytes from a ByteBuffer and interprets them as the
+   * two's-complement, big-endian unscaled value (the java.math.BigInteger
+   * byte layout) of a BigDecimal with the given scale.
+   *
+   * start is the number of bytes to skip, counted from the buffer's current
+   * position, before the value begins. Previously start was silently ignored;
+   * with start == 0 the behaviour is unchanged. The read is relative, so the
+   * buffer's position ends up just past the bytes that were read.
+   *
+   * Throws IllegalArgumentException if skipping start bytes would move the
+   * position outside the buffer's limit, and BufferUnderflowException if
+   * fewer than length bytes remain after the skip.
+   */
+  public static BigDecimal getBigDecimalFromByteBuffer(ByteBuffer bytebuf, int start, int length, int scale) {
+    byte[] value = new byte[length];
+    if (start != 0) {
+      // Honour the caller's offset instead of always reading from the current position
+      bytebuf.position(bytebuf.position() + start);
+    }
+    bytebuf.get(value);
+    BigInteger unscaledValue = new BigInteger(value);
+    return new BigDecimal(unscaledValue, scale);
+  }
+
+    /* Builds a BigDecimal from a non-dense decimal (sparse or intermediate
+     * representation) held in the buffer.
+     *
+     * The value is read as nDecimalDigits consecutive ints starting at
+     * startIndex, each one a base-1-billion digit, most significant first.
+     * The top bit of the first int is the sign flag and is not part of the
+     * magnitude. When truncateScale is set and scale is positive but not a
+     * multiple of MAX_DIGITS, the zero padding of the last fractional digit
+     * is divided out before the BigDecimal is created.
+     */
+  public static BigDecimal getBigDecimalFromDrillBuf(ByteBuf data, int startIndex, int nDecimalDigits, int scale,
+      boolean truncateScale) {
+
+        // The first int carries the sign in its top bit and magnitude in the lower 31 bits
+        final int firstWord = data.getInt(startIndex);
+        final BigInteger radix = BigInteger.valueOf(DIGITS_BASE);
+
+        // Accumulate the magnitude: unscaled = unscaled * 10^9 + nextDigit
+        BigInteger unscaled = BigInteger.valueOf(firstWord & 0x7FFFFFFF);
+        for (int word = 1; word < nDecimalDigits; word++) {
+            final int digit = data.getInt(startIndex + (word * INTEGER_SIZE));
+            unscaled = unscaled.multiply(radix).add(BigInteger.valueOf(digit));
+        }
+
+        // Strip the zero padding that the sparse format appends to the last fractional digit
+        if (truncateScale && scale > 0) {
+            final int usedDigits = scale % MAX_DIGITS;
+            if (usedDigits != 0) {
+                unscaled = unscaled.divide(BigInteger.TEN.pow(MAX_DIGITS - usedDigits));
+            }
+        }
+
+        // Apply the sign
+        if ((firstWord & 0x80000000) != 0) {
+            unscaled = unscaled.negate();
+        }
+
+        return new BigDecimal(unscaled, scale);
+    }
+
+    /* This function returns a BigDecimal object from the dense decimal representation.
+     * First step is to convert the dense representation into an intermediate representation
+     * and then invoke getBigDecimalFromIntermediate() to get the BigDecimal object.
+     *
+     * data / startIndex : buffer and byte offset of the dense value
+     * nDecimalDigits    : number of integers of the dense value; the intermediate
+     *                     byte array is sized for one integer more
+     * scale             : scale of the resulting BigDecimal
+     * maxPrecision      : selects the bit layout, must be 38 or 28; any other value
+     *                     raises UnsupportedOperationException
+     * width             : number of bytes read from data (bytes startIndex .. startIndex + width - 1)
+     *
+     * The most significant bit (0x80) of the first dense byte is the sign.
+     */
+    public static BigDecimal getBigDecimalFromDense(ArrowBuf data, int startIndex, int nDecimalDigits, int scale, int maxPrecision, int width) {
+
+        /* This method converts the dense representation to
+         * an intermediate representation. The intermediate
+         * representation has one more integer than the dense
+         * representation.
+         */
+        byte[] intermediateBytes = new byte[((nDecimalDigits + 1) * INTEGER_SIZE)];
+
+        // Start storing from the least significant byte of the first integer
+        int intermediateIndex = 3;
+
+        // mask[i] keeps the low 2/4/6/8 bits of a byte, reverseMask[i] keeps the complementary high bits
+        int[] mask = {0x03, 0x0F, 0x3F, 0xFF};
+        int[] reverseMask = {0xFC, 0xF0, 0xC0, 0x00};
+
+        // maskIndex  : which mask pair is currently in use
+        // shiftOrder : left shift applied to the bits carried over from the previous input byte
+        // shiftBits  : bits carried over from the previous input byte, already shifted into place
+        int maskIndex;
+        int shiftOrder;
+        byte shiftBits;
+
+        // TODO: Some of the logic here is common with casting from Dense to Sparse types, factor out common code
+        if (maxPrecision == 38) {
+            maskIndex = 0;
+            shiftOrder = 6;
+            shiftBits = 0x00;
+            // 0x7F drops the sign bit; the remaining seven bits of the first byte are stored as is
+            intermediateBytes[intermediateIndex++] = (byte) (data.getByte(startIndex) & 0x7F);
+        } else if (maxPrecision == 28) {
+            maskIndex = 1;
+            shiftOrder = 4;
+            // The low two bits of the first byte are carried into the next intermediate byte,
+            // bits 0x3C (shifted down by two) become the first intermediate byte
+            shiftBits = (byte) ((data.getByte(startIndex) & 0x03) << shiftOrder);
+            intermediateBytes[intermediateIndex++] = (byte) (((data.getByte(startIndex) & 0x3C) & 0xFF) >>> 2);
+        } else {
+            throw new UnsupportedOperationException("Dense types with max precision 38 and 28 are only supported");
+        }
+
+        // The first input byte was consumed above, continue with the second one
+        int inputIndex = 1;
+        boolean sign = false;
+
+        if ((data.getByte(startIndex) & 0x80) != 0) {
+            sign = true;
+        }
+
+        while (inputIndex < width) {
+
+            // Output byte = carried bits from the previous input byte | high bits of the current input byte
+            intermediateBytes[intermediateIndex] = (byte) ((shiftBits) | (((data.getByte(startIndex + inputIndex) & reverseMask[maskIndex]) & 0xFF) >>> (8 - shiftOrder)));
+
+            // The low bits of the current input byte are carried over to the next output byte
+            shiftBits = (byte) ((data.getByte(startIndex + inputIndex) & mask[maskIndex]) << shiftOrder);
+
+            inputIndex++;
+            intermediateIndex++;
+
+            // After every INTEGER_SIZE input bytes the split point moves by two bits:
+            // switch to the next mask pair and reduce the carry shift accordingly
+            if (((inputIndex - 1) % INTEGER_SIZE) == 0) {
+                shiftBits = (byte) ((shiftBits & 0xFF) >>> 2);
+                maskIndex++;
+                shiftOrder -= 2;
+            }
+
+        }
+        /* copy the last byte */
+        intermediateBytes[intermediateIndex] = shiftBits;
+
+        // The intermediate representation keeps the sign in the top bit of its first byte
+        if (sign == true) {
+            intermediateBytes[0] = (byte) (intermediateBytes[0] | 0x80);
+        }
+
+    // Copy the bytes into a temporary unpooled buffer for decoding; it is released in the finally block
+    final ByteBuf intermediate = UnpooledByteBufAllocator.DEFAULT.buffer(intermediateBytes.length);
+    try {
+        intermediate.setBytes(0, intermediateBytes);
+
+      BigDecimal ret = getBigDecimalFromIntermediate(intermediate, 0, nDecimalDigits + 1, scale);
+      return ret;
+    } finally {
+      intermediate.release();
+    }
+
+    }
+
+    /*
+     * Function converts the BigDecimal and stores it in our internal sparse representation.
+     *
+     * input          : value to store; it is rounded (half up) to the given scale first
+     * data           : destination buffer
+     * startIndex     : byte offset in data where the first integer is written
+     * scale          : number of fractional decimal digits to keep
+     * precision      : not used by this method
+     * nDecimalDigits : number of integers (base 1 billion digits) the value occupies in data
+     *
+     * The fractional part occupies the last roundUp(scale) integers, the integer part
+     * is written right before it, and the sign is stored in the top bit of the first integer.
+     */
+  public static void getSparseFromBigDecimal(BigDecimal input, ByteBuf data, int startIndex, int scale, int precision,
+      int nDecimalDigits) {
+
+        // Initialize the buffer
+        for (int i = 0; i < nDecimalDigits; i++) {
+          data.setInt(startIndex + (i * INTEGER_SIZE), 0);
+        }
+
+        boolean sign = false;
+
+        if (input.signum() == -1) {
+            // negative input: work on the magnitude, the sign bit is set at the very end
+            sign = true;
+            input = input.abs();
+        }
+
+        // Round the input (half up) to the scale provided
+        input = input.setScale(scale, BigDecimal.ROUND_HALF_UP);
+
+        // Separate out the integer part
+        BigDecimal integerPart = input.setScale(0, BigDecimal.ROUND_DOWN);
+
+        // Index of the least significant integer of the integer part
+        int destIndex = nDecimalDigits - roundUp(scale) - 1;
+
+        // we use base 1 billion integer digits for our internal representation
+        BigDecimal base = new BigDecimal(DIGITS_BASE);
+
+        // Emit the integer part least significant digit first, moving towards the start of the buffer
+        while (integerPart.compareTo(BigDecimal.ZERO) == 1) {
+            // store the modulo as the integer value
+            data.setInt(startIndex + (destIndex * INTEGER_SIZE), (integerPart.remainder(base)).intValue());
+            destIndex--;
+            // Divide by base 1 billion
+            integerPart = (integerPart.divide(base)).setScale(0, BigDecimal.ROUND_DOWN);
+        }
+
+        /* Sparse representation contains padding of additional zeroes
+         * so each digit contains MAX_DIGITS for ease of arithmetic
+         */
+        int actualDigits;
+        if ((actualDigits = (scale % MAX_DIGITS)) != 0) {
+            // Pad additional zeroes so that scale becomes a multiple of MAX_DIGITS
+            scale = scale + (MAX_DIGITS - actualDigits);
+            input = input.setScale(scale, BigDecimal.ROUND_DOWN);
+        }
+
+        //separate out the fractional part, as a whole number of `scale` digits
+        BigDecimal fractionalPart = input.remainder(BigDecimal.ONE).movePointRight(scale);
+
+        // The fractional part is written from the last integer backwards
+        destIndex = nDecimalDigits - 1;
+
+        while (scale > 0) {
+            // Get next set of MAX_DIGITS (9) store it in the buffer
+            fractionalPart = fractionalPart.movePointLeft(MAX_DIGITS);
+            BigDecimal temp = fractionalPart.remainder(BigDecimal.ONE);
+
+            data.setInt(startIndex + (destIndex * INTEGER_SIZE), (temp.unscaledValue().intValue()));
+            destIndex--;
+
+            // Drop the digits that were just stored
+            fractionalPart = fractionalPart.setScale(0, BigDecimal.ROUND_DOWN);
+            scale -= MAX_DIGITS;
+        }
+
+        // Set the negative sign
+        if (sign == true) {
+            data.setInt(startIndex, data.getInt(startIndex) | 0x80000000);
+        }
+
+    }
+
+
+    /* Returns the unscaled value of input, after rounding (half up) or
+     * padding it to the requested scale, narrowed to a long. The precision
+     * argument is not used.
+     */
+    public static long getDecimal18FromBigDecimal(BigDecimal input, int scale, int precision) {
+        final BigDecimal rescaled = input.setScale(scale, BigDecimal.ROUND_HALF_UP);
+        final BigInteger unscaled = rescaled.unscaledValue();
+        return unscaled.longValue();
+    }
+
+    /* Wraps an int unscaled value and a scale into a BigDecimal; precision is not used. */
+    public static BigDecimal getBigDecimalFromPrimitiveTypes(int input, int scale, int precision) {
+      final long unscaledValue = input;
+      return BigDecimal.valueOf(unscaledValue, scale);
+    }
+
+    /* Wraps a long unscaled value and a scale into a BigDecimal; precision is not used. */
+    public static BigDecimal getBigDecimalFromPrimitiveTypes(long input, int scale, int precision) {
+      final BigDecimal decimal = BigDecimal.valueOf(input, scale);
+      return decimal;
+    }
+
+
+    /* Compares two dense decimals of width bytes each, given their signs.
+     * Returns 1 if left is greater, -1 if it is smaller and 0 if both are equal.
+     * The bytes are compared as unsigned values starting at leftStart / rightStart.
+     */
+    public static int compareDenseBytes(ArrowBuf left, int leftStart, boolean leftSign, ArrowBuf right, int rightStart, boolean rightSign, int width) {
+
+      // Opposite signs: the negative operand is the smaller one, the bytes need not be inspected
+      if (leftSign != rightSign) {
+        return leftSign ? -1 : 1;
+      }
+
+      // Same sign: for two negative values the ordering of the magnitudes is reversed
+      final int direction = leftSign ? -1 : 1;
+
+      for (int offset = 0; offset < width; offset++) {
+        final int leftUnsigned  = left.getByte(leftStart + offset) & 0xFF;
+        final int rightUnsigned = right.getByte(rightStart + offset) & 0xFF;
+        if (leftUnsigned != rightUnsigned) {
+          // The first differing byte decides the comparison
+          return (leftUnsigned > rightUnsigned) ? direction : -direction;
+        }
+      }
+
+      return 0;
+    }
+
+    /* Reads the index-th integer of a sparse decimal that begins at byte
+     * offset start. For index 0 the top bit, which holds the sign, is
+     * cleared from the returned value.
+     */
+    public static int getIntegerFromSparseBuffer(ArrowBuf buffer, int start, int index) {
+      final int word = buffer.getInt(start + (index * 4));
+      return (index == 0) ? (word & 0x7FFFFFFF) : word;
+    }
+
+    /* Writes value as the index-th integer of a decimal that begins at byte offset start. */
+    public static void setInteger(ArrowBuf buffer, int start, int index, int value) {
+      final int byteOffset = start + (index * 4);
+      buffer.setInt(byteOffset, value);
+    }
+
+    /* Compares two sparse decimals. Returns 1 if left is greater, -1 if it is
+     * smaller and 0 if both are equal. When absCompare is set the signs are
+     * ignored and only the magnitudes are compared.
+     * Note the parameter order: scale comes before precision for the left
+     * operand, precision before scale for the right operand.
+     */
+    public static int compareSparseBytes(ArrowBuf left, int leftStart, boolean leftSign, int leftScale, int leftPrecision, ArrowBuf right, int rightStart, boolean rightSign, int rightPrecision, int rightScale, int width, int nDecimalDigits, boolean absCompare) {
+
+      // Opposite signs decide a signed comparison without looking at the digits
+      if (!absCompare && leftSign != rightSign) {
+        return leftSign ? -1 : 1;
+      }
+
+      final int magnitudeOrder = compareSparseBytesInner(left, leftStart, leftSign, leftScale, leftPrecision, right, rightStart, rightSign, rightPrecision, rightScale, width, nDecimalDigits);
+
+      // Both values negative (signed comparison only): invert the outcome
+      final boolean bothNegative = !absCompare && leftSign;
+      return bothNegative ? -magnitudeOrder : magnitudeOrder;
+    }
+    /* Compares the magnitudes of two sparse decimals that may have different
+     * precision and scale. Returns 1 if left is larger, -1 if right is larger
+     * and 0 if both are equal.
+     *
+     * Both values occupy nDecimalDigits integers, with the fractional part in the
+     * last roundUp(scale) integers and the integer part right before it.
+     * leftSign, rightSign and width are not used by this method.
+     * Note the parameter order: scale comes before precision for the left
+     * operand, precision before scale for the right operand.
+     */
+    public static int compareSparseBytesInner(ArrowBuf left, int leftStart, boolean leftSign, int leftScale, int leftPrecision, ArrowBuf right, int rightStart, boolean rightSign, int rightPrecision, int rightScale, int width, int nDecimalDigits) {
+      /* compute the number of integer digits in each decimal */
+      int leftInt  = leftPrecision - leftScale;
+      int rightInt = rightPrecision - rightScale;
+
+      /* compute the number of indexes required for storing integer digits */
+      int leftIntRoundedUp = org.apache.arrow.vector.util.DecimalUtility.roundUp(leftInt);
+      int rightIntRoundedUp = org.apache.arrow.vector.util.DecimalUtility.roundUp(rightInt);
+
+      /* compute number of indexes required for storing scale */
+      int leftScaleRoundedUp = org.apache.arrow.vector.util.DecimalUtility.roundUp(leftScale);
+      int rightScaleRoundedUp = org.apache.arrow.vector.util.DecimalUtility.roundUp(rightScale);
+
+      /* compute index of the most significant integer digits */
+      int leftIndex1 = nDecimalDigits - leftScaleRoundedUp - leftIntRoundedUp;
+      int rightIndex1 = nDecimalDigits - rightScaleRoundedUp - rightIntRoundedUp;
+
+      /* the integer part ends (exclusive) where the fractional part begins */
+      int leftStopIndex = nDecimalDigits - leftScaleRoundedUp;
+      int rightStopIndex = nDecimalDigits - rightScaleRoundedUp;
+
+      /* Discard the zeroes in the integer part */
+      while (leftIndex1 < leftStopIndex) {
+        if (getIntegerFromSparseBuffer(left, leftStart, leftIndex1) != 0) {
+          break;
+        }
+
+        /* Digit in this location is zero, decrement the actual number
+         * of integer digits
+         */
+        leftIntRoundedUp--;
+        leftIndex1++;
+      }
+
+      /* If we reached the stop index then the number of integers is zero */
+      if (leftIndex1 == leftStopIndex) {
+        leftIntRoundedUp = 0;
+      }
+
+      /* Same leading-zero elimination for the right operand */
+      while (rightIndex1 < rightStopIndex) {
+        if (getIntegerFromSparseBuffer(right, rightStart, rightIndex1) != 0) {
+          break;
+        }
+
+        /* Digit in this location is zero, decrement the actual number
+         * of integer digits
+         */
+        rightIntRoundedUp--;
+        rightIndex1++;
+      }
+
+      if (rightIndex1 == rightStopIndex) {
+        rightIntRoundedUp = 0;
+      }
+
+      /* We have the accurate number of non-zero integer digits,
+       * if the number of integer digits are different then we can determine
+       * which decimal is larger and needn't go down to comparing individual values
+       */
+      if (leftIntRoundedUp > rightIntRoundedUp) {
+        return 1;
+      }
+      else if (rightIntRoundedUp > leftIntRoundedUp) {
+        return -1;
+      }
+
+      /* The number of integer digits are the same, set the each index
+       * to the first non-zero integer and compare each digit
+       */
+      leftIndex1 = nDecimalDigits - leftScaleRoundedUp - leftIntRoundedUp;
+      rightIndex1 = nDecimalDigits - rightScaleRoundedUp - rightIntRoundedUp;
+
+      while (leftIndex1 < leftStopIndex && rightIndex1 < rightStopIndex) {
+        if (getIntegerFromSparseBuffer(left, leftStart, leftIndex1) > getIntegerFromSparseBuffer(right, rightStart, rightIndex1)) {
+          return 1;
+        }
+        else if (getIntegerFromSparseBuffer(right, rightStart, rightIndex1) > getIntegerFromSparseBuffer(left, leftStart, leftIndex1)) {
+          return -1;
+        }
+
+        leftIndex1++;
+        rightIndex1++;
+      }
+
+      /* The integer part of both the decimal's are equal, now compare
+       * each individual fractional part. Set the index to be at the
+       * beginning of the fractional part
+       */
+      leftIndex1 = leftStopIndex;
+      rightIndex1 = rightStopIndex;
+
+      /* Stop indexes will be the end of the array */
+      leftStopIndex = nDecimalDigits;
+      rightStopIndex = nDecimalDigits;
+
+      /* compare the two fractional parts of the decimal */
+      while (leftIndex1 < leftStopIndex && rightIndex1 < rightStopIndex) {
+        if (getIntegerFromSparseBuffer(left, leftStart, leftIndex1) > getIntegerFromSparseBuffer(right, rightStart, rightIndex1)) {
+          return 1;
+        }
+        else if (getIntegerFromSparseBuffer(right, rightStart, rightIndex1) > getIntegerFromSparseBuffer(left, leftStart, leftIndex1)) {
+          return -1;
+        }
+
+        leftIndex1++;
+        rightIndex1++;
+      }
+
+      /* Till now the fractional part of the decimals are equal, check
+       * if one of the decimal has fractional part that is remaining
+       * and is non-zero
+       */
+      while (leftIndex1 < leftStopIndex) {
+        if (getIntegerFromSparseBuffer(left, leftStart, leftIndex1) != 0) {
+          return 1;
+        }
+        leftIndex1++;
+      }
+
+      while(rightIndex1 < rightStopIndex) {
+        if (getIntegerFromSparseBuffer(right, rightStart, rightIndex1) != 0) {
+          return -1;
+        }
+        rightIndex1++;
+      }
+
+      /* Both decimal values are equal */
+      return 0;
+    }
+
+    /* Interprets bytes[start .. start + length - 1] as the two's-complement,
+     * big-endian unscaled value (the java.math.BigInteger byte layout) of a
+     * BigDecimal with the given scale.
+     */
+    public static BigDecimal getBigDecimalFromByteArray(byte[] bytes, int start, int length, int scale) {
+      final int end = start + length;
+      return new BigDecimal(new BigInteger(Arrays.copyOfRange(bytes, start, end)), scale);
+    }
+
+  /*
+   * Changes, in place, the scale of a sparse decimal stored in result as
+   * nDecimalDigits base-1-billion integers starting at byte offset start.
+   *
+   * desiredScale < currentScale: the fractional digits beyond desiredScale are
+   * dropped and the magnitude is rounded up when the first dropped digit is 5
+   * or more. If fewer integers are needed for the fraction, the value is moved
+   * towards the end of the buffer and the vacated leading integers are zeroed.
+   *
+   * desiredScale > currentScale: if more integers are needed for the fraction,
+   * the value is moved towards the start of the buffer and the vacated trailing
+   * integers are zeroed. A RuntimeException is thrown if that move would
+   * overwrite non-zero integer digits.
+   *
+   * desiredScale == currentScale: nothing is changed.
+   *
+   * NOTE(review): integers are read through getIntegerFromSparseBuffer, which
+   * clears the sign bit of the first integer, so a move rewrites the first
+   * integer without its sign bit - presumably callers restore the sign; confirm.
+   */
+  public static void roundDecimal(ArrowBuf result, int start, int nDecimalDigits, int desiredScale, int currentScale) {
+    int newScaleRoundedUp  = roundUp(desiredScale);
+    int origScaleRoundedUp = roundUp(currentScale);
+
+    if (desiredScale < currentScale) {
+
+      boolean needsRoundUp = false;
+
+      // Extract the first digit to be truncated to check if we need to round up
+      int truncatedScaleIndex = desiredScale + 1;
+      if (truncatedScaleIndex <= currentScale) {
+        int extractDigitIndex = nDecimalDigits - origScaleRoundedUp - 1;
+        extractDigitIndex += roundUp(truncatedScaleIndex);
+        int extractDigit = getIntegerFromSparseBuffer(result, start, extractDigitIndex);
+
+        // Number of decimal digits to the right of the inspected digit inside its integer.
+        // It is 0 when the inspected digit is the last digit of the integer; the previous
+        // check (temp != 0 on a value in 1..9) divided by 10^9 in that case, which always
+        // produced 0 and silently skipped the rounding.
+        int digitsToTheRight = (MAX_DIGITS - (truncatedScaleIndex % MAX_DIGITS)) % MAX_DIGITS;
+        if (digitsToTheRight != 0) {
+          extractDigit = extractDigit / (int) (Math.pow(10, digitsToTheRight));
+        }
+        if ((extractDigit % 10) > 4) {
+          needsRoundUp = true;
+        }
+      }
+
+      // Get the source index beyond which we will truncate
+      int srcIntIndex = nDecimalDigits - origScaleRoundedUp - 1;
+      int srcIndex = srcIntIndex + newScaleRoundedUp;
+
+      // Truncate the remaining fractional part, move the integer part
+      int destIndex = nDecimalDigits - 1;
+      if (srcIndex != destIndex) {
+        while (srcIndex >= 0) {
+          setInteger(result, start, destIndex--, getIntegerFromSparseBuffer(result, start, srcIndex--));
+        }
+
+        // Set the remaining portion of the decimal to be zeroes
+        while (destIndex >= 0) {
+          setInteger(result, start, destIndex--, 0);
+        }
+        srcIndex = nDecimalDigits - 1;
+      }
+
+      // We truncated the decimal digit. Now we need to truncate within the base 1 billion fractional digit.
+      // truncateFactor starts as a digit count; MAX_DIGITS means the last integer is fully used.
+      int truncateFactor = MAX_DIGITS - (desiredScale % MAX_DIGITS);
+      if (truncateFactor != MAX_DIGITS) {
+        // From here on truncateFactor is a power of ten (10 .. 10^8), never MAX_DIGITS
+        truncateFactor = (int) Math.pow(10, truncateFactor);
+        int fractionalDigits = getIntegerFromSparseBuffer(result, start, nDecimalDigits - 1);
+        fractionalDigits /= truncateFactor;
+        setInteger(result, start, nDecimalDigits - 1, fractionalDigits * truncateFactor);
+      }
+
+      // Finally round up the digit if needed
+      if (needsRoundUp) {
+        srcIndex = nDecimalDigits - 1;
+
+        // One unit of the last kept digit: the power of ten computed above, or 1
+        // when the last integer is fully used
+        int carry;
+        if (truncateFactor != MAX_DIGITS) {
+          carry = truncateFactor;
+        } else {
+          carry = 1;
+        }
+
+        // Propagate the carry towards the most significant integer
+        while (srcIndex >= 0) {
+          int value = getIntegerFromSparseBuffer(result, start, srcIndex);
+          value += carry;
+
+          if (value >= DIGITS_BASE) {
+            setInteger(result, start, srcIndex--, value % DIGITS_BASE);
+            carry = value / DIGITS_BASE;
+          } else {
+            setInteger(result, start, srcIndex--, value);
+            break;
+          }
+        }
+      }
+    } else if (desiredScale > currentScale) {
+      // Add fractional digits to the decimal
+
+      // Check if we need to shift the decimal digits to the left
+      if (newScaleRoundedUp > origScaleRoundedUp) {
+        int srcIndex  = 0;
+        int destIndex = newScaleRoundedUp - origScaleRoundedUp;
+
+        // Check while extending scale, we are not overwriting integer part
+        while (srcIndex < destIndex) {
+          if (getIntegerFromSparseBuffer(result, start, srcIndex++) != 0) {
+            throw new RuntimeException("Truncate resulting in loss of integer part, reduce scale specified");
+          }
+        }
+
+        srcIndex = 0;
+        while (destIndex < nDecimalDigits) {
+          setInteger(result, start, srcIndex++, getIntegerFromSparseBuffer(result, start, destIndex++));
+        }
+
+        // Clear the remaining part
+        while (srcIndex < nDecimalDigits) {
+          setInteger(result, start, srcIndex++, 0);
+        }
+      }
+    }
+  }
+
+  /* Returns the first digit after the decimal point of an int-backed decimal
+   * (unscaled value plus scale), as a non-negative digit; 0 when scale is 0.
+   */
+  public static int getFirstFractionalDigit(int decimal, int scale) {
+    if (scale != 0) {
+      // Drop all fractional digits but the first one, then take the last remaining digit
+      final int shifted = (int) adjustScaleDivide(decimal, scale - 1);
+      return Math.abs(shifted % 10);
+    }
+    return 0;
+  }
+
+  /* Returns the first digit after the decimal point of a long-backed decimal
+   * (unscaled value plus scale), as a non-negative digit; 0 when scale is 0.
+   */
+  public static int getFirstFractionalDigit(long decimal, int scale) {
+    if (scale != 0) {
+      // Drop all fractional digits but the first one, then take the last remaining digit
+      final long shifted = adjustScaleDivide(decimal, scale - 1);
+      return (int) (Math.abs(shifted % 10));
+    }
+    return 0;
+  }
+
+  /* Returns the leading decimal digit of the first fractional integer of a
+   * decimal stored as nDecimalDigits integers at byte offset start; 0 when
+   * scale is 0. The integer is read as is, without masking a sign bit.
+   */
+  public static int getFirstFractionalDigit(ArrowBuf data, int scale, int start, int nDecimalDigits) {
+    if (scale == 0) {
+      return 0;
+    }
+
+    // The fractional part occupies the last roundUp(scale) integers
+    final int firstFractionalIndex = nDecimalDigits - roundUp(scale);
+    final int firstFractionalWord = data.getInt(start + (firstFractionalIndex * INTEGER_SIZE));
+
+    // Dividing by 10^(MAX_DIGITS - 1) leaves only the most significant digit
+    return (int) (adjustScaleDivide(firstFractionalWord, MAX_DIGITS - 1));
+  }
+
+  /*
+   * Compares a sparse decimal held in a buffer (left, starting at byte offset
+   * lStart) with one held in a byte array (right); both are expected to have
+   * the same precision and scale. length is the size of the value in bytes;
+   * length / 4 integers are compared, most significant first.
+   *
+   * Returns 1 if left is greater, -1 if it is smaller and 0 if both are equal.
+   *
+   * The integers are compared directly instead of by the sign of their
+   * difference, which could overflow and report the wrong order.
+   * NOTE(review): whether the two getInteger helpers strip the sign bit of the
+   * first integer is not visible here - confirm against their implementations.
+   */
+  public static int compareSparseSamePrecScale(ArrowBuf left, int lStart, byte[] right, int length) {
+    // check the sign first
+    boolean lSign = (left.getInt(lStart) & 0x80000000) != 0;
+    boolean rSign = ByteFunctionHelpers.getSign(right);
+
+    if (lSign != rSign) {
+      // the non-negative operand is the greater one
+      return lSign ? -1 : 1;
+    }
+
+    // invert the comparison if we are comparing negative numbers
+    int invert = lSign ? -1 : 1;
+
+    // compare integer by integer, the first difference decides
+    int cmp = 0;
+    for (int n = 0; n < length / 4; n++) {
+      int leftInt = Decimal38SparseHolder.getInteger(n, lStart, left);
+      int rightInt = ByteFunctionHelpers.getInteger(right, n);
+      if (leftInt != rightInt) {
+        cmp = (leftInt > rightInt) ? 1 : -1;
+        break;
+      }
+    }
+    return cmp * invert;
+  }
+}
+

http://git-wip-us.apache.org/repos/asf/arrow/blob/fa5f0299/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java b/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java
new file mode 100644
index 0000000..7aeaa12
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/*
+ * ArrayList whose toString() renders the list as a JSON string.
+ *
+ * Note that equals() is deliberately looser than List.equals(): two lists are
+ * considered equal when they have the same size and this list contains every
+ * element of the other one, regardless of element order.
+ */
+public class JsonStringArrayList<E> extends ArrayList<E> {
+
+  private static final ObjectMapper MAPPER = new ObjectMapper();
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    // instanceof is false for null, so this also rejects a null argument
+    if (!(obj instanceof List)) {
+      return false;
+    }
+    final List<?> other = (List<?>) obj;
+    if (this.size() != other.size()) {
+      return false;
+    }
+    return this.containsAll(other);
+  }
+
+  @Override
+  public final String toString() {
+    try {
+      return MAPPER.writeValueAsString(this);
+    } catch (JsonProcessingException e) {
+      throw new IllegalStateException("Cannot serialize array list to JSON string", e);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/arrow/blob/fa5f0299/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java b/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java
new file mode 100644
index 0000000..750dd59
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.util;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * Simple class that extends {@link LinkedHashMap} (so insertion order is kept) but overrides
+ * toString() to produce a JSON string instead of the default map rendering.
+ *
+ * @param <K> key type
+ * @param <V> value type
+ */
+public class JsonStringHashMap<K, V> extends LinkedHashMap<K, V> {
+
+  /** Shared serializer used by {@link #toString()}; assigned once. */
+  private static final ObjectMapper mapper = new ObjectMapper();
+
+  /**
+   * Compares this map with another {@link Map} entry by entry.
+   *
+   * @param obj the object to compare with
+   * @return true if {@code obj} is a Map of the same size that maps every key of this map to an equal value
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    // instanceof is false for null, so a null argument is rejected here as well
+    if (!(obj instanceof Map)) {
+      return false;
+    }
+    final Map<?, ?> other = (Map<?, ?>) obj;
+    if (this.size() != other.size()) {
+      return false;
+    }
+    for (Map.Entry<K, V> entry : this.entrySet()) {
+      final K key = entry.getKey();
+      final V value = entry.getValue();
+      if (value == null) {
+        // a null value only matches a key that is present and mapped to null;
+        // get() alone cannot tell "absent" from "mapped to null"
+        if (other.get(key) != null || !other.containsKey(key)) {
+          return false;
+        }
+      } else if (!value.equals(other.get(key))) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Serializes this map to a JSON string.
+   *
+   * @return the JSON representation of the map
+   * @throws IllegalStateException if the map cannot be serialized
+   */
+  @Override
+  public final String toString() {
+    try {
+      return mapper.writeValueAsString(this);
+    } catch(JsonProcessingException e) {
+      throw new IllegalStateException("Cannot serialize hash map to JSON string", e);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/arrow/blob/fa5f0299/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java
new file mode 100644
index 0000000..dea433e
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java
@@ -0,0 +1,248 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.util;
+
+import java.util.AbstractMap;
+import java.util.Collection;
+import java.util.Map;
+import java.util.Set;
+
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import io.netty.util.collection.IntObjectHashMap;
+import io.netty.util.collection.IntObjectMap;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An implementation of map that supports constant time look-up by a generic key or an ordinal.
+ *
+ * This class extends the functionality a regular {@link Map} with ordinal lookup support.
+ * Upon insertion an unused ordinal is assigned to the inserted (key, value) tuple.
+ * Upon update the same ordinal id is re-used while value is replaced.
+ * Upon deletion of an existing item, its corresponding ordinal is recycled and could be used by another item.
+ *
+ * For any instance with N items, this implementation guarantees that ordinals are in the range of [0, N). However,
+ * the ordinal assignment is dynamic and may change after an insertion or deletion. Consumers of this class are
+ * responsible for explicitly checking the ordinal corresponding to a key via
+ * {@link org.apache.arrow.vector.util.MapWithOrdinal#getOrdinal(Object)} before attempting to execute a lookup
+ * with an ordinal.
+ *
+ * @param <K> key type
+ * @param <V> value type
+ */
+
+public class MapWithOrdinal<K, V> implements Map<K, V> {
+  private final static Logger logger = LoggerFactory.getLogger(MapWithOrdinal.class);
+
+  // key -> (ordinal, value), kept in insertion order
+  private final Map<K, Entry<Integer, V>> primary = Maps.newLinkedHashMap();
+  // ordinal -> value
+  private final IntObjectHashMap<V> secondary = new IntObjectHashMap<>();
+
+  // Inner map that keeps primary and secondary in sync; every public Map method forwards to it.
+  private final Map<K, V> delegate = new Map<K, V>() {
+    @Override
+    public boolean isEmpty() {
+      return size() == 0;
+    }
+
+    @Override
+    public int size() {
+      return primary.size();
+    }
+
+    @Override
+    public boolean containsKey(Object key) {
+      return primary.containsKey(key);
+    }
+
+    @Override
+    public boolean containsValue(Object value) {
+      // primary stores (ordinal, value) pairs, so the lookup must compare against the wrapped value
+      for (Entry<Integer, V> pair : primary.values()) {
+        final V candidate = pair.getValue();
+        if (value == null ? candidate == null : value.equals(candidate)) {
+          return true;
+        }
+      }
+      return false;
+    }
+
+    @Override
+    public V get(Object key) {
+      Entry<Integer, V> pair = primary.get(key);
+      if (pair != null) {
+        return pair.getValue();
+      }
+      return null;
+    }
+
+    @Override
+    public V put(K key, V value) {
+      final Entry<Integer, V> oldPair = primary.get(key);
+      // if key exists try replacing otherwise, assign a new ordinal identifier
+      final int ordinal = oldPair == null ? primary.size():oldPair.getKey();
+      primary.put(key, new AbstractMap.SimpleImmutableEntry<>(ordinal, value));
+      secondary.put(ordinal, value);
+      return oldPair==null ? null:oldPair.getValue();
+    }
+
+    @Override
+    public V remove(Object key) {
+      final Entry<Integer, V> removed = primary.remove(key);
+      if (removed == null) {
+        return null;
+      }
+      final int freedOrdinal = removed.getKey();
+      // ordinals were [0, N) before the removal, so the highest one equals the new size
+      final int lastOrdinal = primary.size();
+      if (freedOrdinal != lastOrdinal) {
+        // normalize mappings so that all numbers until primary.size() stay assigned:
+        // move the element holding the last ordinal into the slot that was just freed
+        final V last = secondary.get(lastOrdinal);
+        for (Entry<K, Entry<Integer, V>> entry : primary.entrySet()) {
+          if (entry.getValue().getKey() == lastOrdinal) {
+            // setValue is not a structural modification, so it is safe while iterating
+            entry.setValue(new AbstractMap.SimpleImmutableEntry<Integer, V>(freedOrdinal, last));
+            break;
+          }
+        }
+        secondary.put(freedOrdinal, last);
+      }
+      secondary.remove(lastOrdinal);
+      return removed.getValue();
+    }
+
+    @Override
+    public void putAll(Map<? extends K, ? extends V> m) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void clear() {
+      primary.clear();
+      secondary.clear();
+    }
+
+    @Override
+    public Set<K> keySet() {
+      return primary.keySet();
+    }
+
+    @Override
+    public Collection<V> values() {
+      // returns a copy built from the ordinal index, not a live view
+      return Lists.newArrayList(Iterables.transform(secondary.entries(), new Function<IntObjectMap.Entry<V>, V>() {
+        @Override
+        public V apply(IntObjectMap.Entry<V> entry) {
+          return Preconditions.checkNotNull(entry).value();
+        }
+      }));
+    }
+
+    @Override
+    public Set<Entry<K, V>> entrySet() {
+      // returns a copy with the ordinal stripped from each entry, not a live view
+      return Sets.newHashSet(Iterables.transform(primary.entrySet(), new Function<Entry<K, Entry<Integer, V>>, Entry<K, V>>() {
+        @Override
+        public Entry<K, V> apply(Entry<K, Entry<Integer, V>> entry) {
+          return new AbstractMap.SimpleImmutableEntry<>(entry.getKey(), entry.getValue().getValue());
+        }
+      }));
+    }
+  };
+
+  /**
+   * Returns the value corresponding to the given ordinal
+   *
+   * @param id ordinal value for lookup
+   * @return an instance of V
+   */
+  public V getByOrdinal(int id) {
+    return secondary.get(id);
+  }
+
+  /**
+   * Returns the ordinal corresponding to the given key.
+   *
+   * @param key key for ordinal lookup
+   * @return ordinal value corresponding to key if it exists or -1
+   */
+  public int getOrdinal(K key) {
+    Entry<Integer, V> pair = primary.get(key);
+    if (pair != null) {
+      return pair.getKey();
+    }
+    return -1;
+  }
+
+  @Override
+  public int size() {
+    return delegate.size();
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return delegate.isEmpty();
+  }
+
+  @Override
+  public V get(Object key) {
+    return delegate.get(key);
+  }
+
+  /**
+   * Inserts the tuple (key, value) into the map extending the semantics of {@link Map#put} with automatic ordinal
+   * assignment. A new ordinal is assigned if key does not exists. Otherwise the same ordinal is re-used but the value
+   * is replaced.
+   *
+   * @see java.util.Map#put(Object, Object)
+   */
+  @Override
+  public V put(K key, V value) {
+    return delegate.put(key, value);
+  }
+
+  @Override
+  public Collection<V> values() {
+    return delegate.values();
+  }
+
+  @Override
+  public boolean containsKey(Object key) {
+    return delegate.containsKey(key);
+  }
+
+  @Override
+  public boolean containsValue(Object value) {
+    return delegate.containsValue(value);
+  }
+
+  /**
+   * Removes the element corresponding to the key if exists extending the semantics of {@link Map#remove} with ordinal
+   * re-cycling. The ordinal corresponding to the given key may be re-assigned to another tuple. It is important that
+   * consumer checks the ordinal value via {@link #getOrdinal(Object)} before attempting to look-up by ordinal.
+   * Removing an element other than the one holding the highest ordinal scans the map to relocate that element.
+   *
+   * @see java.util.Map#remove(Object)
+   */
+  @Override
+  public V remove(Object key) {
+    return delegate.remove(key);
+  }
+
+  /**
+   * Not supported.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void putAll(Map<? extends K, ? extends V> m) {
+    delegate.putAll(m);
+  }
+
+  @Override
+  public void clear() {
+    delegate.clear();
+  }
+
+  @Override
+  public Set<K> keySet() {
+    return delegate.keySet();
+  }
+
+  @Override
+  public Set<Entry<K, V>> entrySet() {
+    return delegate.entrySet();
+  }
+}

http://git-wip-us.apache.org/repos/asf/arrow/blob/fa5f0299/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java b/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java
new file mode 100644
index 0000000..ec628b2
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.util;
+
+
+/**
+ * An exception that is used to signal that allocation request in bytes is greater than the maximum allowed by
+ * {@link org.apache.arrow.memory.BufferAllocator#buffer(int) allocator}.
+ *
+ * <p>Operators should handle this exception to split the batch and later resume the execution on the next
+ * {@link RecordBatch#next() iteration}.</p>
+ *
+ */
+public class OversizedAllocationException extends RuntimeException {
+  /** Creates the exception with neither a message nor a cause. */
+  public OversizedAllocationException() {
+    super();
+  }
+
+  /**
+   * Creates the exception with a detail message.
+   *
+   * @param message the detail message
+   */
+  public OversizedAllocationException(String message) {
+    super(message);
+  }
+
+  /**
+   * Creates the exception wrapping an underlying cause.
+   *
+   * @param cause the underlying cause
+   */
+  public OversizedAllocationException(Throwable cause) {
+    super(cause);
+  }
+
+  /**
+   * Creates the exception with a detail message and an underlying cause.
+   *
+   * @param message the detail message
+   * @param cause the underlying cause
+   */
+  public OversizedAllocationException(String message, Throwable cause) {
+    super(message, cause);
+  }
+
+  /**
+   * Creates the exception with full control over suppression and stack-trace writability; all arguments
+   * are passed straight to the matching {@link RuntimeException} constructor.
+   *
+   * @param message the detail message
+   * @param cause the underlying cause
+   * @param enableSuppression whether suppression is enabled
+   * @param writableStackTrace whether the stack trace is writable
+   */
+  public OversizedAllocationException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+    super(message, cause, enableSuppression, writableStackTrace);
+  }
+}

http://git-wip-us.apache.org/repos/asf/arrow/blob/fa5f0299/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java b/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java
new file mode 100644
index 0000000..c281561
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.util;
+
+
+/**
+ * Unchecked exception type for schema changes; it adds no state of its own and only forwards its
+ * arguments to {@link RuntimeException}.
+ */
+public class SchemaChangeRuntimeException extends RuntimeException {
+  /** Creates the exception with neither a message nor a cause. */
+  public SchemaChangeRuntimeException() {
+    super();
+  }
+
+  /**
+   * Creates the exception with a detail message.
+   *
+   * @param message the detail message
+   */
+  public SchemaChangeRuntimeException(String message) {
+    super(message);
+  }
+
+  /**
+   * Creates the exception wrapping an underlying cause.
+   *
+   * @param cause the underlying cause
+   */
+  public SchemaChangeRuntimeException(Throwable cause) {
+    super(cause);
+  }
+
+  /**
+   * Creates the exception with a detail message and an underlying cause.
+   *
+   * @param message the detail message
+   * @param cause the underlying cause
+   */
+  public SchemaChangeRuntimeException(String message, Throwable cause) {
+    super(message, cause);
+  }
+
+  /**
+   * Creates the exception with full control over suppression and stack-trace writability; all arguments
+   * are passed straight to the matching {@link RuntimeException} constructor.
+   *
+   * @param message the detail message
+   * @param cause the underlying cause
+   * @param enableSuppression whether suppression is enabled
+   * @param writableStackTrace whether the stack trace is writable
+   */
+  public SchemaChangeRuntimeException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+    super(message, cause, enableSuppression, writableStackTrace);
+  }
+}

http://git-wip-us.apache.org/repos/asf/arrow/blob/fa5f0299/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java b/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
new file mode 100644
index 0000000..3919f06
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
@@ -0,0 +1,621 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.util;
+
+import java.io.DataInput;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.MalformedInputException;
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.Arrays;
+
+import com.fasterxml.jackson.core.JsonGenerationException;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+import com.fasterxml.jackson.databind.ser.std.StdSerializer;
+
+/**
+ * A simplified byte wrapper similar to Hadoop's Text class without all the dependencies. Lifted from Hadoop 2.7.1
+ */
+@JsonSerialize(using = Text.TextSerializer.class)
+public class Text {
+
+  // Per-thread UTF-8 encoder. It is created with REPORT for both malformed input and unmappable
+  // characters, so encoding errors raise an exception unless a caller switches the actions.
+  private static ThreadLocal<CharsetEncoder> ENCODER_FACTORY =
+      new ThreadLocal<CharsetEncoder>() {
+        @Override
+        protected CharsetEncoder initialValue() {
+          return Charset.forName("UTF-8").newEncoder().
+              onMalformedInput(CodingErrorAction.REPORT).
+              onUnmappableCharacter(CodingErrorAction.REPORT);
+        }
+      };
+
+  // Per-thread UTF-8 decoder, configured the same way as the encoder above (REPORT on errors).
+  private static ThreadLocal<CharsetDecoder> DECODER_FACTORY =
+      new ThreadLocal<CharsetDecoder>() {
+        @Override
+        protected CharsetDecoder initialValue() {
+          return Charset.forName("UTF-8").newDecoder().
+              onMalformedInput(CodingErrorAction.REPORT).
+              onUnmappableCharacter(CodingErrorAction.REPORT);
+        }
+      };
+
+  private static final byte[] EMPTY_BYTES = new byte[0];
+
+  private byte[] bytes;
+  private int length;
+
+  /** Construct an empty text backed by the shared zero-length array; length stays at its default of 0. */
+  public Text() {
+    bytes = EMPTY_BYTES;
+  }
+
+  /**
+   * Construct from a string.
+   *
+   * @param string the string to store; delegates to {@link #set(String)}
+   */
+  public Text(String string) {
+    set(string);
+  }
+
+  /**
+   * Construct from another text.
+   *
+   * @param utf8 the text to copy; delegates to {@link #set(Text)}
+   */
+  public Text(Text utf8) {
+    set(utf8);
+  }
+
+  /**
+   * Construct from a byte array.
+   *
+   * @param utf8 the bytes to copy; delegates to {@link #set(byte[])}
+   */
+  public Text(byte[] utf8) {
+    set(utf8);
+  }
+
+  /**
+   * Returns a fresh array holding exactly the first {@link #getLength()} bytes of the backing buffer.
+   * Use {@link #getBytes()} when a copy is not needed.
+   */
+  public byte[] copyBytes() {
+    return Arrays.copyOf(bytes, length);
+  }
+
+  /**
+   * Returns the raw bytes; however, only data up to {@link #getLength()} is valid. Please use {@link #copyBytes()} if
+   * you need the returned array to be precisely the length of the data.
+   */
+  public byte[] getBytes() {
+    return bytes;
+  }
+
+  /** Returns the number of bytes in the byte array */
+  public int getLength() {
+    return length;
+  }
+
+  /**
+   * Returns the Unicode Scalar Value (32-bit integer value) for the character at <code>position</code>. Note that this
+   * method avoids using the converter or doing String instantiation
+   *
+   * @param position byte offset into the text; valid offsets are <code>0</code> to <code>getLength() - 1</code>
+   * @return the Unicode scalar value at position or -1 if the position is invalid or points to a trailing byte
+   */
+  public int charAt(int position) {
+    // position == length is one past the last valid byte, so it is rejected together with larger values
+    if (position < 0 || position >= this.length) {
+      return -1;
+    }
+
+    // expose only the valid region [position, length) so stale bytes beyond length are never decoded
+    ByteBuffer bb = ByteBuffer.wrap(bytes, position, length - position);
+    return bytesToCodePoint(bb.slice());
+  }
+
+  /**
+   * Finds the first occurrence of <code>what</code> in the backing buffer, searching from byte position 0.
+   *
+   * @param what the string to search for
+   * @return byte position of the first occurrence, or -1 if not found
+   */
+  public int find(String what) {
+    return find(what, 0);
+  }
+
+  /**
+   * Finds the first occurrence of <code>what</code> in the backing buffer, starting at position <code>start</code>.
+   * The starting position is measured in bytes and the return value is in terms of byte position in the buffer. The
+   * backing buffer is not converted to a string for this operation.
+   *
+   * @param what the string to search for; it is encoded to UTF-8 before the search
+   * @param start byte position to start searching from
+   * @return byte position of the first occurrence of the search string in the UTF-8 buffer, or -1 if it is not
+   *         found or <code>start</code> lies outside <code>[0, getLength()]</code>; an empty search string
+   *         matches at <code>start</code>
+   */
+  public int find(String what, int start) {
+    if (start < 0 || start > this.length) {
+      return -1; // nothing can match outside the valid region
+    }
+
+    final ByteBuffer needle;
+    try {
+      needle = encode(what);
+    } catch (CharacterCodingException e) {
+      // encode(String) substitutes malformed input instead of reporting it
+      throw new RuntimeException("Should not have happened ", e);
+    }
+
+    final int needleLength = needle.remaining();
+    if (needleLength == 0) {
+      return start; // same convention as String.indexOf("")
+    }
+
+    // last byte position at which the whole needle still fits inside the valid region
+    final int lastCandidate = this.length - needleLength;
+    for (int pos = start; pos <= lastCandidate; pos++) {
+      int matched = 0;
+      while (matched < needleLength && bytes[pos + matched] == needle.get(matched)) {
+        matched++;
+      }
+      if (matched == needleLength) {
+        return pos;
+      }
+    }
+    return -1; // not found
+  }
+
+  /**
+   * Replaces the contents with the UTF-8 encoding of the given string. The array produced by the encoder
+   * becomes the backing buffer and the encoded buffer's limit becomes the length.
+   */
+  public void set(String string) {
+    final ByteBuffer encoded;
+    try {
+      encoded = encode(string, true);
+    } catch (CharacterCodingException e) {
+      throw new RuntimeException("Should not have happened ", e);
+    }
+    bytes = encoded.array();
+    length = encoded.limit();
+  }
+
+  /**
+   * Set to a utf8 byte array
+   */
+  public void set(byte[] utf8) {
+    set(utf8, 0, utf8.length);
+  }
+
+  /** copy a text. */
+  public void set(Text other) {
+    set(other.getBytes(), 0, other.getLength());
+  }
+
+  /**
+   * Set the Text to range of bytes. The range is copied into the backing buffer, which is replaced by a new
+   * array when it is too small; previous contents are not preserved.
+   *
+   * @param utf8
+   *          the data to copy from
+   * @param start
+   *          the first position of the new string
+   * @param len
+   *          the number of bytes of the new string
+   */
+  public void set(byte[] utf8, int start, int len) {
+    // keepData is false: the old contents are about to be overwritten anyway
+    setCapacity(len, false);
+    System.arraycopy(utf8, start, bytes, 0, len);
+    // length is updated last, so it is left untouched if the copy throws
+    this.length = len;
+  }
+
+  /**
+   * Append a range of bytes to the end of the given text. The backing buffer is grown when needed and the
+   * existing contents are kept.
+   *
+   * @param utf8
+   *          the data to copy from
+   * @param start
+   *          the first position to append from utf8
+   * @param len
+   *          the number of bytes to append
+   */
+  public void append(byte[] utf8, int start, int len) {
+    // keepData is true: the bytes already stored must survive a reallocation
+    setCapacity(length + len, true);
+    System.arraycopy(utf8, start, bytes, length, len);
+    length += len;
+  }
+
+  /**
+   * Clear the string to empty.
+   *
+   * <em>Note</em>: For performance reasons, this call does not clear the underlying byte array that is retrievable via
+   * {@link #getBytes()}. In order to free the byte-array memory, call {@link #set(byte[])} with an empty byte array
+   * (For example, <code>new byte[0]</code>).
+   */
+  public void clear() {
+    length = 0;
+  }
+
+  /*
+   * Ensures the backing buffer can hold at least <code>len</code> bytes. A buffer that is already large enough is
+   * left untouched. Otherwise a new buffer is installed: a fresh array of exactly <code>len</code> bytes when the old
+   * data may be dropped, or a copy of the old buffer sized to the larger of <code>len</code> and twice the current
+   * length when it must be kept.
+   *
+   * @param len the number of bytes we need
+   *
+   * @param keepData should the old data be kept
+   */
+  private void setCapacity(int len, boolean keepData) {
+    if (bytes != null && bytes.length >= len) {
+      return; // current buffer is big enough
+    }
+    if (bytes == null || !keepData) {
+      bytes = new byte[len];
+      return;
+    }
+    bytes = Arrays.copyOf(bytes, Math.max(len, length << 1));
+  }
+
+  /**
+   * Decodes the first {@link #getLength()} bytes of the backing buffer into a String.
+   *
+   * @see java.lang.Object#toString()
+   */
+  @Override
+  public String toString() {
+    final String decoded;
+    try {
+      decoded = decode(bytes, 0, length);
+    } catch (CharacterCodingException e) {
+      throw new RuntimeException("Should not have happened ", e);
+    }
+    return decoded;
+  }
+
+  /**
+   * Read a Text object whose length is already known. This allows creating Text from a stream which uses a different
+   * serialization format.
+   *
+   * @param in the input to read the bytes from
+   * @param len the number of bytes to read
+   * @throws IOException if <code>in.readFully</code> fails
+   */
+  public void readWithKnownLength(DataInput in, int len) throws IOException {
+    // old contents are discarded, so there is no need to preserve them while resizing
+    setCapacity(len, false);
+    in.readFully(bytes, 0, len);
+    // length is updated only after the read succeeded
+    length = len;
+  }
+
+  /**
+   * Returns true iff <code>o</code> is a Text with the same contents, that is, the same length and the same
+   * bytes in the valid region. Bytes beyond {@link #getLength()} are ignored.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof Text)) {
+      return false;
+    }
+
+    final Text that = (Text) o;
+    final int len = this.getLength();
+    if (len != that.getLength()) {
+      return false;
+    }
+
+    // compare in place instead of allocating two trimmed copies per call
+    final byte[] thisBytes = this.getBytes();
+    final byte[] thatBytes = that.getBytes();
+    for (int i = 0; i < len; i++) {
+      if (thisBytes[i] != thatBytes[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Returns a hash computed from the valid bytes only, so that two Text instances that are
+   * {@link #equals(Object) equal} also have equal hash codes. The value changes when the contents change.
+   */
+  @Override
+  public int hashCode() {
+    int hash = 1;
+    for (int i = 0; i < length; i++) {
+      hash = 31 * hash + bytes[i];
+    }
+    return hash;
+  }
+
+  // / STATIC UTILITIES FROM HERE DOWN
+  /**
+   * Converts the provided byte array to a String using the UTF-8 encoding. If the input is malformed, replace by a
+   * default value.
+   *
+   * @param utf8 the bytes to decode; the whole array is used
+   */
+  public static String decode(byte[] utf8) throws CharacterCodingException {
+    return decode(ByteBuffer.wrap(utf8), true);
+  }
+
+  /**
+   * Converts <code>length</code> bytes of the provided array, beginning at <code>start</code>, to a String using the
+   * UTF-8 encoding. Like {@link #decode(byte[])}, this overload decodes with replacement enabled.
+   */
+  public static String decode(byte[] utf8, int start, int length)
+      throws CharacterCodingException {
+    return decode(ByteBuffer.wrap(utf8, start, length), true);
+  }
+
+  /**
+   * Converts the provided byte array to a String using the UTF-8 encoding. If <code>replace</code> is true, then
+   * malformed input is replaced with the substitution character, which is U+FFFD. Otherwise the method throws a
+   * MalformedInputException.
+   */
+  public static String decode(byte[] utf8, int start, int length, boolean replace)
+      throws CharacterCodingException {
+    return decode(ByteBuffer.wrap(utf8, start, length), replace);
+  }
+
+  /*
+   * Decodes the buffer with the calling thread's shared decoder. When replace is true the decoder is temporarily
+   * switched to REPLACE for malformed input and unmappable characters, and is always switched back to REPORT
+   * afterwards, even if decoding throws, so later callers on the same thread see the default configuration.
+   */
+  private static String decode(ByteBuffer utf8, boolean replace)
+      throws CharacterCodingException {
+    CharsetDecoder decoder = DECODER_FACTORY.get();
+    if (!replace) {
+      return decoder.decode(utf8).toString();
+    }
+    decoder.onMalformedInput(CodingErrorAction.REPLACE);
+    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+    try {
+      return decoder.decode(utf8).toString();
+    } finally {
+      // set decoder back to its default value: REPORT
+      decoder.onMalformedInput(CodingErrorAction.REPORT);
+      decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+    }
+  }
+
+  /**
+   * Converts the provided String to bytes using the UTF-8 encoding. If the input is malformed, invalid chars are
+   * replaced by a default value.
+   *
+   * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is ByteBuffer.limit()
+   */
+
+  public static ByteBuffer encode(String string)
+      throws CharacterCodingException {
+    return encode(string, true);
+  }
+
+  /**
+   * Converts the provided String to bytes using the UTF-8 encoding. If <code>replace</code> is true, then malformed
+   * input is replaced with the substitution character, which is U+FFFD. Otherwise the method throws a
+   * MalformedInputException.
+   *
+   * <p>The calling thread's shared encoder is used; when it is switched to REPLACE it is always switched back to
+   * REPORT afterwards, even if encoding throws.
+   *
+   * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is ByteBuffer.limit()
+   */
+  public static ByteBuffer encode(String string, boolean replace)
+      throws CharacterCodingException {
+    CharsetEncoder encoder = ENCODER_FACTORY.get();
+    if (!replace) {
+      return encoder.encode(CharBuffer.wrap(string.toCharArray()));
+    }
+    encoder.onMalformedInput(CodingErrorAction.REPLACE);
+    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+    try {
+      return encoder.encode(CharBuffer.wrap(string.toCharArray()));
+    } finally {
+      // set encoder back to its default value: REPORT
+      encoder.onMalformedInput(CodingErrorAction.REPORT);
+      encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+    }
+  }
+
+  static final public int DEFAULT_MAX_LEN = 1024 * 1024;
+
+  // //// states for validateUTF8
+
+  private static final int LEAD_BYTE = 0;
+
+  private static final int TRAIL_BYTE_1 = 1;
+
+  private static final int TRAIL_BYTE = 2;
+
+  /**
+   * Check if a byte array contains valid utf-8
+   *
+   * @param utf8
+   *          byte array
+   * @throws MalformedInputException
+   *           if the byte array contains invalid utf-8
+   */
+  public static void validateUTF8(byte[] utf8) throws MalformedInputException {
+    validateUTF8(utf8, 0, utf8.length);
+  }
+
+  /**
+   * Check to see if a byte array is valid utf-8. The bytes are walked with a small state machine: a lead byte
+   * determines how many trail bytes must follow, the first trail byte is additionally range-checked against the
+   * lead byte, and input that ends before all expected trail bytes were seen is rejected.
+   *
+   * @param utf8
+   *          the array of bytes
+   * @param start
+   *          the offset of the first byte in the array
+   * @param len
+   *          the length of the byte sequence
+   * @throws MalformedInputException
+   *           if the byte array contains invalid bytes
+   */
+  public static void validateUTF8(byte[] utf8, int start, int len)
+      throws MalformedInputException {
+    int count = start;
+    int leadByte = 0;
+    int length = 0;
+    int state = LEAD_BYTE;
+    while (count < start + len) {
+      int aByte = utf8[count] & 0xFF;
+
+      switch (state) {
+      case LEAD_BYTE:
+        leadByte = aByte;
+        length = bytesFromUTF8[aByte];
+
+        switch (length) {
+        case 0: // check for ASCII
+          if (leadByte > 0x7F) {
+            throw new MalformedInputException(count);
+          }
+          break;
+        case 1:
+          if (leadByte < 0xC2 || leadByte > 0xDF) {
+            throw new MalformedInputException(count);
+          }
+          state = TRAIL_BYTE_1;
+          break;
+        case 2:
+          if (leadByte < 0xE0 || leadByte > 0xEF) {
+            throw new MalformedInputException(count);
+          }
+          state = TRAIL_BYTE_1;
+          break;
+        case 3:
+          if (leadByte < 0xF0 || leadByte > 0xF4) {
+            throw new MalformedInputException(count);
+          }
+          state = TRAIL_BYTE_1;
+          break;
+        default:
+          // too long! Longest valid UTF-8 is 4 bytes (lead + three)
+          // or if < 0 we got a trail byte in the lead byte position
+          throw new MalformedInputException(count);
+        } // switch (length)
+        break;
+
+      case TRAIL_BYTE_1:
+        if (leadByte == 0xF0 && aByte < 0x90) {
+          throw new MalformedInputException(count);
+        }
+        if (leadByte == 0xF4 && aByte > 0x8F) {
+          throw new MalformedInputException(count);
+        }
+        if (leadByte == 0xE0 && aByte < 0xA0) {
+          throw new MalformedInputException(count);
+        }
+        if (leadByte == 0xED && aByte > 0x9F) {
+          throw new MalformedInputException(count);
+        }
+        // falls through to regular trail-byte test!!
+      case TRAIL_BYTE:
+        if (aByte < 0x80 || aByte > 0xBF) {
+          throw new MalformedInputException(count);
+        }
+        if (--length == 0) {
+          state = LEAD_BYTE;
+        } else {
+          state = TRAIL_BYTE;
+        }
+        break;
+      default:
+        break;
+      } // switch (state)
+      count++;
+    }
+    if (state != LEAD_BYTE) {
+      // the input ended in the middle of a multi-byte sequence
+      throw new MalformedInputException(count);
+    }
+  }
+
+  /**
+   * Magic numbers for UTF-8. These are the number of bytes that <em>follow</em> a given lead byte. Trailing bytes have
+   * the value -1. The values 4 and 5 are presented in this table, even though valid UTF-8 cannot include the five and
+   * six byte sequences.
+   */
+  static final int[] bytesFromUTF8 =
+  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0,
+      // trail bytes
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
+      3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 };
+
+  /**
+   * Returns the next code point at the current position in the buffer. The buffer's position will be incremented. Any
+   * mark set on this buffer will be changed by this method!
+   *
+   * @param bytes buffer positioned at the first byte of the sequence to decode
+   * @return the decoded value, or -1 if the byte at the current position is a trailing byte
+   */
+  public static int bytesToCodePoint(ByteBuffer bytes) {
+    // peek at the first byte without consuming it
+    bytes.mark();
+    byte b = bytes.get();
+    bytes.reset();
+    // number of bytes that follow this lead byte; negative for a trailing byte
+    int extraBytesToRead = bytesFromUTF8[(b & 0xFF)];
+    if (extraBytesToRead < 0)
+    {
+      return -1; // trailing byte!
+    }
+    int ch = 0;
+
+    // The cases fall through on purpose: entering at case N consumes N + 1 bytes in total, adding each
+    // raw byte and shifting the accumulator left by 6 bits between bytes.
+    switch (extraBytesToRead) {
+    case 5:
+      ch += (bytes.get() & 0xFF);
+      ch <<= 6; /* remember, illegal UTF-8 */
+    case 4:
+      ch += (bytes.get() & 0xFF);
+      ch <<= 6; /* remember, illegal UTF-8 */
+    case 3:
+      ch += (bytes.get() & 0xFF);
+      ch <<= 6;
+    case 2:
+      ch += (bytes.get() & 0xFF);
+      ch <<= 6;
+    case 1:
+      ch += (bytes.get() & 0xFF);
+      ch <<= 6;
+    case 0:
+      ch += (bytes.get() & 0xFF);
+    }
+    // raw bytes were accumulated including their marker bits; subtract the per-length offset
+    ch -= offsetsFromUTF8[extraBytesToRead];
+
+    return ch;
+  }
+
+  static final int offsetsFromUTF8[] =
+  { 0x00000000, 0x00003080,
+      0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 };
+
+  /**
+   * Computes how many bytes the UTF-8 encoding of the given string occupies, without encoding it. A high
+   * surrogate followed by a low surrogate counts as four bytes; a high surrogate without a matching low
+   * surrogate counts as three bytes and the following char is examined on its own.
+   *
+   * @param string
+   *          text to encode
+   * @return number of UTF-8 bytes required to encode
+   */
+  public static int utf8Length(String string) {
+    final CharacterIterator chars = new StringCharacterIterator(string);
+    int total = 0;
+    for (char current = chars.first(); current != CharacterIterator.DONE; current = chars.next()) {
+      if (current < 0x80) {
+        total += 1;
+      } else if (current < 0x800) {
+        total += 2;
+      } else if ((current >= 0xD800) && (current < 0xDC00)) {
+        // high surrogate: look ahead for the low half of the pair
+        final char following = chars.next();
+        final boolean isLowSurrogate = (following > 0xDBFF) && (following < 0xE000);
+        if (isLowSurrogate) {
+          total += 4;
+        } else {
+          total += 3;
+          chars.previous(); // step back so the look-ahead char is processed next
+        }
+      } else {
+        // every remaining char value below 0x10000
+        total += 3;
+      }
+    }
+    return total;
+  }
+
+  /** Jackson serializer that writes a {@link Text} as a JSON string using its {@link Text#toString()} value. */
+  public static class TextSerializer extends StdSerializer<Text> {
+
+    public TextSerializer() {
+      super(Text.class);
+    }
+
+    /**
+     * Writes the decoded text as a single JSON string value.
+     *
+     * @param text the value to serialize
+     * @param jsonGenerator the generator the string is written to
+     * @param serializerProvider not used by this implementation
+     */
+    @Override
+    public void serialize(Text text, JsonGenerator jsonGenerator, SerializerProvider serializerProvider)
+        throws IOException, JsonGenerationException {
+      jsonGenerator.writeString(text.toString());
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/arrow/blob/fa5f0299/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java b/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java
new file mode 100644
index 0000000..6e68d55
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.util;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Operations offered by a transfer pair. Only the signatures are visible here; the notes on each method
+ * are assumptions drawn from the names and should be confirmed against the implementations.
+ */
+public interface TransferPair {
+  /** Presumably moves the whole content to the target returned by {@link #getTo()} - TODO confirm. */
+  void transfer();
+
+  /** Presumably moves {@code length} values starting at {@code startIndex} to the target - TODO confirm. */
+  void splitAndTransfer(int startIndex, int length);
+
+  /** Returns a {@link ValueVector}; presumably the target side of the pair - TODO confirm. */
+  ValueVector getTo();
+
+  /** Presumably copies the value at index {@code from} to index {@code to} of the target - TODO confirm. */
+  void copyValueSafe(int from, int to);
+}


Mime
View raw message