hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject [03/16] hive git commit: HIVE-11890. Create ORC submodue. (omalley reviewed by prasanthj)
Date Fri, 11 Dec 2015 23:28:00 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java
deleted file mode 100644
index 8cc836e..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java
+++ /dev/null
@@ -1,1291 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.math.BigInteger;
-
-public final class SerializationUtils {
-
-  private final static int BUFFER_SIZE = 64;
-  private final byte[] readBuffer;
-  private final byte[] writeBuffer;
-
-  /**
-   * Creates a helper with its own pair of BUFFER_SIZE-byte scratch buffers,
-   * one used by the read methods and one by the write methods.
-   */
-  public SerializationUtils() {
-    writeBuffer = new byte[BUFFER_SIZE];
-    readBuffer = new byte[BUFFER_SIZE];
-  }
-
-  /**
-   * Writes an unsigned long as a base-128 varint, least significant group
-   * first. Every byte except the last has its high bit set.
-   * @param output the stream to write to
-   * @param value the value to write, treated as unsigned
-   * @throws IOException if the stream cannot be written
-   */
-  void writeVulong(OutputStream output, long value) throws IOException {
-    // emit continuation bytes while more than 7 significant bits remain
-    while ((value & ~0x7f) != 0) {
-      output.write((byte) (0x80 | (value & 0x7f)));
-      value >>>= 7;
-    }
-    // final byte: high bit clear marks the end of the number
-    output.write((byte) value);
-  }
-
-  /**
-   * Writes a signed long as a zigzag-encoded varint.
-   * @param output the stream to write to
-   * @param value the signed value to write
-   * @throws IOException if the stream cannot be written
-   */
-  void writeVslong(OutputStream output, long value) throws IOException {
-    // all ones for negative values, all zeros otherwise
-    final long signMask = value >> 63;
-    final long zigzag = (value << 1) ^ signMask;
-    writeVulong(output, zigzag);
-  }
-
-
-  /**
-   * Reads a base-128 varint, least significant group first, as written by
-   * writeVulong.
-   * @param in the stream to read from
-   * @return the decoded value
-   * @throws EOFException if the stream ends before the final byte
-   * @throws IOException if the stream cannot be read
-   */
-  long readVulong(InputStream in) throws IOException {
-    long result = 0;
-    int shift = 0;
-    while (true) {
-      final int next = in.read();
-      if (next == -1) {
-        throw new EOFException("Reading Vulong past EOF");
-      }
-      result |= ((long) (next & 0x7f)) << shift;
-      shift += 7;
-      // a clear high bit marks the last byte of the number
-      if (next < 0x80) {
-        return result;
-      }
-    }
-  }
-
-  /**
-   * Reads a zigzag-encoded signed varint.
-   * @param in the stream to read from
-   * @return the decoded signed value
-   * @throws IOException if the stream cannot be read or ends early
-   */
-  long readVslong(InputStream in) throws IOException {
-    final long encoded = readVulong(in);
-    final long magnitude = encoded >>> 1;
-    // the low bit is the sign: 1 flips every bit of the magnitude
-    final long signMask = -(encoded & 1);
-    return magnitude ^ signMask;
-  }
-
-  /**
-   * Reads a 4-byte little-endian IEEE-754 float.
-   * @param in the stream to read from
-   * @return the decoded float
-   * @throws EOFException if the stream ends before four bytes were read
-   * @throws IOException if the stream cannot be read
-   */
-  public float readFloat(InputStream in) throws IOException {
-    int ser = 0;
-    for (int shift = 0; shift < 32; shift += 8) {
-      final int next = in.read();
-      // read() signals end of stream with -1; OR-ing that into the result
-      // would silently produce a garbage value
-      if (next < 0) {
-        throw new EOFException("Reading float past EOF");
-      }
-      ser |= next << shift;
-    }
-    return Float.intBitsToFloat(ser);
-  }
-
-  /**
-   * Writes a float as its 4-byte IEEE-754 bit pattern, least significant
-   * byte first.
-   * @param output the stream to write to
-   * @param value the float to write
-   * @throws IOException if the stream cannot be written
-   */
-  void writeFloat(OutputStream output, float value) throws IOException {
-    final int bits = Float.floatToIntBits(value);
-    for (int shift = 0; shift <= 24; shift += 8) {
-      output.write((bits >> shift) & 0xff);
-    }
-  }
-
-  /**
-   * Reads an 8-byte little-endian IEEE-754 double.
-   * @param in the stream to read from
-   * @return the decoded double
-   * @throws IOException if the stream cannot be read
-   */
-  public double readDouble(InputStream in) throws IOException {
-    final long bits = readLongLE(in);
-    return Double.longBitsToDouble(bits);
-  }
-
-  /**
-   * Reads eight bytes and assembles them into a long, least significant
-   * byte first.
-   * @param in the stream to read from
-   * @return the decoded long
-   * @throws EOFException if the stream ends before eight bytes were read
-   * @throws IOException if the stream cannot be read
-   */
-  long readLongLE(InputStream in) throws IOException {
-    // InputStream.read(byte[], int, int) may return fewer bytes than asked
-    // for, so keep reading until all eight are in the scratch buffer;
-    // otherwise stale bytes from an earlier call would be decoded
-    int filled = 0;
-    while (filled < 8) {
-      final int count = in.read(readBuffer, filled, 8 - filled);
-      if (count < 0) {
-        throw new EOFException("Reading long past EOF");
-      }
-      filled += count;
-    }
-    long result = 0;
-    for (int i = 7; i >= 0; i--) {
-      result = (result << 8) | (readBuffer[i] & 0xffL);
-    }
-    return result;
-  }
-
-  /**
-   * Writes a double as its 8-byte IEEE-754 bit pattern, least significant
-   * byte first.
-   * @param output the stream to write to
-   * @param value the double to write
-   * @throws IOException if the stream cannot be written
-   */
-  void writeDouble(OutputStream output, double value) throws IOException {
-    final long bits = Double.doubleToLongBits(value);
-    writeLongLE(output, bits);
-  }
-
-  /**
-   * Writes a long as eight bytes, least significant byte first, staging
-   * them in the write scratch buffer.
-   * @param output the stream to write to
-   * @param value the long to write
-   * @throws IOException if the stream cannot be written
-   */
-  private void writeLongLE(OutputStream output, long value) throws IOException {
-    for (int i = 0; i < 8; i++) {
-      // the byte cast keeps only the low eight bits of the shifted value
-      writeBuffer[i] = (byte) (value >>> (8 * i));
-    }
-    output.write(writeBuffer, 0, 8);
-  }
-
-  /**
-   * Write the arbitrarily sized signed BigInteger in vint format.
-   *
-   * Signed integers are encoded using the low bit as the sign bit using zigzag
-   * encoding.
-   *
-   * Each byte uses the low 7 bits for data and the high bit for stop/continue.
-   *
-   * Bytes are stored LSB first.
-   * @param output the stream to write to
-   * @param value the value to output
-   * @throws IOException if writing to the stream fails
-   */
-  static void writeBigInteger(OutputStream output,
-                              BigInteger value) throws IOException {
-    // encode the signed number as a positive integer
-    value = value.shiftLeft(1);
-    int sign = value.signum();
-    if (sign < 0) {
-      // a negative v becomes 2*|v| - 1, so the low bit carries the sign
-      value = value.negate();
-      value = value.subtract(BigInteger.ONE);
-    }
-    // number of significant bits that still have to be written
-    int length = value.bitLength();
-    while (true) {
-      // take the low 63 bits; 63 = 9 groups of 7 bits
-      long lowBits = value.longValue() & 0x7fffffffffffffffL;
-      length -= 63;
-      // write out the next 63 bits worth of data
-      for(int i=0; i < 9; ++i) {
-        // if this is the last byte, leave the high bit off
-        if (length <= 0 && (lowBits & ~0x7f) == 0) {
-          output.write((byte) lowBits);
-          return;
-        } else {
-          output.write((byte) (0x80 | (lowBits & 0x7f)));
-          lowBits >>>= 7;
-        }
-      }
-      // drop the 63 bits just written and continue with the rest
-      value = value.shiftRight(63);
-    }
-  }
-
-  /**
-   * Read the signed, arbitrarily sized BigInteger in vint format, undoing the
-   * zigzag encoding applied by writeBigInteger.
-   * @param input the stream to read from
-   * @return the read BigInteger
-   * @throws EOFException if the stream ends before the final byte
-   * @throws IOException if reading from the stream fails
-   */
-  static BigInteger readBigInteger(InputStream input) throws IOException {
-    BigInteger result = BigInteger.ZERO;
-    // accumulates up to 63 bits (9 groups of 7) before touching BigInteger
-    long work = 0;
-    // total number of payload bits consumed so far
-    int offset = 0;
-    long b;
-    do {
-      b = input.read();
-      if (b == -1) {
-        throw new EOFException("Reading BigInteger past EOF from " + input);
-      }
-      work |= (0x7f & b) << (offset % 63);
-      offset += 7;
-      // if we've read 63 bits, roll them into the result
-      if (offset == 63) {
-        // first chunk: it is the whole result so far
-        result = BigInteger.valueOf(work);
-        work = 0;
-      } else if (offset % 63 == 0) {
-        // later chunk: place it above the bits already accumulated
-        result = result.or(BigInteger.valueOf(work).shiftLeft(offset-63));
-        work = 0;
-      }
-    } while (b >= 0x80);
-    // merge a trailing partial chunk at the start of its 63-bit slot
-    if (work != 0) {
-      result = result.or(BigInteger.valueOf(work).shiftLeft((offset/63)*63));
-    }
-    // convert back to a signed number
-    boolean isNegative = result.testBit(0);
-    if (isNegative) {
-      result = result.add(BigInteger.ONE);
-      result = result.negate();
-    }
-    result = result.shiftRight(1);
-    return result;
-  }
-
-  /**
-   * The bit widths that have an encoded form. encodeBitWidth returns the
-   * ordinal of the matching constant and decodeBitWidth maps it back, so the
-   * declaration order here must not change. Widths 25, 27, 29, 31 and the
-   * non-multiples of 8 above 32 have no constant of their own.
-   */
-  enum FixedBitSizes {
-    ONE, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, ELEVEN, TWELVE,
-    THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN,
-    TWENTY, TWENTYONE, TWENTYTWO, TWENTYTHREE, TWENTYFOUR, TWENTYSIX,
-    TWENTYEIGHT, THIRTY, THIRTYTWO, FORTY, FORTYEIGHT, FIFTYSIX, SIXTYFOUR;
-  }
-
-  /**
-   * Counts the bits needed to hold the given value, treated as unsigned, and
-   * rounds that count up to the closest supported fixed bit width.
-   * @param value the value to measure
-   * @return bits required to store value
-   */
-  int findClosestNumBits(long value) {
-    int significantBits = 0;
-    for (long rest = value; rest != 0; rest >>>= 1) {
-      significantBits++;
-    }
-    return getClosestFixedBits(significantBits);
-  }
-
-  /**
-   * Zigzag encodes the given value so that values of small magnitude, either
-   * sign, map to small non-negative numbers.
-   * @param val the signed value
-   * @return zigzag encoded value
-   */
-  long zigzagEncode(long val) {
-    final long doubled = val << 1;
-    // negative inputs have every bit flipped, which puts the sign in bit 0
-    return val < 0 ? ~doubled : doubled;
-  }
-
-  /**
-   * Zigzag decodes the given value, reversing zigzagEncode.
-   * @param val the zigzag encoded value
-   * @return zigzag decoded value
-   */
-  long zigzagDecode(long val) {
-    final long magnitude = val >>> 1;
-    // a set low bit marks a negative number: flip every bit of the magnitude
-    return (val & 1) == 0 ? magnitude : ~magnitude;
-  }
-
-  /**
-   * Compute the bits required to represent pth percentile value
-   * @param data - array
-   * @param offset - index of the first element of data to consider
-   * @param length - number of elements to consider
-   * @param p - percentile value (> 0.0 and <= 1.0)
-   * @return pth percentile bits, or -1 if p is outside that range
-   */
-  int percentileBits(long[] data, int offset, int length, double p) {
-    if ((p > 1.0) || (p <= 0.0)) {
-      return -1;
-    }
-
-    // histogram that stores how many values need each encoded bit width.
-    // there are 32 encoded widths, one slot per FixedBitSizes constant
-    int[] hist = new int[32];
-
-    // compute the histogram
-    for(int i = offset; i < (offset + length); i++) {
-      int idx = encodeBitWidth(findClosestNumBits(data[i]));
-      hist[idx] += 1;
-    }
-
-    // number of values allowed to need more bits than the returned width
-    int perLen = (int) (length * (1.0 - p));
-
-    // return the bits required by pth percentile length
-    // walk from the widest bucket down until the allowance is used up
-    for(int i = hist.length - 1; i >= 0; i--) {
-      perLen -= hist[i];
-      if (perLen < 0) {
-        return decodeBitWidth(i);
-      }
-    }
-
-    return 0;
-  }
-
-  /**
-   * Read n bytes in big endian order and convert to long
-   * @param input - stream to read the bytes from
-   * @param n - number of bytes to read
-   * @return long value
-   * @throws IOException propagated from input.read()
-   */
-  long bytesToLongBE(InStream input, int n) throws IOException {
-    long out = 0;
-    long val = 0;
-    while (n > 0) {
-      n--;
-      // store it in a long and then shift else integer overflow will occur
-      // NOTE(review): the value returned by read() is not checked for an
-      // end-of-stream marker here; confirm what InStream.read() returns at EOF
-      val = input.read();
-      // the first byte read lands in the most significant position
-      out |= (val << (n * 8));
-    }
-    return out;
-  }
-
-  /**
-   * Calculate the number of bytes required to hold n values of numBits bits
-   * each, rounded up to a whole byte.
-   * @param n - number of values
-   * @param numBits - bit width
-   * @return number of bytes required
-   */
-  int getTotalBytesRequired(int n, int numBits) {
-    // multiply in 64 bits: n * numBits can exceed Integer.MAX_VALUE even when
-    // the resulting byte count still fits in an int
-    final long totalBits = (long) n * numBits;
-    return (int) ((totalBits + 7) / 8);
-  }
-
-  /**
-   * Rounds a bit count up to the closest bit width that has an encoded form.
-   * Zero maps to 1, and anything negative or above 56 maps to 64.
-   * @param n the number of bits needed
-   * @return closest valid fixed bit
-   */
-  int getClosestFixedBits(int n) {
-    if (n == 0) {
-      return 1;
-    }
-    if (n < 0 || n > 56) {
-      return 64;
-    }
-    // every width up to 24 is available as is
-    if (n <= 24) {
-      return n;
-    }
-    if (n <= 26) {
-      return 26;
-    }
-    if (n <= 28) {
-      return 28;
-    }
-    if (n <= 30) {
-      return 30;
-    }
-    if (n <= 32) {
-      return 32;
-    }
-    if (n <= 40) {
-      return 40;
-    }
-    if (n <= 48) {
-      return 48;
-    }
-    return 56;
-  }
-
-  /**
-   * Rounds a bit count up to the closest of 1, 2, 4, 8, 16, 24, 32, 40, 48,
-   * 56 or 64. Anything negative or above 56 maps to 64.
-   * @param n the number of bits needed
-   * @return closest aligned fixed bit width
-   */
-  public int getClosestAlignedFixedBits(int n) {
-    if (n < 0 || n > 56) {
-      return 64;
-    }
-    if (n <= 1) {
-      return 1;
-    }
-    if (n <= 2) {
-      return 2;
-    }
-    if (n <= 4) {
-      return 4;
-    }
-    if (n <= 8) {
-      return 8;
-    }
-    if (n <= 16) {
-      return 16;
-    }
-    if (n <= 24) {
-      return 24;
-    }
-    if (n <= 32) {
-      return 32;
-    }
-    if (n <= 40) {
-      return 40;
-    }
-    if (n <= 48) {
-      return 48;
-    }
-    return 56;
-  }
-
-  /**
-   * Finds the closest available fixed bit width match and returns its encoded
-   * value, which is the ordinal of the matching FixedBitSizes constant.
-   * @param n - fixed bit width to encode
-   * @return encoded fixed bit width
-   */
-  int encodeBitWidth(int n) {
-    final int width = getClosestFixedBits(n);
-
-    // widths 1..24 are encoded as width - 1
-    if (width >= 1 && width <= 24) {
-      return width - 1;
-    }
-
-    final FixedBitSizes size;
-    if (width < 1 || width > 56) {
-      size = FixedBitSizes.SIXTYFOUR;
-    } else if (width <= 26) {
-      size = FixedBitSizes.TWENTYSIX;
-    } else if (width <= 28) {
-      size = FixedBitSizes.TWENTYEIGHT;
-    } else if (width <= 30) {
-      size = FixedBitSizes.THIRTY;
-    } else if (width <= 32) {
-      size = FixedBitSizes.THIRTYTWO;
-    } else if (width <= 40) {
-      size = FixedBitSizes.FORTY;
-    } else if (width <= 48) {
-      size = FixedBitSizes.FORTYEIGHT;
-    } else {
-      size = FixedBitSizes.FIFTYSIX;
-    }
-    return size.ordinal();
-  }
-
-  /**
-   * Decodes the ordinal fixed bit value to the actual fixed bit width value.
-   * Any value that matches no smaller width decodes to 64.
-   * @param n - encoded fixed bit width
-   * @return decoded fixed bit width
-   */
-  int decodeBitWidth(int n) {
-    // ONE..TWENTYFOUR: the encoded value is width - 1
-    if (n >= FixedBitSizes.ONE.ordinal()
-        && n <= FixedBitSizes.TWENTYFOUR.ordinal()) {
-      return n + 1;
-    }
-    if (n == FixedBitSizes.TWENTYSIX.ordinal()) {
-      return 26;
-    }
-    if (n == FixedBitSizes.TWENTYEIGHT.ordinal()) {
-      return 28;
-    }
-    if (n == FixedBitSizes.THIRTY.ordinal()) {
-      return 30;
-    }
-    if (n == FixedBitSizes.THIRTYTWO.ordinal()) {
-      return 32;
-    }
-    if (n == FixedBitSizes.FORTY.ordinal()) {
-      return 40;
-    }
-    if (n == FixedBitSizes.FORTYEIGHT.ordinal()) {
-      return 48;
-    }
-    if (n == FixedBitSizes.FIFTYSIX.ordinal()) {
-      return 56;
-    }
-    return 64;
-  }
-
-  /**
-   * Bitpack and write the input values to underlying output stream. Values
-   * are packed most significant bit first; a trailing partial byte is padded
-   * with zero bits. Nothing is written if input is null or empty, offset is
-   * negative, or len or bitSize is less than 1.
-   * @param input - values to write
-   * @param offset - offset
-   * @param len - length
-   * @param bitSize - bit width
-   * @param output - output stream
-   * @throws IOException if writing to the stream fails
-   */
-  void writeInts(long[] input, int offset, int len, int bitSize,
-                        OutputStream output) throws IOException {
-    if (input == null || input.length < 1 || offset < 0 || len < 1
-        || bitSize < 1) {
-      return;
-    }
-
-    // widths 1, 2, 4 and whole-byte widths have dedicated unrolled writers
-    switch (bitSize) {
-    case 1:
-      unrolledBitPack1(input, offset, len, output);
-      return;
-    case 2:
-      unrolledBitPack2(input, offset, len, output);
-      return;
-    case 4:
-      unrolledBitPack4(input, offset, len, output);
-      return;
-    case 8:
-      unrolledBitPack8(input, offset, len, output);
-      return;
-    case 16:
-      unrolledBitPack16(input, offset, len, output);
-      return;
-    case 24:
-      unrolledBitPack24(input, offset, len, output);
-      return;
-    case 32:
-      unrolledBitPack32(input, offset, len, output);
-      return;
-    case 40:
-      unrolledBitPack40(input, offset, len, output);
-      return;
-    case 48:
-      unrolledBitPack48(input, offset, len, output);
-      return;
-    case 56:
-      unrolledBitPack56(input, offset, len, output);
-      return;
-    case 64:
-      unrolledBitPack64(input, offset, len, output);
-      return;
-    default:
-      break;
-    }
-
-    // generic path: bitsLeft is the free space in the byte being assembled
-    int bitsLeft = 8;
-    byte current = 0;
-    for(int i = offset; i < (offset + len); i++) {
-      long value = input[i];
-      int bitsToWrite = bitSize;
-      while (bitsToWrite > bitsLeft) {
-        // add the bits to the bottom of the current word
-        current |= value >>> (bitsToWrite - bitsLeft);
-        // subtract out the bits we just added
-        bitsToWrite -= bitsLeft;
-        // zero out the bits above bitsToWrite
-        value &= (1L << bitsToWrite) - 1;
-        output.write(current);
-        current = 0;
-        bitsLeft = 8;
-      }
-      // the rest of the value fits into the current byte
-      bitsLeft -= bitsToWrite;
-      current |= value << bitsLeft;
-      if (bitsLeft == 0) {
-        output.write(current);
-        current = 0;
-        bitsLeft = 8;
-      }
-    }
-
-    // flush
-    if (bitsLeft != 8) {
-      output.write(current);
-      current = 0;
-      bitsLeft = 8;
-    }
-  }
-
-  /**
-   * Packs the low bit of each value, eight values per byte, first value in
-   * the most significant bit. A final partial group is written as one byte
-   * with its unused low bits left at zero.
-   */
-  private void unrolledBitPack1(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    final int numHops = 8;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    // full groups of eight values, one output byte each
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = (int) (val | ((input[i] & 1) << 7)
-          | ((input[i + 1] & 1) << 6)
-          | ((input[i + 2] & 1) << 5)
-          | ((input[i + 3] & 1) << 4)
-          | ((input[i + 4] & 1) << 3)
-          | ((input[i + 5] & 1) << 2)
-          | ((input[i + 6] & 1) << 1)
-          | (input[i + 7]) & 1);
-      output.write(val);
-      val = 0;
-    }
-
-    // leftover values that do not fill a whole byte
-    if (remainder > 0) {
-      int startShift = 7;
-      for (int i = endUnroll; i < endOffset; i++) {
-        val = (int) (val | (input[i] & 1) << startShift);
-        startShift -= 1;
-      }
-      output.write(val);
-    }
-  }
-
-  /**
-   * Packs the low two bits of each value, four values per byte, first value
-   * in the most significant bits. A final partial group is written as one
-   * byte with its unused low bits left at zero.
-   */
-  private void unrolledBitPack2(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    final int numHops = 4;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    // full groups of four values, one output byte each
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = (int) (val | ((input[i] & 3) << 6)
-          | ((input[i + 1] & 3) << 4)
-          | ((input[i + 2] & 3) << 2)
-          | (input[i + 3]) & 3);
-      output.write(val);
-      val = 0;
-    }
-
-    // leftover values that do not fill a whole byte
-    if (remainder > 0) {
-      int startShift = 6;
-      for (int i = endUnroll; i < endOffset; i++) {
-        val = (int) (val | (input[i] & 3) << startShift);
-        startShift -= 2;
-      }
-      output.write(val);
-    }
-  }
-
-  /**
-   * Packs the low four bits of each value, two values per byte, first value
-   * in the high nibble. A final unpaired value is written as one byte with a
-   * zero low nibble.
-   */
-  private void unrolledBitPack4(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    final int numHops = 2;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    // full pairs of values, one output byte each
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = (int) (val | ((input[i] & 15) << 4) | (input[i + 1]) & 15);
-      output.write(val);
-      val = 0;
-    }
-
-    // leftover value that does not fill a whole byte
-    if (remainder > 0) {
-      int startShift = 4;
-      for (int i = endUnroll; i < endOffset; i++) {
-        val = (int) (val | (input[i] & 15) << startShift);
-        startShift -= 4;
-      }
-      output.write(val);
-    }
-  }
-
-  // The writers below handle whole-byte widths; each delegates to
-  // unrolledBitPackBytes with the number of bytes per value.
-
-  /** Writes each value as 1 byte. */
-  private void unrolledBitPack8(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 1);
-  }
-
-  /** Writes each value as 2 bytes. */
-  private void unrolledBitPack16(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 2);
-  }
-
-  /** Writes each value as 3 bytes. */
-  private void unrolledBitPack24(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 3);
-  }
-
-  /** Writes each value as 4 bytes. */
-  private void unrolledBitPack32(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 4);
-  }
-
-  /** Writes each value as 5 bytes. */
-  private void unrolledBitPack40(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 5);
-  }
-
-  /** Writes each value as 6 bytes. */
-  private void unrolledBitPack48(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 6);
-  }
-
-  /** Writes each value as 7 bytes. */
-  private void unrolledBitPack56(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 7);
-  }
-
-  /** Writes each value as 8 bytes. */
-  private void unrolledBitPack64(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 8);
-  }
-
-  /**
-   * Writes len values, numBytes bytes each, in batches of eight values,
-   * followed by one smaller batch for whatever is left over.
-   * @param input - values to write
-   * @param offset - index of the first value
-   * @param len - number of values
-   * @param output - output stream
-   * @param numBytes - bytes written per value
-   * @throws IOException if writing to the stream fails
-   */
-  private void unrolledBitPackBytes(long[] input, int offset, int len, OutputStream output, int numBytes) throws IOException {
-    final int batch = 8;
-    final int tail = len % batch;
-    final int fullEnd = offset + len - tail;
-    int pos = offset;
-    while (pos < fullEnd) {
-      writeLongBE(output, input, pos, batch, numBytes);
-      pos = pos + batch;
-    }
-
-    if (tail > 0) {
-      writeRemainingLongs(output, pos, input, tail, numBytes);
-    }
-  }
-
-  /**
-   * Writes the last remainder values (fewer than a full batch of eight) as
-   * numBytes big-endian bytes each. The bytes are staged in writeBuffer and
-   * sent to the stream in a single write.
-   * @param output - output stream
-   * @param offset - index in input of the first remaining value
-   * @param input - values to write
-   * @param remainder - number of values to write
-   * @param numBytes - bytes written per value
-   * @throws IOException if writing to the stream fails
-   */
-  private void writeRemainingLongs(OutputStream output, int offset, long[] input, int remainder,
-      int numBytes) throws IOException {
-    // remainder is counted down below, so keep the original count
-    final int numHops = remainder;
-
-    int idx = 0;
-    switch (numBytes) {
-    case 1:
-      while (remainder > 0) {
-        writeBuffer[idx] = (byte) (input[offset + idx] & 255);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 2:
-      while (remainder > 0) {
-        writeLongBE2(output, input[offset + idx], idx * 2);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 3:
-      while (remainder > 0) {
-        writeLongBE3(output, input[offset + idx], idx * 3);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 4:
-      while (remainder > 0) {
-        writeLongBE4(output, input[offset + idx], idx * 4);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 5:
-      while (remainder > 0) {
-        writeLongBE5(output, input[offset + idx], idx * 5);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 6:
-      while (remainder > 0) {
-        writeLongBE6(output, input[offset + idx], idx * 6);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 7:
-      while (remainder > 0) {
-        writeLongBE7(output, input[offset + idx], idx * 7);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 8:
-      while (remainder > 0) {
-        writeLongBE8(output, input[offset + idx], idx * 8);
-        remainder--;
-        idx++;
-      }
-      break;
-    default:
-      break;
-    }
-
-    // one write for everything staged above
-    final int toWrite = numHops * numBytes;
-    output.write(writeBuffer, 0, toWrite);
-  }
-
-  /**
-   * Writes eight consecutive values starting at input[offset] as numBytes
-   * big-endian bytes each. The bytes are staged in writeBuffer and sent to
-   * the stream in a single write of numHops * numBytes bytes.
-   * @param output - output stream
-   * @param input - values to write
-   * @param offset - index of the first of the eight values
-   * @param numHops - number of values in the batch, used only to size the write
-   * @param numBytes - bytes written per value
-   * @throws IOException if writing to the stream fails
-   */
-  private void writeLongBE(OutputStream output, long[] input, int offset, int numHops, int numBytes) throws IOException {
-
-    // each case stages the eight values back to back in writeBuffer
-    switch (numBytes) {
-    case 1:
-      writeBuffer[0] = (byte) (input[offset + 0] & 255);
-      writeBuffer[1] = (byte) (input[offset + 1] & 255);
-      writeBuffer[2] = (byte) (input[offset + 2] & 255);
-      writeBuffer[3] = (byte) (input[offset + 3] & 255);
-      writeBuffer[4] = (byte) (input[offset + 4] & 255);
-      writeBuffer[5] = (byte) (input[offset + 5] & 255);
-      writeBuffer[6] = (byte) (input[offset + 6] & 255);
-      writeBuffer[7] = (byte) (input[offset + 7] & 255);
-      break;
-    case 2:
-      writeLongBE2(output, input[offset + 0], 0);
-      writeLongBE2(output, input[offset + 1], 2);
-      writeLongBE2(output, input[offset + 2], 4);
-      writeLongBE2(output, input[offset + 3], 6);
-      writeLongBE2(output, input[offset + 4], 8);
-      writeLongBE2(output, input[offset + 5], 10);
-      writeLongBE2(output, input[offset + 6], 12);
-      writeLongBE2(output, input[offset + 7], 14);
-      break;
-    case 3:
-      writeLongBE3(output, input[offset + 0], 0);
-      writeLongBE3(output, input[offset + 1], 3);
-      writeLongBE3(output, input[offset + 2], 6);
-      writeLongBE3(output, input[offset + 3], 9);
-      writeLongBE3(output, input[offset + 4], 12);
-      writeLongBE3(output, input[offset + 5], 15);
-      writeLongBE3(output, input[offset + 6], 18);
-      writeLongBE3(output, input[offset + 7], 21);
-      break;
-    case 4:
-      writeLongBE4(output, input[offset + 0], 0);
-      writeLongBE4(output, input[offset + 1], 4);
-      writeLongBE4(output, input[offset + 2], 8);
-      writeLongBE4(output, input[offset + 3], 12);
-      writeLongBE4(output, input[offset + 4], 16);
-      writeLongBE4(output, input[offset + 5], 20);
-      writeLongBE4(output, input[offset + 6], 24);
-      writeLongBE4(output, input[offset + 7], 28);
-      break;
-    case 5:
-      writeLongBE5(output, input[offset + 0], 0);
-      writeLongBE5(output, input[offset + 1], 5);
-      writeLongBE5(output, input[offset + 2], 10);
-      writeLongBE5(output, input[offset + 3], 15);
-      writeLongBE5(output, input[offset + 4], 20);
-      writeLongBE5(output, input[offset + 5], 25);
-      writeLongBE5(output, input[offset + 6], 30);
-      writeLongBE5(output, input[offset + 7], 35);
-      break;
-    case 6:
-      writeLongBE6(output, input[offset + 0], 0);
-      writeLongBE6(output, input[offset + 1], 6);
-      writeLongBE6(output, input[offset + 2], 12);
-      writeLongBE6(output, input[offset + 3], 18);
-      writeLongBE6(output, input[offset + 4], 24);
-      writeLongBE6(output, input[offset + 5], 30);
-      writeLongBE6(output, input[offset + 6], 36);
-      writeLongBE6(output, input[offset + 7], 42);
-      break;
-    case 7:
-      writeLongBE7(output, input[offset + 0], 0);
-      writeLongBE7(output, input[offset + 1], 7);
-      writeLongBE7(output, input[offset + 2], 14);
-      writeLongBE7(output, input[offset + 3], 21);
-      writeLongBE7(output, input[offset + 4], 28);
-      writeLongBE7(output, input[offset + 5], 35);
-      writeLongBE7(output, input[offset + 6], 42);
-      writeLongBE7(output, input[offset + 7], 49);
-      break;
-    case 8:
-      writeLongBE8(output, input[offset + 0], 0);
-      writeLongBE8(output, input[offset + 1], 8);
-      writeLongBE8(output, input[offset + 2], 16);
-      writeLongBE8(output, input[offset + 3], 24);
-      writeLongBE8(output, input[offset + 4], 32);
-      writeLongBE8(output, input[offset + 5], 40);
-      writeLongBE8(output, input[offset + 6], 48);
-      writeLongBE8(output, input[offset + 7], 56);
-      break;
-      default:
-        break;
-    }
-
-    // one write for the whole batch
-    final int toWrite = numHops * numBytes;
-    output.write(writeBuffer, 0, toWrite);
-  }
-
-  // The helpers below store the low N bytes of val into writeBuffer starting
-  // at wbOffset, most significant byte first. They only fill the buffer; the
-  // output parameter is not used and nothing is written to the stream here.
-
-  /** Stores the low 2 bytes of val, big endian, at writeBuffer[wbOffset]. */
-  private void writeLongBE2(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 0);
-  }
-
-  /** Stores the low 3 bytes of val, big endian, at writeBuffer[wbOffset]. */
-  private void writeLongBE3(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 0);
-  }
-
-  /** Stores the low 4 bytes of val, big endian, at writeBuffer[wbOffset]. */
-  private void writeLongBE4(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 0);
-  }
-
-  /** Stores the low 5 bytes of val, big endian, at writeBuffer[wbOffset]. */
-  private void writeLongBE5(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 32);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 4] =  (byte) (val >>> 0);
-  }
-
-  /** Stores the low 6 bytes of val, big endian, at writeBuffer[wbOffset]. */
-  private void writeLongBE6(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 40);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 32);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 4] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 5] =  (byte) (val >>> 0);
-  }
-
-  /** Stores the low 7 bytes of val, big endian, at writeBuffer[wbOffset]. */
-  private void writeLongBE7(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 48);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 40);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 32);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 4] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 5] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 6] =  (byte) (val >>> 0);
-  }
-
-  /** Stores all 8 bytes of val, big endian, at writeBuffer[wbOffset]. */
-  private void writeLongBE8(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 56);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 48);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 40);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 32);
-    writeBuffer[wbOffset + 4] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 5] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 6] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 7] =  (byte) (val >>> 0);
-  }
-
-  /**
-   * Read bitpacked integers from input stream. Values are unpacked most
-   * significant bit first, mirroring writeInts.
-   * @param buffer - destination array for the decoded values
-   * @param offset - index in buffer of the first value to store
-   * @param len - number of values to read
-   * @param bitSize - bit width
-   * @param input - input stream
-   * @throws IOException propagated from input.read()
-   */
-  void readInts(long[] buffer, int offset, int len, int bitSize,
-                       InStream input) throws IOException {
-    // bitsLeft counts the unread low bits of current
-    int bitsLeft = 0;
-    int current = 0;
-
-    // widths 1, 2, 4 and whole-byte widths have dedicated unrolled readers
-    switch (bitSize) {
-    case 1:
-      unrolledUnPack1(buffer, offset, len, input);
-      return;
-    case 2:
-      unrolledUnPack2(buffer, offset, len, input);
-      return;
-    case 4:
-      unrolledUnPack4(buffer, offset, len, input);
-      return;
-    case 8:
-      unrolledUnPack8(buffer, offset, len, input);
-      return;
-    case 16:
-      unrolledUnPack16(buffer, offset, len, input);
-      return;
-    case 24:
-      unrolledUnPack24(buffer, offset, len, input);
-      return;
-    case 32:
-      unrolledUnPack32(buffer, offset, len, input);
-      return;
-    case 40:
-      unrolledUnPack40(buffer, offset, len, input);
-      return;
-    case 48:
-      unrolledUnPack48(buffer, offset, len, input);
-      return;
-    case 56:
-      unrolledUnPack56(buffer, offset, len, input);
-      return;
-    case 64:
-      unrolledUnPack64(buffer, offset, len, input);
-      return;
-    default:
-      break;
-    }
-
-    for(int i = offset; i < (offset + len); i++) {
-      long result = 0;
-      int bitsLeftToRead = bitSize;
-      // consume the rest of the current byte, then fetch the next one
-      while (bitsLeftToRead > bitsLeft) {
-        result <<= bitsLeft;
-        result |= current & ((1 << bitsLeft) - 1);
-        bitsLeftToRead -= bitsLeft;
-        // NOTE(review): the value returned by read() is not checked for an
-        // end-of-stream marker; confirm what InStream.read() returns at EOF
-        current = input.read();
-        bitsLeft = 8;
-      }
-
-      // handle the left over bits
-      if (bitsLeftToRead > 0) {
-        result <<= bitsLeftToRead;
-        bitsLeft -= bitsLeftToRead;
-        result |= (current >> bitsLeft) & ((1 << bitsLeftToRead) - 1);
-      }
-      buffer[i] = result;
-    }
-  }
-
-
-  /**
-   * Unpacks 1-bit values, eight per input byte, most significant bit first.
-   * One extra byte is read for a final partial group.
-   */
-  private void unrolledUnPack1(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    final int numHops = 8;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    // full groups: one input byte yields eight values
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = input.read();
-      buffer[i] = (val >>> 7) & 1;
-      buffer[i + 1] = (val >>> 6) & 1;
-      buffer[i + 2] = (val >>> 5) & 1;
-      buffer[i + 3] = (val >>> 4) & 1;
-      buffer[i + 4] = (val >>> 3) & 1;
-      buffer[i + 5] = (val >>> 2) & 1;
-      buffer[i + 6] = (val >>> 1) & 1;
-      buffer[i + 7] = val & 1;
-    }
-
-    // leftover values share one final byte
-    if (remainder > 0) {
-      int startShift = 7;
-      val = input.read();
-      for (int i = endUnroll; i < endOffset; i++) {
-        buffer[i] = (val >>> startShift) & 1;
-        startShift -= 1;
-      }
-    }
-  }
-
-  /**
-   * Unpacks 2-bit values, four per input byte, most significant bits first.
-   * One extra byte is read for a final partial group.
-   */
-  private void unrolledUnPack2(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    final int numHops = 4;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    // full groups: one input byte yields four values
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = input.read();
-      buffer[i] = (val >>> 6) & 3;
-      buffer[i + 1] = (val >>> 4) & 3;
-      buffer[i + 2] = (val >>> 2) & 3;
-      buffer[i + 3] = val & 3;
-    }
-
-    // leftover values share one final byte
-    if (remainder > 0) {
-      int startShift = 6;
-      val = input.read();
-      for (int i = endUnroll; i < endOffset; i++) {
-        buffer[i] = (val >>> startShift) & 3;
-        startShift -= 2;
-      }
-    }
-  }
-
-  /**
-   * Unpacks 4-bit values, two per input byte, high nibble first. One extra
-   * byte is read for a final unpaired value.
-   */
-  private void unrolledUnPack4(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    final int numHops = 2;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    // full pairs: one input byte yields two values
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = input.read();
-      buffer[i] = (val >>> 4) & 15;
-      buffer[i + 1] = val & 15;
-    }
-
-    // leftover value comes from the high nibble of one final byte
-    if (remainder > 0) {
-      int startShift = 4;
-      val = input.read();
-      for (int i = endUnroll; i < endOffset; i++) {
-        buffer[i] = (val >>> startShift) & 15;
-        startShift -= 4;
-      }
-    }
-  }
-
-  private void unrolledUnPack8(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 1);
-  }
-
-  private void unrolledUnPack16(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 2);
-  }
-
-  private void unrolledUnPack24(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 3);
-  }
-
-  private void unrolledUnPack32(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 4);
-  }
-
-  private void unrolledUnPack40(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 5);
-  }
-
-  private void unrolledUnPack48(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 6);
-  }
-
-  private void unrolledUnPack56(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 7);
-  }
-
-  private void unrolledUnPack64(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 8);
-  }
-
-  private void unrolledUnPackBytes(long[] buffer, int offset, int len, InStream input, int numBytes)
-      throws IOException {
-    final int numHops = 8;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int i = offset;
-    for (; i < endUnroll; i = i + numHops) {
-      readLongBE(input, buffer, i, numHops, numBytes);
-    }
-
-    if (remainder > 0) {
-      readRemainingLongs(buffer, i, input, remainder, numBytes);
-    }
-  }
-
-  private void readRemainingLongs(long[] buffer, int offset, InStream input, int remainder,
-      int numBytes) throws IOException {
-    final int toRead = remainder * numBytes;
-    // bulk read to buffer
-    int bytesRead = input.read(readBuffer, 0, toRead);
-    while (bytesRead != toRead) {
-      bytesRead += input.read(readBuffer, bytesRead, toRead - bytesRead);
-    }
-
-    int idx = 0;
-    switch (numBytes) {
-    case 1:
-      while (remainder > 0) {
-        buffer[offset++] = readBuffer[idx] & 255;
-        remainder--;
-        idx++;
-      }
-      break;
-    case 2:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE2(input, idx * 2);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 3:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE3(input, idx * 3);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 4:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE4(input, idx * 4);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 5:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE5(input, idx * 5);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 6:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE6(input, idx * 6);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 7:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE7(input, idx * 7);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 8:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE8(input, idx * 8);
-        remainder--;
-        idx++;
-      }
-      break;
-    default:
-      break;
-    }
-  }
-
-  private void readLongBE(InStream in, long[] buffer, int start, int numHops, int numBytes)
-      throws IOException {
-    final int toRead = numHops * numBytes;
-    // bulk read to buffer
-    int bytesRead = in.read(readBuffer, 0, toRead);
-    while (bytesRead != toRead) {
-      bytesRead += in.read(readBuffer, bytesRead, toRead - bytesRead);
-    }
-
-    switch (numBytes) {
-    case 1:
-      buffer[start + 0] = readBuffer[0] & 255;
-      buffer[start + 1] = readBuffer[1] & 255;
-      buffer[start + 2] = readBuffer[2] & 255;
-      buffer[start + 3] = readBuffer[3] & 255;
-      buffer[start + 4] = readBuffer[4] & 255;
-      buffer[start + 5] = readBuffer[5] & 255;
-      buffer[start + 6] = readBuffer[6] & 255;
-      buffer[start + 7] = readBuffer[7] & 255;
-      break;
-    case 2:
-      buffer[start + 0] = readLongBE2(in, 0);
-      buffer[start + 1] = readLongBE2(in, 2);
-      buffer[start + 2] = readLongBE2(in, 4);
-      buffer[start + 3] = readLongBE2(in, 6);
-      buffer[start + 4] = readLongBE2(in, 8);
-      buffer[start + 5] = readLongBE2(in, 10);
-      buffer[start + 6] = readLongBE2(in, 12);
-      buffer[start + 7] = readLongBE2(in, 14);
-      break;
-    case 3:
-      buffer[start + 0] = readLongBE3(in, 0);
-      buffer[start + 1] = readLongBE3(in, 3);
-      buffer[start + 2] = readLongBE3(in, 6);
-      buffer[start + 3] = readLongBE3(in, 9);
-      buffer[start + 4] = readLongBE3(in, 12);
-      buffer[start + 5] = readLongBE3(in, 15);
-      buffer[start + 6] = readLongBE3(in, 18);
-      buffer[start + 7] = readLongBE3(in, 21);
-      break;
-    case 4:
-      buffer[start + 0] = readLongBE4(in, 0);
-      buffer[start + 1] = readLongBE4(in, 4);
-      buffer[start + 2] = readLongBE4(in, 8);
-      buffer[start + 3] = readLongBE4(in, 12);
-      buffer[start + 4] = readLongBE4(in, 16);
-      buffer[start + 5] = readLongBE4(in, 20);
-      buffer[start + 6] = readLongBE4(in, 24);
-      buffer[start + 7] = readLongBE4(in, 28);
-      break;
-    case 5:
-      buffer[start + 0] = readLongBE5(in, 0);
-      buffer[start + 1] = readLongBE5(in, 5);
-      buffer[start + 2] = readLongBE5(in, 10);
-      buffer[start + 3] = readLongBE5(in, 15);
-      buffer[start + 4] = readLongBE5(in, 20);
-      buffer[start + 5] = readLongBE5(in, 25);
-      buffer[start + 6] = readLongBE5(in, 30);
-      buffer[start + 7] = readLongBE5(in, 35);
-      break;
-    case 6:
-      buffer[start + 0] = readLongBE6(in, 0);
-      buffer[start + 1] = readLongBE6(in, 6);
-      buffer[start + 2] = readLongBE6(in, 12);
-      buffer[start + 3] = readLongBE6(in, 18);
-      buffer[start + 4] = readLongBE6(in, 24);
-      buffer[start + 5] = readLongBE6(in, 30);
-      buffer[start + 6] = readLongBE6(in, 36);
-      buffer[start + 7] = readLongBE6(in, 42);
-      break;
-    case 7:
-      buffer[start + 0] = readLongBE7(in, 0);
-      buffer[start + 1] = readLongBE7(in, 7);
-      buffer[start + 2] = readLongBE7(in, 14);
-      buffer[start + 3] = readLongBE7(in, 21);
-      buffer[start + 4] = readLongBE7(in, 28);
-      buffer[start + 5] = readLongBE7(in, 35);
-      buffer[start + 6] = readLongBE7(in, 42);
-      buffer[start + 7] = readLongBE7(in, 49);
-      break;
-    case 8:
-      buffer[start + 0] = readLongBE8(in, 0);
-      buffer[start + 1] = readLongBE8(in, 8);
-      buffer[start + 2] = readLongBE8(in, 16);
-      buffer[start + 3] = readLongBE8(in, 24);
-      buffer[start + 4] = readLongBE8(in, 32);
-      buffer[start + 5] = readLongBE8(in, 40);
-      buffer[start + 6] = readLongBE8(in, 48);
-      buffer[start + 7] = readLongBE8(in, 56);
-      break;
-    default:
-      break;
-    }
-  }
-
-  private long readLongBE2(InStream in, int rbOffset) {
-    return (((readBuffer[rbOffset] & 255) << 8)
-        + ((readBuffer[rbOffset + 1] & 255) << 0));
-  }
-
-  private long readLongBE3(InStream in, int rbOffset) {
-    return (((readBuffer[rbOffset] & 255) << 16)
-        + ((readBuffer[rbOffset + 1] & 255) << 8)
-        + ((readBuffer[rbOffset + 2] & 255) << 0));
-  }
-
-  private long readLongBE4(InStream in, int rbOffset) {
-    return (((long) (readBuffer[rbOffset] & 255) << 24)
-        + ((readBuffer[rbOffset + 1] & 255) << 16)
-        + ((readBuffer[rbOffset + 2] & 255) << 8)
-        + ((readBuffer[rbOffset + 3] & 255) << 0));
-  }
-
-  private long readLongBE5(InStream in, int rbOffset) {
-    return (((long) (readBuffer[rbOffset] & 255) << 32)
-        + ((long) (readBuffer[rbOffset + 1] & 255) << 24)
-        + ((readBuffer[rbOffset + 2] & 255) << 16)
-        + ((readBuffer[rbOffset + 3] & 255) << 8)
-        + ((readBuffer[rbOffset + 4] & 255) << 0));
-  }
-
-  private long readLongBE6(InStream in, int rbOffset) {
-    return (((long) (readBuffer[rbOffset] & 255) << 40)
-        + ((long) (readBuffer[rbOffset + 1] & 255) << 32)
-        + ((long) (readBuffer[rbOffset + 2] & 255) << 24)
-        + ((readBuffer[rbOffset + 3] & 255) << 16)
-        + ((readBuffer[rbOffset + 4] & 255) << 8)
-        + ((readBuffer[rbOffset + 5] & 255) << 0));
-  }
-
-  private long readLongBE7(InStream in, int rbOffset) {
-    return (((long) (readBuffer[rbOffset] & 255) << 48)
-        + ((long) (readBuffer[rbOffset + 1] & 255) << 40)
-        + ((long) (readBuffer[rbOffset + 2] & 255) << 32)
-        + ((long) (readBuffer[rbOffset + 3] & 255) << 24)
-        + ((readBuffer[rbOffset + 4] & 255) << 16)
-        + ((readBuffer[rbOffset + 5] & 255) << 8)
-        + ((readBuffer[rbOffset + 6] & 255) << 0));
-  }
-
-  private long readLongBE8(InStream in, int rbOffset) {
-    return (((long) (readBuffer[rbOffset] & 255) << 56)
-        + ((long) (readBuffer[rbOffset + 1] & 255) << 48)
-        + ((long) (readBuffer[rbOffset + 2] & 255) << 40)
-        + ((long) (readBuffer[rbOffset + 3] & 255) << 32)
-        + ((long) (readBuffer[rbOffset + 4] & 255) << 24)
-        + ((readBuffer[rbOffset + 5] & 255) << 16)
-        + ((readBuffer[rbOffset + 6] & 255) << 8)
-        + ((readBuffer[rbOffset + 7] & 255) << 0));
-  }
-
-  // Do not want to use Guava LongMath.checkedSubtract() here as it will throw
-  // ArithmeticException in case of overflow
-  public boolean isSafeSubtract(long left, long right) {
-    return (left ^ right) >= 0 | (left ^ (left - right)) >= 0;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SettableUncompressedStream.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SettableUncompressedStream.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SettableUncompressedStream.java
deleted file mode 100644
index 3496de9..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SettableUncompressedStream.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.util.List;
-
-import org.apache.hadoop.hive.common.DiskRangeInfo;
-import org.apache.hadoop.hive.common.io.DiskRange;
-
-/**
- * An uncompressed stream whose underlying byte buffer can be set.
- */
-public class SettableUncompressedStream extends InStream.UncompressedStream {
-
-  public SettableUncompressedStream(String name, List<DiskRange> input, long length) {
-    super(name, input, length);
-    setOffset(input);
-  }
-
-  public void setBuffers(DiskRangeInfo diskRangeInfo) {
-    reset(diskRangeInfo.getDiskRanges(), diskRangeInfo.getTotalLength());
-    setOffset(diskRangeInfo.getDiskRanges());
-  }
-
-  private void setOffset(List<DiskRange> list) {
-    currentOffset = list.isEmpty() ? 0 : list.get(0).getOffset();
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java
deleted file mode 100644
index 820f215..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim;
-import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType;
-import org.iq80.snappy.Snappy;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.EnumSet;
-
-class SnappyCodec implements CompressionCodec, DirectDecompressionCodec {
-
-  Boolean direct = null;
-
-  @Override
-  public boolean compress(ByteBuffer in, ByteBuffer out,
-                          ByteBuffer overflow) throws IOException {
-    int inBytes = in.remaining();
-    // I should work on a patch for Snappy to support an overflow buffer
-    // to prevent the extra buffer copy.
-    byte[] compressed = new byte[Snappy.maxCompressedLength(inBytes)];
-    int outBytes =
-        Snappy.compress(in.array(), in.arrayOffset() + in.position(), inBytes,
-            compressed, 0);
-    if (outBytes < inBytes) {
-      int remaining = out.remaining();
-      if (remaining >= outBytes) {
-        System.arraycopy(compressed, 0, out.array(), out.arrayOffset() +
-            out.position(), outBytes);
-        out.position(out.position() + outBytes);
-      } else {
-        System.arraycopy(compressed, 0, out.array(), out.arrayOffset() +
-            out.position(), remaining);
-        out.position(out.limit());
-        System.arraycopy(compressed, remaining, overflow.array(),
-            overflow.arrayOffset(), outBytes - remaining);
-        overflow.position(outBytes - remaining);
-      }
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  @Override
-  public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
-    if(in.isDirect() && out.isDirect()) {
-      directDecompress(in, out);
-      return;
-    }
-    int inOffset = in.position();
-    int uncompressLen =
-        Snappy.uncompress(in.array(), in.arrayOffset() + inOffset,
-        in.limit() - inOffset, out.array(), out.arrayOffset() + out.position());
-    out.position(uncompressLen + out.position());
-    out.flip();
-  }
-
-  @Override
-  public boolean isAvailable() {
-    if (direct == null) {
-      try {
-        if (ShimLoader.getHadoopShims().getDirectDecompressor(
-            DirectCompressionType.SNAPPY) != null) {
-          direct = Boolean.valueOf(true);
-        } else {
-          direct = Boolean.valueOf(false);
-        }
-      } catch (UnsatisfiedLinkError ule) {
-        direct = Boolean.valueOf(false);
-      }
-    }
-    return direct.booleanValue();
-  }
-
-  @Override
-  public void directDecompress(ByteBuffer in, ByteBuffer out)
-      throws IOException {
-    DirectDecompressorShim decompressShim = ShimLoader.getHadoopShims()
-        .getDirectDecompressor(DirectCompressionType.SNAPPY);
-    decompressShim.decompress(in, out);
-    out.flip(); // flip for read
-  }
-
-  @Override
-  public CompressionCodec modify(EnumSet<Modifier> modifiers) {
-    // snappy allows no modifications
-    return this;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
deleted file mode 100644
index 9ff0b1f..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * The name of a stream within a stripe.
- */
-public class StreamName implements Comparable<StreamName> {
-  private final int column;
-  private final OrcProto.Stream.Kind kind;
-
-  public static enum Area {
-    DATA, INDEX
-  }
-
-  public StreamName(int column, OrcProto.Stream.Kind kind) {
-    this.column = column;
-    this.kind = kind;
-  }
-
-  public boolean equals(Object obj) {
-    if (obj != null && obj instanceof  StreamName) {
-      StreamName other = (StreamName) obj;
-      return other.column == column && other.kind == kind;
-    } else {
-      return false;
-    }
-  }
-
-  @Override
-  public int compareTo(StreamName streamName) {
-    if (streamName == null) {
-      return -1;
-    }
-    Area area = getArea(kind);
-    Area otherArea = streamName.getArea(streamName.kind);
-    if (area != otherArea) {
-      return -area.compareTo(otherArea);
-    }
-    if (column != streamName.column) {
-      return column < streamName.column ? -1 : 1;
-    }
-    return kind.compareTo(streamName.kind);
-  }
-
-  public int getColumn() {
-    return column;
-  }
-
-  public OrcProto.Stream.Kind getKind() {
-    return kind;
-  }
-
-  public Area getArea() {
-    return getArea(kind);
-  }
-
-  public static Area getArea(OrcProto.Stream.Kind kind) {
-    switch (kind) {
-      case ROW_INDEX:
-      case DICTIONARY_COUNT:
-      case BLOOM_FILTER:
-        return Area.INDEX;
-      default:
-        return Area.DATA;
-    }
-  }
-
-  @Override
-  public String toString() {
-    return "Stream for column " + column + " kind " + kind;
-  }
-
-  @Override
-  public int hashCode() {
-    return column * 101 + kind.getNumber();
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java
deleted file mode 100644
index 3a49269..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Statistics for string columns.
- */
-public interface StringColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the minimum string.
-   * @return the minimum
-   */
-  String getMinimum();
-
-  /**
-   * Get the maximum string.
-   * @return the maximum
-   */
-  String getMaximum();
-
-  /**
-   * Get the total length of all strings
-   * @return the sum (total length)
-   */
-  long getSum();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
deleted file mode 100644
index e0c52e7..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
+++ /dev/null
@@ -1,207 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-import org.apache.hadoop.io.Text;
-
-/**
- * A red-black tree that stores strings. The strings are stored as UTF-8 bytes
- * and an offset for each entry.
- */
-class StringRedBlackTree extends RedBlackTree {
-  private final DynamicByteArray byteArray = new DynamicByteArray();
-  private final DynamicIntArray keyOffsets;
-  private final Text newKey = new Text();
-
-  public StringRedBlackTree(int initialCapacity) {
-    super(initialCapacity);
-    keyOffsets = new DynamicIntArray(initialCapacity);
-  }
-
-  public int add(String value) {
-    newKey.set(value);
-    return addNewKey();
-  }
-
-  private int addNewKey() {
-    // if the newKey is actually new, add it to our byteArray and store the offset & length
-    if (add()) {
-      int len = newKey.getLength();
-      keyOffsets.add(byteArray.add(newKey.getBytes(), 0, len));
-    }
-    return lastAdd;
-  }
-
-  public int add(Text value) {
-    newKey.set(value);
-    return addNewKey();
-  }
-
-  public int add(byte[] bytes, int offset, int length) {
-    newKey.set(bytes, offset, length);
-    return addNewKey();
-  }
-
-  @Override
-  protected int compareValue(int position) {
-    int start = keyOffsets.get(position);
-    int end;
-    if (position + 1 == keyOffsets.size()) {
-      end = byteArray.size();
-    } else {
-      end = keyOffsets.get(position+1);
-    }
-    return byteArray.compare(newKey.getBytes(), 0, newKey.getLength(),
-                             start, end - start);
-  }
-
-  /**
-   * The information about each node.
-   */
-  public interface VisitorContext {
-    /**
-     * Get the position where the key was originally added.
-     * @return the number returned by add.
-     */
-    int getOriginalPosition();
-
-    /**
-     * Write the bytes for the string to the given output stream.
-     * @param out the stream to write to.
-     * @throws IOException
-     */
-    void writeBytes(OutputStream out) throws IOException;
-
-    /**
-     * Get the original string.
-     * @return the string
-     */
-    Text getText();
-
-    /**
-     * Get the number of bytes.
-     * @return the string's length in bytes
-     */
-    int getLength();
-  }
-
-  /**
-   * The interface for visitors.
-   */
-  public interface Visitor {
-    /**
-     * Called once for each node of the tree in sort order.
-     * @param context the information about each node
-     * @throws IOException
-     */
-    void visit(VisitorContext context) throws IOException;
-  }
-
-  private class VisitorContextImpl implements VisitorContext {
-    private int originalPosition;
-    private int start;
-    private int end;
-    private final Text text = new Text();
-
-    public int getOriginalPosition() {
-      return originalPosition;
-    }
-
-    public Text getText() {
-      byteArray.setText(text, start, end - start);
-      return text;
-    }
-
-    public void writeBytes(OutputStream out) throws IOException {
-      byteArray.write(out, start, end - start);
-    }
-
-    public int getLength() {
-      return end - start;
-    }
-
-    void setPosition(int position) {
-      originalPosition = position;
-      start = keyOffsets.get(originalPosition);
-      if (position + 1 == keyOffsets.size()) {
-        end = byteArray.size();
-      } else {
-        end = keyOffsets.get(originalPosition + 1);
-      }
-    }
-  }
-
-  private void recurse(int node, Visitor visitor, VisitorContextImpl context
-                      ) throws IOException {
-    if (node != NULL) {
-      recurse(getLeft(node), visitor, context);
-      context.setPosition(node);
-      visitor.visit(context);
-      recurse(getRight(node), visitor, context);
-    }
-  }
-
-  /**
-   * Visit all of the nodes in the tree in sorted order.
-   * @param visitor the action to be applied to each node
-   * @throws IOException
-   */
-  public void visit(Visitor visitor) throws IOException {
-    recurse(root, visitor, new VisitorContextImpl());
-  }
-
-  /**
-   * Reset the table to empty.
-   */
-  public void clear() {
-    super.clear();
-    byteArray.clear();
-    keyOffsets.clear();
-  }
-
-  public void getText(Text result, int originalPosition) {
-    int offset = keyOffsets.get(originalPosition);
-    int length;
-    if (originalPosition + 1 == keyOffsets.size()) {
-      length = byteArray.size() - offset;
-    } else {
-      length = keyOffsets.get(originalPosition + 1) - offset;
-    }
-    byteArray.setText(result, offset, length);
-  }
-
-  /**
-   * Get the size of the character data in the table.
-   * @return the bytes used by the table
-   */
-  public int getCharacterSize() {
-    return byteArray.size();
-  }
-
-  /**
-   * Calculate the approximate size in memory.
-   * @return the number of bytes used in storing the tree.
-   */
-  public long getSizeInBytes() {
-    return byteArray.getSizeInBytes() + keyOffsets.getSizeInBytes() +
-      super.getSizeInBytes();
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeInformation.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeInformation.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeInformation.java
deleted file mode 100644
index 2ac4601..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeInformation.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * Information about the stripes in an ORC file that is provided by the Reader.
- */
-public interface StripeInformation {
-  /**
-   * Get the byte offset of the start of the stripe.
-   * @return the bytes from the start of the file
-   */
-  long getOffset();
-
-  /**
-   * Get the total length of the stripe in bytes.
-   * @return the number of bytes in the stripe
-   */
-  long getLength();
-
-  /**
-   * Get the length of the stripe's indexes.
-   * @return the number of bytes in the index
-   */
-  long getIndexLength();
-
-  /**
-   * Get the length of the stripe's data.
-   * @return the number of bytes in the stripe
-   */
-  long getDataLength();
-
-  /**
-   * Get the length of the stripe's tail section, which contains its index.
-   * @return the number of bytes in the tail
-   */
-  long getFooterLength();
-
-  /**
-   * Get the number of rows in the stripe.
-   * @return a count of the number of rows
-   */
-  long getNumberOfRows();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeStatistics.java
deleted file mode 100644
index 6fc1ab6..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeStatistics.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.util.List;
-
-public class StripeStatistics {
-  private final List<OrcProto.ColumnStatistics> cs;
-
-  StripeStatistics(List<OrcProto.ColumnStatistics> list) {
-    this.cs = list;
-  }
-
-  /**
-   * Return list of column statistics
-   *
-   * @return column stats
-   */
-  public ColumnStatistics[] getColumnStatistics() {
-    ColumnStatistics[] result = new ColumnStatistics[cs.size()];
-    for (int i = 0; i < result.length; ++i) {
-      result[i] = ColumnStatisticsImpl.deserialize(cs.get(i));
-    }
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TimestampColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TimestampColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TimestampColumnStatistics.java
deleted file mode 100644
index ef42f50..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TimestampColumnStatistics.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.sql.Timestamp;
-
-/**
- * Statistics for Timestamp columns.
- */
-public interface TimestampColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the minimum value for the column.
-   * @return minimum value
-   */
-  Timestamp getMinimum();
-
-  /**
-   * Get the maximum value for the column.
-   * @return maximum value
-   */
-  Timestamp getMaximum();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
index 8c13571..3fe28d8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
@@ -57,13 +57,25 @@ import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.orc.impl.BitFieldReader;
+import org.apache.orc.impl.DynamicByteArray;
+import org.apache.orc.impl.InStream;
+import org.apache.orc.impl.IntegerReader;
+import org.apache.orc.OrcProto;
+import org.apache.orc.impl.PositionProvider;
+import org.apache.orc.impl.RunLengthByteReader;
+import org.apache.orc.impl.RunLengthIntegerReader;
+import org.apache.orc.impl.RunLengthIntegerReaderV2;
+import org.apache.orc.impl.SerializationUtils;
+import org.apache.orc.impl.StreamName;
 
 /**
  * Factory for creating ORC tree readers.
  */
 public class TreeReaderFactory {
 
-  public static final Logger LOG = LoggerFactory.getLogger(TreeReaderFactory.class);
+  private static final Logger LOG =
+    LoggerFactory.getLogger(TreeReaderFactory.class);
 
   public static class TreeReaderSchema {
 

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java
deleted file mode 100644
index b365408..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java
+++ /dev/null
@@ -1,540 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * This is the description of the types in an ORC file.
- */
-public class TypeDescription {
-  private static final int MAX_PRECISION = 38;
-  private static final int MAX_SCALE = 38;
-  private static final int DEFAULT_PRECISION = 38;
-  private static final int DEFAULT_SCALE = 10;
-  private static final int DEFAULT_LENGTH = 256;
-  public enum Category {
-    BOOLEAN("boolean", true),
-    BYTE("tinyint", true),
-    SHORT("smallint", true),
-    INT("int", true),
-    LONG("bigint", true),
-    FLOAT("float", true),
-    DOUBLE("double", true),
-    STRING("string", true),
-    DATE("date", true),
-    TIMESTAMP("timestamp", true),
-    BINARY("binary", true),
-    DECIMAL("decimal", true),
-    VARCHAR("varchar", true),
-    CHAR("char", true),
-    LIST("array", false),
-    MAP("map", false),
-    STRUCT("struct", false),
-    UNION("union", false);
-
-    Category(String name, boolean isPrimitive) {
-      this.name = name;
-      this.isPrimitive = isPrimitive;
-    }
-
-    final boolean isPrimitive;
-    final String name;
-
-    public boolean isPrimitive() {
-      return isPrimitive;
-    }
-
-    public String getName() {
-      return name;
-    }
-  }
-
-  public static TypeDescription createBoolean() {
-    return new TypeDescription(Category.BOOLEAN);
-  }
-
-  public static TypeDescription createByte() {
-    return new TypeDescription(Category.BYTE);
-  }
-
-  public static TypeDescription createShort() {
-    return new TypeDescription(Category.SHORT);
-  }
-
-  public static TypeDescription createInt() {
-    return new TypeDescription(Category.INT);
-  }
-
-  public static TypeDescription createLong() {
-    return new TypeDescription(Category.LONG);
-  }
-
-  public static TypeDescription createFloat() {
-    return new TypeDescription(Category.FLOAT);
-  }
-
-  public static TypeDescription createDouble() {
-    return new TypeDescription(Category.DOUBLE);
-  }
-
-  public static TypeDescription createString() {
-    return new TypeDescription(Category.STRING);
-  }
-
-  public static TypeDescription createDate() {
-    return new TypeDescription(Category.DATE);
-  }
-
-  public static TypeDescription createTimestamp() {
-    return new TypeDescription(Category.TIMESTAMP);
-  }
-
-  public static TypeDescription createBinary() {
-    return new TypeDescription(Category.BINARY);
-  }
-
-  public static TypeDescription createDecimal() {
-    return new TypeDescription(Category.DECIMAL);
-  }
-
-  /**
-   * For decimal types, set the precision.
-   * @param precision the new precision
-   * @return this
-   */
-  public TypeDescription withPrecision(int precision) {
-    if (category != Category.DECIMAL) {
-      throw new IllegalArgumentException("precision is only allowed on decimal"+
-         " and not " + category.name);
-    } else if (precision < 1 || precision > MAX_PRECISION || scale > precision){
-      throw new IllegalArgumentException("precision " + precision +
-          " is out of range 1 .. " + scale);
-    }
-    this.precision = precision;
-    return this;
-  }
-
-  /**
-   * For decimal types, set the scale.
-   * @param scale the new scale
-   * @return this
-   */
-  public TypeDescription withScale(int scale) {
-    if (category != Category.DECIMAL) {
-      throw new IllegalArgumentException("scale is only allowed on decimal"+
-          " and not " + category.name);
-    } else if (scale < 0 || scale > MAX_SCALE || scale > precision) {
-      throw new IllegalArgumentException("scale is out of range at " + scale);
-    }
-    this.scale = scale;
-    return this;
-  }
-
-  public static TypeDescription createVarchar() {
-    return new TypeDescription(Category.VARCHAR);
-  }
-
-  public static TypeDescription createChar() {
-    return new TypeDescription(Category.CHAR);
-  }
-
-  /**
-   * Set the maximum length for char and varchar types.
-   * @param maxLength the maximum value
-   * @return this
-   */
-  public TypeDescription withMaxLength(int maxLength) {
-    if (category != Category.VARCHAR && category != Category.CHAR) {
-      throw new IllegalArgumentException("maxLength is only allowed on char" +
-                   " and varchar and not " + category.name);
-    }
-    this.maxLength = maxLength;
-    return this;
-  }
-
-  public static TypeDescription createList(TypeDescription childType) {
-    TypeDescription result = new TypeDescription(Category.LIST);
-    result.children.add(childType);
-    childType.parent = result;
-    return result;
-  }
-
-  public static TypeDescription createMap(TypeDescription keyType,
-                                          TypeDescription valueType) {
-    TypeDescription result = new TypeDescription(Category.MAP);
-    result.children.add(keyType);
-    result.children.add(valueType);
-    keyType.parent = result;
-    valueType.parent = result;
-    return result;
-  }
-
-  public static TypeDescription createUnion() {
-    return new TypeDescription(Category.UNION);
-  }
-
-  public static TypeDescription createStruct() {
-    return new TypeDescription(Category.STRUCT);
-  }
-
-  /**
-   * Add a child to a union type.
-   * @param child a new child type to add
-   * @return the union type.
-   */
-  public TypeDescription addUnionChild(TypeDescription child) {
-    if (category != Category.UNION) {
-      throw new IllegalArgumentException("Can only add types to union type" +
-          " and not " + category);
-    }
-    children.add(child);
-    child.parent = this;
-    return this;
-  }
-
-  /**
-   * Add a field to a struct type as it is built.
-   * @param field the field name
-   * @param fieldType the type of the field
-   * @return the struct type
-   */
-  public TypeDescription addField(String field, TypeDescription fieldType) {
-    if (category != Category.STRUCT) {
-      throw new IllegalArgumentException("Can only add fields to struct type" +
-          " and not " + category);
-    }
-    fieldNames.add(field);
-    children.add(fieldType);
-    fieldType.parent = this;
-    return this;
-  }
-
-  /**
-   * Get the id for this type.
-   * The first call will cause all of the the ids in tree to be assigned, so
-   * it should not be called before the type is completely built.
-   * @return the sequential id
-   */
-  public int getId() {
-    // if the id hasn't been assigned, assign all of the ids from the root
-    if (id == -1) {
-      TypeDescription root = this;
-      while (root.parent != null) {
-        root = root.parent;
-      }
-      root.assignIds(0);
-    }
-    return id;
-  }
-
-  /**
-   * Get the maximum id assigned to this type or its children.
-   * The first call will cause all of the the ids in tree to be assigned, so
-   * it should not be called before the type is completely built.
-   * @return the maximum id assigned under this type
-   */
-  public int getMaximumId() {
-    // if the id hasn't been assigned, assign all of the ids from the root
-    if (maxId == -1) {
-      TypeDescription root = this;
-      while (root.parent != null) {
-        root = root.parent;
-      }
-      root.assignIds(0);
-    }
-    return maxId;
-  }
-
-  private ColumnVector createColumn() {
-    switch (category) {
-      case BOOLEAN:
-      case BYTE:
-      case SHORT:
-      case INT:
-      case LONG:
-      case TIMESTAMP:
-      case DATE:
-        return new LongColumnVector();
-      case FLOAT:
-      case DOUBLE:
-        return new DoubleColumnVector();
-      case DECIMAL:
-        return new DecimalColumnVector(precision, scale);
-      case STRING:
-      case BINARY:
-      case CHAR:
-      case VARCHAR:
-        return new BytesColumnVector();
-      case STRUCT: {
-        ColumnVector[] fieldVector = new ColumnVector[children.size()];
-        for(int i=0; i < fieldVector.length; ++i) {
-          fieldVector[i] = children.get(i).createColumn();
-        }
-        return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
-                fieldVector);
-      }
-      case UNION: {
-        ColumnVector[] fieldVector = new ColumnVector[children.size()];
-        for(int i=0; i < fieldVector.length; ++i) {
-          fieldVector[i] = children.get(i).createColumn();
-        }
-        return new UnionColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
-            fieldVector);
-      }
-      case LIST:
-        return new ListColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
-            children.get(0).createColumn());
-      case MAP:
-        return new MapColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
-            children.get(0).createColumn(), children.get(1).createColumn());
-      default:
-        throw new IllegalArgumentException("Unknown type " + category);
-    }
-  }
-
-  public VectorizedRowBatch createRowBatch() {
-    VectorizedRowBatch result;
-    if (category == Category.STRUCT) {
-      result = new VectorizedRowBatch(children.size(),
-          VectorizedRowBatch.DEFAULT_SIZE);
-      for(int i=0; i < result.cols.length; ++i) {
-        result.cols[i] = children.get(i).createColumn();
-      }
-    } else {
-      result = new VectorizedRowBatch(1, VectorizedRowBatch.DEFAULT_SIZE);
-      result.cols[0] = createColumn();
-    }
-    result.reset();
-    return result;
-  }
-
-  /**
-   * Get the kind of this type.
-   * @return get the category for this type.
-   */
-  public Category getCategory() {
-    return category;
-  }
-
-  /**
-   * Get the maximum length of the type. Only used for char and varchar types.
-   * @return the maximum length of the string type
-   */
-  public int getMaxLength() {
-    return maxLength;
-  }
-
-  /**
-   * Get the precision of the decimal type.
-   * @return the number of digits for the precision.
-   */
-  public int getPrecision() {
-    return precision;
-  }
-
-  /**
-   * Get the scale of the decimal type.
-   * @return the number of digits for the scale.
-   */
-  public int getScale() {
-    return scale;
-  }
-
-  /**
-   * For struct types, get the list of field names.
-   * @return the list of field names.
-   */
-  public List<String> getFieldNames() {
-    return Collections.unmodifiableList(fieldNames);
-  }
-
-  /**
-   * Get the subtypes of this type.
-   * @return the list of children types
-   */
-  public List<TypeDescription> getChildren() {
-    return children == null ? null : Collections.unmodifiableList(children);
-  }
-
-  /**
-   * Assign ids to all of the nodes under this one.
-   * @param startId the lowest id to assign
-   * @return the next available id
-   */
-  private int assignIds(int startId) {
-    id = startId++;
-    if (children != null) {
-      for (TypeDescription child : children) {
-        startId = child.assignIds(startId);
-      }
-    }
-    maxId = startId - 1;
-    return startId;
-  }
-
-  private TypeDescription(Category category) {
-    this.category = category;
-    if (category.isPrimitive) {
-      children = null;
-    } else {
-      children = new ArrayList<>();
-    }
-    if (category == Category.STRUCT) {
-      fieldNames = new ArrayList<>();
-    } else {
-      fieldNames = null;
-    }
-  }
-
-  private int id = -1;
-  private int maxId = -1;
-  private TypeDescription parent;
-  private final Category category;
-  private final List<TypeDescription> children;
-  private final List<String> fieldNames;
-  private int maxLength = DEFAULT_LENGTH;
-  private int precision = DEFAULT_PRECISION;
-  private int scale = DEFAULT_SCALE;
-
-  public void printToBuffer(StringBuilder buffer) {
-    buffer.append(category.name);
-    switch (category) {
-      case DECIMAL:
-        buffer.append('(');
-        buffer.append(precision);
-        buffer.append(',');
-        buffer.append(scale);
-        buffer.append(')');
-        break;
-      case CHAR:
-      case VARCHAR:
-        buffer.append('(');
-        buffer.append(maxLength);
-        buffer.append(')');
-        break;
-      case LIST:
-      case MAP:
-      case UNION:
-        buffer.append('<');
-        for(int i=0; i < children.size(); ++i) {
-          if (i != 0) {
-            buffer.append(',');
-          }
-          children.get(i).printToBuffer(buffer);
-        }
-        buffer.append('>');
-        break;
-      case STRUCT:
-        buffer.append('<');
-        for(int i=0; i < children.size(); ++i) {
-          if (i != 0) {
-            buffer.append(',');
-          }
-          buffer.append(fieldNames.get(i));
-          buffer.append(':');
-          children.get(i).printToBuffer(buffer);
-        }
-        buffer.append('>');
-        break;
-      default:
-        break;
-    }
-  }
-
-  public String toString() {
-    StringBuilder buffer = new StringBuilder();
-    printToBuffer(buffer);
-    return buffer.toString();
-  }
-
-  private void printJsonToBuffer(String prefix, StringBuilder buffer,
-                                 int indent) {
-    for(int i=0; i < indent; ++i) {
-      buffer.append(' ');
-    }
-    buffer.append(prefix);
-    buffer.append("{\"category\": \"");
-    buffer.append(category.name);
-    buffer.append("\", \"id\": ");
-    buffer.append(getId());
-    buffer.append(", \"max\": ");
-    buffer.append(maxId);
-    switch (category) {
-      case DECIMAL:
-        buffer.append(", \"precision\": ");
-        buffer.append(precision);
-        buffer.append(", \"scale\": ");
-        buffer.append(scale);
-        break;
-      case CHAR:
-      case VARCHAR:
-        buffer.append(", \"length\": ");
-        buffer.append(maxLength);
-        break;
-      case LIST:
-      case MAP:
-      case UNION:
-        buffer.append(", \"children\": [");
-        for(int i=0; i < children.size(); ++i) {
-          buffer.append('\n');
-          children.get(i).printJsonToBuffer("", buffer, indent + 2);
-          if (i != children.size() - 1) {
-            buffer.append(',');
-          }
-        }
-        buffer.append("]");
-        break;
-      case STRUCT:
-        buffer.append(", \"fields\": [");
-        for(int i=0; i < children.size(); ++i) {
-          buffer.append('\n');
-          children.get(i).printJsonToBuffer("\"" + fieldNames.get(i) + "\": ",
-              buffer, indent + 2);
-          if (i != children.size() - 1) {
-            buffer.append(',');
-          }
-        }
-        buffer.append(']');
-        break;
-      default:
-        break;
-    }
-    buffer.append('}');
-  }
-
-  public String toJson() {
-    StringBuilder buffer = new StringBuilder();
-    printJsonToBuffer("", buffer, 0);
-    return buffer.toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
index 6d280c8..e08aaf3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
@@ -39,6 +39,8 @@ import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
+import org.apache.orc.OrcProto;
+import org.apache.orc.TypeDescription;
 
 /**
  * A MapReduce/Hive input format for ORC files.
@@ -64,7 +66,7 @@ public class VectorizedOrcInputFormat extends FileInputFormat<NullWritable, Vect
       /**
        * Do we have schema on read in the configuration variables?
        */
-      TypeDescription schema = OrcUtils.getDesiredRowTypeDescr(conf);
+      TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf);
 
       List<OrcProto.Type> types = file.getTypes();
       Reader.Options options = new Reader.Options();

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
index 1873ed1..1f5927a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
@@ -18,32 +18,12 @@
 
 package org.apache.hadoop.hive.ql.io.orc;
 
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-
 import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.List;
 
 /**
- * The interface for writing ORC files.
+ * The HIVE interface for writing ORC files.
  */
-public interface Writer {
-
-  /**
-   * Get the schema for this writer
-   * @return the file schema
-   */
-  TypeDescription getSchema();
-
-  /**
-   * Add arbitrary meta-data to the ORC file. This may be called at any point
-   * until the Writer is closed. If the same key is passed a second time, the
-   * second value will replace the first.
-   * @param key a key to label the data with.
-   * @param value the contents of the metadata.
-   */
-  void addUserMetadata(String key, ByteBuffer value);
+public interface Writer extends org.apache.orc.Writer {
 
   /**
    * Add a row to the ORC file.
@@ -51,68 +31,4 @@ public interface Writer {
    * @throws IOException
    */
   void addRow(Object row) throws IOException;
-
-  /**
-   * Add a row batch to the ORC file.
-   * @param batch the rows to add
-   */
-  void addRowBatch(VectorizedRowBatch batch) throws IOException;
-
-  /**
-   * Flush all of the buffers and close the file. No methods on this writer
-   * should be called afterwards.
-   * @throws IOException
-   */
-  void close() throws IOException;
-
-  /**
-   * Return the deserialized data size. Raw data size will be compute when
-   * writing the file footer. Hence raw data size value will be available only
-   * after closing the writer.
-   *
-   * @return raw data size
-   */
-  long getRawDataSize();
-
-  /**
-   * Return the number of rows in file. Row count gets updated when flushing
-   * the stripes. To get accurate row count this method should be called after
-   * closing the writer.
-   *
-   * @return row count
-   */
-  long getNumberOfRows();
-
-  /**
-   * Write an intermediate footer on the file such that if the file is
-   * truncated to the returned offset, it would be a valid ORC file.
-   * @return the offset that would be a valid end location for an ORC file
-   */
-  long writeIntermediateFooter() throws IOException;
-
-  /**
-   * Fast stripe append to ORC file. This interface is used for fast ORC file
-   * merge with other ORC files. When merging, the file to be merged should pass
-   * stripe in binary form along with stripe information and stripe statistics.
-   * After appending last stripe of a file, use appendUserMetadata() to append
-   * any user metadata.
-   * @param stripe - stripe as byte array
-   * @param offset - offset within byte array
-   * @param length - length of stripe within byte array
-   * @param stripeInfo - stripe information
-   * @param stripeStatistics - stripe statistics (Protobuf objects can be
-   *                         merged directly)
-   * @throws IOException
-   */
-  public void appendStripe(byte[] stripe, int offset, int length,
-      StripeInformation stripeInfo,
-      OrcProto.StripeStatistics stripeStatistics) throws IOException;
-
-  /**
-   * When fast stripe append is used for merging ORC stripes, after appending
-   * the last stripe from a file, this interface must be used to merge any
-   * user metadata.
-   * @param userMetadata - user metadata
-   */
-  public void appendUserMetadata(List<OrcProto.UserMetadataItem> userMetadata);
 }


Mime
View raw message